10957b409SSimon J. Gerraty /*
20957b409SSimon J. Gerraty * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
30957b409SSimon J. Gerraty *
40957b409SSimon J. Gerraty * Permission is hereby granted, free of charge, to any person obtaining
50957b409SSimon J. Gerraty * a copy of this software and associated documentation files (the
60957b409SSimon J. Gerraty * "Software"), to deal in the Software without restriction, including
70957b409SSimon J. Gerraty * without limitation the rights to use, copy, modify, merge, publish,
80957b409SSimon J. Gerraty * distribute, sublicense, and/or sell copies of the Software, and to
90957b409SSimon J. Gerraty * permit persons to whom the Software is furnished to do so, subject to
100957b409SSimon J. Gerraty * the following conditions:
110957b409SSimon J. Gerraty *
120957b409SSimon J. Gerraty * The above copyright notice and this permission notice shall be
130957b409SSimon J. Gerraty * included in all copies or substantial portions of the Software.
140957b409SSimon J. Gerraty *
150957b409SSimon J. Gerraty * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
160957b409SSimon J. Gerraty * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
170957b409SSimon J. Gerraty * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
180957b409SSimon J. Gerraty * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
190957b409SSimon J. Gerraty * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
200957b409SSimon J. Gerraty * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
210957b409SSimon J. Gerraty * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
220957b409SSimon J. Gerraty * SOFTWARE.
230957b409SSimon J. Gerraty */
240957b409SSimon J. Gerraty
250957b409SSimon J. Gerraty #include "inner.h"
260957b409SSimon J. Gerraty
270957b409SSimon J. Gerraty /*
280957b409SSimon J. Gerraty * If BR_NO_ARITH_SHIFT is undefined, or defined to 0, then we _assume_
290957b409SSimon J. Gerraty * that right-shifting a signed negative integer copies the sign bit
300957b409SSimon J. Gerraty * (arithmetic right-shift). This is "implementation-defined behaviour",
310957b409SSimon J. Gerraty * i.e. it is not undefined, but it may differ between compilers. Each
320957b409SSimon J. Gerraty * compiler is supposed to document its behaviour in that respect. GCC
330957b409SSimon J. Gerraty * explicitly defines that an arithmetic right shift is used. We expect
340957b409SSimon J. Gerraty * all other compilers to do the same, because underlying CPU offer an
350957b409SSimon J. Gerraty * arithmetic right shift opcode that could not be used otherwise.
360957b409SSimon J. Gerraty */
370957b409SSimon J. Gerraty #if BR_NO_ARITH_SHIFT
380957b409SSimon J. Gerraty #define ARSH(x, n) (((uint32_t)(x) >> (n)) \
390957b409SSimon J. Gerraty | ((-((uint32_t)(x) >> 31)) << (32 - (n))))
400957b409SSimon J. Gerraty #define ARSHW(x, n) (((uint64_t)(x) >> (n)) \
410957b409SSimon J. Gerraty | ((-((uint64_t)(x) >> 63)) << (64 - (n))))
420957b409SSimon J. Gerraty #else
430957b409SSimon J. Gerraty #define ARSH(x, n) ((*(int32_t *)&(x)) >> (n))
440957b409SSimon J. Gerraty #define ARSHW(x, n) ((*(int64_t *)&(x)) >> (n))
450957b409SSimon J. Gerraty #endif
460957b409SSimon J. Gerraty
470957b409SSimon J. Gerraty /*
480957b409SSimon J. Gerraty * Convert an integer from unsigned big-endian encoding to a sequence of
490957b409SSimon J. Gerraty * 30-bit words in little-endian order. The final "partial" word is
500957b409SSimon J. Gerraty * returned.
510957b409SSimon J. Gerraty */
520957b409SSimon J. Gerraty static uint32_t
be8_to_le30(uint32_t * dst,const unsigned char * src,size_t len)530957b409SSimon J. Gerraty be8_to_le30(uint32_t *dst, const unsigned char *src, size_t len)
540957b409SSimon J. Gerraty {
550957b409SSimon J. Gerraty uint32_t acc;
560957b409SSimon J. Gerraty int acc_len;
570957b409SSimon J. Gerraty
580957b409SSimon J. Gerraty acc = 0;
590957b409SSimon J. Gerraty acc_len = 0;
600957b409SSimon J. Gerraty while (len -- > 0) {
610957b409SSimon J. Gerraty uint32_t b;
620957b409SSimon J. Gerraty
630957b409SSimon J. Gerraty b = src[len];
640957b409SSimon J. Gerraty if (acc_len < 22) {
650957b409SSimon J. Gerraty acc |= b << acc_len;
660957b409SSimon J. Gerraty acc_len += 8;
670957b409SSimon J. Gerraty } else {
680957b409SSimon J. Gerraty *dst ++ = (acc | (b << acc_len)) & 0x3FFFFFFF;
690957b409SSimon J. Gerraty acc = b >> (30 - acc_len);
700957b409SSimon J. Gerraty acc_len -= 22;
710957b409SSimon J. Gerraty }
720957b409SSimon J. Gerraty }
730957b409SSimon J. Gerraty return acc;
740957b409SSimon J. Gerraty }
750957b409SSimon J. Gerraty
760957b409SSimon J. Gerraty /*
770957b409SSimon J. Gerraty * Convert an integer (30-bit words, little-endian) to unsigned
780957b409SSimon J. Gerraty * big-endian encoding. The total encoding length is provided; all
790957b409SSimon J. Gerraty * the destination bytes will be filled.
800957b409SSimon J. Gerraty */
810957b409SSimon J. Gerraty static void
le30_to_be8(unsigned char * dst,size_t len,const uint32_t * src)820957b409SSimon J. Gerraty le30_to_be8(unsigned char *dst, size_t len, const uint32_t *src)
830957b409SSimon J. Gerraty {
840957b409SSimon J. Gerraty uint32_t acc;
850957b409SSimon J. Gerraty int acc_len;
860957b409SSimon J. Gerraty
870957b409SSimon J. Gerraty acc = 0;
880957b409SSimon J. Gerraty acc_len = 0;
890957b409SSimon J. Gerraty while (len -- > 0) {
900957b409SSimon J. Gerraty if (acc_len < 8) {
910957b409SSimon J. Gerraty uint32_t w;
920957b409SSimon J. Gerraty
930957b409SSimon J. Gerraty w = *src ++;
940957b409SSimon J. Gerraty dst[len] = (unsigned char)(acc | (w << acc_len));
950957b409SSimon J. Gerraty acc = w >> (8 - acc_len);
960957b409SSimon J. Gerraty acc_len += 22;
970957b409SSimon J. Gerraty } else {
980957b409SSimon J. Gerraty dst[len] = (unsigned char)acc;
990957b409SSimon J. Gerraty acc >>= 8;
1000957b409SSimon J. Gerraty acc_len -= 8;
1010957b409SSimon J. Gerraty }
1020957b409SSimon J. Gerraty }
1030957b409SSimon J. Gerraty }
1040957b409SSimon J. Gerraty
1050957b409SSimon J. Gerraty /*
1060957b409SSimon J. Gerraty * Multiply two integers. Source integers are represented as arrays of
1070957b409SSimon J. Gerraty * nine 30-bit words, for values up to 2^270-1. Result is encoded over
1080957b409SSimon J. Gerraty * 18 words of 30 bits each.
1090957b409SSimon J. Gerraty */
1100957b409SSimon J. Gerraty static void
mul9(uint32_t * d,const uint32_t * a,const uint32_t * b)1110957b409SSimon J. Gerraty mul9(uint32_t *d, const uint32_t *a, const uint32_t *b)
1120957b409SSimon J. Gerraty {
1130957b409SSimon J. Gerraty /*
1140957b409SSimon J. Gerraty * Maximum intermediate result is no more than
1150957b409SSimon J. Gerraty * 10376293531797946367, which fits in 64 bits. Reason:
1160957b409SSimon J. Gerraty *
1170957b409SSimon J. Gerraty * 10376293531797946367 = 9 * (2^30-1)^2 + 9663676406
1180957b409SSimon J. Gerraty * 10376293531797946367 < 9663676407 * 2^30
1190957b409SSimon J. Gerraty *
1200957b409SSimon J. Gerraty * Thus, adding together 9 products of 30-bit integers, with
1210957b409SSimon J. Gerraty * a carry of at most 9663676406, yields an integer that fits
1220957b409SSimon J. Gerraty * on 64 bits and generates a carry of at most 9663676406.
1230957b409SSimon J. Gerraty */
1240957b409SSimon J. Gerraty uint64_t t[17];
1250957b409SSimon J. Gerraty uint64_t cc;
1260957b409SSimon J. Gerraty int i;
1270957b409SSimon J. Gerraty
1280957b409SSimon J. Gerraty t[ 0] = MUL31(a[0], b[0]);
1290957b409SSimon J. Gerraty t[ 1] = MUL31(a[0], b[1])
1300957b409SSimon J. Gerraty + MUL31(a[1], b[0]);
1310957b409SSimon J. Gerraty t[ 2] = MUL31(a[0], b[2])
1320957b409SSimon J. Gerraty + MUL31(a[1], b[1])
1330957b409SSimon J. Gerraty + MUL31(a[2], b[0]);
1340957b409SSimon J. Gerraty t[ 3] = MUL31(a[0], b[3])
1350957b409SSimon J. Gerraty + MUL31(a[1], b[2])
1360957b409SSimon J. Gerraty + MUL31(a[2], b[1])
1370957b409SSimon J. Gerraty + MUL31(a[3], b[0]);
1380957b409SSimon J. Gerraty t[ 4] = MUL31(a[0], b[4])
1390957b409SSimon J. Gerraty + MUL31(a[1], b[3])
1400957b409SSimon J. Gerraty + MUL31(a[2], b[2])
1410957b409SSimon J. Gerraty + MUL31(a[3], b[1])
1420957b409SSimon J. Gerraty + MUL31(a[4], b[0]);
1430957b409SSimon J. Gerraty t[ 5] = MUL31(a[0], b[5])
1440957b409SSimon J. Gerraty + MUL31(a[1], b[4])
1450957b409SSimon J. Gerraty + MUL31(a[2], b[3])
1460957b409SSimon J. Gerraty + MUL31(a[3], b[2])
1470957b409SSimon J. Gerraty + MUL31(a[4], b[1])
1480957b409SSimon J. Gerraty + MUL31(a[5], b[0]);
1490957b409SSimon J. Gerraty t[ 6] = MUL31(a[0], b[6])
1500957b409SSimon J. Gerraty + MUL31(a[1], b[5])
1510957b409SSimon J. Gerraty + MUL31(a[2], b[4])
1520957b409SSimon J. Gerraty + MUL31(a[3], b[3])
1530957b409SSimon J. Gerraty + MUL31(a[4], b[2])
1540957b409SSimon J. Gerraty + MUL31(a[5], b[1])
1550957b409SSimon J. Gerraty + MUL31(a[6], b[0]);
1560957b409SSimon J. Gerraty t[ 7] = MUL31(a[0], b[7])
1570957b409SSimon J. Gerraty + MUL31(a[1], b[6])
1580957b409SSimon J. Gerraty + MUL31(a[2], b[5])
1590957b409SSimon J. Gerraty + MUL31(a[3], b[4])
1600957b409SSimon J. Gerraty + MUL31(a[4], b[3])
1610957b409SSimon J. Gerraty + MUL31(a[5], b[2])
1620957b409SSimon J. Gerraty + MUL31(a[6], b[1])
1630957b409SSimon J. Gerraty + MUL31(a[7], b[0]);
1640957b409SSimon J. Gerraty t[ 8] = MUL31(a[0], b[8])
1650957b409SSimon J. Gerraty + MUL31(a[1], b[7])
1660957b409SSimon J. Gerraty + MUL31(a[2], b[6])
1670957b409SSimon J. Gerraty + MUL31(a[3], b[5])
1680957b409SSimon J. Gerraty + MUL31(a[4], b[4])
1690957b409SSimon J. Gerraty + MUL31(a[5], b[3])
1700957b409SSimon J. Gerraty + MUL31(a[6], b[2])
1710957b409SSimon J. Gerraty + MUL31(a[7], b[1])
1720957b409SSimon J. Gerraty + MUL31(a[8], b[0]);
1730957b409SSimon J. Gerraty t[ 9] = MUL31(a[1], b[8])
1740957b409SSimon J. Gerraty + MUL31(a[2], b[7])
1750957b409SSimon J. Gerraty + MUL31(a[3], b[6])
1760957b409SSimon J. Gerraty + MUL31(a[4], b[5])
1770957b409SSimon J. Gerraty + MUL31(a[5], b[4])
1780957b409SSimon J. Gerraty + MUL31(a[6], b[3])
1790957b409SSimon J. Gerraty + MUL31(a[7], b[2])
1800957b409SSimon J. Gerraty + MUL31(a[8], b[1]);
1810957b409SSimon J. Gerraty t[10] = MUL31(a[2], b[8])
1820957b409SSimon J. Gerraty + MUL31(a[3], b[7])
1830957b409SSimon J. Gerraty + MUL31(a[4], b[6])
1840957b409SSimon J. Gerraty + MUL31(a[5], b[5])
1850957b409SSimon J. Gerraty + MUL31(a[6], b[4])
1860957b409SSimon J. Gerraty + MUL31(a[7], b[3])
1870957b409SSimon J. Gerraty + MUL31(a[8], b[2]);
1880957b409SSimon J. Gerraty t[11] = MUL31(a[3], b[8])
1890957b409SSimon J. Gerraty + MUL31(a[4], b[7])
1900957b409SSimon J. Gerraty + MUL31(a[5], b[6])
1910957b409SSimon J. Gerraty + MUL31(a[6], b[5])
1920957b409SSimon J. Gerraty + MUL31(a[7], b[4])
1930957b409SSimon J. Gerraty + MUL31(a[8], b[3]);
1940957b409SSimon J. Gerraty t[12] = MUL31(a[4], b[8])
1950957b409SSimon J. Gerraty + MUL31(a[5], b[7])
1960957b409SSimon J. Gerraty + MUL31(a[6], b[6])
1970957b409SSimon J. Gerraty + MUL31(a[7], b[5])
1980957b409SSimon J. Gerraty + MUL31(a[8], b[4]);
1990957b409SSimon J. Gerraty t[13] = MUL31(a[5], b[8])
2000957b409SSimon J. Gerraty + MUL31(a[6], b[7])
2010957b409SSimon J. Gerraty + MUL31(a[7], b[6])
2020957b409SSimon J. Gerraty + MUL31(a[8], b[5]);
2030957b409SSimon J. Gerraty t[14] = MUL31(a[6], b[8])
2040957b409SSimon J. Gerraty + MUL31(a[7], b[7])
2050957b409SSimon J. Gerraty + MUL31(a[8], b[6]);
2060957b409SSimon J. Gerraty t[15] = MUL31(a[7], b[8])
2070957b409SSimon J. Gerraty + MUL31(a[8], b[7]);
2080957b409SSimon J. Gerraty t[16] = MUL31(a[8], b[8]);
2090957b409SSimon J. Gerraty
2100957b409SSimon J. Gerraty /*
2110957b409SSimon J. Gerraty * Propagate carries.
2120957b409SSimon J. Gerraty */
2130957b409SSimon J. Gerraty cc = 0;
2140957b409SSimon J. Gerraty for (i = 0; i < 17; i ++) {
2150957b409SSimon J. Gerraty uint64_t w;
2160957b409SSimon J. Gerraty
2170957b409SSimon J. Gerraty w = t[i] + cc;
2180957b409SSimon J. Gerraty d[i] = (uint32_t)w & 0x3FFFFFFF;
2190957b409SSimon J. Gerraty cc = w >> 30;
2200957b409SSimon J. Gerraty }
2210957b409SSimon J. Gerraty d[17] = (uint32_t)cc;
2220957b409SSimon J. Gerraty }
2230957b409SSimon J. Gerraty
2240957b409SSimon J. Gerraty /*
2250957b409SSimon J. Gerraty * Square a 270-bit integer, represented as an array of nine 30-bit words.
2260957b409SSimon J. Gerraty * Result uses 18 words of 30 bits each.
2270957b409SSimon J. Gerraty */
2280957b409SSimon J. Gerraty static void
square9(uint32_t * d,const uint32_t * a)2290957b409SSimon J. Gerraty square9(uint32_t *d, const uint32_t *a)
2300957b409SSimon J. Gerraty {
2310957b409SSimon J. Gerraty uint64_t t[17];
2320957b409SSimon J. Gerraty uint64_t cc;
2330957b409SSimon J. Gerraty int i;
2340957b409SSimon J. Gerraty
2350957b409SSimon J. Gerraty t[ 0] = MUL31(a[0], a[0]);
2360957b409SSimon J. Gerraty t[ 1] = ((MUL31(a[0], a[1])) << 1);
2370957b409SSimon J. Gerraty t[ 2] = MUL31(a[1], a[1])
2380957b409SSimon J. Gerraty + ((MUL31(a[0], a[2])) << 1);
2390957b409SSimon J. Gerraty t[ 3] = ((MUL31(a[0], a[3])
2400957b409SSimon J. Gerraty + MUL31(a[1], a[2])) << 1);
2410957b409SSimon J. Gerraty t[ 4] = MUL31(a[2], a[2])
2420957b409SSimon J. Gerraty + ((MUL31(a[0], a[4])
2430957b409SSimon J. Gerraty + MUL31(a[1], a[3])) << 1);
2440957b409SSimon J. Gerraty t[ 5] = ((MUL31(a[0], a[5])
2450957b409SSimon J. Gerraty + MUL31(a[1], a[4])
2460957b409SSimon J. Gerraty + MUL31(a[2], a[3])) << 1);
2470957b409SSimon J. Gerraty t[ 6] = MUL31(a[3], a[3])
2480957b409SSimon J. Gerraty + ((MUL31(a[0], a[6])
2490957b409SSimon J. Gerraty + MUL31(a[1], a[5])
2500957b409SSimon J. Gerraty + MUL31(a[2], a[4])) << 1);
2510957b409SSimon J. Gerraty t[ 7] = ((MUL31(a[0], a[7])
2520957b409SSimon J. Gerraty + MUL31(a[1], a[6])
2530957b409SSimon J. Gerraty + MUL31(a[2], a[5])
2540957b409SSimon J. Gerraty + MUL31(a[3], a[4])) << 1);
2550957b409SSimon J. Gerraty t[ 8] = MUL31(a[4], a[4])
2560957b409SSimon J. Gerraty + ((MUL31(a[0], a[8])
2570957b409SSimon J. Gerraty + MUL31(a[1], a[7])
2580957b409SSimon J. Gerraty + MUL31(a[2], a[6])
2590957b409SSimon J. Gerraty + MUL31(a[3], a[5])) << 1);
2600957b409SSimon J. Gerraty t[ 9] = ((MUL31(a[1], a[8])
2610957b409SSimon J. Gerraty + MUL31(a[2], a[7])
2620957b409SSimon J. Gerraty + MUL31(a[3], a[6])
2630957b409SSimon J. Gerraty + MUL31(a[4], a[5])) << 1);
2640957b409SSimon J. Gerraty t[10] = MUL31(a[5], a[5])
2650957b409SSimon J. Gerraty + ((MUL31(a[2], a[8])
2660957b409SSimon J. Gerraty + MUL31(a[3], a[7])
2670957b409SSimon J. Gerraty + MUL31(a[4], a[6])) << 1);
2680957b409SSimon J. Gerraty t[11] = ((MUL31(a[3], a[8])
2690957b409SSimon J. Gerraty + MUL31(a[4], a[7])
2700957b409SSimon J. Gerraty + MUL31(a[5], a[6])) << 1);
2710957b409SSimon J. Gerraty t[12] = MUL31(a[6], a[6])
2720957b409SSimon J. Gerraty + ((MUL31(a[4], a[8])
2730957b409SSimon J. Gerraty + MUL31(a[5], a[7])) << 1);
2740957b409SSimon J. Gerraty t[13] = ((MUL31(a[5], a[8])
2750957b409SSimon J. Gerraty + MUL31(a[6], a[7])) << 1);
2760957b409SSimon J. Gerraty t[14] = MUL31(a[7], a[7])
2770957b409SSimon J. Gerraty + ((MUL31(a[6], a[8])) << 1);
2780957b409SSimon J. Gerraty t[15] = ((MUL31(a[7], a[8])) << 1);
2790957b409SSimon J. Gerraty t[16] = MUL31(a[8], a[8]);
2800957b409SSimon J. Gerraty
2810957b409SSimon J. Gerraty /*
2820957b409SSimon J. Gerraty * Propagate carries.
2830957b409SSimon J. Gerraty */
2840957b409SSimon J. Gerraty cc = 0;
2850957b409SSimon J. Gerraty for (i = 0; i < 17; i ++) {
2860957b409SSimon J. Gerraty uint64_t w;
2870957b409SSimon J. Gerraty
2880957b409SSimon J. Gerraty w = t[i] + cc;
2890957b409SSimon J. Gerraty d[i] = (uint32_t)w & 0x3FFFFFFF;
2900957b409SSimon J. Gerraty cc = w >> 30;
2910957b409SSimon J. Gerraty }
2920957b409SSimon J. Gerraty d[17] = (uint32_t)cc;
2930957b409SSimon J. Gerraty }
2940957b409SSimon J. Gerraty
2950957b409SSimon J. Gerraty /*
2960957b409SSimon J. Gerraty * Base field modulus for P-256.
2970957b409SSimon J. Gerraty */
2980957b409SSimon J. Gerraty static const uint32_t F256[] = {
2990957b409SSimon J. Gerraty
3000957b409SSimon J. Gerraty 0x3FFFFFFF, 0x3FFFFFFF, 0x3FFFFFFF, 0x0000003F, 0x00000000,
3010957b409SSimon J. Gerraty 0x00000000, 0x00001000, 0x3FFFC000, 0x0000FFFF
3020957b409SSimon J. Gerraty };
3030957b409SSimon J. Gerraty
3040957b409SSimon J. Gerraty /*
3050957b409SSimon J. Gerraty * The 'b' curve equation coefficient for P-256.
3060957b409SSimon J. Gerraty */
3070957b409SSimon J. Gerraty static const uint32_t P256_B[] = {
3080957b409SSimon J. Gerraty
3090957b409SSimon J. Gerraty 0x27D2604B, 0x2F38F0F8, 0x053B0F63, 0x0741AC33, 0x1886BC65,
3100957b409SSimon J. Gerraty 0x2EF555DA, 0x293E7B3E, 0x0D762A8E, 0x00005AC6
3110957b409SSimon J. Gerraty };
3120957b409SSimon J. Gerraty
3130957b409SSimon J. Gerraty /*
3140957b409SSimon J. Gerraty * Addition in the field. Source operands shall fit on 257 bits; output
3150957b409SSimon J. Gerraty * will be lower than twice the modulus.
3160957b409SSimon J. Gerraty */
3170957b409SSimon J. Gerraty static void
add_f256(uint32_t * d,const uint32_t * a,const uint32_t * b)3180957b409SSimon J. Gerraty add_f256(uint32_t *d, const uint32_t *a, const uint32_t *b)
3190957b409SSimon J. Gerraty {
3200957b409SSimon J. Gerraty uint32_t w, cc;
3210957b409SSimon J. Gerraty int i;
3220957b409SSimon J. Gerraty
3230957b409SSimon J. Gerraty cc = 0;
3240957b409SSimon J. Gerraty for (i = 0; i < 9; i ++) {
3250957b409SSimon J. Gerraty w = a[i] + b[i] + cc;
3260957b409SSimon J. Gerraty d[i] = w & 0x3FFFFFFF;
3270957b409SSimon J. Gerraty cc = w >> 30;
3280957b409SSimon J. Gerraty }
3290957b409SSimon J. Gerraty w >>= 16;
3300957b409SSimon J. Gerraty d[8] &= 0xFFFF;
3310957b409SSimon J. Gerraty d[3] -= w << 6;
3320957b409SSimon J. Gerraty d[6] -= w << 12;
3330957b409SSimon J. Gerraty d[7] += w << 14;
3340957b409SSimon J. Gerraty cc = w;
3350957b409SSimon J. Gerraty for (i = 0; i < 9; i ++) {
3360957b409SSimon J. Gerraty w = d[i] + cc;
3370957b409SSimon J. Gerraty d[i] = w & 0x3FFFFFFF;
3380957b409SSimon J. Gerraty cc = ARSH(w, 30);
3390957b409SSimon J. Gerraty }
3400957b409SSimon J. Gerraty }
3410957b409SSimon J. Gerraty
3420957b409SSimon J. Gerraty /*
3430957b409SSimon J. Gerraty * Subtraction in the field. Source operands shall be smaller than twice
3440957b409SSimon J. Gerraty * the modulus; the result will fulfil the same property.
3450957b409SSimon J. Gerraty */
3460957b409SSimon J. Gerraty static void
sub_f256(uint32_t * d,const uint32_t * a,const uint32_t * b)3470957b409SSimon J. Gerraty sub_f256(uint32_t *d, const uint32_t *a, const uint32_t *b)
3480957b409SSimon J. Gerraty {
3490957b409SSimon J. Gerraty uint32_t w, cc;
3500957b409SSimon J. Gerraty int i;
3510957b409SSimon J. Gerraty
3520957b409SSimon J. Gerraty /*
3530957b409SSimon J. Gerraty * We really compute a - b + 2*p to make sure that the result is
3540957b409SSimon J. Gerraty * positive.
3550957b409SSimon J. Gerraty */
3560957b409SSimon J. Gerraty w = a[0] - b[0] - 0x00002;
3570957b409SSimon J. Gerraty d[0] = w & 0x3FFFFFFF;
3580957b409SSimon J. Gerraty w = a[1] - b[1] + ARSH(w, 30);
3590957b409SSimon J. Gerraty d[1] = w & 0x3FFFFFFF;
3600957b409SSimon J. Gerraty w = a[2] - b[2] + ARSH(w, 30);
3610957b409SSimon J. Gerraty d[2] = w & 0x3FFFFFFF;
3620957b409SSimon J. Gerraty w = a[3] - b[3] + ARSH(w, 30) + 0x00080;
3630957b409SSimon J. Gerraty d[3] = w & 0x3FFFFFFF;
3640957b409SSimon J. Gerraty w = a[4] - b[4] + ARSH(w, 30);
3650957b409SSimon J. Gerraty d[4] = w & 0x3FFFFFFF;
3660957b409SSimon J. Gerraty w = a[5] - b[5] + ARSH(w, 30);
3670957b409SSimon J. Gerraty d[5] = w & 0x3FFFFFFF;
3680957b409SSimon J. Gerraty w = a[6] - b[6] + ARSH(w, 30) + 0x02000;
3690957b409SSimon J. Gerraty d[6] = w & 0x3FFFFFFF;
3700957b409SSimon J. Gerraty w = a[7] - b[7] + ARSH(w, 30) - 0x08000;
3710957b409SSimon J. Gerraty d[7] = w & 0x3FFFFFFF;
3720957b409SSimon J. Gerraty w = a[8] - b[8] + ARSH(w, 30) + 0x20000;
3730957b409SSimon J. Gerraty d[8] = w & 0xFFFF;
3740957b409SSimon J. Gerraty w >>= 16;
3750957b409SSimon J. Gerraty d[8] &= 0xFFFF;
3760957b409SSimon J. Gerraty d[3] -= w << 6;
3770957b409SSimon J. Gerraty d[6] -= w << 12;
3780957b409SSimon J. Gerraty d[7] += w << 14;
3790957b409SSimon J. Gerraty cc = w;
3800957b409SSimon J. Gerraty for (i = 0; i < 9; i ++) {
3810957b409SSimon J. Gerraty w = d[i] + cc;
3820957b409SSimon J. Gerraty d[i] = w & 0x3FFFFFFF;
3830957b409SSimon J. Gerraty cc = ARSH(w, 30);
3840957b409SSimon J. Gerraty }
3850957b409SSimon J. Gerraty }
3860957b409SSimon J. Gerraty
3870957b409SSimon J. Gerraty /*
3880957b409SSimon J. Gerraty * Compute a multiplication in F256. Source operands shall be less than
3890957b409SSimon J. Gerraty * twice the modulus.
3900957b409SSimon J. Gerraty */
3910957b409SSimon J. Gerraty static void
mul_f256(uint32_t * d,const uint32_t * a,const uint32_t * b)3920957b409SSimon J. Gerraty mul_f256(uint32_t *d, const uint32_t *a, const uint32_t *b)
3930957b409SSimon J. Gerraty {
3940957b409SSimon J. Gerraty uint32_t t[18];
3950957b409SSimon J. Gerraty uint64_t s[18];
3960957b409SSimon J. Gerraty uint64_t cc, x;
3970957b409SSimon J. Gerraty uint32_t z, c;
3980957b409SSimon J. Gerraty int i;
3990957b409SSimon J. Gerraty
4000957b409SSimon J. Gerraty mul9(t, a, b);
4010957b409SSimon J. Gerraty
4020957b409SSimon J. Gerraty /*
4030957b409SSimon J. Gerraty * Modular reduction: each high word in added/subtracted where
4040957b409SSimon J. Gerraty * necessary.
4050957b409SSimon J. Gerraty *
4060957b409SSimon J. Gerraty * The modulus is:
4070957b409SSimon J. Gerraty * p = 2^256 - 2^224 + 2^192 + 2^96 - 1
4080957b409SSimon J. Gerraty * Therefore:
4090957b409SSimon J. Gerraty * 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p
4100957b409SSimon J. Gerraty *
4110957b409SSimon J. Gerraty * For a word x at bit offset n (n >= 256), we have:
4120957b409SSimon J. Gerraty * x*2^n = x*2^(n-32) - x*2^(n-64)
4130957b409SSimon J. Gerraty * - x*2^(n - 160) + x*2^(n-256) mod p
4140957b409SSimon J. Gerraty *
4150957b409SSimon J. Gerraty * Thus, we can nullify the high word if we reinject it at some
4160957b409SSimon J. Gerraty * proper emplacements.
4170957b409SSimon J. Gerraty *
4180957b409SSimon J. Gerraty * We use 64-bit intermediate words to allow for carries to
4190957b409SSimon J. Gerraty * accumulate easily, before performing the final propagation.
4200957b409SSimon J. Gerraty */
4210957b409SSimon J. Gerraty for (i = 0; i < 18; i ++) {
4220957b409SSimon J. Gerraty s[i] = t[i];
4230957b409SSimon J. Gerraty }
4240957b409SSimon J. Gerraty
4250957b409SSimon J. Gerraty for (i = 17; i >= 9; i --) {
4260957b409SSimon J. Gerraty uint64_t y;
4270957b409SSimon J. Gerraty
4280957b409SSimon J. Gerraty y = s[i];
4290957b409SSimon J. Gerraty s[i - 1] += ARSHW(y, 2);
4300957b409SSimon J. Gerraty s[i - 2] += (y << 28) & 0x3FFFFFFF;
4310957b409SSimon J. Gerraty s[i - 2] -= ARSHW(y, 4);
4320957b409SSimon J. Gerraty s[i - 3] -= (y << 26) & 0x3FFFFFFF;
4330957b409SSimon J. Gerraty s[i - 5] -= ARSHW(y, 10);
4340957b409SSimon J. Gerraty s[i - 6] -= (y << 20) & 0x3FFFFFFF;
4350957b409SSimon J. Gerraty s[i - 8] += ARSHW(y, 16);
4360957b409SSimon J. Gerraty s[i - 9] += (y << 14) & 0x3FFFFFFF;
4370957b409SSimon J. Gerraty }
4380957b409SSimon J. Gerraty
4390957b409SSimon J. Gerraty /*
4400957b409SSimon J. Gerraty * Carry propagation must be signed. Moreover, we may have overdone
4410957b409SSimon J. Gerraty * it a bit, and obtain a negative result.
4420957b409SSimon J. Gerraty *
4430957b409SSimon J. Gerraty * The loop above ran 9 times; each time, each word was augmented
4440957b409SSimon J. Gerraty * by at most one extra word (in absolute value). Thus, the top
4450957b409SSimon J. Gerraty * word must in fine fit in 39 bits, so the carry below will fit
4460957b409SSimon J. Gerraty * on 9 bits.
4470957b409SSimon J. Gerraty */
4480957b409SSimon J. Gerraty cc = 0;
4490957b409SSimon J. Gerraty for (i = 0; i < 9; i ++) {
4500957b409SSimon J. Gerraty x = s[i] + cc;
4510957b409SSimon J. Gerraty d[i] = (uint32_t)x & 0x3FFFFFFF;
4520957b409SSimon J. Gerraty cc = ARSHW(x, 30);
4530957b409SSimon J. Gerraty }
4540957b409SSimon J. Gerraty
4550957b409SSimon J. Gerraty /*
4560957b409SSimon J. Gerraty * All nine words fit on 30 bits, but there may be an extra
4570957b409SSimon J. Gerraty * carry for a few bits (at most 9), and that carry may be
4580957b409SSimon J. Gerraty * negative. Moreover, we want the result to fit on 257 bits.
4590957b409SSimon J. Gerraty * The two lines below ensure that the word in d[] has length
4600957b409SSimon J. Gerraty * 256 bits, and the (signed) carry (beyond 2^256) is in cc. The
4610957b409SSimon J. Gerraty * significant length of cc is less than 24 bits, so we will be
4620957b409SSimon J. Gerraty * able to switch to 32-bit operations.
4630957b409SSimon J. Gerraty */
4640957b409SSimon J. Gerraty cc = ARSHW(x, 16);
4650957b409SSimon J. Gerraty d[8] &= 0xFFFF;
4660957b409SSimon J. Gerraty
4670957b409SSimon J. Gerraty /*
4680957b409SSimon J. Gerraty * One extra round of reduction, for cc*2^256, which means
4690957b409SSimon J. Gerraty * adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative)
4700957b409SSimon J. Gerraty * value. If cc is negative, then it may happen (rarely, but
4710957b409SSimon J. Gerraty * not neglectibly so) that the result would be negative. In
4720957b409SSimon J. Gerraty * order to avoid that, if cc is negative, then we add the
4730957b409SSimon J. Gerraty * modulus once. Note that if cc is negative, then propagating
4740957b409SSimon J. Gerraty * that carry must yield a value lower than the modulus, so
4750957b409SSimon J. Gerraty * adding the modulus once will keep the final result under
4760957b409SSimon J. Gerraty * twice the modulus.
4770957b409SSimon J. Gerraty */
4780957b409SSimon J. Gerraty z = (uint32_t)cc;
4790957b409SSimon J. Gerraty d[3] -= z << 6;
4800957b409SSimon J. Gerraty d[6] -= (z << 12) & 0x3FFFFFFF;
4810957b409SSimon J. Gerraty d[7] -= ARSH(z, 18);
4820957b409SSimon J. Gerraty d[7] += (z << 14) & 0x3FFFFFFF;
4830957b409SSimon J. Gerraty d[8] += ARSH(z, 16);
4840957b409SSimon J. Gerraty c = z >> 31;
4850957b409SSimon J. Gerraty d[0] -= c;
4860957b409SSimon J. Gerraty d[3] += c << 6;
4870957b409SSimon J. Gerraty d[6] += c << 12;
4880957b409SSimon J. Gerraty d[7] -= c << 14;
4890957b409SSimon J. Gerraty d[8] += c << 16;
4900957b409SSimon J. Gerraty for (i = 0; i < 9; i ++) {
4910957b409SSimon J. Gerraty uint32_t w;
4920957b409SSimon J. Gerraty
4930957b409SSimon J. Gerraty w = d[i] + z;
4940957b409SSimon J. Gerraty d[i] = w & 0x3FFFFFFF;
4950957b409SSimon J. Gerraty z = ARSH(w, 30);
4960957b409SSimon J. Gerraty }
4970957b409SSimon J. Gerraty }
4980957b409SSimon J. Gerraty
4990957b409SSimon J. Gerraty /*
5000957b409SSimon J. Gerraty * Compute a square in F256. Source operand shall be less than
5010957b409SSimon J. Gerraty * twice the modulus.
5020957b409SSimon J. Gerraty */
5030957b409SSimon J. Gerraty static void
square_f256(uint32_t * d,const uint32_t * a)5040957b409SSimon J. Gerraty square_f256(uint32_t *d, const uint32_t *a)
5050957b409SSimon J. Gerraty {
5060957b409SSimon J. Gerraty uint32_t t[18];
5070957b409SSimon J. Gerraty uint64_t s[18];
5080957b409SSimon J. Gerraty uint64_t cc, x;
5090957b409SSimon J. Gerraty uint32_t z, c;
5100957b409SSimon J. Gerraty int i;
5110957b409SSimon J. Gerraty
5120957b409SSimon J. Gerraty square9(t, a);
5130957b409SSimon J. Gerraty
5140957b409SSimon J. Gerraty /*
5150957b409SSimon J. Gerraty * Modular reduction: each high word in added/subtracted where
5160957b409SSimon J. Gerraty * necessary.
5170957b409SSimon J. Gerraty *
5180957b409SSimon J. Gerraty * The modulus is:
5190957b409SSimon J. Gerraty * p = 2^256 - 2^224 + 2^192 + 2^96 - 1
5200957b409SSimon J. Gerraty * Therefore:
5210957b409SSimon J. Gerraty * 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p
5220957b409SSimon J. Gerraty *
5230957b409SSimon J. Gerraty * For a word x at bit offset n (n >= 256), we have:
5240957b409SSimon J. Gerraty * x*2^n = x*2^(n-32) - x*2^(n-64)
5250957b409SSimon J. Gerraty * - x*2^(n - 160) + x*2^(n-256) mod p
5260957b409SSimon J. Gerraty *
5270957b409SSimon J. Gerraty * Thus, we can nullify the high word if we reinject it at some
5280957b409SSimon J. Gerraty * proper emplacements.
5290957b409SSimon J. Gerraty *
5300957b409SSimon J. Gerraty * We use 64-bit intermediate words to allow for carries to
5310957b409SSimon J. Gerraty * accumulate easily, before performing the final propagation.
5320957b409SSimon J. Gerraty */
5330957b409SSimon J. Gerraty for (i = 0; i < 18; i ++) {
5340957b409SSimon J. Gerraty s[i] = t[i];
5350957b409SSimon J. Gerraty }
5360957b409SSimon J. Gerraty
5370957b409SSimon J. Gerraty for (i = 17; i >= 9; i --) {
5380957b409SSimon J. Gerraty uint64_t y;
5390957b409SSimon J. Gerraty
5400957b409SSimon J. Gerraty y = s[i];
5410957b409SSimon J. Gerraty s[i - 1] += ARSHW(y, 2);
5420957b409SSimon J. Gerraty s[i - 2] += (y << 28) & 0x3FFFFFFF;
5430957b409SSimon J. Gerraty s[i - 2] -= ARSHW(y, 4);
5440957b409SSimon J. Gerraty s[i - 3] -= (y << 26) & 0x3FFFFFFF;
5450957b409SSimon J. Gerraty s[i - 5] -= ARSHW(y, 10);
5460957b409SSimon J. Gerraty s[i - 6] -= (y << 20) & 0x3FFFFFFF;
5470957b409SSimon J. Gerraty s[i - 8] += ARSHW(y, 16);
5480957b409SSimon J. Gerraty s[i - 9] += (y << 14) & 0x3FFFFFFF;
5490957b409SSimon J. Gerraty }
5500957b409SSimon J. Gerraty
5510957b409SSimon J. Gerraty /*
5520957b409SSimon J. Gerraty * Carry propagation must be signed. Moreover, we may have overdone
5530957b409SSimon J. Gerraty * it a bit, and obtain a negative result.
5540957b409SSimon J. Gerraty *
5550957b409SSimon J. Gerraty * The loop above ran 9 times; each time, each word was augmented
5560957b409SSimon J. Gerraty * by at most one extra word (in absolute value). Thus, the top
5570957b409SSimon J. Gerraty * word must in fine fit in 39 bits, so the carry below will fit
5580957b409SSimon J. Gerraty * on 9 bits.
5590957b409SSimon J. Gerraty */
5600957b409SSimon J. Gerraty cc = 0;
5610957b409SSimon J. Gerraty for (i = 0; i < 9; i ++) {
5620957b409SSimon J. Gerraty x = s[i] + cc;
5630957b409SSimon J. Gerraty d[i] = (uint32_t)x & 0x3FFFFFFF;
5640957b409SSimon J. Gerraty cc = ARSHW(x, 30);
5650957b409SSimon J. Gerraty }
5660957b409SSimon J. Gerraty
5670957b409SSimon J. Gerraty /*
5680957b409SSimon J. Gerraty * All nine words fit on 30 bits, but there may be an extra
5690957b409SSimon J. Gerraty * carry for a few bits (at most 9), and that carry may be
5700957b409SSimon J. Gerraty * negative. Moreover, we want the result to fit on 257 bits.
5710957b409SSimon J. Gerraty * The two lines below ensure that the word in d[] has length
5720957b409SSimon J. Gerraty * 256 bits, and the (signed) carry (beyond 2^256) is in cc. The
5730957b409SSimon J. Gerraty * significant length of cc is less than 24 bits, so we will be
5740957b409SSimon J. Gerraty * able to switch to 32-bit operations.
5750957b409SSimon J. Gerraty */
5760957b409SSimon J. Gerraty cc = ARSHW(x, 16);
5770957b409SSimon J. Gerraty d[8] &= 0xFFFF;
5780957b409SSimon J. Gerraty
5790957b409SSimon J. Gerraty /*
5800957b409SSimon J. Gerraty * One extra round of reduction, for cc*2^256, which means
5810957b409SSimon J. Gerraty * adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative)
5820957b409SSimon J. Gerraty * value. If cc is negative, then it may happen (rarely, but
5830957b409SSimon J. Gerraty * not neglectibly so) that the result would be negative. In
5840957b409SSimon J. Gerraty * order to avoid that, if cc is negative, then we add the
5850957b409SSimon J. Gerraty * modulus once. Note that if cc is negative, then propagating
5860957b409SSimon J. Gerraty * that carry must yield a value lower than the modulus, so
5870957b409SSimon J. Gerraty * adding the modulus once will keep the final result under
5880957b409SSimon J. Gerraty * twice the modulus.
5890957b409SSimon J. Gerraty */
5900957b409SSimon J. Gerraty z = (uint32_t)cc;
5910957b409SSimon J. Gerraty d[3] -= z << 6;
5920957b409SSimon J. Gerraty d[6] -= (z << 12) & 0x3FFFFFFF;
5930957b409SSimon J. Gerraty d[7] -= ARSH(z, 18);
5940957b409SSimon J. Gerraty d[7] += (z << 14) & 0x3FFFFFFF;
5950957b409SSimon J. Gerraty d[8] += ARSH(z, 16);
5960957b409SSimon J. Gerraty c = z >> 31;
5970957b409SSimon J. Gerraty d[0] -= c;
5980957b409SSimon J. Gerraty d[3] += c << 6;
5990957b409SSimon J. Gerraty d[6] += c << 12;
6000957b409SSimon J. Gerraty d[7] -= c << 14;
6010957b409SSimon J. Gerraty d[8] += c << 16;
6020957b409SSimon J. Gerraty for (i = 0; i < 9; i ++) {
6030957b409SSimon J. Gerraty uint32_t w;
6040957b409SSimon J. Gerraty
6050957b409SSimon J. Gerraty w = d[i] + z;
6060957b409SSimon J. Gerraty d[i] = w & 0x3FFFFFFF;
6070957b409SSimon J. Gerraty z = ARSH(w, 30);
6080957b409SSimon J. Gerraty }
6090957b409SSimon J. Gerraty }
6100957b409SSimon J. Gerraty
6110957b409SSimon J. Gerraty /*
6120957b409SSimon J. Gerraty * Perform a "final reduction" in field F256 (field for curve P-256).
6130957b409SSimon J. Gerraty * The source value must be less than twice the modulus. If the value
6140957b409SSimon J. Gerraty * is not lower than the modulus, then the modulus is subtracted and
6150957b409SSimon J. Gerraty * this function returns 1; otherwise, it leaves it untouched and it
6160957b409SSimon J. Gerraty * returns 0.
6170957b409SSimon J. Gerraty */
6180957b409SSimon J. Gerraty static uint32_t
reduce_final_f256(uint32_t * d)6190957b409SSimon J. Gerraty reduce_final_f256(uint32_t *d)
6200957b409SSimon J. Gerraty {
6210957b409SSimon J. Gerraty uint32_t t[9];
6220957b409SSimon J. Gerraty uint32_t cc;
6230957b409SSimon J. Gerraty int i;
6240957b409SSimon J. Gerraty
6250957b409SSimon J. Gerraty cc = 0;
6260957b409SSimon J. Gerraty for (i = 0; i < 9; i ++) {
6270957b409SSimon J. Gerraty uint32_t w;
6280957b409SSimon J. Gerraty
6290957b409SSimon J. Gerraty w = d[i] - F256[i] - cc;
6300957b409SSimon J. Gerraty cc = w >> 31;
6310957b409SSimon J. Gerraty t[i] = w & 0x3FFFFFFF;
6320957b409SSimon J. Gerraty }
6330957b409SSimon J. Gerraty cc ^= 1;
6340957b409SSimon J. Gerraty CCOPY(cc, d, t, sizeof t);
6350957b409SSimon J. Gerraty return cc;
6360957b409SSimon J. Gerraty }
6370957b409SSimon J. Gerraty
6380957b409SSimon J. Gerraty /*
6390957b409SSimon J. Gerraty * Jacobian coordinates for a point in P-256: affine coordinates (X,Y)
6400957b409SSimon J. Gerraty * are such that:
6410957b409SSimon J. Gerraty * X = x / z^2
6420957b409SSimon J. Gerraty * Y = y / z^3
6430957b409SSimon J. Gerraty * For the point at infinity, z = 0.
6440957b409SSimon J. Gerraty * Each point thus admits many possible representations.
6450957b409SSimon J. Gerraty *
6460957b409SSimon J. Gerraty * Coordinates are represented in arrays of 32-bit integers, each holding
6470957b409SSimon J. Gerraty * 30 bits of data. Values may also be slightly greater than the modulus,
6480957b409SSimon J. Gerraty * but they will always be lower than twice the modulus.
6490957b409SSimon J. Gerraty */
6500957b409SSimon J. Gerraty typedef struct {
6510957b409SSimon J. Gerraty uint32_t x[9];
6520957b409SSimon J. Gerraty uint32_t y[9];
6530957b409SSimon J. Gerraty uint32_t z[9];
6540957b409SSimon J. Gerraty } p256_jacobian;
6550957b409SSimon J. Gerraty
6560957b409SSimon J. Gerraty /*
6570957b409SSimon J. Gerraty * Convert a point to affine coordinates:
6580957b409SSimon J. Gerraty * - If the point is the point at infinity, then all three coordinates
6590957b409SSimon J. Gerraty * are set to 0.
6600957b409SSimon J. Gerraty * - Otherwise, the 'z' coordinate is set to 1, and the 'x' and 'y'
6610957b409SSimon J. Gerraty * coordinates are the 'X' and 'Y' affine coordinates.
6620957b409SSimon J. Gerraty * The coordinates are guaranteed to be lower than the modulus.
6630957b409SSimon J. Gerraty */
6640957b409SSimon J. Gerraty static void
p256_to_affine(p256_jacobian * P)6650957b409SSimon J. Gerraty p256_to_affine(p256_jacobian *P)
6660957b409SSimon J. Gerraty {
6670957b409SSimon J. Gerraty uint32_t t1[9], t2[9];
6680957b409SSimon J. Gerraty int i;
6690957b409SSimon J. Gerraty
6700957b409SSimon J. Gerraty /*
6710957b409SSimon J. Gerraty * Invert z with a modular exponentiation: the modulus is
6720957b409SSimon J. Gerraty * p = 2^256 - 2^224 + 2^192 + 2^96 - 1, and the exponent is
6730957b409SSimon J. Gerraty * p-2. Exponent bit pattern (from high to low) is:
6740957b409SSimon J. Gerraty * - 32 bits of value 1
6750957b409SSimon J. Gerraty * - 31 bits of value 0
6760957b409SSimon J. Gerraty * - 1 bit of value 1
6770957b409SSimon J. Gerraty * - 96 bits of value 0
6780957b409SSimon J. Gerraty * - 94 bits of value 1
6790957b409SSimon J. Gerraty * - 1 bit of value 0
6800957b409SSimon J. Gerraty * - 1 bit of value 1
6810957b409SSimon J. Gerraty * Thus, we precompute z^(2^31-1) to speed things up.
6820957b409SSimon J. Gerraty *
6830957b409SSimon J. Gerraty * If z = 0 (point at infinity) then the modular exponentiation
6840957b409SSimon J. Gerraty * will yield 0, which leads to the expected result (all three
6850957b409SSimon J. Gerraty * coordinates set to 0).
6860957b409SSimon J. Gerraty */
6870957b409SSimon J. Gerraty
6880957b409SSimon J. Gerraty /*
6890957b409SSimon J. Gerraty * A simple square-and-multiply for z^(2^31-1). We could save about
6900957b409SSimon J. Gerraty * two dozen multiplications here with an addition chain, but
6910957b409SSimon J. Gerraty * this would require a bit more code, and extra stack buffers.
6920957b409SSimon J. Gerraty */
6930957b409SSimon J. Gerraty memcpy(t1, P->z, sizeof P->z);
6940957b409SSimon J. Gerraty for (i = 0; i < 30; i ++) {
6950957b409SSimon J. Gerraty square_f256(t1, t1);
6960957b409SSimon J. Gerraty mul_f256(t1, t1, P->z);
6970957b409SSimon J. Gerraty }
6980957b409SSimon J. Gerraty
6990957b409SSimon J. Gerraty /*
7000957b409SSimon J. Gerraty * Square-and-multiply. Apart from the squarings, we have a few
7010957b409SSimon J. Gerraty * multiplications to set bits to 1; we multiply by the original z
7020957b409SSimon J. Gerraty * for setting 1 bit, and by t1 for setting 31 bits.
7030957b409SSimon J. Gerraty */
7040957b409SSimon J. Gerraty memcpy(t2, P->z, sizeof P->z);
7050957b409SSimon J. Gerraty for (i = 1; i < 256; i ++) {
7060957b409SSimon J. Gerraty square_f256(t2, t2);
7070957b409SSimon J. Gerraty switch (i) {
7080957b409SSimon J. Gerraty case 31:
7090957b409SSimon J. Gerraty case 190:
7100957b409SSimon J. Gerraty case 221:
7110957b409SSimon J. Gerraty case 252:
7120957b409SSimon J. Gerraty mul_f256(t2, t2, t1);
7130957b409SSimon J. Gerraty break;
7140957b409SSimon J. Gerraty case 63:
7150957b409SSimon J. Gerraty case 253:
7160957b409SSimon J. Gerraty case 255:
7170957b409SSimon J. Gerraty mul_f256(t2, t2, P->z);
7180957b409SSimon J. Gerraty break;
7190957b409SSimon J. Gerraty }
7200957b409SSimon J. Gerraty }
7210957b409SSimon J. Gerraty
7220957b409SSimon J. Gerraty /*
7230957b409SSimon J. Gerraty * Now that we have 1/z, multiply x by 1/z^2 and y by 1/z^3.
7240957b409SSimon J. Gerraty */
7250957b409SSimon J. Gerraty mul_f256(t1, t2, t2);
7260957b409SSimon J. Gerraty mul_f256(P->x, t1, P->x);
7270957b409SSimon J. Gerraty mul_f256(t1, t1, t2);
7280957b409SSimon J. Gerraty mul_f256(P->y, t1, P->y);
7290957b409SSimon J. Gerraty reduce_final_f256(P->x);
7300957b409SSimon J. Gerraty reduce_final_f256(P->y);
7310957b409SSimon J. Gerraty
7320957b409SSimon J. Gerraty /*
7330957b409SSimon J. Gerraty * Multiply z by 1/z. If z = 0, then this will yield 0, otherwise
7340957b409SSimon J. Gerraty * this will set z to 1.
7350957b409SSimon J. Gerraty */
7360957b409SSimon J. Gerraty mul_f256(P->z, P->z, t2);
7370957b409SSimon J. Gerraty reduce_final_f256(P->z);
7380957b409SSimon J. Gerraty }
7390957b409SSimon J. Gerraty
7400957b409SSimon J. Gerraty /*
7410957b409SSimon J. Gerraty * Double a point in P-256. This function works for all valid points,
7420957b409SSimon J. Gerraty * including the point at infinity.
7430957b409SSimon J. Gerraty */
7440957b409SSimon J. Gerraty static void
p256_double(p256_jacobian * Q)7450957b409SSimon J. Gerraty p256_double(p256_jacobian *Q)
7460957b409SSimon J. Gerraty {
7470957b409SSimon J. Gerraty /*
7480957b409SSimon J. Gerraty * Doubling formulas are:
7490957b409SSimon J. Gerraty *
7500957b409SSimon J. Gerraty * s = 4*x*y^2
7510957b409SSimon J. Gerraty * m = 3*(x + z^2)*(x - z^2)
7520957b409SSimon J. Gerraty * x' = m^2 - 2*s
7530957b409SSimon J. Gerraty * y' = m*(s - x') - 8*y^4
7540957b409SSimon J. Gerraty * z' = 2*y*z
7550957b409SSimon J. Gerraty *
7560957b409SSimon J. Gerraty * These formulas work for all points, including points of order 2
7570957b409SSimon J. Gerraty * and points at infinity:
7580957b409SSimon J. Gerraty * - If y = 0 then z' = 0. But there is no such point in P-256
7590957b409SSimon J. Gerraty * anyway.
7600957b409SSimon J. Gerraty * - If z = 0 then z' = 0.
7610957b409SSimon J. Gerraty */
7620957b409SSimon J. Gerraty uint32_t t1[9], t2[9], t3[9], t4[9];
7630957b409SSimon J. Gerraty
7640957b409SSimon J. Gerraty /*
7650957b409SSimon J. Gerraty * Compute z^2 in t1.
7660957b409SSimon J. Gerraty */
7670957b409SSimon J. Gerraty square_f256(t1, Q->z);
7680957b409SSimon J. Gerraty
7690957b409SSimon J. Gerraty /*
7700957b409SSimon J. Gerraty * Compute x-z^2 in t2 and x+z^2 in t1.
7710957b409SSimon J. Gerraty */
7720957b409SSimon J. Gerraty add_f256(t2, Q->x, t1);
7730957b409SSimon J. Gerraty sub_f256(t1, Q->x, t1);
7740957b409SSimon J. Gerraty
7750957b409SSimon J. Gerraty /*
7760957b409SSimon J. Gerraty * Compute 3*(x+z^2)*(x-z^2) in t1.
7770957b409SSimon J. Gerraty */
7780957b409SSimon J. Gerraty mul_f256(t3, t1, t2);
7790957b409SSimon J. Gerraty add_f256(t1, t3, t3);
7800957b409SSimon J. Gerraty add_f256(t1, t3, t1);
7810957b409SSimon J. Gerraty
7820957b409SSimon J. Gerraty /*
7830957b409SSimon J. Gerraty * Compute 4*x*y^2 (in t2) and 2*y^2 (in t3).
7840957b409SSimon J. Gerraty */
7850957b409SSimon J. Gerraty square_f256(t3, Q->y);
7860957b409SSimon J. Gerraty add_f256(t3, t3, t3);
7870957b409SSimon J. Gerraty mul_f256(t2, Q->x, t3);
7880957b409SSimon J. Gerraty add_f256(t2, t2, t2);
7890957b409SSimon J. Gerraty
7900957b409SSimon J. Gerraty /*
7910957b409SSimon J. Gerraty * Compute x' = m^2 - 2*s.
7920957b409SSimon J. Gerraty */
7930957b409SSimon J. Gerraty square_f256(Q->x, t1);
7940957b409SSimon J. Gerraty sub_f256(Q->x, Q->x, t2);
7950957b409SSimon J. Gerraty sub_f256(Q->x, Q->x, t2);
7960957b409SSimon J. Gerraty
7970957b409SSimon J. Gerraty /*
7980957b409SSimon J. Gerraty * Compute z' = 2*y*z.
7990957b409SSimon J. Gerraty */
8000957b409SSimon J. Gerraty mul_f256(t4, Q->y, Q->z);
8010957b409SSimon J. Gerraty add_f256(Q->z, t4, t4);
8020957b409SSimon J. Gerraty
8030957b409SSimon J. Gerraty /*
8040957b409SSimon J. Gerraty * Compute y' = m*(s - x') - 8*y^4. Note that we already have
8050957b409SSimon J. Gerraty * 2*y^2 in t3.
8060957b409SSimon J. Gerraty */
8070957b409SSimon J. Gerraty sub_f256(t2, t2, Q->x);
8080957b409SSimon J. Gerraty mul_f256(Q->y, t1, t2);
8090957b409SSimon J. Gerraty square_f256(t4, t3);
8100957b409SSimon J. Gerraty add_f256(t4, t4, t4);
8110957b409SSimon J. Gerraty sub_f256(Q->y, Q->y, t4);
8120957b409SSimon J. Gerraty }
8130957b409SSimon J. Gerraty
8140957b409SSimon J. Gerraty /*
8150957b409SSimon J. Gerraty * Add point P2 to point P1.
8160957b409SSimon J. Gerraty *
8170957b409SSimon J. Gerraty * This function computes the wrong result in the following cases:
8180957b409SSimon J. Gerraty *
8190957b409SSimon J. Gerraty * - If P1 == 0 but P2 != 0
8200957b409SSimon J. Gerraty * - If P1 != 0 but P2 == 0
8210957b409SSimon J. Gerraty * - If P1 == P2
8220957b409SSimon J. Gerraty *
8230957b409SSimon J. Gerraty * In all three cases, P1 is set to the point at infinity.
8240957b409SSimon J. Gerraty *
8250957b409SSimon J. Gerraty * Returned value is 0 if one of the following occurs:
8260957b409SSimon J. Gerraty *
8270957b409SSimon J. Gerraty * - P1 and P2 have the same Y coordinate
8280957b409SSimon J. Gerraty * - P1 == 0 and P2 == 0
8290957b409SSimon J. Gerraty * - The Y coordinate of one of the points is 0 and the other point is
8300957b409SSimon J. Gerraty * the point at infinity.
8310957b409SSimon J. Gerraty *
8320957b409SSimon J. Gerraty * The third case cannot actually happen with valid points, since a point
8330957b409SSimon J. Gerraty * with Y == 0 is a point of order 2, and there is no point of order 2 on
8340957b409SSimon J. Gerraty * curve P-256.
8350957b409SSimon J. Gerraty *
8360957b409SSimon J. Gerraty * Therefore, assuming that P1 != 0 and P2 != 0 on input, then the caller
8370957b409SSimon J. Gerraty * can apply the following:
8380957b409SSimon J. Gerraty *
8390957b409SSimon J. Gerraty * - If the result is not the point at infinity, then it is correct.
8400957b409SSimon J. Gerraty * - Otherwise, if the returned value is 1, then this is a case of
8410957b409SSimon J. Gerraty * P1+P2 == 0, so the result is indeed the point at infinity.
8420957b409SSimon J. Gerraty * - Otherwise, P1 == P2, so a "double" operation should have been
8430957b409SSimon J. Gerraty * performed.
8440957b409SSimon J. Gerraty */
8450957b409SSimon J. Gerraty static uint32_t
p256_add(p256_jacobian * P1,const p256_jacobian * P2)8460957b409SSimon J. Gerraty p256_add(p256_jacobian *P1, const p256_jacobian *P2)
8470957b409SSimon J. Gerraty {
8480957b409SSimon J. Gerraty /*
8490957b409SSimon J. Gerraty * Addtions formulas are:
8500957b409SSimon J. Gerraty *
8510957b409SSimon J. Gerraty * u1 = x1 * z2^2
8520957b409SSimon J. Gerraty * u2 = x2 * z1^2
8530957b409SSimon J. Gerraty * s1 = y1 * z2^3
8540957b409SSimon J. Gerraty * s2 = y2 * z1^3
8550957b409SSimon J. Gerraty * h = u2 - u1
8560957b409SSimon J. Gerraty * r = s2 - s1
8570957b409SSimon J. Gerraty * x3 = r^2 - h^3 - 2 * u1 * h^2
8580957b409SSimon J. Gerraty * y3 = r * (u1 * h^2 - x3) - s1 * h^3
8590957b409SSimon J. Gerraty * z3 = h * z1 * z2
8600957b409SSimon J. Gerraty */
8610957b409SSimon J. Gerraty uint32_t t1[9], t2[9], t3[9], t4[9], t5[9], t6[9], t7[9];
8620957b409SSimon J. Gerraty uint32_t ret;
8630957b409SSimon J. Gerraty int i;
8640957b409SSimon J. Gerraty
8650957b409SSimon J. Gerraty /*
8660957b409SSimon J. Gerraty * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
8670957b409SSimon J. Gerraty */
8680957b409SSimon J. Gerraty square_f256(t3, P2->z);
8690957b409SSimon J. Gerraty mul_f256(t1, P1->x, t3);
8700957b409SSimon J. Gerraty mul_f256(t4, P2->z, t3);
8710957b409SSimon J. Gerraty mul_f256(t3, P1->y, t4);
8720957b409SSimon J. Gerraty
8730957b409SSimon J. Gerraty /*
8740957b409SSimon J. Gerraty * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
8750957b409SSimon J. Gerraty */
8760957b409SSimon J. Gerraty square_f256(t4, P1->z);
8770957b409SSimon J. Gerraty mul_f256(t2, P2->x, t4);
8780957b409SSimon J. Gerraty mul_f256(t5, P1->z, t4);
8790957b409SSimon J. Gerraty mul_f256(t4, P2->y, t5);
8800957b409SSimon J. Gerraty
8810957b409SSimon J. Gerraty /*
8820957b409SSimon J. Gerraty * Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4).
8830957b409SSimon J. Gerraty * We need to test whether r is zero, so we will do some extra
8840957b409SSimon J. Gerraty * reduce.
8850957b409SSimon J. Gerraty */
8860957b409SSimon J. Gerraty sub_f256(t2, t2, t1);
8870957b409SSimon J. Gerraty sub_f256(t4, t4, t3);
8880957b409SSimon J. Gerraty reduce_final_f256(t4);
8890957b409SSimon J. Gerraty ret = 0;
8900957b409SSimon J. Gerraty for (i = 0; i < 9; i ++) {
8910957b409SSimon J. Gerraty ret |= t4[i];
8920957b409SSimon J. Gerraty }
8930957b409SSimon J. Gerraty ret = (ret | -ret) >> 31;
8940957b409SSimon J. Gerraty
8950957b409SSimon J. Gerraty /*
8960957b409SSimon J. Gerraty * Compute u1*h^2 (in t6) and h^3 (in t5);
8970957b409SSimon J. Gerraty */
8980957b409SSimon J. Gerraty square_f256(t7, t2);
8990957b409SSimon J. Gerraty mul_f256(t6, t1, t7);
9000957b409SSimon J. Gerraty mul_f256(t5, t7, t2);
9010957b409SSimon J. Gerraty
9020957b409SSimon J. Gerraty /*
9030957b409SSimon J. Gerraty * Compute x3 = r^2 - h^3 - 2*u1*h^2.
9040957b409SSimon J. Gerraty */
9050957b409SSimon J. Gerraty square_f256(P1->x, t4);
9060957b409SSimon J. Gerraty sub_f256(P1->x, P1->x, t5);
9070957b409SSimon J. Gerraty sub_f256(P1->x, P1->x, t6);
9080957b409SSimon J. Gerraty sub_f256(P1->x, P1->x, t6);
9090957b409SSimon J. Gerraty
9100957b409SSimon J. Gerraty /*
9110957b409SSimon J. Gerraty * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
9120957b409SSimon J. Gerraty */
9130957b409SSimon J. Gerraty sub_f256(t6, t6, P1->x);
9140957b409SSimon J. Gerraty mul_f256(P1->y, t4, t6);
9150957b409SSimon J. Gerraty mul_f256(t1, t5, t3);
9160957b409SSimon J. Gerraty sub_f256(P1->y, P1->y, t1);
9170957b409SSimon J. Gerraty
9180957b409SSimon J. Gerraty /*
9190957b409SSimon J. Gerraty * Compute z3 = h*z1*z2.
9200957b409SSimon J. Gerraty */
9210957b409SSimon J. Gerraty mul_f256(t1, P1->z, P2->z);
9220957b409SSimon J. Gerraty mul_f256(P1->z, t1, t2);
9230957b409SSimon J. Gerraty
9240957b409SSimon J. Gerraty return ret;
9250957b409SSimon J. Gerraty }
9260957b409SSimon J. Gerraty
9270957b409SSimon J. Gerraty /*
9280957b409SSimon J. Gerraty * Add point P2 to point P1. This is a specialised function for the
9290957b409SSimon J. Gerraty * case when P2 is a non-zero point in affine coordinate.
9300957b409SSimon J. Gerraty *
9310957b409SSimon J. Gerraty * This function computes the wrong result in the following cases:
9320957b409SSimon J. Gerraty *
9330957b409SSimon J. Gerraty * - If P1 == 0
9340957b409SSimon J. Gerraty * - If P1 == P2
9350957b409SSimon J. Gerraty *
9360957b409SSimon J. Gerraty * In both cases, P1 is set to the point at infinity.
9370957b409SSimon J. Gerraty *
9380957b409SSimon J. Gerraty * Returned value is 0 if one of the following occurs:
9390957b409SSimon J. Gerraty *
9400957b409SSimon J. Gerraty * - P1 and P2 have the same Y coordinate
9410957b409SSimon J. Gerraty * - The Y coordinate of P2 is 0 and P1 is the point at infinity.
9420957b409SSimon J. Gerraty *
9430957b409SSimon J. Gerraty * The second case cannot actually happen with valid points, since a point
9440957b409SSimon J. Gerraty * with Y == 0 is a point of order 2, and there is no point of order 2 on
9450957b409SSimon J. Gerraty * curve P-256.
9460957b409SSimon J. Gerraty *
9470957b409SSimon J. Gerraty * Therefore, assuming that P1 != 0 on input, then the caller
9480957b409SSimon J. Gerraty * can apply the following:
9490957b409SSimon J. Gerraty *
9500957b409SSimon J. Gerraty * - If the result is not the point at infinity, then it is correct.
9510957b409SSimon J. Gerraty * - Otherwise, if the returned value is 1, then this is a case of
9520957b409SSimon J. Gerraty * P1+P2 == 0, so the result is indeed the point at infinity.
9530957b409SSimon J. Gerraty * - Otherwise, P1 == P2, so a "double" operation should have been
9540957b409SSimon J. Gerraty * performed.
9550957b409SSimon J. Gerraty */
9560957b409SSimon J. Gerraty static uint32_t
p256_add_mixed(p256_jacobian * P1,const p256_jacobian * P2)9570957b409SSimon J. Gerraty p256_add_mixed(p256_jacobian *P1, const p256_jacobian *P2)
9580957b409SSimon J. Gerraty {
9590957b409SSimon J. Gerraty /*
9600957b409SSimon J. Gerraty * Addtions formulas are:
9610957b409SSimon J. Gerraty *
9620957b409SSimon J. Gerraty * u1 = x1
9630957b409SSimon J. Gerraty * u2 = x2 * z1^2
9640957b409SSimon J. Gerraty * s1 = y1
9650957b409SSimon J. Gerraty * s2 = y2 * z1^3
9660957b409SSimon J. Gerraty * h = u2 - u1
9670957b409SSimon J. Gerraty * r = s2 - s1
9680957b409SSimon J. Gerraty * x3 = r^2 - h^3 - 2 * u1 * h^2
9690957b409SSimon J. Gerraty * y3 = r * (u1 * h^2 - x3) - s1 * h^3
9700957b409SSimon J. Gerraty * z3 = h * z1
9710957b409SSimon J. Gerraty */
9720957b409SSimon J. Gerraty uint32_t t1[9], t2[9], t3[9], t4[9], t5[9], t6[9], t7[9];
9730957b409SSimon J. Gerraty uint32_t ret;
9740957b409SSimon J. Gerraty int i;
9750957b409SSimon J. Gerraty
9760957b409SSimon J. Gerraty /*
9770957b409SSimon J. Gerraty * Compute u1 = x1 (in t1) and s1 = y1 (in t3).
9780957b409SSimon J. Gerraty */
9790957b409SSimon J. Gerraty memcpy(t1, P1->x, sizeof t1);
9800957b409SSimon J. Gerraty memcpy(t3, P1->y, sizeof t3);
9810957b409SSimon J. Gerraty
9820957b409SSimon J. Gerraty /*
9830957b409SSimon J. Gerraty * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
9840957b409SSimon J. Gerraty */
9850957b409SSimon J. Gerraty square_f256(t4, P1->z);
9860957b409SSimon J. Gerraty mul_f256(t2, P2->x, t4);
9870957b409SSimon J. Gerraty mul_f256(t5, P1->z, t4);
9880957b409SSimon J. Gerraty mul_f256(t4, P2->y, t5);
9890957b409SSimon J. Gerraty
9900957b409SSimon J. Gerraty /*
9910957b409SSimon J. Gerraty * Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4).
9920957b409SSimon J. Gerraty * We need to test whether r is zero, so we will do some extra
9930957b409SSimon J. Gerraty * reduce.
9940957b409SSimon J. Gerraty */
9950957b409SSimon J. Gerraty sub_f256(t2, t2, t1);
9960957b409SSimon J. Gerraty sub_f256(t4, t4, t3);
9970957b409SSimon J. Gerraty reduce_final_f256(t4);
9980957b409SSimon J. Gerraty ret = 0;
9990957b409SSimon J. Gerraty for (i = 0; i < 9; i ++) {
10000957b409SSimon J. Gerraty ret |= t4[i];
10010957b409SSimon J. Gerraty }
10020957b409SSimon J. Gerraty ret = (ret | -ret) >> 31;
10030957b409SSimon J. Gerraty
10040957b409SSimon J. Gerraty /*
10050957b409SSimon J. Gerraty * Compute u1*h^2 (in t6) and h^3 (in t5);
10060957b409SSimon J. Gerraty */
10070957b409SSimon J. Gerraty square_f256(t7, t2);
10080957b409SSimon J. Gerraty mul_f256(t6, t1, t7);
10090957b409SSimon J. Gerraty mul_f256(t5, t7, t2);
10100957b409SSimon J. Gerraty
10110957b409SSimon J. Gerraty /*
10120957b409SSimon J. Gerraty * Compute x3 = r^2 - h^3 - 2*u1*h^2.
10130957b409SSimon J. Gerraty */
10140957b409SSimon J. Gerraty square_f256(P1->x, t4);
10150957b409SSimon J. Gerraty sub_f256(P1->x, P1->x, t5);
10160957b409SSimon J. Gerraty sub_f256(P1->x, P1->x, t6);
10170957b409SSimon J. Gerraty sub_f256(P1->x, P1->x, t6);
10180957b409SSimon J. Gerraty
10190957b409SSimon J. Gerraty /*
10200957b409SSimon J. Gerraty * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
10210957b409SSimon J. Gerraty */
10220957b409SSimon J. Gerraty sub_f256(t6, t6, P1->x);
10230957b409SSimon J. Gerraty mul_f256(P1->y, t4, t6);
10240957b409SSimon J. Gerraty mul_f256(t1, t5, t3);
10250957b409SSimon J. Gerraty sub_f256(P1->y, P1->y, t1);
10260957b409SSimon J. Gerraty
10270957b409SSimon J. Gerraty /*
10280957b409SSimon J. Gerraty * Compute z3 = h*z1*z2.
10290957b409SSimon J. Gerraty */
10300957b409SSimon J. Gerraty mul_f256(P1->z, P1->z, t2);
10310957b409SSimon J. Gerraty
10320957b409SSimon J. Gerraty return ret;
10330957b409SSimon J. Gerraty }
10340957b409SSimon J. Gerraty
10350957b409SSimon J. Gerraty /*
10360957b409SSimon J. Gerraty * Decode a P-256 point. This function does not support the point at
10370957b409SSimon J. Gerraty * infinity. Returned value is 0 if the point is invalid, 1 otherwise.
10380957b409SSimon J. Gerraty */
10390957b409SSimon J. Gerraty static uint32_t
p256_decode(p256_jacobian * P,const void * src,size_t len)10400957b409SSimon J. Gerraty p256_decode(p256_jacobian *P, const void *src, size_t len)
10410957b409SSimon J. Gerraty {
10420957b409SSimon J. Gerraty const unsigned char *buf;
10430957b409SSimon J. Gerraty uint32_t tx[9], ty[9], t1[9], t2[9];
10440957b409SSimon J. Gerraty uint32_t bad;
10450957b409SSimon J. Gerraty int i;
10460957b409SSimon J. Gerraty
10470957b409SSimon J. Gerraty if (len != 65) {
10480957b409SSimon J. Gerraty return 0;
10490957b409SSimon J. Gerraty }
10500957b409SSimon J. Gerraty buf = src;
10510957b409SSimon J. Gerraty
10520957b409SSimon J. Gerraty /*
10530957b409SSimon J. Gerraty * First byte must be 0x04 (uncompressed format). We could support
10540957b409SSimon J. Gerraty * "hybrid format" (first byte is 0x06 or 0x07, and encodes the
10550957b409SSimon J. Gerraty * least significant bit of the Y coordinate), but it is explicitly
10560957b409SSimon J. Gerraty * forbidden by RFC 5480 (section 2.2).
10570957b409SSimon J. Gerraty */
10580957b409SSimon J. Gerraty bad = NEQ(buf[0], 0x04);
10590957b409SSimon J. Gerraty
10600957b409SSimon J. Gerraty /*
10610957b409SSimon J. Gerraty * Decode the coordinates, and check that they are both lower
10620957b409SSimon J. Gerraty * than the modulus.
10630957b409SSimon J. Gerraty */
10640957b409SSimon J. Gerraty tx[8] = be8_to_le30(tx, buf + 1, 32);
10650957b409SSimon J. Gerraty ty[8] = be8_to_le30(ty, buf + 33, 32);
10660957b409SSimon J. Gerraty bad |= reduce_final_f256(tx);
10670957b409SSimon J. Gerraty bad |= reduce_final_f256(ty);
10680957b409SSimon J. Gerraty
10690957b409SSimon J. Gerraty /*
10700957b409SSimon J. Gerraty * Check curve equation.
10710957b409SSimon J. Gerraty */
10720957b409SSimon J. Gerraty square_f256(t1, tx);
10730957b409SSimon J. Gerraty mul_f256(t1, tx, t1);
10740957b409SSimon J. Gerraty square_f256(t2, ty);
10750957b409SSimon J. Gerraty sub_f256(t1, t1, tx);
10760957b409SSimon J. Gerraty sub_f256(t1, t1, tx);
10770957b409SSimon J. Gerraty sub_f256(t1, t1, tx);
10780957b409SSimon J. Gerraty add_f256(t1, t1, P256_B);
10790957b409SSimon J. Gerraty sub_f256(t1, t1, t2);
10800957b409SSimon J. Gerraty reduce_final_f256(t1);
10810957b409SSimon J. Gerraty for (i = 0; i < 9; i ++) {
10820957b409SSimon J. Gerraty bad |= t1[i];
10830957b409SSimon J. Gerraty }
10840957b409SSimon J. Gerraty
10850957b409SSimon J. Gerraty /*
10860957b409SSimon J. Gerraty * Copy coordinates to the point structure.
10870957b409SSimon J. Gerraty */
10880957b409SSimon J. Gerraty memcpy(P->x, tx, sizeof tx);
10890957b409SSimon J. Gerraty memcpy(P->y, ty, sizeof ty);
10900957b409SSimon J. Gerraty memset(P->z, 0, sizeof P->z);
10910957b409SSimon J. Gerraty P->z[0] = 1;
10920957b409SSimon J. Gerraty return EQ(bad, 0);
10930957b409SSimon J. Gerraty }
10940957b409SSimon J. Gerraty
10950957b409SSimon J. Gerraty /*
10960957b409SSimon J. Gerraty * Encode a point into a buffer. This function assumes that the point is
10970957b409SSimon J. Gerraty * valid, in affine coordinates, and not the point at infinity.
10980957b409SSimon J. Gerraty */
10990957b409SSimon J. Gerraty static void
p256_encode(void * dst,const p256_jacobian * P)11000957b409SSimon J. Gerraty p256_encode(void *dst, const p256_jacobian *P)
11010957b409SSimon J. Gerraty {
11020957b409SSimon J. Gerraty unsigned char *buf;
11030957b409SSimon J. Gerraty
11040957b409SSimon J. Gerraty buf = dst;
11050957b409SSimon J. Gerraty buf[0] = 0x04;
11060957b409SSimon J. Gerraty le30_to_be8(buf + 1, 32, P->x);
11070957b409SSimon J. Gerraty le30_to_be8(buf + 33, 32, P->y);
11080957b409SSimon J. Gerraty }
11090957b409SSimon J. Gerraty
11100957b409SSimon J. Gerraty /*
11110957b409SSimon J. Gerraty * Multiply a curve point by an integer. The integer is assumed to be
11120957b409SSimon J. Gerraty * lower than the curve order, and the base point must not be the point
11130957b409SSimon J. Gerraty * at infinity.
11140957b409SSimon J. Gerraty */
11150957b409SSimon J. Gerraty static void
p256_mul(p256_jacobian * P,const unsigned char * x,size_t xlen)11160957b409SSimon J. Gerraty p256_mul(p256_jacobian *P, const unsigned char *x, size_t xlen)
11170957b409SSimon J. Gerraty {
11180957b409SSimon J. Gerraty /*
11190957b409SSimon J. Gerraty * qz is a flag that is initially 1, and remains equal to 1
11200957b409SSimon J. Gerraty * as long as the point is the point at infinity.
11210957b409SSimon J. Gerraty *
11220957b409SSimon J. Gerraty * We use a 2-bit window to handle multiplier bits by pairs.
11230957b409SSimon J. Gerraty * The precomputed window really is the points P2 and P3.
11240957b409SSimon J. Gerraty */
11250957b409SSimon J. Gerraty uint32_t qz;
11260957b409SSimon J. Gerraty p256_jacobian P2, P3, Q, T, U;
11270957b409SSimon J. Gerraty
11280957b409SSimon J. Gerraty /*
11290957b409SSimon J. Gerraty * Compute window values.
11300957b409SSimon J. Gerraty */
11310957b409SSimon J. Gerraty P2 = *P;
11320957b409SSimon J. Gerraty p256_double(&P2);
11330957b409SSimon J. Gerraty P3 = *P;
11340957b409SSimon J. Gerraty p256_add(&P3, &P2);
11350957b409SSimon J. Gerraty
11360957b409SSimon J. Gerraty /*
11370957b409SSimon J. Gerraty * We start with Q = 0. We process multiplier bits 2 by 2.
11380957b409SSimon J. Gerraty */
11390957b409SSimon J. Gerraty memset(&Q, 0, sizeof Q);
11400957b409SSimon J. Gerraty qz = 1;
11410957b409SSimon J. Gerraty while (xlen -- > 0) {
11420957b409SSimon J. Gerraty int k;
11430957b409SSimon J. Gerraty
11440957b409SSimon J. Gerraty for (k = 6; k >= 0; k -= 2) {
11450957b409SSimon J. Gerraty uint32_t bits;
11460957b409SSimon J. Gerraty uint32_t bnz;
11470957b409SSimon J. Gerraty
11480957b409SSimon J. Gerraty p256_double(&Q);
11490957b409SSimon J. Gerraty p256_double(&Q);
11500957b409SSimon J. Gerraty T = *P;
11510957b409SSimon J. Gerraty U = Q;
11520957b409SSimon J. Gerraty bits = (*x >> k) & (uint32_t)3;
11530957b409SSimon J. Gerraty bnz = NEQ(bits, 0);
11540957b409SSimon J. Gerraty CCOPY(EQ(bits, 2), &T, &P2, sizeof T);
11550957b409SSimon J. Gerraty CCOPY(EQ(bits, 3), &T, &P3, sizeof T);
11560957b409SSimon J. Gerraty p256_add(&U, &T);
11570957b409SSimon J. Gerraty CCOPY(bnz & qz, &Q, &T, sizeof Q);
11580957b409SSimon J. Gerraty CCOPY(bnz & ~qz, &Q, &U, sizeof Q);
11590957b409SSimon J. Gerraty qz &= ~bnz;
11600957b409SSimon J. Gerraty }
11610957b409SSimon J. Gerraty x ++;
11620957b409SSimon J. Gerraty }
11630957b409SSimon J. Gerraty *P = Q;
11640957b409SSimon J. Gerraty }
11650957b409SSimon J. Gerraty
11660957b409SSimon J. Gerraty /*
11670957b409SSimon J. Gerraty * Precomputed window: k*G points, where G is the curve generator, and k
11680957b409SSimon J. Gerraty * is an integer from 1 to 15 (inclusive). The X and Y coordinates of
11690957b409SSimon J. Gerraty * the point are encoded as 9 words of 30 bits each (little-endian
11700957b409SSimon J. Gerraty * order).
11710957b409SSimon J. Gerraty */
11720957b409SSimon J. Gerraty static const uint32_t Gwin[15][18] = {
11730957b409SSimon J. Gerraty
11740957b409SSimon J. Gerraty { 0x1898C296, 0x1284E517, 0x1EB33A0F, 0x00DF604B,
11750957b409SSimon J. Gerraty 0x2440F277, 0x339B958E, 0x04247F8B, 0x347CB84B,
11760957b409SSimon J. Gerraty 0x00006B17, 0x37BF51F5, 0x2ED901A0, 0x3315ECEC,
11770957b409SSimon J. Gerraty 0x338CD5DA, 0x0F9E162B, 0x1FAD29F0, 0x27F9B8EE,
11780957b409SSimon J. Gerraty 0x10B8BF86, 0x00004FE3 },
11790957b409SSimon J. Gerraty
11800957b409SSimon J. Gerraty { 0x07669978, 0x182D23F1, 0x3F21B35A, 0x225A789D,
11810957b409SSimon J. Gerraty 0x351AC3C0, 0x08E00C12, 0x34F7E8A5, 0x1EC62340,
11820957b409SSimon J. Gerraty 0x00007CF2, 0x227873D1, 0x3812DE74, 0x0E982299,
11830957b409SSimon J. Gerraty 0x1F6B798F, 0x3430DBBA, 0x366B1A7D, 0x2D040293,
11840957b409SSimon J. Gerraty 0x154436E3, 0x00000777 },
11850957b409SSimon J. Gerraty
11860957b409SSimon J. Gerraty { 0x06E7FD6C, 0x2D05986F, 0x3ADA985F, 0x31ADC87B,
11870957b409SSimon J. Gerraty 0x0BF165E6, 0x1FBE5475, 0x30A44C8F, 0x3934698C,
11880957b409SSimon J. Gerraty 0x00005ECB, 0x227D5032, 0x29E6C49E, 0x04FB83D9,
11890957b409SSimon J. Gerraty 0x0AAC0D8E, 0x24A2ECD8, 0x2C1B3869, 0x0FF7E374,
11900957b409SSimon J. Gerraty 0x19031266, 0x00008734 },
11910957b409SSimon J. Gerraty
11920957b409SSimon J. Gerraty { 0x2B030852, 0x024C0911, 0x05596EF5, 0x07F8B6DE,
11930957b409SSimon J. Gerraty 0x262BD003, 0x3779967B, 0x08FBBA02, 0x128D4CB4,
11940957b409SSimon J. Gerraty 0x0000E253, 0x184ED8C6, 0x310B08FC, 0x30EE0055,
11950957b409SSimon J. Gerraty 0x3F25B0FC, 0x062D764E, 0x3FB97F6A, 0x33CC719D,
11960957b409SSimon J. Gerraty 0x15D69318, 0x0000E0F1 },
11970957b409SSimon J. Gerraty
11980957b409SSimon J. Gerraty { 0x03D033ED, 0x05552837, 0x35BE5242, 0x2320BF47,
11990957b409SSimon J. Gerraty 0x268FDFEF, 0x13215821, 0x140D2D78, 0x02DE9454,
12000957b409SSimon J. Gerraty 0x00005159, 0x3DA16DA4, 0x0742ED13, 0x0D80888D,
12010957b409SSimon J. Gerraty 0x004BC035, 0x0A79260D, 0x06FCDAFE, 0x2727D8AE,
12020957b409SSimon J. Gerraty 0x1F6A2412, 0x0000E0C1 },
12030957b409SSimon J. Gerraty
12040957b409SSimon J. Gerraty { 0x3C2291A9, 0x1AC2ABA4, 0x3B215B4C, 0x131D037A,
12050957b409SSimon J. Gerraty 0x17DDE302, 0x0C90B2E2, 0x0602C92D, 0x05CA9DA9,
12060957b409SSimon J. Gerraty 0x0000B01A, 0x0FC77FE2, 0x35F1214E, 0x07E16BDF,
12070957b409SSimon J. Gerraty 0x003DDC07, 0x2703791C, 0x3038B7EE, 0x3DAD56FE,
12080957b409SSimon J. Gerraty 0x041D0C8D, 0x0000E85C },
12090957b409SSimon J. Gerraty
12100957b409SSimon J. Gerraty { 0x3187B2A3, 0x0018A1C0, 0x00FEF5B3, 0x3E7E2E2A,
12110957b409SSimon J. Gerraty 0x01FB607E, 0x2CC199F0, 0x37B4625B, 0x0EDBE82F,
12120957b409SSimon J. Gerraty 0x00008E53, 0x01F400B4, 0x15786A1B, 0x3041B21C,
12130957b409SSimon J. Gerraty 0x31CD8CF2, 0x35900053, 0x1A7E0E9B, 0x318366D0,
12140957b409SSimon J. Gerraty 0x076F780C, 0x000073EB },
12150957b409SSimon J. Gerraty
12160957b409SSimon J. Gerraty { 0x1B6FB393, 0x13767707, 0x3CE97DBB, 0x348E2603,
12170957b409SSimon J. Gerraty 0x354CADC1, 0x09D0B4EA, 0x1B053404, 0x1DE76FBA,
12180957b409SSimon J. Gerraty 0x000062D9, 0x0F09957E, 0x295029A8, 0x3E76A78D,
12190957b409SSimon J. Gerraty 0x3B547DAE, 0x27CEE0A2, 0x0575DC45, 0x1D8244FF,
12200957b409SSimon J. Gerraty 0x332F647A, 0x0000AD5A },
12210957b409SSimon J. Gerraty
12220957b409SSimon J. Gerraty { 0x10949EE0, 0x1E7A292E, 0x06DF8B3D, 0x02B2E30B,
12230957b409SSimon J. Gerraty 0x31F8729E, 0x24E35475, 0x30B71878, 0x35EDBFB7,
12240957b409SSimon J. Gerraty 0x0000EA68, 0x0DD048FA, 0x21688929, 0x0DE823FE,
12250957b409SSimon J. Gerraty 0x1C53FAA9, 0x0EA0C84D, 0x052A592A, 0x1FCE7870,
12260957b409SSimon J. Gerraty 0x11325CB2, 0x00002A27 },
12270957b409SSimon J. Gerraty
12280957b409SSimon J. Gerraty { 0x04C5723F, 0x30D81A50, 0x048306E4, 0x329B11C7,
12290957b409SSimon J. Gerraty 0x223FB545, 0x085347A8, 0x2993E591, 0x1B5ACA8E,
12300957b409SSimon J. Gerraty 0x0000CEF6, 0x04AF0773, 0x28D2EEA9, 0x2751EEEC,
12310957b409SSimon J. Gerraty 0x037B4A7F, 0x3B4C1059, 0x08F37674, 0x2AE906E1,
12320957b409SSimon J. Gerraty 0x18A88A6A, 0x00008786 },
12330957b409SSimon J. Gerraty
12340957b409SSimon J. Gerraty { 0x34BC21D1, 0x0CCE474D, 0x15048BF4, 0x1D0BB409,
12350957b409SSimon J. Gerraty 0x021CDA16, 0x20DE76C3, 0x34C59063, 0x04EDE20E,
12360957b409SSimon J. Gerraty 0x00003ED1, 0x282A3740, 0x0BE3BBF3, 0x29889DAE,
12370957b409SSimon J. Gerraty 0x03413697, 0x34C68A09, 0x210EBE93, 0x0C8A224C,
12380957b409SSimon J. Gerraty 0x0826B331, 0x00009099 },
12390957b409SSimon J. Gerraty
12400957b409SSimon J. Gerraty { 0x0624E3C4, 0x140317BA, 0x2F82C99D, 0x260C0A2C,
12410957b409SSimon J. Gerraty 0x25D55179, 0x194DCC83, 0x3D95E462, 0x356F6A05,
12420957b409SSimon J. Gerraty 0x0000741D, 0x0D4481D3, 0x2657FC8B, 0x1BA5CA71,
12430957b409SSimon J. Gerraty 0x3AE44B0D, 0x07B1548E, 0x0E0D5522, 0x05FDC567,
12440957b409SSimon J. Gerraty 0x2D1AA70E, 0x00000770 },
12450957b409SSimon J. Gerraty
12460957b409SSimon J. Gerraty { 0x06072C01, 0x23857675, 0x1EAD58A9, 0x0B8A12D9,
12470957b409SSimon J. Gerraty 0x1EE2FC79, 0x0177CB61, 0x0495A618, 0x20DEB82B,
12480957b409SSimon J. Gerraty 0x0000177C, 0x2FC7BFD8, 0x310EEF8B, 0x1FB4DF39,
12490957b409SSimon J. Gerraty 0x3B8530E8, 0x0F4E7226, 0x0246B6D0, 0x2A558A24,
12500957b409SSimon J. Gerraty 0x163353AF, 0x000063BB },
12510957b409SSimon J. Gerraty
12520957b409SSimon J. Gerraty { 0x24D2920B, 0x1C249DCC, 0x2069C5E5, 0x09AB2F9E,
12530957b409SSimon J. Gerraty 0x36DF3CF1, 0x1991FD0C, 0x062B97A7, 0x1E80070E,
12540957b409SSimon J. Gerraty 0x000054E7, 0x20D0B375, 0x2E9F20BD, 0x35090081,
12550957b409SSimon J. Gerraty 0x1C7A9DDC, 0x22E7C371, 0x087E3016, 0x03175421,
12560957b409SSimon J. Gerraty 0x3C6ECA7D, 0x0000F599 },
12570957b409SSimon J. Gerraty
12580957b409SSimon J. Gerraty { 0x259B9D5F, 0x0D9A318F, 0x23A0EF16, 0x00EBE4B7,
12590957b409SSimon J. Gerraty 0x088265AE, 0x2CDE2666, 0x2BAE7ADF, 0x1371A5C6,
12600957b409SSimon J. Gerraty 0x0000F045, 0x0D034F36, 0x1F967378, 0x1B5FA3F4,
12610957b409SSimon J. Gerraty 0x0EC8739D, 0x1643E62A, 0x1653947E, 0x22D1F4E6,
12620957b409SSimon J. Gerraty 0x0FB8D64B, 0x0000B5B9 }
12630957b409SSimon J. Gerraty };
12640957b409SSimon J. Gerraty
12650957b409SSimon J. Gerraty /*
12660957b409SSimon J. Gerraty * Lookup one of the Gwin[] values, by index. This is constant-time.
12670957b409SSimon J. Gerraty */
12680957b409SSimon J. Gerraty static void
lookup_Gwin(p256_jacobian * T,uint32_t idx)12690957b409SSimon J. Gerraty lookup_Gwin(p256_jacobian *T, uint32_t idx)
12700957b409SSimon J. Gerraty {
12710957b409SSimon J. Gerraty uint32_t xy[18];
12720957b409SSimon J. Gerraty uint32_t k;
12730957b409SSimon J. Gerraty size_t u;
12740957b409SSimon J. Gerraty
12750957b409SSimon J. Gerraty memset(xy, 0, sizeof xy);
12760957b409SSimon J. Gerraty for (k = 0; k < 15; k ++) {
12770957b409SSimon J. Gerraty uint32_t m;
12780957b409SSimon J. Gerraty
12790957b409SSimon J. Gerraty m = -EQ(idx, k + 1);
12800957b409SSimon J. Gerraty for (u = 0; u < 18; u ++) {
12810957b409SSimon J. Gerraty xy[u] |= m & Gwin[k][u];
12820957b409SSimon J. Gerraty }
12830957b409SSimon J. Gerraty }
12840957b409SSimon J. Gerraty memcpy(T->x, &xy[0], sizeof T->x);
12850957b409SSimon J. Gerraty memcpy(T->y, &xy[9], sizeof T->y);
12860957b409SSimon J. Gerraty memset(T->z, 0, sizeof T->z);
12870957b409SSimon J. Gerraty T->z[0] = 1;
12880957b409SSimon J. Gerraty }
12890957b409SSimon J. Gerraty
12900957b409SSimon J. Gerraty /*
12910957b409SSimon J. Gerraty * Multiply the generator by an integer. The integer is assumed non-zero
12920957b409SSimon J. Gerraty * and lower than the curve order.
12930957b409SSimon J. Gerraty */
12940957b409SSimon J. Gerraty static void
p256_mulgen(p256_jacobian * P,const unsigned char * x,size_t xlen)12950957b409SSimon J. Gerraty p256_mulgen(p256_jacobian *P, const unsigned char *x, size_t xlen)
12960957b409SSimon J. Gerraty {
12970957b409SSimon J. Gerraty /*
12980957b409SSimon J. Gerraty * qz is a flag that is initially 1, and remains equal to 1
12990957b409SSimon J. Gerraty * as long as the point is the point at infinity.
13000957b409SSimon J. Gerraty *
13010957b409SSimon J. Gerraty * We use a 4-bit window to handle multiplier bits by groups
13020957b409SSimon J. Gerraty * of 4. The precomputed window is constant static data, with
13030957b409SSimon J. Gerraty * points in affine coordinates; we use a constant-time lookup.
13040957b409SSimon J. Gerraty */
13050957b409SSimon J. Gerraty p256_jacobian Q;
13060957b409SSimon J. Gerraty uint32_t qz;
13070957b409SSimon J. Gerraty
13080957b409SSimon J. Gerraty memset(&Q, 0, sizeof Q);
13090957b409SSimon J. Gerraty qz = 1;
13100957b409SSimon J. Gerraty while (xlen -- > 0) {
13110957b409SSimon J. Gerraty int k;
13120957b409SSimon J. Gerraty unsigned bx;
13130957b409SSimon J. Gerraty
13140957b409SSimon J. Gerraty bx = *x ++;
13150957b409SSimon J. Gerraty for (k = 0; k < 2; k ++) {
13160957b409SSimon J. Gerraty uint32_t bits;
13170957b409SSimon J. Gerraty uint32_t bnz;
13180957b409SSimon J. Gerraty p256_jacobian T, U;
13190957b409SSimon J. Gerraty
13200957b409SSimon J. Gerraty p256_double(&Q);
13210957b409SSimon J. Gerraty p256_double(&Q);
13220957b409SSimon J. Gerraty p256_double(&Q);
13230957b409SSimon J. Gerraty p256_double(&Q);
13240957b409SSimon J. Gerraty bits = (bx >> 4) & 0x0F;
13250957b409SSimon J. Gerraty bnz = NEQ(bits, 0);
13260957b409SSimon J. Gerraty lookup_Gwin(&T, bits);
13270957b409SSimon J. Gerraty U = Q;
13280957b409SSimon J. Gerraty p256_add_mixed(&U, &T);
13290957b409SSimon J. Gerraty CCOPY(bnz & qz, &Q, &T, sizeof Q);
13300957b409SSimon J. Gerraty CCOPY(bnz & ~qz, &Q, &U, sizeof Q);
13310957b409SSimon J. Gerraty qz &= ~bnz;
13320957b409SSimon J. Gerraty bx <<= 4;
13330957b409SSimon J. Gerraty }
13340957b409SSimon J. Gerraty }
13350957b409SSimon J. Gerraty *P = Q;
13360957b409SSimon J. Gerraty }
13370957b409SSimon J. Gerraty
13380957b409SSimon J. Gerraty static const unsigned char P256_G[] = {
13390957b409SSimon J. Gerraty 0x04, 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8,
13400957b409SSimon J. Gerraty 0xBC, 0xE6, 0xE5, 0x63, 0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D,
13410957b409SSimon J. Gerraty 0x81, 0x2D, 0xEB, 0x33, 0xA0, 0xF4, 0xA1, 0x39, 0x45, 0xD8,
13420957b409SSimon J. Gerraty 0x98, 0xC2, 0x96, 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F,
13430957b409SSimon J. Gerraty 0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, 0x0F, 0x9E, 0x16, 0x2B,
13440957b409SSimon J. Gerraty 0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, 0xCB, 0xB6, 0x40,
13450957b409SSimon J. Gerraty 0x68, 0x37, 0xBF, 0x51, 0xF5
13460957b409SSimon J. Gerraty };
13470957b409SSimon J. Gerraty
13480957b409SSimon J. Gerraty static const unsigned char P256_N[] = {
13490957b409SSimon J. Gerraty 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF,
13500957b409SSimon J. Gerraty 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xBC, 0xE6, 0xFA, 0xAD,
13510957b409SSimon J. Gerraty 0xA7, 0x17, 0x9E, 0x84, 0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63,
13520957b409SSimon J. Gerraty 0x25, 0x51
13530957b409SSimon J. Gerraty };
13540957b409SSimon J. Gerraty
13550957b409SSimon J. Gerraty static const unsigned char *
api_generator(int curve,size_t * len)13560957b409SSimon J. Gerraty api_generator(int curve, size_t *len)
13570957b409SSimon J. Gerraty {
13580957b409SSimon J. Gerraty (void)curve;
13590957b409SSimon J. Gerraty *len = sizeof P256_G;
13600957b409SSimon J. Gerraty return P256_G;
13610957b409SSimon J. Gerraty }
13620957b409SSimon J. Gerraty
13630957b409SSimon J. Gerraty static const unsigned char *
api_order(int curve,size_t * len)13640957b409SSimon J. Gerraty api_order(int curve, size_t *len)
13650957b409SSimon J. Gerraty {
13660957b409SSimon J. Gerraty (void)curve;
13670957b409SSimon J. Gerraty *len = sizeof P256_N;
13680957b409SSimon J. Gerraty return P256_N;
13690957b409SSimon J. Gerraty }
13700957b409SSimon J. Gerraty
13710957b409SSimon J. Gerraty static size_t
api_xoff(int curve,size_t * len)13720957b409SSimon J. Gerraty api_xoff(int curve, size_t *len)
13730957b409SSimon J. Gerraty {
13740957b409SSimon J. Gerraty (void)curve;
13750957b409SSimon J. Gerraty *len = 32;
13760957b409SSimon J. Gerraty return 1;
13770957b409SSimon J. Gerraty }
13780957b409SSimon J. Gerraty
13790957b409SSimon J. Gerraty static uint32_t
api_mul(unsigned char * G,size_t Glen,const unsigned char * x,size_t xlen,int curve)13800957b409SSimon J. Gerraty api_mul(unsigned char *G, size_t Glen,
13810957b409SSimon J. Gerraty const unsigned char *x, size_t xlen, int curve)
13820957b409SSimon J. Gerraty {
13830957b409SSimon J. Gerraty uint32_t r;
13840957b409SSimon J. Gerraty p256_jacobian P;
13850957b409SSimon J. Gerraty
13860957b409SSimon J. Gerraty (void)curve;
1387*cc9e6590SSimon J. Gerraty if (Glen != 65) {
1388*cc9e6590SSimon J. Gerraty return 0;
1389*cc9e6590SSimon J. Gerraty }
13900957b409SSimon J. Gerraty r = p256_decode(&P, G, Glen);
13910957b409SSimon J. Gerraty p256_mul(&P, x, xlen);
13920957b409SSimon J. Gerraty p256_to_affine(&P);
13930957b409SSimon J. Gerraty p256_encode(G, &P);
13940957b409SSimon J. Gerraty return r;
13950957b409SSimon J. Gerraty }
13960957b409SSimon J. Gerraty
13970957b409SSimon J. Gerraty static size_t
api_mulgen(unsigned char * R,const unsigned char * x,size_t xlen,int curve)13980957b409SSimon J. Gerraty api_mulgen(unsigned char *R,
13990957b409SSimon J. Gerraty const unsigned char *x, size_t xlen, int curve)
14000957b409SSimon J. Gerraty {
14010957b409SSimon J. Gerraty p256_jacobian P;
14020957b409SSimon J. Gerraty
14030957b409SSimon J. Gerraty (void)curve;
14040957b409SSimon J. Gerraty p256_mulgen(&P, x, xlen);
14050957b409SSimon J. Gerraty p256_to_affine(&P);
14060957b409SSimon J. Gerraty p256_encode(R, &P);
14070957b409SSimon J. Gerraty return 65;
14080957b409SSimon J. Gerraty }
14090957b409SSimon J. Gerraty
14100957b409SSimon J. Gerraty static uint32_t
api_muladd(unsigned char * A,const unsigned char * B,size_t len,const unsigned char * x,size_t xlen,const unsigned char * y,size_t ylen,int curve)14110957b409SSimon J. Gerraty api_muladd(unsigned char *A, const unsigned char *B, size_t len,
14120957b409SSimon J. Gerraty const unsigned char *x, size_t xlen,
14130957b409SSimon J. Gerraty const unsigned char *y, size_t ylen, int curve)
14140957b409SSimon J. Gerraty {
14150957b409SSimon J. Gerraty p256_jacobian P, Q;
14160957b409SSimon J. Gerraty uint32_t r, t, z;
14170957b409SSimon J. Gerraty int i;
14180957b409SSimon J. Gerraty
14190957b409SSimon J. Gerraty (void)curve;
1420*cc9e6590SSimon J. Gerraty if (len != 65) {
1421*cc9e6590SSimon J. Gerraty return 0;
1422*cc9e6590SSimon J. Gerraty }
14230957b409SSimon J. Gerraty r = p256_decode(&P, A, len);
14240957b409SSimon J. Gerraty p256_mul(&P, x, xlen);
14250957b409SSimon J. Gerraty if (B == NULL) {
14260957b409SSimon J. Gerraty p256_mulgen(&Q, y, ylen);
14270957b409SSimon J. Gerraty } else {
14280957b409SSimon J. Gerraty r &= p256_decode(&Q, B, len);
14290957b409SSimon J. Gerraty p256_mul(&Q, y, ylen);
14300957b409SSimon J. Gerraty }
14310957b409SSimon J. Gerraty
14320957b409SSimon J. Gerraty /*
14330957b409SSimon J. Gerraty * The final addition may fail in case both points are equal.
14340957b409SSimon J. Gerraty */
14350957b409SSimon J. Gerraty t = p256_add(&P, &Q);
14360957b409SSimon J. Gerraty reduce_final_f256(P.z);
14370957b409SSimon J. Gerraty z = 0;
14380957b409SSimon J. Gerraty for (i = 0; i < 9; i ++) {
14390957b409SSimon J. Gerraty z |= P.z[i];
14400957b409SSimon J. Gerraty }
14410957b409SSimon J. Gerraty z = EQ(z, 0);
14420957b409SSimon J. Gerraty p256_double(&Q);
14430957b409SSimon J. Gerraty
14440957b409SSimon J. Gerraty /*
14450957b409SSimon J. Gerraty * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
14460957b409SSimon J. Gerraty * have the following:
14470957b409SSimon J. Gerraty *
14480957b409SSimon J. Gerraty * z = 0, t = 0 return P (normal addition)
14490957b409SSimon J. Gerraty * z = 0, t = 1 return P (normal addition)
14500957b409SSimon J. Gerraty * z = 1, t = 0 return Q (a 'double' case)
14510957b409SSimon J. Gerraty * z = 1, t = 1 report an error (P+Q = 0)
14520957b409SSimon J. Gerraty */
14530957b409SSimon J. Gerraty CCOPY(z & ~t, &P, &Q, sizeof Q);
14540957b409SSimon J. Gerraty p256_to_affine(&P);
14550957b409SSimon J. Gerraty p256_encode(A, &P);
14560957b409SSimon J. Gerraty r &= ~(z & t);
14570957b409SSimon J. Gerraty return r;
14580957b409SSimon J. Gerraty }
14590957b409SSimon J. Gerraty
14600957b409SSimon J. Gerraty /* see bearssl_ec.h */
14610957b409SSimon J. Gerraty const br_ec_impl br_ec_p256_m31 = {
14620957b409SSimon J. Gerraty (uint32_t)0x00800000,
14630957b409SSimon J. Gerraty &api_generator,
14640957b409SSimon J. Gerraty &api_order,
14650957b409SSimon J. Gerraty &api_xoff,
14660957b409SSimon J. Gerraty &api_mul,
14670957b409SSimon J. Gerraty &api_mulgen,
14680957b409SSimon J. Gerraty &api_muladd
14690957b409SSimon J. Gerraty };
1470