/*
 * ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is the elliptic curve math library for prime field curves.
 *
 * The Initial Developer of the Original Code is
 * Sun Microsystems, Inc.
 * Portions created by the Initial Developer are Copyright (C) 2003
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Douglas Stebila <douglas@stebila.ca>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 *
 * Sun elects to use this software under the MPL license.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include "ecp.h"
#include "mpi.h"
#include "mplogic.h"
#include "mpi-priv.h"
#ifndef _KERNEL
#include <stdlib.h>
#endif

55*f9fbec18Smcpowers /* Fast modular reduction for p256 = 2^256 - 2^224 + 2^192+ 2^96 - 1. a can be r.
56*f9fbec18Smcpowers * Uses algorithm 2.29 from Hankerson, Menezes, Vanstone. Guide to
57*f9fbec18Smcpowers * Elliptic Curve Cryptography. */
58*f9fbec18Smcpowers mp_err
ec_GFp_nistp256_mod(const mp_int * a,mp_int * r,const GFMethod * meth)59*f9fbec18Smcpowers ec_GFp_nistp256_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
60*f9fbec18Smcpowers {
61*f9fbec18Smcpowers mp_err res = MP_OKAY;
62*f9fbec18Smcpowers mp_size a_used = MP_USED(a);
63*f9fbec18Smcpowers int a_bits = mpl_significant_bits(a);
64*f9fbec18Smcpowers mp_digit carry;
65*f9fbec18Smcpowers
66*f9fbec18Smcpowers #ifdef ECL_THIRTY_TWO_BIT
67*f9fbec18Smcpowers mp_digit a8=0, a9=0, a10=0, a11=0, a12=0, a13=0, a14=0, a15=0;
68*f9fbec18Smcpowers mp_digit r0, r1, r2, r3, r4, r5, r6, r7;
69*f9fbec18Smcpowers int r8; /* must be a signed value ! */
70*f9fbec18Smcpowers #else
71*f9fbec18Smcpowers mp_digit a4=0, a5=0, a6=0, a7=0;
72*f9fbec18Smcpowers mp_digit a4h, a4l, a5h, a5l, a6h, a6l, a7h, a7l;
73*f9fbec18Smcpowers mp_digit r0, r1, r2, r3;
74*f9fbec18Smcpowers int r4; /* must be a signed value ! */
75*f9fbec18Smcpowers #endif
76*f9fbec18Smcpowers /* for polynomials larger than twice the field size
77*f9fbec18Smcpowers * use regular reduction */
78*f9fbec18Smcpowers if (a_bits < 256) {
79*f9fbec18Smcpowers if (a == r) return MP_OKAY;
80*f9fbec18Smcpowers return mp_copy(a,r);
81*f9fbec18Smcpowers }
82*f9fbec18Smcpowers if (a_bits > 512) {
83*f9fbec18Smcpowers MP_CHECKOK(mp_mod(a, &meth->irr, r));
84*f9fbec18Smcpowers } else {
85*f9fbec18Smcpowers
86*f9fbec18Smcpowers #ifdef ECL_THIRTY_TWO_BIT
87*f9fbec18Smcpowers switch (a_used) {
88*f9fbec18Smcpowers case 16:
89*f9fbec18Smcpowers a15 = MP_DIGIT(a,15);
90*f9fbec18Smcpowers case 15:
91*f9fbec18Smcpowers a14 = MP_DIGIT(a,14);
92*f9fbec18Smcpowers case 14:
93*f9fbec18Smcpowers a13 = MP_DIGIT(a,13);
94*f9fbec18Smcpowers case 13:
95*f9fbec18Smcpowers a12 = MP_DIGIT(a,12);
96*f9fbec18Smcpowers case 12:
97*f9fbec18Smcpowers a11 = MP_DIGIT(a,11);
98*f9fbec18Smcpowers case 11:
99*f9fbec18Smcpowers a10 = MP_DIGIT(a,10);
100*f9fbec18Smcpowers case 10:
101*f9fbec18Smcpowers a9 = MP_DIGIT(a,9);
102*f9fbec18Smcpowers case 9:
103*f9fbec18Smcpowers a8 = MP_DIGIT(a,8);
104*f9fbec18Smcpowers }
105*f9fbec18Smcpowers
106*f9fbec18Smcpowers r0 = MP_DIGIT(a,0);
107*f9fbec18Smcpowers r1 = MP_DIGIT(a,1);
108*f9fbec18Smcpowers r2 = MP_DIGIT(a,2);
109*f9fbec18Smcpowers r3 = MP_DIGIT(a,3);
110*f9fbec18Smcpowers r4 = MP_DIGIT(a,4);
111*f9fbec18Smcpowers r5 = MP_DIGIT(a,5);
112*f9fbec18Smcpowers r6 = MP_DIGIT(a,6);
113*f9fbec18Smcpowers r7 = MP_DIGIT(a,7);
114*f9fbec18Smcpowers
115*f9fbec18Smcpowers /* sum 1 */
116*f9fbec18Smcpowers MP_ADD_CARRY(r3, a11, r3, 0, carry);
117*f9fbec18Smcpowers MP_ADD_CARRY(r4, a12, r4, carry, carry);
118*f9fbec18Smcpowers MP_ADD_CARRY(r5, a13, r5, carry, carry);
119*f9fbec18Smcpowers MP_ADD_CARRY(r6, a14, r6, carry, carry);
120*f9fbec18Smcpowers MP_ADD_CARRY(r7, a15, r7, carry, carry);
121*f9fbec18Smcpowers r8 = carry;
122*f9fbec18Smcpowers MP_ADD_CARRY(r3, a11, r3, 0, carry);
123*f9fbec18Smcpowers MP_ADD_CARRY(r4, a12, r4, carry, carry);
124*f9fbec18Smcpowers MP_ADD_CARRY(r5, a13, r5, carry, carry);
125*f9fbec18Smcpowers MP_ADD_CARRY(r6, a14, r6, carry, carry);
126*f9fbec18Smcpowers MP_ADD_CARRY(r7, a15, r7, carry, carry);
127*f9fbec18Smcpowers r8 += carry;
128*f9fbec18Smcpowers /* sum 2 */
129*f9fbec18Smcpowers MP_ADD_CARRY(r3, a12, r3, 0, carry);
130*f9fbec18Smcpowers MP_ADD_CARRY(r4, a13, r4, carry, carry);
131*f9fbec18Smcpowers MP_ADD_CARRY(r5, a14, r5, carry, carry);
132*f9fbec18Smcpowers MP_ADD_CARRY(r6, a15, r6, carry, carry);
133*f9fbec18Smcpowers MP_ADD_CARRY(r7, 0, r7, carry, carry);
134*f9fbec18Smcpowers r8 += carry;
135*f9fbec18Smcpowers /* combine last bottom of sum 3 with second sum 2 */
136*f9fbec18Smcpowers MP_ADD_CARRY(r0, a8, r0, 0, carry);
137*f9fbec18Smcpowers MP_ADD_CARRY(r1, a9, r1, carry, carry);
138*f9fbec18Smcpowers MP_ADD_CARRY(r2, a10, r2, carry, carry);
139*f9fbec18Smcpowers MP_ADD_CARRY(r3, a12, r3, carry, carry);
140*f9fbec18Smcpowers MP_ADD_CARRY(r4, a13, r4, carry, carry);
141*f9fbec18Smcpowers MP_ADD_CARRY(r5, a14, r5, carry, carry);
142*f9fbec18Smcpowers MP_ADD_CARRY(r6, a15, r6, carry, carry);
143*f9fbec18Smcpowers MP_ADD_CARRY(r7, a15, r7, carry, carry); /* from sum 3 */
144*f9fbec18Smcpowers r8 += carry;
145*f9fbec18Smcpowers /* sum 3 (rest of it)*/
146*f9fbec18Smcpowers MP_ADD_CARRY(r6, a14, r6, 0, carry);
147*f9fbec18Smcpowers MP_ADD_CARRY(r7, 0, r7, carry, carry);
148*f9fbec18Smcpowers r8 += carry;
149*f9fbec18Smcpowers /* sum 4 (rest of it)*/
150*f9fbec18Smcpowers MP_ADD_CARRY(r0, a9, r0, 0, carry);
151*f9fbec18Smcpowers MP_ADD_CARRY(r1, a10, r1, carry, carry);
152*f9fbec18Smcpowers MP_ADD_CARRY(r2, a11, r2, carry, carry);
153*f9fbec18Smcpowers MP_ADD_CARRY(r3, a13, r3, carry, carry);
154*f9fbec18Smcpowers MP_ADD_CARRY(r4, a14, r4, carry, carry);
155*f9fbec18Smcpowers MP_ADD_CARRY(r5, a15, r5, carry, carry);
156*f9fbec18Smcpowers MP_ADD_CARRY(r6, a13, r6, carry, carry);
157*f9fbec18Smcpowers MP_ADD_CARRY(r7, a8, r7, carry, carry);
158*f9fbec18Smcpowers r8 += carry;
159*f9fbec18Smcpowers /* diff 5 */
160*f9fbec18Smcpowers MP_SUB_BORROW(r0, a11, r0, 0, carry);
161*f9fbec18Smcpowers MP_SUB_BORROW(r1, a12, r1, carry, carry);
162*f9fbec18Smcpowers MP_SUB_BORROW(r2, a13, r2, carry, carry);
163*f9fbec18Smcpowers MP_SUB_BORROW(r3, 0, r3, carry, carry);
164*f9fbec18Smcpowers MP_SUB_BORROW(r4, 0, r4, carry, carry);
165*f9fbec18Smcpowers MP_SUB_BORROW(r5, 0, r5, carry, carry);
166*f9fbec18Smcpowers MP_SUB_BORROW(r6, a8, r6, carry, carry);
167*f9fbec18Smcpowers MP_SUB_BORROW(r7, a10, r7, carry, carry);
168*f9fbec18Smcpowers r8 -= carry;
169*f9fbec18Smcpowers /* diff 6 */
170*f9fbec18Smcpowers MP_SUB_BORROW(r0, a12, r0, 0, carry);
171*f9fbec18Smcpowers MP_SUB_BORROW(r1, a13, r1, carry, carry);
172*f9fbec18Smcpowers MP_SUB_BORROW(r2, a14, r2, carry, carry);
173*f9fbec18Smcpowers MP_SUB_BORROW(r3, a15, r3, carry, carry);
174*f9fbec18Smcpowers MP_SUB_BORROW(r4, 0, r4, carry, carry);
175*f9fbec18Smcpowers MP_SUB_BORROW(r5, 0, r5, carry, carry);
176*f9fbec18Smcpowers MP_SUB_BORROW(r6, a9, r6, carry, carry);
177*f9fbec18Smcpowers MP_SUB_BORROW(r7, a11, r7, carry, carry);
178*f9fbec18Smcpowers r8 -= carry;
179*f9fbec18Smcpowers /* diff 7 */
180*f9fbec18Smcpowers MP_SUB_BORROW(r0, a13, r0, 0, carry);
181*f9fbec18Smcpowers MP_SUB_BORROW(r1, a14, r1, carry, carry);
182*f9fbec18Smcpowers MP_SUB_BORROW(r2, a15, r2, carry, carry);
183*f9fbec18Smcpowers MP_SUB_BORROW(r3, a8, r3, carry, carry);
184*f9fbec18Smcpowers MP_SUB_BORROW(r4, a9, r4, carry, carry);
185*f9fbec18Smcpowers MP_SUB_BORROW(r5, a10, r5, carry, carry);
186*f9fbec18Smcpowers MP_SUB_BORROW(r6, 0, r6, carry, carry);
187*f9fbec18Smcpowers MP_SUB_BORROW(r7, a12, r7, carry, carry);
188*f9fbec18Smcpowers r8 -= carry;
189*f9fbec18Smcpowers /* diff 8 */
190*f9fbec18Smcpowers MP_SUB_BORROW(r0, a14, r0, 0, carry);
191*f9fbec18Smcpowers MP_SUB_BORROW(r1, a15, r1, carry, carry);
192*f9fbec18Smcpowers MP_SUB_BORROW(r2, 0, r2, carry, carry);
193*f9fbec18Smcpowers MP_SUB_BORROW(r3, a9, r3, carry, carry);
194*f9fbec18Smcpowers MP_SUB_BORROW(r4, a10, r4, carry, carry);
195*f9fbec18Smcpowers MP_SUB_BORROW(r5, a11, r5, carry, carry);
196*f9fbec18Smcpowers MP_SUB_BORROW(r6, 0, r6, carry, carry);
197*f9fbec18Smcpowers MP_SUB_BORROW(r7, a13, r7, carry, carry);
198*f9fbec18Smcpowers r8 -= carry;
199*f9fbec18Smcpowers
200*f9fbec18Smcpowers /* reduce the overflows */
201*f9fbec18Smcpowers while (r8 > 0) {
202*f9fbec18Smcpowers mp_digit r8_d = r8;
203*f9fbec18Smcpowers MP_ADD_CARRY(r0, r8_d, r0, 0, carry);
204*f9fbec18Smcpowers MP_ADD_CARRY(r1, 0, r1, carry, carry);
205*f9fbec18Smcpowers MP_ADD_CARRY(r2, 0, r2, carry, carry);
206*f9fbec18Smcpowers MP_ADD_CARRY(r3, -r8_d, r3, carry, carry);
207*f9fbec18Smcpowers MP_ADD_CARRY(r4, MP_DIGIT_MAX, r4, carry, carry);
208*f9fbec18Smcpowers MP_ADD_CARRY(r5, MP_DIGIT_MAX, r5, carry, carry);
209*f9fbec18Smcpowers MP_ADD_CARRY(r6, -(r8_d+1), r6, carry, carry);
210*f9fbec18Smcpowers MP_ADD_CARRY(r7, (r8_d-1), r7, carry, carry);
211*f9fbec18Smcpowers r8 = carry;
212*f9fbec18Smcpowers }
213*f9fbec18Smcpowers
214*f9fbec18Smcpowers /* reduce the underflows */
215*f9fbec18Smcpowers while (r8 < 0) {
216*f9fbec18Smcpowers mp_digit r8_d = -r8;
217*f9fbec18Smcpowers MP_SUB_BORROW(r0, r8_d, r0, 0, carry);
218*f9fbec18Smcpowers MP_SUB_BORROW(r1, 0, r1, carry, carry);
219*f9fbec18Smcpowers MP_SUB_BORROW(r2, 0, r2, carry, carry);
220*f9fbec18Smcpowers MP_SUB_BORROW(r3, -r8_d, r3, carry, carry);
221*f9fbec18Smcpowers MP_SUB_BORROW(r4, MP_DIGIT_MAX, r4, carry, carry);
222*f9fbec18Smcpowers MP_SUB_BORROW(r5, MP_DIGIT_MAX, r5, carry, carry);
223*f9fbec18Smcpowers MP_SUB_BORROW(r6, -(r8_d+1), r6, carry, carry);
224*f9fbec18Smcpowers MP_SUB_BORROW(r7, (r8_d-1), r7, carry, carry);
225*f9fbec18Smcpowers r8 = -carry;
226*f9fbec18Smcpowers }
227*f9fbec18Smcpowers if (a != r) {
228*f9fbec18Smcpowers MP_CHECKOK(s_mp_pad(r,8));
229*f9fbec18Smcpowers }
230*f9fbec18Smcpowers MP_SIGN(r) = MP_ZPOS;
231*f9fbec18Smcpowers MP_USED(r) = 8;
232*f9fbec18Smcpowers
233*f9fbec18Smcpowers MP_DIGIT(r,7) = r7;
234*f9fbec18Smcpowers MP_DIGIT(r,6) = r6;
235*f9fbec18Smcpowers MP_DIGIT(r,5) = r5;
236*f9fbec18Smcpowers MP_DIGIT(r,4) = r4;
237*f9fbec18Smcpowers MP_DIGIT(r,3) = r3;
238*f9fbec18Smcpowers MP_DIGIT(r,2) = r2;
239*f9fbec18Smcpowers MP_DIGIT(r,1) = r1;
240*f9fbec18Smcpowers MP_DIGIT(r,0) = r0;
241*f9fbec18Smcpowers
242*f9fbec18Smcpowers /* final reduction if necessary */
243*f9fbec18Smcpowers if ((r7 == MP_DIGIT_MAX) &&
244*f9fbec18Smcpowers ((r6 > 1) || ((r6 == 1) &&
245*f9fbec18Smcpowers (r5 || r4 || r3 ||
246*f9fbec18Smcpowers ((r2 == MP_DIGIT_MAX) && (r1 == MP_DIGIT_MAX)
247*f9fbec18Smcpowers && (r0 == MP_DIGIT_MAX)))))) {
248*f9fbec18Smcpowers MP_CHECKOK(mp_sub(r, &meth->irr, r));
249*f9fbec18Smcpowers }
250*f9fbec18Smcpowers #ifdef notdef
251*f9fbec18Smcpowers
252*f9fbec18Smcpowers
253*f9fbec18Smcpowers /* smooth the negatives */
254*f9fbec18Smcpowers while (MP_SIGN(r) != MP_ZPOS) {
255*f9fbec18Smcpowers MP_CHECKOK(mp_add(r, &meth->irr, r));
256*f9fbec18Smcpowers }
257*f9fbec18Smcpowers while (MP_USED(r) > 8) {
258*f9fbec18Smcpowers MP_CHECKOK(mp_sub(r, &meth->irr, r));
259*f9fbec18Smcpowers }
260*f9fbec18Smcpowers
261*f9fbec18Smcpowers /* final reduction if necessary */
262*f9fbec18Smcpowers if (MP_DIGIT(r,7) >= MP_DIGIT(&meth->irr,7)) {
263*f9fbec18Smcpowers if (mp_cmp(r,&meth->irr) != MP_LT) {
264*f9fbec18Smcpowers MP_CHECKOK(mp_sub(r, &meth->irr, r));
265*f9fbec18Smcpowers }
266*f9fbec18Smcpowers }
267*f9fbec18Smcpowers #endif
268*f9fbec18Smcpowers s_mp_clamp(r);
269*f9fbec18Smcpowers #else
270*f9fbec18Smcpowers switch (a_used) {
271*f9fbec18Smcpowers case 8:
272*f9fbec18Smcpowers a7 = MP_DIGIT(a,7);
273*f9fbec18Smcpowers case 7:
274*f9fbec18Smcpowers a6 = MP_DIGIT(a,6);
275*f9fbec18Smcpowers case 6:
276*f9fbec18Smcpowers a5 = MP_DIGIT(a,5);
277*f9fbec18Smcpowers case 5:
278*f9fbec18Smcpowers a4 = MP_DIGIT(a,4);
279*f9fbec18Smcpowers }
280*f9fbec18Smcpowers a7l = a7 << 32;
281*f9fbec18Smcpowers a7h = a7 >> 32;
282*f9fbec18Smcpowers a6l = a6 << 32;
283*f9fbec18Smcpowers a6h = a6 >> 32;
284*f9fbec18Smcpowers a5l = a5 << 32;
285*f9fbec18Smcpowers a5h = a5 >> 32;
286*f9fbec18Smcpowers a4l = a4 << 32;
287*f9fbec18Smcpowers a4h = a4 >> 32;
288*f9fbec18Smcpowers r3 = MP_DIGIT(a,3);
289*f9fbec18Smcpowers r2 = MP_DIGIT(a,2);
290*f9fbec18Smcpowers r1 = MP_DIGIT(a,1);
291*f9fbec18Smcpowers r0 = MP_DIGIT(a,0);
292*f9fbec18Smcpowers
293*f9fbec18Smcpowers /* sum 1 */
294*f9fbec18Smcpowers MP_ADD_CARRY(r1, a5h << 32, r1, 0, carry);
295*f9fbec18Smcpowers MP_ADD_CARRY(r2, a6, r2, carry, carry);
296*f9fbec18Smcpowers MP_ADD_CARRY(r3, a7, r3, carry, carry);
297*f9fbec18Smcpowers r4 = carry;
298*f9fbec18Smcpowers MP_ADD_CARRY(r1, a5h << 32, r1, 0, carry);
299*f9fbec18Smcpowers MP_ADD_CARRY(r2, a6, r2, carry, carry);
300*f9fbec18Smcpowers MP_ADD_CARRY(r3, a7, r3, carry, carry);
301*f9fbec18Smcpowers r4 += carry;
302*f9fbec18Smcpowers /* sum 2 */
303*f9fbec18Smcpowers MP_ADD_CARRY(r1, a6l, r1, 0, carry);
304*f9fbec18Smcpowers MP_ADD_CARRY(r2, a6h | a7l, r2, carry, carry);
305*f9fbec18Smcpowers MP_ADD_CARRY(r3, a7h, r3, carry, carry);
306*f9fbec18Smcpowers r4 += carry;
307*f9fbec18Smcpowers MP_ADD_CARRY(r1, a6l, r1, 0, carry);
308*f9fbec18Smcpowers MP_ADD_CARRY(r2, a6h | a7l, r2, carry, carry);
309*f9fbec18Smcpowers MP_ADD_CARRY(r3, a7h, r3, carry, carry);
310*f9fbec18Smcpowers r4 += carry;
311*f9fbec18Smcpowers
312*f9fbec18Smcpowers /* sum 3 */
313*f9fbec18Smcpowers MP_ADD_CARRY(r0, a4, r0, 0, carry);
314*f9fbec18Smcpowers MP_ADD_CARRY(r1, a5l >> 32, r1, carry, carry);
315*f9fbec18Smcpowers MP_ADD_CARRY(r2, 0, r2, carry, carry);
316*f9fbec18Smcpowers MP_ADD_CARRY(r3, a7, r3, carry, carry);
317*f9fbec18Smcpowers r4 += carry;
318*f9fbec18Smcpowers /* sum 4 */
319*f9fbec18Smcpowers MP_ADD_CARRY(r0, a4h | a5l, r0, 0, carry);
320*f9fbec18Smcpowers MP_ADD_CARRY(r1, a5h|(a6h<<32), r1, carry, carry);
321*f9fbec18Smcpowers MP_ADD_CARRY(r2, a7, r2, carry, carry);
322*f9fbec18Smcpowers MP_ADD_CARRY(r3, a6h | a4l, r3, carry, carry);
323*f9fbec18Smcpowers r4 += carry;
324*f9fbec18Smcpowers /* diff 5 */
325*f9fbec18Smcpowers MP_SUB_BORROW(r0, a5h | a6l, r0, 0, carry);
326*f9fbec18Smcpowers MP_SUB_BORROW(r1, a6h, r1, carry, carry);
327*f9fbec18Smcpowers MP_SUB_BORROW(r2, 0, r2, carry, carry);
328*f9fbec18Smcpowers MP_SUB_BORROW(r3, (a4l>>32)|a5l,r3, carry, carry);
329*f9fbec18Smcpowers r4 -= carry;
330*f9fbec18Smcpowers /* diff 6 */
331*f9fbec18Smcpowers MP_SUB_BORROW(r0, a6, r0, 0, carry);
332*f9fbec18Smcpowers MP_SUB_BORROW(r1, a7, r1, carry, carry);
333*f9fbec18Smcpowers MP_SUB_BORROW(r2, 0, r2, carry, carry);
334*f9fbec18Smcpowers MP_SUB_BORROW(r3, a4h|(a5h<<32),r3, carry, carry);
335*f9fbec18Smcpowers r4 -= carry;
336*f9fbec18Smcpowers /* diff 7 */
337*f9fbec18Smcpowers MP_SUB_BORROW(r0, a6h|a7l, r0, 0, carry);
338*f9fbec18Smcpowers MP_SUB_BORROW(r1, a7h|a4l, r1, carry, carry);
339*f9fbec18Smcpowers MP_SUB_BORROW(r2, a4h|a5l, r2, carry, carry);
340*f9fbec18Smcpowers MP_SUB_BORROW(r3, a6l, r3, carry, carry);
341*f9fbec18Smcpowers r4 -= carry;
342*f9fbec18Smcpowers /* diff 8 */
343*f9fbec18Smcpowers MP_SUB_BORROW(r0, a7, r0, 0, carry);
344*f9fbec18Smcpowers MP_SUB_BORROW(r1, a4h<<32, r1, carry, carry);
345*f9fbec18Smcpowers MP_SUB_BORROW(r2, a5, r2, carry, carry);
346*f9fbec18Smcpowers MP_SUB_BORROW(r3, a6h<<32, r3, carry, carry);
347*f9fbec18Smcpowers r4 -= carry;
348*f9fbec18Smcpowers
349*f9fbec18Smcpowers /* reduce the overflows */
350*f9fbec18Smcpowers while (r4 > 0) {
351*f9fbec18Smcpowers mp_digit r4_long = r4;
352*f9fbec18Smcpowers mp_digit r4l = (r4_long << 32);
353*f9fbec18Smcpowers MP_ADD_CARRY(r0, r4_long, r0, 0, carry);
354*f9fbec18Smcpowers MP_ADD_CARRY(r1, -r4l, r1, carry, carry);
355*f9fbec18Smcpowers MP_ADD_CARRY(r2, MP_DIGIT_MAX, r2, carry, carry);
356*f9fbec18Smcpowers MP_ADD_CARRY(r3, r4l-r4_long-1,r3, carry, carry);
357*f9fbec18Smcpowers r4 = carry;
358*f9fbec18Smcpowers }
359*f9fbec18Smcpowers
360*f9fbec18Smcpowers /* reduce the underflows */
361*f9fbec18Smcpowers while (r4 < 0) {
362*f9fbec18Smcpowers mp_digit r4_long = -r4;
363*f9fbec18Smcpowers mp_digit r4l = (r4_long << 32);
364*f9fbec18Smcpowers MP_SUB_BORROW(r0, r4_long, r0, 0, carry);
365*f9fbec18Smcpowers MP_SUB_BORROW(r1, -r4l, r1, carry, carry);
366*f9fbec18Smcpowers MP_SUB_BORROW(r2, MP_DIGIT_MAX, r2, carry, carry);
367*f9fbec18Smcpowers MP_SUB_BORROW(r3, r4l-r4_long-1,r3, carry, carry);
368*f9fbec18Smcpowers r4 = -carry;
369*f9fbec18Smcpowers }
370*f9fbec18Smcpowers
371*f9fbec18Smcpowers if (a != r) {
372*f9fbec18Smcpowers MP_CHECKOK(s_mp_pad(r,4));
373*f9fbec18Smcpowers }
374*f9fbec18Smcpowers MP_SIGN(r) = MP_ZPOS;
375*f9fbec18Smcpowers MP_USED(r) = 4;
376*f9fbec18Smcpowers
377*f9fbec18Smcpowers MP_DIGIT(r,3) = r3;
378*f9fbec18Smcpowers MP_DIGIT(r,2) = r2;
379*f9fbec18Smcpowers MP_DIGIT(r,1) = r1;
380*f9fbec18Smcpowers MP_DIGIT(r,0) = r0;
381*f9fbec18Smcpowers
382*f9fbec18Smcpowers /* final reduction if necessary */
383*f9fbec18Smcpowers if ((r3 > 0xFFFFFFFF00000001ULL) ||
384*f9fbec18Smcpowers ((r3 == 0xFFFFFFFF00000001ULL) &&
385*f9fbec18Smcpowers (r2 || (r1 >> 32)||
386*f9fbec18Smcpowers (r1 == 0xFFFFFFFFULL && r0 == MP_DIGIT_MAX)))) {
387*f9fbec18Smcpowers /* very rare, just use mp_sub */
388*f9fbec18Smcpowers MP_CHECKOK(mp_sub(r, &meth->irr, r));
389*f9fbec18Smcpowers }
390*f9fbec18Smcpowers
391*f9fbec18Smcpowers s_mp_clamp(r);
392*f9fbec18Smcpowers #endif
393*f9fbec18Smcpowers }
394*f9fbec18Smcpowers
395*f9fbec18Smcpowers CLEANUP:
396*f9fbec18Smcpowers return res;
397*f9fbec18Smcpowers }
398*f9fbec18Smcpowers
399*f9fbec18Smcpowers /* Compute the square of polynomial a, reduce modulo p256. Store the
400*f9fbec18Smcpowers * result in r. r could be a. Uses optimized modular reduction for p256.
401*f9fbec18Smcpowers */
402*f9fbec18Smcpowers mp_err
ec_GFp_nistp256_sqr(const mp_int * a,mp_int * r,const GFMethod * meth)403*f9fbec18Smcpowers ec_GFp_nistp256_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
404*f9fbec18Smcpowers {
405*f9fbec18Smcpowers mp_err res = MP_OKAY;
406*f9fbec18Smcpowers
407*f9fbec18Smcpowers MP_CHECKOK(mp_sqr(a, r));
408*f9fbec18Smcpowers MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth));
409*f9fbec18Smcpowers CLEANUP:
410*f9fbec18Smcpowers return res;
411*f9fbec18Smcpowers }
412*f9fbec18Smcpowers
413*f9fbec18Smcpowers /* Compute the product of two polynomials a and b, reduce modulo p256.
414*f9fbec18Smcpowers * Store the result in r. r could be a or b; a could be b. Uses
415*f9fbec18Smcpowers * optimized modular reduction for p256. */
416*f9fbec18Smcpowers mp_err
ec_GFp_nistp256_mul(const mp_int * a,const mp_int * b,mp_int * r,const GFMethod * meth)417*f9fbec18Smcpowers ec_GFp_nistp256_mul(const mp_int *a, const mp_int *b, mp_int *r,
418*f9fbec18Smcpowers const GFMethod *meth)
419*f9fbec18Smcpowers {
420*f9fbec18Smcpowers mp_err res = MP_OKAY;
421*f9fbec18Smcpowers
422*f9fbec18Smcpowers MP_CHECKOK(mp_mul(a, b, r));
423*f9fbec18Smcpowers MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth));
424*f9fbec18Smcpowers CLEANUP:
425*f9fbec18Smcpowers return res;
426*f9fbec18Smcpowers }
427*f9fbec18Smcpowers
428*f9fbec18Smcpowers /* Wire in fast field arithmetic and precomputation of base point for
429*f9fbec18Smcpowers * named curves. */
430*f9fbec18Smcpowers mp_err
ec_group_set_gfp256(ECGroup * group,ECCurveName name)431*f9fbec18Smcpowers ec_group_set_gfp256(ECGroup *group, ECCurveName name)
432*f9fbec18Smcpowers {
433*f9fbec18Smcpowers if (name == ECCurve_NIST_P256) {
434*f9fbec18Smcpowers group->meth->field_mod = &ec_GFp_nistp256_mod;
435*f9fbec18Smcpowers group->meth->field_mul = &ec_GFp_nistp256_mul;
436*f9fbec18Smcpowers group->meth->field_sqr = &ec_GFp_nistp256_sqr;
437*f9fbec18Smcpowers }
438*f9fbec18Smcpowers return MP_OKAY;
439*f9fbec18Smcpowers }
440