xref: /titanic_51/usr/src/common/crypto/ecc/ecp_192.c (revision f9fbec18f5b458b560ecf45d3db8e8bd56bf6942)
1*f9fbec18Smcpowers /*
2*f9fbec18Smcpowers  * ***** BEGIN LICENSE BLOCK *****
3*f9fbec18Smcpowers  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4*f9fbec18Smcpowers  *
5*f9fbec18Smcpowers  * The contents of this file are subject to the Mozilla Public License Version
6*f9fbec18Smcpowers  * 1.1 (the "License"); you may not use this file except in compliance with
7*f9fbec18Smcpowers  * the License. You may obtain a copy of the License at
8*f9fbec18Smcpowers  * http://www.mozilla.org/MPL/
9*f9fbec18Smcpowers  *
10*f9fbec18Smcpowers  * Software distributed under the License is distributed on an "AS IS" basis,
11*f9fbec18Smcpowers  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12*f9fbec18Smcpowers  * for the specific language governing rights and limitations under the
13*f9fbec18Smcpowers  * License.
14*f9fbec18Smcpowers  *
15*f9fbec18Smcpowers  * The Original Code is the elliptic curve math library for prime field curves.
16*f9fbec18Smcpowers  *
17*f9fbec18Smcpowers  * The Initial Developer of the Original Code is
18*f9fbec18Smcpowers  * Sun Microsystems, Inc.
19*f9fbec18Smcpowers  * Portions created by the Initial Developer are Copyright (C) 2003
20*f9fbec18Smcpowers  * the Initial Developer. All Rights Reserved.
21*f9fbec18Smcpowers  *
22*f9fbec18Smcpowers  * Contributor(s):
23*f9fbec18Smcpowers  *   Douglas Stebila <douglas@stebila.ca>, Sun Microsystems Laboratories
24*f9fbec18Smcpowers  *
25*f9fbec18Smcpowers  * Alternatively, the contents of this file may be used under the terms of
26*f9fbec18Smcpowers  * either the GNU General Public License Version 2 or later (the "GPL"), or
27*f9fbec18Smcpowers  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28*f9fbec18Smcpowers  * in which case the provisions of the GPL or the LGPL are applicable instead
29*f9fbec18Smcpowers  * of those above. If you wish to allow use of your version of this file only
30*f9fbec18Smcpowers  * under the terms of either the GPL or the LGPL, and not to allow others to
31*f9fbec18Smcpowers  * use your version of this file under the terms of the MPL, indicate your
32*f9fbec18Smcpowers  * decision by deleting the provisions above and replace them with the notice
33*f9fbec18Smcpowers  * and other provisions required by the GPL or the LGPL. If you do not delete
34*f9fbec18Smcpowers  * the provisions above, a recipient may use your version of this file under
35*f9fbec18Smcpowers  * the terms of any one of the MPL, the GPL or the LGPL.
36*f9fbec18Smcpowers  *
37*f9fbec18Smcpowers  * ***** END LICENSE BLOCK ***** */
38*f9fbec18Smcpowers /*
39*f9fbec18Smcpowers  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
40*f9fbec18Smcpowers  * Use is subject to license terms.
41*f9fbec18Smcpowers  *
42*f9fbec18Smcpowers  * Sun elects to use this software under the MPL license.
43*f9fbec18Smcpowers  */
44*f9fbec18Smcpowers 
45*f9fbec18Smcpowers #pragma ident	"%Z%%M%	%I%	%E% SMI"
46*f9fbec18Smcpowers 
47*f9fbec18Smcpowers #include "ecp.h"
48*f9fbec18Smcpowers #include "mpi.h"
49*f9fbec18Smcpowers #include "mplogic.h"
50*f9fbec18Smcpowers #include "mpi-priv.h"
51*f9fbec18Smcpowers #ifndef _KERNEL
52*f9fbec18Smcpowers #include <stdlib.h>
53*f9fbec18Smcpowers #endif
54*f9fbec18Smcpowers 
55*f9fbec18Smcpowers #define ECP192_DIGITS ECL_CURVE_DIGITS(192)
56*f9fbec18Smcpowers 
57*f9fbec18Smcpowers /* Fast modular reduction for p192 = 2^192 - 2^64 - 1.  a can be r. Uses
58*f9fbec18Smcpowers  * algorithm 7 from Brown, Hankerson, Lopez, Menezes. Software
59*f9fbec18Smcpowers  * Implementation of the NIST Elliptic Curves over Prime Fields. */
60*f9fbec18Smcpowers mp_err
61*f9fbec18Smcpowers ec_GFp_nistp192_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
62*f9fbec18Smcpowers {
63*f9fbec18Smcpowers 	mp_err res = MP_OKAY;
64*f9fbec18Smcpowers 	mp_size a_used = MP_USED(a);
65*f9fbec18Smcpowers 	mp_digit r3;
66*f9fbec18Smcpowers #ifndef MPI_AMD64_ADD
67*f9fbec18Smcpowers 	mp_digit carry;
68*f9fbec18Smcpowers #endif
69*f9fbec18Smcpowers #ifdef ECL_THIRTY_TWO_BIT
70*f9fbec18Smcpowers 	mp_digit a5a = 0, a5b = 0, a4a = 0, a4b = 0, a3a = 0, a3b = 0;
71*f9fbec18Smcpowers         mp_digit r0a, r0b, r1a, r1b, r2a, r2b;
72*f9fbec18Smcpowers #else
73*f9fbec18Smcpowers 	mp_digit a5 = 0, a4 = 0, a3 = 0;
74*f9fbec18Smcpowers         mp_digit r0, r1, r2;
75*f9fbec18Smcpowers #endif
76*f9fbec18Smcpowers 
77*f9fbec18Smcpowers 	/* reduction not needed if a is not larger than field size */
78*f9fbec18Smcpowers 	if (a_used < ECP192_DIGITS) {
79*f9fbec18Smcpowers 		if (a == r) {
80*f9fbec18Smcpowers 			return MP_OKAY;
81*f9fbec18Smcpowers 		}
82*f9fbec18Smcpowers 		return mp_copy(a, r);
83*f9fbec18Smcpowers 	}
84*f9fbec18Smcpowers 
85*f9fbec18Smcpowers 	/* for polynomials larger than twice the field size, use regular
86*f9fbec18Smcpowers 	 * reduction */
87*f9fbec18Smcpowers 	if (a_used > ECP192_DIGITS*2) {
88*f9fbec18Smcpowers 		MP_CHECKOK(mp_mod(a, &meth->irr, r));
89*f9fbec18Smcpowers 	} else {
90*f9fbec18Smcpowers 		/* copy out upper words of a */
91*f9fbec18Smcpowers 
92*f9fbec18Smcpowers #ifdef ECL_THIRTY_TWO_BIT
93*f9fbec18Smcpowers 
94*f9fbec18Smcpowers 		/* in all the math below,
95*f9fbec18Smcpowers 		 * nXb is most signifiant, nXa is least significant */
96*f9fbec18Smcpowers 		switch (a_used) {
97*f9fbec18Smcpowers 		case 12:
98*f9fbec18Smcpowers 			a5b = MP_DIGIT(a, 11);
99*f9fbec18Smcpowers 		case 11:
100*f9fbec18Smcpowers 			a5a = MP_DIGIT(a, 10);
101*f9fbec18Smcpowers 		case 10:
102*f9fbec18Smcpowers 			a4b = MP_DIGIT(a, 9);
103*f9fbec18Smcpowers 		case 9:
104*f9fbec18Smcpowers 			a4a = MP_DIGIT(a, 8);
105*f9fbec18Smcpowers 		case 8:
106*f9fbec18Smcpowers 			a3b = MP_DIGIT(a, 7);
107*f9fbec18Smcpowers 		case 7:
108*f9fbec18Smcpowers 			a3a = MP_DIGIT(a, 6);
109*f9fbec18Smcpowers 		}
110*f9fbec18Smcpowers 
111*f9fbec18Smcpowers 
112*f9fbec18Smcpowers                 r2b= MP_DIGIT(a, 5);
113*f9fbec18Smcpowers                 r2a= MP_DIGIT(a, 4);
114*f9fbec18Smcpowers                 r1b = MP_DIGIT(a, 3);
115*f9fbec18Smcpowers                 r1a = MP_DIGIT(a, 2);
116*f9fbec18Smcpowers                 r0b = MP_DIGIT(a, 1);
117*f9fbec18Smcpowers                 r0a = MP_DIGIT(a, 0);
118*f9fbec18Smcpowers 
119*f9fbec18Smcpowers 		/* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
120*f9fbec18Smcpowers 		MP_ADD_CARRY(r0a, a3a, r0a, 0,    carry);
121*f9fbec18Smcpowers 		MP_ADD_CARRY(r0b, a3b, r0b, carry, carry);
122*f9fbec18Smcpowers 		MP_ADD_CARRY(r1a, a3a, r1a, carry, carry);
123*f9fbec18Smcpowers 		MP_ADD_CARRY(r1b, a3b, r1b, carry, carry);
124*f9fbec18Smcpowers 		MP_ADD_CARRY(r2a, a4a, r2a, carry, carry);
125*f9fbec18Smcpowers 		MP_ADD_CARRY(r2b, a4b, r2b, carry, carry);
126*f9fbec18Smcpowers 		r3 = carry; carry = 0;
127*f9fbec18Smcpowers 		MP_ADD_CARRY(r0a, a5a, r0a, 0,     carry);
128*f9fbec18Smcpowers 		MP_ADD_CARRY(r0b, a5b, r0b, carry, carry);
129*f9fbec18Smcpowers 		MP_ADD_CARRY(r1a, a5a, r1a, carry, carry);
130*f9fbec18Smcpowers 		MP_ADD_CARRY(r1b, a5b, r1b, carry, carry);
131*f9fbec18Smcpowers 		MP_ADD_CARRY(r2a, a5a, r2a, carry, carry);
132*f9fbec18Smcpowers 		MP_ADD_CARRY(r2b, a5b, r2b, carry, carry);
133*f9fbec18Smcpowers 		r3 += carry;
134*f9fbec18Smcpowers 		MP_ADD_CARRY(r1a, a4a, r1a, 0,     carry);
135*f9fbec18Smcpowers 		MP_ADD_CARRY(r1b, a4b, r1b, carry, carry);
136*f9fbec18Smcpowers 		MP_ADD_CARRY(r2a,   0, r2a, carry, carry);
137*f9fbec18Smcpowers 		MP_ADD_CARRY(r2b,   0, r2b, carry, carry);
138*f9fbec18Smcpowers 		r3 += carry;
139*f9fbec18Smcpowers 
140*f9fbec18Smcpowers 		/* reduce out the carry */
141*f9fbec18Smcpowers 		while (r3) {
142*f9fbec18Smcpowers 			MP_ADD_CARRY(r0a, r3, r0a, 0,     carry);
143*f9fbec18Smcpowers 			MP_ADD_CARRY(r0b,  0, r0b, carry, carry);
144*f9fbec18Smcpowers 			MP_ADD_CARRY(r1a, r3, r1a, carry, carry);
145*f9fbec18Smcpowers 			MP_ADD_CARRY(r1b,  0, r1b, carry, carry);
146*f9fbec18Smcpowers 			MP_ADD_CARRY(r2a,  0, r2a, carry, carry);
147*f9fbec18Smcpowers 			MP_ADD_CARRY(r2b,  0, r2b, carry, carry);
148*f9fbec18Smcpowers 			r3 = carry;
149*f9fbec18Smcpowers 		}
150*f9fbec18Smcpowers 
151*f9fbec18Smcpowers 		/* check for final reduction */
152*f9fbec18Smcpowers 		/*
153*f9fbec18Smcpowers 		 * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
154*f9fbec18Smcpowers 		 * 0xffffffffffffffff. That means we can only be over and need
155*f9fbec18Smcpowers 		 * one more reduction
156*f9fbec18Smcpowers 		 *  if r2 == 0xffffffffffffffffff (same as r2+1 == 0)
157*f9fbec18Smcpowers 		 *     and
158*f9fbec18Smcpowers 		 *     r1 == 0xffffffffffffffffff   or
159*f9fbec18Smcpowers 		 *     r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff
160*f9fbec18Smcpowers 		 * In all cases, we subtract the field (or add the 2's
161*f9fbec18Smcpowers 		 * complement value (1,1,0)).  (r0, r1, r2)
162*f9fbec18Smcpowers 		 */
163*f9fbec18Smcpowers 		if (((r2b == 0xffffffff) && (r2a == 0xffffffff)
164*f9fbec18Smcpowers 			&& (r1b == 0xffffffff) ) &&
165*f9fbec18Smcpowers 			   ((r1a == 0xffffffff) ||
166*f9fbec18Smcpowers 			    (r1a == 0xfffffffe) && (r0a == 0xffffffff) &&
167*f9fbec18Smcpowers 					(r0b == 0xffffffff)) ) {
168*f9fbec18Smcpowers 			/* do a quick subtract */
169*f9fbec18Smcpowers 			MP_ADD_CARRY(r0a, 1, r0a, 0, carry);
170*f9fbec18Smcpowers 			r0b += carry;
171*f9fbec18Smcpowers 			r1a = r1b = r2a = r2b = 0;
172*f9fbec18Smcpowers 		}
173*f9fbec18Smcpowers 
174*f9fbec18Smcpowers 		/* set the lower words of r */
175*f9fbec18Smcpowers 		if (a != r) {
176*f9fbec18Smcpowers 			MP_CHECKOK(s_mp_pad(r, 6));
177*f9fbec18Smcpowers 		}
178*f9fbec18Smcpowers 		MP_DIGIT(r, 5) = r2b;
179*f9fbec18Smcpowers 		MP_DIGIT(r, 4) = r2a;
180*f9fbec18Smcpowers 		MP_DIGIT(r, 3) = r1b;
181*f9fbec18Smcpowers 		MP_DIGIT(r, 2) = r1a;
182*f9fbec18Smcpowers 		MP_DIGIT(r, 1) = r0b;
183*f9fbec18Smcpowers 		MP_DIGIT(r, 0) = r0a;
184*f9fbec18Smcpowers 		MP_USED(r) = 6;
185*f9fbec18Smcpowers #else
186*f9fbec18Smcpowers 		switch (a_used) {
187*f9fbec18Smcpowers 		case 6:
188*f9fbec18Smcpowers 			a5 = MP_DIGIT(a, 5);
189*f9fbec18Smcpowers 		case 5:
190*f9fbec18Smcpowers 			a4 = MP_DIGIT(a, 4);
191*f9fbec18Smcpowers 		case 4:
192*f9fbec18Smcpowers 			a3 = MP_DIGIT(a, 3);
193*f9fbec18Smcpowers 		}
194*f9fbec18Smcpowers 
195*f9fbec18Smcpowers                 r2 = MP_DIGIT(a, 2);
196*f9fbec18Smcpowers                 r1 = MP_DIGIT(a, 1);
197*f9fbec18Smcpowers                 r0 = MP_DIGIT(a, 0);
198*f9fbec18Smcpowers 
199*f9fbec18Smcpowers 		/* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
200*f9fbec18Smcpowers #ifndef MPI_AMD64_ADD
201*f9fbec18Smcpowers 		MP_ADD_CARRY(r0, a3, r0, 0,     carry);
202*f9fbec18Smcpowers 		MP_ADD_CARRY(r1, a3, r1, carry, carry);
203*f9fbec18Smcpowers 		MP_ADD_CARRY(r2, a4, r2, carry, carry);
204*f9fbec18Smcpowers 		r3 = carry;
205*f9fbec18Smcpowers 		MP_ADD_CARRY(r0, a5, r0, 0,     carry);
206*f9fbec18Smcpowers 		MP_ADD_CARRY(r1, a5, r1, carry, carry);
207*f9fbec18Smcpowers 		MP_ADD_CARRY(r2, a5, r2, carry, carry);
208*f9fbec18Smcpowers 		r3 += carry;
209*f9fbec18Smcpowers 		MP_ADD_CARRY(r1, a4, r1, 0,     carry);
210*f9fbec18Smcpowers 		MP_ADD_CARRY(r2,  0, r2, carry, carry);
211*f9fbec18Smcpowers 		r3 += carry;
212*f9fbec18Smcpowers 
213*f9fbec18Smcpowers #else
214*f9fbec18Smcpowers                 r2 = MP_DIGIT(a, 2);
215*f9fbec18Smcpowers                 r1 = MP_DIGIT(a, 1);
216*f9fbec18Smcpowers                 r0 = MP_DIGIT(a, 0);
217*f9fbec18Smcpowers 
218*f9fbec18Smcpowers                 /* set the lower words of r */
219*f9fbec18Smcpowers                 __asm__ (
220*f9fbec18Smcpowers                 "xorq   %3,%3           \n\t"
221*f9fbec18Smcpowers                 "addq   %4,%0           \n\t"
222*f9fbec18Smcpowers                 "adcq   %4,%1           \n\t"
223*f9fbec18Smcpowers                 "adcq   %5,%2           \n\t"
224*f9fbec18Smcpowers                 "adcq   $0,%3           \n\t"
225*f9fbec18Smcpowers                 "addq   %6,%0           \n\t"
226*f9fbec18Smcpowers                 "adcq   %6,%1           \n\t"
227*f9fbec18Smcpowers                 "adcq   %6,%2           \n\t"
228*f9fbec18Smcpowers                 "adcq   $0,%3           \n\t"
229*f9fbec18Smcpowers                 "addq   %5,%1           \n\t"
230*f9fbec18Smcpowers                 "adcq   $0,%2           \n\t"
231*f9fbec18Smcpowers                 "adcq   $0,%3           \n\t"
232*f9fbec18Smcpowers                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3),
233*f9fbec18Smcpowers 		  "=r"(a4), "=r"(a5)
234*f9fbec18Smcpowers                 : "0" (r0), "1" (r1), "2" (r2), "3" (r3),
235*f9fbec18Smcpowers 		  "4" (a3), "5" (a4), "6"(a5)
236*f9fbec18Smcpowers                 : "%cc" );
237*f9fbec18Smcpowers #endif
238*f9fbec18Smcpowers 
239*f9fbec18Smcpowers 		/* reduce out the carry */
240*f9fbec18Smcpowers 		while (r3) {
241*f9fbec18Smcpowers #ifndef MPI_AMD64_ADD
242*f9fbec18Smcpowers 			MP_ADD_CARRY(r0, r3, r0, 0,     carry);
243*f9fbec18Smcpowers 			MP_ADD_CARRY(r1, r3, r1, carry, carry);
244*f9fbec18Smcpowers 			MP_ADD_CARRY(r2,  0, r2, carry, carry);
245*f9fbec18Smcpowers 			r3 = carry;
246*f9fbec18Smcpowers #else
247*f9fbec18Smcpowers 			a3=r3;
248*f9fbec18Smcpowers               		__asm__ (
249*f9fbec18Smcpowers                 	"xorq   %3,%3           \n\t"
250*f9fbec18Smcpowers                 	"addq   %4,%0           \n\t"
251*f9fbec18Smcpowers                 	"adcq   %4,%1           \n\t"
252*f9fbec18Smcpowers                 	"adcq   $0,%2           \n\t"
253*f9fbec18Smcpowers                 	"adcq   $0,%3           \n\t"
254*f9fbec18Smcpowers                 	: "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3)
255*f9fbec18Smcpowers                 	: "0" (r0), "1" (r1), "2" (r2), "3" (r3), "4"(a3)
256*f9fbec18Smcpowers                 	: "%cc" );
257*f9fbec18Smcpowers #endif
258*f9fbec18Smcpowers 		}
259*f9fbec18Smcpowers 
260*f9fbec18Smcpowers 		/* check for final reduction */
261*f9fbec18Smcpowers 		/*
262*f9fbec18Smcpowers 		 * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
263*f9fbec18Smcpowers 		 * 0xffffffffffffffff. That means we can only be over and need
264*f9fbec18Smcpowers 		 * one more reduction
265*f9fbec18Smcpowers 		 *  if r2 == 0xffffffffffffffffff (same as r2+1 == 0)
266*f9fbec18Smcpowers 		 *     and
267*f9fbec18Smcpowers 		 *     r1 == 0xffffffffffffffffff   or
268*f9fbec18Smcpowers 		 *     r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff
269*f9fbec18Smcpowers 		 * In all cases, we subtract the field (or add the 2's
270*f9fbec18Smcpowers 		 * complement value (1,1,0)).  (r0, r1, r2)
271*f9fbec18Smcpowers 		 */
272*f9fbec18Smcpowers 		if (r3 || ((r2 == MP_DIGIT_MAX) &&
273*f9fbec18Smcpowers 		      ((r1 == MP_DIGIT_MAX) ||
274*f9fbec18Smcpowers 			((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
275*f9fbec18Smcpowers 			/* do a quick subtract */
276*f9fbec18Smcpowers 			r0++;
277*f9fbec18Smcpowers 			r1 = r2 = 0;
278*f9fbec18Smcpowers 		}
279*f9fbec18Smcpowers 		/* set the lower words of r */
280*f9fbec18Smcpowers 		if (a != r) {
281*f9fbec18Smcpowers 			MP_CHECKOK(s_mp_pad(r, 3));
282*f9fbec18Smcpowers 		}
283*f9fbec18Smcpowers 		MP_DIGIT(r, 2) = r2;
284*f9fbec18Smcpowers 		MP_DIGIT(r, 1) = r1;
285*f9fbec18Smcpowers 		MP_DIGIT(r, 0) = r0;
286*f9fbec18Smcpowers 		MP_USED(r) = 3;
287*f9fbec18Smcpowers #endif
288*f9fbec18Smcpowers 	}
289*f9fbec18Smcpowers 
290*f9fbec18Smcpowers   CLEANUP:
291*f9fbec18Smcpowers 	return res;
292*f9fbec18Smcpowers }
293*f9fbec18Smcpowers 
294*f9fbec18Smcpowers #ifndef ECL_THIRTY_TWO_BIT
295*f9fbec18Smcpowers /* Compute the sum of 192 bit curves. Do the work in-line since the
296*f9fbec18Smcpowers  * number of words are so small, we don't want to overhead of mp function
297*f9fbec18Smcpowers  * calls.  Uses optimized modular reduction for p192.
298*f9fbec18Smcpowers  */
299*f9fbec18Smcpowers mp_err
300*f9fbec18Smcpowers ec_GFp_nistp192_add(const mp_int *a, const mp_int *b, mp_int *r,
301*f9fbec18Smcpowers 			const GFMethod *meth)
302*f9fbec18Smcpowers {
303*f9fbec18Smcpowers 	mp_err res = MP_OKAY;
304*f9fbec18Smcpowers 	mp_digit a0 = 0, a1 = 0, a2 = 0;
305*f9fbec18Smcpowers 	mp_digit r0 = 0, r1 = 0, r2 = 0;
306*f9fbec18Smcpowers 	mp_digit carry;
307*f9fbec18Smcpowers 
308*f9fbec18Smcpowers 	switch(MP_USED(a)) {
309*f9fbec18Smcpowers 	case 3:
310*f9fbec18Smcpowers 		a2 = MP_DIGIT(a,2);
311*f9fbec18Smcpowers 	case 2:
312*f9fbec18Smcpowers 		a1 = MP_DIGIT(a,1);
313*f9fbec18Smcpowers 	case 1:
314*f9fbec18Smcpowers 		a0 = MP_DIGIT(a,0);
315*f9fbec18Smcpowers 	}
316*f9fbec18Smcpowers 	switch(MP_USED(b)) {
317*f9fbec18Smcpowers 	case 3:
318*f9fbec18Smcpowers 		r2 = MP_DIGIT(b,2);
319*f9fbec18Smcpowers 	case 2:
320*f9fbec18Smcpowers 		r1 = MP_DIGIT(b,1);
321*f9fbec18Smcpowers 	case 1:
322*f9fbec18Smcpowers 		r0 = MP_DIGIT(b,0);
323*f9fbec18Smcpowers 	}
324*f9fbec18Smcpowers 
325*f9fbec18Smcpowers #ifndef MPI_AMD64_ADD
326*f9fbec18Smcpowers 	MP_ADD_CARRY(a0, r0, r0, 0,     carry);
327*f9fbec18Smcpowers 	MP_ADD_CARRY(a1, r1, r1, carry, carry);
328*f9fbec18Smcpowers 	MP_ADD_CARRY(a2, r2, r2, carry, carry);
329*f9fbec18Smcpowers #else
330*f9fbec18Smcpowers 	__asm__ (
331*f9fbec18Smcpowers                 "xorq   %3,%3           \n\t"
332*f9fbec18Smcpowers                 "addq   %4,%0           \n\t"
333*f9fbec18Smcpowers                 "adcq   %5,%1           \n\t"
334*f9fbec18Smcpowers                 "adcq   %6,%2           \n\t"
335*f9fbec18Smcpowers                 "adcq   $0,%3           \n\t"
336*f9fbec18Smcpowers                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry)
337*f9fbec18Smcpowers                 : "r" (a0), "r" (a1), "r" (a2), "0" (r0),
338*f9fbec18Smcpowers 		  "1" (r1), "2" (r2)
339*f9fbec18Smcpowers                 : "%cc" );
340*f9fbec18Smcpowers #endif
341*f9fbec18Smcpowers 
342*f9fbec18Smcpowers 	/* Do quick 'subract' if we've gone over
343*f9fbec18Smcpowers 	 * (add the 2's complement of the curve field) */
344*f9fbec18Smcpowers 	if (carry || ((r2 == MP_DIGIT_MAX) &&
345*f9fbec18Smcpowers 		      ((r1 == MP_DIGIT_MAX) ||
346*f9fbec18Smcpowers 			((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
347*f9fbec18Smcpowers #ifndef MPI_AMD64_ADD
348*f9fbec18Smcpowers 		MP_ADD_CARRY(r0, 1, r0, 0,     carry);
349*f9fbec18Smcpowers 		MP_ADD_CARRY(r1, 1, r1, carry, carry);
350*f9fbec18Smcpowers 		MP_ADD_CARRY(r2, 0, r2, carry, carry);
351*f9fbec18Smcpowers #else
352*f9fbec18Smcpowers 		__asm__ (
353*f9fbec18Smcpowers 			"addq   $1,%0           \n\t"
354*f9fbec18Smcpowers 			"adcq   $1,%1           \n\t"
355*f9fbec18Smcpowers 			"adcq   $0,%2           \n\t"
356*f9fbec18Smcpowers 			: "=r"(r0), "=r"(r1), "=r"(r2)
357*f9fbec18Smcpowers 			: "0" (r0), "1" (r1), "2" (r2)
358*f9fbec18Smcpowers 			: "%cc" );
359*f9fbec18Smcpowers #endif
360*f9fbec18Smcpowers 	}
361*f9fbec18Smcpowers 
362*f9fbec18Smcpowers 
363*f9fbec18Smcpowers 	MP_CHECKOK(s_mp_pad(r, 3));
364*f9fbec18Smcpowers 	MP_DIGIT(r, 2) = r2;
365*f9fbec18Smcpowers 	MP_DIGIT(r, 1) = r1;
366*f9fbec18Smcpowers 	MP_DIGIT(r, 0) = r0;
367*f9fbec18Smcpowers 	MP_SIGN(r) = MP_ZPOS;
368*f9fbec18Smcpowers 	MP_USED(r) = 3;
369*f9fbec18Smcpowers 	s_mp_clamp(r);
370*f9fbec18Smcpowers 
371*f9fbec18Smcpowers 
372*f9fbec18Smcpowers   CLEANUP:
373*f9fbec18Smcpowers 	return res;
374*f9fbec18Smcpowers }
375*f9fbec18Smcpowers 
376*f9fbec18Smcpowers /* Compute the diff of 192 bit curves. Do the work in-line since the
377*f9fbec18Smcpowers  * number of words are so small, we don't want to overhead of mp function
378*f9fbec18Smcpowers  * calls.  Uses optimized modular reduction for p192.
379*f9fbec18Smcpowers  */
380*f9fbec18Smcpowers mp_err
381*f9fbec18Smcpowers ec_GFp_nistp192_sub(const mp_int *a, const mp_int *b, mp_int *r,
382*f9fbec18Smcpowers 			const GFMethod *meth)
383*f9fbec18Smcpowers {
384*f9fbec18Smcpowers 	mp_err res = MP_OKAY;
385*f9fbec18Smcpowers 	mp_digit b0 = 0, b1 = 0, b2 = 0;
386*f9fbec18Smcpowers 	mp_digit r0 = 0, r1 = 0, r2 = 0;
387*f9fbec18Smcpowers 	mp_digit borrow;
388*f9fbec18Smcpowers 
389*f9fbec18Smcpowers 	switch(MP_USED(a)) {
390*f9fbec18Smcpowers 	case 3:
391*f9fbec18Smcpowers 		r2 = MP_DIGIT(a,2);
392*f9fbec18Smcpowers 	case 2:
393*f9fbec18Smcpowers 		r1 = MP_DIGIT(a,1);
394*f9fbec18Smcpowers 	case 1:
395*f9fbec18Smcpowers 		r0 = MP_DIGIT(a,0);
396*f9fbec18Smcpowers 	}
397*f9fbec18Smcpowers 
398*f9fbec18Smcpowers 	switch(MP_USED(b)) {
399*f9fbec18Smcpowers 	case 3:
400*f9fbec18Smcpowers 		b2 = MP_DIGIT(b,2);
401*f9fbec18Smcpowers 	case 2:
402*f9fbec18Smcpowers 		b1 = MP_DIGIT(b,1);
403*f9fbec18Smcpowers 	case 1:
404*f9fbec18Smcpowers 		b0 = MP_DIGIT(b,0);
405*f9fbec18Smcpowers 	}
406*f9fbec18Smcpowers 
407*f9fbec18Smcpowers #ifndef MPI_AMD64_ADD
408*f9fbec18Smcpowers 	MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
409*f9fbec18Smcpowers 	MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
410*f9fbec18Smcpowers 	MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
411*f9fbec18Smcpowers #else
412*f9fbec18Smcpowers 	__asm__ (
413*f9fbec18Smcpowers                 "xorq   %3,%3           \n\t"
414*f9fbec18Smcpowers                 "subq   %4,%0           \n\t"
415*f9fbec18Smcpowers                 "sbbq   %5,%1           \n\t"
416*f9fbec18Smcpowers                 "sbbq   %6,%2           \n\t"
417*f9fbec18Smcpowers                 "adcq   $0,%3           \n\t"
418*f9fbec18Smcpowers                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(borrow)
419*f9fbec18Smcpowers                 : "r" (b0), "r" (b1), "r" (b2), "0" (r0),
420*f9fbec18Smcpowers 		  "1" (r1), "2" (r2)
421*f9fbec18Smcpowers                 : "%cc" );
422*f9fbec18Smcpowers #endif
423*f9fbec18Smcpowers 
424*f9fbec18Smcpowers 	/* Do quick 'add' if we've gone under 0
425*f9fbec18Smcpowers 	 * (subtract the 2's complement of the curve field) */
426*f9fbec18Smcpowers 	if (borrow) {
427*f9fbec18Smcpowers #ifndef MPI_AMD64_ADD
428*f9fbec18Smcpowers 		MP_SUB_BORROW(r0, 1, r0, 0,     borrow);
429*f9fbec18Smcpowers 		MP_SUB_BORROW(r1, 1, r1, borrow, borrow);
430*f9fbec18Smcpowers 		MP_SUB_BORROW(r2,  0, r2, borrow, borrow);
431*f9fbec18Smcpowers #else
432*f9fbec18Smcpowers 		__asm__ (
433*f9fbec18Smcpowers 			"subq   $1,%0           \n\t"
434*f9fbec18Smcpowers 			"sbbq   $1,%1           \n\t"
435*f9fbec18Smcpowers 			"sbbq   $0,%2           \n\t"
436*f9fbec18Smcpowers 			: "=r"(r0), "=r"(r1), "=r"(r2)
437*f9fbec18Smcpowers 			: "0" (r0), "1" (r1), "2" (r2)
438*f9fbec18Smcpowers 			: "%cc" );
439*f9fbec18Smcpowers #endif
440*f9fbec18Smcpowers 	}
441*f9fbec18Smcpowers 
442*f9fbec18Smcpowers 	MP_CHECKOK(s_mp_pad(r, 3));
443*f9fbec18Smcpowers 	MP_DIGIT(r, 2) = r2;
444*f9fbec18Smcpowers 	MP_DIGIT(r, 1) = r1;
445*f9fbec18Smcpowers 	MP_DIGIT(r, 0) = r0;
446*f9fbec18Smcpowers 	MP_SIGN(r) = MP_ZPOS;
447*f9fbec18Smcpowers 	MP_USED(r) = 3;
448*f9fbec18Smcpowers 	s_mp_clamp(r);
449*f9fbec18Smcpowers 
450*f9fbec18Smcpowers   CLEANUP:
451*f9fbec18Smcpowers 	return res;
452*f9fbec18Smcpowers }
453*f9fbec18Smcpowers 
454*f9fbec18Smcpowers #endif
455*f9fbec18Smcpowers 
456*f9fbec18Smcpowers /* Compute the square of polynomial a, reduce modulo p192. Store the
457*f9fbec18Smcpowers  * result in r.  r could be a.  Uses optimized modular reduction for p192.
458*f9fbec18Smcpowers  */
459*f9fbec18Smcpowers mp_err
460*f9fbec18Smcpowers ec_GFp_nistp192_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
461*f9fbec18Smcpowers {
462*f9fbec18Smcpowers 	mp_err res = MP_OKAY;
463*f9fbec18Smcpowers 
464*f9fbec18Smcpowers 	MP_CHECKOK(mp_sqr(a, r));
465*f9fbec18Smcpowers 	MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
466*f9fbec18Smcpowers   CLEANUP:
467*f9fbec18Smcpowers 	return res;
468*f9fbec18Smcpowers }
469*f9fbec18Smcpowers 
470*f9fbec18Smcpowers /* Compute the product of two polynomials a and b, reduce modulo p192.
471*f9fbec18Smcpowers  * Store the result in r.  r could be a or b; a could be b.  Uses
472*f9fbec18Smcpowers  * optimized modular reduction for p192. */
473*f9fbec18Smcpowers mp_err
474*f9fbec18Smcpowers ec_GFp_nistp192_mul(const mp_int *a, const mp_int *b, mp_int *r,
475*f9fbec18Smcpowers 					const GFMethod *meth)
476*f9fbec18Smcpowers {
477*f9fbec18Smcpowers 	mp_err res = MP_OKAY;
478*f9fbec18Smcpowers 
479*f9fbec18Smcpowers 	MP_CHECKOK(mp_mul(a, b, r));
480*f9fbec18Smcpowers 	MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
481*f9fbec18Smcpowers   CLEANUP:
482*f9fbec18Smcpowers 	return res;
483*f9fbec18Smcpowers }
484*f9fbec18Smcpowers 
485*f9fbec18Smcpowers /* Divides two field elements. If a is NULL, then returns the inverse of
486*f9fbec18Smcpowers  * b. */
487*f9fbec18Smcpowers mp_err
488*f9fbec18Smcpowers ec_GFp_nistp192_div(const mp_int *a, const mp_int *b, mp_int *r,
489*f9fbec18Smcpowers 		   const GFMethod *meth)
490*f9fbec18Smcpowers {
491*f9fbec18Smcpowers 	mp_err res = MP_OKAY;
492*f9fbec18Smcpowers 	mp_int t;
493*f9fbec18Smcpowers 
494*f9fbec18Smcpowers 	/* If a is NULL, then return the inverse of b, otherwise return a/b. */
495*f9fbec18Smcpowers 	if (a == NULL) {
496*f9fbec18Smcpowers 		return  mp_invmod(b, &meth->irr, r);
497*f9fbec18Smcpowers 	} else {
498*f9fbec18Smcpowers 		/* MPI doesn't support divmod, so we implement it using invmod and
499*f9fbec18Smcpowers 		 * mulmod. */
500*f9fbec18Smcpowers 		MP_CHECKOK(mp_init(&t, FLAG(b)));
501*f9fbec18Smcpowers 		MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
502*f9fbec18Smcpowers 		MP_CHECKOK(mp_mul(a, &t, r));
503*f9fbec18Smcpowers 		MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
504*f9fbec18Smcpowers 	  CLEANUP:
505*f9fbec18Smcpowers 		mp_clear(&t);
506*f9fbec18Smcpowers 		return res;
507*f9fbec18Smcpowers 	}
508*f9fbec18Smcpowers }
509*f9fbec18Smcpowers 
510*f9fbec18Smcpowers /* Wire in fast field arithmetic and precomputation of base point for
511*f9fbec18Smcpowers  * named curves. */
512*f9fbec18Smcpowers mp_err
513*f9fbec18Smcpowers ec_group_set_gfp192(ECGroup *group, ECCurveName name)
514*f9fbec18Smcpowers {
515*f9fbec18Smcpowers 	if (name == ECCurve_NIST_P192) {
516*f9fbec18Smcpowers 		group->meth->field_mod = &ec_GFp_nistp192_mod;
517*f9fbec18Smcpowers 		group->meth->field_mul = &ec_GFp_nistp192_mul;
518*f9fbec18Smcpowers 		group->meth->field_sqr = &ec_GFp_nistp192_sqr;
519*f9fbec18Smcpowers 		group->meth->field_div = &ec_GFp_nistp192_div;
520*f9fbec18Smcpowers #ifndef ECL_THIRTY_TWO_BIT
521*f9fbec18Smcpowers 		group->meth->field_add = &ec_GFp_nistp192_add;
522*f9fbec18Smcpowers 		group->meth->field_sub = &ec_GFp_nistp192_sub;
523*f9fbec18Smcpowers #endif
524*f9fbec18Smcpowers 	}
525*f9fbec18Smcpowers 	return MP_OKAY;
526*f9fbec18Smcpowers }
527