xref: /illumos-gate/usr/src/common/crypto/ecc/ecl_gf.c (revision 35a5a3587fd94b666239c157d3722745250ccbd7)
1 /*
2  * ***** BEGIN LICENSE BLOCK *****
3  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4  *
5  * The contents of this file are subject to the Mozilla Public License Version
6  * 1.1 (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  * http://www.mozilla.org/MPL/
9  *
10  * Software distributed under the License is distributed on an "AS IS" basis,
11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12  * for the specific language governing rights and limitations under the
13  * License.
14  *
15  * The Original Code is the elliptic curve math library.
16  *
17  * The Initial Developer of the Original Code is
18  * Sun Microsystems, Inc.
19  * Portions created by the Initial Developer are Copyright (C) 2003
20  * the Initial Developer. All Rights Reserved.
21  *
22  * Contributor(s):
23  *   Stephen Fung <fungstep@hotmail.com> and
24  *   Douglas Stebila <douglas@stebila.ca>, Sun Microsystems Laboratories
25  *
26  * Alternatively, the contents of this file may be used under the terms of
27  * either the GNU General Public License Version 2 or later (the "GPL"), or
28  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29  * in which case the provisions of the GPL or the LGPL are applicable instead
30  * of those above. If you wish to allow use of your version of this file only
31  * under the terms of either the GPL or the LGPL, and not to allow others to
32  * use your version of this file under the terms of the MPL, indicate your
33  * decision by deleting the provisions above and replace them with the notice
34  * and other provisions required by the GPL or the LGPL. If you do not delete
35  * the provisions above, a recipient may use your version of this file under
36  * the terms of any one of the MPL, the GPL or the LGPL.
37  *
38  * ***** END LICENSE BLOCK ***** */
39 /*
40  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
41  * Use is subject to license terms.
42  *
43  * Sun elects to use this software under the MPL license.
44  */
45 
46 #pragma ident	"%Z%%M%	%I%	%E% SMI"
47 
48 #include "mpi.h"
49 #include "mp_gf2m.h"
50 #include "ecl-priv.h"
51 #include "mpi-priv.h"
52 #ifndef _KERNEL
53 #include <stdlib.h>
54 #endif
55 
56 /* Allocate memory for a new GFMethod object. */
57 GFMethod *
58 GFMethod_new(int kmflag)
59 {
60 	mp_err res = MP_OKAY;
61 	GFMethod *meth;
62 #ifdef _KERNEL
63 	meth = (GFMethod *) kmem_alloc(sizeof(GFMethod), kmflag);
64 #else
65 	meth = (GFMethod *) malloc(sizeof(GFMethod));
66 	if (meth == NULL)
67 		return NULL;
68 #endif
69 	meth->constructed = MP_YES;
70 	MP_DIGITS(&meth->irr) = 0;
71 	meth->extra_free = NULL;
72 	MP_CHECKOK(mp_init(&meth->irr, kmflag));
73 
74   CLEANUP:
75 	if (res != MP_OKAY) {
76 		GFMethod_free(meth);
77 		return NULL;
78 	}
79 	return meth;
80 }
81 
82 /* Construct a generic GFMethod for arithmetic over prime fields with
83  * irreducible irr. */
84 GFMethod *
85 GFMethod_consGFp(const mp_int *irr)
86 {
87 	mp_err res = MP_OKAY;
88 	GFMethod *meth = NULL;
89 
90 	meth = GFMethod_new(FLAG(irr));
91 	if (meth == NULL)
92 		return NULL;
93 
94 	MP_CHECKOK(mp_copy(irr, &meth->irr));
95 	meth->irr_arr[0] = mpl_significant_bits(irr);
96 	meth->irr_arr[1] = meth->irr_arr[2] = meth->irr_arr[3] =
97 		meth->irr_arr[4] = 0;
98 	switch(MP_USED(&meth->irr)) {
99 	/* maybe we need 1 and 2 words here as well?*/
100 	case 3:
101 		meth->field_add = &ec_GFp_add_3;
102 		meth->field_sub = &ec_GFp_sub_3;
103 		break;
104 	case 4:
105 		meth->field_add = &ec_GFp_add_4;
106 		meth->field_sub = &ec_GFp_sub_4;
107 		break;
108 	case 5:
109 		meth->field_add = &ec_GFp_add_5;
110 		meth->field_sub = &ec_GFp_sub_5;
111 		break;
112 	case 6:
113 		meth->field_add = &ec_GFp_add_6;
114 		meth->field_sub = &ec_GFp_sub_6;
115 		break;
116 	default:
117 		meth->field_add = &ec_GFp_add;
118 		meth->field_sub = &ec_GFp_sub;
119 	}
120 	meth->field_neg = &ec_GFp_neg;
121 	meth->field_mod = &ec_GFp_mod;
122 	meth->field_mul = &ec_GFp_mul;
123 	meth->field_sqr = &ec_GFp_sqr;
124 	meth->field_div = &ec_GFp_div;
125 	meth->field_enc = NULL;
126 	meth->field_dec = NULL;
127 	meth->extra1 = NULL;
128 	meth->extra2 = NULL;
129 	meth->extra_free = NULL;
130 
131   CLEANUP:
132 	if (res != MP_OKAY) {
133 		GFMethod_free(meth);
134 		return NULL;
135 	}
136 	return meth;
137 }
138 
139 /* Construct a generic GFMethod for arithmetic over binary polynomial
140  * fields with irreducible irr that has array representation irr_arr (see
141  * ecl-priv.h for description of the representation).  If irr_arr is NULL,
142  * then it is constructed from the bitstring representation. */
143 GFMethod *
144 GFMethod_consGF2m(const mp_int *irr, const unsigned int irr_arr[5])
145 {
146 	mp_err res = MP_OKAY;
147 	int ret;
148 	GFMethod *meth = NULL;
149 
150 	meth = GFMethod_new(FLAG(irr));
151 	if (meth == NULL)
152 		return NULL;
153 
154 	MP_CHECKOK(mp_copy(irr, &meth->irr));
155 	if (irr_arr != NULL) {
156 		/* Irreducible polynomials are either trinomials or pentanomials. */
157 		meth->irr_arr[0] = irr_arr[0];
158 		meth->irr_arr[1] = irr_arr[1];
159 		meth->irr_arr[2] = irr_arr[2];
160 		if (irr_arr[2] > 0) {
161 			meth->irr_arr[3] = irr_arr[3];
162 			meth->irr_arr[4] = irr_arr[4];
163 		} else {
164 			meth->irr_arr[3] = meth->irr_arr[4] = 0;
165 		}
166 	} else {
167 		ret = mp_bpoly2arr(irr, meth->irr_arr, 5);
168 		/* Irreducible polynomials are either trinomials or pentanomials. */
169 		if ((ret != 5) && (ret != 3)) {
170 			res = MP_UNDEF;
171 			goto CLEANUP;
172 		}
173 	}
174 	meth->field_add = &ec_GF2m_add;
175 	meth->field_neg = &ec_GF2m_neg;
176 	meth->field_sub = &ec_GF2m_add;
177 	meth->field_mod = &ec_GF2m_mod;
178 	meth->field_mul = &ec_GF2m_mul;
179 	meth->field_sqr = &ec_GF2m_sqr;
180 	meth->field_div = &ec_GF2m_div;
181 	meth->field_enc = NULL;
182 	meth->field_dec = NULL;
183 	meth->extra1 = NULL;
184 	meth->extra2 = NULL;
185 	meth->extra_free = NULL;
186 
187   CLEANUP:
188 	if (res != MP_OKAY) {
189 		GFMethod_free(meth);
190 		return NULL;
191 	}
192 	return meth;
193 }
194 
195 /* Free the memory allocated (if any) to a GFMethod object. */
196 void
197 GFMethod_free(GFMethod *meth)
198 {
199 	if (meth == NULL)
200 		return;
201 	if (meth->constructed == MP_NO)
202 		return;
203 	mp_clear(&meth->irr);
204 	if (meth->extra_free != NULL)
205 		meth->extra_free(meth);
206 #ifdef _KERNEL
207 	kmem_free(meth, sizeof(GFMethod));
208 #else
209 	free(meth);
210 #endif
211 }
212 
213 /* Wrapper functions for generic prime field arithmetic. */
214 
215 /* Add two field elements.  Assumes that 0 <= a, b < meth->irr */
216 mp_err
217 ec_GFp_add(const mp_int *a, const mp_int *b, mp_int *r,
218 		   const GFMethod *meth)
219 {
220 	/* PRE: 0 <= a, b < p = meth->irr POST: 0 <= r < p, r = a + b (mod p) */
221 	mp_err res;
222 
223 	if ((res = mp_add(a, b, r)) != MP_OKAY) {
224 		return res;
225 	}
226 	if (mp_cmp(r, &meth->irr) >= 0) {
227 		return mp_sub(r, &meth->irr, r);
228 	}
229 	return res;
230 }
231 
232 /* Negates a field element.  Assumes that 0 <= a < meth->irr */
233 mp_err
234 ec_GFp_neg(const mp_int *a, mp_int *r, const GFMethod *meth)
235 {
236 	/* PRE: 0 <= a < p = meth->irr POST: 0 <= r < p, r = -a (mod p) */
237 
238 	if (mp_cmp_z(a) == 0) {
239 		mp_zero(r);
240 		return MP_OKAY;
241 	}
242 	return mp_sub(&meth->irr, a, r);
243 }
244 
245 /* Subtracts two field elements.  Assumes that 0 <= a, b < meth->irr */
246 mp_err
247 ec_GFp_sub(const mp_int *a, const mp_int *b, mp_int *r,
248 		   const GFMethod *meth)
249 {
250 	mp_err res = MP_OKAY;
251 
252 	/* PRE: 0 <= a, b < p = meth->irr POST: 0 <= r < p, r = a - b (mod p) */
253 	res = mp_sub(a, b, r);
254 	if (res == MP_RANGE) {
255 		MP_CHECKOK(mp_sub(b, a, r));
256 		if (mp_cmp_z(r) < 0) {
257 			MP_CHECKOK(mp_add(r, &meth->irr, r));
258 		}
259 		MP_CHECKOK(ec_GFp_neg(r, r, meth));
260 	}
261 	if (mp_cmp_z(r) < 0) {
262 		MP_CHECKOK(mp_add(r, &meth->irr, r));
263 	}
264   CLEANUP:
265 	return res;
266 }
267 /*
268  * Inline adds for small curve lengths.
269  */
270 /* 3 words */
271 mp_err
272 ec_GFp_add_3(const mp_int *a, const mp_int *b, mp_int *r,
273 			const GFMethod *meth)
274 {
275 	mp_err res = MP_OKAY;
276 	mp_digit a0 = 0, a1 = 0, a2 = 0;
277 	mp_digit r0 = 0, r1 = 0, r2 = 0;
278 	mp_digit carry;
279 
280 	switch(MP_USED(a)) {
281 	case 3:
282 		a2 = MP_DIGIT(a,2);
283 	case 2:
284 		a1 = MP_DIGIT(a,1);
285 	case 1:
286 		a0 = MP_DIGIT(a,0);
287 	}
288 	switch(MP_USED(b)) {
289 	case 3:
290 		r2 = MP_DIGIT(b,2);
291 	case 2:
292 		r1 = MP_DIGIT(b,1);
293 	case 1:
294 		r0 = MP_DIGIT(b,0);
295 	}
296 
297 #ifndef MPI_AMD64_ADD
298 	MP_ADD_CARRY(a0, r0, r0, 0,     carry);
299 	MP_ADD_CARRY(a1, r1, r1, carry, carry);
300 	MP_ADD_CARRY(a2, r2, r2, carry, carry);
301 #else
302 	__asm__ (
303                 "xorq   %3,%3           \n\t"
304                 "addq   %4,%0           \n\t"
305                 "adcq   %5,%1           \n\t"
306                 "adcq   %6,%2           \n\t"
307                 "adcq   $0,%3           \n\t"
308                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry)
309                 : "r" (a0), "r" (a1), "r" (a2),
310 		  "0" (r0), "1" (r1), "2" (r2)
311                 : "%cc" );
312 #endif
313 
314 	MP_CHECKOK(s_mp_pad(r, 3));
315 	MP_DIGIT(r, 2) = r2;
316 	MP_DIGIT(r, 1) = r1;
317 	MP_DIGIT(r, 0) = r0;
318 	MP_SIGN(r) = MP_ZPOS;
319 	MP_USED(r) = 3;
320 
321 	/* Do quick 'subract' if we've gone over
322 	 * (add the 2's complement of the curve field) */
323 	 a2 = MP_DIGIT(&meth->irr,2);
324 	if (carry ||  r2 >  a2 ||
325 		((r2 == a2) && mp_cmp(r,&meth->irr) != MP_LT)) {
326 		a1 = MP_DIGIT(&meth->irr,1);
327 		a0 = MP_DIGIT(&meth->irr,0);
328 #ifndef MPI_AMD64_ADD
329 		MP_SUB_BORROW(r0, a0, r0, 0,     carry);
330 		MP_SUB_BORROW(r1, a1, r1, carry, carry);
331 		MP_SUB_BORROW(r2, a2, r2, carry, carry);
332 #else
333 		__asm__ (
334 			"subq   %3,%0           \n\t"
335 			"sbbq   %4,%1           \n\t"
336 			"sbbq   %5,%2           \n\t"
337 			: "=r"(r0), "=r"(r1), "=r"(r2)
338 			: "r" (a0), "r" (a1), "r" (a2),
339 			  "0" (r0), "1" (r1), "2" (r2)
340 			: "%cc" );
341 #endif
342 		MP_DIGIT(r, 2) = r2;
343 		MP_DIGIT(r, 1) = r1;
344 		MP_DIGIT(r, 0) = r0;
345 	}
346 
347 	s_mp_clamp(r);
348 
349   CLEANUP:
350 	return res;
351 }
352 
353 /* 4 words */
354 mp_err
355 ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r,
356 			const GFMethod *meth)
357 {
358 	mp_err res = MP_OKAY;
359 	mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0;
360 	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
361 	mp_digit carry;
362 
363 	switch(MP_USED(a)) {
364 	case 4:
365 		a3 = MP_DIGIT(a,3);
366 	case 3:
367 		a2 = MP_DIGIT(a,2);
368 	case 2:
369 		a1 = MP_DIGIT(a,1);
370 	case 1:
371 		a0 = MP_DIGIT(a,0);
372 	}
373 	switch(MP_USED(b)) {
374 	case 4:
375 		r3 = MP_DIGIT(b,3);
376 	case 3:
377 		r2 = MP_DIGIT(b,2);
378 	case 2:
379 		r1 = MP_DIGIT(b,1);
380 	case 1:
381 		r0 = MP_DIGIT(b,0);
382 	}
383 
384 #ifndef MPI_AMD64_ADD
385 	MP_ADD_CARRY(a0, r0, r0, 0,     carry);
386 	MP_ADD_CARRY(a1, r1, r1, carry, carry);
387 	MP_ADD_CARRY(a2, r2, r2, carry, carry);
388 	MP_ADD_CARRY(a3, r3, r3, carry, carry);
389 #else
390 	__asm__ (
391                 "xorq   %4,%4           \n\t"
392                 "addq   %5,%0           \n\t"
393                 "adcq   %6,%1           \n\t"
394                 "adcq   %7,%2           \n\t"
395                 "adcq   %8,%3           \n\t"
396                 "adcq   $0,%4           \n\t"
397                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(carry)
398                 : "r" (a0), "r" (a1), "r" (a2), "r" (a3),
399 		  "0" (r0), "1" (r1), "2" (r2), "3" (r3)
400                 : "%cc" );
401 #endif
402 
403 	MP_CHECKOK(s_mp_pad(r, 4));
404 	MP_DIGIT(r, 3) = r3;
405 	MP_DIGIT(r, 2) = r2;
406 	MP_DIGIT(r, 1) = r1;
407 	MP_DIGIT(r, 0) = r0;
408 	MP_SIGN(r) = MP_ZPOS;
409 	MP_USED(r) = 4;
410 
411 	/* Do quick 'subract' if we've gone over
412 	 * (add the 2's complement of the curve field) */
413 	 a3 = MP_DIGIT(&meth->irr,3);
414 	if (carry ||  r3 >  a3 ||
415 		((r3 == a3) && mp_cmp(r,&meth->irr) != MP_LT)) {
416 		a2 = MP_DIGIT(&meth->irr,2);
417 		a1 = MP_DIGIT(&meth->irr,1);
418 		a0 = MP_DIGIT(&meth->irr,0);
419 #ifndef MPI_AMD64_ADD
420 		MP_SUB_BORROW(r0, a0, r0, 0,     carry);
421 		MP_SUB_BORROW(r1, a1, r1, carry, carry);
422 		MP_SUB_BORROW(r2, a2, r2, carry, carry);
423 		MP_SUB_BORROW(r3, a3, r3, carry, carry);
424 #else
425 		__asm__ (
426 			"subq   %4,%0           \n\t"
427 			"sbbq   %5,%1           \n\t"
428 			"sbbq   %6,%2           \n\t"
429 			"sbbq   %7,%3           \n\t"
430 			: "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
431 			: "r" (a0), "r" (a1), "r" (a2), "r" (a3),
432 			  "0" (r0), "1" (r1), "2" (r2), "3" (r3)
433 			: "%cc" );
434 #endif
435 		MP_DIGIT(r, 3) = r3;
436 		MP_DIGIT(r, 2) = r2;
437 		MP_DIGIT(r, 1) = r1;
438 		MP_DIGIT(r, 0) = r0;
439 	}
440 
441 	s_mp_clamp(r);
442 
443   CLEANUP:
444 	return res;
445 }
446 
447 /* 5 words */
448 mp_err
449 ec_GFp_add_5(const mp_int *a, const mp_int *b, mp_int *r,
450 			const GFMethod *meth)
451 {
452 	mp_err res = MP_OKAY;
453 	mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0;
454 	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
455 	mp_digit carry;
456 
457 	switch(MP_USED(a)) {
458 	case 5:
459 		a4 = MP_DIGIT(a,4);
460 	case 4:
461 		a3 = MP_DIGIT(a,3);
462 	case 3:
463 		a2 = MP_DIGIT(a,2);
464 	case 2:
465 		a1 = MP_DIGIT(a,1);
466 	case 1:
467 		a0 = MP_DIGIT(a,0);
468 	}
469 	switch(MP_USED(b)) {
470 	case 5:
471 		r4 = MP_DIGIT(b,4);
472 	case 4:
473 		r3 = MP_DIGIT(b,3);
474 	case 3:
475 		r2 = MP_DIGIT(b,2);
476 	case 2:
477 		r1 = MP_DIGIT(b,1);
478 	case 1:
479 		r0 = MP_DIGIT(b,0);
480 	}
481 
482 	MP_ADD_CARRY(a0, r0, r0, 0,     carry);
483 	MP_ADD_CARRY(a1, r1, r1, carry, carry);
484 	MP_ADD_CARRY(a2, r2, r2, carry, carry);
485 	MP_ADD_CARRY(a3, r3, r3, carry, carry);
486 	MP_ADD_CARRY(a4, r4, r4, carry, carry);
487 
488 	MP_CHECKOK(s_mp_pad(r, 5));
489 	MP_DIGIT(r, 4) = r4;
490 	MP_DIGIT(r, 3) = r3;
491 	MP_DIGIT(r, 2) = r2;
492 	MP_DIGIT(r, 1) = r1;
493 	MP_DIGIT(r, 0) = r0;
494 	MP_SIGN(r) = MP_ZPOS;
495 	MP_USED(r) = 5;
496 
497 	/* Do quick 'subract' if we've gone over
498 	 * (add the 2's complement of the curve field) */
499 	 a4 = MP_DIGIT(&meth->irr,4);
500 	if (carry ||  r4 >  a4 ||
501 		((r4 == a4) && mp_cmp(r,&meth->irr) != MP_LT)) {
502 		a3 = MP_DIGIT(&meth->irr,3);
503 		a2 = MP_DIGIT(&meth->irr,2);
504 		a1 = MP_DIGIT(&meth->irr,1);
505 		a0 = MP_DIGIT(&meth->irr,0);
506 		MP_SUB_BORROW(r0, a0, r0, 0,     carry);
507 		MP_SUB_BORROW(r1, a1, r1, carry, carry);
508 		MP_SUB_BORROW(r2, a2, r2, carry, carry);
509 		MP_SUB_BORROW(r3, a3, r3, carry, carry);
510 		MP_SUB_BORROW(r4, a4, r4, carry, carry);
511 		MP_DIGIT(r, 4) = r4;
512 		MP_DIGIT(r, 3) = r3;
513 		MP_DIGIT(r, 2) = r2;
514 		MP_DIGIT(r, 1) = r1;
515 		MP_DIGIT(r, 0) = r0;
516 	}
517 
518 	s_mp_clamp(r);
519 
520   CLEANUP:
521 	return res;
522 }
523 
524 /* 6 words */
525 mp_err
526 ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r,
527 			const GFMethod *meth)
528 {
529 	mp_err res = MP_OKAY;
530 	mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0, a5 = 0;
531 	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
532 	mp_digit carry;
533 
534 	switch(MP_USED(a)) {
535 	case 6:
536 		a5 = MP_DIGIT(a,5);
537 	case 5:
538 		a4 = MP_DIGIT(a,4);
539 	case 4:
540 		a3 = MP_DIGIT(a,3);
541 	case 3:
542 		a2 = MP_DIGIT(a,2);
543 	case 2:
544 		a1 = MP_DIGIT(a,1);
545 	case 1:
546 		a0 = MP_DIGIT(a,0);
547 	}
548 	switch(MP_USED(b)) {
549 	case 6:
550 		r5 = MP_DIGIT(b,5);
551 	case 5:
552 		r4 = MP_DIGIT(b,4);
553 	case 4:
554 		r3 = MP_DIGIT(b,3);
555 	case 3:
556 		r2 = MP_DIGIT(b,2);
557 	case 2:
558 		r1 = MP_DIGIT(b,1);
559 	case 1:
560 		r0 = MP_DIGIT(b,0);
561 	}
562 
563 	MP_ADD_CARRY(a0, r0, r0, 0,     carry);
564 	MP_ADD_CARRY(a1, r1, r1, carry, carry);
565 	MP_ADD_CARRY(a2, r2, r2, carry, carry);
566 	MP_ADD_CARRY(a3, r3, r3, carry, carry);
567 	MP_ADD_CARRY(a4, r4, r4, carry, carry);
568 	MP_ADD_CARRY(a5, r5, r5, carry, carry);
569 
570 	MP_CHECKOK(s_mp_pad(r, 6));
571 	MP_DIGIT(r, 5) = r5;
572 	MP_DIGIT(r, 4) = r4;
573 	MP_DIGIT(r, 3) = r3;
574 	MP_DIGIT(r, 2) = r2;
575 	MP_DIGIT(r, 1) = r1;
576 	MP_DIGIT(r, 0) = r0;
577 	MP_SIGN(r) = MP_ZPOS;
578 	MP_USED(r) = 6;
579 
580 	/* Do quick 'subract' if we've gone over
581 	 * (add the 2's complement of the curve field) */
582 	a5 = MP_DIGIT(&meth->irr,5);
583 	if (carry ||  r5 >  a5 ||
584 		((r5 == a5) && mp_cmp(r,&meth->irr) != MP_LT)) {
585 		a4 = MP_DIGIT(&meth->irr,4);
586 		a3 = MP_DIGIT(&meth->irr,3);
587 		a2 = MP_DIGIT(&meth->irr,2);
588 		a1 = MP_DIGIT(&meth->irr,1);
589 		a0 = MP_DIGIT(&meth->irr,0);
590 		MP_SUB_BORROW(r0, a0, r0, 0,     carry);
591 		MP_SUB_BORROW(r1, a1, r1, carry, carry);
592 		MP_SUB_BORROW(r2, a2, r2, carry, carry);
593 		MP_SUB_BORROW(r3, a3, r3, carry, carry);
594 		MP_SUB_BORROW(r4, a4, r4, carry, carry);
595 		MP_SUB_BORROW(r5, a5, r5, carry, carry);
596 		MP_DIGIT(r, 5) = r5;
597 		MP_DIGIT(r, 4) = r4;
598 		MP_DIGIT(r, 3) = r3;
599 		MP_DIGIT(r, 2) = r2;
600 		MP_DIGIT(r, 1) = r1;
601 		MP_DIGIT(r, 0) = r0;
602 	}
603 
604 	s_mp_clamp(r);
605 
606   CLEANUP:
607 	return res;
608 }
609 
610 /*
611  * The following subraction functions do in-line subractions based
612  * on our curve size.
613  *
614  * ... 3 words
615  */
616 mp_err
617 ec_GFp_sub_3(const mp_int *a, const mp_int *b, mp_int *r,
618 			const GFMethod *meth)
619 {
620 	mp_err res = MP_OKAY;
621 	mp_digit b0 = 0, b1 = 0, b2 = 0;
622 	mp_digit r0 = 0, r1 = 0, r2 = 0;
623 	mp_digit borrow;
624 
625 	switch(MP_USED(a)) {
626 	case 3:
627 		r2 = MP_DIGIT(a,2);
628 	case 2:
629 		r1 = MP_DIGIT(a,1);
630 	case 1:
631 		r0 = MP_DIGIT(a,0);
632 	}
633 	switch(MP_USED(b)) {
634 	case 3:
635 		b2 = MP_DIGIT(b,2);
636 	case 2:
637 		b1 = MP_DIGIT(b,1);
638 	case 1:
639 		b0 = MP_DIGIT(b,0);
640 	}
641 
642 #ifndef MPI_AMD64_ADD
643 	MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
644 	MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
645 	MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
646 #else
647 	__asm__ (
648                 "xorq   %3,%3           \n\t"
649                 "subq   %4,%0           \n\t"
650                 "sbbq   %5,%1           \n\t"
651                 "sbbq   %6,%2           \n\t"
652                 "adcq   $0,%3           \n\t"
653                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r" (borrow)
654                 : "r" (b0), "r" (b1), "r" (b2),
655 		  "0" (r0), "1" (r1), "2" (r2)
656                 : "%cc" );
657 #endif
658 
659 	/* Do quick 'add' if we've gone under 0
660 	 * (subtract the 2's complement of the curve field) */
661 	if (borrow) {
662 	 	b2 = MP_DIGIT(&meth->irr,2);
663 		b1 = MP_DIGIT(&meth->irr,1);
664 		b0 = MP_DIGIT(&meth->irr,0);
665 #ifndef MPI_AMD64_ADD
666 		MP_ADD_CARRY(b0, r0, r0, 0,      borrow);
667 		MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
668 		MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
669 #else
670 		__asm__ (
671 			"addq   %3,%0           \n\t"
672 			"adcq   %4,%1           \n\t"
673 			"adcq   %5,%2           \n\t"
674 			: "=r"(r0), "=r"(r1), "=r"(r2)
675 			: "r" (b0), "r" (b1), "r" (b2),
676   			  "0" (r0), "1" (r1), "2" (r2)
677 			: "%cc" );
678 #endif
679 	}
680 
681 #ifdef MPI_AMD64_ADD
682 	/* compiler fakeout? */
683 	if ((r2 == b0) && (r1 == b0) && (r0 == b0)) {
684 		MP_CHECKOK(s_mp_pad(r, 4));
685 	}
686 #endif
687 	MP_CHECKOK(s_mp_pad(r, 3));
688 	MP_DIGIT(r, 2) = r2;
689 	MP_DIGIT(r, 1) = r1;
690 	MP_DIGIT(r, 0) = r0;
691 	MP_SIGN(r) = MP_ZPOS;
692 	MP_USED(r) = 3;
693 	s_mp_clamp(r);
694 
695   CLEANUP:
696 	return res;
697 }
698 
699 /* 4 words */
700 mp_err
701 ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r,
702 			const GFMethod *meth)
703 {
704 	mp_err res = MP_OKAY;
705 	mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0;
706 	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
707 	mp_digit borrow;
708 
709 	switch(MP_USED(a)) {
710 	case 4:
711 		r3 = MP_DIGIT(a,3);
712 	case 3:
713 		r2 = MP_DIGIT(a,2);
714 	case 2:
715 		r1 = MP_DIGIT(a,1);
716 	case 1:
717 		r0 = MP_DIGIT(a,0);
718 	}
719 	switch(MP_USED(b)) {
720 	case 4:
721 		b3 = MP_DIGIT(b,3);
722 	case 3:
723 		b2 = MP_DIGIT(b,2);
724 	case 2:
725 		b1 = MP_DIGIT(b,1);
726 	case 1:
727 		b0 = MP_DIGIT(b,0);
728 	}
729 
730 #ifndef MPI_AMD64_ADD
731 	MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
732 	MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
733 	MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
734 	MP_SUB_BORROW(r3, b3, r3, borrow, borrow);
735 #else
736 	__asm__ (
737                 "xorq   %4,%4           \n\t"
738                 "subq   %5,%0           \n\t"
739                 "sbbq   %6,%1           \n\t"
740                 "sbbq   %7,%2           \n\t"
741                 "sbbq   %8,%3           \n\t"
742                 "adcq   $0,%4           \n\t"
743                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r" (borrow)
744                 : "r" (b0), "r" (b1), "r" (b2), "r" (b3),
745 		  "0" (r0), "1" (r1), "2" (r2), "3" (r3)
746                 : "%cc" );
747 #endif
748 
749 	/* Do quick 'add' if we've gone under 0
750 	 * (subtract the 2's complement of the curve field) */
751 	if (borrow) {
752 	 	b3 = MP_DIGIT(&meth->irr,3);
753 	 	b2 = MP_DIGIT(&meth->irr,2);
754 		b1 = MP_DIGIT(&meth->irr,1);
755 		b0 = MP_DIGIT(&meth->irr,0);
756 #ifndef MPI_AMD64_ADD
757 		MP_ADD_CARRY(b0, r0, r0, 0,      borrow);
758 		MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
759 		MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
760 		MP_ADD_CARRY(b3, r3, r3, borrow, borrow);
761 #else
762 		__asm__ (
763 			"addq   %4,%0           \n\t"
764 			"adcq   %5,%1           \n\t"
765 			"adcq   %6,%2           \n\t"
766 			"adcq   %7,%3           \n\t"
767 			: "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
768 			: "r" (b0), "r" (b1), "r" (b2), "r" (b3),
769   			  "0" (r0), "1" (r1), "2" (r2), "3" (r3)
770 			: "%cc" );
771 #endif
772 	}
773 #ifdef MPI_AMD64_ADD
774 	/* compiler fakeout? */
775 	if ((r3 == b0) && (r1 == b0) && (r0 == b0)) {
776 		MP_CHECKOK(s_mp_pad(r, 4));
777 	}
778 #endif
779 	MP_CHECKOK(s_mp_pad(r, 4));
780 	MP_DIGIT(r, 3) = r3;
781 	MP_DIGIT(r, 2) = r2;
782 	MP_DIGIT(r, 1) = r1;
783 	MP_DIGIT(r, 0) = r0;
784 	MP_SIGN(r) = MP_ZPOS;
785 	MP_USED(r) = 4;
786 	s_mp_clamp(r);
787 
788   CLEANUP:
789 	return res;
790 }
791 
792 /* 5 words */
793 mp_err
794 ec_GFp_sub_5(const mp_int *a, const mp_int *b, mp_int *r,
795 			const GFMethod *meth)
796 {
797 	mp_err res = MP_OKAY;
798 	mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0;
799 	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
800 	mp_digit borrow;
801 
802 	switch(MP_USED(a)) {
803 	case 5:
804 		r4 = MP_DIGIT(a,4);
805 	case 4:
806 		r3 = MP_DIGIT(a,3);
807 	case 3:
808 		r2 = MP_DIGIT(a,2);
809 	case 2:
810 		r1 = MP_DIGIT(a,1);
811 	case 1:
812 		r0 = MP_DIGIT(a,0);
813 	}
814 	switch(MP_USED(b)) {
815 	case 5:
816 		b4 = MP_DIGIT(b,4);
817 	case 4:
818 		b3 = MP_DIGIT(b,3);
819 	case 3:
820 		b2 = MP_DIGIT(b,2);
821 	case 2:
822 		b1 = MP_DIGIT(b,1);
823 	case 1:
824 		b0 = MP_DIGIT(b,0);
825 	}
826 
827 	MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
828 	MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
829 	MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
830 	MP_SUB_BORROW(r3, b3, r3, borrow, borrow);
831 	MP_SUB_BORROW(r4, b4, r4, borrow, borrow);
832 
833 	/* Do quick 'add' if we've gone under 0
834 	 * (subtract the 2's complement of the curve field) */
835 	if (borrow) {
836 	 	b4 = MP_DIGIT(&meth->irr,4);
837 	 	b3 = MP_DIGIT(&meth->irr,3);
838 	 	b2 = MP_DIGIT(&meth->irr,2);
839 		b1 = MP_DIGIT(&meth->irr,1);
840 		b0 = MP_DIGIT(&meth->irr,0);
841 		MP_ADD_CARRY(b0, r0, r0, 0,      borrow);
842 		MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
843 		MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
844 		MP_ADD_CARRY(b3, r3, r3, borrow, borrow);
845 	}
846 	MP_CHECKOK(s_mp_pad(r, 5));
847 	MP_DIGIT(r, 4) = r4;
848 	MP_DIGIT(r, 3) = r3;
849 	MP_DIGIT(r, 2) = r2;
850 	MP_DIGIT(r, 1) = r1;
851 	MP_DIGIT(r, 0) = r0;
852 	MP_SIGN(r) = MP_ZPOS;
853 	MP_USED(r) = 5;
854 	s_mp_clamp(r);
855 
856   CLEANUP:
857 	return res;
858 }
859 
860 /* 6 words */
861 mp_err
862 ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r,
863 			const GFMethod *meth)
864 {
865 	mp_err res = MP_OKAY;
866 	mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0, b5 = 0;
867 	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
868 	mp_digit borrow;
869 
870 	switch(MP_USED(a)) {
871 	case 6:
872 		r5 = MP_DIGIT(a,5);
873 	case 5:
874 		r4 = MP_DIGIT(a,4);
875 	case 4:
876 		r3 = MP_DIGIT(a,3);
877 	case 3:
878 		r2 = MP_DIGIT(a,2);
879 	case 2:
880 		r1 = MP_DIGIT(a,1);
881 	case 1:
882 		r0 = MP_DIGIT(a,0);
883 	}
884 	switch(MP_USED(b)) {
885 	case 6:
886 		b5 = MP_DIGIT(b,5);
887 	case 5:
888 		b4 = MP_DIGIT(b,4);
889 	case 4:
890 		b3 = MP_DIGIT(b,3);
891 	case 3:
892 		b2 = MP_DIGIT(b,2);
893 	case 2:
894 		b1 = MP_DIGIT(b,1);
895 	case 1:
896 		b0 = MP_DIGIT(b,0);
897 	}
898 
899 	MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
900 	MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
901 	MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
902 	MP_SUB_BORROW(r3, b3, r3, borrow, borrow);
903 	MP_SUB_BORROW(r4, b4, r4, borrow, borrow);
904 	MP_SUB_BORROW(r5, b5, r5, borrow, borrow);
905 
906 	/* Do quick 'add' if we've gone under 0
907 	 * (subtract the 2's complement of the curve field) */
908 	if (borrow) {
909 	 	b5 = MP_DIGIT(&meth->irr,5);
910 	 	b4 = MP_DIGIT(&meth->irr,4);
911 	 	b3 = MP_DIGIT(&meth->irr,3);
912 	 	b2 = MP_DIGIT(&meth->irr,2);
913 		b1 = MP_DIGIT(&meth->irr,1);
914 		b0 = MP_DIGIT(&meth->irr,0);
915 		MP_ADD_CARRY(b0, r0, r0, 0,      borrow);
916 		MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
917 		MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
918 		MP_ADD_CARRY(b3, r3, r3, borrow, borrow);
919 		MP_ADD_CARRY(b4, r4, r4, borrow, borrow);
920 	}
921 
922 	MP_CHECKOK(s_mp_pad(r, 6));
923 	MP_DIGIT(r, 5) = r5;
924 	MP_DIGIT(r, 4) = r4;
925 	MP_DIGIT(r, 3) = r3;
926 	MP_DIGIT(r, 2) = r2;
927 	MP_DIGIT(r, 1) = r1;
928 	MP_DIGIT(r, 0) = r0;
929 	MP_SIGN(r) = MP_ZPOS;
930 	MP_USED(r) = 6;
931 	s_mp_clamp(r);
932 
933   CLEANUP:
934 	return res;
935 }
936 
937 
938 /* Reduces an integer to a field element. */
939 mp_err
940 ec_GFp_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
941 {
942 	return mp_mod(a, &meth->irr, r);
943 }
944 
945 /* Multiplies two field elements. */
946 mp_err
947 ec_GFp_mul(const mp_int *a, const mp_int *b, mp_int *r,
948 		   const GFMethod *meth)
949 {
950 	return mp_mulmod(a, b, &meth->irr, r);
951 }
952 
953 /* Squares a field element. */
954 mp_err
955 ec_GFp_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
956 {
957 	return mp_sqrmod(a, &meth->irr, r);
958 }
959 
960 /* Divides two field elements. If a is NULL, then returns the inverse of
961  * b. */
962 mp_err
963 ec_GFp_div(const mp_int *a, const mp_int *b, mp_int *r,
964 		   const GFMethod *meth)
965 {
966 	mp_err res = MP_OKAY;
967 	mp_int t;
968 
969 	/* If a is NULL, then return the inverse of b, otherwise return a/b. */
970 	if (a == NULL) {
971 		return mp_invmod(b, &meth->irr, r);
972 	} else {
973 		/* MPI doesn't support divmod, so we implement it using invmod and
974 		 * mulmod. */
975 		MP_CHECKOK(mp_init(&t, FLAG(b)));
976 		MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
977 		MP_CHECKOK(mp_mulmod(a, &t, &meth->irr, r));
978 	  CLEANUP:
979 		mp_clear(&t);
980 		return res;
981 	}
982 }
983 
984 /* Wrapper functions for generic binary polynomial field arithmetic. */
985 
986 /* Adds two field elements. */
987 mp_err
988 ec_GF2m_add(const mp_int *a, const mp_int *b, mp_int *r,
989 			const GFMethod *meth)
990 {
991 	return mp_badd(a, b, r);
992 }
993 
994 /* Negates a field element. Note that for binary polynomial fields, the
995  * negation of a field element is the field element itself. */
996 mp_err
997 ec_GF2m_neg(const mp_int *a, mp_int *r, const GFMethod *meth)
998 {
999 	if (a == r) {
1000 		return MP_OKAY;
1001 	} else {
1002 		return mp_copy(a, r);
1003 	}
1004 }
1005 
1006 /* Reduces a binary polynomial to a field element. */
1007 mp_err
1008 ec_GF2m_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
1009 {
1010 	return mp_bmod(a, meth->irr_arr, r);
1011 }
1012 
1013 /* Multiplies two field elements. */
1014 mp_err
1015 ec_GF2m_mul(const mp_int *a, const mp_int *b, mp_int *r,
1016 			const GFMethod *meth)
1017 {
1018 	return mp_bmulmod(a, b, meth->irr_arr, r);
1019 }
1020 
1021 /* Squares a field element. */
1022 mp_err
1023 ec_GF2m_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
1024 {
1025 	return mp_bsqrmod(a, meth->irr_arr, r);
1026 }
1027 
1028 /* Divides two field elements. If a is NULL, then returns the inverse of
1029  * b. */
1030 mp_err
1031 ec_GF2m_div(const mp_int *a, const mp_int *b, mp_int *r,
1032 			const GFMethod *meth)
1033 {
1034 	mp_err res = MP_OKAY;
1035 	mp_int t;
1036 
1037 	/* If a is NULL, then return the inverse of b, otherwise return a/b. */
1038 	if (a == NULL) {
1039 		/* The GF(2^m) portion of MPI doesn't support invmod, so we
1040 		 * compute 1/b. */
1041 		MP_CHECKOK(mp_init(&t, FLAG(b)));
1042 		MP_CHECKOK(mp_set_int(&t, 1));
1043 		MP_CHECKOK(mp_bdivmod(&t, b, &meth->irr, meth->irr_arr, r));
1044 	  CLEANUP:
1045 		mp_clear(&t);
1046 		return res;
1047 	} else {
1048 		return mp_bdivmod(a, b, &meth->irr, meth->irr_arr, r);
1049 	}
1050 }
1051