xref: /illumos-gate/usr/src/common/crypto/ecc/ecl_gf.c (revision e9db39cef1f968a982994f50c05903cc988a3dd3)
1 /*
2  * ***** BEGIN LICENSE BLOCK *****
3  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4  *
5  * The contents of this file are subject to the Mozilla Public License Version
6  * 1.1 (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  * http://www.mozilla.org/MPL/
9  *
10  * Software distributed under the License is distributed on an "AS IS" basis,
11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12  * for the specific language governing rights and limitations under the
13  * License.
14  *
15  * The Original Code is the elliptic curve math library.
16  *
17  * The Initial Developer of the Original Code is
18  * Sun Microsystems, Inc.
19  * Portions created by the Initial Developer are Copyright (C) 2003
20  * the Initial Developer. All Rights Reserved.
21  *
22  * Contributor(s):
23  *   Stephen Fung <fungstep@hotmail.com> and
24  *   Douglas Stebila <douglas@stebila.ca>, Sun Microsystems Laboratories
25  *
26  * Alternatively, the contents of this file may be used under the terms of
27  * either the GNU General Public License Version 2 or later (the "GPL"), or
28  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29  * in which case the provisions of the GPL or the LGPL are applicable instead
30  * of those above. If you wish to allow use of your version of this file only
31  * under the terms of either the GPL or the LGPL, and not to allow others to
32  * use your version of this file under the terms of the MPL, indicate your
33  * decision by deleting the provisions above and replace them with the notice
34  * and other provisions required by the GPL or the LGPL. If you do not delete
35  * the provisions above, a recipient may use your version of this file under
36  * the terms of any one of the MPL, the GPL or the LGPL.
37  *
38  * ***** END LICENSE BLOCK ***** */
39 /*
40  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
41  * Use is subject to license terms.
42  *
43  * Sun elects to use this software under the MPL license.
44  */
45 
46 #include "mpi.h"
47 #include "mp_gf2m.h"
48 #include "ecl-priv.h"
49 #include "mpi-priv.h"
50 #ifndef _KERNEL
51 #include <stdlib.h>
52 #endif
53 
54 /* Allocate memory for a new GFMethod object. */
55 GFMethod *
56 GFMethod_new(int kmflag)
57 {
58 	mp_err res = MP_OKAY;
59 	GFMethod *meth;
60 #ifdef _KERNEL
61 	meth = (GFMethod *) kmem_alloc(sizeof(GFMethod), kmflag);
62 #else
63 	meth = (GFMethod *) malloc(sizeof(GFMethod));
64 	if (meth == NULL)
65 		return NULL;
66 #endif
67 	meth->constructed = MP_YES;
68 	MP_DIGITS(&meth->irr) = 0;
69 	meth->extra_free = NULL;
70 	MP_CHECKOK(mp_init(&meth->irr, kmflag));
71 
72   CLEANUP:
73 	if (res != MP_OKAY) {
74 		GFMethod_free(meth);
75 		return NULL;
76 	}
77 	return meth;
78 }
79 
80 /* Construct a generic GFMethod for arithmetic over prime fields with
81  * irreducible irr. */
82 GFMethod *
83 GFMethod_consGFp(const mp_int *irr)
84 {
85 	mp_err res = MP_OKAY;
86 	GFMethod *meth = NULL;
87 
88 	meth = GFMethod_new(FLAG(irr));
89 	if (meth == NULL)
90 		return NULL;
91 
92 	MP_CHECKOK(mp_copy(irr, &meth->irr));
93 	meth->irr_arr[0] = mpl_significant_bits(irr);
94 	meth->irr_arr[1] = meth->irr_arr[2] = meth->irr_arr[3] =
95 		meth->irr_arr[4] = 0;
96 	switch(MP_USED(&meth->irr)) {
97 	/* maybe we need 1 and 2 words here as well?*/
98 	case 3:
99 		meth->field_add = &ec_GFp_add_3;
100 		meth->field_sub = &ec_GFp_sub_3;
101 		break;
102 	case 4:
103 		meth->field_add = &ec_GFp_add_4;
104 		meth->field_sub = &ec_GFp_sub_4;
105 		break;
106 	case 5:
107 		meth->field_add = &ec_GFp_add_5;
108 		meth->field_sub = &ec_GFp_sub_5;
109 		break;
110 	case 6:
111 		meth->field_add = &ec_GFp_add_6;
112 		meth->field_sub = &ec_GFp_sub_6;
113 		break;
114 	default:
115 		meth->field_add = &ec_GFp_add;
116 		meth->field_sub = &ec_GFp_sub;
117 	}
118 	meth->field_neg = &ec_GFp_neg;
119 	meth->field_mod = &ec_GFp_mod;
120 	meth->field_mul = &ec_GFp_mul;
121 	meth->field_sqr = &ec_GFp_sqr;
122 	meth->field_div = &ec_GFp_div;
123 	meth->field_enc = NULL;
124 	meth->field_dec = NULL;
125 	meth->extra1 = NULL;
126 	meth->extra2 = NULL;
127 	meth->extra_free = NULL;
128 
129   CLEANUP:
130 	if (res != MP_OKAY) {
131 		GFMethod_free(meth);
132 		return NULL;
133 	}
134 	return meth;
135 }
136 
137 /* Construct a generic GFMethod for arithmetic over binary polynomial
138  * fields with irreducible irr that has array representation irr_arr (see
139  * ecl-priv.h for description of the representation).  If irr_arr is NULL,
140  * then it is constructed from the bitstring representation. */
141 GFMethod *
142 GFMethod_consGF2m(const mp_int *irr, const unsigned int irr_arr[5])
143 {
144 	mp_err res = MP_OKAY;
145 	int ret;
146 	GFMethod *meth = NULL;
147 
148 	meth = GFMethod_new(FLAG(irr));
149 	if (meth == NULL)
150 		return NULL;
151 
152 	MP_CHECKOK(mp_copy(irr, &meth->irr));
153 	if (irr_arr != NULL) {
154 		/* Irreducible polynomials are either trinomials or pentanomials. */
155 		meth->irr_arr[0] = irr_arr[0];
156 		meth->irr_arr[1] = irr_arr[1];
157 		meth->irr_arr[2] = irr_arr[2];
158 		if (irr_arr[2] > 0) {
159 			meth->irr_arr[3] = irr_arr[3];
160 			meth->irr_arr[4] = irr_arr[4];
161 		} else {
162 			meth->irr_arr[3] = meth->irr_arr[4] = 0;
163 		}
164 	} else {
165 		ret = mp_bpoly2arr(irr, meth->irr_arr, 5);
166 		/* Irreducible polynomials are either trinomials or pentanomials. */
167 		if ((ret != 5) && (ret != 3)) {
168 			res = MP_UNDEF;
169 			goto CLEANUP;
170 		}
171 	}
172 	meth->field_add = &ec_GF2m_add;
173 	meth->field_neg = &ec_GF2m_neg;
174 	meth->field_sub = &ec_GF2m_add;
175 	meth->field_mod = &ec_GF2m_mod;
176 	meth->field_mul = &ec_GF2m_mul;
177 	meth->field_sqr = &ec_GF2m_sqr;
178 	meth->field_div = &ec_GF2m_div;
179 	meth->field_enc = NULL;
180 	meth->field_dec = NULL;
181 	meth->extra1 = NULL;
182 	meth->extra2 = NULL;
183 	meth->extra_free = NULL;
184 
185   CLEANUP:
186 	if (res != MP_OKAY) {
187 		GFMethod_free(meth);
188 		return NULL;
189 	}
190 	return meth;
191 }
192 
193 /* Free the memory allocated (if any) to a GFMethod object. */
194 void
195 GFMethod_free(GFMethod *meth)
196 {
197 	if (meth == NULL)
198 		return;
199 	if (meth->constructed == MP_NO)
200 		return;
201 	mp_clear(&meth->irr);
202 	if (meth->extra_free != NULL)
203 		meth->extra_free(meth);
204 #ifdef _KERNEL
205 	kmem_free(meth, sizeof(GFMethod));
206 #else
207 	free(meth);
208 #endif
209 }
210 
211 /* Wrapper functions for generic prime field arithmetic. */
212 
213 /* Add two field elements.  Assumes that 0 <= a, b < meth->irr */
214 mp_err
215 ec_GFp_add(const mp_int *a, const mp_int *b, mp_int *r,
216 		   const GFMethod *meth)
217 {
218 	/* PRE: 0 <= a, b < p = meth->irr POST: 0 <= r < p, r = a + b (mod p) */
219 	mp_err res;
220 
221 	if ((res = mp_add(a, b, r)) != MP_OKAY) {
222 		return res;
223 	}
224 	if (mp_cmp(r, &meth->irr) >= 0) {
225 		return mp_sub(r, &meth->irr, r);
226 	}
227 	return res;
228 }
229 
230 /* Negates a field element.  Assumes that 0 <= a < meth->irr */
231 mp_err
232 ec_GFp_neg(const mp_int *a, mp_int *r, const GFMethod *meth)
233 {
234 	/* PRE: 0 <= a < p = meth->irr POST: 0 <= r < p, r = -a (mod p) */
235 
236 	if (mp_cmp_z(a) == 0) {
237 		mp_zero(r);
238 		return MP_OKAY;
239 	}
240 	return mp_sub(&meth->irr, a, r);
241 }
242 
243 /* Subtracts two field elements.  Assumes that 0 <= a, b < meth->irr */
244 mp_err
245 ec_GFp_sub(const mp_int *a, const mp_int *b, mp_int *r,
246 		   const GFMethod *meth)
247 {
248 	mp_err res = MP_OKAY;
249 
250 	/* PRE: 0 <= a, b < p = meth->irr POST: 0 <= r < p, r = a - b (mod p) */
251 	res = mp_sub(a, b, r);
252 	if (res == MP_RANGE) {
253 		MP_CHECKOK(mp_sub(b, a, r));
254 		if (mp_cmp_z(r) < 0) {
255 			MP_CHECKOK(mp_add(r, &meth->irr, r));
256 		}
257 		MP_CHECKOK(ec_GFp_neg(r, r, meth));
258 	}
259 	if (mp_cmp_z(r) < 0) {
260 		MP_CHECKOK(mp_add(r, &meth->irr, r));
261 	}
262   CLEANUP:
263 	return res;
264 }
265 /*
266  * Inline adds for small curve lengths.
267  */
268 /* 3 words */
269 mp_err
270 ec_GFp_add_3(const mp_int *a, const mp_int *b, mp_int *r,
271 			const GFMethod *meth)
272 {
273 	mp_err res = MP_OKAY;
274 	mp_digit a0 = 0, a1 = 0, a2 = 0;
275 	mp_digit r0 = 0, r1 = 0, r2 = 0;
276 	mp_digit carry;
277 
278 	switch(MP_USED(a)) {
279 	case 3:
280 		a2 = MP_DIGIT(a,2);
281 		/* FALLTHROUGH */
282 	case 2:
283 		a1 = MP_DIGIT(a,1);
284 		/* FALLTHROUGH */
285 	case 1:
286 		a0 = MP_DIGIT(a,0);
287 	}
288 	switch(MP_USED(b)) {
289 	case 3:
290 		r2 = MP_DIGIT(b,2);
291 		/* FALLTHROUGH */
292 	case 2:
293 		r1 = MP_DIGIT(b,1);
294 		/* FALLTHROUGH */
295 	case 1:
296 		r0 = MP_DIGIT(b,0);
297 	}
298 
299 #ifndef MPI_AMD64_ADD
300 	MP_ADD_CARRY(a0, r0, r0, 0,     carry);
301 	MP_ADD_CARRY(a1, r1, r1, carry, carry);
302 	MP_ADD_CARRY(a2, r2, r2, carry, carry);
303 #else
304 	__asm__ (
305                 "xorq   %3,%3           \n\t"
306                 "addq   %4,%0           \n\t"
307                 "adcq   %5,%1           \n\t"
308                 "adcq   %6,%2           \n\t"
309                 "adcq   $0,%3           \n\t"
310                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry)
311                 : "r" (a0), "r" (a1), "r" (a2),
312 		  "0" (r0), "1" (r1), "2" (r2)
313                 : "%cc" );
314 #endif
315 
316 	MP_CHECKOK(s_mp_pad(r, 3));
317 	MP_DIGIT(r, 2) = r2;
318 	MP_DIGIT(r, 1) = r1;
319 	MP_DIGIT(r, 0) = r0;
320 	MP_SIGN(r) = MP_ZPOS;
321 	MP_USED(r) = 3;
322 
323 	/* Do quick 'subract' if we've gone over
324 	 * (add the 2's complement of the curve field) */
325 	 a2 = MP_DIGIT(&meth->irr,2);
326 	if (carry ||  r2 >  a2 ||
327 		((r2 == a2) && mp_cmp(r,&meth->irr) != MP_LT)) {
328 		a1 = MP_DIGIT(&meth->irr,1);
329 		a0 = MP_DIGIT(&meth->irr,0);
330 #ifndef MPI_AMD64_ADD
331 		MP_SUB_BORROW(r0, a0, r0, 0,     carry);
332 		MP_SUB_BORROW(r1, a1, r1, carry, carry);
333 		MP_SUB_BORROW(r2, a2, r2, carry, carry);
334 #else
335 		__asm__ (
336 			"subq   %3,%0           \n\t"
337 			"sbbq   %4,%1           \n\t"
338 			"sbbq   %5,%2           \n\t"
339 			: "=r"(r0), "=r"(r1), "=r"(r2)
340 			: "r" (a0), "r" (a1), "r" (a2),
341 			  "0" (r0), "1" (r1), "2" (r2)
342 			: "%cc" );
343 #endif
344 		MP_DIGIT(r, 2) = r2;
345 		MP_DIGIT(r, 1) = r1;
346 		MP_DIGIT(r, 0) = r0;
347 	}
348 
349 	s_mp_clamp(r);
350 
351   CLEANUP:
352 	return res;
353 }
354 
355 /* 4 words */
356 mp_err
357 ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r,
358 			const GFMethod *meth)
359 {
360 	mp_err res = MP_OKAY;
361 	mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0;
362 	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
363 	mp_digit carry;
364 
365 	switch(MP_USED(a)) {
366 	case 4:
367 		a3 = MP_DIGIT(a,3);
368 		/* FALLTHROUGH */
369 	case 3:
370 		a2 = MP_DIGIT(a,2);
371 		/* FALLTHROUGH */
372 	case 2:
373 		a1 = MP_DIGIT(a,1);
374 		/* FALLTHROUGH */
375 	case 1:
376 		a0 = MP_DIGIT(a,0);
377 	}
378 	switch(MP_USED(b)) {
379 	case 4:
380 		r3 = MP_DIGIT(b,3);
381 		/* FALLTHROUGH */
382 	case 3:
383 		r2 = MP_DIGIT(b,2);
384 		/* FALLTHROUGH */
385 	case 2:
386 		r1 = MP_DIGIT(b,1);
387 		/* FALLTHROUGH */
388 	case 1:
389 		r0 = MP_DIGIT(b,0);
390 	}
391 
392 #ifndef MPI_AMD64_ADD
393 	MP_ADD_CARRY(a0, r0, r0, 0,     carry);
394 	MP_ADD_CARRY(a1, r1, r1, carry, carry);
395 	MP_ADD_CARRY(a2, r2, r2, carry, carry);
396 	MP_ADD_CARRY(a3, r3, r3, carry, carry);
397 #else
398 	__asm__ (
399                 "xorq   %4,%4           \n\t"
400                 "addq   %5,%0           \n\t"
401                 "adcq   %6,%1           \n\t"
402                 "adcq   %7,%2           \n\t"
403                 "adcq   %8,%3           \n\t"
404                 "adcq   $0,%4           \n\t"
405                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(carry)
406                 : "r" (a0), "r" (a1), "r" (a2), "r" (a3),
407 		  "0" (r0), "1" (r1), "2" (r2), "3" (r3)
408                 : "%cc" );
409 #endif
410 
411 	MP_CHECKOK(s_mp_pad(r, 4));
412 	MP_DIGIT(r, 3) = r3;
413 	MP_DIGIT(r, 2) = r2;
414 	MP_DIGIT(r, 1) = r1;
415 	MP_DIGIT(r, 0) = r0;
416 	MP_SIGN(r) = MP_ZPOS;
417 	MP_USED(r) = 4;
418 
419 	/* Do quick 'subract' if we've gone over
420 	 * (add the 2's complement of the curve field) */
421 	 a3 = MP_DIGIT(&meth->irr,3);
422 	if (carry ||  r3 >  a3 ||
423 		((r3 == a3) && mp_cmp(r,&meth->irr) != MP_LT)) {
424 		a2 = MP_DIGIT(&meth->irr,2);
425 		a1 = MP_DIGIT(&meth->irr,1);
426 		a0 = MP_DIGIT(&meth->irr,0);
427 #ifndef MPI_AMD64_ADD
428 		MP_SUB_BORROW(r0, a0, r0, 0,     carry);
429 		MP_SUB_BORROW(r1, a1, r1, carry, carry);
430 		MP_SUB_BORROW(r2, a2, r2, carry, carry);
431 		MP_SUB_BORROW(r3, a3, r3, carry, carry);
432 #else
433 		__asm__ (
434 			"subq   %4,%0           \n\t"
435 			"sbbq   %5,%1           \n\t"
436 			"sbbq   %6,%2           \n\t"
437 			"sbbq   %7,%3           \n\t"
438 			: "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
439 			: "r" (a0), "r" (a1), "r" (a2), "r" (a3),
440 			  "0" (r0), "1" (r1), "2" (r2), "3" (r3)
441 			: "%cc" );
442 #endif
443 		MP_DIGIT(r, 3) = r3;
444 		MP_DIGIT(r, 2) = r2;
445 		MP_DIGIT(r, 1) = r1;
446 		MP_DIGIT(r, 0) = r0;
447 	}
448 
449 	s_mp_clamp(r);
450 
451   CLEANUP:
452 	return res;
453 }
454 
455 /* 5 words */
456 mp_err
457 ec_GFp_add_5(const mp_int *a, const mp_int *b, mp_int *r,
458 			const GFMethod *meth)
459 {
460 	mp_err res = MP_OKAY;
461 	mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0;
462 	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
463 	mp_digit carry;
464 
465 	switch(MP_USED(a)) {
466 	case 5:
467 		a4 = MP_DIGIT(a,4);
468 		/* FALLTHROUGH */
469 	case 4:
470 		a3 = MP_DIGIT(a,3);
471 		/* FALLTHROUGH */
472 	case 3:
473 		a2 = MP_DIGIT(a,2);
474 		/* FALLTHROUGH */
475 	case 2:
476 		a1 = MP_DIGIT(a,1);
477 		/* FALLTHROUGH */
478 	case 1:
479 		a0 = MP_DIGIT(a,0);
480 	}
481 	switch(MP_USED(b)) {
482 	case 5:
483 		r4 = MP_DIGIT(b,4);
484 		/* FALLTHROUGH */
485 	case 4:
486 		r3 = MP_DIGIT(b,3);
487 		/* FALLTHROUGH */
488 	case 3:
489 		r2 = MP_DIGIT(b,2);
490 		/* FALLTHROUGH */
491 	case 2:
492 		r1 = MP_DIGIT(b,1);
493 		/* FALLTHROUGH */
494 	case 1:
495 		r0 = MP_DIGIT(b,0);
496 	}
497 
498 	MP_ADD_CARRY(a0, r0, r0, 0,     carry);
499 	MP_ADD_CARRY(a1, r1, r1, carry, carry);
500 	MP_ADD_CARRY(a2, r2, r2, carry, carry);
501 	MP_ADD_CARRY(a3, r3, r3, carry, carry);
502 	MP_ADD_CARRY(a4, r4, r4, carry, carry);
503 
504 	MP_CHECKOK(s_mp_pad(r, 5));
505 	MP_DIGIT(r, 4) = r4;
506 	MP_DIGIT(r, 3) = r3;
507 	MP_DIGIT(r, 2) = r2;
508 	MP_DIGIT(r, 1) = r1;
509 	MP_DIGIT(r, 0) = r0;
510 	MP_SIGN(r) = MP_ZPOS;
511 	MP_USED(r) = 5;
512 
513 	/* Do quick 'subract' if we've gone over
514 	 * (add the 2's complement of the curve field) */
515 	 a4 = MP_DIGIT(&meth->irr,4);
516 	if (carry ||  r4 >  a4 ||
517 		((r4 == a4) && mp_cmp(r,&meth->irr) != MP_LT)) {
518 		a3 = MP_DIGIT(&meth->irr,3);
519 		a2 = MP_DIGIT(&meth->irr,2);
520 		a1 = MP_DIGIT(&meth->irr,1);
521 		a0 = MP_DIGIT(&meth->irr,0);
522 		MP_SUB_BORROW(r0, a0, r0, 0,     carry);
523 		MP_SUB_BORROW(r1, a1, r1, carry, carry);
524 		MP_SUB_BORROW(r2, a2, r2, carry, carry);
525 		MP_SUB_BORROW(r3, a3, r3, carry, carry);
526 		MP_SUB_BORROW(r4, a4, r4, carry, carry);
527 		MP_DIGIT(r, 4) = r4;
528 		MP_DIGIT(r, 3) = r3;
529 		MP_DIGIT(r, 2) = r2;
530 		MP_DIGIT(r, 1) = r1;
531 		MP_DIGIT(r, 0) = r0;
532 	}
533 
534 	s_mp_clamp(r);
535 
536   CLEANUP:
537 	return res;
538 }
539 
540 /* 6 words */
541 mp_err
542 ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r,
543 			const GFMethod *meth)
544 {
545 	mp_err res = MP_OKAY;
546 	mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0, a5 = 0;
547 	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
548 	mp_digit carry;
549 
550 	switch(MP_USED(a)) {
551 	case 6:
552 		a5 = MP_DIGIT(a,5);
553 		/* FALLTHROUGH */
554 	case 5:
555 		a4 = MP_DIGIT(a,4);
556 		/* FALLTHROUGH */
557 	case 4:
558 		a3 = MP_DIGIT(a,3);
559 		/* FALLTHROUGH */
560 	case 3:
561 		a2 = MP_DIGIT(a,2);
562 		/* FALLTHROUGH */
563 	case 2:
564 		a1 = MP_DIGIT(a,1);
565 		/* FALLTHROUGH */
566 	case 1:
567 		a0 = MP_DIGIT(a,0);
568 	}
569 	switch(MP_USED(b)) {
570 	case 6:
571 		r5 = MP_DIGIT(b,5);
572 		/* FALLTHROUGH */
573 	case 5:
574 		r4 = MP_DIGIT(b,4);
575 		/* FALLTHROUGH */
576 	case 4:
577 		r3 = MP_DIGIT(b,3);
578 		/* FALLTHROUGH */
579 	case 3:
580 		r2 = MP_DIGIT(b,2);
581 		/* FALLTHROUGH */
582 	case 2:
583 		r1 = MP_DIGIT(b,1);
584 		/* FALLTHROUGH */
585 	case 1:
586 		r0 = MP_DIGIT(b,0);
587 	}
588 
589 	MP_ADD_CARRY(a0, r0, r0, 0,     carry);
590 	MP_ADD_CARRY(a1, r1, r1, carry, carry);
591 	MP_ADD_CARRY(a2, r2, r2, carry, carry);
592 	MP_ADD_CARRY(a3, r3, r3, carry, carry);
593 	MP_ADD_CARRY(a4, r4, r4, carry, carry);
594 	MP_ADD_CARRY(a5, r5, r5, carry, carry);
595 
596 	MP_CHECKOK(s_mp_pad(r, 6));
597 	MP_DIGIT(r, 5) = r5;
598 	MP_DIGIT(r, 4) = r4;
599 	MP_DIGIT(r, 3) = r3;
600 	MP_DIGIT(r, 2) = r2;
601 	MP_DIGIT(r, 1) = r1;
602 	MP_DIGIT(r, 0) = r0;
603 	MP_SIGN(r) = MP_ZPOS;
604 	MP_USED(r) = 6;
605 
606 	/* Do quick 'subract' if we've gone over
607 	 * (add the 2's complement of the curve field) */
608 	a5 = MP_DIGIT(&meth->irr,5);
609 	if (carry ||  r5 >  a5 ||
610 		((r5 == a5) && mp_cmp(r,&meth->irr) != MP_LT)) {
611 		a4 = MP_DIGIT(&meth->irr,4);
612 		a3 = MP_DIGIT(&meth->irr,3);
613 		a2 = MP_DIGIT(&meth->irr,2);
614 		a1 = MP_DIGIT(&meth->irr,1);
615 		a0 = MP_DIGIT(&meth->irr,0);
616 		MP_SUB_BORROW(r0, a0, r0, 0,     carry);
617 		MP_SUB_BORROW(r1, a1, r1, carry, carry);
618 		MP_SUB_BORROW(r2, a2, r2, carry, carry);
619 		MP_SUB_BORROW(r3, a3, r3, carry, carry);
620 		MP_SUB_BORROW(r4, a4, r4, carry, carry);
621 		MP_SUB_BORROW(r5, a5, r5, carry, carry);
622 		MP_DIGIT(r, 5) = r5;
623 		MP_DIGIT(r, 4) = r4;
624 		MP_DIGIT(r, 3) = r3;
625 		MP_DIGIT(r, 2) = r2;
626 		MP_DIGIT(r, 1) = r1;
627 		MP_DIGIT(r, 0) = r0;
628 	}
629 
630 	s_mp_clamp(r);
631 
632   CLEANUP:
633 	return res;
634 }
635 
636 /*
637  * The following subraction functions do in-line subractions based
638  * on our curve size.
639  *
640  * ... 3 words
641  */
642 mp_err
643 ec_GFp_sub_3(const mp_int *a, const mp_int *b, mp_int *r,
644 			const GFMethod *meth)
645 {
646 	mp_err res = MP_OKAY;
647 	mp_digit b0 = 0, b1 = 0, b2 = 0;
648 	mp_digit r0 = 0, r1 = 0, r2 = 0;
649 	mp_digit borrow;
650 
651 	switch(MP_USED(a)) {
652 	case 3:
653 		r2 = MP_DIGIT(a,2);
654 		/* FALLTHROUGH */
655 	case 2:
656 		r1 = MP_DIGIT(a,1);
657 		/* FALLTHROUGH */
658 	case 1:
659 		r0 = MP_DIGIT(a,0);
660 	}
661 	switch(MP_USED(b)) {
662 	case 3:
663 		b2 = MP_DIGIT(b,2);
664 		/* FALLTHROUGH */
665 	case 2:
666 		b1 = MP_DIGIT(b,1);
667 		/* FALLTHROUGH */
668 	case 1:
669 		b0 = MP_DIGIT(b,0);
670 	}
671 
672 #ifndef MPI_AMD64_ADD
673 	MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
674 	MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
675 	MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
676 #else
677 	__asm__ (
678                 "xorq   %3,%3           \n\t"
679                 "subq   %4,%0           \n\t"
680                 "sbbq   %5,%1           \n\t"
681                 "sbbq   %6,%2           \n\t"
682                 "adcq   $0,%3           \n\t"
683                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r" (borrow)
684                 : "r" (b0), "r" (b1), "r" (b2),
685 		  "0" (r0), "1" (r1), "2" (r2)
686                 : "%cc" );
687 #endif
688 
689 	/* Do quick 'add' if we've gone under 0
690 	 * (subtract the 2's complement of the curve field) */
691 	if (borrow) {
692 	 	b2 = MP_DIGIT(&meth->irr,2);
693 		b1 = MP_DIGIT(&meth->irr,1);
694 		b0 = MP_DIGIT(&meth->irr,0);
695 #ifndef MPI_AMD64_ADD
696 		MP_ADD_CARRY(b0, r0, r0, 0,      borrow);
697 		MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
698 		MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
699 #else
700 		__asm__ (
701 			"addq   %3,%0           \n\t"
702 			"adcq   %4,%1           \n\t"
703 			"adcq   %5,%2           \n\t"
704 			: "=r"(r0), "=r"(r1), "=r"(r2)
705 			: "r" (b0), "r" (b1), "r" (b2),
706   			  "0" (r0), "1" (r1), "2" (r2)
707 			: "%cc" );
708 #endif
709 	}
710 
711 #ifdef MPI_AMD64_ADD
712 	/* compiler fakeout? */
713 	if ((r2 == b0) && (r1 == b0) && (r0 == b0)) {
714 		MP_CHECKOK(s_mp_pad(r, 4));
715 	}
716 #endif
717 	MP_CHECKOK(s_mp_pad(r, 3));
718 	MP_DIGIT(r, 2) = r2;
719 	MP_DIGIT(r, 1) = r1;
720 	MP_DIGIT(r, 0) = r0;
721 	MP_SIGN(r) = MP_ZPOS;
722 	MP_USED(r) = 3;
723 	s_mp_clamp(r);
724 
725   CLEANUP:
726 	return res;
727 }
728 
729 /* 4 words */
730 mp_err
731 ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r,
732 			const GFMethod *meth)
733 {
734 	mp_err res = MP_OKAY;
735 	mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0;
736 	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
737 	mp_digit borrow;
738 
739 	switch(MP_USED(a)) {
740 	case 4:
741 		r3 = MP_DIGIT(a,3);
742 		/* FALLTHROUGH */
743 	case 3:
744 		r2 = MP_DIGIT(a,2);
745 		/* FALLTHROUGH */
746 	case 2:
747 		r1 = MP_DIGIT(a,1);
748 		/* FALLTHROUGH */
749 	case 1:
750 		r0 = MP_DIGIT(a,0);
751 	}
752 	switch(MP_USED(b)) {
753 	case 4:
754 		b3 = MP_DIGIT(b,3);
755 		/* FALLTHROUGH */
756 	case 3:
757 		b2 = MP_DIGIT(b,2);
758 		/* FALLTHROUGH */
759 	case 2:
760 		b1 = MP_DIGIT(b,1);
761 		/* FALLTHROUGH */
762 	case 1:
763 		b0 = MP_DIGIT(b,0);
764 	}
765 
766 #ifndef MPI_AMD64_ADD
767 	MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
768 	MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
769 	MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
770 	MP_SUB_BORROW(r3, b3, r3, borrow, borrow);
771 #else
772 	__asm__ (
773                 "xorq   %4,%4           \n\t"
774                 "subq   %5,%0           \n\t"
775                 "sbbq   %6,%1           \n\t"
776                 "sbbq   %7,%2           \n\t"
777                 "sbbq   %8,%3           \n\t"
778                 "adcq   $0,%4           \n\t"
779                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r" (borrow)
780                 : "r" (b0), "r" (b1), "r" (b2), "r" (b3),
781 		  "0" (r0), "1" (r1), "2" (r2), "3" (r3)
782                 : "%cc" );
783 #endif
784 
785 	/* Do quick 'add' if we've gone under 0
786 	 * (subtract the 2's complement of the curve field) */
787 	if (borrow) {
788 	 	b3 = MP_DIGIT(&meth->irr,3);
789 	 	b2 = MP_DIGIT(&meth->irr,2);
790 		b1 = MP_DIGIT(&meth->irr,1);
791 		b0 = MP_DIGIT(&meth->irr,0);
792 #ifndef MPI_AMD64_ADD
793 		MP_ADD_CARRY(b0, r0, r0, 0,      borrow);
794 		MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
795 		MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
796 		MP_ADD_CARRY(b3, r3, r3, borrow, borrow);
797 #else
798 		__asm__ (
799 			"addq   %4,%0           \n\t"
800 			"adcq   %5,%1           \n\t"
801 			"adcq   %6,%2           \n\t"
802 			"adcq   %7,%3           \n\t"
803 			: "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
804 			: "r" (b0), "r" (b1), "r" (b2), "r" (b3),
805   			  "0" (r0), "1" (r1), "2" (r2), "3" (r3)
806 			: "%cc" );
807 #endif
808 	}
809 #ifdef MPI_AMD64_ADD
810 	/* compiler fakeout? */
811 	if ((r3 == b0) && (r1 == b0) && (r0 == b0)) {
812 		MP_CHECKOK(s_mp_pad(r, 4));
813 	}
814 #endif
815 	MP_CHECKOK(s_mp_pad(r, 4));
816 	MP_DIGIT(r, 3) = r3;
817 	MP_DIGIT(r, 2) = r2;
818 	MP_DIGIT(r, 1) = r1;
819 	MP_DIGIT(r, 0) = r0;
820 	MP_SIGN(r) = MP_ZPOS;
821 	MP_USED(r) = 4;
822 	s_mp_clamp(r);
823 
824   CLEANUP:
825 	return res;
826 }
827 
828 /* 5 words */
829 mp_err
830 ec_GFp_sub_5(const mp_int *a, const mp_int *b, mp_int *r,
831 			const GFMethod *meth)
832 {
833 	mp_err res = MP_OKAY;
834 	mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0;
835 	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
836 	mp_digit borrow;
837 
838 	switch(MP_USED(a)) {
839 	case 5:
840 		r4 = MP_DIGIT(a,4);
841 		/* FALLTHROUGH */
842 	case 4:
843 		r3 = MP_DIGIT(a,3);
844 		/* FALLTHROUGH */
845 	case 3:
846 		r2 = MP_DIGIT(a,2);
847 		/* FALLTHROUGH */
848 	case 2:
849 		r1 = MP_DIGIT(a,1);
850 		/* FALLTHROUGH */
851 	case 1:
852 		r0 = MP_DIGIT(a,0);
853 	}
854 	switch(MP_USED(b)) {
855 	case 5:
856 		b4 = MP_DIGIT(b,4);
857 		/* FALLTHROUGH */
858 	case 4:
859 		b3 = MP_DIGIT(b,3);
860 		/* FALLTHROUGH */
861 	case 3:
862 		b2 = MP_DIGIT(b,2);
863 		/* FALLTHROUGH */
864 	case 2:
865 		b1 = MP_DIGIT(b,1);
866 		/* FALLTHROUGH */
867 	case 1:
868 		b0 = MP_DIGIT(b,0);
869 	}
870 
871 	MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
872 	MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
873 	MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
874 	MP_SUB_BORROW(r3, b3, r3, borrow, borrow);
875 	MP_SUB_BORROW(r4, b4, r4, borrow, borrow);
876 
877 	/* Do quick 'add' if we've gone under 0
878 	 * (subtract the 2's complement of the curve field) */
879 	if (borrow) {
880 	 	b4 = MP_DIGIT(&meth->irr,4);
881 	 	b3 = MP_DIGIT(&meth->irr,3);
882 	 	b2 = MP_DIGIT(&meth->irr,2);
883 		b1 = MP_DIGIT(&meth->irr,1);
884 		b0 = MP_DIGIT(&meth->irr,0);
885 		MP_ADD_CARRY(b0, r0, r0, 0,      borrow);
886 		MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
887 		MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
888 		MP_ADD_CARRY(b3, r3, r3, borrow, borrow);
889 	}
890 	MP_CHECKOK(s_mp_pad(r, 5));
891 	MP_DIGIT(r, 4) = r4;
892 	MP_DIGIT(r, 3) = r3;
893 	MP_DIGIT(r, 2) = r2;
894 	MP_DIGIT(r, 1) = r1;
895 	MP_DIGIT(r, 0) = r0;
896 	MP_SIGN(r) = MP_ZPOS;
897 	MP_USED(r) = 5;
898 	s_mp_clamp(r);
899 
900   CLEANUP:
901 	return res;
902 }
903 
904 /* 6 words */
905 mp_err
906 ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r,
907 			const GFMethod *meth)
908 {
909 	mp_err res = MP_OKAY;
910 	mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0, b5 = 0;
911 	mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
912 	mp_digit borrow;
913 
914 	switch(MP_USED(a)) {
915 	case 6:
916 		r5 = MP_DIGIT(a,5);
917 		/* FALLTHROUGH */
918 	case 5:
919 		r4 = MP_DIGIT(a,4);
920 		/* FALLTHROUGH */
921 	case 4:
922 		r3 = MP_DIGIT(a,3);
923 		/* FALLTHROUGH */
924 	case 3:
925 		r2 = MP_DIGIT(a,2);
926 		/* FALLTHROUGH */
927 	case 2:
928 		r1 = MP_DIGIT(a,1);
929 		/* FALLTHROUGH */
930 	case 1:
931 		r0 = MP_DIGIT(a,0);
932 	}
933 	switch(MP_USED(b)) {
934 	case 6:
935 		b5 = MP_DIGIT(b,5);
936 		/* FALLTHROUGH */
937 	case 5:
938 		b4 = MP_DIGIT(b,4);
939 		/* FALLTHROUGH */
940 	case 4:
941 		b3 = MP_DIGIT(b,3);
942 		/* FALLTHROUGH */
943 	case 3:
944 		b2 = MP_DIGIT(b,2);
945 		/* FALLTHROUGH */
946 	case 2:
947 		b1 = MP_DIGIT(b,1);
948 		/* FALLTHROUGH */
949 	case 1:
950 		b0 = MP_DIGIT(b,0);
951 	}
952 
953 	MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
954 	MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
955 	MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
956 	MP_SUB_BORROW(r3, b3, r3, borrow, borrow);
957 	MP_SUB_BORROW(r4, b4, r4, borrow, borrow);
958 	MP_SUB_BORROW(r5, b5, r5, borrow, borrow);
959 
960 	/* Do quick 'add' if we've gone under 0
961 	 * (subtract the 2's complement of the curve field) */
962 	if (borrow) {
963 	 	b5 = MP_DIGIT(&meth->irr,5);
964 	 	b4 = MP_DIGIT(&meth->irr,4);
965 	 	b3 = MP_DIGIT(&meth->irr,3);
966 	 	b2 = MP_DIGIT(&meth->irr,2);
967 		b1 = MP_DIGIT(&meth->irr,1);
968 		b0 = MP_DIGIT(&meth->irr,0);
969 		MP_ADD_CARRY(b0, r0, r0, 0,      borrow);
970 		MP_ADD_CARRY(b1, r1, r1, borrow, borrow);
971 		MP_ADD_CARRY(b2, r2, r2, borrow, borrow);
972 		MP_ADD_CARRY(b3, r3, r3, borrow, borrow);
973 		MP_ADD_CARRY(b4, r4, r4, borrow, borrow);
974 	}
975 
976 	MP_CHECKOK(s_mp_pad(r, 6));
977 	MP_DIGIT(r, 5) = r5;
978 	MP_DIGIT(r, 4) = r4;
979 	MP_DIGIT(r, 3) = r3;
980 	MP_DIGIT(r, 2) = r2;
981 	MP_DIGIT(r, 1) = r1;
982 	MP_DIGIT(r, 0) = r0;
983 	MP_SIGN(r) = MP_ZPOS;
984 	MP_USED(r) = 6;
985 	s_mp_clamp(r);
986 
987   CLEANUP:
988 	return res;
989 }
990 
991 
992 /* Reduces an integer to a field element. */
993 mp_err
994 ec_GFp_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
995 {
996 	return mp_mod(a, &meth->irr, r);
997 }
998 
999 /* Multiplies two field elements. */
1000 mp_err
1001 ec_GFp_mul(const mp_int *a, const mp_int *b, mp_int *r,
1002 		   const GFMethod *meth)
1003 {
1004 	return mp_mulmod(a, b, &meth->irr, r);
1005 }
1006 
1007 /* Squares a field element. */
1008 mp_err
1009 ec_GFp_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
1010 {
1011 	return mp_sqrmod(a, &meth->irr, r);
1012 }
1013 
1014 /* Divides two field elements. If a is NULL, then returns the inverse of
1015  * b. */
1016 mp_err
1017 ec_GFp_div(const mp_int *a, const mp_int *b, mp_int *r,
1018 		   const GFMethod *meth)
1019 {
1020 	mp_err res = MP_OKAY;
1021 	mp_int t;
1022 
1023 	/* If a is NULL, then return the inverse of b, otherwise return a/b. */
1024 	if (a == NULL) {
1025 		return mp_invmod(b, &meth->irr, r);
1026 	} else {
1027 		/* MPI doesn't support divmod, so we implement it using invmod and
1028 		 * mulmod. */
1029 		MP_CHECKOK(mp_init(&t, FLAG(b)));
1030 		MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
1031 		MP_CHECKOK(mp_mulmod(a, &t, &meth->irr, r));
1032 	  CLEANUP:
1033 		mp_clear(&t);
1034 		return res;
1035 	}
1036 }
1037 
1038 /* Wrapper functions for generic binary polynomial field arithmetic. */
1039 
1040 /* Adds two field elements. */
1041 mp_err
1042 ec_GF2m_add(const mp_int *a, const mp_int *b, mp_int *r,
1043 			const GFMethod *meth)
1044 {
1045 	return mp_badd(a, b, r);
1046 }
1047 
1048 /* Negates a field element. Note that for binary polynomial fields, the
1049  * negation of a field element is the field element itself. */
1050 mp_err
1051 ec_GF2m_neg(const mp_int *a, mp_int *r, const GFMethod *meth)
1052 {
1053 	if (a == r) {
1054 		return MP_OKAY;
1055 	} else {
1056 		return mp_copy(a, r);
1057 	}
1058 }
1059 
1060 /* Reduces a binary polynomial to a field element. */
1061 mp_err
1062 ec_GF2m_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
1063 {
1064 	return mp_bmod(a, meth->irr_arr, r);
1065 }
1066 
1067 /* Multiplies two field elements. */
1068 mp_err
1069 ec_GF2m_mul(const mp_int *a, const mp_int *b, mp_int *r,
1070 			const GFMethod *meth)
1071 {
1072 	return mp_bmulmod(a, b, meth->irr_arr, r);
1073 }
1074 
1075 /* Squares a field element. */
1076 mp_err
1077 ec_GF2m_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
1078 {
1079 	return mp_bsqrmod(a, meth->irr_arr, r);
1080 }
1081 
1082 /* Divides two field elements. If a is NULL, then returns the inverse of
1083  * b. */
1084 mp_err
1085 ec_GF2m_div(const mp_int *a, const mp_int *b, mp_int *r,
1086 			const GFMethod *meth)
1087 {
1088 	mp_err res = MP_OKAY;
1089 	mp_int t;
1090 
1091 	/* If a is NULL, then return the inverse of b, otherwise return a/b. */
1092 	if (a == NULL) {
1093 		/* The GF(2^m) portion of MPI doesn't support invmod, so we
1094 		 * compute 1/b. */
1095 		MP_CHECKOK(mp_init(&t, FLAG(b)));
1096 		MP_CHECKOK(mp_set_int(&t, 1));
1097 		MP_CHECKOK(mp_bdivmod(&t, b, &meth->irr, meth->irr_arr, r));
1098 	  CLEANUP:
1099 		mp_clear(&t);
1100 		return res;
1101 	} else {
1102 		return mp_bdivmod(a, b, &meth->irr, meth->irr_arr, r);
1103 	}
1104 }
1105