xref: /illumos-gate/usr/src/common/crypto/ecc/ecp_192.c (revision e121b61f5e8ffbeb2f6b373c967c80351333ee21)
1 /*
2  * ***** BEGIN LICENSE BLOCK *****
3  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4  *
5  * The contents of this file are subject to the Mozilla Public License Version
6  * 1.1 (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  * http://www.mozilla.org/MPL/
9  *
10  * Software distributed under the License is distributed on an "AS IS" basis,
11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12  * for the specific language governing rights and limitations under the
13  * License.
14  *
15  * The Original Code is the elliptic curve math library for prime field curves.
16  *
17  * The Initial Developer of the Original Code is
18  * Sun Microsystems, Inc.
19  * Portions created by the Initial Developer are Copyright (C) 2003
20  * the Initial Developer. All Rights Reserved.
21  *
22  * Contributor(s):
23  *   Douglas Stebila <douglas@stebila.ca>, Sun Microsystems Laboratories
24  *
25  * Alternatively, the contents of this file may be used under the terms of
26  * either the GNU General Public License Version 2 or later (the "GPL"), or
27  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28  * in which case the provisions of the GPL or the LGPL are applicable instead
29  * of those above. If you wish to allow use of your version of this file only
30  * under the terms of either the GPL or the LGPL, and not to allow others to
31  * use your version of this file under the terms of the MPL, indicate your
32  * decision by deleting the provisions above and replace them with the notice
33  * and other provisions required by the GPL or the LGPL. If you do not delete
34  * the provisions above, a recipient may use your version of this file under
35  * the terms of any one of the MPL, the GPL or the LGPL.
36  *
37  * ***** END LICENSE BLOCK ***** */
38 /*
39  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
40  * Use is subject to license terms.
41  *
42  * Sun elects to use this software under the MPL license.
43  */
44 
45 #include "ecp.h"
46 #include "mpi.h"
47 #include "mplogic.h"
48 #include "mpi-priv.h"
49 #ifndef _KERNEL
50 #include <stdlib.h>
51 #endif
52 
/*
 * Number of mp_digit words in a P-192 field element.  The reduction code
 * below pads its result to 6 digits in ECL_THIRTY_TWO_BIT builds and to 3
 * digits otherwise.
 * NOTE(review): ECL_CURVE_DIGITS is defined outside this file; presumably
 * it rounds 192 bits up to whole digits -- confirm against its definition.
 */
#define ECP192_DIGITS ECL_CURVE_DIGITS(192)
54 
55 /* Fast modular reduction for p192 = 2^192 - 2^64 - 1.  a can be r. Uses
56  * algorithm 7 from Brown, Hankerson, Lopez, Menezes. Software
57  * Implementation of the NIST Elliptic Curves over Prime Fields. */
58 mp_err
59 ec_GFp_nistp192_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
60 {
61 	mp_err res = MP_OKAY;
62 	mp_size a_used = MP_USED(a);
63 	mp_digit r3;
64 #ifndef MPI_AMD64_ADD
65 	mp_digit carry;
66 #endif
67 #ifdef ECL_THIRTY_TWO_BIT
68 	mp_digit a5a = 0, a5b = 0, a4a = 0, a4b = 0, a3a = 0, a3b = 0;
69         mp_digit r0a, r0b, r1a, r1b, r2a, r2b;
70 #else
71 	mp_digit a5 = 0, a4 = 0, a3 = 0;
72         mp_digit r0, r1, r2;
73 #endif
74 
75 	/* reduction not needed if a is not larger than field size */
76 	if (a_used < ECP192_DIGITS) {
77 		if (a == r) {
78 			return MP_OKAY;
79 		}
80 		return mp_copy(a, r);
81 	}
82 
83 	/* for polynomials larger than twice the field size, use regular
84 	 * reduction */
85 	if (a_used > ECP192_DIGITS*2) {
86 		MP_CHECKOK(mp_mod(a, &meth->irr, r));
87 	} else {
88 		/* copy out upper words of a */
89 
90 #ifdef ECL_THIRTY_TWO_BIT
91 
92 		/* in all the math below,
93 		 * nXb is most signifiant, nXa is least significant */
94 		switch (a_used) {
95 		case 12:
96 			a5b = MP_DIGIT(a, 11);
97 			/* FALLTHROUGH */
98 		case 11:
99 			a5a = MP_DIGIT(a, 10);
100 			/* FALLTHROUGH */
101 		case 10:
102 			a4b = MP_DIGIT(a, 9);
103 			/* FALLTHROUGH */
104 		case 9:
105 			a4a = MP_DIGIT(a, 8);
106 			/* FALLTHROUGH */
107 		case 8:
108 			a3b = MP_DIGIT(a, 7);
109 			/* FALLTHROUGH */
110 		case 7:
111 			a3a = MP_DIGIT(a, 6);
112 		}
113 
114 
115                 r2b= MP_DIGIT(a, 5);
116                 r2a= MP_DIGIT(a, 4);
117                 r1b = MP_DIGIT(a, 3);
118                 r1a = MP_DIGIT(a, 2);
119                 r0b = MP_DIGIT(a, 1);
120                 r0a = MP_DIGIT(a, 0);
121 
122 		/* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
123 		MP_ADD_CARRY(r0a, a3a, r0a, 0,    carry);
124 		MP_ADD_CARRY(r0b, a3b, r0b, carry, carry);
125 		MP_ADD_CARRY(r1a, a3a, r1a, carry, carry);
126 		MP_ADD_CARRY(r1b, a3b, r1b, carry, carry);
127 		MP_ADD_CARRY(r2a, a4a, r2a, carry, carry);
128 		MP_ADD_CARRY(r2b, a4b, r2b, carry, carry);
129 		r3 = carry; carry = 0;
130 		MP_ADD_CARRY(r0a, a5a, r0a, 0,     carry);
131 		MP_ADD_CARRY(r0b, a5b, r0b, carry, carry);
132 		MP_ADD_CARRY(r1a, a5a, r1a, carry, carry);
133 		MP_ADD_CARRY(r1b, a5b, r1b, carry, carry);
134 		MP_ADD_CARRY(r2a, a5a, r2a, carry, carry);
135 		MP_ADD_CARRY(r2b, a5b, r2b, carry, carry);
136 		r3 += carry;
137 		MP_ADD_CARRY(r1a, a4a, r1a, 0,     carry);
138 		MP_ADD_CARRY(r1b, a4b, r1b, carry, carry);
139 		MP_ADD_CARRY(r2a,   0, r2a, carry, carry);
140 		MP_ADD_CARRY(r2b,   0, r2b, carry, carry);
141 		r3 += carry;
142 
143 		/* reduce out the carry */
144 		while (r3) {
145 			MP_ADD_CARRY(r0a, r3, r0a, 0,     carry);
146 			MP_ADD_CARRY(r0b,  0, r0b, carry, carry);
147 			MP_ADD_CARRY(r1a, r3, r1a, carry, carry);
148 			MP_ADD_CARRY(r1b,  0, r1b, carry, carry);
149 			MP_ADD_CARRY(r2a,  0, r2a, carry, carry);
150 			MP_ADD_CARRY(r2b,  0, r2b, carry, carry);
151 			r3 = carry;
152 		}
153 
154 		/* check for final reduction */
155 		/*
156 		 * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
157 		 * 0xffffffffffffffff. That means we can only be over and need
158 		 * one more reduction
159 		 *  if r2 == 0xffffffffffffffffff (same as r2+1 == 0)
160 		 *     and
161 		 *     r1 == 0xffffffffffffffffff   or
162 		 *     r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff
163 		 * In all cases, we subtract the field (or add the 2's
164 		 * complement value (1,1,0)).  (r0, r1, r2)
165 		 */
166 		if (((r2b == 0xffffffff) && (r2a == 0xffffffff)
167 			&& (r1b == 0xffffffff) ) &&
168 			   ((r1a == 0xffffffff) ||
169 			    (r1a == 0xfffffffe) && (r0a == 0xffffffff) &&
170 					(r0b == 0xffffffff)) ) {
171 			/* do a quick subtract */
172 			MP_ADD_CARRY(r0a, 1, r0a, 0, carry);
173 			r0b += carry;
174 			r1a = r1b = r2a = r2b = 0;
175 		}
176 
177 		/* set the lower words of r */
178 		if (a != r) {
179 			MP_CHECKOK(s_mp_pad(r, 6));
180 		}
181 		MP_DIGIT(r, 5) = r2b;
182 		MP_DIGIT(r, 4) = r2a;
183 		MP_DIGIT(r, 3) = r1b;
184 		MP_DIGIT(r, 2) = r1a;
185 		MP_DIGIT(r, 1) = r0b;
186 		MP_DIGIT(r, 0) = r0a;
187 		MP_USED(r) = 6;
188 #else
189 		switch (a_used) {
190 		case 6:
191 			a5 = MP_DIGIT(a, 5);
192 			/* FALLTHROUGH */
193 		case 5:
194 			a4 = MP_DIGIT(a, 4);
195 			/* FALLTHROUGH */
196 		case 4:
197 			a3 = MP_DIGIT(a, 3);
198 		}
199 
200                 r2 = MP_DIGIT(a, 2);
201                 r1 = MP_DIGIT(a, 1);
202                 r0 = MP_DIGIT(a, 0);
203 
204 		/* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
205 #ifndef MPI_AMD64_ADD
206 		MP_ADD_CARRY(r0, a3, r0, 0,     carry);
207 		MP_ADD_CARRY(r1, a3, r1, carry, carry);
208 		MP_ADD_CARRY(r2, a4, r2, carry, carry);
209 		r3 = carry;
210 		MP_ADD_CARRY(r0, a5, r0, 0,     carry);
211 		MP_ADD_CARRY(r1, a5, r1, carry, carry);
212 		MP_ADD_CARRY(r2, a5, r2, carry, carry);
213 		r3 += carry;
214 		MP_ADD_CARRY(r1, a4, r1, 0,     carry);
215 		MP_ADD_CARRY(r2,  0, r2, carry, carry);
216 		r3 += carry;
217 
218 #else
219                 r2 = MP_DIGIT(a, 2);
220                 r1 = MP_DIGIT(a, 1);
221                 r0 = MP_DIGIT(a, 0);
222 
223                 /* set the lower words of r */
224                 __asm__ (
225                 "xorq   %3,%3           \n\t"
226                 "addq   %4,%0           \n\t"
227                 "adcq   %4,%1           \n\t"
228                 "adcq   %5,%2           \n\t"
229                 "adcq   $0,%3           \n\t"
230                 "addq   %6,%0           \n\t"
231                 "adcq   %6,%1           \n\t"
232                 "adcq   %6,%2           \n\t"
233                 "adcq   $0,%3           \n\t"
234                 "addq   %5,%1           \n\t"
235                 "adcq   $0,%2           \n\t"
236                 "adcq   $0,%3           \n\t"
237                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3),
238 		  "=r"(a4), "=r"(a5)
239                 : "0" (r0), "1" (r1), "2" (r2), "3" (r3),
240 		  "4" (a3), "5" (a4), "6"(a5)
241                 : "%cc" );
242 #endif
243 
244 		/* reduce out the carry */
245 		while (r3) {
246 #ifndef MPI_AMD64_ADD
247 			MP_ADD_CARRY(r0, r3, r0, 0,     carry);
248 			MP_ADD_CARRY(r1, r3, r1, carry, carry);
249 			MP_ADD_CARRY(r2,  0, r2, carry, carry);
250 			r3 = carry;
251 #else
252 			a3=r3;
253               		__asm__ (
254                 	"xorq   %3,%3           \n\t"
255                 	"addq   %4,%0           \n\t"
256                 	"adcq   %4,%1           \n\t"
257                 	"adcq   $0,%2           \n\t"
258                 	"adcq   $0,%3           \n\t"
259                 	: "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3)
260                 	: "0" (r0), "1" (r1), "2" (r2), "3" (r3), "4"(a3)
261                 	: "%cc" );
262 #endif
263 		}
264 
265 		/* check for final reduction */
266 		/*
267 		 * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
268 		 * 0xffffffffffffffff. That means we can only be over and need
269 		 * one more reduction
270 		 *  if r2 == 0xffffffffffffffffff (same as r2+1 == 0)
271 		 *     and
272 		 *     r1 == 0xffffffffffffffffff   or
273 		 *     r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff
274 		 * In all cases, we subtract the field (or add the 2's
275 		 * complement value (1,1,0)).  (r0, r1, r2)
276 		 */
277 		if (r3 || ((r2 == MP_DIGIT_MAX) &&
278 		      ((r1 == MP_DIGIT_MAX) ||
279 			((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
280 			/* do a quick subtract */
281 			r0++;
282 			r1 = r2 = 0;
283 		}
284 		/* set the lower words of r */
285 		if (a != r) {
286 			MP_CHECKOK(s_mp_pad(r, 3));
287 		}
288 		MP_DIGIT(r, 2) = r2;
289 		MP_DIGIT(r, 1) = r1;
290 		MP_DIGIT(r, 0) = r0;
291 		MP_USED(r) = 3;
292 #endif
293 	}
294 
295   CLEANUP:
296 	return res;
297 }
298 
299 #ifndef ECL_THIRTY_TWO_BIT
300 /* Compute the sum of 192 bit curves. Do the work in-line since the
301  * number of words are so small, we don't want to overhead of mp function
302  * calls.  Uses optimized modular reduction for p192.
303  */
304 mp_err
305 ec_GFp_nistp192_add(const mp_int *a, const mp_int *b, mp_int *r,
306 			const GFMethod *meth)
307 {
308 	mp_err res = MP_OKAY;
309 	mp_digit a0 = 0, a1 = 0, a2 = 0;
310 	mp_digit r0 = 0, r1 = 0, r2 = 0;
311 	mp_digit carry;
312 
313 	switch(MP_USED(a)) {
314 	case 3:
315 		a2 = MP_DIGIT(a,2);
316 		/* FALLTHROUGH */
317 	case 2:
318 		a1 = MP_DIGIT(a,1);
319 		/* FALLTHROUGH */
320 	case 1:
321 		a0 = MP_DIGIT(a,0);
322 	}
323 	switch(MP_USED(b)) {
324 	case 3:
325 		r2 = MP_DIGIT(b,2);
326 		/* FALLTHROUGH */
327 	case 2:
328 		r1 = MP_DIGIT(b,1);
329 		/* FALLTHROUGH */
330 	case 1:
331 		r0 = MP_DIGIT(b,0);
332 	}
333 
334 #ifndef MPI_AMD64_ADD
335 	MP_ADD_CARRY(a0, r0, r0, 0,     carry);
336 	MP_ADD_CARRY(a1, r1, r1, carry, carry);
337 	MP_ADD_CARRY(a2, r2, r2, carry, carry);
338 #else
339 	__asm__ (
340                 "xorq   %3,%3           \n\t"
341                 "addq   %4,%0           \n\t"
342                 "adcq   %5,%1           \n\t"
343                 "adcq   %6,%2           \n\t"
344                 "adcq   $0,%3           \n\t"
345                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry)
346                 : "r" (a0), "r" (a1), "r" (a2), "0" (r0),
347 		  "1" (r1), "2" (r2)
348                 : "%cc" );
349 #endif
350 
351 	/* Do quick 'subract' if we've gone over
352 	 * (add the 2's complement of the curve field) */
353 	if (carry || ((r2 == MP_DIGIT_MAX) &&
354 		      ((r1 == MP_DIGIT_MAX) ||
355 			((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
356 #ifndef MPI_AMD64_ADD
357 		MP_ADD_CARRY(r0, 1, r0, 0,     carry);
358 		MP_ADD_CARRY(r1, 1, r1, carry, carry);
359 		MP_ADD_CARRY(r2, 0, r2, carry, carry);
360 #else
361 		__asm__ (
362 			"addq   $1,%0           \n\t"
363 			"adcq   $1,%1           \n\t"
364 			"adcq   $0,%2           \n\t"
365 			: "=r"(r0), "=r"(r1), "=r"(r2)
366 			: "0" (r0), "1" (r1), "2" (r2)
367 			: "%cc" );
368 #endif
369 	}
370 
371 
372 	MP_CHECKOK(s_mp_pad(r, 3));
373 	MP_DIGIT(r, 2) = r2;
374 	MP_DIGIT(r, 1) = r1;
375 	MP_DIGIT(r, 0) = r0;
376 	MP_SIGN(r) = MP_ZPOS;
377 	MP_USED(r) = 3;
378 	s_mp_clamp(r);
379 
380 
381   CLEANUP:
382 	return res;
383 }
384 
385 /* Compute the diff of 192 bit curves. Do the work in-line since the
386  * number of words are so small, we don't want to overhead of mp function
387  * calls.  Uses optimized modular reduction for p192.
388  */
389 mp_err
390 ec_GFp_nistp192_sub(const mp_int *a, const mp_int *b, mp_int *r,
391 			const GFMethod *meth)
392 {
393 	mp_err res = MP_OKAY;
394 	mp_digit b0 = 0, b1 = 0, b2 = 0;
395 	mp_digit r0 = 0, r1 = 0, r2 = 0;
396 	mp_digit borrow;
397 
398 	switch(MP_USED(a)) {
399 	case 3:
400 		r2 = MP_DIGIT(a,2);
401 		/* FALLTHROUGH */
402 	case 2:
403 		r1 = MP_DIGIT(a,1);
404 		/* FALLTHROUGH */
405 	case 1:
406 		r0 = MP_DIGIT(a,0);
407 	}
408 
409 	switch(MP_USED(b)) {
410 	case 3:
411 		b2 = MP_DIGIT(b,2);
412 		/* FALLTHROUGH */
413 	case 2:
414 		b1 = MP_DIGIT(b,1);
415 		/* FALLTHROUGH */
416 	case 1:
417 		b0 = MP_DIGIT(b,0);
418 	}
419 
420 #ifndef MPI_AMD64_ADD
421 	MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
422 	MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
423 	MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
424 #else
425 	__asm__ (
426                 "xorq   %3,%3           \n\t"
427                 "subq   %4,%0           \n\t"
428                 "sbbq   %5,%1           \n\t"
429                 "sbbq   %6,%2           \n\t"
430                 "adcq   $0,%3           \n\t"
431                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(borrow)
432                 : "r" (b0), "r" (b1), "r" (b2), "0" (r0),
433 		  "1" (r1), "2" (r2)
434                 : "%cc" );
435 #endif
436 
437 	/* Do quick 'add' if we've gone under 0
438 	 * (subtract the 2's complement of the curve field) */
439 	if (borrow) {
440 #ifndef MPI_AMD64_ADD
441 		MP_SUB_BORROW(r0, 1, r0, 0,     borrow);
442 		MP_SUB_BORROW(r1, 1, r1, borrow, borrow);
443 		MP_SUB_BORROW(r2,  0, r2, borrow, borrow);
444 #else
445 		__asm__ (
446 			"subq   $1,%0           \n\t"
447 			"sbbq   $1,%1           \n\t"
448 			"sbbq   $0,%2           \n\t"
449 			: "=r"(r0), "=r"(r1), "=r"(r2)
450 			: "0" (r0), "1" (r1), "2" (r2)
451 			: "%cc" );
452 #endif
453 	}
454 
455 	MP_CHECKOK(s_mp_pad(r, 3));
456 	MP_DIGIT(r, 2) = r2;
457 	MP_DIGIT(r, 1) = r1;
458 	MP_DIGIT(r, 0) = r0;
459 	MP_SIGN(r) = MP_ZPOS;
460 	MP_USED(r) = 3;
461 	s_mp_clamp(r);
462 
463   CLEANUP:
464 	return res;
465 }
466 
467 #endif
468 
469 /* Compute the square of polynomial a, reduce modulo p192. Store the
470  * result in r.  r could be a.  Uses optimized modular reduction for p192.
471  */
472 mp_err
473 ec_GFp_nistp192_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
474 {
475 	mp_err res = MP_OKAY;
476 
477 	MP_CHECKOK(mp_sqr(a, r));
478 	MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
479   CLEANUP:
480 	return res;
481 }
482 
483 /* Compute the product of two polynomials a and b, reduce modulo p192.
484  * Store the result in r.  r could be a or b; a could be b.  Uses
485  * optimized modular reduction for p192. */
486 mp_err
487 ec_GFp_nistp192_mul(const mp_int *a, const mp_int *b, mp_int *r,
488 					const GFMethod *meth)
489 {
490 	mp_err res = MP_OKAY;
491 
492 	MP_CHECKOK(mp_mul(a, b, r));
493 	MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
494   CLEANUP:
495 	return res;
496 }
497 
498 /* Divides two field elements. If a is NULL, then returns the inverse of
499  * b. */
500 mp_err
501 ec_GFp_nistp192_div(const mp_int *a, const mp_int *b, mp_int *r,
502 		   const GFMethod *meth)
503 {
504 	mp_err res = MP_OKAY;
505 	mp_int t;
506 
507 	/* If a is NULL, then return the inverse of b, otherwise return a/b. */
508 	if (a == NULL) {
509 		return  mp_invmod(b, &meth->irr, r);
510 	} else {
511 		/* MPI doesn't support divmod, so we implement it using invmod and
512 		 * mulmod. */
513 		MP_CHECKOK(mp_init(&t, FLAG(b)));
514 		MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
515 		MP_CHECKOK(mp_mul(a, &t, r));
516 		MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
517 	  CLEANUP:
518 		mp_clear(&t);
519 		return res;
520 	}
521 }
522 
523 /* Wire in fast field arithmetic and precomputation of base point for
524  * named curves. */
525 mp_err
526 ec_group_set_gfp192(ECGroup *group, ECCurveName name)
527 {
528 	if (name == ECCurve_NIST_P192) {
529 		group->meth->field_mod = &ec_GFp_nistp192_mod;
530 		group->meth->field_mul = &ec_GFp_nistp192_mul;
531 		group->meth->field_sqr = &ec_GFp_nistp192_sqr;
532 		group->meth->field_div = &ec_GFp_nistp192_div;
533 #ifndef ECL_THIRTY_TWO_BIT
534 		group->meth->field_add = &ec_GFp_nistp192_add;
535 		group->meth->field_sub = &ec_GFp_nistp192_sub;
536 #endif
537 	}
538 	return MP_OKAY;
539 }
540