xref: /titanic_41/usr/src/common/crypto/ecc/ecp_192.c (revision b54157c1b1bf9673e4da8b526477d59202cd08a6)
1 /*
2  * ***** BEGIN LICENSE BLOCK *****
3  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4  *
5  * The contents of this file are subject to the Mozilla Public License Version
6  * 1.1 (the "License"); you may not use this file except in compliance with
7  * the License. You may obtain a copy of the License at
8  * http://www.mozilla.org/MPL/
9  *
10  * Software distributed under the License is distributed on an "AS IS" basis,
11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12  * for the specific language governing rights and limitations under the
13  * License.
14  *
15  * The Original Code is the elliptic curve math library for prime field curves.
16  *
17  * The Initial Developer of the Original Code is
18  * Sun Microsystems, Inc.
19  * Portions created by the Initial Developer are Copyright (C) 2003
20  * the Initial Developer. All Rights Reserved.
21  *
22  * Contributor(s):
23  *   Douglas Stebila <douglas@stebila.ca>, Sun Microsystems Laboratories
24  *
25  * Alternatively, the contents of this file may be used under the terms of
26  * either the GNU General Public License Version 2 or later (the "GPL"), or
27  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28  * in which case the provisions of the GPL or the LGPL are applicable instead
29  * of those above. If you wish to allow use of your version of this file only
30  * under the terms of either the GPL or the LGPL, and not to allow others to
31  * use your version of this file under the terms of the MPL, indicate your
32  * decision by deleting the provisions above and replace them with the notice
33  * and other provisions required by the GPL or the LGPL. If you do not delete
34  * the provisions above, a recipient may use your version of this file under
35  * the terms of any one of the MPL, the GPL or the LGPL.
36  *
37  * ***** END LICENSE BLOCK ***** */
38 /*
39  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
40  * Use is subject to license terms.
41  *
42  * Sun elects to use this software under the MPL license.
43  */
44 
45 #pragma ident	"%Z%%M%	%I%	%E% SMI"
46 
47 #include "ecp.h"
48 #include "mpi.h"
49 #include "mplogic.h"
50 #include "mpi-priv.h"
51 #ifndef _KERNEL
52 #include <stdlib.h>
53 #endif
54 
/*
 * Digit count that ECL_CURVE_DIGITS yields for a 192-bit curve.  The
 * reduction routine below compares MP_USED(a) against this value (and twice
 * this value) to choose between no reduction, fast reduction and mp_mod.
 */
#define ECP192_DIGITS ECL_CURVE_DIGITS(192)
56 
57 /* Fast modular reduction for p192 = 2^192 - 2^64 - 1.  a can be r. Uses
58  * algorithm 7 from Brown, Hankerson, Lopez, Menezes. Software
59  * Implementation of the NIST Elliptic Curves over Prime Fields. */
60 mp_err
61 ec_GFp_nistp192_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
62 {
63 	mp_err res = MP_OKAY;
64 	mp_size a_used = MP_USED(a);
65 	mp_digit r3;
66 #ifndef MPI_AMD64_ADD
67 	mp_digit carry;
68 #endif
69 #ifdef ECL_THIRTY_TWO_BIT
70 	mp_digit a5a = 0, a5b = 0, a4a = 0, a4b = 0, a3a = 0, a3b = 0;
71         mp_digit r0a, r0b, r1a, r1b, r2a, r2b;
72 #else
73 	mp_digit a5 = 0, a4 = 0, a3 = 0;
74         mp_digit r0, r1, r2;
75 #endif
76 
77 	/* reduction not needed if a is not larger than field size */
78 	if (a_used < ECP192_DIGITS) {
79 		if (a == r) {
80 			return MP_OKAY;
81 		}
82 		return mp_copy(a, r);
83 	}
84 
85 	/* for polynomials larger than twice the field size, use regular
86 	 * reduction */
87 	if (a_used > ECP192_DIGITS*2) {
88 		MP_CHECKOK(mp_mod(a, &meth->irr, r));
89 	} else {
90 		/* copy out upper words of a */
91 
92 #ifdef ECL_THIRTY_TWO_BIT
93 
94 		/* in all the math below,
95 		 * nXb is most signifiant, nXa is least significant */
96 		switch (a_used) {
97 		case 12:
98 			a5b = MP_DIGIT(a, 11);
99 		case 11:
100 			a5a = MP_DIGIT(a, 10);
101 		case 10:
102 			a4b = MP_DIGIT(a, 9);
103 		case 9:
104 			a4a = MP_DIGIT(a, 8);
105 		case 8:
106 			a3b = MP_DIGIT(a, 7);
107 		case 7:
108 			a3a = MP_DIGIT(a, 6);
109 		}
110 
111 
112                 r2b= MP_DIGIT(a, 5);
113                 r2a= MP_DIGIT(a, 4);
114                 r1b = MP_DIGIT(a, 3);
115                 r1a = MP_DIGIT(a, 2);
116                 r0b = MP_DIGIT(a, 1);
117                 r0a = MP_DIGIT(a, 0);
118 
119 		/* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
120 		MP_ADD_CARRY(r0a, a3a, r0a, 0,    carry);
121 		MP_ADD_CARRY(r0b, a3b, r0b, carry, carry);
122 		MP_ADD_CARRY(r1a, a3a, r1a, carry, carry);
123 		MP_ADD_CARRY(r1b, a3b, r1b, carry, carry);
124 		MP_ADD_CARRY(r2a, a4a, r2a, carry, carry);
125 		MP_ADD_CARRY(r2b, a4b, r2b, carry, carry);
126 		r3 = carry; carry = 0;
127 		MP_ADD_CARRY(r0a, a5a, r0a, 0,     carry);
128 		MP_ADD_CARRY(r0b, a5b, r0b, carry, carry);
129 		MP_ADD_CARRY(r1a, a5a, r1a, carry, carry);
130 		MP_ADD_CARRY(r1b, a5b, r1b, carry, carry);
131 		MP_ADD_CARRY(r2a, a5a, r2a, carry, carry);
132 		MP_ADD_CARRY(r2b, a5b, r2b, carry, carry);
133 		r3 += carry;
134 		MP_ADD_CARRY(r1a, a4a, r1a, 0,     carry);
135 		MP_ADD_CARRY(r1b, a4b, r1b, carry, carry);
136 		MP_ADD_CARRY(r2a,   0, r2a, carry, carry);
137 		MP_ADD_CARRY(r2b,   0, r2b, carry, carry);
138 		r3 += carry;
139 
140 		/* reduce out the carry */
141 		while (r3) {
142 			MP_ADD_CARRY(r0a, r3, r0a, 0,     carry);
143 			MP_ADD_CARRY(r0b,  0, r0b, carry, carry);
144 			MP_ADD_CARRY(r1a, r3, r1a, carry, carry);
145 			MP_ADD_CARRY(r1b,  0, r1b, carry, carry);
146 			MP_ADD_CARRY(r2a,  0, r2a, carry, carry);
147 			MP_ADD_CARRY(r2b,  0, r2b, carry, carry);
148 			r3 = carry;
149 		}
150 
151 		/* check for final reduction */
152 		/*
153 		 * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
154 		 * 0xffffffffffffffff. That means we can only be over and need
155 		 * one more reduction
156 		 *  if r2 == 0xffffffffffffffffff (same as r2+1 == 0)
157 		 *     and
158 		 *     r1 == 0xffffffffffffffffff   or
159 		 *     r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff
160 		 * In all cases, we subtract the field (or add the 2's
161 		 * complement value (1,1,0)).  (r0, r1, r2)
162 		 */
163 		if (((r2b == 0xffffffff) && (r2a == 0xffffffff)
164 			&& (r1b == 0xffffffff) ) &&
165 			   ((r1a == 0xffffffff) ||
166 			    (r1a == 0xfffffffe) && (r0a == 0xffffffff) &&
167 					(r0b == 0xffffffff)) ) {
168 			/* do a quick subtract */
169 			MP_ADD_CARRY(r0a, 1, r0a, 0, carry);
170 			r0b += carry;
171 			r1a = r1b = r2a = r2b = 0;
172 		}
173 
174 		/* set the lower words of r */
175 		if (a != r) {
176 			MP_CHECKOK(s_mp_pad(r, 6));
177 		}
178 		MP_DIGIT(r, 5) = r2b;
179 		MP_DIGIT(r, 4) = r2a;
180 		MP_DIGIT(r, 3) = r1b;
181 		MP_DIGIT(r, 2) = r1a;
182 		MP_DIGIT(r, 1) = r0b;
183 		MP_DIGIT(r, 0) = r0a;
184 		MP_USED(r) = 6;
185 #else
186 		switch (a_used) {
187 		case 6:
188 			a5 = MP_DIGIT(a, 5);
189 		case 5:
190 			a4 = MP_DIGIT(a, 4);
191 		case 4:
192 			a3 = MP_DIGIT(a, 3);
193 		}
194 
195                 r2 = MP_DIGIT(a, 2);
196                 r1 = MP_DIGIT(a, 1);
197                 r0 = MP_DIGIT(a, 0);
198 
199 		/* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */
200 #ifndef MPI_AMD64_ADD
201 		MP_ADD_CARRY(r0, a3, r0, 0,     carry);
202 		MP_ADD_CARRY(r1, a3, r1, carry, carry);
203 		MP_ADD_CARRY(r2, a4, r2, carry, carry);
204 		r3 = carry;
205 		MP_ADD_CARRY(r0, a5, r0, 0,     carry);
206 		MP_ADD_CARRY(r1, a5, r1, carry, carry);
207 		MP_ADD_CARRY(r2, a5, r2, carry, carry);
208 		r3 += carry;
209 		MP_ADD_CARRY(r1, a4, r1, 0,     carry);
210 		MP_ADD_CARRY(r2,  0, r2, carry, carry);
211 		r3 += carry;
212 
213 #else
214                 r2 = MP_DIGIT(a, 2);
215                 r1 = MP_DIGIT(a, 1);
216                 r0 = MP_DIGIT(a, 0);
217 
218                 /* set the lower words of r */
219                 __asm__ (
220                 "xorq   %3,%3           \n\t"
221                 "addq   %4,%0           \n\t"
222                 "adcq   %4,%1           \n\t"
223                 "adcq   %5,%2           \n\t"
224                 "adcq   $0,%3           \n\t"
225                 "addq   %6,%0           \n\t"
226                 "adcq   %6,%1           \n\t"
227                 "adcq   %6,%2           \n\t"
228                 "adcq   $0,%3           \n\t"
229                 "addq   %5,%1           \n\t"
230                 "adcq   $0,%2           \n\t"
231                 "adcq   $0,%3           \n\t"
232                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3),
233 		  "=r"(a4), "=r"(a5)
234                 : "0" (r0), "1" (r1), "2" (r2), "3" (r3),
235 		  "4" (a3), "5" (a4), "6"(a5)
236                 : "%cc" );
237 #endif
238 
239 		/* reduce out the carry */
240 		while (r3) {
241 #ifndef MPI_AMD64_ADD
242 			MP_ADD_CARRY(r0, r3, r0, 0,     carry);
243 			MP_ADD_CARRY(r1, r3, r1, carry, carry);
244 			MP_ADD_CARRY(r2,  0, r2, carry, carry);
245 			r3 = carry;
246 #else
247 			a3=r3;
248               		__asm__ (
249                 	"xorq   %3,%3           \n\t"
250                 	"addq   %4,%0           \n\t"
251                 	"adcq   %4,%1           \n\t"
252                 	"adcq   $0,%2           \n\t"
253                 	"adcq   $0,%3           \n\t"
254                 	: "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3)
255                 	: "0" (r0), "1" (r1), "2" (r2), "3" (r3), "4"(a3)
256                 	: "%cc" );
257 #endif
258 		}
259 
260 		/* check for final reduction */
261 		/*
262 		 * our field is 0xffffffffffffffff, 0xfffffffffffffffe,
263 		 * 0xffffffffffffffff. That means we can only be over and need
264 		 * one more reduction
265 		 *  if r2 == 0xffffffffffffffffff (same as r2+1 == 0)
266 		 *     and
267 		 *     r1 == 0xffffffffffffffffff   or
268 		 *     r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff
269 		 * In all cases, we subtract the field (or add the 2's
270 		 * complement value (1,1,0)).  (r0, r1, r2)
271 		 */
272 		if (r3 || ((r2 == MP_DIGIT_MAX) &&
273 		      ((r1 == MP_DIGIT_MAX) ||
274 			((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
275 			/* do a quick subtract */
276 			r0++;
277 			r1 = r2 = 0;
278 		}
279 		/* set the lower words of r */
280 		if (a != r) {
281 			MP_CHECKOK(s_mp_pad(r, 3));
282 		}
283 		MP_DIGIT(r, 2) = r2;
284 		MP_DIGIT(r, 1) = r1;
285 		MP_DIGIT(r, 0) = r0;
286 		MP_USED(r) = 3;
287 #endif
288 	}
289 
290   CLEANUP:
291 	return res;
292 }
293 
294 #ifndef ECL_THIRTY_TWO_BIT
295 /* Compute the sum of 192 bit curves. Do the work in-line since the
296  * number of words are so small, we don't want to overhead of mp function
297  * calls.  Uses optimized modular reduction for p192.
298  */
299 mp_err
300 ec_GFp_nistp192_add(const mp_int *a, const mp_int *b, mp_int *r,
301 			const GFMethod *meth)
302 {
303 	mp_err res = MP_OKAY;
304 	mp_digit a0 = 0, a1 = 0, a2 = 0;
305 	mp_digit r0 = 0, r1 = 0, r2 = 0;
306 	mp_digit carry;
307 
308 	switch(MP_USED(a)) {
309 	case 3:
310 		a2 = MP_DIGIT(a,2);
311 	case 2:
312 		a1 = MP_DIGIT(a,1);
313 	case 1:
314 		a0 = MP_DIGIT(a,0);
315 	}
316 	switch(MP_USED(b)) {
317 	case 3:
318 		r2 = MP_DIGIT(b,2);
319 	case 2:
320 		r1 = MP_DIGIT(b,1);
321 	case 1:
322 		r0 = MP_DIGIT(b,0);
323 	}
324 
325 #ifndef MPI_AMD64_ADD
326 	MP_ADD_CARRY(a0, r0, r0, 0,     carry);
327 	MP_ADD_CARRY(a1, r1, r1, carry, carry);
328 	MP_ADD_CARRY(a2, r2, r2, carry, carry);
329 #else
330 	__asm__ (
331                 "xorq   %3,%3           \n\t"
332                 "addq   %4,%0           \n\t"
333                 "adcq   %5,%1           \n\t"
334                 "adcq   %6,%2           \n\t"
335                 "adcq   $0,%3           \n\t"
336                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry)
337                 : "r" (a0), "r" (a1), "r" (a2), "0" (r0),
338 		  "1" (r1), "2" (r2)
339                 : "%cc" );
340 #endif
341 
342 	/* Do quick 'subract' if we've gone over
343 	 * (add the 2's complement of the curve field) */
344 	if (carry || ((r2 == MP_DIGIT_MAX) &&
345 		      ((r1 == MP_DIGIT_MAX) ||
346 			((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) {
347 #ifndef MPI_AMD64_ADD
348 		MP_ADD_CARRY(r0, 1, r0, 0,     carry);
349 		MP_ADD_CARRY(r1, 1, r1, carry, carry);
350 		MP_ADD_CARRY(r2, 0, r2, carry, carry);
351 #else
352 		__asm__ (
353 			"addq   $1,%0           \n\t"
354 			"adcq   $1,%1           \n\t"
355 			"adcq   $0,%2           \n\t"
356 			: "=r"(r0), "=r"(r1), "=r"(r2)
357 			: "0" (r0), "1" (r1), "2" (r2)
358 			: "%cc" );
359 #endif
360 	}
361 
362 
363 	MP_CHECKOK(s_mp_pad(r, 3));
364 	MP_DIGIT(r, 2) = r2;
365 	MP_DIGIT(r, 1) = r1;
366 	MP_DIGIT(r, 0) = r0;
367 	MP_SIGN(r) = MP_ZPOS;
368 	MP_USED(r) = 3;
369 	s_mp_clamp(r);
370 
371 
372   CLEANUP:
373 	return res;
374 }
375 
376 /* Compute the diff of 192 bit curves. Do the work in-line since the
377  * number of words are so small, we don't want to overhead of mp function
378  * calls.  Uses optimized modular reduction for p192.
379  */
380 mp_err
381 ec_GFp_nistp192_sub(const mp_int *a, const mp_int *b, mp_int *r,
382 			const GFMethod *meth)
383 {
384 	mp_err res = MP_OKAY;
385 	mp_digit b0 = 0, b1 = 0, b2 = 0;
386 	mp_digit r0 = 0, r1 = 0, r2 = 0;
387 	mp_digit borrow;
388 
389 	switch(MP_USED(a)) {
390 	case 3:
391 		r2 = MP_DIGIT(a,2);
392 	case 2:
393 		r1 = MP_DIGIT(a,1);
394 	case 1:
395 		r0 = MP_DIGIT(a,0);
396 	}
397 
398 	switch(MP_USED(b)) {
399 	case 3:
400 		b2 = MP_DIGIT(b,2);
401 	case 2:
402 		b1 = MP_DIGIT(b,1);
403 	case 1:
404 		b0 = MP_DIGIT(b,0);
405 	}
406 
407 #ifndef MPI_AMD64_ADD
408 	MP_SUB_BORROW(r0, b0, r0, 0,     borrow);
409 	MP_SUB_BORROW(r1, b1, r1, borrow, borrow);
410 	MP_SUB_BORROW(r2, b2, r2, borrow, borrow);
411 #else
412 	__asm__ (
413                 "xorq   %3,%3           \n\t"
414                 "subq   %4,%0           \n\t"
415                 "sbbq   %5,%1           \n\t"
416                 "sbbq   %6,%2           \n\t"
417                 "adcq   $0,%3           \n\t"
418                 : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(borrow)
419                 : "r" (b0), "r" (b1), "r" (b2), "0" (r0),
420 		  "1" (r1), "2" (r2)
421                 : "%cc" );
422 #endif
423 
424 	/* Do quick 'add' if we've gone under 0
425 	 * (subtract the 2's complement of the curve field) */
426 	if (borrow) {
427 #ifndef MPI_AMD64_ADD
428 		MP_SUB_BORROW(r0, 1, r0, 0,     borrow);
429 		MP_SUB_BORROW(r1, 1, r1, borrow, borrow);
430 		MP_SUB_BORROW(r2,  0, r2, borrow, borrow);
431 #else
432 		__asm__ (
433 			"subq   $1,%0           \n\t"
434 			"sbbq   $1,%1           \n\t"
435 			"sbbq   $0,%2           \n\t"
436 			: "=r"(r0), "=r"(r1), "=r"(r2)
437 			: "0" (r0), "1" (r1), "2" (r2)
438 			: "%cc" );
439 #endif
440 	}
441 
442 	MP_CHECKOK(s_mp_pad(r, 3));
443 	MP_DIGIT(r, 2) = r2;
444 	MP_DIGIT(r, 1) = r1;
445 	MP_DIGIT(r, 0) = r0;
446 	MP_SIGN(r) = MP_ZPOS;
447 	MP_USED(r) = 3;
448 	s_mp_clamp(r);
449 
450   CLEANUP:
451 	return res;
452 }
453 
454 #endif
455 
456 /* Compute the square of polynomial a, reduce modulo p192. Store the
457  * result in r.  r could be a.  Uses optimized modular reduction for p192.
458  */
459 mp_err
460 ec_GFp_nistp192_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
461 {
462 	mp_err res = MP_OKAY;
463 
464 	MP_CHECKOK(mp_sqr(a, r));
465 	MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
466   CLEANUP:
467 	return res;
468 }
469 
470 /* Compute the product of two polynomials a and b, reduce modulo p192.
471  * Store the result in r.  r could be a or b; a could be b.  Uses
472  * optimized modular reduction for p192. */
473 mp_err
474 ec_GFp_nistp192_mul(const mp_int *a, const mp_int *b, mp_int *r,
475 					const GFMethod *meth)
476 {
477 	mp_err res = MP_OKAY;
478 
479 	MP_CHECKOK(mp_mul(a, b, r));
480 	MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
481   CLEANUP:
482 	return res;
483 }
484 
485 /* Divides two field elements. If a is NULL, then returns the inverse of
486  * b. */
487 mp_err
488 ec_GFp_nistp192_div(const mp_int *a, const mp_int *b, mp_int *r,
489 		   const GFMethod *meth)
490 {
491 	mp_err res = MP_OKAY;
492 	mp_int t;
493 
494 	/* If a is NULL, then return the inverse of b, otherwise return a/b. */
495 	if (a == NULL) {
496 		return  mp_invmod(b, &meth->irr, r);
497 	} else {
498 		/* MPI doesn't support divmod, so we implement it using invmod and
499 		 * mulmod. */
500 		MP_CHECKOK(mp_init(&t, FLAG(b)));
501 		MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
502 		MP_CHECKOK(mp_mul(a, &t, r));
503 		MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth));
504 	  CLEANUP:
505 		mp_clear(&t);
506 		return res;
507 	}
508 }
509 
510 /* Wire in fast field arithmetic and precomputation of base point for
511  * named curves. */
512 mp_err
513 ec_group_set_gfp192(ECGroup *group, ECCurveName name)
514 {
515 	if (name == ECCurve_NIST_P192) {
516 		group->meth->field_mod = &ec_GFp_nistp192_mod;
517 		group->meth->field_mul = &ec_GFp_nistp192_mul;
518 		group->meth->field_sqr = &ec_GFp_nistp192_sqr;
519 		group->meth->field_div = &ec_GFp_nistp192_div;
520 #ifndef ECL_THIRTY_TWO_BIT
521 		group->meth->field_add = &ec_GFp_nistp192_add;
522 		group->meth->field_sub = &ec_GFp_nistp192_sub;
523 #endif
524 	}
525 	return MP_OKAY;
526 }
527