/*
 * ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is the elliptic curve math library for prime field curves.
 *
 * The Initial Developer of the Original Code is
 * Sun Microsystems, Inc.
 * Portions created by the Initial Developer are Copyright (C) 2003
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Douglas Stebila <douglas@stebila.ca>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
41 * 42 * Sun elects to use this software under the MPL license. 43 */ 44 45 #include "ecp.h" 46 #include "mpi.h" 47 #include "mplogic.h" 48 #include "mpi-priv.h" 49 #ifndef _KERNEL 50 #include <stdlib.h> 51 #endif 52 53 /* Fast modular reduction for p256 = 2^256 - 2^224 + 2^192+ 2^96 - 1. a can be r. 54 * Uses algorithm 2.29 from Hankerson, Menezes, Vanstone. Guide to 55 * Elliptic Curve Cryptography. */ 56 mp_err 57 ec_GFp_nistp256_mod(const mp_int *a, mp_int *r, const GFMethod *meth) 58 { 59 mp_err res = MP_OKAY; 60 mp_size a_used = MP_USED(a); 61 int a_bits = mpl_significant_bits(a); 62 mp_digit carry; 63 64 #ifdef ECL_THIRTY_TWO_BIT 65 mp_digit a8=0, a9=0, a10=0, a11=0, a12=0, a13=0, a14=0, a15=0; 66 mp_digit r0, r1, r2, r3, r4, r5, r6, r7; 67 int r8; /* must be a signed value ! */ 68 #else 69 mp_digit a4=0, a5=0, a6=0, a7=0; 70 mp_digit a4h, a4l, a5h, a5l, a6h, a6l, a7h, a7l; 71 mp_digit r0, r1, r2, r3; 72 int r4; /* must be a signed value ! */ 73 #endif 74 /* for polynomials larger than twice the field size 75 * use regular reduction */ 76 if (a_bits < 256) { 77 if (a == r) return MP_OKAY; 78 return mp_copy(a,r); 79 } 80 if (a_bits > 512) { 81 MP_CHECKOK(mp_mod(a, &meth->irr, r)); 82 } else { 83 84 #ifdef ECL_THIRTY_TWO_BIT 85 switch (a_used) { 86 case 16: 87 a15 = MP_DIGIT(a,15); 88 /* FALLTHROUGH */ 89 case 15: 90 a14 = MP_DIGIT(a,14); 91 /* FALLTHROUGH */ 92 case 14: 93 a13 = MP_DIGIT(a,13); 94 /* FALLTHROUGH */ 95 case 13: 96 a12 = MP_DIGIT(a,12); 97 /* FALLTHROUGH */ 98 case 12: 99 a11 = MP_DIGIT(a,11); 100 /* FALLTHROUGH */ 101 case 11: 102 a10 = MP_DIGIT(a,10); 103 /* FALLTHROUGH */ 104 case 10: 105 a9 = MP_DIGIT(a,9); 106 /* FALLTHROUGH */ 107 case 9: 108 a8 = MP_DIGIT(a,8); 109 } 110 111 r0 = MP_DIGIT(a,0); 112 r1 = MP_DIGIT(a,1); 113 r2 = MP_DIGIT(a,2); 114 r3 = MP_DIGIT(a,3); 115 r4 = MP_DIGIT(a,4); 116 r5 = MP_DIGIT(a,5); 117 r6 = MP_DIGIT(a,6); 118 r7 = MP_DIGIT(a,7); 119 120 /* sum 1 */ 121 MP_ADD_CARRY(r3, a11, r3, 0, carry); 122 
MP_ADD_CARRY(r4, a12, r4, carry, carry); 123 MP_ADD_CARRY(r5, a13, r5, carry, carry); 124 MP_ADD_CARRY(r6, a14, r6, carry, carry); 125 MP_ADD_CARRY(r7, a15, r7, carry, carry); 126 r8 = carry; 127 MP_ADD_CARRY(r3, a11, r3, 0, carry); 128 MP_ADD_CARRY(r4, a12, r4, carry, carry); 129 MP_ADD_CARRY(r5, a13, r5, carry, carry); 130 MP_ADD_CARRY(r6, a14, r6, carry, carry); 131 MP_ADD_CARRY(r7, a15, r7, carry, carry); 132 r8 += carry; 133 /* sum 2 */ 134 MP_ADD_CARRY(r3, a12, r3, 0, carry); 135 MP_ADD_CARRY(r4, a13, r4, carry, carry); 136 MP_ADD_CARRY(r5, a14, r5, carry, carry); 137 MP_ADD_CARRY(r6, a15, r6, carry, carry); 138 MP_ADD_CARRY(r7, 0, r7, carry, carry); 139 r8 += carry; 140 /* combine last bottom of sum 3 with second sum 2 */ 141 MP_ADD_CARRY(r0, a8, r0, 0, carry); 142 MP_ADD_CARRY(r1, a9, r1, carry, carry); 143 MP_ADD_CARRY(r2, a10, r2, carry, carry); 144 MP_ADD_CARRY(r3, a12, r3, carry, carry); 145 MP_ADD_CARRY(r4, a13, r4, carry, carry); 146 MP_ADD_CARRY(r5, a14, r5, carry, carry); 147 MP_ADD_CARRY(r6, a15, r6, carry, carry); 148 MP_ADD_CARRY(r7, a15, r7, carry, carry); /* from sum 3 */ 149 r8 += carry; 150 /* sum 3 (rest of it)*/ 151 MP_ADD_CARRY(r6, a14, r6, 0, carry); 152 MP_ADD_CARRY(r7, 0, r7, carry, carry); 153 r8 += carry; 154 /* sum 4 (rest of it)*/ 155 MP_ADD_CARRY(r0, a9, r0, 0, carry); 156 MP_ADD_CARRY(r1, a10, r1, carry, carry); 157 MP_ADD_CARRY(r2, a11, r2, carry, carry); 158 MP_ADD_CARRY(r3, a13, r3, carry, carry); 159 MP_ADD_CARRY(r4, a14, r4, carry, carry); 160 MP_ADD_CARRY(r5, a15, r5, carry, carry); 161 MP_ADD_CARRY(r6, a13, r6, carry, carry); 162 MP_ADD_CARRY(r7, a8, r7, carry, carry); 163 r8 += carry; 164 /* diff 5 */ 165 MP_SUB_BORROW(r0, a11, r0, 0, carry); 166 MP_SUB_BORROW(r1, a12, r1, carry, carry); 167 MP_SUB_BORROW(r2, a13, r2, carry, carry); 168 MP_SUB_BORROW(r3, 0, r3, carry, carry); 169 MP_SUB_BORROW(r4, 0, r4, carry, carry); 170 MP_SUB_BORROW(r5, 0, r5, carry, carry); 171 MP_SUB_BORROW(r6, a8, r6, carry, carry); 172 
MP_SUB_BORROW(r7, a10, r7, carry, carry); 173 r8 -= carry; 174 /* diff 6 */ 175 MP_SUB_BORROW(r0, a12, r0, 0, carry); 176 MP_SUB_BORROW(r1, a13, r1, carry, carry); 177 MP_SUB_BORROW(r2, a14, r2, carry, carry); 178 MP_SUB_BORROW(r3, a15, r3, carry, carry); 179 MP_SUB_BORROW(r4, 0, r4, carry, carry); 180 MP_SUB_BORROW(r5, 0, r5, carry, carry); 181 MP_SUB_BORROW(r6, a9, r6, carry, carry); 182 MP_SUB_BORROW(r7, a11, r7, carry, carry); 183 r8 -= carry; 184 /* diff 7 */ 185 MP_SUB_BORROW(r0, a13, r0, 0, carry); 186 MP_SUB_BORROW(r1, a14, r1, carry, carry); 187 MP_SUB_BORROW(r2, a15, r2, carry, carry); 188 MP_SUB_BORROW(r3, a8, r3, carry, carry); 189 MP_SUB_BORROW(r4, a9, r4, carry, carry); 190 MP_SUB_BORROW(r5, a10, r5, carry, carry); 191 MP_SUB_BORROW(r6, 0, r6, carry, carry); 192 MP_SUB_BORROW(r7, a12, r7, carry, carry); 193 r8 -= carry; 194 /* diff 8 */ 195 MP_SUB_BORROW(r0, a14, r0, 0, carry); 196 MP_SUB_BORROW(r1, a15, r1, carry, carry); 197 MP_SUB_BORROW(r2, 0, r2, carry, carry); 198 MP_SUB_BORROW(r3, a9, r3, carry, carry); 199 MP_SUB_BORROW(r4, a10, r4, carry, carry); 200 MP_SUB_BORROW(r5, a11, r5, carry, carry); 201 MP_SUB_BORROW(r6, 0, r6, carry, carry); 202 MP_SUB_BORROW(r7, a13, r7, carry, carry); 203 r8 -= carry; 204 205 /* reduce the overflows */ 206 while (r8 > 0) { 207 mp_digit r8_d = r8; 208 MP_ADD_CARRY(r0, r8_d, r0, 0, carry); 209 MP_ADD_CARRY(r1, 0, r1, carry, carry); 210 MP_ADD_CARRY(r2, 0, r2, carry, carry); 211 MP_ADD_CARRY(r3, -r8_d, r3, carry, carry); 212 MP_ADD_CARRY(r4, MP_DIGIT_MAX, r4, carry, carry); 213 MP_ADD_CARRY(r5, MP_DIGIT_MAX, r5, carry, carry); 214 MP_ADD_CARRY(r6, -(r8_d+1), r6, carry, carry); 215 MP_ADD_CARRY(r7, (r8_d-1), r7, carry, carry); 216 r8 = carry; 217 } 218 219 /* reduce the underflows */ 220 while (r8 < 0) { 221 mp_digit r8_d = -r8; 222 MP_SUB_BORROW(r0, r8_d, r0, 0, carry); 223 MP_SUB_BORROW(r1, 0, r1, carry, carry); 224 MP_SUB_BORROW(r2, 0, r2, carry, carry); 225 MP_SUB_BORROW(r3, -r8_d, r3, carry, carry); 226 
MP_SUB_BORROW(r4, MP_DIGIT_MAX, r4, carry, carry); 227 MP_SUB_BORROW(r5, MP_DIGIT_MAX, r5, carry, carry); 228 MP_SUB_BORROW(r6, -(r8_d+1), r6, carry, carry); 229 MP_SUB_BORROW(r7, (r8_d-1), r7, carry, carry); 230 r8 = -carry; 231 } 232 if (a != r) { 233 MP_CHECKOK(s_mp_pad(r,8)); 234 } 235 MP_SIGN(r) = MP_ZPOS; 236 MP_USED(r) = 8; 237 238 MP_DIGIT(r,7) = r7; 239 MP_DIGIT(r,6) = r6; 240 MP_DIGIT(r,5) = r5; 241 MP_DIGIT(r,4) = r4; 242 MP_DIGIT(r,3) = r3; 243 MP_DIGIT(r,2) = r2; 244 MP_DIGIT(r,1) = r1; 245 MP_DIGIT(r,0) = r0; 246 247 /* final reduction if necessary */ 248 if ((r7 == MP_DIGIT_MAX) && 249 ((r6 > 1) || ((r6 == 1) && 250 (r5 || r4 || r3 || 251 ((r2 == MP_DIGIT_MAX) && (r1 == MP_DIGIT_MAX) 252 && (r0 == MP_DIGIT_MAX)))))) { 253 MP_CHECKOK(mp_sub(r, &meth->irr, r)); 254 } 255 #ifdef notdef 256 257 258 /* smooth the negatives */ 259 while (MP_SIGN(r) != MP_ZPOS) { 260 MP_CHECKOK(mp_add(r, &meth->irr, r)); 261 } 262 while (MP_USED(r) > 8) { 263 MP_CHECKOK(mp_sub(r, &meth->irr, r)); 264 } 265 266 /* final reduction if necessary */ 267 if (MP_DIGIT(r,7) >= MP_DIGIT(&meth->irr,7)) { 268 if (mp_cmp(r,&meth->irr) != MP_LT) { 269 MP_CHECKOK(mp_sub(r, &meth->irr, r)); 270 } 271 } 272 #endif 273 s_mp_clamp(r); 274 #else 275 switch (a_used) { 276 case 8: 277 a7 = MP_DIGIT(a,7); 278 /* FALLTHROUGH */ 279 case 7: 280 a6 = MP_DIGIT(a,6); 281 /* FALLTHROUGH */ 282 case 6: 283 a5 = MP_DIGIT(a,5); 284 /* FALLTHROUGH */ 285 case 5: 286 a4 = MP_DIGIT(a,4); 287 } 288 a7l = a7 << 32; 289 a7h = a7 >> 32; 290 a6l = a6 << 32; 291 a6h = a6 >> 32; 292 a5l = a5 << 32; 293 a5h = a5 >> 32; 294 a4l = a4 << 32; 295 a4h = a4 >> 32; 296 r3 = MP_DIGIT(a,3); 297 r2 = MP_DIGIT(a,2); 298 r1 = MP_DIGIT(a,1); 299 r0 = MP_DIGIT(a,0); 300 301 /* sum 1 */ 302 MP_ADD_CARRY(r1, a5h << 32, r1, 0, carry); 303 MP_ADD_CARRY(r2, a6, r2, carry, carry); 304 MP_ADD_CARRY(r3, a7, r3, carry, carry); 305 r4 = carry; 306 MP_ADD_CARRY(r1, a5h << 32, r1, 0, carry); 307 MP_ADD_CARRY(r2, a6, r2, carry, carry); 308 
MP_ADD_CARRY(r3, a7, r3, carry, carry); 309 r4 += carry; 310 /* sum 2 */ 311 MP_ADD_CARRY(r1, a6l, r1, 0, carry); 312 MP_ADD_CARRY(r2, a6h | a7l, r2, carry, carry); 313 MP_ADD_CARRY(r3, a7h, r3, carry, carry); 314 r4 += carry; 315 MP_ADD_CARRY(r1, a6l, r1, 0, carry); 316 MP_ADD_CARRY(r2, a6h | a7l, r2, carry, carry); 317 MP_ADD_CARRY(r3, a7h, r3, carry, carry); 318 r4 += carry; 319 320 /* sum 3 */ 321 MP_ADD_CARRY(r0, a4, r0, 0, carry); 322 MP_ADD_CARRY(r1, a5l >> 32, r1, carry, carry); 323 MP_ADD_CARRY(r2, 0, r2, carry, carry); 324 MP_ADD_CARRY(r3, a7, r3, carry, carry); 325 r4 += carry; 326 /* sum 4 */ 327 MP_ADD_CARRY(r0, a4h | a5l, r0, 0, carry); 328 MP_ADD_CARRY(r1, a5h|(a6h<<32), r1, carry, carry); 329 MP_ADD_CARRY(r2, a7, r2, carry, carry); 330 MP_ADD_CARRY(r3, a6h | a4l, r3, carry, carry); 331 r4 += carry; 332 /* diff 5 */ 333 MP_SUB_BORROW(r0, a5h | a6l, r0, 0, carry); 334 MP_SUB_BORROW(r1, a6h, r1, carry, carry); 335 MP_SUB_BORROW(r2, 0, r2, carry, carry); 336 MP_SUB_BORROW(r3, (a4l>>32)|a5l,r3, carry, carry); 337 r4 -= carry; 338 /* diff 6 */ 339 MP_SUB_BORROW(r0, a6, r0, 0, carry); 340 MP_SUB_BORROW(r1, a7, r1, carry, carry); 341 MP_SUB_BORROW(r2, 0, r2, carry, carry); 342 MP_SUB_BORROW(r3, a4h|(a5h<<32),r3, carry, carry); 343 r4 -= carry; 344 /* diff 7 */ 345 MP_SUB_BORROW(r0, a6h|a7l, r0, 0, carry); 346 MP_SUB_BORROW(r1, a7h|a4l, r1, carry, carry); 347 MP_SUB_BORROW(r2, a4h|a5l, r2, carry, carry); 348 MP_SUB_BORROW(r3, a6l, r3, carry, carry); 349 r4 -= carry; 350 /* diff 8 */ 351 MP_SUB_BORROW(r0, a7, r0, 0, carry); 352 MP_SUB_BORROW(r1, a4h<<32, r1, carry, carry); 353 MP_SUB_BORROW(r2, a5, r2, carry, carry); 354 MP_SUB_BORROW(r3, a6h<<32, r3, carry, carry); 355 r4 -= carry; 356 357 /* reduce the overflows */ 358 while (r4 > 0) { 359 mp_digit r4_long = r4; 360 mp_digit r4l = (r4_long << 32); 361 MP_ADD_CARRY(r0, r4_long, r0, 0, carry); 362 MP_ADD_CARRY(r1, -r4l, r1, carry, carry); 363 MP_ADD_CARRY(r2, MP_DIGIT_MAX, r2, carry, carry); 364 
MP_ADD_CARRY(r3, r4l-r4_long-1,r3, carry, carry); 365 r4 = carry; 366 } 367 368 /* reduce the underflows */ 369 while (r4 < 0) { 370 mp_digit r4_long = -r4; 371 mp_digit r4l = (r4_long << 32); 372 MP_SUB_BORROW(r0, r4_long, r0, 0, carry); 373 MP_SUB_BORROW(r1, -r4l, r1, carry, carry); 374 MP_SUB_BORROW(r2, MP_DIGIT_MAX, r2, carry, carry); 375 MP_SUB_BORROW(r3, r4l-r4_long-1,r3, carry, carry); 376 r4 = -carry; 377 } 378 379 if (a != r) { 380 MP_CHECKOK(s_mp_pad(r,4)); 381 } 382 MP_SIGN(r) = MP_ZPOS; 383 MP_USED(r) = 4; 384 385 MP_DIGIT(r,3) = r3; 386 MP_DIGIT(r,2) = r2; 387 MP_DIGIT(r,1) = r1; 388 MP_DIGIT(r,0) = r0; 389 390 /* final reduction if necessary */ 391 if ((r3 > 0xFFFFFFFF00000001ULL) || 392 ((r3 == 0xFFFFFFFF00000001ULL) && 393 (r2 || (r1 >> 32)|| 394 (r1 == 0xFFFFFFFFULL && r0 == MP_DIGIT_MAX)))) { 395 /* very rare, just use mp_sub */ 396 MP_CHECKOK(mp_sub(r, &meth->irr, r)); 397 } 398 399 s_mp_clamp(r); 400 #endif 401 } 402 403 CLEANUP: 404 return res; 405 } 406 407 /* Compute the square of polynomial a, reduce modulo p256. Store the 408 * result in r. r could be a. Uses optimized modular reduction for p256. 409 */ 410 mp_err 411 ec_GFp_nistp256_sqr(const mp_int *a, mp_int *r, const GFMethod *meth) 412 { 413 mp_err res = MP_OKAY; 414 415 MP_CHECKOK(mp_sqr(a, r)); 416 MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth)); 417 CLEANUP: 418 return res; 419 } 420 421 /* Compute the product of two polynomials a and b, reduce modulo p256. 422 * Store the result in r. r could be a or b; a could be b. Uses 423 * optimized modular reduction for p256. */ 424 mp_err 425 ec_GFp_nistp256_mul(const mp_int *a, const mp_int *b, mp_int *r, 426 const GFMethod *meth) 427 { 428 mp_err res = MP_OKAY; 429 430 MP_CHECKOK(mp_mul(a, b, r)); 431 MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth)); 432 CLEANUP: 433 return res; 434 } 435 436 /* Wire in fast field arithmetic and precomputation of base point for 437 * named curves. 
*/
/*
 * Install the P-256 specific field routines on a group's field method.
 *
 * Only ECCurve_NIST_P256 is acted upon: for that curve the field_mod,
 * field_mul and field_sqr entries of group->meth are pointed at
 * ec_GFp_nistp256_mod, ec_GFp_nistp256_mul and ec_GFp_nistp256_sqr.
 * For any other curve name the group is left untouched.
 *
 * Always returns MP_OKAY.
 */
mp_err
ec_group_set_gfp256(ECGroup *group, ECCurveName name)
{
    /* nothing to wire in unless this is the P-256 curve */
    if (name != ECCurve_NIST_P256) {
        return MP_OKAY;
    }

    group->meth->field_mod = ec_GFp_nistp256_mod;
    group->meth->field_mul = ec_GFp_nistp256_mul;
    group->meth->field_sqr = ec_GFp_nistp256_sqr;
    return MP_OKAY;
}