/*
 * ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is the elliptic curve math library for prime field curves.
 *
 * The Initial Developer of the Original Code is
 * Sun Microsystems, Inc.
 * Portions created by the Initial Developer are Copyright (C) 2003
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Douglas Stebila <douglas@stebila.ca>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 *
 * Sun elects to use this software under the MPL license.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include "ecp.h"
#include "mpi.h"
#include "mplogic.h"
#include "mpi-priv.h"
#ifndef _KERNEL
#include <stdlib.h>
#endif

/*
 * Fast modular reduction for p256 = 2^256 - 2^224 + 2^192 + 2^96 - 1.
 * a can be r.  Uses algorithm 2.29 from Hankerson, Menezes, Vanstone.
 * Guide to Elliptic Curve Cryptography.
 *
 * Parameters:
 *   a    - value to reduce
 *   r    - receives the reduced value; may be the same object as a
 *   meth - field method; only meth->irr (used here as the modulus for
 *          mp_mod and mp_sub) is read
 *
 * Behaviour, selected by the number of significant bits in a:
 *   - fewer than 256 bits: a is copied to r unchanged (nothing is done
 *     when a == r)
 *   - more than 512 bits:  generic mp_mod(a, &meth->irr, r)
 *   - 256..512 bits:       the word-level reduction below, compiled for
 *                          either 32-bit (ECL_THIRTY_TWO_BIT) or 64-bit
 *                          digits
 *
 * Returns MP_OKAY on success, otherwise the status of the failing MPI
 * call.  MP_CHECKOK is defined elsewhere; it presumably stores the
 * status in res and jumps to CLEANUP on failure.
 *
 * NOTE(review): the sign of a is never inspected and the fast path
 * forces MP_SIGN(r) to MP_ZPOS; presumably callers only pass
 * non-negative values - confirm.
 */
mp_err
ec_GFp_nistp256_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
{
	mp_err res = MP_OKAY;
	mp_size a_used = MP_USED(a);
	int a_bits = mpl_significant_bits(a);
	mp_digit carry;

#ifdef ECL_THIRTY_TWO_BIT
	/* upper words a8..a15 of the input; words beyond a_used stay zero */
	mp_digit a8=0, a9=0, a10=0, a11=0, a12=0, a13=0, a14=0, a15=0;
	/* running 8-word result */
	mp_digit r0, r1, r2, r3, r4, r5, r6, r7;
	/* net carries (positive) / borrows (negative) out of r7 */
	int r8; /* must be a signed value ! */
#else
	/* upper digits a4..a7 of the input; digits beyond a_used stay zero */
	mp_digit a4=0, a5=0, a6=0, a7=0;
	/*
	 * 32-bit halves of a4..a7: aNl is aN << 32 (low half moved into the
	 * upper half), aNh is aN >> 32 (high half moved into the lower half).
	 */
	mp_digit a4h, a4l, a5h, a5l, a6h, a6l, a7h, a7l;
	/* running 4-digit result */
	mp_digit r0, r1, r2, r3;
	/* net carries (positive) / borrows (negative) out of r3 */
	int r4; /* must be a signed value ! */
#endif
	/* inputs shorter than 256 bits are handed back unchanged */
	if (a_bits < 256) {
		if (a == r) return MP_OKAY;
		return mp_copy(a,r);
	}
	/* for polynomials larger than twice the field size
	 * use regular reduction */
	if (a_bits > 512) {
		MP_CHECKOK(mp_mod(a, &meth->irr, r));
	} else {

#ifdef ECL_THIRTY_TWO_BIT
		/*
		 * Load the upper words that are actually present.  Every case
		 * deliberately falls through to the ones below it; a_used == 8
		 * matches no case and leaves a8..a15 at zero.
		 */
		switch (a_used) {
		case 16:
			a15 = MP_DIGIT(a,15);
			/* FALLTHROUGH */
		case 15:
			a14 = MP_DIGIT(a,14);
			/* FALLTHROUGH */
		case 14:
			a13 = MP_DIGIT(a,13);
			/* FALLTHROUGH */
		case 13:
			a12 = MP_DIGIT(a,12);
			/* FALLTHROUGH */
		case 12:
			a11 = MP_DIGIT(a,11);
			/* FALLTHROUGH */
		case 11:
			a10 = MP_DIGIT(a,10);
			/* FALLTHROUGH */
		case 10:
			a9 = MP_DIGIT(a,9);
			/* FALLTHROUGH */
		case 9:
			a8 = MP_DIGIT(a,8);
		}

		/* the low eight words seed the result */
		r0 = MP_DIGIT(a,0);
		r1 = MP_DIGIT(a,1);
		r2 = MP_DIGIT(a,2);
		r3 = MP_DIGIT(a,3);
		r4 = MP_DIGIT(a,4);
		r5 = MP_DIGIT(a,5);
		r6 = MP_DIGIT(a,6);
		r7 = MP_DIGIT(a,7);

		/*
		 * MP_ADD_CARRY / MP_SUB_BORROW are defined elsewhere; as used
		 * here they presumably take (x, y, result, carry-in, carry-out).
		 * The carry left after the top word of each chain is
		 * accumulated in r8.
		 */

		/* sum 1: (a15,a14,a13,a12,a11) into r7..r3, added twice */
		MP_ADD_CARRY(r3, a11, r3, 0, carry);
		MP_ADD_CARRY(r4, a12, r4, carry, carry);
		MP_ADD_CARRY(r5, a13, r5, carry, carry);
		MP_ADD_CARRY(r6, a14, r6, carry, carry);
		MP_ADD_CARRY(r7, a15, r7, carry, carry);
		r8 = carry;
		MP_ADD_CARRY(r3, a11, r3, 0, carry);
		MP_ADD_CARRY(r4, a12, r4, carry, carry);
		MP_ADD_CARRY(r5, a13, r5, carry, carry);
		MP_ADD_CARRY(r6, a14, r6, carry, carry);
		MP_ADD_CARRY(r7, a15, r7, carry, carry);
		r8 += carry;
		/* sum 2: (a15,a14,a13,a12) into r6..r3, first of two passes */
		MP_ADD_CARRY(r3, a12, r3, 0, carry);
		MP_ADD_CARRY(r4, a13, r4, carry, carry);
		MP_ADD_CARRY(r5, a14, r5, carry, carry);
		MP_ADD_CARRY(r6, a15, r6, carry, carry);
		MP_ADD_CARRY(r7, 0, r7, carry, carry);
		r8 += carry;
		/* combine last bottom of sum 3 with second sum 2:
		 * r0..r2 and r7 take sum 3 words, r3..r6 take the second
		 * pass of sum 2, so one carry chain covers both */
		MP_ADD_CARRY(r0, a8, r0, 0, carry);
		MP_ADD_CARRY(r1, a9, r1, carry, carry);
		MP_ADD_CARRY(r2, a10, r2, carry, carry);
		MP_ADD_CARRY(r3, a12, r3, carry, carry);
		MP_ADD_CARRY(r4, a13, r4, carry, carry);
		MP_ADD_CARRY(r5, a14, r5, carry, carry);
		MP_ADD_CARRY(r6, a15, r6, carry, carry);
		MP_ADD_CARRY(r7, a15, r7, carry, carry); /* from sum 3 */
		r8 += carry;
		/* sum 3 (rest of it): the remaining a14 term into r6 */
		MP_ADD_CARRY(r6, a14, r6, 0, carry);
		MP_ADD_CARRY(r7, 0, r7, carry, carry);
		r8 += carry;
		/* sum 4: (a8,a13,a15,a14,a13,a11,a10,a9) into r7..r0 */
		MP_ADD_CARRY(r0, a9, r0, 0, carry);
		MP_ADD_CARRY(r1, a10, r1, carry, carry);
		MP_ADD_CARRY(r2, a11, r2, carry, carry);
		MP_ADD_CARRY(r3, a13, r3, carry, carry);
		MP_ADD_CARRY(r4, a14, r4, carry, carry);
		MP_ADD_CARRY(r5, a15, r5, carry, carry);
		MP_ADD_CARRY(r6, a13, r6, carry, carry);
		MP_ADD_CARRY(r7, a8, r7, carry, carry);
		r8 += carry;
		/* diff 5: subtract (a10,a8,0,0,0,a13,a12,a11) from r7..r0 */
		MP_SUB_BORROW(r0, a11, r0, 0, carry);
		MP_SUB_BORROW(r1, a12, r1, carry, carry);
		MP_SUB_BORROW(r2, a13, r2, carry, carry);
		MP_SUB_BORROW(r3, 0, r3, carry, carry);
		MP_SUB_BORROW(r4, 0, r4, carry, carry);
		MP_SUB_BORROW(r5, 0, r5, carry, carry);
		MP_SUB_BORROW(r6, a8, r6, carry, carry);
		MP_SUB_BORROW(r7, a10, r7, carry, carry);
		r8 -= carry;
		/* diff 6: subtract (a11,a9,0,0,a15,a14,a13,a12) from r7..r0 */
		MP_SUB_BORROW(r0, a12, r0, 0, carry);
		MP_SUB_BORROW(r1, a13, r1, carry, carry);
		MP_SUB_BORROW(r2, a14, r2, carry, carry);
		MP_SUB_BORROW(r3, a15, r3, carry, carry);
		MP_SUB_BORROW(r4, 0, r4, carry, carry);
		MP_SUB_BORROW(r5, 0, r5, carry, carry);
		MP_SUB_BORROW(r6, a9, r6, carry, carry);
		MP_SUB_BORROW(r7, a11, r7, carry, carry);
		r8 -= carry;
		/* diff 7: subtract (a12,0,a10,a9,a8,a15,a14,a13) from r7..r0 */
		MP_SUB_BORROW(r0, a13, r0, 0, carry);
		MP_SUB_BORROW(r1, a14, r1, carry, carry);
		MP_SUB_BORROW(r2, a15, r2, carry, carry);
		MP_SUB_BORROW(r3, a8, r3, carry, carry);
		MP_SUB_BORROW(r4, a9, r4, carry, carry);
		MP_SUB_BORROW(r5, a10, r5, carry, carry);
		MP_SUB_BORROW(r6, 0, r6, carry, carry);
		MP_SUB_BORROW(r7, a12, r7, carry, carry);
		r8 -= carry;
		/* diff 8: subtract (a13,0,a11,a10,a9,0,a15,a14) from r7..r0 */
		MP_SUB_BORROW(r0, a14, r0, 0, carry);
		MP_SUB_BORROW(r1, a15, r1, carry, carry);
		MP_SUB_BORROW(r2, 0, r2, carry, carry);
		MP_SUB_BORROW(r3, a9, r3, carry, carry);
		MP_SUB_BORROW(r4, a10, r4, carry, carry);
		MP_SUB_BORROW(r5, a11, r5, carry, carry);
		MP_SUB_BORROW(r6, 0, r6, carry, carry);
		MP_SUB_BORROW(r7, a13, r7, carry, carry);
		r8 -= carry;

		/*
		 * reduce the overflows
		 *
		 * Each unit held in r8 stands for 2^256, which by the definition
		 * of p256 above is congruent to 2^224 - 2^192 - 2^96 + 1.  r8
		 * times that value is added back in; the negated operands are
		 * the wrapped-around words of the negative terms (this relies
		 * on mp_digit being an unsigned type, declared outside this
		 * file).  The carry out of r7 becomes the new r8.
		 */
		while (r8 > 0) {
			mp_digit r8_d = r8;
			MP_ADD_CARRY(r0, r8_d, r0, 0, carry);
			MP_ADD_CARRY(r1, 0, r1, carry, carry);
			MP_ADD_CARRY(r2, 0, r2, carry, carry);
			MP_ADD_CARRY(r3, -r8_d, r3, carry, carry);
			MP_ADD_CARRY(r4, MP_DIGIT_MAX, r4, carry, carry);
			MP_ADD_CARRY(r5, MP_DIGIT_MAX, r5, carry, carry);
			MP_ADD_CARRY(r6, -(r8_d+1), r6, carry, carry);
			MP_ADD_CARRY(r7, (r8_d-1), r7, carry, carry);
			r8 = carry;
		}

		/*
		 * reduce the underflows
		 *
		 * Mirror image of the loop above: |r8| times the same constant
		 * is subtracted, and the borrow out of r7 becomes the new
		 * (negative) r8.
		 */
		while (r8 < 0) {
			mp_digit r8_d = -r8;
			MP_SUB_BORROW(r0, r8_d, r0, 0, carry);
			MP_SUB_BORROW(r1, 0, r1, carry, carry);
			MP_SUB_BORROW(r2, 0, r2, carry, carry);
			MP_SUB_BORROW(r3, -r8_d, r3, carry, carry);
			MP_SUB_BORROW(r4, MP_DIGIT_MAX, r4, carry, carry);
			MP_SUB_BORROW(r5, MP_DIGIT_MAX, r5, carry, carry);
			MP_SUB_BORROW(r6, -(r8_d+1), r6, carry, carry);
			MP_SUB_BORROW(r7, (r8_d-1), r7, carry, carry);
			r8 = -carry;
		}
		/*
		 * Make sure r can hold 8 digits.  Skipped when r is a, which
		 * already has at least 256 significant bits at this point.
		 */
		if (a != r) {
			MP_CHECKOK(s_mp_pad(r,8));
		}
		MP_SIGN(r) = MP_ZPOS;
		MP_USED(r) = 8;

		MP_DIGIT(r,7) = r7;
		MP_DIGIT(r,6) = r6;
		MP_DIGIT(r,5) = r5;
		MP_DIGIT(r,4) = r4;
		MP_DIGIT(r,3) = r3;
		MP_DIGIT(r,2) = r2;
		MP_DIGIT(r,1) = r1;
		MP_DIGIT(r,0) = r0;

		/*
		 * final reduction if necessary
		 *
		 * In 32-bit words (most significant first) p256 is
		 * FFFFFFFF 00000001 00000000 00000000 00000000 FFFFFFFF
		 * FFFFFFFF FFFFFFFF, so the test below is a word-by-word
		 * "r >= p256" comparison.
		 */
		if ((r7 == MP_DIGIT_MAX) &&
			((r6 > 1) || ((r6 == 1) &&
			(r5 || r4 || r3 ||
				((r2 == MP_DIGIT_MAX) && (r1 == MP_DIGIT_MAX)
				  && (r0 == MP_DIGIT_MAX)))))) {
			MP_CHECKOK(mp_sub(r, &meth->irr, r));
		}
#ifdef notdef
		/*
		 * Compiled only if notdef is defined: a generic mp_int based
		 * fix-up that the word-level code above makes unnecessary.
		 */


		/* smooth the negatives */
		while (MP_SIGN(r) != MP_ZPOS) {
			MP_CHECKOK(mp_add(r, &meth->irr, r));
		}
		while (MP_USED(r) > 8) {
			MP_CHECKOK(mp_sub(r, &meth->irr, r));
		}

		/* final reduction if necessary */
		if (MP_DIGIT(r,7) >= MP_DIGIT(&meth->irr,7)) {
		    if (mp_cmp(r,&meth->irr) != MP_LT) {
			MP_CHECKOK(mp_sub(r, &meth->irr, r));
		    }
		}
#endif
		/* MP_USED was forced to 8; presumably this trims leading zero digits */
		s_mp_clamp(r);
#else
		/*
		 * Load the upper digits that are actually present.  Every case
		 * deliberately falls through to the ones below it; a_used == 4
		 * matches no case and leaves a4..a7 at zero.
		 */
		switch (a_used) {
		case 8:
			a7 = MP_DIGIT(a,7);
			/* FALLTHROUGH */
		case 7:
			a6 = MP_DIGIT(a,6);
			/* FALLTHROUGH */
		case 6:
			a5 = MP_DIGIT(a,5);
			/* FALLTHROUGH */
		case 5:
			a4 = MP_DIGIT(a,4);
		}
		/*
		 * Split each upper digit into repositioned 32-bit halves (see
		 * the declarations above).  The shifts by 32 assume a 64-bit
		 * mp_digit in this branch.
		 */
		a7l = a7 << 32;
		a7h = a7 >> 32;
		a6l = a6 << 32;
		a6h = a6 >> 32;
		a5l = a5 << 32;
		a5h = a5 >> 32;
		a4l = a4 << 32;
		a4h = a4 >> 32;
		/* the low four digits seed the result */
		r3 = MP_DIGIT(a,3);
		r2 = MP_DIGIT(a,2);
		r1 = MP_DIGIT(a,1);
		r0 = MP_DIGIT(a,0);

		/*
		 * Same sequence of additions and subtractions as the 32-bit
		 * build, expressed on 64-bit digits assembled from the 32-bit
		 * halves.  The carry left after r3 in each chain is
		 * accumulated in r4.
		 */

		/* sum 1: upper half of a5 (in the upper half of r1), a6, a7;
		 * added twice */
		MP_ADD_CARRY(r1, a5h << 32, r1, 0, carry);
		MP_ADD_CARRY(r2, a6, r2, carry, carry);
		MP_ADD_CARRY(r3, a7, r3, carry, carry);
		r4 = carry;
		MP_ADD_CARRY(r1, a5h << 32, r1, 0, carry);
		MP_ADD_CARRY(r2, a6, r2, carry, carry);
		MP_ADD_CARRY(r3, a7, r3, carry, carry);
		r4 += carry;
		/* sum 2: a6 and a7 shifted up by 32 bits, starting in the
		 * upper half of r1; added twice */
		MP_ADD_CARRY(r1, a6l, r1, 0, carry);
		MP_ADD_CARRY(r2, a6h | a7l, r2, carry, carry);
		MP_ADD_CARRY(r3, a7h, r3, carry, carry);
		r4 += carry;
		MP_ADD_CARRY(r1, a6l, r1, 0, carry);
		MP_ADD_CARRY(r2, a6h | a7l, r2, carry, carry);
		MP_ADD_CARRY(r3, a7h, r3, carry, carry);
		r4 += carry;

		/* sum 3: a4, lower half of a5, nothing into r2, a7 into r3 */
		MP_ADD_CARRY(r0, a4, r0, 0, carry);
		MP_ADD_CARRY(r1, a5l >> 32, r1, carry, carry);
		MP_ADD_CARRY(r2, 0, r2, carry, carry);
		MP_ADD_CARRY(r3, a7, r3, carry, carry);
		r4 += carry;
		/* sum 4 */
		MP_ADD_CARRY(r0, a4h | a5l, r0, 0, carry);
		MP_ADD_CARRY(r1, a5h|(a6h<<32), r1, carry, carry);
		MP_ADD_CARRY(r2, a7, r2, carry, carry);
		MP_ADD_CARRY(r3, a6h | a4l, r3, carry, carry);
		r4 += carry;
		/* diff 5 */
		MP_SUB_BORROW(r0, a5h | a6l, r0, 0, carry);
		MP_SUB_BORROW(r1, a6h, r1, carry, carry);
		MP_SUB_BORROW(r2, 0, r2, carry, carry);
		MP_SUB_BORROW(r3, (a4l>>32)|a5l,r3, carry, carry);
		r4 -= carry;
		/* diff 6 */
		MP_SUB_BORROW(r0, a6, r0, 0, carry);
		MP_SUB_BORROW(r1, a7, r1, carry, carry);
		MP_SUB_BORROW(r2, 0, r2, carry, carry);
		MP_SUB_BORROW(r3, a4h|(a5h<<32),r3, carry, carry);
		r4 -= carry;
		/* diff 7 */
		MP_SUB_BORROW(r0, a6h|a7l, r0, 0, carry);
		MP_SUB_BORROW(r1, a7h|a4l, r1, carry, carry);
		MP_SUB_BORROW(r2, a4h|a5l, r2, carry, carry);
		MP_SUB_BORROW(r3, a6l, r3, carry, carry);
		r4 -= carry;
		/* diff 8 */
		MP_SUB_BORROW(r0, a7, r0, 0, carry);
		MP_SUB_BORROW(r1, a4h<<32, r1, carry, carry);
		MP_SUB_BORROW(r2, a5, r2, carry, carry);
		MP_SUB_BORROW(r3, a6h<<32, r3, carry, carry);
		r4 -= carry;

		/*
		 * reduce the overflows
		 *
		 * Each unit held in r4 stands for 2^256, which is congruent to
		 * 2^224 - 2^192 - 2^96 + 1 modulo p256.  r4 times that value is
		 * added back in, spread over the four 64-bit digits; the carry
		 * out of r3 becomes the new r4.
		 */
		while (r4 > 0) {
			mp_digit r4_long = r4;
			mp_digit r4l = (r4_long << 32);
			MP_ADD_CARRY(r0, r4_long, r0, 0, carry);
			MP_ADD_CARRY(r1, -r4l, r1, carry, carry);
			MP_ADD_CARRY(r2, MP_DIGIT_MAX, r2, carry, carry);
			MP_ADD_CARRY(r3, r4l-r4_long-1,r3, carry, carry);
			r4 = carry;
		}

		/*
		 * reduce the underflows
		 *
		 * Mirror image of the loop above: |r4| times the same constant
		 * is subtracted, and the borrow out of r3 becomes the new
		 * (negative) r4.
		 */
		while (r4 < 0) {
			mp_digit r4_long = -r4;
			mp_digit r4l = (r4_long << 32);
			MP_SUB_BORROW(r0, r4_long, r0, 0, carry);
			MP_SUB_BORROW(r1, -r4l, r1, carry, carry);
			MP_SUB_BORROW(r2, MP_DIGIT_MAX, r2, carry, carry);
			MP_SUB_BORROW(r3, r4l-r4_long-1,r3, carry, carry);
			r4 = -carry;
		}

		/*
		 * Make sure r can hold 4 digits.  Skipped when r is a, which
		 * already has at least 256 significant bits at this point.
		 */
		if (a != r) {
			MP_CHECKOK(s_mp_pad(r,4));
		}
		MP_SIGN(r) = MP_ZPOS;
		MP_USED(r) = 4;

		MP_DIGIT(r,3) = r3;
		MP_DIGIT(r,2) = r2;
		MP_DIGIT(r,1) = r1;
		MP_DIGIT(r,0) = r0;

		/*
		 * final reduction if necessary
		 *
		 * In 64-bit digits (most significant first) p256 is
		 * FFFFFFFF00000001 0000000000000000 00000000FFFFFFFF
		 * FFFFFFFFFFFFFFFF, so the test below is a digit-by-digit
		 * "r >= p256" comparison.
		 */
		if ((r3 > 0xFFFFFFFF00000001ULL) ||
			((r3 == 0xFFFFFFFF00000001ULL) &&
			(r2 || (r1 >> 32)||
			       (r1 == 0xFFFFFFFFULL && r0 == MP_DIGIT_MAX)))) {
			/* very rare, just use mp_sub */
			MP_CHECKOK(mp_sub(r, &meth->irr, r));
		}

		/* MP_USED was forced to 4; presumably this trims leading zero digits */
		s_mp_clamp(r);
#endif
	}

  CLEANUP:
	return res;
}

/*
 * Compute the square of polynomial a, reduce modulo p256.  Store the
 * result in r.  r could be a.  Uses optimized modular reduction for p256.
 *
 * The full square is written to r by mp_sqr and then reduced in place by
 * ec_GFp_nistp256_mod.  Returns MP_OKAY on success, otherwise the status
 * of the step that failed (via MP_CHECKOK, defined elsewhere).
 */
mp_err
ec_GFp_nistp256_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
{
	mp_err res = MP_OKAY;

	MP_CHECKOK(mp_sqr(a, r));
	MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth));
  CLEANUP:
	return res;
}

/*
 * Compute the product of two polynomials a and b, reduce modulo p256.
 * Store the result in r.  r could be a or b; a could be b.  Uses
 * optimized modular reduction for p256.
 *
 * The full product is written to r by mp_mul and then reduced in place by
 * ec_GFp_nistp256_mod.  Returns MP_OKAY on success, otherwise the status
 * of the step that failed (via MP_CHECKOK, defined elsewhere).
 */
mp_err
ec_GFp_nistp256_mul(const mp_int *a, const mp_int *b, mp_int *r,
					const GFMethod *meth)
{
	mp_err res = MP_OKAY;

	MP_CHECKOK(mp_mul(a, b, r));
	MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth));
  CLEANUP:
	return res;
}

/*
 * Wire in fast field arithmetic for named curves.
 *
 * When name is ECCurve_NIST_P256, the field_mod, field_mul and field_sqr
 * hooks of group->meth are pointed at the p256 routines in this file.
 * For any other name the group is left untouched.  Always returns
 * MP_OKAY.  No base point precomputation is done in this function.
 */
mp_err
ec_group_set_gfp256(ECGroup *group, ECCurveName name)
{
	if (name == ECCurve_NIST_P256) {
		group->meth->field_mod = &ec_GFp_nistp256_mod;
		group->meth->field_mul = &ec_GFp_nistp256_mul;
		group->meth->field_sqr = &ec_GFp_nistp256_sqr;
	}
	return MP_OKAY;
}