1*f9fbec18Smcpowers /* 2*f9fbec18Smcpowers * ***** BEGIN LICENSE BLOCK ***** 3*f9fbec18Smcpowers * Version: MPL 1.1/GPL 2.0/LGPL 2.1 4*f9fbec18Smcpowers * 5*f9fbec18Smcpowers * The contents of this file are subject to the Mozilla Public License Version 6*f9fbec18Smcpowers * 1.1 (the "License"); you may not use this file except in compliance with 7*f9fbec18Smcpowers * the License. You may obtain a copy of the License at 8*f9fbec18Smcpowers * http://www.mozilla.org/MPL/ 9*f9fbec18Smcpowers * 10*f9fbec18Smcpowers * Software distributed under the License is distributed on an "AS IS" basis, 11*f9fbec18Smcpowers * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 12*f9fbec18Smcpowers * for the specific language governing rights and limitations under the 13*f9fbec18Smcpowers * License. 14*f9fbec18Smcpowers * 15*f9fbec18Smcpowers * The Original Code is the elliptic curve math library for prime field curves. 16*f9fbec18Smcpowers * 17*f9fbec18Smcpowers * The Initial Developer of the Original Code is 18*f9fbec18Smcpowers * Sun Microsystems, Inc. 19*f9fbec18Smcpowers * Portions created by the Initial Developer are Copyright (C) 2003 20*f9fbec18Smcpowers * the Initial Developer. All Rights Reserved. 21*f9fbec18Smcpowers * 22*f9fbec18Smcpowers * Contributor(s): 23*f9fbec18Smcpowers * Douglas Stebila <douglas@stebila.ca>, Sun Microsystems Laboratories 24*f9fbec18Smcpowers * 25*f9fbec18Smcpowers * Alternatively, the contents of this file may be used under the terms of 26*f9fbec18Smcpowers * either the GNU General Public License Version 2 or later (the "GPL"), or 27*f9fbec18Smcpowers * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 28*f9fbec18Smcpowers * in which case the provisions of the GPL or the LGPL are applicable instead 29*f9fbec18Smcpowers * of those above. If you wish to allow use of your version of this file only 30*f9fbec18Smcpowers * under the terms of either the GPL or the LGPL, and not to allow others to 31*f9fbec18Smcpowers * use your version of this file under the terms of the MPL, indicate your 32*f9fbec18Smcpowers * decision by deleting the provisions above and replace them with the notice 33*f9fbec18Smcpowers * and other provisions required by the GPL or the LGPL. If you do not delete 34*f9fbec18Smcpowers * the provisions above, a recipient may use your version of this file under 35*f9fbec18Smcpowers * the terms of any one of the MPL, the GPL or the LGPL. 36*f9fbec18Smcpowers * 37*f9fbec18Smcpowers * ***** END LICENSE BLOCK ***** */ 38*f9fbec18Smcpowers /* 39*f9fbec18Smcpowers * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 40*f9fbec18Smcpowers * Use is subject to license terms. 41*f9fbec18Smcpowers * 42*f9fbec18Smcpowers * Sun elects to use this software under the MPL license. 43*f9fbec18Smcpowers */ 44*f9fbec18Smcpowers 45*f9fbec18Smcpowers #pragma ident "%Z%%M% %I% %E% SMI" 46*f9fbec18Smcpowers 47*f9fbec18Smcpowers #include "ecp.h" 48*f9fbec18Smcpowers #include "mpi.h" 49*f9fbec18Smcpowers #include "mplogic.h" 50*f9fbec18Smcpowers #include "mpi-priv.h" 51*f9fbec18Smcpowers #ifndef _KERNEL 52*f9fbec18Smcpowers #include <stdlib.h> 53*f9fbec18Smcpowers #endif 54*f9fbec18Smcpowers 55*f9fbec18Smcpowers #define ECP192_DIGITS ECL_CURVE_DIGITS(192) 56*f9fbec18Smcpowers 57*f9fbec18Smcpowers /* Fast modular reduction for p192 = 2^192 - 2^64 - 1. a can be r. Uses 58*f9fbec18Smcpowers * algorithm 7 from Brown, Hankerson, Lopez, Menezes. Software 59*f9fbec18Smcpowers * Implementation of the NIST Elliptic Curves over Prime Fields. */ 60*f9fbec18Smcpowers mp_err 61*f9fbec18Smcpowers ec_GFp_nistp192_mod(const mp_int *a, mp_int *r, const GFMethod *meth) 62*f9fbec18Smcpowers { 63*f9fbec18Smcpowers mp_err res = MP_OKAY; 64*f9fbec18Smcpowers mp_size a_used = MP_USED(a); 65*f9fbec18Smcpowers mp_digit r3; 66*f9fbec18Smcpowers #ifndef MPI_AMD64_ADD 67*f9fbec18Smcpowers mp_digit carry; 68*f9fbec18Smcpowers #endif 69*f9fbec18Smcpowers #ifdef ECL_THIRTY_TWO_BIT 70*f9fbec18Smcpowers mp_digit a5a = 0, a5b = 0, a4a = 0, a4b = 0, a3a = 0, a3b = 0; 71*f9fbec18Smcpowers mp_digit r0a, r0b, r1a, r1b, r2a, r2b; 72*f9fbec18Smcpowers #else 73*f9fbec18Smcpowers mp_digit a5 = 0, a4 = 0, a3 = 0; 74*f9fbec18Smcpowers mp_digit r0, r1, r2; 75*f9fbec18Smcpowers #endif 76*f9fbec18Smcpowers 77*f9fbec18Smcpowers /* reduction not needed if a is not larger than field size */ 78*f9fbec18Smcpowers if (a_used < ECP192_DIGITS) { 79*f9fbec18Smcpowers if (a == r) { 80*f9fbec18Smcpowers return MP_OKAY; 81*f9fbec18Smcpowers } 82*f9fbec18Smcpowers return mp_copy(a, r); 83*f9fbec18Smcpowers } 84*f9fbec18Smcpowers 85*f9fbec18Smcpowers /* for polynomials larger than twice the field size, use regular 86*f9fbec18Smcpowers * reduction */ 87*f9fbec18Smcpowers if (a_used > ECP192_DIGITS*2) { 88*f9fbec18Smcpowers MP_CHECKOK(mp_mod(a, &meth->irr, r)); 89*f9fbec18Smcpowers } else { 90*f9fbec18Smcpowers /* copy out upper words of a */ 91*f9fbec18Smcpowers 92*f9fbec18Smcpowers #ifdef ECL_THIRTY_TWO_BIT 93*f9fbec18Smcpowers 94*f9fbec18Smcpowers /* in all the math below, 95*f9fbec18Smcpowers * nXb is most signifiant, nXa is least significant */ 96*f9fbec18Smcpowers switch (a_used) { 97*f9fbec18Smcpowers case 12: 98*f9fbec18Smcpowers a5b = MP_DIGIT(a, 11); 99*f9fbec18Smcpowers case 11: 100*f9fbec18Smcpowers a5a = MP_DIGIT(a, 10); 101*f9fbec18Smcpowers case 10: 102*f9fbec18Smcpowers a4b = MP_DIGIT(a, 9); 103*f9fbec18Smcpowers case 9: 104*f9fbec18Smcpowers a4a = MP_DIGIT(a, 8); 105*f9fbec18Smcpowers case 8: 106*f9fbec18Smcpowers a3b = MP_DIGIT(a, 7); 107*f9fbec18Smcpowers case 7: 108*f9fbec18Smcpowers a3a = MP_DIGIT(a, 6); 109*f9fbec18Smcpowers } 110*f9fbec18Smcpowers 111*f9fbec18Smcpowers 112*f9fbec18Smcpowers r2b= MP_DIGIT(a, 5); 113*f9fbec18Smcpowers r2a= MP_DIGIT(a, 4); 114*f9fbec18Smcpowers r1b = MP_DIGIT(a, 3); 115*f9fbec18Smcpowers r1a = MP_DIGIT(a, 2); 116*f9fbec18Smcpowers r0b = MP_DIGIT(a, 1); 117*f9fbec18Smcpowers r0a = MP_DIGIT(a, 0); 118*f9fbec18Smcpowers 119*f9fbec18Smcpowers /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */ 120*f9fbec18Smcpowers MP_ADD_CARRY(r0a, a3a, r0a, 0, carry); 121*f9fbec18Smcpowers MP_ADD_CARRY(r0b, a3b, r0b, carry, carry); 122*f9fbec18Smcpowers MP_ADD_CARRY(r1a, a3a, r1a, carry, carry); 123*f9fbec18Smcpowers MP_ADD_CARRY(r1b, a3b, r1b, carry, carry); 124*f9fbec18Smcpowers MP_ADD_CARRY(r2a, a4a, r2a, carry, carry); 125*f9fbec18Smcpowers MP_ADD_CARRY(r2b, a4b, r2b, carry, carry); 126*f9fbec18Smcpowers r3 = carry; carry = 0; 127*f9fbec18Smcpowers MP_ADD_CARRY(r0a, a5a, r0a, 0, carry); 128*f9fbec18Smcpowers MP_ADD_CARRY(r0b, a5b, r0b, carry, carry); 129*f9fbec18Smcpowers MP_ADD_CARRY(r1a, a5a, r1a, carry, carry); 130*f9fbec18Smcpowers MP_ADD_CARRY(r1b, a5b, r1b, carry, carry); 131*f9fbec18Smcpowers MP_ADD_CARRY(r2a, a5a, r2a, carry, carry); 132*f9fbec18Smcpowers MP_ADD_CARRY(r2b, a5b, r2b, carry, carry); 133*f9fbec18Smcpowers r3 += carry; 134*f9fbec18Smcpowers MP_ADD_CARRY(r1a, a4a, r1a, 0, carry); 135*f9fbec18Smcpowers MP_ADD_CARRY(r1b, a4b, r1b, carry, carry); 136*f9fbec18Smcpowers MP_ADD_CARRY(r2a, 0, r2a, carry, carry); 137*f9fbec18Smcpowers MP_ADD_CARRY(r2b, 0, r2b, carry, carry); 138*f9fbec18Smcpowers r3 += carry; 139*f9fbec18Smcpowers 140*f9fbec18Smcpowers /* reduce out the carry */ 141*f9fbec18Smcpowers while (r3) { 142*f9fbec18Smcpowers MP_ADD_CARRY(r0a, r3, r0a, 0, carry); 143*f9fbec18Smcpowers MP_ADD_CARRY(r0b, 0, r0b, carry, carry); 144*f9fbec18Smcpowers MP_ADD_CARRY(r1a, r3, r1a, carry, carry); 145*f9fbec18Smcpowers MP_ADD_CARRY(r1b, 0, r1b, carry, carry); 146*f9fbec18Smcpowers MP_ADD_CARRY(r2a, 0, r2a, carry, carry); 147*f9fbec18Smcpowers MP_ADD_CARRY(r2b, 0, r2b, carry, carry); 148*f9fbec18Smcpowers r3 = carry; 149*f9fbec18Smcpowers } 150*f9fbec18Smcpowers 151*f9fbec18Smcpowers /* check for final reduction */ 152*f9fbec18Smcpowers /* 153*f9fbec18Smcpowers * our field is 0xffffffffffffffff, 0xfffffffffffffffe, 154*f9fbec18Smcpowers * 0xffffffffffffffff. That means we can only be over and need 155*f9fbec18Smcpowers * one more reduction 156*f9fbec18Smcpowers * if r2 == 0xffffffffffffffffff (same as r2+1 == 0) 157*f9fbec18Smcpowers * and 158*f9fbec18Smcpowers * r1 == 0xffffffffffffffffff or 159*f9fbec18Smcpowers * r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff 160*f9fbec18Smcpowers * In all cases, we subtract the field (or add the 2's 161*f9fbec18Smcpowers * complement value (1,1,0)). (r0, r1, r2) 162*f9fbec18Smcpowers */ 163*f9fbec18Smcpowers if (((r2b == 0xffffffff) && (r2a == 0xffffffff) 164*f9fbec18Smcpowers && (r1b == 0xffffffff) ) && 165*f9fbec18Smcpowers ((r1a == 0xffffffff) || 166*f9fbec18Smcpowers (r1a == 0xfffffffe) && (r0a == 0xffffffff) && 167*f9fbec18Smcpowers (r0b == 0xffffffff)) ) { 168*f9fbec18Smcpowers /* do a quick subtract */ 169*f9fbec18Smcpowers MP_ADD_CARRY(r0a, 1, r0a, 0, carry); 170*f9fbec18Smcpowers r0b += carry; 171*f9fbec18Smcpowers r1a = r1b = r2a = r2b = 0; 172*f9fbec18Smcpowers } 173*f9fbec18Smcpowers 174*f9fbec18Smcpowers /* set the lower words of r */ 175*f9fbec18Smcpowers if (a != r) { 176*f9fbec18Smcpowers MP_CHECKOK(s_mp_pad(r, 6)); 177*f9fbec18Smcpowers } 178*f9fbec18Smcpowers MP_DIGIT(r, 5) = r2b; 179*f9fbec18Smcpowers MP_DIGIT(r, 4) = r2a; 180*f9fbec18Smcpowers MP_DIGIT(r, 3) = r1b; 181*f9fbec18Smcpowers MP_DIGIT(r, 2) = r1a; 182*f9fbec18Smcpowers MP_DIGIT(r, 1) = r0b; 183*f9fbec18Smcpowers MP_DIGIT(r, 0) = r0a; 184*f9fbec18Smcpowers MP_USED(r) = 6; 185*f9fbec18Smcpowers #else 186*f9fbec18Smcpowers switch (a_used) { 187*f9fbec18Smcpowers case 6: 188*f9fbec18Smcpowers a5 = MP_DIGIT(a, 5); 189*f9fbec18Smcpowers case 5: 190*f9fbec18Smcpowers a4 = MP_DIGIT(a, 4); 191*f9fbec18Smcpowers case 4: 192*f9fbec18Smcpowers a3 = MP_DIGIT(a, 3); 193*f9fbec18Smcpowers } 194*f9fbec18Smcpowers 195*f9fbec18Smcpowers r2 = MP_DIGIT(a, 2); 196*f9fbec18Smcpowers r1 = MP_DIGIT(a, 1); 197*f9fbec18Smcpowers r0 = MP_DIGIT(a, 0); 198*f9fbec18Smcpowers 199*f9fbec18Smcpowers /* implement r = (a2,a1,a0)+(a5,a5,a5)+(a4,a4,0)+(0,a3,a3) */ 200*f9fbec18Smcpowers #ifndef MPI_AMD64_ADD 201*f9fbec18Smcpowers MP_ADD_CARRY(r0, a3, r0, 0, carry); 202*f9fbec18Smcpowers MP_ADD_CARRY(r1, a3, r1, carry, carry); 203*f9fbec18Smcpowers MP_ADD_CARRY(r2, a4, r2, carry, carry); 204*f9fbec18Smcpowers r3 = carry; 205*f9fbec18Smcpowers MP_ADD_CARRY(r0, a5, r0, 0, carry); 206*f9fbec18Smcpowers MP_ADD_CARRY(r1, a5, r1, carry, carry); 207*f9fbec18Smcpowers MP_ADD_CARRY(r2, a5, r2, carry, carry); 208*f9fbec18Smcpowers r3 += carry; 209*f9fbec18Smcpowers MP_ADD_CARRY(r1, a4, r1, 0, carry); 210*f9fbec18Smcpowers MP_ADD_CARRY(r2, 0, r2, carry, carry); 211*f9fbec18Smcpowers r3 += carry; 212*f9fbec18Smcpowers 213*f9fbec18Smcpowers #else 214*f9fbec18Smcpowers r2 = MP_DIGIT(a, 2); 215*f9fbec18Smcpowers r1 = MP_DIGIT(a, 1); 216*f9fbec18Smcpowers r0 = MP_DIGIT(a, 0); 217*f9fbec18Smcpowers 218*f9fbec18Smcpowers /* set the lower words of r */ 219*f9fbec18Smcpowers __asm__ ( 220*f9fbec18Smcpowers "xorq %3,%3 \n\t" 221*f9fbec18Smcpowers "addq %4,%0 \n\t" 222*f9fbec18Smcpowers "adcq %4,%1 \n\t" 223*f9fbec18Smcpowers "adcq %5,%2 \n\t" 224*f9fbec18Smcpowers "adcq $0,%3 \n\t" 225*f9fbec18Smcpowers "addq %6,%0 \n\t" 226*f9fbec18Smcpowers "adcq %6,%1 \n\t" 227*f9fbec18Smcpowers "adcq %6,%2 \n\t" 228*f9fbec18Smcpowers "adcq $0,%3 \n\t" 229*f9fbec18Smcpowers "addq %5,%1 \n\t" 230*f9fbec18Smcpowers "adcq $0,%2 \n\t" 231*f9fbec18Smcpowers "adcq $0,%3 \n\t" 232*f9fbec18Smcpowers : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3), 233*f9fbec18Smcpowers "=r"(a4), "=r"(a5) 234*f9fbec18Smcpowers : "0" (r0), "1" (r1), "2" (r2), "3" (r3), 235*f9fbec18Smcpowers "4" (a3), "5" (a4), "6"(a5) 236*f9fbec18Smcpowers : "%cc" ); 237*f9fbec18Smcpowers #endif 238*f9fbec18Smcpowers 239*f9fbec18Smcpowers /* reduce out the carry */ 240*f9fbec18Smcpowers while (r3) { 241*f9fbec18Smcpowers #ifndef MPI_AMD64_ADD 242*f9fbec18Smcpowers MP_ADD_CARRY(r0, r3, r0, 0, carry); 243*f9fbec18Smcpowers MP_ADD_CARRY(r1, r3, r1, carry, carry); 244*f9fbec18Smcpowers MP_ADD_CARRY(r2, 0, r2, carry, carry); 245*f9fbec18Smcpowers r3 = carry; 246*f9fbec18Smcpowers #else 247*f9fbec18Smcpowers a3=r3; 248*f9fbec18Smcpowers __asm__ ( 249*f9fbec18Smcpowers "xorq %3,%3 \n\t" 250*f9fbec18Smcpowers "addq %4,%0 \n\t" 251*f9fbec18Smcpowers "adcq %4,%1 \n\t" 252*f9fbec18Smcpowers "adcq $0,%2 \n\t" 253*f9fbec18Smcpowers "adcq $0,%3 \n\t" 254*f9fbec18Smcpowers : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(a3) 255*f9fbec18Smcpowers : "0" (r0), "1" (r1), "2" (r2), "3" (r3), "4"(a3) 256*f9fbec18Smcpowers : "%cc" ); 257*f9fbec18Smcpowers #endif 258*f9fbec18Smcpowers } 259*f9fbec18Smcpowers 260*f9fbec18Smcpowers /* check for final reduction */ 261*f9fbec18Smcpowers /* 262*f9fbec18Smcpowers * our field is 0xffffffffffffffff, 0xfffffffffffffffe, 263*f9fbec18Smcpowers * 0xffffffffffffffff. That means we can only be over and need 264*f9fbec18Smcpowers * one more reduction 265*f9fbec18Smcpowers * if r2 == 0xffffffffffffffffff (same as r2+1 == 0) 266*f9fbec18Smcpowers * and 267*f9fbec18Smcpowers * r1 == 0xffffffffffffffffff or 268*f9fbec18Smcpowers * r1 == 0xfffffffffffffffffe and r0 = 0xfffffffffffffffff 269*f9fbec18Smcpowers * In all cases, we subtract the field (or add the 2's 270*f9fbec18Smcpowers * complement value (1,1,0)). (r0, r1, r2) 271*f9fbec18Smcpowers */ 272*f9fbec18Smcpowers if (r3 || ((r2 == MP_DIGIT_MAX) && 273*f9fbec18Smcpowers ((r1 == MP_DIGIT_MAX) || 274*f9fbec18Smcpowers ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) { 275*f9fbec18Smcpowers /* do a quick subtract */ 276*f9fbec18Smcpowers r0++; 277*f9fbec18Smcpowers r1 = r2 = 0; 278*f9fbec18Smcpowers } 279*f9fbec18Smcpowers /* set the lower words of r */ 280*f9fbec18Smcpowers if (a != r) { 281*f9fbec18Smcpowers MP_CHECKOK(s_mp_pad(r, 3)); 282*f9fbec18Smcpowers } 283*f9fbec18Smcpowers MP_DIGIT(r, 2) = r2; 284*f9fbec18Smcpowers MP_DIGIT(r, 1) = r1; 285*f9fbec18Smcpowers MP_DIGIT(r, 0) = r0; 286*f9fbec18Smcpowers MP_USED(r) = 3; 287*f9fbec18Smcpowers #endif 288*f9fbec18Smcpowers } 289*f9fbec18Smcpowers 290*f9fbec18Smcpowers CLEANUP: 291*f9fbec18Smcpowers return res; 292*f9fbec18Smcpowers } 293*f9fbec18Smcpowers 294*f9fbec18Smcpowers #ifndef ECL_THIRTY_TWO_BIT 295*f9fbec18Smcpowers /* Compute the sum of 192 bit curves. Do the work in-line since the 296*f9fbec18Smcpowers * number of words are so small, we don't want to overhead of mp function 297*f9fbec18Smcpowers * calls. Uses optimized modular reduction for p192. 298*f9fbec18Smcpowers */ 299*f9fbec18Smcpowers mp_err 300*f9fbec18Smcpowers ec_GFp_nistp192_add(const mp_int *a, const mp_int *b, mp_int *r, 301*f9fbec18Smcpowers const GFMethod *meth) 302*f9fbec18Smcpowers { 303*f9fbec18Smcpowers mp_err res = MP_OKAY; 304*f9fbec18Smcpowers mp_digit a0 = 0, a1 = 0, a2 = 0; 305*f9fbec18Smcpowers mp_digit r0 = 0, r1 = 0, r2 = 0; 306*f9fbec18Smcpowers mp_digit carry; 307*f9fbec18Smcpowers 308*f9fbec18Smcpowers switch(MP_USED(a)) { 309*f9fbec18Smcpowers case 3: 310*f9fbec18Smcpowers a2 = MP_DIGIT(a,2); 311*f9fbec18Smcpowers case 2: 312*f9fbec18Smcpowers a1 = MP_DIGIT(a,1); 313*f9fbec18Smcpowers case 1: 314*f9fbec18Smcpowers a0 = MP_DIGIT(a,0); 315*f9fbec18Smcpowers } 316*f9fbec18Smcpowers switch(MP_USED(b)) { 317*f9fbec18Smcpowers case 3: 318*f9fbec18Smcpowers r2 = MP_DIGIT(b,2); 319*f9fbec18Smcpowers case 2: 320*f9fbec18Smcpowers r1 = MP_DIGIT(b,1); 321*f9fbec18Smcpowers case 1: 322*f9fbec18Smcpowers r0 = MP_DIGIT(b,0); 323*f9fbec18Smcpowers } 324*f9fbec18Smcpowers 325*f9fbec18Smcpowers #ifndef MPI_AMD64_ADD 326*f9fbec18Smcpowers MP_ADD_CARRY(a0, r0, r0, 0, carry); 327*f9fbec18Smcpowers MP_ADD_CARRY(a1, r1, r1, carry, carry); 328*f9fbec18Smcpowers MP_ADD_CARRY(a2, r2, r2, carry, carry); 329*f9fbec18Smcpowers #else 330*f9fbec18Smcpowers __asm__ ( 331*f9fbec18Smcpowers "xorq %3,%3 \n\t" 332*f9fbec18Smcpowers "addq %4,%0 \n\t" 333*f9fbec18Smcpowers "adcq %5,%1 \n\t" 334*f9fbec18Smcpowers "adcq %6,%2 \n\t" 335*f9fbec18Smcpowers "adcq $0,%3 \n\t" 336*f9fbec18Smcpowers : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry) 337*f9fbec18Smcpowers : "r" (a0), "r" (a1), "r" (a2), "0" (r0), 338*f9fbec18Smcpowers "1" (r1), "2" (r2) 339*f9fbec18Smcpowers : "%cc" ); 340*f9fbec18Smcpowers #endif 341*f9fbec18Smcpowers 342*f9fbec18Smcpowers /* Do quick 'subract' if we've gone over 343*f9fbec18Smcpowers * (add the 2's complement of the curve field) */ 344*f9fbec18Smcpowers if (carry || ((r2 == MP_DIGIT_MAX) && 345*f9fbec18Smcpowers ((r1 == MP_DIGIT_MAX) || 346*f9fbec18Smcpowers ((r1 == (MP_DIGIT_MAX-1)) && (r0 == MP_DIGIT_MAX))))) { 347*f9fbec18Smcpowers #ifndef MPI_AMD64_ADD 348*f9fbec18Smcpowers MP_ADD_CARRY(r0, 1, r0, 0, carry); 349*f9fbec18Smcpowers MP_ADD_CARRY(r1, 1, r1, carry, carry); 350*f9fbec18Smcpowers MP_ADD_CARRY(r2, 0, r2, carry, carry); 351*f9fbec18Smcpowers #else 352*f9fbec18Smcpowers __asm__ ( 353*f9fbec18Smcpowers "addq $1,%0 \n\t" 354*f9fbec18Smcpowers "adcq $1,%1 \n\t" 355*f9fbec18Smcpowers "adcq $0,%2 \n\t" 356*f9fbec18Smcpowers : "=r"(r0), "=r"(r1), "=r"(r2) 357*f9fbec18Smcpowers : "0" (r0), "1" (r1), "2" (r2) 358*f9fbec18Smcpowers : "%cc" ); 359*f9fbec18Smcpowers #endif 360*f9fbec18Smcpowers } 361*f9fbec18Smcpowers 362*f9fbec18Smcpowers 363*f9fbec18Smcpowers MP_CHECKOK(s_mp_pad(r, 3)); 364*f9fbec18Smcpowers MP_DIGIT(r, 2) = r2; 365*f9fbec18Smcpowers MP_DIGIT(r, 1) = r1; 366*f9fbec18Smcpowers MP_DIGIT(r, 0) = r0; 367*f9fbec18Smcpowers MP_SIGN(r) = MP_ZPOS; 368*f9fbec18Smcpowers MP_USED(r) = 3; 369*f9fbec18Smcpowers s_mp_clamp(r); 370*f9fbec18Smcpowers 371*f9fbec18Smcpowers 372*f9fbec18Smcpowers CLEANUP: 373*f9fbec18Smcpowers return res; 374*f9fbec18Smcpowers } 375*f9fbec18Smcpowers 376*f9fbec18Smcpowers /* Compute the diff of 192 bit curves. Do the work in-line since the 377*f9fbec18Smcpowers * number of words are so small, we don't want to overhead of mp function 378*f9fbec18Smcpowers * calls. Uses optimized modular reduction for p192. 379*f9fbec18Smcpowers */ 380*f9fbec18Smcpowers mp_err 381*f9fbec18Smcpowers ec_GFp_nistp192_sub(const mp_int *a, const mp_int *b, mp_int *r, 382*f9fbec18Smcpowers const GFMethod *meth) 383*f9fbec18Smcpowers { 384*f9fbec18Smcpowers mp_err res = MP_OKAY; 385*f9fbec18Smcpowers mp_digit b0 = 0, b1 = 0, b2 = 0; 386*f9fbec18Smcpowers mp_digit r0 = 0, r1 = 0, r2 = 0; 387*f9fbec18Smcpowers mp_digit borrow; 388*f9fbec18Smcpowers 389*f9fbec18Smcpowers switch(MP_USED(a)) { 390*f9fbec18Smcpowers case 3: 391*f9fbec18Smcpowers r2 = MP_DIGIT(a,2); 392*f9fbec18Smcpowers case 2: 393*f9fbec18Smcpowers r1 = MP_DIGIT(a,1); 394*f9fbec18Smcpowers case 1: 395*f9fbec18Smcpowers r0 = MP_DIGIT(a,0); 396*f9fbec18Smcpowers } 397*f9fbec18Smcpowers 398*f9fbec18Smcpowers switch(MP_USED(b)) { 399*f9fbec18Smcpowers case 3: 400*f9fbec18Smcpowers b2 = MP_DIGIT(b,2); 401*f9fbec18Smcpowers case 2: 402*f9fbec18Smcpowers b1 = MP_DIGIT(b,1); 403*f9fbec18Smcpowers case 1: 404*f9fbec18Smcpowers b0 = MP_DIGIT(b,0); 405*f9fbec18Smcpowers } 406*f9fbec18Smcpowers 407*f9fbec18Smcpowers #ifndef MPI_AMD64_ADD 408*f9fbec18Smcpowers MP_SUB_BORROW(r0, b0, r0, 0, borrow); 409*f9fbec18Smcpowers MP_SUB_BORROW(r1, b1, r1, borrow, borrow); 410*f9fbec18Smcpowers MP_SUB_BORROW(r2, b2, r2, borrow, borrow); 411*f9fbec18Smcpowers #else 412*f9fbec18Smcpowers __asm__ ( 413*f9fbec18Smcpowers "xorq %3,%3 \n\t" 414*f9fbec18Smcpowers "subq %4,%0 \n\t" 415*f9fbec18Smcpowers "sbbq %5,%1 \n\t" 416*f9fbec18Smcpowers "sbbq %6,%2 \n\t" 417*f9fbec18Smcpowers "adcq $0,%3 \n\t" 418*f9fbec18Smcpowers : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(borrow) 419*f9fbec18Smcpowers : "r" (b0), "r" (b1), "r" (b2), "0" (r0), 420*f9fbec18Smcpowers "1" (r1), "2" (r2) 421*f9fbec18Smcpowers : "%cc" ); 422*f9fbec18Smcpowers #endif 423*f9fbec18Smcpowers 424*f9fbec18Smcpowers /* Do quick 'add' if we've gone under 0 425*f9fbec18Smcpowers * (subtract the 2's complement of the curve field) */ 426*f9fbec18Smcpowers if (borrow) { 427*f9fbec18Smcpowers #ifndef MPI_AMD64_ADD 428*f9fbec18Smcpowers MP_SUB_BORROW(r0, 1, r0, 0, borrow); 429*f9fbec18Smcpowers MP_SUB_BORROW(r1, 1, r1, borrow, borrow); 430*f9fbec18Smcpowers MP_SUB_BORROW(r2, 0, r2, borrow, borrow); 431*f9fbec18Smcpowers #else 432*f9fbec18Smcpowers __asm__ ( 433*f9fbec18Smcpowers "subq $1,%0 \n\t" 434*f9fbec18Smcpowers "sbbq $1,%1 \n\t" 435*f9fbec18Smcpowers "sbbq $0,%2 \n\t" 436*f9fbec18Smcpowers : "=r"(r0), "=r"(r1), "=r"(r2) 437*f9fbec18Smcpowers : "0" (r0), "1" (r1), "2" (r2) 438*f9fbec18Smcpowers : "%cc" ); 439*f9fbec18Smcpowers #endif 440*f9fbec18Smcpowers } 441*f9fbec18Smcpowers 442*f9fbec18Smcpowers MP_CHECKOK(s_mp_pad(r, 3)); 443*f9fbec18Smcpowers MP_DIGIT(r, 2) = r2; 444*f9fbec18Smcpowers MP_DIGIT(r, 1) = r1; 445*f9fbec18Smcpowers MP_DIGIT(r, 0) = r0; 446*f9fbec18Smcpowers MP_SIGN(r) = MP_ZPOS; 447*f9fbec18Smcpowers MP_USED(r) = 3; 448*f9fbec18Smcpowers s_mp_clamp(r); 449*f9fbec18Smcpowers 450*f9fbec18Smcpowers CLEANUP: 451*f9fbec18Smcpowers return res; 452*f9fbec18Smcpowers } 453*f9fbec18Smcpowers 454*f9fbec18Smcpowers #endif 455*f9fbec18Smcpowers 456*f9fbec18Smcpowers /* Compute the square of polynomial a, reduce modulo p192. Store the 457*f9fbec18Smcpowers * result in r. r could be a. Uses optimized modular reduction for p192. 458*f9fbec18Smcpowers */ 459*f9fbec18Smcpowers mp_err 460*f9fbec18Smcpowers ec_GFp_nistp192_sqr(const mp_int *a, mp_int *r, const GFMethod *meth) 461*f9fbec18Smcpowers { 462*f9fbec18Smcpowers mp_err res = MP_OKAY; 463*f9fbec18Smcpowers 464*f9fbec18Smcpowers MP_CHECKOK(mp_sqr(a, r)); 465*f9fbec18Smcpowers MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth)); 466*f9fbec18Smcpowers CLEANUP: 467*f9fbec18Smcpowers return res; 468*f9fbec18Smcpowers } 469*f9fbec18Smcpowers 470*f9fbec18Smcpowers /* Compute the product of two polynomials a and b, reduce modulo p192. 471*f9fbec18Smcpowers * Store the result in r. r could be a or b; a could be b. Uses 472*f9fbec18Smcpowers * optimized modular reduction for p192. */ 473*f9fbec18Smcpowers mp_err 474*f9fbec18Smcpowers ec_GFp_nistp192_mul(const mp_int *a, const mp_int *b, mp_int *r, 475*f9fbec18Smcpowers const GFMethod *meth) 476*f9fbec18Smcpowers { 477*f9fbec18Smcpowers mp_err res = MP_OKAY; 478*f9fbec18Smcpowers 479*f9fbec18Smcpowers MP_CHECKOK(mp_mul(a, b, r)); 480*f9fbec18Smcpowers MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth)); 481*f9fbec18Smcpowers CLEANUP: 482*f9fbec18Smcpowers return res; 483*f9fbec18Smcpowers } 484*f9fbec18Smcpowers 485*f9fbec18Smcpowers /* Divides two field elements. If a is NULL, then returns the inverse of 486*f9fbec18Smcpowers * b. */ 487*f9fbec18Smcpowers mp_err 488*f9fbec18Smcpowers ec_GFp_nistp192_div(const mp_int *a, const mp_int *b, mp_int *r, 489*f9fbec18Smcpowers const GFMethod *meth) 490*f9fbec18Smcpowers { 491*f9fbec18Smcpowers mp_err res = MP_OKAY; 492*f9fbec18Smcpowers mp_int t; 493*f9fbec18Smcpowers 494*f9fbec18Smcpowers /* If a is NULL, then return the inverse of b, otherwise return a/b. */ 495*f9fbec18Smcpowers if (a == NULL) { 496*f9fbec18Smcpowers return mp_invmod(b, &meth->irr, r); 497*f9fbec18Smcpowers } else { 498*f9fbec18Smcpowers /* MPI doesn't support divmod, so we implement it using invmod and 499*f9fbec18Smcpowers * mulmod. */ 500*f9fbec18Smcpowers MP_CHECKOK(mp_init(&t, FLAG(b))); 501*f9fbec18Smcpowers MP_CHECKOK(mp_invmod(b, &meth->irr, &t)); 502*f9fbec18Smcpowers MP_CHECKOK(mp_mul(a, &t, r)); 503*f9fbec18Smcpowers MP_CHECKOK(ec_GFp_nistp192_mod(r, r, meth)); 504*f9fbec18Smcpowers CLEANUP: 505*f9fbec18Smcpowers mp_clear(&t); 506*f9fbec18Smcpowers return res; 507*f9fbec18Smcpowers } 508*f9fbec18Smcpowers } 509*f9fbec18Smcpowers 510*f9fbec18Smcpowers /* Wire in fast field arithmetic and precomputation of base point for 511*f9fbec18Smcpowers * named curves. */ 512*f9fbec18Smcpowers mp_err 513*f9fbec18Smcpowers ec_group_set_gfp192(ECGroup *group, ECCurveName name) 514*f9fbec18Smcpowers { 515*f9fbec18Smcpowers if (name == ECCurve_NIST_P192) { 516*f9fbec18Smcpowers group->meth->field_mod = &ec_GFp_nistp192_mod; 517*f9fbec18Smcpowers group->meth->field_mul = &ec_GFp_nistp192_mul; 518*f9fbec18Smcpowers group->meth->field_sqr = &ec_GFp_nistp192_sqr; 519*f9fbec18Smcpowers group->meth->field_div = &ec_GFp_nistp192_div; 520*f9fbec18Smcpowers #ifndef ECL_THIRTY_TWO_BIT 521*f9fbec18Smcpowers group->meth->field_add = &ec_GFp_nistp192_add; 522*f9fbec18Smcpowers group->meth->field_sub = &ec_GFp_nistp192_sub; 523*f9fbec18Smcpowers #endif 524*f9fbec18Smcpowers } 525*f9fbec18Smcpowers return MP_OKAY; 526*f9fbec18Smcpowers } 527