1*f7167e0eSDag-Erling Smørgrav /* $OpenBSD: smult_curve25519_ref.c,v 1.2 2013/11/02 22:02:14 markus Exp $ */ 2*f7167e0eSDag-Erling Smørgrav /* 3*f7167e0eSDag-Erling Smørgrav version 20081011 4*f7167e0eSDag-Erling Smørgrav Matthew Dempsky 5*f7167e0eSDag-Erling Smørgrav Public domain. 6*f7167e0eSDag-Erling Smørgrav Derived from public domain code by D. J. Bernstein. 7*f7167e0eSDag-Erling Smørgrav */ 8*f7167e0eSDag-Erling Smørgrav 9*f7167e0eSDag-Erling Smørgrav int crypto_scalarmult_curve25519(unsigned char *, const unsigned char *, const unsigned char *); 10*f7167e0eSDag-Erling Smørgrav 11*f7167e0eSDag-Erling Smørgrav static void add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32]) 12*f7167e0eSDag-Erling Smørgrav { 13*f7167e0eSDag-Erling Smørgrav unsigned int j; 14*f7167e0eSDag-Erling Smørgrav unsigned int u; 15*f7167e0eSDag-Erling Smørgrav u = 0; 16*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 31;++j) { u += a[j] + b[j]; out[j] = u & 255; u >>= 8; } 17*f7167e0eSDag-Erling Smørgrav u += a[31] + b[31]; out[31] = u; 18*f7167e0eSDag-Erling Smørgrav } 19*f7167e0eSDag-Erling Smørgrav 20*f7167e0eSDag-Erling Smørgrav static void sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32]) 21*f7167e0eSDag-Erling Smørgrav { 22*f7167e0eSDag-Erling Smørgrav unsigned int j; 23*f7167e0eSDag-Erling Smørgrav unsigned int u; 24*f7167e0eSDag-Erling Smørgrav u = 218; 25*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 31;++j) { 26*f7167e0eSDag-Erling Smørgrav u += a[j] + 65280 - b[j]; 27*f7167e0eSDag-Erling Smørgrav out[j] = u & 255; 28*f7167e0eSDag-Erling Smørgrav u >>= 8; 29*f7167e0eSDag-Erling Smørgrav } 30*f7167e0eSDag-Erling Smørgrav u += a[31] - b[31]; 31*f7167e0eSDag-Erling Smørgrav out[31] = u; 32*f7167e0eSDag-Erling Smørgrav } 33*f7167e0eSDag-Erling Smørgrav 34*f7167e0eSDag-Erling Smørgrav static void squeeze(unsigned int a[32]) 35*f7167e0eSDag-Erling Smørgrav { 36*f7167e0eSDag-Erling Smørgrav unsigned int j; 37*f7167e0eSDag-Erling Smørgrav unsigned int u; 38*f7167e0eSDag-Erling Smørgrav u = 0; 39*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; } 40*f7167e0eSDag-Erling Smørgrav u += a[31]; a[31] = u & 127; 41*f7167e0eSDag-Erling Smørgrav u = 19 * (u >> 7); 42*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; } 43*f7167e0eSDag-Erling Smørgrav u += a[31]; a[31] = u; 44*f7167e0eSDag-Erling Smørgrav } 45*f7167e0eSDag-Erling Smørgrav 46*f7167e0eSDag-Erling Smørgrav static const unsigned int minusp[32] = { 47*f7167e0eSDag-Erling Smørgrav 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 48*f7167e0eSDag-Erling Smørgrav } ; 49*f7167e0eSDag-Erling Smørgrav 50*f7167e0eSDag-Erling Smørgrav static void freeze(unsigned int a[32]) 51*f7167e0eSDag-Erling Smørgrav { 52*f7167e0eSDag-Erling Smørgrav unsigned int aorig[32]; 53*f7167e0eSDag-Erling Smørgrav unsigned int j; 54*f7167e0eSDag-Erling Smørgrav unsigned int negative; 55*f7167e0eSDag-Erling Smørgrav 56*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 32;++j) aorig[j] = a[j]; 57*f7167e0eSDag-Erling Smørgrav add(a,a,minusp); 58*f7167e0eSDag-Erling Smørgrav negative = -((a[31] >> 7) & 1); 59*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 32;++j) a[j] ^= negative & (aorig[j] ^ a[j]); 60*f7167e0eSDag-Erling Smørgrav } 61*f7167e0eSDag-Erling Smørgrav 62*f7167e0eSDag-Erling Smørgrav static void mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32]) 63*f7167e0eSDag-Erling Smørgrav { 64*f7167e0eSDag-Erling Smørgrav unsigned int i; 65*f7167e0eSDag-Erling Smørgrav unsigned int j; 66*f7167e0eSDag-Erling Smørgrav unsigned int u; 67*f7167e0eSDag-Erling Smørgrav 68*f7167e0eSDag-Erling Smørgrav for (i = 0;i < 32;++i) { 69*f7167e0eSDag-Erling Smørgrav u = 0; 70*f7167e0eSDag-Erling Smørgrav for (j = 0;j <= i;++j) u += a[j] * b[i - j]; 71*f7167e0eSDag-Erling Smørgrav for (j = i + 1;j < 32;++j) u += 38 * a[j] * b[i + 32 - j]; 72*f7167e0eSDag-Erling Smørgrav out[i] = u; 73*f7167e0eSDag-Erling Smørgrav } 74*f7167e0eSDag-Erling Smørgrav squeeze(out); 75*f7167e0eSDag-Erling Smørgrav } 76*f7167e0eSDag-Erling Smørgrav 77*f7167e0eSDag-Erling Smørgrav static void mult121665(unsigned int out[32],const unsigned int a[32]) 78*f7167e0eSDag-Erling Smørgrav { 79*f7167e0eSDag-Erling Smørgrav unsigned int j; 80*f7167e0eSDag-Erling Smørgrav unsigned int u; 81*f7167e0eSDag-Erling Smørgrav 82*f7167e0eSDag-Erling Smørgrav u = 0; 83*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 31;++j) { u += 121665 * a[j]; out[j] = u & 255; u >>= 8; } 84*f7167e0eSDag-Erling Smørgrav u += 121665 * a[31]; out[31] = u & 127; 85*f7167e0eSDag-Erling Smørgrav u = 19 * (u >> 7); 86*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 31;++j) { u += out[j]; out[j] = u & 255; u >>= 8; } 87*f7167e0eSDag-Erling Smørgrav u += out[j]; out[j] = u; 88*f7167e0eSDag-Erling Smørgrav } 89*f7167e0eSDag-Erling Smørgrav 90*f7167e0eSDag-Erling Smørgrav static void square(unsigned int out[32],const unsigned int a[32]) 91*f7167e0eSDag-Erling Smørgrav { 92*f7167e0eSDag-Erling Smørgrav unsigned int i; 93*f7167e0eSDag-Erling Smørgrav unsigned int j; 94*f7167e0eSDag-Erling Smørgrav unsigned int u; 95*f7167e0eSDag-Erling Smørgrav 96*f7167e0eSDag-Erling Smørgrav for (i = 0;i < 32;++i) { 97*f7167e0eSDag-Erling Smørgrav u = 0; 98*f7167e0eSDag-Erling Smørgrav for (j = 0;j < i - j;++j) u += a[j] * a[i - j]; 99*f7167e0eSDag-Erling Smørgrav for (j = i + 1;j < i + 32 - j;++j) u += 38 * a[j] * a[i + 32 - j]; 100*f7167e0eSDag-Erling Smørgrav u *= 2; 101*f7167e0eSDag-Erling Smørgrav if ((i & 1) == 0) { 102*f7167e0eSDag-Erling Smørgrav u += a[i / 2] * a[i / 2]; 103*f7167e0eSDag-Erling Smørgrav u += 38 * a[i / 2 + 16] * a[i / 2 + 16]; 104*f7167e0eSDag-Erling Smørgrav } 105*f7167e0eSDag-Erling Smørgrav out[i] = u; 106*f7167e0eSDag-Erling Smørgrav } 107*f7167e0eSDag-Erling Smørgrav squeeze(out); 108*f7167e0eSDag-Erling Smørgrav } 109*f7167e0eSDag-Erling Smørgrav 110*f7167e0eSDag-Erling Smørgrav static void select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b) 111*f7167e0eSDag-Erling Smørgrav { 112*f7167e0eSDag-Erling Smørgrav unsigned int j; 113*f7167e0eSDag-Erling Smørgrav unsigned int t; 114*f7167e0eSDag-Erling Smørgrav unsigned int bminus1; 115*f7167e0eSDag-Erling Smørgrav 116*f7167e0eSDag-Erling Smørgrav bminus1 = b - 1; 117*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 64;++j) { 118*f7167e0eSDag-Erling Smørgrav t = bminus1 & (r[j] ^ s[j]); 119*f7167e0eSDag-Erling Smørgrav p[j] = s[j] ^ t; 120*f7167e0eSDag-Erling Smørgrav q[j] = r[j] ^ t; 121*f7167e0eSDag-Erling Smørgrav } 122*f7167e0eSDag-Erling Smørgrav } 123*f7167e0eSDag-Erling Smørgrav 124*f7167e0eSDag-Erling Smørgrav static void mainloop(unsigned int work[64],const unsigned char e[32]) 125*f7167e0eSDag-Erling Smørgrav { 126*f7167e0eSDag-Erling Smørgrav unsigned int xzm1[64]; 127*f7167e0eSDag-Erling Smørgrav unsigned int xzm[64]; 128*f7167e0eSDag-Erling Smørgrav unsigned int xzmb[64]; 129*f7167e0eSDag-Erling Smørgrav unsigned int xzm1b[64]; 130*f7167e0eSDag-Erling Smørgrav unsigned int xznb[64]; 131*f7167e0eSDag-Erling Smørgrav unsigned int xzn1b[64]; 132*f7167e0eSDag-Erling Smørgrav unsigned int a0[64]; 133*f7167e0eSDag-Erling Smørgrav unsigned int a1[64]; 134*f7167e0eSDag-Erling Smørgrav unsigned int b0[64]; 135*f7167e0eSDag-Erling Smørgrav unsigned int b1[64]; 136*f7167e0eSDag-Erling Smørgrav unsigned int c1[64]; 137*f7167e0eSDag-Erling Smørgrav unsigned int r[32]; 138*f7167e0eSDag-Erling Smørgrav unsigned int s[32]; 139*f7167e0eSDag-Erling Smørgrav unsigned int t[32]; 140*f7167e0eSDag-Erling Smørgrav unsigned int u[32]; 141*f7167e0eSDag-Erling Smørgrav unsigned int j; 142*f7167e0eSDag-Erling Smørgrav unsigned int b; 143*f7167e0eSDag-Erling Smørgrav int pos; 144*f7167e0eSDag-Erling Smørgrav 145*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 32;++j) xzm1[j] = work[j]; 146*f7167e0eSDag-Erling Smørgrav xzm1[32] = 1; 147*f7167e0eSDag-Erling Smørgrav for (j = 33;j < 64;++j) xzm1[j] = 0; 148*f7167e0eSDag-Erling Smørgrav 149*f7167e0eSDag-Erling Smørgrav xzm[0] = 1; 150*f7167e0eSDag-Erling Smørgrav for (j = 1;j < 64;++j) xzm[j] = 0; 151*f7167e0eSDag-Erling Smørgrav 152*f7167e0eSDag-Erling Smørgrav for (pos = 254;pos >= 0;--pos) { 153*f7167e0eSDag-Erling Smørgrav b = e[pos / 8] >> (pos & 7); 154*f7167e0eSDag-Erling Smørgrav b &= 1; 155*f7167e0eSDag-Erling Smørgrav select(xzmb,xzm1b,xzm,xzm1,b); 156*f7167e0eSDag-Erling Smørgrav add(a0,xzmb,xzmb + 32); 157*f7167e0eSDag-Erling Smørgrav sub(a0 + 32,xzmb,xzmb + 32); 158*f7167e0eSDag-Erling Smørgrav add(a1,xzm1b,xzm1b + 32); 159*f7167e0eSDag-Erling Smørgrav sub(a1 + 32,xzm1b,xzm1b + 32); 160*f7167e0eSDag-Erling Smørgrav square(b0,a0); 161*f7167e0eSDag-Erling Smørgrav square(b0 + 32,a0 + 32); 162*f7167e0eSDag-Erling Smørgrav mult(b1,a1,a0 + 32); 163*f7167e0eSDag-Erling Smørgrav mult(b1 + 32,a1 + 32,a0); 164*f7167e0eSDag-Erling Smørgrav add(c1,b1,b1 + 32); 165*f7167e0eSDag-Erling Smørgrav sub(c1 + 32,b1,b1 + 32); 166*f7167e0eSDag-Erling Smørgrav square(r,c1 + 32); 167*f7167e0eSDag-Erling Smørgrav sub(s,b0,b0 + 32); 168*f7167e0eSDag-Erling Smørgrav mult121665(t,s); 169*f7167e0eSDag-Erling Smørgrav add(u,t,b0); 170*f7167e0eSDag-Erling Smørgrav mult(xznb,b0,b0 + 32); 171*f7167e0eSDag-Erling Smørgrav mult(xznb + 32,s,u); 172*f7167e0eSDag-Erling Smørgrav square(xzn1b,c1); 173*f7167e0eSDag-Erling Smørgrav mult(xzn1b + 32,r,work); 174*f7167e0eSDag-Erling Smørgrav select(xzm,xzm1,xznb,xzn1b,b); 175*f7167e0eSDag-Erling Smørgrav } 176*f7167e0eSDag-Erling Smørgrav 177*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 64;++j) work[j] = xzm[j]; 178*f7167e0eSDag-Erling Smørgrav } 179*f7167e0eSDag-Erling Smørgrav 180*f7167e0eSDag-Erling Smørgrav static void recip(unsigned int out[32],const unsigned int z[32]) 181*f7167e0eSDag-Erling Smørgrav { 182*f7167e0eSDag-Erling Smørgrav unsigned int z2[32]; 183*f7167e0eSDag-Erling Smørgrav unsigned int z9[32]; 184*f7167e0eSDag-Erling Smørgrav unsigned int z11[32]; 185*f7167e0eSDag-Erling Smørgrav unsigned int z2_5_0[32]; 186*f7167e0eSDag-Erling Smørgrav unsigned int z2_10_0[32]; 187*f7167e0eSDag-Erling Smørgrav unsigned int z2_20_0[32]; 188*f7167e0eSDag-Erling Smørgrav unsigned int z2_50_0[32]; 189*f7167e0eSDag-Erling Smørgrav unsigned int z2_100_0[32]; 190*f7167e0eSDag-Erling Smørgrav unsigned int t0[32]; 191*f7167e0eSDag-Erling Smørgrav unsigned int t1[32]; 192*f7167e0eSDag-Erling Smørgrav int i; 193*f7167e0eSDag-Erling Smørgrav 194*f7167e0eSDag-Erling Smørgrav /* 2 */ square(z2,z); 195*f7167e0eSDag-Erling Smørgrav /* 4 */ square(t1,z2); 196*f7167e0eSDag-Erling Smørgrav /* 8 */ square(t0,t1); 197*f7167e0eSDag-Erling Smørgrav /* 9 */ mult(z9,t0,z); 198*f7167e0eSDag-Erling Smørgrav /* 11 */ mult(z11,z9,z2); 199*f7167e0eSDag-Erling Smørgrav /* 22 */ square(t0,z11); 200*f7167e0eSDag-Erling Smørgrav /* 2^5 - 2^0 = 31 */ mult(z2_5_0,t0,z9); 201*f7167e0eSDag-Erling Smørgrav 202*f7167e0eSDag-Erling Smørgrav /* 2^6 - 2^1 */ square(t0,z2_5_0); 203*f7167e0eSDag-Erling Smørgrav /* 2^7 - 2^2 */ square(t1,t0); 204*f7167e0eSDag-Erling Smørgrav /* 2^8 - 2^3 */ square(t0,t1); 205*f7167e0eSDag-Erling Smørgrav /* 2^9 - 2^4 */ square(t1,t0); 206*f7167e0eSDag-Erling Smørgrav /* 2^10 - 2^5 */ square(t0,t1); 207*f7167e0eSDag-Erling Smørgrav /* 2^10 - 2^0 */ mult(z2_10_0,t0,z2_5_0); 208*f7167e0eSDag-Erling Smørgrav 209*f7167e0eSDag-Erling Smørgrav /* 2^11 - 2^1 */ square(t0,z2_10_0); 210*f7167e0eSDag-Erling Smørgrav /* 2^12 - 2^2 */ square(t1,t0); 211*f7167e0eSDag-Erling Smørgrav /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t0,t1); square(t1,t0); } 212*f7167e0eSDag-Erling Smørgrav /* 2^20 - 2^0 */ mult(z2_20_0,t1,z2_10_0); 213*f7167e0eSDag-Erling Smørgrav 214*f7167e0eSDag-Erling Smørgrav /* 2^21 - 2^1 */ square(t0,z2_20_0); 215*f7167e0eSDag-Erling Smørgrav /* 2^22 - 2^2 */ square(t1,t0); 216*f7167e0eSDag-Erling Smørgrav /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { square(t0,t1); square(t1,t0); } 217*f7167e0eSDag-Erling Smørgrav /* 2^40 - 2^0 */ mult(t0,t1,z2_20_0); 218*f7167e0eSDag-Erling Smørgrav 219*f7167e0eSDag-Erling Smørgrav /* 2^41 - 2^1 */ square(t1,t0); 220*f7167e0eSDag-Erling Smørgrav /* 2^42 - 2^2 */ square(t0,t1); 221*f7167e0eSDag-Erling Smørgrav /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t1,t0); square(t0,t1); } 222*f7167e0eSDag-Erling Smørgrav /* 2^50 - 2^0 */ mult(z2_50_0,t0,z2_10_0); 223*f7167e0eSDag-Erling Smørgrav 224*f7167e0eSDag-Erling Smørgrav /* 2^51 - 2^1 */ square(t0,z2_50_0); 225*f7167e0eSDag-Erling Smørgrav /* 2^52 - 2^2 */ square(t1,t0); 226*f7167e0eSDag-Erling Smørgrav /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); } 227*f7167e0eSDag-Erling Smørgrav /* 2^100 - 2^0 */ mult(z2_100_0,t1,z2_50_0); 228*f7167e0eSDag-Erling Smørgrav 229*f7167e0eSDag-Erling Smørgrav /* 2^101 - 2^1 */ square(t1,z2_100_0); 230*f7167e0eSDag-Erling Smørgrav /* 2^102 - 2^2 */ square(t0,t1); 231*f7167e0eSDag-Erling Smørgrav /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { square(t1,t0); square(t0,t1); } 232*f7167e0eSDag-Erling Smørgrav /* 2^200 - 2^0 */ mult(t1,t0,z2_100_0); 233*f7167e0eSDag-Erling Smørgrav 234*f7167e0eSDag-Erling Smørgrav /* 2^201 - 2^1 */ square(t0,t1); 235*f7167e0eSDag-Erling Smørgrav /* 2^202 - 2^2 */ square(t1,t0); 236*f7167e0eSDag-Erling Smørgrav /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); } 237*f7167e0eSDag-Erling Smørgrav /* 2^250 - 2^0 */ mult(t0,t1,z2_50_0); 238*f7167e0eSDag-Erling Smørgrav 239*f7167e0eSDag-Erling Smørgrav /* 2^251 - 2^1 */ square(t1,t0); 240*f7167e0eSDag-Erling Smørgrav /* 2^252 - 2^2 */ square(t0,t1); 241*f7167e0eSDag-Erling Smørgrav /* 2^253 - 2^3 */ square(t1,t0); 242*f7167e0eSDag-Erling Smørgrav /* 2^254 - 2^4 */ square(t0,t1); 243*f7167e0eSDag-Erling Smørgrav /* 2^255 - 2^5 */ square(t1,t0); 244*f7167e0eSDag-Erling Smørgrav /* 2^255 - 21 */ mult(out,t1,z11); 245*f7167e0eSDag-Erling Smørgrav } 246*f7167e0eSDag-Erling Smørgrav 247*f7167e0eSDag-Erling Smørgrav int crypto_scalarmult_curve25519(unsigned char *q, 248*f7167e0eSDag-Erling Smørgrav const unsigned char *n, 249*f7167e0eSDag-Erling Smørgrav const unsigned char *p) 250*f7167e0eSDag-Erling Smørgrav { 251*f7167e0eSDag-Erling Smørgrav unsigned int work[96]; 252*f7167e0eSDag-Erling Smørgrav unsigned char e[32]; 253*f7167e0eSDag-Erling Smørgrav unsigned int i; 254*f7167e0eSDag-Erling Smørgrav for (i = 0;i < 32;++i) e[i] = n[i]; 255*f7167e0eSDag-Erling Smørgrav e[0] &= 248; 256*f7167e0eSDag-Erling Smørgrav e[31] &= 127; 257*f7167e0eSDag-Erling Smørgrav e[31] |= 64; 258*f7167e0eSDag-Erling Smørgrav for (i = 0;i < 32;++i) work[i] = p[i]; 259*f7167e0eSDag-Erling Smørgrav mainloop(work,e); 260*f7167e0eSDag-Erling Smørgrav recip(work + 32,work + 32); 261*f7167e0eSDag-Erling Smørgrav mult(work + 64,work,work + 32); 262*f7167e0eSDag-Erling Smørgrav freeze(work + 64); 263*f7167e0eSDag-Erling Smørgrav for (i = 0;i < 32;++i) q[i] = work[64 + i]; 264*f7167e0eSDag-Erling Smørgrav return 0; 265*f7167e0eSDag-Erling Smørgrav } 266