1*f7167e0eSDag-Erling Smørgrav /* $OpenBSD: smult_curve25519_ref.c,v 1.2 2013/11/02 22:02:14 markus Exp $ */
2*f7167e0eSDag-Erling Smørgrav /*
3*f7167e0eSDag-Erling Smørgrav version 20081011
4*f7167e0eSDag-Erling Smørgrav Matthew Dempsky
5*f7167e0eSDag-Erling Smørgrav Public domain.
6*f7167e0eSDag-Erling Smørgrav Derived from public domain code by D. J. Bernstein.
7*f7167e0eSDag-Erling Smørgrav */
8*f7167e0eSDag-Erling Smørgrav
9*f7167e0eSDag-Erling Smørgrav int crypto_scalarmult_curve25519(unsigned char *, const unsigned char *, const unsigned char *);
10*f7167e0eSDag-Erling Smørgrav
add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])11*f7167e0eSDag-Erling Smørgrav static void add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
12*f7167e0eSDag-Erling Smørgrav {
13*f7167e0eSDag-Erling Smørgrav unsigned int j;
14*f7167e0eSDag-Erling Smørgrav unsigned int u;
15*f7167e0eSDag-Erling Smørgrav u = 0;
16*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 31;++j) { u += a[j] + b[j]; out[j] = u & 255; u >>= 8; }
17*f7167e0eSDag-Erling Smørgrav u += a[31] + b[31]; out[31] = u;
18*f7167e0eSDag-Erling Smørgrav }
19*f7167e0eSDag-Erling Smørgrav
sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])20*f7167e0eSDag-Erling Smørgrav static void sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
21*f7167e0eSDag-Erling Smørgrav {
22*f7167e0eSDag-Erling Smørgrav unsigned int j;
23*f7167e0eSDag-Erling Smørgrav unsigned int u;
24*f7167e0eSDag-Erling Smørgrav u = 218;
25*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 31;++j) {
26*f7167e0eSDag-Erling Smørgrav u += a[j] + 65280 - b[j];
27*f7167e0eSDag-Erling Smørgrav out[j] = u & 255;
28*f7167e0eSDag-Erling Smørgrav u >>= 8;
29*f7167e0eSDag-Erling Smørgrav }
30*f7167e0eSDag-Erling Smørgrav u += a[31] - b[31];
31*f7167e0eSDag-Erling Smørgrav out[31] = u;
32*f7167e0eSDag-Erling Smørgrav }
33*f7167e0eSDag-Erling Smørgrav
squeeze(unsigned int a[32])34*f7167e0eSDag-Erling Smørgrav static void squeeze(unsigned int a[32])
35*f7167e0eSDag-Erling Smørgrav {
36*f7167e0eSDag-Erling Smørgrav unsigned int j;
37*f7167e0eSDag-Erling Smørgrav unsigned int u;
38*f7167e0eSDag-Erling Smørgrav u = 0;
39*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
40*f7167e0eSDag-Erling Smørgrav u += a[31]; a[31] = u & 127;
41*f7167e0eSDag-Erling Smørgrav u = 19 * (u >> 7);
42*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
43*f7167e0eSDag-Erling Smørgrav u += a[31]; a[31] = u;
44*f7167e0eSDag-Erling Smørgrav }
45*f7167e0eSDag-Erling Smørgrav
46*f7167e0eSDag-Erling Smørgrav static const unsigned int minusp[32] = {
47*f7167e0eSDag-Erling Smørgrav 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128
48*f7167e0eSDag-Erling Smørgrav } ;
49*f7167e0eSDag-Erling Smørgrav
freeze(unsigned int a[32])50*f7167e0eSDag-Erling Smørgrav static void freeze(unsigned int a[32])
51*f7167e0eSDag-Erling Smørgrav {
52*f7167e0eSDag-Erling Smørgrav unsigned int aorig[32];
53*f7167e0eSDag-Erling Smørgrav unsigned int j;
54*f7167e0eSDag-Erling Smørgrav unsigned int negative;
55*f7167e0eSDag-Erling Smørgrav
56*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 32;++j) aorig[j] = a[j];
57*f7167e0eSDag-Erling Smørgrav add(a,a,minusp);
58*f7167e0eSDag-Erling Smørgrav negative = -((a[31] >> 7) & 1);
59*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 32;++j) a[j] ^= negative & (aorig[j] ^ a[j]);
60*f7167e0eSDag-Erling Smørgrav }
61*f7167e0eSDag-Erling Smørgrav
mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])62*f7167e0eSDag-Erling Smørgrav static void mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
63*f7167e0eSDag-Erling Smørgrav {
64*f7167e0eSDag-Erling Smørgrav unsigned int i;
65*f7167e0eSDag-Erling Smørgrav unsigned int j;
66*f7167e0eSDag-Erling Smørgrav unsigned int u;
67*f7167e0eSDag-Erling Smørgrav
68*f7167e0eSDag-Erling Smørgrav for (i = 0;i < 32;++i) {
69*f7167e0eSDag-Erling Smørgrav u = 0;
70*f7167e0eSDag-Erling Smørgrav for (j = 0;j <= i;++j) u += a[j] * b[i - j];
71*f7167e0eSDag-Erling Smørgrav for (j = i + 1;j < 32;++j) u += 38 * a[j] * b[i + 32 - j];
72*f7167e0eSDag-Erling Smørgrav out[i] = u;
73*f7167e0eSDag-Erling Smørgrav }
74*f7167e0eSDag-Erling Smørgrav squeeze(out);
75*f7167e0eSDag-Erling Smørgrav }
76*f7167e0eSDag-Erling Smørgrav
mult121665(unsigned int out[32],const unsigned int a[32])77*f7167e0eSDag-Erling Smørgrav static void mult121665(unsigned int out[32],const unsigned int a[32])
78*f7167e0eSDag-Erling Smørgrav {
79*f7167e0eSDag-Erling Smørgrav unsigned int j;
80*f7167e0eSDag-Erling Smørgrav unsigned int u;
81*f7167e0eSDag-Erling Smørgrav
82*f7167e0eSDag-Erling Smørgrav u = 0;
83*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 31;++j) { u += 121665 * a[j]; out[j] = u & 255; u >>= 8; }
84*f7167e0eSDag-Erling Smørgrav u += 121665 * a[31]; out[31] = u & 127;
85*f7167e0eSDag-Erling Smørgrav u = 19 * (u >> 7);
86*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 31;++j) { u += out[j]; out[j] = u & 255; u >>= 8; }
87*f7167e0eSDag-Erling Smørgrav u += out[j]; out[j] = u;
88*f7167e0eSDag-Erling Smørgrav }
89*f7167e0eSDag-Erling Smørgrav
square(unsigned int out[32],const unsigned int a[32])90*f7167e0eSDag-Erling Smørgrav static void square(unsigned int out[32],const unsigned int a[32])
91*f7167e0eSDag-Erling Smørgrav {
92*f7167e0eSDag-Erling Smørgrav unsigned int i;
93*f7167e0eSDag-Erling Smørgrav unsigned int j;
94*f7167e0eSDag-Erling Smørgrav unsigned int u;
95*f7167e0eSDag-Erling Smørgrav
96*f7167e0eSDag-Erling Smørgrav for (i = 0;i < 32;++i) {
97*f7167e0eSDag-Erling Smørgrav u = 0;
98*f7167e0eSDag-Erling Smørgrav for (j = 0;j < i - j;++j) u += a[j] * a[i - j];
99*f7167e0eSDag-Erling Smørgrav for (j = i + 1;j < i + 32 - j;++j) u += 38 * a[j] * a[i + 32 - j];
100*f7167e0eSDag-Erling Smørgrav u *= 2;
101*f7167e0eSDag-Erling Smørgrav if ((i & 1) == 0) {
102*f7167e0eSDag-Erling Smørgrav u += a[i / 2] * a[i / 2];
103*f7167e0eSDag-Erling Smørgrav u += 38 * a[i / 2 + 16] * a[i / 2 + 16];
104*f7167e0eSDag-Erling Smørgrav }
105*f7167e0eSDag-Erling Smørgrav out[i] = u;
106*f7167e0eSDag-Erling Smørgrav }
107*f7167e0eSDag-Erling Smørgrav squeeze(out);
108*f7167e0eSDag-Erling Smørgrav }
109*f7167e0eSDag-Erling Smørgrav
select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b)110*f7167e0eSDag-Erling Smørgrav static void select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b)
111*f7167e0eSDag-Erling Smørgrav {
112*f7167e0eSDag-Erling Smørgrav unsigned int j;
113*f7167e0eSDag-Erling Smørgrav unsigned int t;
114*f7167e0eSDag-Erling Smørgrav unsigned int bminus1;
115*f7167e0eSDag-Erling Smørgrav
116*f7167e0eSDag-Erling Smørgrav bminus1 = b - 1;
117*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 64;++j) {
118*f7167e0eSDag-Erling Smørgrav t = bminus1 & (r[j] ^ s[j]);
119*f7167e0eSDag-Erling Smørgrav p[j] = s[j] ^ t;
120*f7167e0eSDag-Erling Smørgrav q[j] = r[j] ^ t;
121*f7167e0eSDag-Erling Smørgrav }
122*f7167e0eSDag-Erling Smørgrav }
123*f7167e0eSDag-Erling Smørgrav
mainloop(unsigned int work[64],const unsigned char e[32])124*f7167e0eSDag-Erling Smørgrav static void mainloop(unsigned int work[64],const unsigned char e[32])
125*f7167e0eSDag-Erling Smørgrav {
126*f7167e0eSDag-Erling Smørgrav unsigned int xzm1[64];
127*f7167e0eSDag-Erling Smørgrav unsigned int xzm[64];
128*f7167e0eSDag-Erling Smørgrav unsigned int xzmb[64];
129*f7167e0eSDag-Erling Smørgrav unsigned int xzm1b[64];
130*f7167e0eSDag-Erling Smørgrav unsigned int xznb[64];
131*f7167e0eSDag-Erling Smørgrav unsigned int xzn1b[64];
132*f7167e0eSDag-Erling Smørgrav unsigned int a0[64];
133*f7167e0eSDag-Erling Smørgrav unsigned int a1[64];
134*f7167e0eSDag-Erling Smørgrav unsigned int b0[64];
135*f7167e0eSDag-Erling Smørgrav unsigned int b1[64];
136*f7167e0eSDag-Erling Smørgrav unsigned int c1[64];
137*f7167e0eSDag-Erling Smørgrav unsigned int r[32];
138*f7167e0eSDag-Erling Smørgrav unsigned int s[32];
139*f7167e0eSDag-Erling Smørgrav unsigned int t[32];
140*f7167e0eSDag-Erling Smørgrav unsigned int u[32];
141*f7167e0eSDag-Erling Smørgrav unsigned int j;
142*f7167e0eSDag-Erling Smørgrav unsigned int b;
143*f7167e0eSDag-Erling Smørgrav int pos;
144*f7167e0eSDag-Erling Smørgrav
145*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 32;++j) xzm1[j] = work[j];
146*f7167e0eSDag-Erling Smørgrav xzm1[32] = 1;
147*f7167e0eSDag-Erling Smørgrav for (j = 33;j < 64;++j) xzm1[j] = 0;
148*f7167e0eSDag-Erling Smørgrav
149*f7167e0eSDag-Erling Smørgrav xzm[0] = 1;
150*f7167e0eSDag-Erling Smørgrav for (j = 1;j < 64;++j) xzm[j] = 0;
151*f7167e0eSDag-Erling Smørgrav
152*f7167e0eSDag-Erling Smørgrav for (pos = 254;pos >= 0;--pos) {
153*f7167e0eSDag-Erling Smørgrav b = e[pos / 8] >> (pos & 7);
154*f7167e0eSDag-Erling Smørgrav b &= 1;
155*f7167e0eSDag-Erling Smørgrav select(xzmb,xzm1b,xzm,xzm1,b);
156*f7167e0eSDag-Erling Smørgrav add(a0,xzmb,xzmb + 32);
157*f7167e0eSDag-Erling Smørgrav sub(a0 + 32,xzmb,xzmb + 32);
158*f7167e0eSDag-Erling Smørgrav add(a1,xzm1b,xzm1b + 32);
159*f7167e0eSDag-Erling Smørgrav sub(a1 + 32,xzm1b,xzm1b + 32);
160*f7167e0eSDag-Erling Smørgrav square(b0,a0);
161*f7167e0eSDag-Erling Smørgrav square(b0 + 32,a0 + 32);
162*f7167e0eSDag-Erling Smørgrav mult(b1,a1,a0 + 32);
163*f7167e0eSDag-Erling Smørgrav mult(b1 + 32,a1 + 32,a0);
164*f7167e0eSDag-Erling Smørgrav add(c1,b1,b1 + 32);
165*f7167e0eSDag-Erling Smørgrav sub(c1 + 32,b1,b1 + 32);
166*f7167e0eSDag-Erling Smørgrav square(r,c1 + 32);
167*f7167e0eSDag-Erling Smørgrav sub(s,b0,b0 + 32);
168*f7167e0eSDag-Erling Smørgrav mult121665(t,s);
169*f7167e0eSDag-Erling Smørgrav add(u,t,b0);
170*f7167e0eSDag-Erling Smørgrav mult(xznb,b0,b0 + 32);
171*f7167e0eSDag-Erling Smørgrav mult(xznb + 32,s,u);
172*f7167e0eSDag-Erling Smørgrav square(xzn1b,c1);
173*f7167e0eSDag-Erling Smørgrav mult(xzn1b + 32,r,work);
174*f7167e0eSDag-Erling Smørgrav select(xzm,xzm1,xznb,xzn1b,b);
175*f7167e0eSDag-Erling Smørgrav }
176*f7167e0eSDag-Erling Smørgrav
177*f7167e0eSDag-Erling Smørgrav for (j = 0;j < 64;++j) work[j] = xzm[j];
178*f7167e0eSDag-Erling Smørgrav }
179*f7167e0eSDag-Erling Smørgrav
recip(unsigned int out[32],const unsigned int z[32])180*f7167e0eSDag-Erling Smørgrav static void recip(unsigned int out[32],const unsigned int z[32])
181*f7167e0eSDag-Erling Smørgrav {
182*f7167e0eSDag-Erling Smørgrav unsigned int z2[32];
183*f7167e0eSDag-Erling Smørgrav unsigned int z9[32];
184*f7167e0eSDag-Erling Smørgrav unsigned int z11[32];
185*f7167e0eSDag-Erling Smørgrav unsigned int z2_5_0[32];
186*f7167e0eSDag-Erling Smørgrav unsigned int z2_10_0[32];
187*f7167e0eSDag-Erling Smørgrav unsigned int z2_20_0[32];
188*f7167e0eSDag-Erling Smørgrav unsigned int z2_50_0[32];
189*f7167e0eSDag-Erling Smørgrav unsigned int z2_100_0[32];
190*f7167e0eSDag-Erling Smørgrav unsigned int t0[32];
191*f7167e0eSDag-Erling Smørgrav unsigned int t1[32];
192*f7167e0eSDag-Erling Smørgrav int i;
193*f7167e0eSDag-Erling Smørgrav
194*f7167e0eSDag-Erling Smørgrav /* 2 */ square(z2,z);
195*f7167e0eSDag-Erling Smørgrav /* 4 */ square(t1,z2);
196*f7167e0eSDag-Erling Smørgrav /* 8 */ square(t0,t1);
197*f7167e0eSDag-Erling Smørgrav /* 9 */ mult(z9,t0,z);
198*f7167e0eSDag-Erling Smørgrav /* 11 */ mult(z11,z9,z2);
199*f7167e0eSDag-Erling Smørgrav /* 22 */ square(t0,z11);
200*f7167e0eSDag-Erling Smørgrav /* 2^5 - 2^0 = 31 */ mult(z2_5_0,t0,z9);
201*f7167e0eSDag-Erling Smørgrav
202*f7167e0eSDag-Erling Smørgrav /* 2^6 - 2^1 */ square(t0,z2_5_0);
203*f7167e0eSDag-Erling Smørgrav /* 2^7 - 2^2 */ square(t1,t0);
204*f7167e0eSDag-Erling Smørgrav /* 2^8 - 2^3 */ square(t0,t1);
205*f7167e0eSDag-Erling Smørgrav /* 2^9 - 2^4 */ square(t1,t0);
206*f7167e0eSDag-Erling Smørgrav /* 2^10 - 2^5 */ square(t0,t1);
207*f7167e0eSDag-Erling Smørgrav /* 2^10 - 2^0 */ mult(z2_10_0,t0,z2_5_0);
208*f7167e0eSDag-Erling Smørgrav
209*f7167e0eSDag-Erling Smørgrav /* 2^11 - 2^1 */ square(t0,z2_10_0);
210*f7167e0eSDag-Erling Smørgrav /* 2^12 - 2^2 */ square(t1,t0);
211*f7167e0eSDag-Erling Smørgrav /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t0,t1); square(t1,t0); }
212*f7167e0eSDag-Erling Smørgrav /* 2^20 - 2^0 */ mult(z2_20_0,t1,z2_10_0);
213*f7167e0eSDag-Erling Smørgrav
214*f7167e0eSDag-Erling Smørgrav /* 2^21 - 2^1 */ square(t0,z2_20_0);
215*f7167e0eSDag-Erling Smørgrav /* 2^22 - 2^2 */ square(t1,t0);
216*f7167e0eSDag-Erling Smørgrav /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { square(t0,t1); square(t1,t0); }
217*f7167e0eSDag-Erling Smørgrav /* 2^40 - 2^0 */ mult(t0,t1,z2_20_0);
218*f7167e0eSDag-Erling Smørgrav
219*f7167e0eSDag-Erling Smørgrav /* 2^41 - 2^1 */ square(t1,t0);
220*f7167e0eSDag-Erling Smørgrav /* 2^42 - 2^2 */ square(t0,t1);
221*f7167e0eSDag-Erling Smørgrav /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t1,t0); square(t0,t1); }
222*f7167e0eSDag-Erling Smørgrav /* 2^50 - 2^0 */ mult(z2_50_0,t0,z2_10_0);
223*f7167e0eSDag-Erling Smørgrav
224*f7167e0eSDag-Erling Smørgrav /* 2^51 - 2^1 */ square(t0,z2_50_0);
225*f7167e0eSDag-Erling Smørgrav /* 2^52 - 2^2 */ square(t1,t0);
226*f7167e0eSDag-Erling Smørgrav /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
227*f7167e0eSDag-Erling Smørgrav /* 2^100 - 2^0 */ mult(z2_100_0,t1,z2_50_0);
228*f7167e0eSDag-Erling Smørgrav
229*f7167e0eSDag-Erling Smørgrav /* 2^101 - 2^1 */ square(t1,z2_100_0);
230*f7167e0eSDag-Erling Smørgrav /* 2^102 - 2^2 */ square(t0,t1);
231*f7167e0eSDag-Erling Smørgrav /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { square(t1,t0); square(t0,t1); }
232*f7167e0eSDag-Erling Smørgrav /* 2^200 - 2^0 */ mult(t1,t0,z2_100_0);
233*f7167e0eSDag-Erling Smørgrav
234*f7167e0eSDag-Erling Smørgrav /* 2^201 - 2^1 */ square(t0,t1);
235*f7167e0eSDag-Erling Smørgrav /* 2^202 - 2^2 */ square(t1,t0);
236*f7167e0eSDag-Erling Smørgrav /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
237*f7167e0eSDag-Erling Smørgrav /* 2^250 - 2^0 */ mult(t0,t1,z2_50_0);
238*f7167e0eSDag-Erling Smørgrav
239*f7167e0eSDag-Erling Smørgrav /* 2^251 - 2^1 */ square(t1,t0);
240*f7167e0eSDag-Erling Smørgrav /* 2^252 - 2^2 */ square(t0,t1);
241*f7167e0eSDag-Erling Smørgrav /* 2^253 - 2^3 */ square(t1,t0);
242*f7167e0eSDag-Erling Smørgrav /* 2^254 - 2^4 */ square(t0,t1);
243*f7167e0eSDag-Erling Smørgrav /* 2^255 - 2^5 */ square(t1,t0);
244*f7167e0eSDag-Erling Smørgrav /* 2^255 - 21 */ mult(out,t1,z11);
245*f7167e0eSDag-Erling Smørgrav }
246*f7167e0eSDag-Erling Smørgrav
crypto_scalarmult_curve25519(unsigned char * q,const unsigned char * n,const unsigned char * p)247*f7167e0eSDag-Erling Smørgrav int crypto_scalarmult_curve25519(unsigned char *q,
248*f7167e0eSDag-Erling Smørgrav const unsigned char *n,
249*f7167e0eSDag-Erling Smørgrav const unsigned char *p)
250*f7167e0eSDag-Erling Smørgrav {
251*f7167e0eSDag-Erling Smørgrav unsigned int work[96];
252*f7167e0eSDag-Erling Smørgrav unsigned char e[32];
253*f7167e0eSDag-Erling Smørgrav unsigned int i;
254*f7167e0eSDag-Erling Smørgrav for (i = 0;i < 32;++i) e[i] = n[i];
255*f7167e0eSDag-Erling Smørgrav e[0] &= 248;
256*f7167e0eSDag-Erling Smørgrav e[31] &= 127;
257*f7167e0eSDag-Erling Smørgrav e[31] |= 64;
258*f7167e0eSDag-Erling Smørgrav for (i = 0;i < 32;++i) work[i] = p[i];
259*f7167e0eSDag-Erling Smørgrav mainloop(work,e);
260*f7167e0eSDag-Erling Smørgrav recip(work + 32,work + 32);
261*f7167e0eSDag-Erling Smørgrav mult(work + 64,work,work + 32);
262*f7167e0eSDag-Erling Smørgrav freeze(work + 64);
263*f7167e0eSDag-Erling Smørgrav for (i = 0;i < 32;++i) q[i] = work[64 + i];
264*f7167e0eSDag-Erling Smørgrav return 0;
265*f7167e0eSDag-Erling Smørgrav }
266