xref: /freebsd/crypto/openssh/smult_curve25519_ref.c (revision 6cec9cad762b6476313fb1f8e931a1647822db6b)
1*f7167e0eSDag-Erling Smørgrav /* $OpenBSD: smult_curve25519_ref.c,v 1.2 2013/11/02 22:02:14 markus Exp $ */
2*f7167e0eSDag-Erling Smørgrav /*
3*f7167e0eSDag-Erling Smørgrav version 20081011
4*f7167e0eSDag-Erling Smørgrav Matthew Dempsky
5*f7167e0eSDag-Erling Smørgrav Public domain.
6*f7167e0eSDag-Erling Smørgrav Derived from public domain code by D. J. Bernstein.
7*f7167e0eSDag-Erling Smørgrav */
8*f7167e0eSDag-Erling Smørgrav 
9*f7167e0eSDag-Erling Smørgrav int crypto_scalarmult_curve25519(unsigned char *, const unsigned char *, const unsigned char *);
10*f7167e0eSDag-Erling Smørgrav 
add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])11*f7167e0eSDag-Erling Smørgrav static void add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
12*f7167e0eSDag-Erling Smørgrav {
13*f7167e0eSDag-Erling Smørgrav   unsigned int j;
14*f7167e0eSDag-Erling Smørgrav   unsigned int u;
15*f7167e0eSDag-Erling Smørgrav   u = 0;
16*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 31;++j) { u += a[j] + b[j]; out[j] = u & 255; u >>= 8; }
17*f7167e0eSDag-Erling Smørgrav   u += a[31] + b[31]; out[31] = u;
18*f7167e0eSDag-Erling Smørgrav }
19*f7167e0eSDag-Erling Smørgrav 
sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])20*f7167e0eSDag-Erling Smørgrav static void sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
21*f7167e0eSDag-Erling Smørgrav {
22*f7167e0eSDag-Erling Smørgrav   unsigned int j;
23*f7167e0eSDag-Erling Smørgrav   unsigned int u;
24*f7167e0eSDag-Erling Smørgrav   u = 218;
25*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 31;++j) {
26*f7167e0eSDag-Erling Smørgrav     u += a[j] + 65280 - b[j];
27*f7167e0eSDag-Erling Smørgrav     out[j] = u & 255;
28*f7167e0eSDag-Erling Smørgrav     u >>= 8;
29*f7167e0eSDag-Erling Smørgrav   }
30*f7167e0eSDag-Erling Smørgrav   u += a[31] - b[31];
31*f7167e0eSDag-Erling Smørgrav   out[31] = u;
32*f7167e0eSDag-Erling Smørgrav }
33*f7167e0eSDag-Erling Smørgrav 
squeeze(unsigned int a[32])34*f7167e0eSDag-Erling Smørgrav static void squeeze(unsigned int a[32])
35*f7167e0eSDag-Erling Smørgrav {
36*f7167e0eSDag-Erling Smørgrav   unsigned int j;
37*f7167e0eSDag-Erling Smørgrav   unsigned int u;
38*f7167e0eSDag-Erling Smørgrav   u = 0;
39*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
40*f7167e0eSDag-Erling Smørgrav   u += a[31]; a[31] = u & 127;
41*f7167e0eSDag-Erling Smørgrav   u = 19 * (u >> 7);
42*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
43*f7167e0eSDag-Erling Smørgrav   u += a[31]; a[31] = u;
44*f7167e0eSDag-Erling Smørgrav }
45*f7167e0eSDag-Erling Smørgrav 
46*f7167e0eSDag-Erling Smørgrav static const unsigned int minusp[32] = {
47*f7167e0eSDag-Erling Smørgrav  19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128
48*f7167e0eSDag-Erling Smørgrav } ;
49*f7167e0eSDag-Erling Smørgrav 
freeze(unsigned int a[32])50*f7167e0eSDag-Erling Smørgrav static void freeze(unsigned int a[32])
51*f7167e0eSDag-Erling Smørgrav {
52*f7167e0eSDag-Erling Smørgrav   unsigned int aorig[32];
53*f7167e0eSDag-Erling Smørgrav   unsigned int j;
54*f7167e0eSDag-Erling Smørgrav   unsigned int negative;
55*f7167e0eSDag-Erling Smørgrav 
56*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 32;++j) aorig[j] = a[j];
57*f7167e0eSDag-Erling Smørgrav   add(a,a,minusp);
58*f7167e0eSDag-Erling Smørgrav   negative = -((a[31] >> 7) & 1);
59*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 32;++j) a[j] ^= negative & (aorig[j] ^ a[j]);
60*f7167e0eSDag-Erling Smørgrav }
61*f7167e0eSDag-Erling Smørgrav 
mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])62*f7167e0eSDag-Erling Smørgrav static void mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
63*f7167e0eSDag-Erling Smørgrav {
64*f7167e0eSDag-Erling Smørgrav   unsigned int i;
65*f7167e0eSDag-Erling Smørgrav   unsigned int j;
66*f7167e0eSDag-Erling Smørgrav   unsigned int u;
67*f7167e0eSDag-Erling Smørgrav 
68*f7167e0eSDag-Erling Smørgrav   for (i = 0;i < 32;++i) {
69*f7167e0eSDag-Erling Smørgrav     u = 0;
70*f7167e0eSDag-Erling Smørgrav     for (j = 0;j <= i;++j) u += a[j] * b[i - j];
71*f7167e0eSDag-Erling Smørgrav     for (j = i + 1;j < 32;++j) u += 38 * a[j] * b[i + 32 - j];
72*f7167e0eSDag-Erling Smørgrav     out[i] = u;
73*f7167e0eSDag-Erling Smørgrav   }
74*f7167e0eSDag-Erling Smørgrav   squeeze(out);
75*f7167e0eSDag-Erling Smørgrav }
76*f7167e0eSDag-Erling Smørgrav 
mult121665(unsigned int out[32],const unsigned int a[32])77*f7167e0eSDag-Erling Smørgrav static void mult121665(unsigned int out[32],const unsigned int a[32])
78*f7167e0eSDag-Erling Smørgrav {
79*f7167e0eSDag-Erling Smørgrav   unsigned int j;
80*f7167e0eSDag-Erling Smørgrav   unsigned int u;
81*f7167e0eSDag-Erling Smørgrav 
82*f7167e0eSDag-Erling Smørgrav   u = 0;
83*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 31;++j) { u += 121665 * a[j]; out[j] = u & 255; u >>= 8; }
84*f7167e0eSDag-Erling Smørgrav   u += 121665 * a[31]; out[31] = u & 127;
85*f7167e0eSDag-Erling Smørgrav   u = 19 * (u >> 7);
86*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 31;++j) { u += out[j]; out[j] = u & 255; u >>= 8; }
87*f7167e0eSDag-Erling Smørgrav   u += out[j]; out[j] = u;
88*f7167e0eSDag-Erling Smørgrav }
89*f7167e0eSDag-Erling Smørgrav 
square(unsigned int out[32],const unsigned int a[32])90*f7167e0eSDag-Erling Smørgrav static void square(unsigned int out[32],const unsigned int a[32])
91*f7167e0eSDag-Erling Smørgrav {
92*f7167e0eSDag-Erling Smørgrav   unsigned int i;
93*f7167e0eSDag-Erling Smørgrav   unsigned int j;
94*f7167e0eSDag-Erling Smørgrav   unsigned int u;
95*f7167e0eSDag-Erling Smørgrav 
96*f7167e0eSDag-Erling Smørgrav   for (i = 0;i < 32;++i) {
97*f7167e0eSDag-Erling Smørgrav     u = 0;
98*f7167e0eSDag-Erling Smørgrav     for (j = 0;j < i - j;++j) u += a[j] * a[i - j];
99*f7167e0eSDag-Erling Smørgrav     for (j = i + 1;j < i + 32 - j;++j) u += 38 * a[j] * a[i + 32 - j];
100*f7167e0eSDag-Erling Smørgrav     u *= 2;
101*f7167e0eSDag-Erling Smørgrav     if ((i & 1) == 0) {
102*f7167e0eSDag-Erling Smørgrav       u += a[i / 2] * a[i / 2];
103*f7167e0eSDag-Erling Smørgrav       u += 38 * a[i / 2 + 16] * a[i / 2 + 16];
104*f7167e0eSDag-Erling Smørgrav     }
105*f7167e0eSDag-Erling Smørgrav     out[i] = u;
106*f7167e0eSDag-Erling Smørgrav   }
107*f7167e0eSDag-Erling Smørgrav   squeeze(out);
108*f7167e0eSDag-Erling Smørgrav }
109*f7167e0eSDag-Erling Smørgrav 
select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b)110*f7167e0eSDag-Erling Smørgrav static void select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b)
111*f7167e0eSDag-Erling Smørgrav {
112*f7167e0eSDag-Erling Smørgrav   unsigned int j;
113*f7167e0eSDag-Erling Smørgrav   unsigned int t;
114*f7167e0eSDag-Erling Smørgrav   unsigned int bminus1;
115*f7167e0eSDag-Erling Smørgrav 
116*f7167e0eSDag-Erling Smørgrav   bminus1 = b - 1;
117*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 64;++j) {
118*f7167e0eSDag-Erling Smørgrav     t = bminus1 & (r[j] ^ s[j]);
119*f7167e0eSDag-Erling Smørgrav     p[j] = s[j] ^ t;
120*f7167e0eSDag-Erling Smørgrav     q[j] = r[j] ^ t;
121*f7167e0eSDag-Erling Smørgrav   }
122*f7167e0eSDag-Erling Smørgrav }
123*f7167e0eSDag-Erling Smørgrav 
mainloop(unsigned int work[64],const unsigned char e[32])124*f7167e0eSDag-Erling Smørgrav static void mainloop(unsigned int work[64],const unsigned char e[32])
125*f7167e0eSDag-Erling Smørgrav {
126*f7167e0eSDag-Erling Smørgrav   unsigned int xzm1[64];
127*f7167e0eSDag-Erling Smørgrav   unsigned int xzm[64];
128*f7167e0eSDag-Erling Smørgrav   unsigned int xzmb[64];
129*f7167e0eSDag-Erling Smørgrav   unsigned int xzm1b[64];
130*f7167e0eSDag-Erling Smørgrav   unsigned int xznb[64];
131*f7167e0eSDag-Erling Smørgrav   unsigned int xzn1b[64];
132*f7167e0eSDag-Erling Smørgrav   unsigned int a0[64];
133*f7167e0eSDag-Erling Smørgrav   unsigned int a1[64];
134*f7167e0eSDag-Erling Smørgrav   unsigned int b0[64];
135*f7167e0eSDag-Erling Smørgrav   unsigned int b1[64];
136*f7167e0eSDag-Erling Smørgrav   unsigned int c1[64];
137*f7167e0eSDag-Erling Smørgrav   unsigned int r[32];
138*f7167e0eSDag-Erling Smørgrav   unsigned int s[32];
139*f7167e0eSDag-Erling Smørgrav   unsigned int t[32];
140*f7167e0eSDag-Erling Smørgrav   unsigned int u[32];
141*f7167e0eSDag-Erling Smørgrav   unsigned int j;
142*f7167e0eSDag-Erling Smørgrav   unsigned int b;
143*f7167e0eSDag-Erling Smørgrav   int pos;
144*f7167e0eSDag-Erling Smørgrav 
145*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 32;++j) xzm1[j] = work[j];
146*f7167e0eSDag-Erling Smørgrav   xzm1[32] = 1;
147*f7167e0eSDag-Erling Smørgrav   for (j = 33;j < 64;++j) xzm1[j] = 0;
148*f7167e0eSDag-Erling Smørgrav 
149*f7167e0eSDag-Erling Smørgrav   xzm[0] = 1;
150*f7167e0eSDag-Erling Smørgrav   for (j = 1;j < 64;++j) xzm[j] = 0;
151*f7167e0eSDag-Erling Smørgrav 
152*f7167e0eSDag-Erling Smørgrav   for (pos = 254;pos >= 0;--pos) {
153*f7167e0eSDag-Erling Smørgrav     b = e[pos / 8] >> (pos & 7);
154*f7167e0eSDag-Erling Smørgrav     b &= 1;
155*f7167e0eSDag-Erling Smørgrav     select(xzmb,xzm1b,xzm,xzm1,b);
156*f7167e0eSDag-Erling Smørgrav     add(a0,xzmb,xzmb + 32);
157*f7167e0eSDag-Erling Smørgrav     sub(a0 + 32,xzmb,xzmb + 32);
158*f7167e0eSDag-Erling Smørgrav     add(a1,xzm1b,xzm1b + 32);
159*f7167e0eSDag-Erling Smørgrav     sub(a1 + 32,xzm1b,xzm1b + 32);
160*f7167e0eSDag-Erling Smørgrav     square(b0,a0);
161*f7167e0eSDag-Erling Smørgrav     square(b0 + 32,a0 + 32);
162*f7167e0eSDag-Erling Smørgrav     mult(b1,a1,a0 + 32);
163*f7167e0eSDag-Erling Smørgrav     mult(b1 + 32,a1 + 32,a0);
164*f7167e0eSDag-Erling Smørgrav     add(c1,b1,b1 + 32);
165*f7167e0eSDag-Erling Smørgrav     sub(c1 + 32,b1,b1 + 32);
166*f7167e0eSDag-Erling Smørgrav     square(r,c1 + 32);
167*f7167e0eSDag-Erling Smørgrav     sub(s,b0,b0 + 32);
168*f7167e0eSDag-Erling Smørgrav     mult121665(t,s);
169*f7167e0eSDag-Erling Smørgrav     add(u,t,b0);
170*f7167e0eSDag-Erling Smørgrav     mult(xznb,b0,b0 + 32);
171*f7167e0eSDag-Erling Smørgrav     mult(xznb + 32,s,u);
172*f7167e0eSDag-Erling Smørgrav     square(xzn1b,c1);
173*f7167e0eSDag-Erling Smørgrav     mult(xzn1b + 32,r,work);
174*f7167e0eSDag-Erling Smørgrav     select(xzm,xzm1,xznb,xzn1b,b);
175*f7167e0eSDag-Erling Smørgrav   }
176*f7167e0eSDag-Erling Smørgrav 
177*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 64;++j) work[j] = xzm[j];
178*f7167e0eSDag-Erling Smørgrav }
179*f7167e0eSDag-Erling Smørgrav 
recip(unsigned int out[32],const unsigned int z[32])180*f7167e0eSDag-Erling Smørgrav static void recip(unsigned int out[32],const unsigned int z[32])
181*f7167e0eSDag-Erling Smørgrav {
182*f7167e0eSDag-Erling Smørgrav   unsigned int z2[32];
183*f7167e0eSDag-Erling Smørgrav   unsigned int z9[32];
184*f7167e0eSDag-Erling Smørgrav   unsigned int z11[32];
185*f7167e0eSDag-Erling Smørgrav   unsigned int z2_5_0[32];
186*f7167e0eSDag-Erling Smørgrav   unsigned int z2_10_0[32];
187*f7167e0eSDag-Erling Smørgrav   unsigned int z2_20_0[32];
188*f7167e0eSDag-Erling Smørgrav   unsigned int z2_50_0[32];
189*f7167e0eSDag-Erling Smørgrav   unsigned int z2_100_0[32];
190*f7167e0eSDag-Erling Smørgrav   unsigned int t0[32];
191*f7167e0eSDag-Erling Smørgrav   unsigned int t1[32];
192*f7167e0eSDag-Erling Smørgrav   int i;
193*f7167e0eSDag-Erling Smørgrav 
194*f7167e0eSDag-Erling Smørgrav   /* 2 */ square(z2,z);
195*f7167e0eSDag-Erling Smørgrav   /* 4 */ square(t1,z2);
196*f7167e0eSDag-Erling Smørgrav   /* 8 */ square(t0,t1);
197*f7167e0eSDag-Erling Smørgrav   /* 9 */ mult(z9,t0,z);
198*f7167e0eSDag-Erling Smørgrav   /* 11 */ mult(z11,z9,z2);
199*f7167e0eSDag-Erling Smørgrav   /* 22 */ square(t0,z11);
200*f7167e0eSDag-Erling Smørgrav   /* 2^5 - 2^0 = 31 */ mult(z2_5_0,t0,z9);
201*f7167e0eSDag-Erling Smørgrav 
202*f7167e0eSDag-Erling Smørgrav   /* 2^6 - 2^1 */ square(t0,z2_5_0);
203*f7167e0eSDag-Erling Smørgrav   /* 2^7 - 2^2 */ square(t1,t0);
204*f7167e0eSDag-Erling Smørgrav   /* 2^8 - 2^3 */ square(t0,t1);
205*f7167e0eSDag-Erling Smørgrav   /* 2^9 - 2^4 */ square(t1,t0);
206*f7167e0eSDag-Erling Smørgrav   /* 2^10 - 2^5 */ square(t0,t1);
207*f7167e0eSDag-Erling Smørgrav   /* 2^10 - 2^0 */ mult(z2_10_0,t0,z2_5_0);
208*f7167e0eSDag-Erling Smørgrav 
209*f7167e0eSDag-Erling Smørgrav   /* 2^11 - 2^1 */ square(t0,z2_10_0);
210*f7167e0eSDag-Erling Smørgrav   /* 2^12 - 2^2 */ square(t1,t0);
211*f7167e0eSDag-Erling Smørgrav   /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t0,t1); square(t1,t0); }
212*f7167e0eSDag-Erling Smørgrav   /* 2^20 - 2^0 */ mult(z2_20_0,t1,z2_10_0);
213*f7167e0eSDag-Erling Smørgrav 
214*f7167e0eSDag-Erling Smørgrav   /* 2^21 - 2^1 */ square(t0,z2_20_0);
215*f7167e0eSDag-Erling Smørgrav   /* 2^22 - 2^2 */ square(t1,t0);
216*f7167e0eSDag-Erling Smørgrav   /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { square(t0,t1); square(t1,t0); }
217*f7167e0eSDag-Erling Smørgrav   /* 2^40 - 2^0 */ mult(t0,t1,z2_20_0);
218*f7167e0eSDag-Erling Smørgrav 
219*f7167e0eSDag-Erling Smørgrav   /* 2^41 - 2^1 */ square(t1,t0);
220*f7167e0eSDag-Erling Smørgrav   /* 2^42 - 2^2 */ square(t0,t1);
221*f7167e0eSDag-Erling Smørgrav   /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t1,t0); square(t0,t1); }
222*f7167e0eSDag-Erling Smørgrav   /* 2^50 - 2^0 */ mult(z2_50_0,t0,z2_10_0);
223*f7167e0eSDag-Erling Smørgrav 
224*f7167e0eSDag-Erling Smørgrav   /* 2^51 - 2^1 */ square(t0,z2_50_0);
225*f7167e0eSDag-Erling Smørgrav   /* 2^52 - 2^2 */ square(t1,t0);
226*f7167e0eSDag-Erling Smørgrav   /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
227*f7167e0eSDag-Erling Smørgrav   /* 2^100 - 2^0 */ mult(z2_100_0,t1,z2_50_0);
228*f7167e0eSDag-Erling Smørgrav 
229*f7167e0eSDag-Erling Smørgrav   /* 2^101 - 2^1 */ square(t1,z2_100_0);
230*f7167e0eSDag-Erling Smørgrav   /* 2^102 - 2^2 */ square(t0,t1);
231*f7167e0eSDag-Erling Smørgrav   /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { square(t1,t0); square(t0,t1); }
232*f7167e0eSDag-Erling Smørgrav   /* 2^200 - 2^0 */ mult(t1,t0,z2_100_0);
233*f7167e0eSDag-Erling Smørgrav 
234*f7167e0eSDag-Erling Smørgrav   /* 2^201 - 2^1 */ square(t0,t1);
235*f7167e0eSDag-Erling Smørgrav   /* 2^202 - 2^2 */ square(t1,t0);
236*f7167e0eSDag-Erling Smørgrav   /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
237*f7167e0eSDag-Erling Smørgrav   /* 2^250 - 2^0 */ mult(t0,t1,z2_50_0);
238*f7167e0eSDag-Erling Smørgrav 
239*f7167e0eSDag-Erling Smørgrav   /* 2^251 - 2^1 */ square(t1,t0);
240*f7167e0eSDag-Erling Smørgrav   /* 2^252 - 2^2 */ square(t0,t1);
241*f7167e0eSDag-Erling Smørgrav   /* 2^253 - 2^3 */ square(t1,t0);
242*f7167e0eSDag-Erling Smørgrav   /* 2^254 - 2^4 */ square(t0,t1);
243*f7167e0eSDag-Erling Smørgrav   /* 2^255 - 2^5 */ square(t1,t0);
244*f7167e0eSDag-Erling Smørgrav   /* 2^255 - 21 */ mult(out,t1,z11);
245*f7167e0eSDag-Erling Smørgrav }
246*f7167e0eSDag-Erling Smørgrav 
crypto_scalarmult_curve25519(unsigned char * q,const unsigned char * n,const unsigned char * p)247*f7167e0eSDag-Erling Smørgrav int crypto_scalarmult_curve25519(unsigned char *q,
248*f7167e0eSDag-Erling Smørgrav   const unsigned char *n,
249*f7167e0eSDag-Erling Smørgrav   const unsigned char *p)
250*f7167e0eSDag-Erling Smørgrav {
251*f7167e0eSDag-Erling Smørgrav   unsigned int work[96];
252*f7167e0eSDag-Erling Smørgrav   unsigned char e[32];
253*f7167e0eSDag-Erling Smørgrav   unsigned int i;
254*f7167e0eSDag-Erling Smørgrav   for (i = 0;i < 32;++i) e[i] = n[i];
255*f7167e0eSDag-Erling Smørgrav   e[0] &= 248;
256*f7167e0eSDag-Erling Smørgrav   e[31] &= 127;
257*f7167e0eSDag-Erling Smørgrav   e[31] |= 64;
258*f7167e0eSDag-Erling Smørgrav   for (i = 0;i < 32;++i) work[i] = p[i];
259*f7167e0eSDag-Erling Smørgrav   mainloop(work,e);
260*f7167e0eSDag-Erling Smørgrav   recip(work + 32,work + 32);
261*f7167e0eSDag-Erling Smørgrav   mult(work + 64,work,work + 32);
262*f7167e0eSDag-Erling Smørgrav   freeze(work + 64);
263*f7167e0eSDag-Erling Smørgrav   for (i = 0;i < 32;++i) q[i] = work[64 + i];
264*f7167e0eSDag-Erling Smørgrav   return 0;
265*f7167e0eSDag-Erling Smørgrav }
266