xref: /freebsd/crypto/openssh/smult_curve25519_ref.c (revision f7167e0ea0bf5aaabff9490453b3b71b3f1b4d51)
1*f7167e0eSDag-Erling Smørgrav /* $OpenBSD: smult_curve25519_ref.c,v 1.2 2013/11/02 22:02:14 markus Exp $ */
2*f7167e0eSDag-Erling Smørgrav /*
3*f7167e0eSDag-Erling Smørgrav version 20081011
4*f7167e0eSDag-Erling Smørgrav Matthew Dempsky
5*f7167e0eSDag-Erling Smørgrav Public domain.
6*f7167e0eSDag-Erling Smørgrav Derived from public domain code by D. J. Bernstein.
7*f7167e0eSDag-Erling Smørgrav */
8*f7167e0eSDag-Erling Smørgrav 
9*f7167e0eSDag-Erling Smørgrav int crypto_scalarmult_curve25519(unsigned char *, const unsigned char *, const unsigned char *);
10*f7167e0eSDag-Erling Smørgrav 
11*f7167e0eSDag-Erling Smørgrav static void add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
12*f7167e0eSDag-Erling Smørgrav {
13*f7167e0eSDag-Erling Smørgrav   unsigned int j;
14*f7167e0eSDag-Erling Smørgrav   unsigned int u;
15*f7167e0eSDag-Erling Smørgrav   u = 0;
16*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 31;++j) { u += a[j] + b[j]; out[j] = u & 255; u >>= 8; }
17*f7167e0eSDag-Erling Smørgrav   u += a[31] + b[31]; out[31] = u;
18*f7167e0eSDag-Erling Smørgrav }
19*f7167e0eSDag-Erling Smørgrav 
20*f7167e0eSDag-Erling Smørgrav static void sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
21*f7167e0eSDag-Erling Smørgrav {
22*f7167e0eSDag-Erling Smørgrav   unsigned int j;
23*f7167e0eSDag-Erling Smørgrav   unsigned int u;
24*f7167e0eSDag-Erling Smørgrav   u = 218;
25*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 31;++j) {
26*f7167e0eSDag-Erling Smørgrav     u += a[j] + 65280 - b[j];
27*f7167e0eSDag-Erling Smørgrav     out[j] = u & 255;
28*f7167e0eSDag-Erling Smørgrav     u >>= 8;
29*f7167e0eSDag-Erling Smørgrav   }
30*f7167e0eSDag-Erling Smørgrav   u += a[31] - b[31];
31*f7167e0eSDag-Erling Smørgrav   out[31] = u;
32*f7167e0eSDag-Erling Smørgrav }
33*f7167e0eSDag-Erling Smørgrav 
34*f7167e0eSDag-Erling Smørgrav static void squeeze(unsigned int a[32])
35*f7167e0eSDag-Erling Smørgrav {
36*f7167e0eSDag-Erling Smørgrav   unsigned int j;
37*f7167e0eSDag-Erling Smørgrav   unsigned int u;
38*f7167e0eSDag-Erling Smørgrav   u = 0;
39*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
40*f7167e0eSDag-Erling Smørgrav   u += a[31]; a[31] = u & 127;
41*f7167e0eSDag-Erling Smørgrav   u = 19 * (u >> 7);
42*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
43*f7167e0eSDag-Erling Smørgrav   u += a[31]; a[31] = u;
44*f7167e0eSDag-Erling Smørgrav }
45*f7167e0eSDag-Erling Smørgrav 
46*f7167e0eSDag-Erling Smørgrav static const unsigned int minusp[32] = {
47*f7167e0eSDag-Erling Smørgrav  19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128
48*f7167e0eSDag-Erling Smørgrav } ;
49*f7167e0eSDag-Erling Smørgrav 
50*f7167e0eSDag-Erling Smørgrav static void freeze(unsigned int a[32])
51*f7167e0eSDag-Erling Smørgrav {
52*f7167e0eSDag-Erling Smørgrav   unsigned int aorig[32];
53*f7167e0eSDag-Erling Smørgrav   unsigned int j;
54*f7167e0eSDag-Erling Smørgrav   unsigned int negative;
55*f7167e0eSDag-Erling Smørgrav 
56*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 32;++j) aorig[j] = a[j];
57*f7167e0eSDag-Erling Smørgrav   add(a,a,minusp);
58*f7167e0eSDag-Erling Smørgrav   negative = -((a[31] >> 7) & 1);
59*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 32;++j) a[j] ^= negative & (aorig[j] ^ a[j]);
60*f7167e0eSDag-Erling Smørgrav }
61*f7167e0eSDag-Erling Smørgrav 
62*f7167e0eSDag-Erling Smørgrav static void mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
63*f7167e0eSDag-Erling Smørgrav {
64*f7167e0eSDag-Erling Smørgrav   unsigned int i;
65*f7167e0eSDag-Erling Smørgrav   unsigned int j;
66*f7167e0eSDag-Erling Smørgrav   unsigned int u;
67*f7167e0eSDag-Erling Smørgrav 
68*f7167e0eSDag-Erling Smørgrav   for (i = 0;i < 32;++i) {
69*f7167e0eSDag-Erling Smørgrav     u = 0;
70*f7167e0eSDag-Erling Smørgrav     for (j = 0;j <= i;++j) u += a[j] * b[i - j];
71*f7167e0eSDag-Erling Smørgrav     for (j = i + 1;j < 32;++j) u += 38 * a[j] * b[i + 32 - j];
72*f7167e0eSDag-Erling Smørgrav     out[i] = u;
73*f7167e0eSDag-Erling Smørgrav   }
74*f7167e0eSDag-Erling Smørgrav   squeeze(out);
75*f7167e0eSDag-Erling Smørgrav }
76*f7167e0eSDag-Erling Smørgrav 
77*f7167e0eSDag-Erling Smørgrav static void mult121665(unsigned int out[32],const unsigned int a[32])
78*f7167e0eSDag-Erling Smørgrav {
79*f7167e0eSDag-Erling Smørgrav   unsigned int j;
80*f7167e0eSDag-Erling Smørgrav   unsigned int u;
81*f7167e0eSDag-Erling Smørgrav 
82*f7167e0eSDag-Erling Smørgrav   u = 0;
83*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 31;++j) { u += 121665 * a[j]; out[j] = u & 255; u >>= 8; }
84*f7167e0eSDag-Erling Smørgrav   u += 121665 * a[31]; out[31] = u & 127;
85*f7167e0eSDag-Erling Smørgrav   u = 19 * (u >> 7);
86*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 31;++j) { u += out[j]; out[j] = u & 255; u >>= 8; }
87*f7167e0eSDag-Erling Smørgrav   u += out[j]; out[j] = u;
88*f7167e0eSDag-Erling Smørgrav }
89*f7167e0eSDag-Erling Smørgrav 
90*f7167e0eSDag-Erling Smørgrav static void square(unsigned int out[32],const unsigned int a[32])
91*f7167e0eSDag-Erling Smørgrav {
92*f7167e0eSDag-Erling Smørgrav   unsigned int i;
93*f7167e0eSDag-Erling Smørgrav   unsigned int j;
94*f7167e0eSDag-Erling Smørgrav   unsigned int u;
95*f7167e0eSDag-Erling Smørgrav 
96*f7167e0eSDag-Erling Smørgrav   for (i = 0;i < 32;++i) {
97*f7167e0eSDag-Erling Smørgrav     u = 0;
98*f7167e0eSDag-Erling Smørgrav     for (j = 0;j < i - j;++j) u += a[j] * a[i - j];
99*f7167e0eSDag-Erling Smørgrav     for (j = i + 1;j < i + 32 - j;++j) u += 38 * a[j] * a[i + 32 - j];
100*f7167e0eSDag-Erling Smørgrav     u *= 2;
101*f7167e0eSDag-Erling Smørgrav     if ((i & 1) == 0) {
102*f7167e0eSDag-Erling Smørgrav       u += a[i / 2] * a[i / 2];
103*f7167e0eSDag-Erling Smørgrav       u += 38 * a[i / 2 + 16] * a[i / 2 + 16];
104*f7167e0eSDag-Erling Smørgrav     }
105*f7167e0eSDag-Erling Smørgrav     out[i] = u;
106*f7167e0eSDag-Erling Smørgrav   }
107*f7167e0eSDag-Erling Smørgrav   squeeze(out);
108*f7167e0eSDag-Erling Smørgrav }
109*f7167e0eSDag-Erling Smørgrav 
110*f7167e0eSDag-Erling Smørgrav static void select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b)
111*f7167e0eSDag-Erling Smørgrav {
112*f7167e0eSDag-Erling Smørgrav   unsigned int j;
113*f7167e0eSDag-Erling Smørgrav   unsigned int t;
114*f7167e0eSDag-Erling Smørgrav   unsigned int bminus1;
115*f7167e0eSDag-Erling Smørgrav 
116*f7167e0eSDag-Erling Smørgrav   bminus1 = b - 1;
117*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 64;++j) {
118*f7167e0eSDag-Erling Smørgrav     t = bminus1 & (r[j] ^ s[j]);
119*f7167e0eSDag-Erling Smørgrav     p[j] = s[j] ^ t;
120*f7167e0eSDag-Erling Smørgrav     q[j] = r[j] ^ t;
121*f7167e0eSDag-Erling Smørgrav   }
122*f7167e0eSDag-Erling Smørgrav }
123*f7167e0eSDag-Erling Smørgrav 
124*f7167e0eSDag-Erling Smørgrav static void mainloop(unsigned int work[64],const unsigned char e[32])
125*f7167e0eSDag-Erling Smørgrav {
126*f7167e0eSDag-Erling Smørgrav   unsigned int xzm1[64];
127*f7167e0eSDag-Erling Smørgrav   unsigned int xzm[64];
128*f7167e0eSDag-Erling Smørgrav   unsigned int xzmb[64];
129*f7167e0eSDag-Erling Smørgrav   unsigned int xzm1b[64];
130*f7167e0eSDag-Erling Smørgrav   unsigned int xznb[64];
131*f7167e0eSDag-Erling Smørgrav   unsigned int xzn1b[64];
132*f7167e0eSDag-Erling Smørgrav   unsigned int a0[64];
133*f7167e0eSDag-Erling Smørgrav   unsigned int a1[64];
134*f7167e0eSDag-Erling Smørgrav   unsigned int b0[64];
135*f7167e0eSDag-Erling Smørgrav   unsigned int b1[64];
136*f7167e0eSDag-Erling Smørgrav   unsigned int c1[64];
137*f7167e0eSDag-Erling Smørgrav   unsigned int r[32];
138*f7167e0eSDag-Erling Smørgrav   unsigned int s[32];
139*f7167e0eSDag-Erling Smørgrav   unsigned int t[32];
140*f7167e0eSDag-Erling Smørgrav   unsigned int u[32];
141*f7167e0eSDag-Erling Smørgrav   unsigned int j;
142*f7167e0eSDag-Erling Smørgrav   unsigned int b;
143*f7167e0eSDag-Erling Smørgrav   int pos;
144*f7167e0eSDag-Erling Smørgrav 
145*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 32;++j) xzm1[j] = work[j];
146*f7167e0eSDag-Erling Smørgrav   xzm1[32] = 1;
147*f7167e0eSDag-Erling Smørgrav   for (j = 33;j < 64;++j) xzm1[j] = 0;
148*f7167e0eSDag-Erling Smørgrav 
149*f7167e0eSDag-Erling Smørgrav   xzm[0] = 1;
150*f7167e0eSDag-Erling Smørgrav   for (j = 1;j < 64;++j) xzm[j] = 0;
151*f7167e0eSDag-Erling Smørgrav 
152*f7167e0eSDag-Erling Smørgrav   for (pos = 254;pos >= 0;--pos) {
153*f7167e0eSDag-Erling Smørgrav     b = e[pos / 8] >> (pos & 7);
154*f7167e0eSDag-Erling Smørgrav     b &= 1;
155*f7167e0eSDag-Erling Smørgrav     select(xzmb,xzm1b,xzm,xzm1,b);
156*f7167e0eSDag-Erling Smørgrav     add(a0,xzmb,xzmb + 32);
157*f7167e0eSDag-Erling Smørgrav     sub(a0 + 32,xzmb,xzmb + 32);
158*f7167e0eSDag-Erling Smørgrav     add(a1,xzm1b,xzm1b + 32);
159*f7167e0eSDag-Erling Smørgrav     sub(a1 + 32,xzm1b,xzm1b + 32);
160*f7167e0eSDag-Erling Smørgrav     square(b0,a0);
161*f7167e0eSDag-Erling Smørgrav     square(b0 + 32,a0 + 32);
162*f7167e0eSDag-Erling Smørgrav     mult(b1,a1,a0 + 32);
163*f7167e0eSDag-Erling Smørgrav     mult(b1 + 32,a1 + 32,a0);
164*f7167e0eSDag-Erling Smørgrav     add(c1,b1,b1 + 32);
165*f7167e0eSDag-Erling Smørgrav     sub(c1 + 32,b1,b1 + 32);
166*f7167e0eSDag-Erling Smørgrav     square(r,c1 + 32);
167*f7167e0eSDag-Erling Smørgrav     sub(s,b0,b0 + 32);
168*f7167e0eSDag-Erling Smørgrav     mult121665(t,s);
169*f7167e0eSDag-Erling Smørgrav     add(u,t,b0);
170*f7167e0eSDag-Erling Smørgrav     mult(xznb,b0,b0 + 32);
171*f7167e0eSDag-Erling Smørgrav     mult(xznb + 32,s,u);
172*f7167e0eSDag-Erling Smørgrav     square(xzn1b,c1);
173*f7167e0eSDag-Erling Smørgrav     mult(xzn1b + 32,r,work);
174*f7167e0eSDag-Erling Smørgrav     select(xzm,xzm1,xznb,xzn1b,b);
175*f7167e0eSDag-Erling Smørgrav   }
176*f7167e0eSDag-Erling Smørgrav 
177*f7167e0eSDag-Erling Smørgrav   for (j = 0;j < 64;++j) work[j] = xzm[j];
178*f7167e0eSDag-Erling Smørgrav }
179*f7167e0eSDag-Erling Smørgrav 
180*f7167e0eSDag-Erling Smørgrav static void recip(unsigned int out[32],const unsigned int z[32])
181*f7167e0eSDag-Erling Smørgrav {
182*f7167e0eSDag-Erling Smørgrav   unsigned int z2[32];
183*f7167e0eSDag-Erling Smørgrav   unsigned int z9[32];
184*f7167e0eSDag-Erling Smørgrav   unsigned int z11[32];
185*f7167e0eSDag-Erling Smørgrav   unsigned int z2_5_0[32];
186*f7167e0eSDag-Erling Smørgrav   unsigned int z2_10_0[32];
187*f7167e0eSDag-Erling Smørgrav   unsigned int z2_20_0[32];
188*f7167e0eSDag-Erling Smørgrav   unsigned int z2_50_0[32];
189*f7167e0eSDag-Erling Smørgrav   unsigned int z2_100_0[32];
190*f7167e0eSDag-Erling Smørgrav   unsigned int t0[32];
191*f7167e0eSDag-Erling Smørgrav   unsigned int t1[32];
192*f7167e0eSDag-Erling Smørgrav   int i;
193*f7167e0eSDag-Erling Smørgrav 
194*f7167e0eSDag-Erling Smørgrav   /* 2 */ square(z2,z);
195*f7167e0eSDag-Erling Smørgrav   /* 4 */ square(t1,z2);
196*f7167e0eSDag-Erling Smørgrav   /* 8 */ square(t0,t1);
197*f7167e0eSDag-Erling Smørgrav   /* 9 */ mult(z9,t0,z);
198*f7167e0eSDag-Erling Smørgrav   /* 11 */ mult(z11,z9,z2);
199*f7167e0eSDag-Erling Smørgrav   /* 22 */ square(t0,z11);
200*f7167e0eSDag-Erling Smørgrav   /* 2^5 - 2^0 = 31 */ mult(z2_5_0,t0,z9);
201*f7167e0eSDag-Erling Smørgrav 
202*f7167e0eSDag-Erling Smørgrav   /* 2^6 - 2^1 */ square(t0,z2_5_0);
203*f7167e0eSDag-Erling Smørgrav   /* 2^7 - 2^2 */ square(t1,t0);
204*f7167e0eSDag-Erling Smørgrav   /* 2^8 - 2^3 */ square(t0,t1);
205*f7167e0eSDag-Erling Smørgrav   /* 2^9 - 2^4 */ square(t1,t0);
206*f7167e0eSDag-Erling Smørgrav   /* 2^10 - 2^5 */ square(t0,t1);
207*f7167e0eSDag-Erling Smørgrav   /* 2^10 - 2^0 */ mult(z2_10_0,t0,z2_5_0);
208*f7167e0eSDag-Erling Smørgrav 
209*f7167e0eSDag-Erling Smørgrav   /* 2^11 - 2^1 */ square(t0,z2_10_0);
210*f7167e0eSDag-Erling Smørgrav   /* 2^12 - 2^2 */ square(t1,t0);
211*f7167e0eSDag-Erling Smørgrav   /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t0,t1); square(t1,t0); }
212*f7167e0eSDag-Erling Smørgrav   /* 2^20 - 2^0 */ mult(z2_20_0,t1,z2_10_0);
213*f7167e0eSDag-Erling Smørgrav 
214*f7167e0eSDag-Erling Smørgrav   /* 2^21 - 2^1 */ square(t0,z2_20_0);
215*f7167e0eSDag-Erling Smørgrav   /* 2^22 - 2^2 */ square(t1,t0);
216*f7167e0eSDag-Erling Smørgrav   /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { square(t0,t1); square(t1,t0); }
217*f7167e0eSDag-Erling Smørgrav   /* 2^40 - 2^0 */ mult(t0,t1,z2_20_0);
218*f7167e0eSDag-Erling Smørgrav 
219*f7167e0eSDag-Erling Smørgrav   /* 2^41 - 2^1 */ square(t1,t0);
220*f7167e0eSDag-Erling Smørgrav   /* 2^42 - 2^2 */ square(t0,t1);
221*f7167e0eSDag-Erling Smørgrav   /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t1,t0); square(t0,t1); }
222*f7167e0eSDag-Erling Smørgrav   /* 2^50 - 2^0 */ mult(z2_50_0,t0,z2_10_0);
223*f7167e0eSDag-Erling Smørgrav 
224*f7167e0eSDag-Erling Smørgrav   /* 2^51 - 2^1 */ square(t0,z2_50_0);
225*f7167e0eSDag-Erling Smørgrav   /* 2^52 - 2^2 */ square(t1,t0);
226*f7167e0eSDag-Erling Smørgrav   /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
227*f7167e0eSDag-Erling Smørgrav   /* 2^100 - 2^0 */ mult(z2_100_0,t1,z2_50_0);
228*f7167e0eSDag-Erling Smørgrav 
229*f7167e0eSDag-Erling Smørgrav   /* 2^101 - 2^1 */ square(t1,z2_100_0);
230*f7167e0eSDag-Erling Smørgrav   /* 2^102 - 2^2 */ square(t0,t1);
231*f7167e0eSDag-Erling Smørgrav   /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { square(t1,t0); square(t0,t1); }
232*f7167e0eSDag-Erling Smørgrav   /* 2^200 - 2^0 */ mult(t1,t0,z2_100_0);
233*f7167e0eSDag-Erling Smørgrav 
234*f7167e0eSDag-Erling Smørgrav   /* 2^201 - 2^1 */ square(t0,t1);
235*f7167e0eSDag-Erling Smørgrav   /* 2^202 - 2^2 */ square(t1,t0);
236*f7167e0eSDag-Erling Smørgrav   /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
237*f7167e0eSDag-Erling Smørgrav   /* 2^250 - 2^0 */ mult(t0,t1,z2_50_0);
238*f7167e0eSDag-Erling Smørgrav 
239*f7167e0eSDag-Erling Smørgrav   /* 2^251 - 2^1 */ square(t1,t0);
240*f7167e0eSDag-Erling Smørgrav   /* 2^252 - 2^2 */ square(t0,t1);
241*f7167e0eSDag-Erling Smørgrav   /* 2^253 - 2^3 */ square(t1,t0);
242*f7167e0eSDag-Erling Smørgrav   /* 2^254 - 2^4 */ square(t0,t1);
243*f7167e0eSDag-Erling Smørgrav   /* 2^255 - 2^5 */ square(t1,t0);
244*f7167e0eSDag-Erling Smørgrav   /* 2^255 - 21 */ mult(out,t1,z11);
245*f7167e0eSDag-Erling Smørgrav }
246*f7167e0eSDag-Erling Smørgrav 
247*f7167e0eSDag-Erling Smørgrav int crypto_scalarmult_curve25519(unsigned char *q,
248*f7167e0eSDag-Erling Smørgrav   const unsigned char *n,
249*f7167e0eSDag-Erling Smørgrav   const unsigned char *p)
250*f7167e0eSDag-Erling Smørgrav {
251*f7167e0eSDag-Erling Smørgrav   unsigned int work[96];
252*f7167e0eSDag-Erling Smørgrav   unsigned char e[32];
253*f7167e0eSDag-Erling Smørgrav   unsigned int i;
254*f7167e0eSDag-Erling Smørgrav   for (i = 0;i < 32;++i) e[i] = n[i];
255*f7167e0eSDag-Erling Smørgrav   e[0] &= 248;
256*f7167e0eSDag-Erling Smørgrav   e[31] &= 127;
257*f7167e0eSDag-Erling Smørgrav   e[31] |= 64;
258*f7167e0eSDag-Erling Smørgrav   for (i = 0;i < 32;++i) work[i] = p[i];
259*f7167e0eSDag-Erling Smørgrav   mainloop(work,e);
260*f7167e0eSDag-Erling Smørgrav   recip(work + 32,work + 32);
261*f7167e0eSDag-Erling Smørgrav   mult(work + 64,work,work + 32);
262*f7167e0eSDag-Erling Smørgrav   freeze(work + 64);
263*f7167e0eSDag-Erling Smørgrav   for (i = 0;i < 32;++i) q[i] = work[64 + i];
264*f7167e0eSDag-Erling Smørgrav   return 0;
265*f7167e0eSDag-Erling Smørgrav }
266