xref: /freebsd/sys/opencrypto/gfmult.c (revision 95ee2897e98f5d444f26ed2334cc7c439f9c16c6)
1 /*-
2  * Copyright (c) 2014 The FreeBSD Foundation
3  *
4  * This software was developed by John-Mark Gurney under
5  * the sponsorship of the FreeBSD Foundation and
6  * Rubicon Communications, LLC (Netgate).
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1.  Redistributions of source code must retain the above copyright
11  *     notice, this list of conditions and the following disclaimer.
12  * 2.  Redistributions in binary form must reproduce the above copyright
13  *     notice, this list of conditions and the following disclaimer in the
14  *     documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29 
30 #include "gfmult.h"
31 
32 #define REV_POLY_REDUCT	0xe1	/* 0x87 bit reversed */
33 
34 /* reverse the bits of a nibble */
35 static const uint8_t nib_rev[] = {
36 	0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
37 	0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf,
38 };
39 
40 /* calculate v * 2 */
41 static inline struct gf128
gf128_mulalpha(struct gf128 v)42 gf128_mulalpha(struct gf128 v)
43 {
44 	uint64_t mask;
45 
46 	mask = !!(v.v[1] & 1);
47 	mask = ~(mask - 1);
48 	v.v[1] = (v.v[1] >> 1) | ((v.v[0] & 1) << 63);
49 	v.v[0] = (v.v[0] >> 1) ^ ((mask & REV_POLY_REDUCT) << 56);
50 
51 	return v;
52 }
53 
54 /*
55  * Generate a table for 0-16 * h.  Store the results in the table w/ indexes
56  * bit reversed, and the words striped across the values.
57  */
58 void
gf128_genmultable(struct gf128 h,struct gf128table * t)59 gf128_genmultable(struct gf128 h, struct gf128table *t)
60 {
61 	struct gf128 tbl[16];
62 	int i;
63 
64 	tbl[0] = MAKE_GF128(0, 0);
65 	tbl[1] = h;
66 
67 	for (i = 2; i < 16; i += 2) {
68 		tbl[i] = gf128_mulalpha(tbl[i / 2]);
69 		tbl[i + 1] = gf128_add(tbl[i], h);
70 	}
71 
72 	for (i = 0; i < 16; i++) {
73 		t->a[nib_rev[i]] = tbl[i].v[0] >> 32;
74 		t->b[nib_rev[i]] = tbl[i].v[0];
75 		t->c[nib_rev[i]] = tbl[i].v[1] >> 32;
76 		t->d[nib_rev[i]] = tbl[i].v[1];
77 	}
78 }
79 
80 /*
81  * Generate tables containing h, h^2, h^3 and h^4, starting at 0.
82  */
83 void
gf128_genmultable4(struct gf128 h,struct gf128table4 * t)84 gf128_genmultable4(struct gf128 h, struct gf128table4 *t)
85 {
86 	struct gf128 h2, h3, h4;
87 
88 	gf128_genmultable(h, &t->tbls[0]);
89 
90 	h2 = gf128_mul(h, &t->tbls[0]);
91 
92 	gf128_genmultable(h2, &t->tbls[1]);
93 
94 	h3 = gf128_mul(h, &t->tbls[1]);
95 	gf128_genmultable(h3, &t->tbls[2]);
96 
97 	h4 = gf128_mul(h2, &t->tbls[1]);
98 	gf128_genmultable(h4, &t->tbls[3]);
99 }
100 
101 /*
102  * Read a row from the table.
103  */
104 static inline struct gf128
readrow(struct gf128table * tbl,unsigned bits)105 readrow(struct gf128table *tbl, unsigned bits)
106 {
107 	struct gf128 r;
108 
109 	bits = bits % 16;
110 
111 	r.v[0] = ((uint64_t)tbl->a[bits] << 32) | tbl->b[bits];
112 	r.v[1] = ((uint64_t)tbl->c[bits] << 32) | tbl->d[bits];
113 
114 	return r;
115 }
116 
117 /*
118  * These are the reduction values.  Since we are dealing with bit reversed
119  * version, the values need to be bit reversed, AND the indexes are also
120  * bit reversed to make lookups quicker.
121  */
122 static uint16_t reduction[] = {
123 	0x0000, 0x1c20, 0x3840, 0x2460, 0x7080, 0x6ca0, 0x48c0, 0x54e0,
124 	0xe100, 0xfd20, 0xd940, 0xc560, 0x9180, 0x8da0, 0xa9c0, 0xb5e0,
125 };
126 
127 /*
128  * Calculate:
129  * (x*2^4 + word[3,0]*h) *
130  * 2^4 + word[7,4]*h) *
131  * ...
132  * 2^4 + word[63,60]*h
133  */
134 static struct gf128
gfmultword(uint64_t word,struct gf128 x,struct gf128table * tbl)135 gfmultword(uint64_t word, struct gf128 x, struct gf128table *tbl)
136 {
137 	struct gf128 row;
138 	unsigned bits;
139 	unsigned redbits;
140 	int i;
141 
142 	for (i = 0; i < 64; i += 4) {
143 		bits = word % 16;
144 
145 		/* fetch row */
146 		row = readrow(tbl, bits);
147 
148 		/* x * 2^4 */
149 		redbits = x.v[1] % 16;
150 		x.v[1] = (x.v[1] >> 4) | (x.v[0] % 16) << 60;
151 		x.v[0] >>= 4;
152 		x.v[0] ^= (uint64_t)reduction[redbits] << (64 - 16);
153 
154 		word >>= 4;
155 
156 		x = gf128_add(x, row);
157 	}
158 
159 	return x;
160 }
161 
162 /*
163  * Calculate
164  * (x*2^4 + worda[3,0]*h^4+wordb[3,0]*h^3+...+wordd[3,0]*h) *
165  * ...
166  * 2^4 + worda[63,60]*h^4+ ... + wordd[63,60]*h
167  *
168  * Passing/returning struct is .5% faster than passing in via pointer on
169  * amd64.
170  */
171 static struct gf128
gfmultword4(uint64_t worda,uint64_t wordb,uint64_t wordc,uint64_t wordd,struct gf128 x,struct gf128table4 * tbl)172 gfmultword4(uint64_t worda, uint64_t wordb, uint64_t wordc, uint64_t wordd,
173     struct gf128 x, struct gf128table4 *tbl)
174 {
175 	struct gf128 rowa, rowb, rowc, rowd;
176 	unsigned bitsa, bitsb, bitsc, bitsd;
177 	unsigned redbits;
178 	int i;
179 
180 	/*
181 	 * XXX - nibble reverse words to save a shift? probably not as
182 	 * nibble reverse would take 20 ops (5 * 4) verse 16
183 	 */
184 
185 	for (i = 0; i < 64; i += 4) {
186 		bitsa = worda % 16;
187 		bitsb = wordb % 16;
188 		bitsc = wordc % 16;
189 		bitsd = wordd % 16;
190 
191 		/* fetch row */
192 		rowa = readrow(&tbl->tbls[3], bitsa);
193 		rowb = readrow(&tbl->tbls[2], bitsb);
194 		rowc = readrow(&tbl->tbls[1], bitsc);
195 		rowd = readrow(&tbl->tbls[0], bitsd);
196 
197 		/* x * 2^4 */
198 		redbits = x.v[1] % 16;
199 		x.v[1] = (x.v[1] >> 4) | (x.v[0] % 16) << 60;
200 		x.v[0] >>= 4;
201 		x.v[0] ^= (uint64_t)reduction[redbits] << (64 - 16);
202 
203 		worda >>= 4;
204 		wordb >>= 4;
205 		wordc >>= 4;
206 		wordd >>= 4;
207 
208 		x = gf128_add(x, gf128_add(rowa, gf128_add(rowb,
209 		    gf128_add(rowc, rowd))));
210 	}
211 
212 	return x;
213 }
214 
215 struct gf128
gf128_mul(struct gf128 v,struct gf128table * tbl)216 gf128_mul(struct gf128 v, struct gf128table *tbl)
217 {
218 	struct gf128 ret;
219 
220 	ret = MAKE_GF128(0, 0);
221 
222 	ret = gfmultword(v.v[1], ret, tbl);
223 	ret = gfmultword(v.v[0], ret, tbl);
224 
225 	return ret;
226 }
227 
228 /*
229  * Calculate a*h^4 + b*h^3 + c*h^2 + d*h, or:
230  * (((a*h+b)*h+c)*h+d)*h
231  */
232 struct gf128
gf128_mul4(struct gf128 a,struct gf128 b,struct gf128 c,struct gf128 d,struct gf128table4 * tbl)233 gf128_mul4(struct gf128 a, struct gf128 b, struct gf128 c, struct gf128 d,
234     struct gf128table4 *tbl)
235 {
236 	struct gf128 tmp;
237 
238 	tmp = MAKE_GF128(0, 0);
239 
240 	tmp = gfmultword4(a.v[1], b.v[1], c.v[1], d.v[1], tmp, tbl);
241 	tmp = gfmultword4(a.v[0], b.v[0], c.v[0], d.v[0], tmp, tbl);
242 
243 	return tmp;
244 }
245 
246 /*
247  * a = data[0..15] + r
248  * b = data[16..31]
249  * c = data[32..47]
250  * d = data[48..63]
251  *
252  * Calculate a*h^4 + b*h^3 + c*h^2 + d*h, or:
253  * (((a*h+b)*h+c)*h+d)*h
254  */
255 struct gf128
gf128_mul4b(struct gf128 r,const uint8_t * v,struct gf128table4 * tbl)256 gf128_mul4b(struct gf128 r, const uint8_t *v, struct gf128table4 *tbl)
257 {
258 	struct gf128 a, b, c, d;
259 	struct gf128 tmp;
260 
261 	tmp = MAKE_GF128(0, 0);
262 
263 	a = gf128_add(r, gf128_read(&v[0*16]));
264 	b = gf128_read(&v[1*16]);
265 	c = gf128_read(&v[2*16]);
266 	d = gf128_read(&v[3*16]);
267 
268 	tmp = gfmultword4(a.v[1], b.v[1], c.v[1], d.v[1], tmp, tbl);
269 	tmp = gfmultword4(a.v[0], b.v[0], c.v[0], d.v[0], tmp, tbl);
270 
271 	return tmp;
272 }
273