xref: /freebsd/sys/opencrypto/gfmult.c (revision b9f654b163bce26de79705e77b872427c9f2afa1)
1 /*-
2  * Copyright (c) 2014 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by John-Mark Gurney under
6  * the sponsorship of the FreeBSD Foundation and
7  * Rubicon Communications, LLC (Netgate).
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1.  Redistributions of source code must retain the above copyright
12  *     notice, this list of conditions and the following disclaimer.
13  * 2.  Redistributions in binary form must reproduce the above copyright
14  *     notice, this list of conditions and the following disclaimer in the
15  *     documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	$FreeBSD$
30  *
31  */
32 
33 #include "gfmult.h"
34 
/*
 * Reduction constant used by gf128_mulalpha(): 0x87 with the order of its
 * eight bits reversed.
 */
#define REV_POLY_REDUCT	0xe1

/* nib_rev[n] is the 4-bit value n with its bit order reversed. */
static const uint8_t nib_rev[16] = {
	0x0, 0x8, 0x4, 0xc,
	0x2, 0xa, 0x6, 0xe,
	0x1, 0x9, 0x5, 0xd,
	0x3, 0xb, 0x7, 0xf,
};
42 
43 /* calculate v * 2 */
44 static inline struct gf128
45 gf128_mulalpha(struct gf128 v)
46 {
47 	uint64_t mask;
48 
49 	mask = !!(v.v[1] & 1);
50 	mask = ~(mask - 1);
51 	v.v[1] = (v.v[1] >> 1) | ((v.v[0] & 1) << 63);
52 	v.v[0] = (v.v[0] >> 1) ^ ((mask & REV_POLY_REDUCT) << 56);
53 
54 	return v;
55 }
56 
57 /*
58  * Generate a table for 0-16 * h.  Store the results in the table w/ indexes
59  * bit reversed, and the words striped across the values.
60  */
61 void
62 gf128_genmultable(struct gf128 h, struct gf128table *t)
63 {
64 	struct gf128 tbl[16];
65 	int i;
66 
67 	tbl[0] = MAKE_GF128(0, 0);
68 	tbl[1] = h;
69 
70 	for (i = 2; i < 16; i += 2) {
71 		tbl[i] = gf128_mulalpha(tbl[i / 2]);
72 		tbl[i + 1] = gf128_add(tbl[i], h);
73 	}
74 
75 	for (i = 0; i < 16; i++) {
76 		t->a[nib_rev[i]] = tbl[i].v[0] >> 32;
77 		t->b[nib_rev[i]] = tbl[i].v[0];
78 		t->c[nib_rev[i]] = tbl[i].v[1] >> 32;
79 		t->d[nib_rev[i]] = tbl[i].v[1];
80 	}
81 }
82 
83 /*
84  * Generate tables containing h, h^2, h^3 and h^4, starting at 0.
85  */
86 void
87 gf128_genmultable4(struct gf128 h, struct gf128table4 *t)
88 {
89 	struct gf128 h2, h3, h4;
90 
91 	gf128_genmultable(h, &t->tbls[0]);
92 
93 	h2 = gf128_mul(h, &t->tbls[0]);
94 
95 	gf128_genmultable(h2, &t->tbls[1]);
96 
97 	h3 = gf128_mul(h, &t->tbls[1]);
98 	gf128_genmultable(h3, &t->tbls[2]);
99 
100 	h4 = gf128_mul(h2, &t->tbls[1]);
101 	gf128_genmultable(h4, &t->tbls[3]);
102 }
103 
104 /*
105  * Read a row from the table.
106  */
107 static inline struct gf128
108 readrow(struct gf128table *tbl, unsigned bits)
109 {
110 	struct gf128 r;
111 
112 	bits = bits % 16;
113 
114 	r.v[0] = ((uint64_t)tbl->a[bits] << 32) | tbl->b[bits];
115 	r.v[1] = ((uint64_t)tbl->c[bits] << 32) | tbl->d[bits];
116 
117 	return r;
118 }
119 
/*
 * These are the reduction values used when multiplying by 2^4.  The table
 * is indexed by the four bits shifted out of the low end of v[1], and the
 * selected entry is XORed into the top 16 bits of v[0].
 *
 * Since we are dealing with the bit reversed representation, the values
 * are bit reversed, AND the indexes are also bit reversed so that the
 * shifted-out nibble can be used for the lookup directly.
 *
 * The table is read-only, hence const.
 */
static const uint16_t reduction[16] = {
	0x0000, 0x1c20, 0x3840, 0x2460, 0x7080, 0x6ca0, 0x48c0, 0x54e0,
	0xe100, 0xfd20, 0xd940, 0xc560, 0x9180, 0x8da0, 0xa9c0, 0xb5e0,
};
129 
130 /*
131  * Calculate:
132  * (x*2^4 + word[3,0]*h) *
133  * 2^4 + word[7,4]*h) *
134  * ...
135  * 2^4 + word[63,60]*h
136  */
137 static struct gf128
138 gfmultword(uint64_t word, struct gf128 x, struct gf128table *tbl)
139 {
140 	struct gf128 row;
141 	unsigned bits;
142 	unsigned redbits;
143 	int i;
144 
145 	for (i = 0; i < 64; i += 4) {
146 		bits = word % 16;
147 
148 		/* fetch row */
149 		row = readrow(tbl, bits);
150 
151 		/* x * 2^4 */
152 		redbits = x.v[1] % 16;
153 		x.v[1] = (x.v[1] >> 4) | (x.v[0] % 16) << 60;
154 		x.v[0] >>= 4;
155 		x.v[0] ^= (uint64_t)reduction[redbits] << (64 - 16);
156 
157 		word >>= 4;
158 
159 		x = gf128_add(x, row);
160 	}
161 
162 	return x;
163 }
164 
165 /*
166  * Calculate
167  * (x*2^4 + worda[3,0]*h^4+wordb[3,0]*h^3+...+wordd[3,0]*h) *
168  * ...
169  * 2^4 + worda[63,60]*h^4+ ... + wordd[63,60]*h
170  *
171  * Passing/returning struct is .5% faster than passing in via pointer on
172  * amd64.
173  */
174 static struct gf128
175 gfmultword4(uint64_t worda, uint64_t wordb, uint64_t wordc, uint64_t wordd,
176     struct gf128 x, struct gf128table4 *tbl)
177 {
178 	struct gf128 rowa, rowb, rowc, rowd;
179 	unsigned bitsa, bitsb, bitsc, bitsd;
180 	unsigned redbits;
181 	int i;
182 
183 	/*
184 	 * XXX - nibble reverse words to save a shift? probably not as
185 	 * nibble reverse would take 20 ops (5 * 4) verse 16
186 	 */
187 
188 	for (i = 0; i < 64; i += 4) {
189 		bitsa = worda % 16;
190 		bitsb = wordb % 16;
191 		bitsc = wordc % 16;
192 		bitsd = wordd % 16;
193 
194 		/* fetch row */
195 		rowa = readrow(&tbl->tbls[3], bitsa);
196 		rowb = readrow(&tbl->tbls[2], bitsb);
197 		rowc = readrow(&tbl->tbls[1], bitsc);
198 		rowd = readrow(&tbl->tbls[0], bitsd);
199 
200 		/* x * 2^4 */
201 		redbits = x.v[1] % 16;
202 		x.v[1] = (x.v[1] >> 4) | (x.v[0] % 16) << 60;
203 		x.v[0] >>= 4;
204 		x.v[0] ^= (uint64_t)reduction[redbits] << (64 - 16);
205 
206 		worda >>= 4;
207 		wordb >>= 4;
208 		wordc >>= 4;
209 		wordd >>= 4;
210 
211 		x = gf128_add(x, gf128_add(rowa, gf128_add(rowb,
212 		    gf128_add(rowc, rowd))));
213 	}
214 
215 	return x;
216 }
217 
218 struct gf128
219 gf128_mul(struct gf128 v, struct gf128table *tbl)
220 {
221 	struct gf128 ret;
222 
223 	ret = MAKE_GF128(0, 0);
224 
225 	ret = gfmultword(v.v[1], ret, tbl);
226 	ret = gfmultword(v.v[0], ret, tbl);
227 
228 	return ret;
229 }
230 
231 /*
232  * Calculate a*h^4 + b*h^3 + c*h^2 + d*h, or:
233  * (((a*h+b)*h+c)*h+d)*h
234  */
235 struct gf128
236 gf128_mul4(struct gf128 a, struct gf128 b, struct gf128 c, struct gf128 d,
237     struct gf128table4 *tbl)
238 {
239 	struct gf128 tmp;
240 
241 	tmp = MAKE_GF128(0, 0);
242 
243 	tmp = gfmultword4(a.v[1], b.v[1], c.v[1], d.v[1], tmp, tbl);
244 	tmp = gfmultword4(a.v[0], b.v[0], c.v[0], d.v[0], tmp, tbl);
245 
246 	return tmp;
247 }
248 
249 /*
250  * a = data[0..15] + r
251  * b = data[16..31]
252  * c = data[32..47]
253  * d = data[48..63]
254  *
255  * Calculate a*h^4 + b*h^3 + c*h^2 + d*h, or:
256  * (((a*h+b)*h+c)*h+d)*h
257  */
258 struct gf128
259 gf128_mul4b(struct gf128 r, const uint8_t *v, struct gf128table4 *tbl)
260 {
261 	struct gf128 a, b, c, d;
262 	struct gf128 tmp;
263 
264 	tmp = MAKE_GF128(0, 0);
265 
266 	a = gf128_add(r, gf128_read(&v[0*16]));
267 	b = gf128_read(&v[1*16]);
268 	c = gf128_read(&v[2*16]);
269 	d = gf128_read(&v[3*16]);
270 
271 	tmp = gfmultword4(a.v[1], b.v[1], c.v[1], d.v[1], tmp, tbl);
272 	tmp = gfmultword4(a.v[0], b.v[0], c.v[0], d.v[0], tmp, tbl);
273 
274 	return tmp;
275 }
276