/*-
 * Copyright (c) 2014 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by John-Mark Gurney under
 * the sponsorship of the FreeBSD Foundation and
 * Rubicon Communications, LLC (Netgate).
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 *
 */

#include "gfmult.h"

#define REV_POLY_REDUCT	0xe1	/* 0x87 bit reversed */

/* reverse the bits of a nibble */
static const uint8_t nib_rev[] = {
	0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
	0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf,
};

/* calculate v * 2 */
static inline struct gf128
gf128_mulalpha(struct gf128 v)
{
	uint64_t mask;

	mask = !!(v.v[1] & 1);
	mask = ~(mask - 1);
	v.v[1] = (v.v[1] >> 1) | ((v.v[0] & 1) << 63);
	v.v[0] = (v.v[0] >> 1) ^ ((mask & REV_POLY_REDUCT) << 56);

	return v;
}

/*
 * Generate a table for 0-15 * h.  Store the results in the table w/ indexes
 * bit reversed, and the words striped across the values.
 */
void
gf128_genmultable(struct gf128 h, struct gf128table *t)
{
	struct gf128 tbl[16];
	int i;

	tbl[0] = MAKE_GF128(0, 0);
	tbl[1] = h;

	for (i = 2; i < 16; i += 2) {
		tbl[i] = gf128_mulalpha(tbl[i / 2]);
		tbl[i + 1] = gf128_add(tbl[i], h);
	}

	for (i = 0; i < 16; i++) {
		t->a[nib_rev[i]] = tbl[i].v[0] >> 32;
		t->b[nib_rev[i]] = tbl[i].v[0];
		t->c[nib_rev[i]] = tbl[i].v[1] >> 32;
		t->d[nib_rev[i]] = tbl[i].v[1];
	}
}

/*
 * Generate tables containing h, h^2, h^3 and h^4, starting at 0.
 */
void
gf128_genmultable4(struct gf128 h, struct gf128table4 *t)
{
	struct gf128 h2, h3, h4;

	gf128_genmultable(h, &t->tbls[0]);

	h2 = gf128_mul(h, &t->tbls[0]);

	gf128_genmultable(h2, &t->tbls[1]);

	h3 = gf128_mul(h, &t->tbls[1]);
	gf128_genmultable(h3, &t->tbls[2]);

	h4 = gf128_mul(h2, &t->tbls[1]);
	gf128_genmultable(h4, &t->tbls[3]);
}
/*
 * Read a row from the table.
 */
static inline struct gf128
readrow(struct gf128table *tbl, unsigned bits)
{
	struct gf128 r;

	bits = bits % 16;

	r.v[0] = ((uint64_t)tbl->a[bits] << 32) | tbl->b[bits];
	r.v[1] = ((uint64_t)tbl->c[bits] << 32) | tbl->d[bits];

	return r;
}

/*
 * These are the reduction values.  Since we are dealing with the bit-reversed
 * version, the values need to be bit reversed, AND the indexes are also
 * bit reversed to make lookups quicker.
 */
static uint16_t reduction[] = {
	0x0000, 0x1c20, 0x3840, 0x2460, 0x7080, 0x6ca0, 0x48c0, 0x54e0,
	0xe100, 0xfd20, 0xd940, 0xc560, 0x9180, 0x8da0, 0xa9c0, 0xb5e0,
};
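
/*
 * Note on the table above: reduction[n] is the value folded back into
 * x.v[0] by the "x * 2^4" step in gfmultword()/gfmultword4() below when
 * the low nibble of x.v[1] is n, i.e. the same fold-back that four chained
 * gf128_mulalpha() calls would perform.  Once shifted into place:
 *
 *	(uint64_t)reduction[n] << 48 ==
 *	    XOR of ((uint64_t)REV_POLY_REDUCT << (53 + j))
 *	    over the set bits j of n
 *
 * e.g. reduction[1] == 0xe1 << 5 == 0x1c20 and reduction[8] == 0xe1 << 8 ==
 * 0xe100.
 */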
/*
 * Calculate:
 * (x*2^4 + word[3,0]*h) *
 * 2^4 + word[7,4]*h) *
 * ...
 * 2^4 + word[63,60]*h
 */
static struct gf128
gfmultword(uint64_t word, struct gf128 x, struct gf128table *tbl)
{
	struct gf128 row;
	unsigned bits;
	unsigned redbits;
	int i;

	for (i = 0; i < 64; i += 4) {
		bits = word % 16;

		/* fetch row */
		row = readrow(tbl, bits);

		/* x * 2^4 */
		redbits = x.v[1] % 16;
		x.v[1] = (x.v[1] >> 4) | (x.v[0] % 16) << 60;
		x.v[0] >>= 4;
		x.v[0] ^= (uint64_t)reduction[redbits] << (64 - 16);

		word >>= 4;

		x = gf128_add(x, row);
	}

	return x;
}

/*
 * Calculate
 * (x*2^4 + worda[3,0]*h^4+wordb[3,0]*h^3+...+wordd[3,0]*h) *
 * ...
 * 2^4 + worda[63,60]*h^4+ ... + wordd[63,60]*h
 *
 * Passing/returning struct is .5% faster than passing in via pointer on
 * amd64.
 */
static struct gf128
gfmultword4(uint64_t worda, uint64_t wordb, uint64_t wordc, uint64_t wordd,
    struct gf128 x, struct gf128table4 *tbl)
{
	struct gf128 rowa, rowb, rowc, rowd;
	unsigned bitsa, bitsb, bitsc, bitsd;
	unsigned redbits;
	int i;

	/*
	 * XXX - nibble reverse words to save a shift?  Probably not, as a
	 * nibble reverse would take 20 ops (5 * 4) versus 16.
	 */

	for (i = 0; i < 64; i += 4) {
		bitsa = worda % 16;
		bitsb = wordb % 16;
		bitsc = wordc % 16;
		bitsd = wordd % 16;

		/* fetch row */
		rowa = readrow(&tbl->tbls[3], bitsa);
		rowb = readrow(&tbl->tbls[2], bitsb);
		rowc = readrow(&tbl->tbls[1], bitsc);
		rowd = readrow(&tbl->tbls[0], bitsd);

		/* x * 2^4 */
		redbits = x.v[1] % 16;
		x.v[1] = (x.v[1] >> 4) | (x.v[0] % 16) << 60;
		x.v[0] >>= 4;
		x.v[0] ^= (uint64_t)reduction[redbits] << (64 - 16);

		worda >>= 4;
		wordb >>= 4;
		wordc >>= 4;
		wordd >>= 4;

		x = gf128_add(x, gf128_add(rowa, gf128_add(rowb,
		    gf128_add(rowc, rowd))));
	}

	return x;
}

struct gf128
gf128_mul(struct gf128 v, struct gf128table *tbl)
{
	struct gf128 ret;

	ret = MAKE_GF128(0, 0);

	ret = gfmultword(v.v[1], ret, tbl);
	ret = gfmultword(v.v[0], ret, tbl);

	return ret;
}

/*
 * Calculate a*h^4 + b*h^3 + c*h^2 + d*h, or:
 * (((a*h+b)*h+c)*h+d)*h
 */
struct gf128
gf128_mul4(struct gf128 a, struct gf128 b, struct gf128 c, struct gf128 d,
    struct gf128table4 *tbl)
{
	struct gf128 tmp;

	tmp = MAKE_GF128(0, 0);

	tmp = gfmultword4(a.v[1], b.v[1], c.v[1], d.v[1], tmp, tbl);
	tmp = gfmultword4(a.v[0], b.v[0], c.v[0], d.v[0], tmp, tbl);

	return tmp;
}

/*
 * a = data[0..15] + r
 * b = data[16..31]
 * c = data[32..47]
 * d = data[48..63]
 *
 * Calculate a*h^4 + b*h^3 + c*h^2 + d*h, or:
 * (((a*h+b)*h+c)*h+d)*h
 */
struct gf128
gf128_mul4b(struct gf128 r, const uint8_t *v, struct gf128table4 *tbl)
{
	struct gf128 a, b, c, d;
	struct gf128 tmp;

	tmp = MAKE_GF128(0, 0);

	a = gf128_add(r, gf128_read(&v[0*16]));
	b = gf128_read(&v[1*16]);
	c = gf128_read(&v[2*16]);
	d = gf128_read(&v[3*16]);

	tmp = gfmultword4(a.v[1], b.v[1], c.v[1], d.v[1], tmp, tbl);
	tmp = gfmultword4(a.v[0], b.v[0], c.v[0], d.v[0], tmp, tbl);

	return tmp;
}
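
/*
 * Illustrative sketch only (not part of this file's API; the real callers
 * live elsewhere).  Given a hash key h and an input described by data/len,
 * a GHASH-style accumulation over 64-byte chunks would look roughly like:
 *
 *	struct gf128table4 tbl;
 *	struct gf128 ghash;
 *
 *	gf128_genmultable4(h, &tbl);
 *	ghash = MAKE_GF128(0, 0);
 *	while (len >= 4 * 16) {
 *		ghash = gf128_mul4b(ghash, data, &tbl);
 *		data += 4 * 16;
 *		len -= 4 * 16;
 *	}
 *
 * with any remaining 16-byte blocks folded in one at a time via
 * ghash = gf128_mul(gf128_add(ghash, gf128_read(data)), &tbl.tbls[0]).
 */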