1 /* 2 * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining 5 * a copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sublicense, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25 #include "inner.h" 26 27 /* 28 * This is the 64-bit variant of br_ghash_ctmul32(), with 64-bit operands 29 * and bit reversal of 64-bit words. 30 */ 31 32 static inline uint64_t 33 bmul64(uint64_t x, uint64_t y) 34 { 35 uint64_t x0, x1, x2, x3; 36 uint64_t y0, y1, y2, y3; 37 uint64_t z0, z1, z2, z3; 38 39 x0 = x & (uint64_t)0x1111111111111111; 40 x1 = x & (uint64_t)0x2222222222222222; 41 x2 = x & (uint64_t)0x4444444444444444; 42 x3 = x & (uint64_t)0x8888888888888888; 43 y0 = y & (uint64_t)0x1111111111111111; 44 y1 = y & (uint64_t)0x2222222222222222; 45 y2 = y & (uint64_t)0x4444444444444444; 46 y3 = y & (uint64_t)0x8888888888888888; 47 z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1); 48 z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2); 49 z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3); 50 z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0); 51 z0 &= (uint64_t)0x1111111111111111; 52 z1 &= (uint64_t)0x2222222222222222; 53 z2 &= (uint64_t)0x4444444444444444; 54 z3 &= (uint64_t)0x8888888888888888; 55 return z0 | z1 | z2 | z3; 56 } 57 58 static uint64_t 59 rev64(uint64_t x) 60 { 61 #define RMS(m, s) do { \ 62 x = ((x & (uint64_t)(m)) << (s)) \ 63 | ((x >> (s)) & (uint64_t)(m)); \ 64 } while (0) 65 66 RMS(0x5555555555555555, 1); 67 RMS(0x3333333333333333, 2); 68 RMS(0x0F0F0F0F0F0F0F0F, 4); 69 RMS(0x00FF00FF00FF00FF, 8); 70 RMS(0x0000FFFF0000FFFF, 16); 71 return (x << 32) | (x >> 32); 72 73 #undef RMS 74 } 75 76 /* see bearssl_ghash.h */ 77 void 78 br_ghash_ctmul64(void *y, const void *h, const void *data, size_t len) 79 { 80 const unsigned char *buf, *hb; 81 unsigned char *yb; 82 uint64_t y0, y1; 83 uint64_t h0, h1, h2, h0r, h1r, h2r; 84 85 buf = data; 86 yb = y; 87 hb = h; 88 y1 = br_dec64be(yb); 89 y0 = br_dec64be(yb + 8); 90 h1 = br_dec64be(hb); 91 h0 = br_dec64be(hb + 8); 92 h0r = rev64(h0); 93 h1r = rev64(h1); 94 h2 = h0 ^ h1; 95 h2r = h0r ^ h1r; 96 while (len > 0) { 97 const unsigned char *src; 98 unsigned char tmp[16]; 99 uint64_t y0r, y1r, y2, y2r; 100 uint64_t z0, z1, z2, z0h, z1h, z2h; 101 uint64_t v0, v1, v2, v3; 102 103 if (len >= 16) { 104 src = buf; 105 buf += 16; 106 len -= 16; 107 } else { 108 memcpy(tmp, buf, len); 109 memset(tmp + len, 0, (sizeof tmp) - len); 110 src = tmp; 111 len = 0; 112 } 113 y1 ^= br_dec64be(src); 114 y0 ^= br_dec64be(src + 8); 115 116 y0r = rev64(y0); 117 y1r = rev64(y1); 118 y2 = y0 ^ y1; 119 y2r = y0r ^ y1r; 120 121 z0 = bmul64(y0, h0); 122 z1 = bmul64(y1, h1); 123 z2 = bmul64(y2, h2); 124 z0h = bmul64(y0r, h0r); 125 z1h = bmul64(y1r, h1r); 126 z2h = bmul64(y2r, h2r); 127 z2 ^= z0 ^ z1; 128 z2h ^= z0h ^ z1h; 129 z0h = rev64(z0h) >> 1; 130 z1h = rev64(z1h) >> 1; 131 z2h = rev64(z2h) >> 1; 132 133 v0 = z0; 134 v1 = z0h ^ z2; 135 v2 = z1 ^ z2h; 136 v3 = z1h; 137 138 v3 = (v3 << 1) | (v2 >> 63); 139 v2 = (v2 << 1) | (v1 >> 63); 140 v1 = (v1 << 1) | (v0 >> 63); 141 v0 = (v0 << 1); 142 143 v2 ^= v0 ^ (v0 >> 1) ^ (v0 >> 2) ^ (v0 >> 7); 144 v1 ^= (v0 << 63) ^ (v0 << 62) ^ (v0 << 57); 145 v3 ^= v1 ^ (v1 >> 1) ^ (v1 >> 2) ^ (v1 >> 7); 146 v2 ^= (v1 << 63) ^ (v1 << 62) ^ (v1 << 57); 147 148 y0 = v2; 149 y1 = v3; 150 } 151 152 br_enc64be(yb, y1); 153 br_enc64be(yb + 8, y0); 154 } 155