1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/types.h>
27 #include <nxge_fflp_hash.h>
28
29 static void nxge_crc32c_word(uint32_t *crcptr, const uint32_t *buf, int len);
30
31 /*
32 * The crc32c algorithms are taken from sctp_crc32 implementation
33 * common/inet/sctp_crc32.{c,h}
34 *
35 */
36
37 /*
38 * Fast CRC32C calculation algorithm. The basic idea is to look at it
39 * four bytes (one word) at a time, using four tables. The
40 * standard algorithm in RFC 3309 uses one table.
41 */
42
43 /*
44 * SCTP uses reflected/reverse polynomial CRC32 with generating
45 * polynomial 0x1EDC6F41L
46 */
47 #define SCTP_POLY 0x1EDC6F41L
48
49 /* CRC-CCITT Polynomial */
50 #define CRC_CCITT_POLY 0x1021
51
52 /* The four CRC32c tables. */
53 static uint32_t crc32c_tab[4][256];
54
55 /* The four CRC-CCITT tables. */
56 static uint16_t crc_ccitt_tab[4][256];
57
58 /* the four tables for H1 Computation */
59 static uint32_t h1table[4][256];
60
61 #define CRC_32C_POLY 0x1EDC6F41L
62
63 #define COMPUTE_H1_BYTE(crc, data) \
64 (crc = (crc<<8)^h1table[0][((crc >> 24) ^data) & 0xff])
65
66 static uint32_t
reflect_32(uint32_t b)67 reflect_32(uint32_t b)
68 {
69 int i;
70 uint32_t rw = 0;
71
72 for (i = 0; i < 32; i++) {
73 if (b & 1) {
74 rw |= 1 << (31 - i);
75 }
76 b >>= 1;
77 }
78 return (rw);
79 }
80
81 static uint32_t
flip32(uint32_t w)82 flip32(uint32_t w)
83 {
84 return (((w >> 24) | ((w >> 8) & 0xff00) |
85 ((w << 8) & 0xff0000) | (w << 24)));
86 }
87
88 /*
89 * reference crc-ccitt implementation
90 */
91
92 uint16_t
crc_ccitt(uint16_t crcin,uint8_t data)93 crc_ccitt(uint16_t crcin, uint8_t data)
94 {
95 uint16_t mcrc, crc = 0, bits = 0;
96
97 mcrc = (((crcin >> 8) ^ data) & 0xff) << 8;
98 for (bits = 0; bits < 8; bits++) {
99 crc = ((crc ^ mcrc) & 0x8000) ?
100 (crc << 1) ^ CRC_CCITT_POLY :
101 crc << 1;
102 mcrc <<= 1;
103 }
104 return ((crcin << 8) ^ crc);
105 }
106
107 /*
108 * Initialize the crc32c tables.
109 */
110
111 void
nxge_crc32c_init(void)112 nxge_crc32c_init(void)
113 {
114 uint32_t index, bit, byte, crc;
115
116 for (index = 0; index < 256; index++) {
117 crc = reflect_32(index);
118 for (byte = 0; byte < 4; byte++) {
119 for (bit = 0; bit < 8; bit++) {
120 crc = (crc & 0x80000000) ?
121 (crc << 1) ^ SCTP_POLY : crc << 1;
122 }
123 #ifdef _BIG_ENDIAN
124 crc32c_tab[3 - byte][index] = flip32(reflect_32(crc));
125 #else
126 crc32c_tab[byte][index] = reflect_32(crc);
127 #endif
128 }
129 }
130 }
131
132 /*
133 * Initialize the crc-ccitt tables.
134 */
135
136 void
nxge_crc_ccitt_init(void)137 nxge_crc_ccitt_init(void)
138 {
139 uint16_t crc;
140 uint16_t index, bit, byte;
141
142 for (index = 0; index < 256; index++) {
143 crc = index << 8;
144 for (byte = 0; byte < 4; byte++) {
145 for (bit = 0; bit < 8; bit++) {
146 crc = (crc & 0x8000) ?
147 (crc << 1) ^ CRC_CCITT_POLY : crc << 1;
148 }
149 #ifdef _BIG_ENDIAN
150 crc_ccitt_tab[3 - byte][index] = crc;
151 #else
152 crc_ccitt_tab[byte][index] = crc;
153 #endif
154 }
155 }
156 }
157
158 /*
159 * Lookup the crc32c for a byte stream
160 */
161
162 static void
nxge_crc32c_byte(uint32_t * crcptr,const uint8_t * buf,int len)163 nxge_crc32c_byte(uint32_t *crcptr, const uint8_t *buf, int len)
164 {
165 uint32_t crc;
166 int i;
167
168 crc = *crcptr;
169 for (i = 0; i < len; i++) {
170 #ifdef _BIG_ENDIAN
171 crc = (crc << 8) ^ crc32c_tab[3][buf[i] ^ (crc >> 24)];
172 #else
173 crc = (crc >> 8) ^ crc32c_tab[0][buf[i] ^ (crc & 0xff)];
174 #endif
175 }
176 *crcptr = crc;
177 }
178
179 /*
180 * Lookup the crc-ccitt for a byte stream
181 */
182
183 static void
nxge_crc_ccitt_byte(uint16_t * crcptr,const uint8_t * buf,int len)184 nxge_crc_ccitt_byte(uint16_t *crcptr, const uint8_t *buf, int len)
185 {
186 uint16_t crc;
187 int i;
188
189 crc = *crcptr;
190 for (i = 0; i < len; i++) {
191
192 #ifdef _BIG_ENDIAN
193 crc = (crc << 8) ^ crc_ccitt_tab[3][buf[i] ^ (crc >> 8)];
194 #else
195 crc = (crc << 8) ^ crc_ccitt_tab[0][buf[i] ^ (crc >> 8)];
196 #endif
197 }
198 *crcptr = crc;
199 }
200
201 /*
202 * Lookup the crc32c for a 32 bit word stream
203 * Lookup is done fro the 4 bytes in parallel
204 * from the tables computed earlier
205 *
206 */
207
208 static void
nxge_crc32c_word(uint32_t * crcptr,const uint32_t * buf,int len)209 nxge_crc32c_word(uint32_t *crcptr, const uint32_t *buf, int len)
210 {
211 uint32_t w, crc;
212 int i;
213
214 crc = *crcptr;
215 for (i = 0; i < len; i++) {
216 w = crc ^ buf[i];
217 crc = crc32c_tab[0][w >> 24] ^
218 crc32c_tab[1][(w >> 16) & 0xff] ^
219 crc32c_tab[2][(w >> 8) & 0xff] ^
220 crc32c_tab[3][w & 0xff];
221 }
222 *crcptr = crc;
223 }
224
225 /*
226 * Lookup the crc-ccitt for a stream of bytes
227 *
228 * Since the parallel lookup version doesn't work yet,
229 * use the byte stream version (lookup crc for a byte
230 * at a time
231 *
232 */
233
234 uint16_t
nxge_crc_ccitt(uint16_t crc16,const uint8_t * buf,int len)235 nxge_crc_ccitt(uint16_t crc16, const uint8_t *buf, int len)
236 {
237 nxge_crc_ccitt_byte(&crc16, buf, len);
238 return (crc16);
239 }
240
241 /*
242 * Lookup the crc32c for a stream of bytes
243 *
244 * Tries to lookup the CRC on 4 byte words
245 * If the buffer is not 4 byte aligned, first compute
246 * with byte lookup until aligned. Then compute crc
247 * for each 4 bytes. If there are bytes left at the end of
248 * the buffer, then perform a byte lookup for the remaining bytes
249 *
250 *
251 */
252
253 uint32_t
nxge_crc32c(uint32_t crc32,const uint8_t * buf,int len)254 nxge_crc32c(uint32_t crc32, const uint8_t *buf, int len)
255 {
256 int rem;
257
258 rem = 4 - ((uintptr_t)buf) & 3;
259 if (rem != 0) {
260 if (len < rem) {
261 rem = len;
262 }
263 nxge_crc32c_byte(&crc32, buf, rem);
264 buf = buf + rem;
265 len = len - rem;
266 }
267 if (len > 3) {
268 nxge_crc32c_word(&crc32, (const uint32_t *) buf, len / 4);
269 }
270 rem = len & 3;
271 if (rem != 0) {
272 nxge_crc32c_byte(&crc32, buf + len - rem, rem);
273 }
274 return (crc32);
275 }
276
277 void
nxge_init_h1_table()278 nxge_init_h1_table()
279 {
280 uint32_t crc, bit, byte, index;
281
282 for (index = 0; index < 256; index++) {
283 crc = index << 24;
284 for (byte = 0; byte < 4; byte++) {
285 for (bit = 0; bit < 8; bit++) {
286 crc = ((crc & 0x80000000)) ?
287 (crc << 1) ^ CRC_32C_POLY : crc << 1;
288 }
289 h1table[byte][index] = crc;
290 }
291 }
292 }
293
294 /*
295 * Reference Neptune H1 computation function
296 *
297 * It is a slightly modified implementation of
298 * CRC-32C implementation
299 */
300
301 uint32_t
nxge_compute_h1_serial(uint32_t init_value,uint32_t * flow,uint32_t len)302 nxge_compute_h1_serial(uint32_t init_value, uint32_t *flow, uint32_t len)
303 {
304 int bit, byte;
305 uint32_t crc_h1 = init_value;
306 uint8_t *buf;
307
308 buf = (uint8_t *)flow;
309 for (byte = 0; byte < len; byte++) {
310 for (bit = 0; bit < 8; bit++) {
311 crc_h1 = (((crc_h1 >> 24) & 0x80) ^
312 ((buf[byte] << bit) & 0x80)) ?
313 (crc_h1 << 1) ^ CRC_32C_POLY : crc_h1 << 1;
314 }
315 }
316
317 return (crc_h1);
318 }
319
320 /*
321 * table based implementation
322 * uses 4 four tables in parallel
323 * 1 for each byte of a 32 bit word
324 *
325 * This is the default h1 computing function
326 *
327 */
328
329 uint32_t
nxge_compute_h1_table4(uint32_t crcin,uint32_t * flow,uint32_t length)330 nxge_compute_h1_table4(uint32_t crcin, uint32_t *flow, uint32_t length)
331 {
332 uint32_t w, fw, i, crch1 = crcin;
333 uint32_t *buf;
334
335 buf = (uint32_t *)flow;
336
337 for (i = 0; i < length / 4; i++) {
338 #ifdef _BIG_ENDIAN
339 fw = buf[i];
340 #else
341 fw = flip32(buf[i]);
342 fw = buf[i];
343 #endif
344 w = crch1 ^ fw;
345 crch1 = h1table[3][w >> 24] ^ h1table[2][(w >> 16) & 0xff] ^
346 h1table[1][(w >> 8) & 0xff] ^ h1table[0][w & 0xff];
347 }
348 return (crch1);
349 }
350
351 /*
352 * table based implementation
353 * uses a single table and computes h1 for a byte
354 * at a time.
355 *
356 */
357
358 uint32_t
nxge_compute_h1_table1(uint32_t crcin,uint32_t * flow,uint32_t length)359 nxge_compute_h1_table1(uint32_t crcin, uint32_t *flow, uint32_t length)
360 {
361
362 uint32_t i, crch1, tmp = crcin;
363 uint8_t *buf;
364
365 buf = (uint8_t *)flow;
366
367 tmp = crcin;
368 crch1 = 0;
369 for (i = 0; i < length; i++) {
370 crch1 = COMPUTE_H1_BYTE(tmp, buf[i]);
371 tmp = crch1;
372 }
373
374 return (crch1);
375 }
376