1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25 #pragma ident "%Z%%M% %I% %E% SMI"
26
27 #include <sys/types.h>
28 #include <nxge_fflp_hash.h>
29
30 static void nxge_crc32c_word(uint32_t *crcptr, const uint32_t *buf, int len);
31
32 /*
33 * The crc32c algorithms are taken from sctp_crc32 implementation
34 * common/inet/sctp_crc32.{c,h}
35 *
36 */
37
38 /*
39 * Fast CRC32C calculation algorithm. The basic idea is to look at it
40 * four bytes (one word) at a time, using four tables. The
41 * standard algorithm in RFC 3309 uses one table.
42 */
43
44 /*
45 * SCTP uses reflected/reverse polynomial CRC32 with generating
46 * polynomial 0x1EDC6F41L
47 */
48 #define SCTP_POLY 0x1EDC6F41L
49
50 /* CRC-CCITT Polynomial */
51 #define CRC_CCITT_POLY 0x1021
52
53 /* The four CRC32c tables. */
54 static uint32_t crc32c_tab[4][256];
55
56 /* The four CRC-CCITT tables. */
57 static uint16_t crc_ccitt_tab[4][256];
58
59 /* the four tables for H1 Computation */
60 static uint32_t h1table[4][256];
61
62 #define CRC_32C_POLY 0x1EDC6F41L
63
64 #define COMPUTE_H1_BYTE(crc, data) \
65 (crc = (crc<<8)^h1table[0][((crc >> 24) ^data) & 0xff])
66
67 static uint32_t
reflect_32(uint32_t b)68 reflect_32(uint32_t b)
69 {
70 int i;
71 uint32_t rw = 0;
72
73 for (i = 0; i < 32; i++) {
74 if (b & 1) {
75 rw |= 1 << (31 - i);
76 }
77 b >>= 1;
78 }
79 return (rw);
80 }
81
82 static uint32_t
flip32(uint32_t w)83 flip32(uint32_t w)
84 {
85 return (((w >> 24) | ((w >> 8) & 0xff00) |
86 ((w << 8) & 0xff0000) | (w << 24)));
87 }
88
89 /*
90 * reference crc-ccitt implementation
91 */
92
93 uint16_t
crc_ccitt(uint16_t crcin,uint8_t data)94 crc_ccitt(uint16_t crcin, uint8_t data)
95 {
96 uint16_t mcrc, crc = 0, bits = 0;
97
98 mcrc = (((crcin >> 8) ^ data) & 0xff) << 8;
99 for (bits = 0; bits < 8; bits++) {
100 crc = ((crc ^ mcrc) & 0x8000) ?
101 (crc << 1) ^ CRC_CCITT_POLY :
102 crc << 1;
103 mcrc <<= 1;
104 }
105 return ((crcin << 8) ^ crc);
106 }
107
108 /*
109 * Initialize the crc32c tables.
110 */
111
112 void
nxge_crc32c_init(void)113 nxge_crc32c_init(void)
114 {
115 uint32_t index, bit, byte, crc;
116
117 for (index = 0; index < 256; index++) {
118 crc = reflect_32(index);
119 for (byte = 0; byte < 4; byte++) {
120 for (bit = 0; bit < 8; bit++) {
121 crc = (crc & 0x80000000) ?
122 (crc << 1) ^ SCTP_POLY : crc << 1;
123 }
124 #ifdef _BIG_ENDIAN
125 crc32c_tab[3 - byte][index] = flip32(reflect_32(crc));
126 #else
127 crc32c_tab[byte][index] = reflect_32(crc);
128 #endif
129 }
130 }
131 }
132
133 /*
134 * Initialize the crc-ccitt tables.
135 */
136
137 void
nxge_crc_ccitt_init(void)138 nxge_crc_ccitt_init(void)
139 {
140 uint16_t crc;
141 uint16_t index, bit, byte;
142
143 for (index = 0; index < 256; index++) {
144 crc = index << 8;
145 for (byte = 0; byte < 4; byte++) {
146 for (bit = 0; bit < 8; bit++) {
147 crc = (crc & 0x8000) ?
148 (crc << 1) ^ CRC_CCITT_POLY : crc << 1;
149 }
150 #ifdef _BIG_ENDIAN
151 crc_ccitt_tab[3 - byte][index] = crc;
152 #else
153 crc_ccitt_tab[byte][index] = crc;
154 #endif
155 }
156 }
157 }
158
159 /*
160 * Lookup the crc32c for a byte stream
161 */
162
163 static void
nxge_crc32c_byte(uint32_t * crcptr,const uint8_t * buf,int len)164 nxge_crc32c_byte(uint32_t *crcptr, const uint8_t *buf, int len)
165 {
166 uint32_t crc;
167 int i;
168
169 crc = *crcptr;
170 for (i = 0; i < len; i++) {
171 #ifdef _BIG_ENDIAN
172 crc = (crc << 8) ^ crc32c_tab[3][buf[i] ^ (crc >> 24)];
173 #else
174 crc = (crc >> 8) ^ crc32c_tab[0][buf[i] ^ (crc & 0xff)];
175 #endif
176 }
177 *crcptr = crc;
178 }
179
180 /*
181 * Lookup the crc-ccitt for a byte stream
182 */
183
184 static void
nxge_crc_ccitt_byte(uint16_t * crcptr,const uint8_t * buf,int len)185 nxge_crc_ccitt_byte(uint16_t *crcptr, const uint8_t *buf, int len)
186 {
187 uint16_t crc;
188 int i;
189
190 crc = *crcptr;
191 for (i = 0; i < len; i++) {
192
193 #ifdef _BIG_ENDIAN
194 crc = (crc << 8) ^ crc_ccitt_tab[3][buf[i] ^ (crc >> 8)];
195 #else
196 crc = (crc << 8) ^ crc_ccitt_tab[0][buf[i] ^ (crc >> 8)];
197 #endif
198 }
199 *crcptr = crc;
200 }
201
202 /*
203 * Lookup the crc32c for a 32 bit word stream
204 * Lookup is done fro the 4 bytes in parallel
205 * from the tables computed earlier
206 *
207 */
208
209 static void
nxge_crc32c_word(uint32_t * crcptr,const uint32_t * buf,int len)210 nxge_crc32c_word(uint32_t *crcptr, const uint32_t *buf, int len)
211 {
212 uint32_t w, crc;
213 int i;
214
215 crc = *crcptr;
216 for (i = 0; i < len; i++) {
217 w = crc ^ buf[i];
218 crc = crc32c_tab[0][w >> 24] ^
219 crc32c_tab[1][(w >> 16) & 0xff] ^
220 crc32c_tab[2][(w >> 8) & 0xff] ^
221 crc32c_tab[3][w & 0xff];
222 }
223 *crcptr = crc;
224 }
225
226 /*
227 * Lookup the crc-ccitt for a stream of bytes
228 *
229 * Since the parallel lookup version doesn't work yet,
230 * use the byte stream version (lookup crc for a byte
231 * at a time
232 *
233 */
234
235 uint16_t
nxge_crc_ccitt(uint16_t crc16,const uint8_t * buf,int len)236 nxge_crc_ccitt(uint16_t crc16, const uint8_t *buf, int len)
237 {
238 nxge_crc_ccitt_byte(&crc16, buf, len);
239 return (crc16);
240 }
241
242 /*
243 * Lookup the crc32c for a stream of bytes
244 *
245 * Tries to lookup the CRC on 4 byte words
246 * If the buffer is not 4 byte aligned, first compute
247 * with byte lookup until aligned. Then compute crc
248 * for each 4 bytes. If there are bytes left at the end of
249 * the buffer, then perform a byte lookup for the remaining bytes
250 *
251 *
252 */
253
254 uint32_t
nxge_crc32c(uint32_t crc32,const uint8_t * buf,int len)255 nxge_crc32c(uint32_t crc32, const uint8_t *buf, int len)
256 {
257 int rem;
258
259 rem = 4 - ((uintptr_t)buf) & 3;
260 if (rem != 0) {
261 if (len < rem) {
262 rem = len;
263 }
264 nxge_crc32c_byte(&crc32, buf, rem);
265 buf = buf + rem;
266 len = len - rem;
267 }
268 if (len > 3) {
269 nxge_crc32c_word(&crc32, (const uint32_t *) buf, len / 4);
270 }
271 rem = len & 3;
272 if (rem != 0) {
273 nxge_crc32c_byte(&crc32, buf + len - rem, rem);
274 }
275 return (crc32);
276 }
277
278 void
nxge_init_h1_table()279 nxge_init_h1_table()
280 {
281 uint32_t crc, bit, byte, index;
282
283 for (index = 0; index < 256; index++) {
284 crc = index << 24;
285 for (byte = 0; byte < 4; byte++) {
286 for (bit = 0; bit < 8; bit++) {
287 crc = ((crc & 0x80000000)) ?
288 (crc << 1) ^ CRC_32C_POLY : crc << 1;
289 }
290 h1table[byte][index] = crc;
291 }
292 }
293 }
294
295 /*
296 * Reference Neptune H1 computation function
297 *
298 * It is a slightly modified implementation of
299 * CRC-32C implementation
300 */
301
302 uint32_t
nxge_compute_h1_serial(uint32_t init_value,uint32_t * flow,uint32_t len)303 nxge_compute_h1_serial(uint32_t init_value, uint32_t *flow, uint32_t len)
304 {
305 int bit, byte;
306 uint32_t crc_h1 = init_value;
307 uint8_t *buf;
308
309 buf = (uint8_t *)flow;
310 for (byte = 0; byte < len; byte++) {
311 for (bit = 0; bit < 8; bit++) {
312 crc_h1 = (((crc_h1 >> 24) & 0x80) ^
313 ((buf[byte] << bit) & 0x80)) ?
314 (crc_h1 << 1) ^ CRC_32C_POLY : crc_h1 << 1;
315 }
316 }
317
318 return (crc_h1);
319 }
320
321 /*
322 * table based implementation
323 * uses 4 four tables in parallel
324 * 1 for each byte of a 32 bit word
325 *
326 * This is the default h1 computing function
327 *
328 */
329
330 uint32_t
nxge_compute_h1_table4(uint32_t crcin,uint32_t * flow,uint32_t length)331 nxge_compute_h1_table4(uint32_t crcin, uint32_t *flow, uint32_t length)
332 {
333 uint32_t w, fw, i, crch1 = crcin;
334 uint32_t *buf;
335
336 buf = (uint32_t *)flow;
337
338 for (i = 0; i < length / 4; i++) {
339 #ifdef _BIG_ENDIAN
340 fw = buf[i];
341 #else
342 fw = flip32(buf[i]);
343 fw = buf[i];
344 #endif
345 w = crch1 ^ fw;
346 crch1 = h1table[3][w >> 24] ^ h1table[2][(w >> 16) & 0xff] ^
347 h1table[1][(w >> 8) & 0xff] ^ h1table[0][w & 0xff];
348 }
349 return (crch1);
350 }
351
352 /*
353 * table based implementation
354 * uses a single table and computes h1 for a byte
355 * at a time.
356 *
357 */
358
359 uint32_t
nxge_compute_h1_table1(uint32_t crcin,uint32_t * flow,uint32_t length)360 nxge_compute_h1_table1(uint32_t crcin, uint32_t *flow, uint32_t length)
361 {
362
363 uint32_t i, crch1, tmp = crcin;
364 uint8_t *buf;
365
366 buf = (uint8_t *)flow;
367
368 tmp = crcin;
369 for (i = 0; i < length; i++) {
370 crch1 = COMPUTE_H1_BYTE(tmp, buf[i]);
371 tmp = crch1;
372 }
373
374 return (crch1);
375 }
376