xref: /illumos-gate/usr/src/uts/common/io/nxge/nxge_fflp_hash.c (revision a6e6969cf9cfe2070eae4cd6071f76b0fa4f539f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 #pragma ident	"%Z%%M%	%I%	%E% SMI"
26 
27 #include <sys/types.h>
28 #include <nxge_fflp_hash.h>
29 
30 static void nxge_crc32c_word(uint32_t *crcptr, const uint32_t *buf, int len);
31 
32 /*
33  * The crc32c algorithms are taken from sctp_crc32 implementation
34  * common/inet/sctp_crc32.{c,h}
35  *
36  */
37 
38 /*
39  * Fast CRC32C calculation algorithm.  The basic idea is to look at it
40  * four bytes (one word) at a time, using four tables.  The
41  * standard algorithm in RFC 3309 uses one table.
42  */
43 
44 /*
45  * SCTP uses reflected/reverse polynomial CRC32 with generating
46  * polynomial 0x1EDC6F41L
47  */
48 #define	SCTP_POLY 0x1EDC6F41L
49 
50 /* CRC-CCITT Polynomial */
51 #define	CRC_CCITT_POLY 0x1021
52 
53 /* The four CRC32c tables. */
54 static uint32_t crc32c_tab[4][256];
55 
56 /* The four CRC-CCITT tables. */
57 static uint16_t crc_ccitt_tab[4][256];
58 
59 /* the four tables for H1 Computation */
60 static uint32_t h1table[4][256];
61 
62 #define	CRC_32C_POLY 0x1EDC6F41L
63 
64 #define	COMPUTE_H1_BYTE(crc, data) \
65 	(crc = (crc<<8)^h1table[0][((crc >> 24) ^data) & 0xff])
66 
67 static uint32_t
68 reflect_32(uint32_t b)
69 {
70 	int i;
71 	uint32_t rw = 0;
72 
73 	for (i = 0; i < 32; i++) {
74 		if (b & 1) {
75 			rw |= 1 << (31 - i);
76 		}
77 		b >>= 1;
78 	}
79 	return (rw);
80 }
81 
82 static uint32_t
83 flip32(uint32_t w)
84 {
85 	return (((w >> 24) | ((w >> 8) & 0xff00) |
86 		((w << 8) & 0xff0000) | (w << 24)));
87 }
88 
89 /*
90  * reference crc-ccitt implementation
91  */
92 
93 uint16_t
94 crc_ccitt(uint16_t crcin, uint8_t data)
95 {
96 	uint16_t mcrc, crc = 0, bits = 0;
97 
98 	mcrc = (((crcin >> 8) ^ data) & 0xff) << 8;
99 	for (bits = 0; bits < 8; bits++) {
100 		crc = ((crc ^ mcrc) & 0x8000) ?
101 			(crc << 1) ^ CRC_CCITT_POLY :
102 			crc << 1;
103 		mcrc <<= 1;
104 	}
105 	return ((crcin << 8) ^ crc);
106 }
107 
108 /*
109  * Initialize the crc32c tables.
110  */
111 
112 void
113 nxge_crc32c_init(void)
114 {
115 	uint32_t index, bit, byte, crc;
116 
117 	for (index = 0; index < 256; index++) {
118 		crc = reflect_32(index);
119 		for (byte = 0; byte < 4; byte++) {
120 			for (bit = 0; bit < 8; bit++) {
121 				crc = (crc & 0x80000000) ?
122 					(crc << 1) ^ SCTP_POLY : crc << 1;
123 			}
124 #ifdef _BIG_ENDIAN
125 			crc32c_tab[3 - byte][index] = flip32(reflect_32(crc));
126 #else
127 			crc32c_tab[byte][index] = reflect_32(crc);
128 #endif
129 		}
130 	}
131 }
132 
133 /*
134  * Initialize the crc-ccitt tables.
135  */
136 
137 void
138 nxge_crc_ccitt_init(void)
139 {
140 	uint16_t crc;
141 	uint16_t index, bit, byte;
142 
143 	for (index = 0; index < 256; index++) {
144 		crc = index << 8;
145 		for (byte = 0; byte < 4; byte++) {
146 			for (bit = 0; bit < 8; bit++) {
147 				crc = (crc & 0x8000) ?
148 					(crc << 1) ^ CRC_CCITT_POLY : crc << 1;
149 			}
150 #ifdef _BIG_ENDIAN
151 			crc_ccitt_tab[3 - byte][index] = crc;
152 #else
153 			crc_ccitt_tab[byte][index] = crc;
154 #endif
155 		}
156 	}
157 }
158 
159 /*
160  * Lookup  the crc32c for a byte stream
161  */
162 
163 static void
164 nxge_crc32c_byte(uint32_t *crcptr, const uint8_t *buf, int len)
165 {
166 	uint32_t crc;
167 	int i;
168 
169 	crc = *crcptr;
170 	for (i = 0; i < len; i++) {
171 #ifdef _BIG_ENDIAN
172 		crc = (crc << 8) ^ crc32c_tab[3][buf[i] ^ (crc >> 24)];
173 #else
174 		crc = (crc >> 8) ^ crc32c_tab[0][buf[i] ^ (crc & 0xff)];
175 #endif
176 	}
177 	*crcptr = crc;
178 }
179 
180 /*
181  * Lookup  the crc-ccitt for a byte stream
182  */
183 
184 static void
185 nxge_crc_ccitt_byte(uint16_t *crcptr, const uint8_t *buf, int len)
186 {
187 	uint16_t crc;
188 	int i;
189 
190 	crc = *crcptr;
191 	for (i = 0; i < len; i++) {
192 
193 #ifdef _BIG_ENDIAN
194 		crc = (crc << 8) ^ crc_ccitt_tab[3][buf[i] ^ (crc >> 8)];
195 #else
196 		crc = (crc << 8) ^ crc_ccitt_tab[0][buf[i] ^ (crc >> 8)];
197 #endif
198 	}
199 	*crcptr = crc;
200 }
201 
202 /*
203  * Lookup  the crc32c for a 32 bit word stream
204  * Lookup is done fro the 4 bytes in parallel
205  * from the tables computed earlier
206  *
207  */
208 
209 static void
210 nxge_crc32c_word(uint32_t *crcptr, const uint32_t *buf, int len)
211 {
212 	uint32_t w, crc;
213 	int i;
214 
215 	crc = *crcptr;
216 	for (i = 0; i < len; i++) {
217 		w = crc ^ buf[i];
218 		crc = crc32c_tab[0][w >> 24] ^
219 			crc32c_tab[1][(w >> 16) & 0xff] ^
220 			crc32c_tab[2][(w >> 8) & 0xff] ^
221 			crc32c_tab[3][w & 0xff];
222 	}
223 	*crcptr = crc;
224 }
225 
226 /*
227  * Lookup  the crc-ccitt for a stream of bytes
228  *
229  * Since the parallel lookup version doesn't work yet,
230  * use the byte stream version (lookup crc for a byte
231  * at a time
232  *
233  */
234 
235 uint16_t
236 nxge_crc_ccitt(uint16_t crc16, const uint8_t *buf, int len)
237 {
238 	nxge_crc_ccitt_byte(&crc16, buf, len);
239 	return (crc16);
240 }
241 
242 /*
243  * Lookup  the crc32c for a stream of bytes
244  *
245  * Tries to lookup the CRC on 4 byte words
246  * If the buffer is not 4 byte aligned, first compute
247  * with byte lookup until aligned. Then compute crc
248  * for each 4 bytes. If there are bytes left at the end of
249  * the buffer, then perform a byte lookup for the remaining bytes
250  *
251  *
252  */
253 
254 uint32_t
255 nxge_crc32c(uint32_t crc32, const uint8_t *buf, int len)
256 {
257 	int rem;
258 
259 	rem = 4 - ((uintptr_t)buf) & 3;
260 	if (rem != 0) {
261 		if (len < rem) {
262 			rem = len;
263 		}
264 		nxge_crc32c_byte(&crc32, buf, rem);
265 		buf = buf + rem;
266 		len = len - rem;
267 	}
268 	if (len > 3) {
269 		nxge_crc32c_word(&crc32, (const uint32_t *) buf, len / 4);
270 	}
271 	rem = len & 3;
272 	if (rem != 0) {
273 		nxge_crc32c_byte(&crc32, buf + len - rem, rem);
274 	}
275 	return (crc32);
276 }
277 
278 void
279 nxge_init_h1_table()
280 {
281 	uint32_t crc, bit, byte, index;
282 
283 	for (index = 0; index < 256; index++) {
284 		crc = index << 24;
285 		for (byte = 0; byte < 4; byte++) {
286 			for (bit = 0; bit < 8; bit++) {
287 				crc = ((crc & 0x80000000)) ?
288 					(crc << 1) ^ CRC_32C_POLY : crc << 1;
289 			}
290 			h1table[byte][index] = crc;
291 		}
292 	}
293 }
294 
295 /*
296  * Reference Neptune H1 computation function
297  *
298  * It is a slightly modified implementation of
299  * CRC-32C implementation
300  */
301 
302 uint32_t
303 nxge_compute_h1_serial(uint32_t init_value, uint32_t *flow, uint32_t len)
304 {
305 	int bit, byte;
306 	uint32_t crc_h1 = init_value;
307 	uint8_t *buf;
308 
309 	buf = (uint8_t *)flow;
310 	for (byte = 0; byte < len; byte++) {
311 		for (bit = 0; bit < 8; bit++) {
312 			crc_h1 = (((crc_h1 >> 24) & 0x80) ^
313 				((buf[byte] << bit) & 0x80)) ?
314 				(crc_h1 << 1) ^ CRC_32C_POLY : crc_h1 << 1;
315 		}
316 	}
317 
318 	return (crc_h1);
319 }
320 
321 /*
322  * table based implementation
323  * uses 4 four tables in parallel
324  * 1 for each byte of a 32 bit word
325  *
326  * This is the default h1 computing function
327  *
328  */
329 
330 uint32_t
331 nxge_compute_h1_table4(uint32_t crcin, uint32_t *flow, uint32_t length)
332 {
333 	uint32_t w, fw, i, crch1 = crcin;
334 	uint32_t *buf;
335 
336 	buf = (uint32_t *)flow;
337 
338 	for (i = 0; i < length / 4; i++) {
339 #ifdef _BIG_ENDIAN
340 		fw = buf[i];
341 #else
342 		fw = flip32(buf[i]);
343 		fw = buf[i];
344 #endif
345 		w = crch1 ^ fw;
346 		crch1 = h1table[3][w >> 24] ^ h1table[2][(w >> 16) & 0xff] ^
347 			h1table[1][(w >> 8) & 0xff] ^ h1table[0][w & 0xff];
348 	}
349 	return (crch1);
350 }
351 
352 /*
353  * table based implementation
354  * uses a single table and computes h1 for a byte
355  * at a time.
356  *
357  */
358 
359 uint32_t
360 nxge_compute_h1_table1(uint32_t crcin, uint32_t *flow, uint32_t length)
361 {
362 
363 	uint32_t i, crch1, tmp = crcin;
364 	uint8_t *buf;
365 
366 	buf = (uint8_t *)flow;
367 
368 	tmp = crcin;
369 	for (i = 0; i < length; i++) {
370 		crch1 = COMPUTE_H1_BYTE(tmp, buf[i]);
371 		tmp = crch1;
372 	}
373 
374 	return (crch1);
375 }
376