xref: /illumos-gate/usr/src/uts/common/io/nxge/nxge_fflp_hash.c (revision f17620a4f72a29025a22655ba8735ccd20ae174f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <nxge_fflp_hash.h>
28 
29 static void nxge_crc32c_word(uint32_t *crcptr, const uint32_t *buf, int len);
30 
31 /*
32  * The crc32c algorithms are taken from sctp_crc32 implementation
33  * common/inet/sctp_crc32.{c,h}
34  *
35  */
36 
37 /*
38  * Fast CRC32C calculation algorithm.  The basic idea is to look at it
39  * four bytes (one word) at a time, using four tables.  The
40  * standard algorithm in RFC 3309 uses one table.
41  */
42 
43 /*
44  * SCTP uses reflected/reverse polynomial CRC32 with generating
45  * polynomial 0x1EDC6F41L
46  */
47 #define	SCTP_POLY 0x1EDC6F41L
48 
49 /* CRC-CCITT Polynomial */
50 #define	CRC_CCITT_POLY 0x1021
51 
52 /* The four CRC32c tables. */
53 static uint32_t crc32c_tab[4][256];
54 
55 /* The four CRC-CCITT tables. */
56 static uint16_t crc_ccitt_tab[4][256];
57 
58 /* the four tables for H1 Computation */
59 static uint32_t h1table[4][256];
60 
61 #define	CRC_32C_POLY 0x1EDC6F41L
62 
63 #define	COMPUTE_H1_BYTE(crc, data) \
64 	(crc = (crc<<8)^h1table[0][((crc >> 24) ^data) & 0xff])
65 
66 static uint32_t
67 reflect_32(uint32_t b)
68 {
69 	int i;
70 	uint32_t rw = 0;
71 
72 	for (i = 0; i < 32; i++) {
73 		if (b & 1) {
74 			rw |= 1 << (31 - i);
75 		}
76 		b >>= 1;
77 	}
78 	return (rw);
79 }
80 
81 static uint32_t
82 flip32(uint32_t w)
83 {
84 	return (((w >> 24) | ((w >> 8) & 0xff00) |
85 	    ((w << 8) & 0xff0000) | (w << 24)));
86 }
87 
88 /*
89  * reference crc-ccitt implementation
90  */
91 
92 uint16_t
93 crc_ccitt(uint16_t crcin, uint8_t data)
94 {
95 	uint16_t mcrc, crc = 0, bits = 0;
96 
97 	mcrc = (((crcin >> 8) ^ data) & 0xff) << 8;
98 	for (bits = 0; bits < 8; bits++) {
99 		crc = ((crc ^ mcrc) & 0x8000) ?
100 		    (crc << 1) ^ CRC_CCITT_POLY :
101 		    crc << 1;
102 		mcrc <<= 1;
103 	}
104 	return ((crcin << 8) ^ crc);
105 }
106 
107 /*
108  * Initialize the crc32c tables.
109  */
110 
111 void
112 nxge_crc32c_init(void)
113 {
114 	uint32_t index, bit, byte, crc;
115 
116 	for (index = 0; index < 256; index++) {
117 		crc = reflect_32(index);
118 		for (byte = 0; byte < 4; byte++) {
119 			for (bit = 0; bit < 8; bit++) {
120 				crc = (crc & 0x80000000) ?
121 				    (crc << 1) ^ SCTP_POLY : crc << 1;
122 			}
123 #ifdef _BIG_ENDIAN
124 			crc32c_tab[3 - byte][index] = flip32(reflect_32(crc));
125 #else
126 			crc32c_tab[byte][index] = reflect_32(crc);
127 #endif
128 		}
129 	}
130 }
131 
132 /*
133  * Initialize the crc-ccitt tables.
134  */
135 
136 void
137 nxge_crc_ccitt_init(void)
138 {
139 	uint16_t crc;
140 	uint16_t index, bit, byte;
141 
142 	for (index = 0; index < 256; index++) {
143 		crc = index << 8;
144 		for (byte = 0; byte < 4; byte++) {
145 			for (bit = 0; bit < 8; bit++) {
146 				crc = (crc & 0x8000) ?
147 				    (crc << 1) ^ CRC_CCITT_POLY : crc << 1;
148 			}
149 #ifdef _BIG_ENDIAN
150 			crc_ccitt_tab[3 - byte][index] = crc;
151 #else
152 			crc_ccitt_tab[byte][index] = crc;
153 #endif
154 		}
155 	}
156 }
157 
158 /*
159  * Lookup  the crc32c for a byte stream
160  */
161 
162 static void
163 nxge_crc32c_byte(uint32_t *crcptr, const uint8_t *buf, int len)
164 {
165 	uint32_t crc;
166 	int i;
167 
168 	crc = *crcptr;
169 	for (i = 0; i < len; i++) {
170 #ifdef _BIG_ENDIAN
171 		crc = (crc << 8) ^ crc32c_tab[3][buf[i] ^ (crc >> 24)];
172 #else
173 		crc = (crc >> 8) ^ crc32c_tab[0][buf[i] ^ (crc & 0xff)];
174 #endif
175 	}
176 	*crcptr = crc;
177 }
178 
179 /*
180  * Lookup  the crc-ccitt for a byte stream
181  */
182 
183 static void
184 nxge_crc_ccitt_byte(uint16_t *crcptr, const uint8_t *buf, int len)
185 {
186 	uint16_t crc;
187 	int i;
188 
189 	crc = *crcptr;
190 	for (i = 0; i < len; i++) {
191 
192 #ifdef _BIG_ENDIAN
193 		crc = (crc << 8) ^ crc_ccitt_tab[3][buf[i] ^ (crc >> 8)];
194 #else
195 		crc = (crc << 8) ^ crc_ccitt_tab[0][buf[i] ^ (crc >> 8)];
196 #endif
197 	}
198 	*crcptr = crc;
199 }
200 
201 /*
202  * Lookup  the crc32c for a 32 bit word stream
203  * Lookup is done fro the 4 bytes in parallel
204  * from the tables computed earlier
205  *
206  */
207 
208 static void
209 nxge_crc32c_word(uint32_t *crcptr, const uint32_t *buf, int len)
210 {
211 	uint32_t w, crc;
212 	int i;
213 
214 	crc = *crcptr;
215 	for (i = 0; i < len; i++) {
216 		w = crc ^ buf[i];
217 		crc = crc32c_tab[0][w >> 24] ^
218 		    crc32c_tab[1][(w >> 16) & 0xff] ^
219 		    crc32c_tab[2][(w >> 8) & 0xff] ^
220 		    crc32c_tab[3][w & 0xff];
221 	}
222 	*crcptr = crc;
223 }
224 
225 /*
226  * Lookup  the crc-ccitt for a stream of bytes
227  *
228  * Since the parallel lookup version doesn't work yet,
229  * use the byte stream version (lookup crc for a byte
230  * at a time
231  *
232  */
233 
234 uint16_t
235 nxge_crc_ccitt(uint16_t crc16, const uint8_t *buf, int len)
236 {
237 	nxge_crc_ccitt_byte(&crc16, buf, len);
238 	return (crc16);
239 }
240 
241 /*
242  * Lookup  the crc32c for a stream of bytes
243  *
244  * Tries to lookup the CRC on 4 byte words
245  * If the buffer is not 4 byte aligned, first compute
246  * with byte lookup until aligned. Then compute crc
247  * for each 4 bytes. If there are bytes left at the end of
248  * the buffer, then perform a byte lookup for the remaining bytes
249  *
250  *
251  */
252 
253 uint32_t
254 nxge_crc32c(uint32_t crc32, const uint8_t *buf, int len)
255 {
256 	int rem;
257 
258 	rem = 4 - ((uintptr_t)buf) & 3;
259 	if (rem != 0) {
260 		if (len < rem) {
261 			rem = len;
262 		}
263 		nxge_crc32c_byte(&crc32, buf, rem);
264 		buf = buf + rem;
265 		len = len - rem;
266 	}
267 	if (len > 3) {
268 		nxge_crc32c_word(&crc32, (const uint32_t *) buf, len / 4);
269 	}
270 	rem = len & 3;
271 	if (rem != 0) {
272 		nxge_crc32c_byte(&crc32, buf + len - rem, rem);
273 	}
274 	return (crc32);
275 }
276 
277 void
278 nxge_init_h1_table()
279 {
280 	uint32_t crc, bit, byte, index;
281 
282 	for (index = 0; index < 256; index++) {
283 		crc = index << 24;
284 		for (byte = 0; byte < 4; byte++) {
285 			for (bit = 0; bit < 8; bit++) {
286 				crc = ((crc & 0x80000000)) ?
287 				    (crc << 1) ^ CRC_32C_POLY : crc << 1;
288 			}
289 			h1table[byte][index] = crc;
290 		}
291 	}
292 }
293 
294 /*
295  * Reference Neptune H1 computation function
296  *
297  * It is a slightly modified implementation of
298  * CRC-32C implementation
299  */
300 
301 uint32_t
302 nxge_compute_h1_serial(uint32_t init_value, uint32_t *flow, uint32_t len)
303 {
304 	int bit, byte;
305 	uint32_t crc_h1 = init_value;
306 	uint8_t *buf;
307 
308 	buf = (uint8_t *)flow;
309 	for (byte = 0; byte < len; byte++) {
310 		for (bit = 0; bit < 8; bit++) {
311 			crc_h1 = (((crc_h1 >> 24) & 0x80) ^
312 			    ((buf[byte] << bit) & 0x80)) ?
313 			    (crc_h1 << 1) ^ CRC_32C_POLY : crc_h1 << 1;
314 		}
315 	}
316 
317 	return (crc_h1);
318 }
319 
320 /*
321  * table based implementation
322  * uses 4 four tables in parallel
323  * 1 for each byte of a 32 bit word
324  *
325  * This is the default h1 computing function
326  *
327  */
328 
329 uint32_t
330 nxge_compute_h1_table4(uint32_t crcin, uint32_t *flow, uint32_t length)
331 {
332 	uint32_t w, fw, i, crch1 = crcin;
333 	uint32_t *buf;
334 
335 	buf = (uint32_t *)flow;
336 
337 	for (i = 0; i < length / 4; i++) {
338 #ifdef _BIG_ENDIAN
339 		fw = buf[i];
340 #else
341 		fw = flip32(buf[i]);
342 		fw = buf[i];
343 #endif
344 		w = crch1 ^ fw;
345 		crch1 = h1table[3][w >> 24] ^ h1table[2][(w >> 16) & 0xff] ^
346 		    h1table[1][(w >> 8) & 0xff] ^ h1table[0][w & 0xff];
347 	}
348 	return (crch1);
349 }
350 
351 /*
352  * table based implementation
353  * uses a single table and computes h1 for a byte
354  * at a time.
355  *
356  */
357 
358 uint32_t
359 nxge_compute_h1_table1(uint32_t crcin, uint32_t *flow, uint32_t length)
360 {
361 
362 	uint32_t i, crch1, tmp = crcin;
363 	uint8_t *buf;
364 
365 	buf = (uint8_t *)flow;
366 
367 	tmp = crcin;
368 	crch1 = 0;
369 	for (i = 0; i < length; i++) {
370 		crch1 = COMPUTE_H1_BYTE(tmp, buf[i]);
371 		tmp = crch1;
372 	}
373 
374 	return (crch1);
375 }
376