1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 #pragma ident "%Z%%M% %I% %E% SMI" 26 27 #include <sys/types.h> 28 #include <nxge_fflp_hash.h> 29 30 static void nxge_crc32c_word(uint32_t *crcptr, const uint32_t *buf, int len); 31 32 /* 33 * The crc32c algorithms are taken from sctp_crc32 implementation 34 * common/inet/sctp_crc32.{c,h} 35 * 36 */ 37 38 /* 39 * Fast CRC32C calculation algorithm. The basic idea is to look at it 40 * four bytes (one word) at a time, using four tables. The 41 * standard algorithm in RFC 3309 uses one table. 42 */ 43 44 /* 45 * SCTP uses reflected/reverse polynomial CRC32 with generating 46 * polynomial 0x1EDC6F41L 47 */ 48 #define SCTP_POLY 0x1EDC6F41L 49 50 /* CRC-CCITT Polynomial */ 51 #define CRC_CCITT_POLY 0x1021 52 53 /* The four CRC32c tables. */ 54 static uint32_t crc32c_tab[4][256]; 55 56 /* The four CRC-CCITT tables. */ 57 static uint16_t crc_ccitt_tab[4][256]; 58 59 /* the four tables for H1 Computation */ 60 static uint32_t h1table[4][256]; 61 62 #define CRC_32C_POLY 0x1EDC6F41L 63 64 #define COMPUTE_H1_BYTE(crc, data) \ 65 (crc = (crc<<8)^h1table[0][((crc >> 24) ^data) & 0xff]) 66 67 static uint32_t 68 reflect_32(uint32_t b) 69 { 70 int i; 71 uint32_t rw = 0; 72 73 for (i = 0; i < 32; i++) { 74 if (b & 1) { 75 rw |= 1 << (31 - i); 76 } 77 b >>= 1; 78 } 79 return (rw); 80 } 81 82 static uint32_t 83 flip32(uint32_t w) 84 { 85 return (((w >> 24) | ((w >> 8) & 0xff00) | 86 ((w << 8) & 0xff0000) | (w << 24))); 87 } 88 89 /* 90 * reference crc-ccitt implementation 91 */ 92 93 uint16_t 94 crc_ccitt(uint16_t crcin, uint8_t data) 95 { 96 uint16_t mcrc, crc = 0, bits = 0; 97 98 mcrc = (((crcin >> 8) ^ data) & 0xff) << 8; 99 for (bits = 0; bits < 8; bits++) { 100 crc = ((crc ^ mcrc) & 0x8000) ? 101 (crc << 1) ^ CRC_CCITT_POLY : 102 crc << 1; 103 mcrc <<= 1; 104 } 105 return ((crcin << 8) ^ crc); 106 } 107 108 /* 109 * Initialize the crc32c tables. 110 */ 111 112 void 113 nxge_crc32c_init(void) 114 { 115 uint32_t index, bit, byte, crc; 116 117 for (index = 0; index < 256; index++) { 118 crc = reflect_32(index); 119 for (byte = 0; byte < 4; byte++) { 120 for (bit = 0; bit < 8; bit++) { 121 crc = (crc & 0x80000000) ? 122 (crc << 1) ^ SCTP_POLY : crc << 1; 123 } 124 #ifdef _BIG_ENDIAN 125 crc32c_tab[3 - byte][index] = flip32(reflect_32(crc)); 126 #else 127 crc32c_tab[byte][index] = reflect_32(crc); 128 #endif 129 } 130 } 131 } 132 133 /* 134 * Initialize the crc-ccitt tables. 135 */ 136 137 void 138 nxge_crc_ccitt_init(void) 139 { 140 uint16_t crc; 141 uint16_t index, bit, byte; 142 143 for (index = 0; index < 256; index++) { 144 crc = index << 8; 145 for (byte = 0; byte < 4; byte++) { 146 for (bit = 0; bit < 8; bit++) { 147 crc = (crc & 0x8000) ? 148 (crc << 1) ^ CRC_CCITT_POLY : crc << 1; 149 } 150 #ifdef _BIG_ENDIAN 151 crc_ccitt_tab[3 - byte][index] = crc; 152 #else 153 crc_ccitt_tab[byte][index] = crc; 154 #endif 155 } 156 } 157 } 158 159 /* 160 * Lookup the crc32c for a byte stream 161 */ 162 163 static void 164 nxge_crc32c_byte(uint32_t *crcptr, const uint8_t *buf, int len) 165 { 166 uint32_t crc; 167 int i; 168 169 crc = *crcptr; 170 for (i = 0; i < len; i++) { 171 #ifdef _BIG_ENDIAN 172 crc = (crc << 8) ^ crc32c_tab[3][buf[i] ^ (crc >> 24)]; 173 #else 174 crc = (crc >> 8) ^ crc32c_tab[0][buf[i] ^ (crc & 0xff)]; 175 #endif 176 } 177 *crcptr = crc; 178 } 179 180 /* 181 * Lookup the crc-ccitt for a byte stream 182 */ 183 184 static void 185 nxge_crc_ccitt_byte(uint16_t *crcptr, const uint8_t *buf, int len) 186 { 187 uint16_t crc; 188 int i; 189 190 crc = *crcptr; 191 for (i = 0; i < len; i++) { 192 193 #ifdef _BIG_ENDIAN 194 crc = (crc << 8) ^ crc_ccitt_tab[3][buf[i] ^ (crc >> 8)]; 195 #else 196 crc = (crc << 8) ^ crc_ccitt_tab[0][buf[i] ^ (crc >> 8)]; 197 #endif 198 } 199 *crcptr = crc; 200 } 201 202 /* 203 * Lookup the crc32c for a 32 bit word stream 204 * Lookup is done fro the 4 bytes in parallel 205 * from the tables computed earlier 206 * 207 */ 208 209 static void 210 nxge_crc32c_word(uint32_t *crcptr, const uint32_t *buf, int len) 211 { 212 uint32_t w, crc; 213 int i; 214 215 crc = *crcptr; 216 for (i = 0; i < len; i++) { 217 w = crc ^ buf[i]; 218 crc = crc32c_tab[0][w >> 24] ^ 219 crc32c_tab[1][(w >> 16) & 0xff] ^ 220 crc32c_tab[2][(w >> 8) & 0xff] ^ 221 crc32c_tab[3][w & 0xff]; 222 } 223 *crcptr = crc; 224 } 225 226 /* 227 * Lookup the crc-ccitt for a stream of bytes 228 * 229 * Since the parallel lookup version doesn't work yet, 230 * use the byte stream version (lookup crc for a byte 231 * at a time 232 * 233 */ 234 235 uint16_t 236 nxge_crc_ccitt(uint16_t crc16, const uint8_t *buf, int len) 237 { 238 nxge_crc_ccitt_byte(&crc16, buf, len); 239 return (crc16); 240 } 241 242 /* 243 * Lookup the crc32c for a stream of bytes 244 * 245 * Tries to lookup the CRC on 4 byte words 246 * If the buffer is not 4 byte aligned, first compute 247 * with byte lookup until aligned. Then compute crc 248 * for each 4 bytes. If there are bytes left at the end of 249 * the buffer, then perform a byte lookup for the remaining bytes 250 * 251 * 252 */ 253 254 uint32_t 255 nxge_crc32c(uint32_t crc32, const uint8_t *buf, int len) 256 { 257 int rem; 258 259 rem = 4 - ((uintptr_t)buf) & 3; 260 if (rem != 0) { 261 if (len < rem) { 262 rem = len; 263 } 264 nxge_crc32c_byte(&crc32, buf, rem); 265 buf = buf + rem; 266 len = len - rem; 267 } 268 if (len > 3) { 269 nxge_crc32c_word(&crc32, (const uint32_t *) buf, len / 4); 270 } 271 rem = len & 3; 272 if (rem != 0) { 273 nxge_crc32c_byte(&crc32, buf + len - rem, rem); 274 } 275 return (crc32); 276 } 277 278 void 279 nxge_init_h1_table() 280 { 281 uint32_t crc, bit, byte, index; 282 283 for (index = 0; index < 256; index++) { 284 crc = index << 24; 285 for (byte = 0; byte < 4; byte++) { 286 for (bit = 0; bit < 8; bit++) { 287 crc = ((crc & 0x80000000)) ? 288 (crc << 1) ^ CRC_32C_POLY : crc << 1; 289 } 290 h1table[byte][index] = crc; 291 } 292 } 293 } 294 295 /* 296 * Reference Neptune H1 computation function 297 * 298 * It is a slightly modified implementation of 299 * CRC-32C implementation 300 */ 301 302 uint32_t 303 nxge_compute_h1_serial(uint32_t init_value, uint32_t *flow, uint32_t len) 304 { 305 int bit, byte; 306 uint32_t crc_h1 = init_value; 307 uint8_t *buf; 308 309 buf = (uint8_t *)flow; 310 for (byte = 0; byte < len; byte++) { 311 for (bit = 0; bit < 8; bit++) { 312 crc_h1 = (((crc_h1 >> 24) & 0x80) ^ 313 ((buf[byte] << bit) & 0x80)) ? 314 (crc_h1 << 1) ^ CRC_32C_POLY : crc_h1 << 1; 315 } 316 } 317 318 return (crc_h1); 319 } 320 321 /* 322 * table based implementation 323 * uses 4 four tables in parallel 324 * 1 for each byte of a 32 bit word 325 * 326 * This is the default h1 computing function 327 * 328 */ 329 330 uint32_t 331 nxge_compute_h1_table4(uint32_t crcin, uint32_t *flow, uint32_t length) 332 { 333 uint32_t w, fw, i, crch1 = crcin; 334 uint32_t *buf; 335 336 buf = (uint32_t *)flow; 337 338 for (i = 0; i < length / 4; i++) { 339 #ifdef _BIG_ENDIAN 340 fw = buf[i]; 341 #else 342 fw = flip32(buf[i]); 343 fw = buf[i]; 344 #endif 345 w = crch1 ^ fw; 346 crch1 = h1table[3][w >> 24] ^ h1table[2][(w >> 16) & 0xff] ^ 347 h1table[1][(w >> 8) & 0xff] ^ h1table[0][w & 0xff]; 348 } 349 return (crch1); 350 } 351 352 /* 353 * table based implementation 354 * uses a single table and computes h1 for a byte 355 * at a time. 356 * 357 */ 358 359 uint32_t 360 nxge_compute_h1_table1(uint32_t crcin, uint32_t *flow, uint32_t length) 361 { 362 363 uint32_t i, crch1, tmp = crcin; 364 uint8_t *buf; 365 366 buf = (uint8_t *)flow; 367 368 tmp = crcin; 369 for (i = 0; i < length; i++) { 370 crch1 = COMPUTE_H1_BYTE(tmp, buf[i]); 371 tmp = crch1; 372 } 373 374 return (crch1); 375 } 376