1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <nxge_fflp_hash.h> 28 29 static void nxge_crc32c_word(uint32_t *crcptr, const uint32_t *buf, int len); 30 31 /* 32 * The crc32c algorithms are taken from sctp_crc32 implementation 33 * common/inet/sctp_crc32.{c,h} 34 * 35 */ 36 37 /* 38 * Fast CRC32C calculation algorithm. The basic idea is to look at it 39 * four bytes (one word) at a time, using four tables. The 40 * standard algorithm in RFC 3309 uses one table. 41 */ 42 43 /* 44 * SCTP uses reflected/reverse polynomial CRC32 with generating 45 * polynomial 0x1EDC6F41L 46 */ 47 #define SCTP_POLY 0x1EDC6F41L 48 49 /* CRC-CCITT Polynomial */ 50 #define CRC_CCITT_POLY 0x1021 51 52 /* The four CRC32c tables. */ 53 static uint32_t crc32c_tab[4][256]; 54 55 /* The four CRC-CCITT tables. */ 56 static uint16_t crc_ccitt_tab[4][256]; 57 58 /* the four tables for H1 Computation */ 59 static uint32_t h1table[4][256]; 60 61 #define CRC_32C_POLY 0x1EDC6F41L 62 63 #define COMPUTE_H1_BYTE(crc, data) \ 64 (crc = (crc<<8)^h1table[0][((crc >> 24) ^data) & 0xff]) 65 66 static uint32_t 67 reflect_32(uint32_t b) 68 { 69 int i; 70 uint32_t rw = 0; 71 72 for (i = 0; i < 32; i++) { 73 if (b & 1) { 74 rw |= 1 << (31 - i); 75 } 76 b >>= 1; 77 } 78 return (rw); 79 } 80 81 static uint32_t 82 flip32(uint32_t w) 83 { 84 return (((w >> 24) | ((w >> 8) & 0xff00) | 85 ((w << 8) & 0xff0000) | (w << 24))); 86 } 87 88 /* 89 * reference crc-ccitt implementation 90 */ 91 92 uint16_t 93 crc_ccitt(uint16_t crcin, uint8_t data) 94 { 95 uint16_t mcrc, crc = 0, bits = 0; 96 97 mcrc = (((crcin >> 8) ^ data) & 0xff) << 8; 98 for (bits = 0; bits < 8; bits++) { 99 crc = ((crc ^ mcrc) & 0x8000) ? 100 (crc << 1) ^ CRC_CCITT_POLY : 101 crc << 1; 102 mcrc <<= 1; 103 } 104 return ((crcin << 8) ^ crc); 105 } 106 107 /* 108 * Initialize the crc32c tables. 109 */ 110 111 void 112 nxge_crc32c_init(void) 113 { 114 uint32_t index, bit, byte, crc; 115 116 for (index = 0; index < 256; index++) { 117 crc = reflect_32(index); 118 for (byte = 0; byte < 4; byte++) { 119 for (bit = 0; bit < 8; bit++) { 120 crc = (crc & 0x80000000) ? 121 (crc << 1) ^ SCTP_POLY : crc << 1; 122 } 123 #ifdef _BIG_ENDIAN 124 crc32c_tab[3 - byte][index] = flip32(reflect_32(crc)); 125 #else 126 crc32c_tab[byte][index] = reflect_32(crc); 127 #endif 128 } 129 } 130 } 131 132 /* 133 * Initialize the crc-ccitt tables. 134 */ 135 136 void 137 nxge_crc_ccitt_init(void) 138 { 139 uint16_t crc; 140 uint16_t index, bit, byte; 141 142 for (index = 0; index < 256; index++) { 143 crc = index << 8; 144 for (byte = 0; byte < 4; byte++) { 145 for (bit = 0; bit < 8; bit++) { 146 crc = (crc & 0x8000) ? 147 (crc << 1) ^ CRC_CCITT_POLY : crc << 1; 148 } 149 #ifdef _BIG_ENDIAN 150 crc_ccitt_tab[3 - byte][index] = crc; 151 #else 152 crc_ccitt_tab[byte][index] = crc; 153 #endif 154 } 155 } 156 } 157 158 /* 159 * Lookup the crc32c for a byte stream 160 */ 161 162 static void 163 nxge_crc32c_byte(uint32_t *crcptr, const uint8_t *buf, int len) 164 { 165 uint32_t crc; 166 int i; 167 168 crc = *crcptr; 169 for (i = 0; i < len; i++) { 170 #ifdef _BIG_ENDIAN 171 crc = (crc << 8) ^ crc32c_tab[3][buf[i] ^ (crc >> 24)]; 172 #else 173 crc = (crc >> 8) ^ crc32c_tab[0][buf[i] ^ (crc & 0xff)]; 174 #endif 175 } 176 *crcptr = crc; 177 } 178 179 /* 180 * Lookup the crc-ccitt for a byte stream 181 */ 182 183 static void 184 nxge_crc_ccitt_byte(uint16_t *crcptr, const uint8_t *buf, int len) 185 { 186 uint16_t crc; 187 int i; 188 189 crc = *crcptr; 190 for (i = 0; i < len; i++) { 191 192 #ifdef _BIG_ENDIAN 193 crc = (crc << 8) ^ crc_ccitt_tab[3][buf[i] ^ (crc >> 8)]; 194 #else 195 crc = (crc << 8) ^ crc_ccitt_tab[0][buf[i] ^ (crc >> 8)]; 196 #endif 197 } 198 *crcptr = crc; 199 } 200 201 /* 202 * Lookup the crc32c for a 32 bit word stream 203 * Lookup is done fro the 4 bytes in parallel 204 * from the tables computed earlier 205 * 206 */ 207 208 static void 209 nxge_crc32c_word(uint32_t *crcptr, const uint32_t *buf, int len) 210 { 211 uint32_t w, crc; 212 int i; 213 214 crc = *crcptr; 215 for (i = 0; i < len; i++) { 216 w = crc ^ buf[i]; 217 crc = crc32c_tab[0][w >> 24] ^ 218 crc32c_tab[1][(w >> 16) & 0xff] ^ 219 crc32c_tab[2][(w >> 8) & 0xff] ^ 220 crc32c_tab[3][w & 0xff]; 221 } 222 *crcptr = crc; 223 } 224 225 /* 226 * Lookup the crc-ccitt for a stream of bytes 227 * 228 * Since the parallel lookup version doesn't work yet, 229 * use the byte stream version (lookup crc for a byte 230 * at a time 231 * 232 */ 233 234 uint16_t 235 nxge_crc_ccitt(uint16_t crc16, const uint8_t *buf, int len) 236 { 237 nxge_crc_ccitt_byte(&crc16, buf, len); 238 return (crc16); 239 } 240 241 /* 242 * Lookup the crc32c for a stream of bytes 243 * 244 * Tries to lookup the CRC on 4 byte words 245 * If the buffer is not 4 byte aligned, first compute 246 * with byte lookup until aligned. Then compute crc 247 * for each 4 bytes. If there are bytes left at the end of 248 * the buffer, then perform a byte lookup for the remaining bytes 249 * 250 * 251 */ 252 253 uint32_t 254 nxge_crc32c(uint32_t crc32, const uint8_t *buf, int len) 255 { 256 int rem; 257 258 rem = 4 - ((uintptr_t)buf) & 3; 259 if (rem != 0) { 260 if (len < rem) { 261 rem = len; 262 } 263 nxge_crc32c_byte(&crc32, buf, rem); 264 buf = buf + rem; 265 len = len - rem; 266 } 267 if (len > 3) { 268 nxge_crc32c_word(&crc32, (const uint32_t *) buf, len / 4); 269 } 270 rem = len & 3; 271 if (rem != 0) { 272 nxge_crc32c_byte(&crc32, buf + len - rem, rem); 273 } 274 return (crc32); 275 } 276 277 void 278 nxge_init_h1_table() 279 { 280 uint32_t crc, bit, byte, index; 281 282 for (index = 0; index < 256; index++) { 283 crc = index << 24; 284 for (byte = 0; byte < 4; byte++) { 285 for (bit = 0; bit < 8; bit++) { 286 crc = ((crc & 0x80000000)) ? 287 (crc << 1) ^ CRC_32C_POLY : crc << 1; 288 } 289 h1table[byte][index] = crc; 290 } 291 } 292 } 293 294 /* 295 * Reference Neptune H1 computation function 296 * 297 * It is a slightly modified implementation of 298 * CRC-32C implementation 299 */ 300 301 uint32_t 302 nxge_compute_h1_serial(uint32_t init_value, uint32_t *flow, uint32_t len) 303 { 304 int bit, byte; 305 uint32_t crc_h1 = init_value; 306 uint8_t *buf; 307 308 buf = (uint8_t *)flow; 309 for (byte = 0; byte < len; byte++) { 310 for (bit = 0; bit < 8; bit++) { 311 crc_h1 = (((crc_h1 >> 24) & 0x80) ^ 312 ((buf[byte] << bit) & 0x80)) ? 313 (crc_h1 << 1) ^ CRC_32C_POLY : crc_h1 << 1; 314 } 315 } 316 317 return (crc_h1); 318 } 319 320 /* 321 * table based implementation 322 * uses 4 four tables in parallel 323 * 1 for each byte of a 32 bit word 324 * 325 * This is the default h1 computing function 326 * 327 */ 328 329 uint32_t 330 nxge_compute_h1_table4(uint32_t crcin, uint32_t *flow, uint32_t length) 331 { 332 uint32_t w, fw, i, crch1 = crcin; 333 uint32_t *buf; 334 335 buf = (uint32_t *)flow; 336 337 for (i = 0; i < length / 4; i++) { 338 #ifdef _BIG_ENDIAN 339 fw = buf[i]; 340 #else 341 fw = flip32(buf[i]); 342 fw = buf[i]; 343 #endif 344 w = crch1 ^ fw; 345 crch1 = h1table[3][w >> 24] ^ h1table[2][(w >> 16) & 0xff] ^ 346 h1table[1][(w >> 8) & 0xff] ^ h1table[0][w & 0xff]; 347 } 348 return (crch1); 349 } 350 351 /* 352 * table based implementation 353 * uses a single table and computes h1 for a byte 354 * at a time. 355 * 356 */ 357 358 uint32_t 359 nxge_compute_h1_table1(uint32_t crcin, uint32_t *flow, uint32_t length) 360 { 361 362 uint32_t i, crch1, tmp = crcin; 363 uint8_t *buf; 364 365 buf = (uint8_t *)flow; 366 367 tmp = crcin; 368 crch1 = 0; 369 for (i = 0; i < length; i++) { 370 crch1 = COMPUTE_H1_BYTE(tmp, buf[i]); 371 tmp = crch1; 372 } 373 374 return (crch1); 375 } 376