1 /* 2 * 842 Software Compression 3 * 4 * Copyright (C) 2015 Dan Streetman, IBM Corp 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * See 842.h for details of the 842 compressed format. 17 */ 18 19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 20 #define MODULE_NAME "842_compress" 21 22 #include <linux/hashtable.h> 23 24 #include "842.h" 25 #include "842_debugfs.h" 26 27 #define SW842_HASHTABLE8_BITS (10) 28 #define SW842_HASHTABLE4_BITS (11) 29 #define SW842_HASHTABLE2_BITS (10) 30 31 /* By default, we allow compressing input buffers of any length, but we must 32 * use the non-standard "short data" template so the decompressor can correctly 33 * reproduce the uncompressed data buffer at the right length. However the 34 * hardware 842 compressor will not recognize the "short data" template, and 35 * will fail to decompress any compressed buffer containing it (I have no idea 36 * why anyone would want to use software to compress and hardware to decompress 37 * but that's beside the point). This parameter forces the compression 38 * function to simply reject any input buffer that isn't a multiple of 8 bytes 39 * long, instead of using the "short data" template, so that all compressed 40 * buffers produced by this function will be decompressable by the 842 hardware 41 * decompressor. Unless you have a specific need for that, leave this disabled 42 * so that any length buffer can be compressed. 43 */ 44 static bool sw842_strict; 45 module_param_named(strict, sw842_strict, bool, 0644); 46 47 static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */ 48 { I8, N0, N0, N0, 0x19 }, /* 8 */ 49 { I4, I4, N0, N0, 0x18 }, /* 18 */ 50 { I4, I2, I2, N0, 0x17 }, /* 25 */ 51 { I2, I2, I4, N0, 0x13 }, /* 25 */ 52 { I2, I2, I2, I2, 0x12 }, /* 32 */ 53 { I4, I2, D2, N0, 0x16 }, /* 33 */ 54 { I4, D2, I2, N0, 0x15 }, /* 33 */ 55 { I2, D2, I4, N0, 0x0e }, /* 33 */ 56 { D2, I2, I4, N0, 0x09 }, /* 33 */ 57 { I2, I2, I2, D2, 0x11 }, /* 40 */ 58 { I2, I2, D2, I2, 0x10 }, /* 40 */ 59 { I2, D2, I2, I2, 0x0d }, /* 40 */ 60 { D2, I2, I2, I2, 0x08 }, /* 40 */ 61 { I4, D4, N0, N0, 0x14 }, /* 41 */ 62 { D4, I4, N0, N0, 0x04 }, /* 41 */ 63 { I2, I2, D4, N0, 0x0f }, /* 48 */ 64 { I2, D2, I2, D2, 0x0c }, /* 48 */ 65 { I2, D4, I2, N0, 0x0b }, /* 48 */ 66 { D2, I2, I2, D2, 0x07 }, /* 48 */ 67 { D2, I2, D2, I2, 0x06 }, /* 48 */ 68 { D4, I2, I2, N0, 0x03 }, /* 48 */ 69 { I2, D2, D4, N0, 0x0a }, /* 56 */ 70 { D2, I2, D4, N0, 0x05 }, /* 56 */ 71 { D4, I2, D2, N0, 0x02 }, /* 56 */ 72 { D4, D2, I2, N0, 0x01 }, /* 56 */ 73 { D8, N0, N0, N0, 0x00 }, /* 64 */ 74 }; 75 76 struct sw842_hlist_node8 { 77 struct hlist_node node; 78 u64 data; 79 u8 index; 80 }; 81 82 struct sw842_hlist_node4 { 83 struct hlist_node node; 84 u32 data; 85 u16 index; 86 }; 87 88 struct sw842_hlist_node2 { 89 struct hlist_node node; 90 u16 data; 91 u8 index; 92 }; 93 94 #define INDEX_NOT_FOUND (-1) 95 #define INDEX_NOT_CHECKED (-2) 96 97 struct sw842_param { 98 u8 *in; 99 u8 *instart; 100 u64 ilen; 101 u8 *out; 102 u64 olen; 103 u8 bit; 104 u64 data8[1]; 105 u32 data4[2]; 106 u16 data2[4]; 107 int index8[1]; 108 int index4[2]; 109 int index2[4]; 110 DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS); 111 DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS); 112 DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS); 113 struct sw842_hlist_node8 node8[1 << I8_BITS]; 114 struct sw842_hlist_node4 node4[1 << I4_BITS]; 115 struct sw842_hlist_node2 node2[1 << I2_BITS]; 116 }; 117 118 #define get_input_data(p, o, b) \ 119 be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o)))) 120 121 #define init_hashtable_nodes(p, b) do { \ 122 int _i; \ 123 hash_init((p)->htable##b); \ 124 for (_i = 0; _i < ARRAY_SIZE((p)->node##b); _i++) { \ 125 (p)->node##b[_i].index = _i; \ 126 (p)->node##b[_i].data = 0; \ 127 INIT_HLIST_NODE(&(p)->node##b[_i].node); \ 128 } \ 129 } while (0) 130 131 #define find_index(p, b, n) ({ \ 132 struct sw842_hlist_node##b *_n; \ 133 p->index##b[n] = INDEX_NOT_FOUND; \ 134 hash_for_each_possible(p->htable##b, _n, node, p->data##b[n]) { \ 135 if (p->data##b[n] == _n->data) { \ 136 p->index##b[n] = _n->index; \ 137 break; \ 138 } \ 139 } \ 140 p->index##b[n] >= 0; \ 141 }) 142 143 #define check_index(p, b, n) \ 144 ((p)->index##b[n] == INDEX_NOT_CHECKED \ 145 ? find_index(p, b, n) \ 146 : (p)->index##b[n] >= 0) 147 148 #define replace_hash(p, b, i, d) do { \ 149 struct sw842_hlist_node##b *_n = &(p)->node##b[(i)+(d)]; \ 150 hash_del(&_n->node); \ 151 _n->data = (p)->data##b[d]; \ 152 pr_debug("add hash index%x %x pos %x data %lx\n", b, \ 153 (unsigned int)_n->index, \ 154 (unsigned int)((p)->in - (p)->instart), \ 155 (unsigned long)_n->data); \ 156 hash_add((p)->htable##b, &_n->node, _n->data); \ 157 } while (0) 158 159 static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe }; 160 161 static int add_bits(struct sw842_param *p, u64 d, u8 n); 162 163 static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s) 164 { 165 int ret; 166 167 if (n <= s) 168 return -EINVAL; 169 170 ret = add_bits(p, d >> s, n - s); 171 if (ret) 172 return ret; 173 return add_bits(p, d & GENMASK_ULL(s - 1, 0), s); 174 } 175 176 static int add_bits(struct sw842_param *p, u64 d, u8 n) 177 { 178 int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits; 179 u64 o; 180 u8 *out = p->out; 181 182 pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d); 183 184 if (n > 64) 185 return -EINVAL; 186 187 /* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0), 188 * or if we're at the end of the output buffer and would write past end 189 */ 190 if (bits > 64) 191 return __split_add_bits(p, d, n, 32); 192 else if (p->olen < 8 && bits > 32 && bits <= 56) 193 return __split_add_bits(p, d, n, 16); 194 else if (p->olen < 4 && bits > 16 && bits <= 24) 195 return __split_add_bits(p, d, n, 8); 196 197 if (DIV_ROUND_UP(bits, 8) > p->olen) 198 return -ENOSPC; 199 200 o = *out & bmask[b]; 201 d <<= s; 202 203 if (bits <= 8) 204 *out = o | d; 205 else if (bits <= 16) 206 put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out); 207 else if (bits <= 24) 208 put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out); 209 else if (bits <= 32) 210 put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out); 211 else if (bits <= 40) 212 put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out); 213 else if (bits <= 48) 214 put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out); 215 else if (bits <= 56) 216 put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out); 217 else 218 put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out); 219 220 p->bit += n; 221 222 if (p->bit > 7) { 223 p->out += p->bit / 8; 224 p->olen -= p->bit / 8; 225 p->bit %= 8; 226 } 227 228 return 0; 229 } 230 231 static int add_template(struct sw842_param *p, u8 c) 232 { 233 int ret, i, b = 0; 234 u8 *t = comp_ops[c]; 235 bool inv = false; 236 237 if (c >= OPS_MAX) 238 return -EINVAL; 239 240 pr_debug("template %x\n", t[4]); 241 242 ret = add_bits(p, t[4], OP_BITS); 243 if (ret) 244 return ret; 245 246 for (i = 0; i < 4; i++) { 247 pr_debug("op %x\n", t[i]); 248 249 switch (t[i] & OP_AMOUNT) { 250 case OP_AMOUNT_8: 251 if (b) 252 inv = true; 253 else if (t[i] & OP_ACTION_INDEX) 254 ret = add_bits(p, p->index8[0], I8_BITS); 255 else if (t[i] & OP_ACTION_DATA) 256 ret = add_bits(p, p->data8[0], 64); 257 else 258 inv = true; 259 break; 260 case OP_AMOUNT_4: 261 if (b == 2 && t[i] & OP_ACTION_DATA) 262 ret = add_bits(p, get_input_data(p, 2, 32), 32); 263 else if (b != 0 && b != 4) 264 inv = true; 265 else if (t[i] & OP_ACTION_INDEX) 266 ret = add_bits(p, p->index4[b >> 2], I4_BITS); 267 else if (t[i] & OP_ACTION_DATA) 268 ret = add_bits(p, p->data4[b >> 2], 32); 269 else 270 inv = true; 271 break; 272 case OP_AMOUNT_2: 273 if (b != 0 && b != 2 && b != 4 && b != 6) 274 inv = true; 275 if (t[i] & OP_ACTION_INDEX) 276 ret = add_bits(p, p->index2[b >> 1], I2_BITS); 277 else if (t[i] & OP_ACTION_DATA) 278 ret = add_bits(p, p->data2[b >> 1], 16); 279 else 280 inv = true; 281 break; 282 case OP_AMOUNT_0: 283 inv = (b != 8) || !(t[i] & OP_ACTION_NOOP); 284 break; 285 default: 286 inv = true; 287 break; 288 } 289 290 if (ret) 291 return ret; 292 293 if (inv) { 294 pr_err("Invalid templ %x op %d : %x %x %x %x\n", 295 c, i, t[0], t[1], t[2], t[3]); 296 return -EINVAL; 297 } 298 299 b += t[i] & OP_AMOUNT; 300 } 301 302 if (b != 8) { 303 pr_err("Invalid template %x len %x : %x %x %x %x\n", 304 c, b, t[0], t[1], t[2], t[3]); 305 return -EINVAL; 306 } 307 308 if (sw842_template_counts) 309 atomic_inc(&template_count[t[4]]); 310 311 return 0; 312 } 313 314 static int add_repeat_template(struct sw842_param *p, u8 r) 315 { 316 int ret; 317 318 /* repeat param is 0-based */ 319 if (!r || --r > REPEAT_BITS_MAX) 320 return -EINVAL; 321 322 ret = add_bits(p, OP_REPEAT, OP_BITS); 323 if (ret) 324 return ret; 325 326 ret = add_bits(p, r, REPEAT_BITS); 327 if (ret) 328 return ret; 329 330 if (sw842_template_counts) 331 atomic_inc(&template_repeat_count); 332 333 return 0; 334 } 335 336 static int add_short_data_template(struct sw842_param *p, u8 b) 337 { 338 int ret, i; 339 340 if (!b || b > SHORT_DATA_BITS_MAX) 341 return -EINVAL; 342 343 ret = add_bits(p, OP_SHORT_DATA, OP_BITS); 344 if (ret) 345 return ret; 346 347 ret = add_bits(p, b, SHORT_DATA_BITS); 348 if (ret) 349 return ret; 350 351 for (i = 0; i < b; i++) { 352 ret = add_bits(p, p->in[i], 8); 353 if (ret) 354 return ret; 355 } 356 357 if (sw842_template_counts) 358 atomic_inc(&template_short_data_count); 359 360 return 0; 361 } 362 363 static int add_zeros_template(struct sw842_param *p) 364 { 365 int ret = add_bits(p, OP_ZEROS, OP_BITS); 366 367 if (ret) 368 return ret; 369 370 if (sw842_template_counts) 371 atomic_inc(&template_zeros_count); 372 373 return 0; 374 } 375 376 static int add_end_template(struct sw842_param *p) 377 { 378 int ret = add_bits(p, OP_END, OP_BITS); 379 380 if (ret) 381 return ret; 382 383 if (sw842_template_counts) 384 atomic_inc(&template_end_count); 385 386 return 0; 387 } 388 389 static bool check_template(struct sw842_param *p, u8 c) 390 { 391 u8 *t = comp_ops[c]; 392 int i, match, b = 0; 393 394 if (c >= OPS_MAX) 395 return false; 396 397 for (i = 0; i < 4; i++) { 398 if (t[i] & OP_ACTION_INDEX) { 399 if (t[i] & OP_AMOUNT_2) 400 match = check_index(p, 2, b >> 1); 401 else if (t[i] & OP_AMOUNT_4) 402 match = check_index(p, 4, b >> 2); 403 else if (t[i] & OP_AMOUNT_8) 404 match = check_index(p, 8, 0); 405 else 406 return false; 407 if (!match) 408 return false; 409 } 410 411 b += t[i] & OP_AMOUNT; 412 } 413 414 return true; 415 } 416 417 static void get_next_data(struct sw842_param *p) 418 { 419 p->data8[0] = get_input_data(p, 0, 64); 420 p->data4[0] = get_input_data(p, 0, 32); 421 p->data4[1] = get_input_data(p, 4, 32); 422 p->data2[0] = get_input_data(p, 0, 16); 423 p->data2[1] = get_input_data(p, 2, 16); 424 p->data2[2] = get_input_data(p, 4, 16); 425 p->data2[3] = get_input_data(p, 6, 16); 426 } 427 428 /* update the hashtable entries. 429 * only call this after finding/adding the current template 430 * the dataN fields for the current 8 byte block must be already updated 431 */ 432 static void update_hashtables(struct sw842_param *p) 433 { 434 u64 pos = p->in - p->instart; 435 u64 n8 = (pos >> 3) % (1 << I8_BITS); 436 u64 n4 = (pos >> 2) % (1 << I4_BITS); 437 u64 n2 = (pos >> 1) % (1 << I2_BITS); 438 439 replace_hash(p, 8, n8, 0); 440 replace_hash(p, 4, n4, 0); 441 replace_hash(p, 4, n4, 1); 442 replace_hash(p, 2, n2, 0); 443 replace_hash(p, 2, n2, 1); 444 replace_hash(p, 2, n2, 2); 445 replace_hash(p, 2, n2, 3); 446 } 447 448 /* find the next template to use, and add it 449 * the p->dataN fields must already be set for the current 8 byte block 450 */ 451 static int process_next(struct sw842_param *p) 452 { 453 int ret, i; 454 455 p->index8[0] = INDEX_NOT_CHECKED; 456 p->index4[0] = INDEX_NOT_CHECKED; 457 p->index4[1] = INDEX_NOT_CHECKED; 458 p->index2[0] = INDEX_NOT_CHECKED; 459 p->index2[1] = INDEX_NOT_CHECKED; 460 p->index2[2] = INDEX_NOT_CHECKED; 461 p->index2[3] = INDEX_NOT_CHECKED; 462 463 /* check up to OPS_MAX - 1; last op is our fallback */ 464 for (i = 0; i < OPS_MAX - 1; i++) { 465 if (check_template(p, i)) 466 break; 467 } 468 469 ret = add_template(p, i); 470 if (ret) 471 return ret; 472 473 return 0; 474 } 475 476 /** 477 * sw842_compress 478 * 479 * Compress the uncompressed buffer of length @ilen at @in to the output buffer 480 * @out, using no more than @olen bytes, using the 842 compression format. 481 * 482 * Returns: 0 on success, error on failure. The @olen parameter 483 * will contain the number of output bytes written on success, or 484 * 0 on error. 485 */ 486 int sw842_compress(const u8 *in, unsigned int ilen, 487 u8 *out, unsigned int *olen, void *wmem) 488 { 489 struct sw842_param *p = (struct sw842_param *)wmem; 490 int ret; 491 u64 last, next, pad, total; 492 u8 repeat_count = 0; 493 494 BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS); 495 496 init_hashtable_nodes(p, 8); 497 init_hashtable_nodes(p, 4); 498 init_hashtable_nodes(p, 2); 499 500 p->in = (u8 *)in; 501 p->instart = p->in; 502 p->ilen = ilen; 503 p->out = out; 504 p->olen = *olen; 505 p->bit = 0; 506 507 total = p->olen; 508 509 *olen = 0; 510 511 /* if using strict mode, we can only compress a multiple of 8 */ 512 if (sw842_strict && (ilen % 8)) { 513 pr_err("Using strict mode, can't compress len %d\n", ilen); 514 return -EINVAL; 515 } 516 517 /* let's compress at least 8 bytes, mkay? */ 518 if (unlikely(ilen < 8)) 519 goto skip_comp; 520 521 /* make initial 'last' different so we don't match the first time */ 522 last = ~get_unaligned((u64 *)p->in); 523 524 while (p->ilen > 7) { 525 next = get_unaligned((u64 *)p->in); 526 527 /* must get the next data, as we need to update the hashtable 528 * entries with the new data every time 529 */ 530 get_next_data(p); 531 532 /* we don't care about endianness in last or next; 533 * we're just comparing 8 bytes to another 8 bytes, 534 * they're both the same endianness 535 */ 536 if (next == last) { 537 /* repeat count bits are 0-based, so we stop at +1 */ 538 if (++repeat_count <= REPEAT_BITS_MAX) 539 goto repeat; 540 } 541 if (repeat_count) { 542 ret = add_repeat_template(p, repeat_count); 543 repeat_count = 0; 544 if (next == last) /* reached max repeat bits */ 545 goto repeat; 546 } 547 548 if (next == 0) 549 ret = add_zeros_template(p); 550 else 551 ret = process_next(p); 552 553 if (ret) 554 return ret; 555 556 repeat: 557 last = next; 558 update_hashtables(p); 559 p->in += 8; 560 p->ilen -= 8; 561 } 562 563 if (repeat_count) { 564 ret = add_repeat_template(p, repeat_count); 565 if (ret) 566 return ret; 567 } 568 569 skip_comp: 570 if (p->ilen > 0) { 571 ret = add_short_data_template(p, p->ilen); 572 if (ret) 573 return ret; 574 575 p->in += p->ilen; 576 p->ilen = 0; 577 } 578 579 ret = add_end_template(p); 580 if (ret) 581 return ret; 582 583 if (p->bit) { 584 p->out++; 585 p->olen--; 586 p->bit = 0; 587 } 588 589 /* pad compressed length to multiple of 8 */ 590 pad = (8 - ((total - p->olen) % 8)) % 8; 591 if (pad) { 592 if (pad > p->olen) /* we were so close! */ 593 return -ENOSPC; 594 memset(p->out, 0, pad); 595 p->out += pad; 596 p->olen -= pad; 597 } 598 599 if (unlikely((total - p->olen) > UINT_MAX)) 600 return -ENOSPC; 601 602 *olen = total - p->olen; 603 604 return 0; 605 } 606 EXPORT_SYMBOL_GPL(sw842_compress); 607 608 static int __init sw842_init(void) 609 { 610 if (sw842_template_counts) 611 sw842_debugfs_create(); 612 613 return 0; 614 } 615 module_init(sw842_init); 616 617 static void __exit sw842_exit(void) 618 { 619 if (sw842_template_counts) 620 sw842_debugfs_remove(); 621 } 622 module_exit(sw842_exit); 623 624 MODULE_LICENSE("GPL"); 625 MODULE_DESCRIPTION("Software 842 Compressor"); 626 MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>"); 627