xref: /linux/lib/842/842_compress.c (revision 0883c2c06fb5bcf5b9e008270827e63c09a88c1e)
1 /*
2  * 842 Software Compression
3  *
4  * Copyright (C) 2015 Dan Streetman, IBM Corp
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * See 842.h for details of the 842 compressed format.
17  */
18 
/* Prefix every pr_*() message emitted from this file with the module name. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
/*
 * NOTE(review): MODULE_NAME is not referenced by the code visible in this
 * file; presumably it is consumed by 842_debugfs.h - confirm.
 */
#define MODULE_NAME "842_compress"
21 
22 #include <linux/hashtable.h>
23 
24 #include "842.h"
25 #include "842_debugfs.h"
26 
/*
 * Size parameters (in bits) passed to DECLARE_HASHTABLE() for the three
 * lookup tables in struct sw842_param: htable8, htable4 and htable2.
 */
#define SW842_HASHTABLE8_BITS	(10)
#define SW842_HASHTABLE4_BITS	(11)
#define SW842_HASHTABLE2_BITS	(10)
30 
/* By default, we allow compressing input buffers of any length, but we must
 * use the non-standard "short data" template so the decompressor can correctly
 * reproduce the uncompressed data buffer at the right length.  However the
 * hardware 842 compressor will not recognize the "short data" template, and
 * will fail to decompress any compressed buffer containing it (I have no idea
 * why anyone would want to use software to compress and hardware to decompress
 * but that's beside the point).  This parameter forces the compression
 * function to simply reject any input buffer that isn't a multiple of 8 bytes
 * long, instead of using the "short data" template, so that all compressed
 * buffers produced by this function will be decompressible by the 842 hardware
 * decompressor.  Unless you have a specific need for that, leave this disabled
 * so that any length buffer can be compressed.
 */
static bool sw842_strict;
/* Exposed as module parameter "strict" with mode 0644. */
module_param_named(strict, sw842_strict, bool, 0644);
46 
/*
 * Table of the templates the compressor can emit for one 8 byte block.
 *
 * Each row holds four operations (elements 0-3) followed by the template
 * opcode (element 4) that add_template() writes to the output as OP_BITS
 * bits.  The operations of a row must cover exactly 8 bytes; add_template()
 * verifies this.  The I*, D* and N0 constants come from 842.h; judging by how
 * add_template() tests them they presumably mean "index", "data" and "no-op"
 * of the given byte width - confirm against 842.h.
 *
 * process_next() tries the rows in order and uses the first one whose index
 * operations all match, so the rows are sorted by the size of their encoded
 * parameters (the trailing comment on each row).  The last row needs no index
 * match and is the fallback.
 */
static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */
	{ I8, N0, N0, N0, 0x19 }, /* 8 */
	{ I4, I4, N0, N0, 0x18 }, /* 18 */
	{ I4, I2, I2, N0, 0x17 }, /* 25 */
	{ I2, I2, I4, N0, 0x13 }, /* 25 */
	{ I2, I2, I2, I2, 0x12 }, /* 32 */
	{ I4, I2, D2, N0, 0x16 }, /* 33 */
	{ I4, D2, I2, N0, 0x15 }, /* 33 */
	{ I2, D2, I4, N0, 0x0e }, /* 33 */
	{ D2, I2, I4, N0, 0x09 }, /* 33 */
	{ I2, I2, I2, D2, 0x11 }, /* 40 */
	{ I2, I2, D2, I2, 0x10 }, /* 40 */
	{ I2, D2, I2, I2, 0x0d }, /* 40 */
	{ D2, I2, I2, I2, 0x08 }, /* 40 */
	{ I4, D4, N0, N0, 0x14 }, /* 41 */
	{ D4, I4, N0, N0, 0x04 }, /* 41 */
	{ I2, I2, D4, N0, 0x0f }, /* 48 */
	{ I2, D2, I2, D2, 0x0c }, /* 48 */
	{ I2, D4, I2, N0, 0x0b }, /* 48 */
	{ D2, I2, I2, D2, 0x07 }, /* 48 */
	{ D2, I2, D2, I2, 0x06 }, /* 48 */
	{ D4, I2, I2, N0, 0x03 }, /* 48 */
	{ I2, D2, D4, N0, 0x0a }, /* 56 */
	{ D2, I2, D4, N0, 0x05 }, /* 56 */
	{ D4, I2, D2, N0, 0x02 }, /* 56 */
	{ D4, D2, I2, N0, 0x01 }, /* 56 */
	{ D8, N0, N0, N0, 0x00 }, /* 64 */
};
75 
/* One remembered 8 byte value; an entry of sw842_param.node8[]. */
struct sw842_hlist_node8 {
	struct hlist_node node;	/* linkage into sw842_param.htable8 */
	u64 data;		/* the remembered value (hash key) */
	u8 index;		/* this entry's slot number in node8[] */
};
81 
/* One remembered 4 byte value; an entry of sw842_param.node4[]. */
struct sw842_hlist_node4 {
	struct hlist_node node;	/* linkage into sw842_param.htable4 */
	u32 data;		/* the remembered value (hash key) */
	u16 index;		/* this entry's slot number in node4[] */
};
87 
/* One remembered 2 byte value; an entry of sw842_param.node2[]. */
struct sw842_hlist_node2 {
	struct hlist_node node;	/* linkage into sw842_param.htable2 */
	u16 data;		/* the remembered value (hash key) */
	u8 index;		/* this entry's slot number in node2[] */
};
93 
/*
 * Sentinels stored in sw842_param.index8/index4/index2.  A value >= 0 is the
 * slot number of a matching node; these negative values mean the lookup
 * failed (NOT_FOUND) or has not been performed yet for the current block
 * (NOT_CHECKED, see check_index()).
 */
#define INDEX_NOT_FOUND		(-1)
#define INDEX_NOT_CHECKED	(-2)
96 
/*
 * Complete working state of one sw842_compress() call.  It lives in the
 * caller supplied @wmem buffer.
 */
struct sw842_param {
	u8 *in;		/* current read position in the input */
	u8 *instart;	/* start of the input buffer */
	u64 ilen;	/* input bytes not yet consumed */
	u8 *out;	/* output byte currently being filled */
	u64 olen;	/* output bytes still available at @out */
	u8 bit;		/* number of bits already used in *out */
	/* current 8 byte input block, loaded by get_next_data() */
	u64 data8[1];
	u32 data4[2];
	u16 data2[4];
	/* lookup result per chunk: node slot number or an INDEX_NOT_* value */
	int index8[1];
	int index4[2];
	int index2[4];
	/* value -> node lookup tables, one per chunk width */
	DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS);
	DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS);
	DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS);
	/* node storage; slots are reused cyclically by update_hashtables() */
	struct sw842_hlist_node8 node8[1 << I8_BITS];
	struct sw842_hlist_node4 node4[1 << I4_BITS];
	struct sw842_hlist_node2 node2[1 << I2_BITS];
};
117 
/*
 * Read a @b bit (16, 32 or 64) big-endian value from byte offset @o of the
 * current input position (p)->in, using an unaligned-safe load, and return it
 * in CPU byte order.
 */
#define get_input_data(p, o, b)						\
	be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o))))
120 
/*
 * Reset the @b byte lookup state of @p: initialize hashtable htable<b> and
 * set up every slot of node<b>[] with its own slot number as index, zero
 * data and an initialized hlist node (the slot is not added to the table).
 */
#define init_hashtable_nodes(p, b)	do {			\
	int _i;							\
	hash_init((p)->htable##b);				\
	for (_i = 0; _i < ARRAY_SIZE((p)->node##b); _i++) {	\
		(p)->node##b[_i].index = _i;			\
		(p)->node##b[_i].data = 0;			\
		INIT_HLIST_NODE(&(p)->node##b[_i].node);	\
	}							\
} while (0)
130 
/*
 * Look up chunk @n of the current block, (p)->data<b>[n], in the @b byte
 * hashtable and cache the outcome in (p)->index<b>[n]: the slot number of the
 * first node holding the same value, or INDEX_NOT_FOUND.
 *
 * Evaluates to true when a matching node was found.
 *
 * @p is parenthesized on every use so that any pointer expression can be
 * passed, consistent with the other helper macros in this file.
 */
#define find_index(p, b, n)	({					\
	struct sw842_hlist_node##b *_n;					\
	(p)->index##b[n] = INDEX_NOT_FOUND;				\
	hash_for_each_possible((p)->htable##b, _n, node,		\
			       (p)->data##b[n]) {			\
		if ((p)->data##b[n] == _n->data) {			\
			(p)->index##b[n] = _n->index;			\
			break;						\
		}							\
	}								\
	(p)->index##b[n] >= 0;						\
})
142 
/*
 * Lazy variant of find_index(): perform the hashtable lookup only if chunk @n
 * has not been looked up yet for the current block (INDEX_NOT_CHECKED),
 * otherwise reuse the cached result.  Evaluates to true when the chunk has a
 * matching node.
 */
#define check_index(p, b, n)			\
	((p)->index##b[n] == INDEX_NOT_CHECKED	\
	 ? find_index(p, b, n)			\
	 : (p)->index##b[n] >= 0)
147 
/*
 * Recycle node slot (@i + @d) of the @b byte table for chunk @d of the
 * current block: unhash the slot, store (p)->data<b>[d] in it, log the
 * update and add the slot back to the hashtable keyed by the new value.
 */
#define replace_hash(p, b, i, d)	do {				\
	struct sw842_hlist_node##b *_n = &(p)->node##b[(i)+(d)];	\
	hash_del(&_n->node);						\
	_n->data = (p)->data##b[d];					\
	pr_debug("add hash index%x %x pos %x data %lx\n", b,		\
		 (unsigned int)_n->index,				\
		 (unsigned int)((p)->in - (p)->instart),		\
		 (unsigned long)_n->data);				\
	hash_add((p)->htable##b, &_n->node, _n->data);			\
} while (0)
158 
159 static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
160 
161 static int add_bits(struct sw842_param *p, u64 d, u8 n);
162 
163 static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s)
164 {
165 	int ret;
166 
167 	if (n <= s)
168 		return -EINVAL;
169 
170 	ret = add_bits(p, d >> s, n - s);
171 	if (ret)
172 		return ret;
173 	return add_bits(p, d & GENMASK_ULL(s - 1, 0), s);
174 }
175 
176 static int add_bits(struct sw842_param *p, u64 d, u8 n)
177 {
178 	int b = p->bit, bits = b + n, s = round_up(bits, 8) - bits;
179 	u64 o;
180 	u8 *out = p->out;
181 
182 	pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d);
183 
184 	if (n > 64)
185 		return -EINVAL;
186 
187 	/* split this up if writing to > 8 bytes (i.e. n == 64 && p->bit > 0),
188 	 * or if we're at the end of the output buffer and would write past end
189 	 */
190 	if (bits > 64)
191 		return __split_add_bits(p, d, n, 32);
192 	else if (p->olen < 8 && bits > 32 && bits <= 56)
193 		return __split_add_bits(p, d, n, 16);
194 	else if (p->olen < 4 && bits > 16 && bits <= 24)
195 		return __split_add_bits(p, d, n, 8);
196 
197 	if (DIV_ROUND_UP(bits, 8) > p->olen)
198 		return -ENOSPC;
199 
200 	o = *out & bmask[b];
201 	d <<= s;
202 
203 	if (bits <= 8)
204 		*out = o | d;
205 	else if (bits <= 16)
206 		put_unaligned(cpu_to_be16(o << 8 | d), (__be16 *)out);
207 	else if (bits <= 24)
208 		put_unaligned(cpu_to_be32(o << 24 | d << 8), (__be32 *)out);
209 	else if (bits <= 32)
210 		put_unaligned(cpu_to_be32(o << 24 | d), (__be32 *)out);
211 	else if (bits <= 40)
212 		put_unaligned(cpu_to_be64(o << 56 | d << 24), (__be64 *)out);
213 	else if (bits <= 48)
214 		put_unaligned(cpu_to_be64(o << 56 | d << 16), (__be64 *)out);
215 	else if (bits <= 56)
216 		put_unaligned(cpu_to_be64(o << 56 | d << 8), (__be64 *)out);
217 	else
218 		put_unaligned(cpu_to_be64(o << 56 | d), (__be64 *)out);
219 
220 	p->bit += n;
221 
222 	if (p->bit > 7) {
223 		p->out += p->bit / 8;
224 		p->olen -= p->bit / 8;
225 		p->bit %= 8;
226 	}
227 
228 	return 0;
229 }
230 
231 static int add_template(struct sw842_param *p, u8 c)
232 {
233 	int ret, i, b = 0;
234 	u8 *t = comp_ops[c];
235 	bool inv = false;
236 
237 	if (c >= OPS_MAX)
238 		return -EINVAL;
239 
240 	pr_debug("template %x\n", t[4]);
241 
242 	ret = add_bits(p, t[4], OP_BITS);
243 	if (ret)
244 		return ret;
245 
246 	for (i = 0; i < 4; i++) {
247 		pr_debug("op %x\n", t[i]);
248 
249 		switch (t[i] & OP_AMOUNT) {
250 		case OP_AMOUNT_8:
251 			if (b)
252 				inv = true;
253 			else if (t[i] & OP_ACTION_INDEX)
254 				ret = add_bits(p, p->index8[0], I8_BITS);
255 			else if (t[i] & OP_ACTION_DATA)
256 				ret = add_bits(p, p->data8[0], 64);
257 			else
258 				inv = true;
259 			break;
260 		case OP_AMOUNT_4:
261 			if (b == 2 && t[i] & OP_ACTION_DATA)
262 				ret = add_bits(p, get_input_data(p, 2, 32), 32);
263 			else if (b != 0 && b != 4)
264 				inv = true;
265 			else if (t[i] & OP_ACTION_INDEX)
266 				ret = add_bits(p, p->index4[b >> 2], I4_BITS);
267 			else if (t[i] & OP_ACTION_DATA)
268 				ret = add_bits(p, p->data4[b >> 2], 32);
269 			else
270 				inv = true;
271 			break;
272 		case OP_AMOUNT_2:
273 			if (b != 0 && b != 2 && b != 4 && b != 6)
274 				inv = true;
275 			if (t[i] & OP_ACTION_INDEX)
276 				ret = add_bits(p, p->index2[b >> 1], I2_BITS);
277 			else if (t[i] & OP_ACTION_DATA)
278 				ret = add_bits(p, p->data2[b >> 1], 16);
279 			else
280 				inv = true;
281 			break;
282 		case OP_AMOUNT_0:
283 			inv = (b != 8) || !(t[i] & OP_ACTION_NOOP);
284 			break;
285 		default:
286 			inv = true;
287 			break;
288 		}
289 
290 		if (ret)
291 			return ret;
292 
293 		if (inv) {
294 			pr_err("Invalid templ %x op %d : %x %x %x %x\n",
295 			       c, i, t[0], t[1], t[2], t[3]);
296 			return -EINVAL;
297 		}
298 
299 		b += t[i] & OP_AMOUNT;
300 	}
301 
302 	if (b != 8) {
303 		pr_err("Invalid template %x len %x : %x %x %x %x\n",
304 		       c, b, t[0], t[1], t[2], t[3]);
305 		return -EINVAL;
306 	}
307 
308 	if (sw842_template_counts)
309 		atomic_inc(&template_count[t[4]]);
310 
311 	return 0;
312 }
313 
314 static int add_repeat_template(struct sw842_param *p, u8 r)
315 {
316 	int ret;
317 
318 	/* repeat param is 0-based */
319 	if (!r || --r > REPEAT_BITS_MAX)
320 		return -EINVAL;
321 
322 	ret = add_bits(p, OP_REPEAT, OP_BITS);
323 	if (ret)
324 		return ret;
325 
326 	ret = add_bits(p, r, REPEAT_BITS);
327 	if (ret)
328 		return ret;
329 
330 	if (sw842_template_counts)
331 		atomic_inc(&template_repeat_count);
332 
333 	return 0;
334 }
335 
336 static int add_short_data_template(struct sw842_param *p, u8 b)
337 {
338 	int ret, i;
339 
340 	if (!b || b > SHORT_DATA_BITS_MAX)
341 		return -EINVAL;
342 
343 	ret = add_bits(p, OP_SHORT_DATA, OP_BITS);
344 	if (ret)
345 		return ret;
346 
347 	ret = add_bits(p, b, SHORT_DATA_BITS);
348 	if (ret)
349 		return ret;
350 
351 	for (i = 0; i < b; i++) {
352 		ret = add_bits(p, p->in[i], 8);
353 		if (ret)
354 			return ret;
355 	}
356 
357 	if (sw842_template_counts)
358 		atomic_inc(&template_short_data_count);
359 
360 	return 0;
361 }
362 
363 static int add_zeros_template(struct sw842_param *p)
364 {
365 	int ret = add_bits(p, OP_ZEROS, OP_BITS);
366 
367 	if (ret)
368 		return ret;
369 
370 	if (sw842_template_counts)
371 		atomic_inc(&template_zeros_count);
372 
373 	return 0;
374 }
375 
376 static int add_end_template(struct sw842_param *p)
377 {
378 	int ret = add_bits(p, OP_END, OP_BITS);
379 
380 	if (ret)
381 		return ret;
382 
383 	if (sw842_template_counts)
384 		atomic_inc(&template_end_count);
385 
386 	return 0;
387 }
388 
389 static bool check_template(struct sw842_param *p, u8 c)
390 {
391 	u8 *t = comp_ops[c];
392 	int i, match, b = 0;
393 
394 	if (c >= OPS_MAX)
395 		return false;
396 
397 	for (i = 0; i < 4; i++) {
398 		if (t[i] & OP_ACTION_INDEX) {
399 			if (t[i] & OP_AMOUNT_2)
400 				match = check_index(p, 2, b >> 1);
401 			else if (t[i] & OP_AMOUNT_4)
402 				match = check_index(p, 4, b >> 2);
403 			else if (t[i] & OP_AMOUNT_8)
404 				match = check_index(p, 8, 0);
405 			else
406 				return false;
407 			if (!match)
408 				return false;
409 		}
410 
411 		b += t[i] & OP_AMOUNT;
412 	}
413 
414 	return true;
415 }
416 
417 static void get_next_data(struct sw842_param *p)
418 {
419 	p->data8[0] = get_input_data(p, 0, 64);
420 	p->data4[0] = get_input_data(p, 0, 32);
421 	p->data4[1] = get_input_data(p, 4, 32);
422 	p->data2[0] = get_input_data(p, 0, 16);
423 	p->data2[1] = get_input_data(p, 2, 16);
424 	p->data2[2] = get_input_data(p, 4, 16);
425 	p->data2[3] = get_input_data(p, 6, 16);
426 }
427 
428 /* update the hashtable entries.
429  * only call this after finding/adding the current template
430  * the dataN fields for the current 8 byte block must be already updated
431  */
432 static void update_hashtables(struct sw842_param *p)
433 {
434 	u64 pos = p->in - p->instart;
435 	u64 n8 = (pos >> 3) % (1 << I8_BITS);
436 	u64 n4 = (pos >> 2) % (1 << I4_BITS);
437 	u64 n2 = (pos >> 1) % (1 << I2_BITS);
438 
439 	replace_hash(p, 8, n8, 0);
440 	replace_hash(p, 4, n4, 0);
441 	replace_hash(p, 4, n4, 1);
442 	replace_hash(p, 2, n2, 0);
443 	replace_hash(p, 2, n2, 1);
444 	replace_hash(p, 2, n2, 2);
445 	replace_hash(p, 2, n2, 3);
446 }
447 
448 /* find the next template to use, and add it
449  * the p->dataN fields must already be set for the current 8 byte block
450  */
451 static int process_next(struct sw842_param *p)
452 {
453 	int ret, i;
454 
455 	p->index8[0] = INDEX_NOT_CHECKED;
456 	p->index4[0] = INDEX_NOT_CHECKED;
457 	p->index4[1] = INDEX_NOT_CHECKED;
458 	p->index2[0] = INDEX_NOT_CHECKED;
459 	p->index2[1] = INDEX_NOT_CHECKED;
460 	p->index2[2] = INDEX_NOT_CHECKED;
461 	p->index2[3] = INDEX_NOT_CHECKED;
462 
463 	/* check up to OPS_MAX - 1; last op is our fallback */
464 	for (i = 0; i < OPS_MAX - 1; i++) {
465 		if (check_template(p, i))
466 			break;
467 	}
468 
469 	ret = add_template(p, i);
470 	if (ret)
471 		return ret;
472 
473 	return 0;
474 }
475 
476 /**
477  * sw842_compress
478  *
479  * Compress the uncompressed buffer of length @ilen at @in to the output buffer
480  * @out, using no more than @olen bytes, using the 842 compression format.
481  *
482  * Returns: 0 on success, error on failure.  The @olen parameter
483  * will contain the number of output bytes written on success, or
484  * 0 on error.
485  */
486 int sw842_compress(const u8 *in, unsigned int ilen,
487 		   u8 *out, unsigned int *olen, void *wmem)
488 {
489 	struct sw842_param *p = (struct sw842_param *)wmem;
490 	int ret;
491 	u64 last, next, pad, total;
492 	u8 repeat_count = 0;
493 	u32 crc;
494 
495 	BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS);
496 
497 	init_hashtable_nodes(p, 8);
498 	init_hashtable_nodes(p, 4);
499 	init_hashtable_nodes(p, 2);
500 
501 	p->in = (u8 *)in;
502 	p->instart = p->in;
503 	p->ilen = ilen;
504 	p->out = out;
505 	p->olen = *olen;
506 	p->bit = 0;
507 
508 	total = p->olen;
509 
510 	*olen = 0;
511 
512 	/* if using strict mode, we can only compress a multiple of 8 */
513 	if (sw842_strict && (ilen % 8)) {
514 		pr_err("Using strict mode, can't compress len %d\n", ilen);
515 		return -EINVAL;
516 	}
517 
518 	/* let's compress at least 8 bytes, mkay? */
519 	if (unlikely(ilen < 8))
520 		goto skip_comp;
521 
522 	/* make initial 'last' different so we don't match the first time */
523 	last = ~get_unaligned((u64 *)p->in);
524 
525 	while (p->ilen > 7) {
526 		next = get_unaligned((u64 *)p->in);
527 
528 		/* must get the next data, as we need to update the hashtable
529 		 * entries with the new data every time
530 		 */
531 		get_next_data(p);
532 
533 		/* we don't care about endianness in last or next;
534 		 * we're just comparing 8 bytes to another 8 bytes,
535 		 * they're both the same endianness
536 		 */
537 		if (next == last) {
538 			/* repeat count bits are 0-based, so we stop at +1 */
539 			if (++repeat_count <= REPEAT_BITS_MAX)
540 				goto repeat;
541 		}
542 		if (repeat_count) {
543 			ret = add_repeat_template(p, repeat_count);
544 			repeat_count = 0;
545 			if (next == last) /* reached max repeat bits */
546 				goto repeat;
547 		}
548 
549 		if (next == 0)
550 			ret = add_zeros_template(p);
551 		else
552 			ret = process_next(p);
553 
554 		if (ret)
555 			return ret;
556 
557 repeat:
558 		last = next;
559 		update_hashtables(p);
560 		p->in += 8;
561 		p->ilen -= 8;
562 	}
563 
564 	if (repeat_count) {
565 		ret = add_repeat_template(p, repeat_count);
566 		if (ret)
567 			return ret;
568 	}
569 
570 skip_comp:
571 	if (p->ilen > 0) {
572 		ret = add_short_data_template(p, p->ilen);
573 		if (ret)
574 			return ret;
575 
576 		p->in += p->ilen;
577 		p->ilen = 0;
578 	}
579 
580 	ret = add_end_template(p);
581 	if (ret)
582 		return ret;
583 
584 	/*
585 	 * crc(0:31) is appended to target data starting with the next
586 	 * bit after End of stream template.
587 	 * nx842 calculates CRC for data in big-endian format. So doing
588 	 * same here so that sw842 decompression can be used for both
589 	 * compressed data.
590 	 */
591 	crc = crc32_be(0, in, ilen);
592 	ret = add_bits(p, crc, CRC_BITS);
593 	if (ret)
594 		return ret;
595 
596 	if (p->bit) {
597 		p->out++;
598 		p->olen--;
599 		p->bit = 0;
600 	}
601 
602 	/* pad compressed length to multiple of 8 */
603 	pad = (8 - ((total - p->olen) % 8)) % 8;
604 	if (pad) {
605 		if (pad > p->olen) /* we were so close! */
606 			return -ENOSPC;
607 		memset(p->out, 0, pad);
608 		p->out += pad;
609 		p->olen -= pad;
610 	}
611 
612 	if (unlikely((total - p->olen) > UINT_MAX))
613 		return -ENOSPC;
614 
615 	*olen = total - p->olen;
616 
617 	return 0;
618 }
619 EXPORT_SYMBOL_GPL(sw842_compress);
620 
621 static int __init sw842_init(void)
622 {
623 	if (sw842_template_counts)
624 		sw842_debugfs_create();
625 
626 	return 0;
627 }
628 module_init(sw842_init);
629 
630 static void __exit sw842_exit(void)
631 {
632 	if (sw842_template_counts)
633 		sw842_debugfs_remove();
634 }
635 module_exit(sw842_exit);
636 
/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Software 842 Compressor");
MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
640