xref: /linux/fs/ntfs3/lib/xpress_decompress.c (revision 522e010b58379fbe19b38fdef5016bca0c3cf405)
1*522e010bSKonstantin Komarov // SPDX-License-Identifier: GPL-2.0-or-later
2*522e010bSKonstantin Komarov /*
3*522e010bSKonstantin Komarov  * xpress_decompress.c - A decompressor for the XPRESS compression format
4*522e010bSKonstantin Komarov  * (Huffman variant), which can be used in "System Compressed" files.  This is
5*522e010bSKonstantin Komarov  * based on the code from wimlib.
6*522e010bSKonstantin Komarov  *
7*522e010bSKonstantin Komarov  * Copyright (C) 2015 Eric Biggers
8*522e010bSKonstantin Komarov  *
9*522e010bSKonstantin Komarov  * This program is free software: you can redistribute it and/or modify it under
10*522e010bSKonstantin Komarov  * the terms of the GNU General Public License as published by the Free Software
11*522e010bSKonstantin Komarov  * Foundation, either version 2 of the License, or (at your option) any later
12*522e010bSKonstantin Komarov  * version.
13*522e010bSKonstantin Komarov  *
14*522e010bSKonstantin Komarov  * This program is distributed in the hope that it will be useful, but WITHOUT
15*522e010bSKonstantin Komarov  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16*522e010bSKonstantin Komarov  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
17*522e010bSKonstantin Komarov  * details.
18*522e010bSKonstantin Komarov  *
19*522e010bSKonstantin Komarov  * You should have received a copy of the GNU General Public License along with
20*522e010bSKonstantin Komarov  * this program.  If not, see <http://www.gnu.org/licenses/>.
21*522e010bSKonstantin Komarov  */
22*522e010bSKonstantin Komarov 
23*522e010bSKonstantin Komarov #include "decompress_common.h"
24*522e010bSKonstantin Komarov #include "lib.h"
25*522e010bSKonstantin Komarov 
26*522e010bSKonstantin Komarov #define XPRESS_NUM_SYMBOLS	512
27*522e010bSKonstantin Komarov #define XPRESS_MAX_CODEWORD_LEN	15
28*522e010bSKonstantin Komarov #define XPRESS_MIN_MATCH_LEN	3
29*522e010bSKonstantin Komarov 
30*522e010bSKonstantin Komarov /* This value is chosen for fast decompression.  */
31*522e010bSKonstantin Komarov #define XPRESS_TABLEBITS 12
32*522e010bSKonstantin Komarov 
33*522e010bSKonstantin Komarov /* Reusable heap-allocated memory for XPRESS decompression  */
34*522e010bSKonstantin Komarov struct xpress_decompressor {
35*522e010bSKonstantin Komarov 
36*522e010bSKonstantin Komarov 	/* The Huffman decoding table  */
37*522e010bSKonstantin Komarov 	u16 decode_table[(1 << XPRESS_TABLEBITS) + 2 * XPRESS_NUM_SYMBOLS];
38*522e010bSKonstantin Komarov 
39*522e010bSKonstantin Komarov 	/* An array that maps symbols to codeword lengths  */
40*522e010bSKonstantin Komarov 	u8 lens[XPRESS_NUM_SYMBOLS];
41*522e010bSKonstantin Komarov 
42*522e010bSKonstantin Komarov 	/* Temporary space for make_huffman_decode_table()  */
43*522e010bSKonstantin Komarov 	u16 working_space[2 * (1 + XPRESS_MAX_CODEWORD_LEN) +
44*522e010bSKonstantin Komarov 			  XPRESS_NUM_SYMBOLS];
45*522e010bSKonstantin Komarov };
46*522e010bSKonstantin Komarov 
47*522e010bSKonstantin Komarov /*
48*522e010bSKonstantin Komarov  * xpress_allocate_decompressor - Allocate an XPRESS decompressor
49*522e010bSKonstantin Komarov  *
50*522e010bSKonstantin Komarov  * Return the pointer to the decompressor on success, or return NULL and set
51*522e010bSKonstantin Komarov  * errno on failure.
52*522e010bSKonstantin Komarov  */
53*522e010bSKonstantin Komarov struct xpress_decompressor *xpress_allocate_decompressor(void)
54*522e010bSKonstantin Komarov {
55*522e010bSKonstantin Komarov 	return kmalloc(sizeof(struct xpress_decompressor), GFP_NOFS);
56*522e010bSKonstantin Komarov }
57*522e010bSKonstantin Komarov 
58*522e010bSKonstantin Komarov /*
59*522e010bSKonstantin Komarov  * xpress_decompress - Decompress a buffer of XPRESS-compressed data
60*522e010bSKonstantin Komarov  *
61*522e010bSKonstantin Komarov  * @decompressor:       A decompressor that was allocated with
62*522e010bSKonstantin Komarov  *			xpress_allocate_decompressor()
63*522e010bSKonstantin Komarov  * @compressed_data:	The buffer of data to decompress
64*522e010bSKonstantin Komarov  * @compressed_size:	Number of bytes of compressed data
65*522e010bSKonstantin Komarov  * @uncompressed_data:	The buffer in which to store the decompressed data
66*522e010bSKonstantin Komarov  * @uncompressed_size:	The number of bytes the data decompresses into
67*522e010bSKonstantin Komarov  *
68*522e010bSKonstantin Komarov  * Return 0 on success, or return -1 and set errno on failure.
69*522e010bSKonstantin Komarov  */
70*522e010bSKonstantin Komarov int xpress_decompress(struct xpress_decompressor *decompressor,
71*522e010bSKonstantin Komarov 		      const void *compressed_data, size_t compressed_size,
72*522e010bSKonstantin Komarov 		      void *uncompressed_data, size_t uncompressed_size)
73*522e010bSKonstantin Komarov {
74*522e010bSKonstantin Komarov 	struct xpress_decompressor *d = decompressor;
75*522e010bSKonstantin Komarov 	const u8 * const in_begin = compressed_data;
76*522e010bSKonstantin Komarov 	u8 * const out_begin = uncompressed_data;
77*522e010bSKonstantin Komarov 	u8 *out_next = out_begin;
78*522e010bSKonstantin Komarov 	u8 * const out_end = out_begin + uncompressed_size;
79*522e010bSKonstantin Komarov 	struct input_bitstream is;
80*522e010bSKonstantin Komarov 	u32 i;
81*522e010bSKonstantin Komarov 
82*522e010bSKonstantin Komarov 	/* Read the Huffman codeword lengths.  */
83*522e010bSKonstantin Komarov 	if (compressed_size < XPRESS_NUM_SYMBOLS / 2)
84*522e010bSKonstantin Komarov 		goto invalid;
85*522e010bSKonstantin Komarov 	for (i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) {
86*522e010bSKonstantin Komarov 		d->lens[i*2 + 0] = in_begin[i] & 0xF;
87*522e010bSKonstantin Komarov 		d->lens[i*2 + 1] = in_begin[i] >> 4;
88*522e010bSKonstantin Komarov 	}
89*522e010bSKonstantin Komarov 
90*522e010bSKonstantin Komarov 	/* Build a decoding table for the Huffman code.  */
91*522e010bSKonstantin Komarov 	if (make_huffman_decode_table(d->decode_table, XPRESS_NUM_SYMBOLS,
92*522e010bSKonstantin Komarov 				      XPRESS_TABLEBITS, d->lens,
93*522e010bSKonstantin Komarov 				      XPRESS_MAX_CODEWORD_LEN,
94*522e010bSKonstantin Komarov 				      d->working_space))
95*522e010bSKonstantin Komarov 		goto invalid;
96*522e010bSKonstantin Komarov 
97*522e010bSKonstantin Komarov 	/* Decode the matches and literals.  */
98*522e010bSKonstantin Komarov 
99*522e010bSKonstantin Komarov 	init_input_bitstream(&is, in_begin + XPRESS_NUM_SYMBOLS / 2,
100*522e010bSKonstantin Komarov 			     compressed_size - XPRESS_NUM_SYMBOLS / 2);
101*522e010bSKonstantin Komarov 
102*522e010bSKonstantin Komarov 	while (out_next != out_end) {
103*522e010bSKonstantin Komarov 		u32 sym;
104*522e010bSKonstantin Komarov 		u32 log2_offset;
105*522e010bSKonstantin Komarov 		u32 length;
106*522e010bSKonstantin Komarov 		u32 offset;
107*522e010bSKonstantin Komarov 
108*522e010bSKonstantin Komarov 		sym = read_huffsym(&is, d->decode_table,
109*522e010bSKonstantin Komarov 				   XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN);
110*522e010bSKonstantin Komarov 		if (sym < 256) {
111*522e010bSKonstantin Komarov 			/* Literal  */
112*522e010bSKonstantin Komarov 			*out_next++ = sym;
113*522e010bSKonstantin Komarov 		} else {
114*522e010bSKonstantin Komarov 			/* Match  */
115*522e010bSKonstantin Komarov 			length = sym & 0xf;
116*522e010bSKonstantin Komarov 			log2_offset = (sym >> 4) & 0xf;
117*522e010bSKonstantin Komarov 
118*522e010bSKonstantin Komarov 			bitstream_ensure_bits(&is, 16);
119*522e010bSKonstantin Komarov 
120*522e010bSKonstantin Komarov 			offset = ((u32)1 << log2_offset) |
121*522e010bSKonstantin Komarov 				 bitstream_pop_bits(&is, log2_offset);
122*522e010bSKonstantin Komarov 
123*522e010bSKonstantin Komarov 			if (length == 0xf) {
124*522e010bSKonstantin Komarov 				length += bitstream_read_byte(&is);
125*522e010bSKonstantin Komarov 				if (length == 0xf + 0xff)
126*522e010bSKonstantin Komarov 					length = bitstream_read_u16(&is);
127*522e010bSKonstantin Komarov 			}
128*522e010bSKonstantin Komarov 			length += XPRESS_MIN_MATCH_LEN;
129*522e010bSKonstantin Komarov 
130*522e010bSKonstantin Komarov 			if (offset > (size_t)(out_next - out_begin))
131*522e010bSKonstantin Komarov 				goto invalid;
132*522e010bSKonstantin Komarov 
133*522e010bSKonstantin Komarov 			if (length > (size_t)(out_end - out_next))
134*522e010bSKonstantin Komarov 				goto invalid;
135*522e010bSKonstantin Komarov 
136*522e010bSKonstantin Komarov 			out_next = lz_copy(out_next, length, offset, out_end,
137*522e010bSKonstantin Komarov 					   XPRESS_MIN_MATCH_LEN);
138*522e010bSKonstantin Komarov 		}
139*522e010bSKonstantin Komarov 	}
140*522e010bSKonstantin Komarov 	return 0;
141*522e010bSKonstantin Komarov 
142*522e010bSKonstantin Komarov invalid:
143*522e010bSKonstantin Komarov 	return -1;
144*522e010bSKonstantin Komarov }
145*522e010bSKonstantin Komarov 
146*522e010bSKonstantin Komarov /*
147*522e010bSKonstantin Komarov  * xpress_free_decompressor - Free an XPRESS decompressor
148*522e010bSKonstantin Komarov  *
149*522e010bSKonstantin Komarov  * @decompressor:       A decompressor that was allocated with
150*522e010bSKonstantin Komarov  *			xpress_allocate_decompressor(), or NULL.
151*522e010bSKonstantin Komarov  */
152*522e010bSKonstantin Komarov void xpress_free_decompressor(struct xpress_decompressor *decompressor)
153*522e010bSKonstantin Komarov {
154*522e010bSKonstantin Komarov 	kfree(decompressor);
155*522e010bSKonstantin Komarov }
156