1*5c4b64e6SXin LI /* $NetBSD: unlz.c,v 1.6 2018/11/11 01:42:36 christos Exp $ */
2*5c4b64e6SXin LI
3*5c4b64e6SXin LI /*-
4*5c4b64e6SXin LI * Copyright (c) 2018 The NetBSD Foundation, Inc.
5*5c4b64e6SXin LI * All rights reserved.
6*5c4b64e6SXin LI *
7*5c4b64e6SXin LI * This code is derived from software contributed to The NetBSD Foundation
8*5c4b64e6SXin LI * by Christos Zoulas.
9*5c4b64e6SXin LI *
10*5c4b64e6SXin LI * Redistribution and use in source and binary forms, with or without
11*5c4b64e6SXin LI * modification, are permitted provided that the following conditions
12*5c4b64e6SXin LI * are met:
13*5c4b64e6SXin LI * 1. Redistributions of source code must retain the above copyright
14*5c4b64e6SXin LI * notice, this list of conditions and the following disclaimer.
15*5c4b64e6SXin LI * 2. Redistributions in binary form must reproduce the above copyright
16*5c4b64e6SXin LI * notice, this list of conditions and the following disclaimer in the
17*5c4b64e6SXin LI * documentation and/or other materials provided with the distribution.
18*5c4b64e6SXin LI *
19*5c4b64e6SXin LI * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20*5c4b64e6SXin LI * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21*5c4b64e6SXin LI * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22*5c4b64e6SXin LI * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23*5c4b64e6SXin LI * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24*5c4b64e6SXin LI * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25*5c4b64e6SXin LI * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26*5c4b64e6SXin LI * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27*5c4b64e6SXin LI * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28*5c4b64e6SXin LI * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29*5c4b64e6SXin LI * POSSIBILITY OF SUCH DAMAGE.
30*5c4b64e6SXin LI */
31*5c4b64e6SXin LI
32*5c4b64e6SXin LI /* Lzd - Educational decompressor for the lzip format
33*5c4b64e6SXin LI Copyright (C) 2013-2018 Antonio Diaz Diaz.
34*5c4b64e6SXin LI
35*5c4b64e6SXin LI This program is free software. Redistribution and use in source and
36*5c4b64e6SXin LI binary forms, with or without modification, are permitted provided
37*5c4b64e6SXin LI that the following conditions are met:
38*5c4b64e6SXin LI
39*5c4b64e6SXin LI 1. Redistributions of source code must retain the above copyright
40*5c4b64e6SXin LI notice, this list of conditions and the following disclaimer.
41*5c4b64e6SXin LI
42*5c4b64e6SXin LI 2. Redistributions in binary form must reproduce the above copyright
43*5c4b64e6SXin LI notice, this list of conditions and the following disclaimer in the
44*5c4b64e6SXin LI documentation and/or other materials provided with the distribution.
45*5c4b64e6SXin LI
46*5c4b64e6SXin LI This program is distributed in the hope that it will be useful,
47*5c4b64e6SXin LI but WITHOUT ANY WARRANTY; without even the implied warranty of
48*5c4b64e6SXin LI MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
49*5c4b64e6SXin LI */
50*5c4b64e6SXin LI
51*5c4b64e6SXin LI #include <sys/param.h>
52*5c4b64e6SXin LI #include <stdio.h>
53*5c4b64e6SXin LI #include <string.h>
54*5c4b64e6SXin LI #include <stdlib.h>
55*5c4b64e6SXin LI #include <stdint.h>
56*5c4b64e6SXin LI #include <stdbool.h>
57*5c4b64e6SXin LI #include <errno.h>
58*5c4b64e6SXin LI #include <unistd.h>
59*5c4b64e6SXin LI
60*5c4b64e6SXin LI #define LZ_STATES 12
61*5c4b64e6SXin LI
62*5c4b64e6SXin LI #define LITERAL_CONTEXT_BITS 3
63*5c4b64e6SXin LI #define POS_STATE_BITS 2
64*5c4b64e6SXin LI #define POS_STATES (1 << POS_STATE_BITS)
65*5c4b64e6SXin LI #define POS_STATE_MASK (POS_STATES - 1)
66*5c4b64e6SXin LI
67*5c4b64e6SXin LI #define STATES 4
68*5c4b64e6SXin LI #define DIS_SLOT_BITS 6
69*5c4b64e6SXin LI
70*5c4b64e6SXin LI #define DIS_MODEL_START 4
71*5c4b64e6SXin LI #define DIS_MODEL_END 14
72*5c4b64e6SXin LI
73*5c4b64e6SXin LI #define MODELED_DISTANCES (1 << (DIS_MODEL_END / 2))
74*5c4b64e6SXin LI #define DIS_ALIGN_BITS 4
75*5c4b64e6SXin LI #define DIS_ALIGN_SIZE (1 << DIS_ALIGN_BITS)
76*5c4b64e6SXin LI
77*5c4b64e6SXin LI #define LOW_BITS 3
78*5c4b64e6SXin LI #define MID_BITS 3
79*5c4b64e6SXin LI #define HIGH_BITS 8
80*5c4b64e6SXin LI
81*5c4b64e6SXin LI #define LOW_SYMBOLS (1 << LOW_BITS)
82*5c4b64e6SXin LI #define MID_SYMBOLS (1 << MID_BITS)
83*5c4b64e6SXin LI #define HIGH_SYMBOLS (1 << HIGH_BITS)
84*5c4b64e6SXin LI
85*5c4b64e6SXin LI #define MAX_SYMBOLS (LOW_SYMBOLS + MID_SYMBOLS + HIGH_SYMBOLS)
86*5c4b64e6SXin LI
87*5c4b64e6SXin LI #define MIN_MATCH_LEN 2
88*5c4b64e6SXin LI
89*5c4b64e6SXin LI #define BIT_MODEL_MOVE_BITS 5
90*5c4b64e6SXin LI #define BIT_MODEL_TOTAL_BITS 11
91*5c4b64e6SXin LI #define BIT_MODEL_TOTAL (1 << BIT_MODEL_TOTAL_BITS)
92*5c4b64e6SXin LI #define BIT_MODEL_INIT (BIT_MODEL_TOTAL / 2)
93*5c4b64e6SXin LI
94*5c4b64e6SXin LI static const int lz_st_next[] = {
95*5c4b64e6SXin LI 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5,
96*5c4b64e6SXin LI };
97*5c4b64e6SXin LI
98*5c4b64e6SXin LI static bool
lz_st_is_char(int st)99*5c4b64e6SXin LI lz_st_is_char(int st) {
100*5c4b64e6SXin LI return st < 7;
101*5c4b64e6SXin LI }
102*5c4b64e6SXin LI
103*5c4b64e6SXin LI static int
lz_st_get_char(int st)104*5c4b64e6SXin LI lz_st_get_char(int st) {
105*5c4b64e6SXin LI return lz_st_next[st];
106*5c4b64e6SXin LI }
107*5c4b64e6SXin LI
108*5c4b64e6SXin LI static int
lz_st_get_match(int st)109*5c4b64e6SXin LI lz_st_get_match(int st) {
110*5c4b64e6SXin LI return st < 7 ? 7 : 10;
111*5c4b64e6SXin LI }
112*5c4b64e6SXin LI
113*5c4b64e6SXin LI static int
lz_st_get_rep(int st)114*5c4b64e6SXin LI lz_st_get_rep(int st) {
115*5c4b64e6SXin LI return st < 7 ? 8 : 11;
116*5c4b64e6SXin LI }
117*5c4b64e6SXin LI
118*5c4b64e6SXin LI static int
lz_st_get_short_rep(int st)119*5c4b64e6SXin LI lz_st_get_short_rep(int st) {
120*5c4b64e6SXin LI return st < 7 ? 9 : 11;
121*5c4b64e6SXin LI }
122*5c4b64e6SXin LI
123*5c4b64e6SXin LI struct lz_len_model {
124*5c4b64e6SXin LI int choice1;
125*5c4b64e6SXin LI int choice2;
126*5c4b64e6SXin LI int bm_low[POS_STATES][LOW_SYMBOLS];
127*5c4b64e6SXin LI int bm_mid[POS_STATES][MID_SYMBOLS];
128*5c4b64e6SXin LI int bm_high[HIGH_SYMBOLS];
129*5c4b64e6SXin LI };
130*5c4b64e6SXin LI
131*5c4b64e6SXin LI static uint32_t lz_crc[256];
132*5c4b64e6SXin LI
133*5c4b64e6SXin LI static void
lz_crc_init(void)134*5c4b64e6SXin LI lz_crc_init(void)
135*5c4b64e6SXin LI {
136*5c4b64e6SXin LI for (unsigned i = 0; i < nitems(lz_crc); i++) {
137*5c4b64e6SXin LI unsigned c = i;
138*5c4b64e6SXin LI for (unsigned j = 0; j < 8; j++) {
139*5c4b64e6SXin LI if (c & 1)
140*5c4b64e6SXin LI c = 0xEDB88320U ^ (c >> 1);
141*5c4b64e6SXin LI else
142*5c4b64e6SXin LI c >>= 1;
143*5c4b64e6SXin LI }
144*5c4b64e6SXin LI lz_crc[i] = c;
145*5c4b64e6SXin LI }
146*5c4b64e6SXin LI }
147*5c4b64e6SXin LI
148*5c4b64e6SXin LI static void
lz_crc_update(uint32_t * crc,const uint8_t * buf,size_t len)149*5c4b64e6SXin LI lz_crc_update(uint32_t *crc, const uint8_t *buf, size_t len)
150*5c4b64e6SXin LI {
151*5c4b64e6SXin LI for (size_t i = 0; i < len; i++)
152*5c4b64e6SXin LI *crc = lz_crc[(*crc ^ buf[i]) & 0xFF] ^ (*crc >> 8);
153*5c4b64e6SXin LI }
154*5c4b64e6SXin LI
155*5c4b64e6SXin LI struct lz_range_decoder {
156*5c4b64e6SXin LI FILE *fp;
157*5c4b64e6SXin LI uint32_t code;
158*5c4b64e6SXin LI uint32_t range;
159*5c4b64e6SXin LI };
160*5c4b64e6SXin LI
161*5c4b64e6SXin LI static int
lz_rd_create(struct lz_range_decoder * rd,FILE * fp)162*5c4b64e6SXin LI lz_rd_create(struct lz_range_decoder *rd, FILE *fp)
163*5c4b64e6SXin LI {
164*5c4b64e6SXin LI rd->fp = fp;
165*5c4b64e6SXin LI rd->code = 0;
166*5c4b64e6SXin LI rd->range = ~0;
167*5c4b64e6SXin LI for (int i = 0; i < 5; i++)
168*5c4b64e6SXin LI rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
169*5c4b64e6SXin LI return ferror(rd->fp) ? -1 : 0;
170*5c4b64e6SXin LI }
171*5c4b64e6SXin LI
172*5c4b64e6SXin LI static unsigned
lz_rd_decode(struct lz_range_decoder * rd,int num_bits)173*5c4b64e6SXin LI lz_rd_decode(struct lz_range_decoder *rd, int num_bits)
174*5c4b64e6SXin LI {
175*5c4b64e6SXin LI unsigned symbol = 0;
176*5c4b64e6SXin LI
177*5c4b64e6SXin LI for (int i = num_bits; i > 0; i--) {
178*5c4b64e6SXin LI rd->range >>= 1;
179*5c4b64e6SXin LI symbol <<= 1;
180*5c4b64e6SXin LI if (rd->code >= rd->range) {
181*5c4b64e6SXin LI rd->code -= rd->range;
182*5c4b64e6SXin LI symbol |= 1;
183*5c4b64e6SXin LI }
184*5c4b64e6SXin LI if (rd->range <= 0x00FFFFFFU) {
185*5c4b64e6SXin LI rd->range <<= 8;
186*5c4b64e6SXin LI rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
187*5c4b64e6SXin LI }
188*5c4b64e6SXin LI }
189*5c4b64e6SXin LI
190*5c4b64e6SXin LI return symbol;
191*5c4b64e6SXin LI }
192*5c4b64e6SXin LI
193*5c4b64e6SXin LI static unsigned
lz_rd_decode_bit(struct lz_range_decoder * rd,int * bm)194*5c4b64e6SXin LI lz_rd_decode_bit(struct lz_range_decoder *rd, int *bm)
195*5c4b64e6SXin LI {
196*5c4b64e6SXin LI unsigned symbol;
197*5c4b64e6SXin LI const uint32_t bound = (rd->range >> BIT_MODEL_TOTAL_BITS) * *bm;
198*5c4b64e6SXin LI
199*5c4b64e6SXin LI if(rd->code < bound) {
200*5c4b64e6SXin LI rd->range = bound;
201*5c4b64e6SXin LI *bm += (BIT_MODEL_TOTAL - *bm) >> BIT_MODEL_MOVE_BITS;
202*5c4b64e6SXin LI symbol = 0;
203*5c4b64e6SXin LI }
204*5c4b64e6SXin LI else {
205*5c4b64e6SXin LI rd->range -= bound;
206*5c4b64e6SXin LI rd->code -= bound;
207*5c4b64e6SXin LI *bm -= *bm >> BIT_MODEL_MOVE_BITS;
208*5c4b64e6SXin LI symbol = 1;
209*5c4b64e6SXin LI }
210*5c4b64e6SXin LI
211*5c4b64e6SXin LI if (rd->range <= 0x00FFFFFFU) {
212*5c4b64e6SXin LI rd->range <<= 8;
213*5c4b64e6SXin LI rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
214*5c4b64e6SXin LI }
215*5c4b64e6SXin LI return symbol;
216*5c4b64e6SXin LI }
217*5c4b64e6SXin LI
218*5c4b64e6SXin LI static unsigned
lz_rd_decode_tree(struct lz_range_decoder * rd,int * bm,int num_bits)219*5c4b64e6SXin LI lz_rd_decode_tree(struct lz_range_decoder *rd, int *bm, int num_bits)
220*5c4b64e6SXin LI {
221*5c4b64e6SXin LI unsigned symbol = 1;
222*5c4b64e6SXin LI
223*5c4b64e6SXin LI for (int i = 0; i < num_bits; i++)
224*5c4b64e6SXin LI symbol = (symbol << 1) | lz_rd_decode_bit(rd, &bm[symbol]);
225*5c4b64e6SXin LI
226*5c4b64e6SXin LI return symbol - (1 << num_bits);
227*5c4b64e6SXin LI }
228*5c4b64e6SXin LI
229*5c4b64e6SXin LI static unsigned
lz_rd_decode_tree_reversed(struct lz_range_decoder * rd,int * bm,int num_bits)230*5c4b64e6SXin LI lz_rd_decode_tree_reversed(struct lz_range_decoder *rd, int *bm, int num_bits)
231*5c4b64e6SXin LI {
232*5c4b64e6SXin LI unsigned symbol = lz_rd_decode_tree(rd, bm, num_bits);
233*5c4b64e6SXin LI unsigned reversed_symbol = 0;
234*5c4b64e6SXin LI
235*5c4b64e6SXin LI for (int i = 0; i < num_bits; i++) {
236*5c4b64e6SXin LI reversed_symbol = (reversed_symbol << 1) | (symbol & 1);
237*5c4b64e6SXin LI symbol >>= 1;
238*5c4b64e6SXin LI }
239*5c4b64e6SXin LI
240*5c4b64e6SXin LI return reversed_symbol;
241*5c4b64e6SXin LI }
242*5c4b64e6SXin LI
243*5c4b64e6SXin LI static unsigned
lz_rd_decode_matched(struct lz_range_decoder * rd,int * bm,int match_byte)244*5c4b64e6SXin LI lz_rd_decode_matched(struct lz_range_decoder *rd, int *bm, int match_byte)
245*5c4b64e6SXin LI {
246*5c4b64e6SXin LI unsigned symbol = 1;
247*5c4b64e6SXin LI
248*5c4b64e6SXin LI for (int i = 7; i >= 0; i--) {
249*5c4b64e6SXin LI const unsigned match_bit = (match_byte >> i) & 1;
250*5c4b64e6SXin LI const unsigned bit = lz_rd_decode_bit(rd,
251*5c4b64e6SXin LI &bm[symbol + (match_bit << 8) + 0x100]);
252*5c4b64e6SXin LI symbol = (symbol << 1) | bit;
253*5c4b64e6SXin LI if (match_bit != bit) {
254*5c4b64e6SXin LI while (symbol < 0x100) {
255*5c4b64e6SXin LI symbol = (symbol << 1) |
256*5c4b64e6SXin LI lz_rd_decode_bit(rd, &bm[symbol]);
257*5c4b64e6SXin LI }
258*5c4b64e6SXin LI break;
259*5c4b64e6SXin LI }
260*5c4b64e6SXin LI }
261*5c4b64e6SXin LI return symbol & 0xFF;
262*5c4b64e6SXin LI }
263*5c4b64e6SXin LI
264*5c4b64e6SXin LI static unsigned
lz_rd_decode_len(struct lz_range_decoder * rd,struct lz_len_model * lm,int pos_state)265*5c4b64e6SXin LI lz_rd_decode_len(struct lz_range_decoder *rd, struct lz_len_model *lm,
266*5c4b64e6SXin LI int pos_state)
267*5c4b64e6SXin LI {
268*5c4b64e6SXin LI if (lz_rd_decode_bit(rd, &lm->choice1) == 0)
269*5c4b64e6SXin LI return lz_rd_decode_tree(rd, lm->bm_low[pos_state], LOW_BITS);
270*5c4b64e6SXin LI
271*5c4b64e6SXin LI if (lz_rd_decode_bit(rd, &lm->choice2) == 0) {
272*5c4b64e6SXin LI return LOW_SYMBOLS +
273*5c4b64e6SXin LI lz_rd_decode_tree(rd, lm->bm_mid[pos_state], MID_BITS);
274*5c4b64e6SXin LI }
275*5c4b64e6SXin LI
276*5c4b64e6SXin LI return LOW_SYMBOLS + MID_SYMBOLS +
277*5c4b64e6SXin LI lz_rd_decode_tree(rd, lm->bm_high, HIGH_BITS);
278*5c4b64e6SXin LI }
279*5c4b64e6SXin LI
280*5c4b64e6SXin LI struct lz_decoder {
281*5c4b64e6SXin LI FILE *fin, *fout;
282*5c4b64e6SXin LI off_t pos, ppos, spos, dict_size;
283*5c4b64e6SXin LI bool wrapped;
284*5c4b64e6SXin LI uint32_t crc;
285*5c4b64e6SXin LI uint8_t *obuf;
286*5c4b64e6SXin LI struct lz_range_decoder rdec;
287*5c4b64e6SXin LI };
288*5c4b64e6SXin LI
289*5c4b64e6SXin LI static int
lz_flush(struct lz_decoder * lz)290*5c4b64e6SXin LI lz_flush(struct lz_decoder *lz)
291*5c4b64e6SXin LI {
292*5c4b64e6SXin LI off_t offs = lz->pos - lz->spos;
293*5c4b64e6SXin LI if (offs <= 0)
294*5c4b64e6SXin LI return -1;
295*5c4b64e6SXin LI
296*5c4b64e6SXin LI size_t size = (size_t)offs;
297*5c4b64e6SXin LI lz_crc_update(&lz->crc, lz->obuf + lz->spos, size);
298*5c4b64e6SXin LI if (fwrite(lz->obuf + lz->spos, 1, size, lz->fout) != size)
299*5c4b64e6SXin LI return -1;
300*5c4b64e6SXin LI
301*5c4b64e6SXin LI lz->wrapped = lz->pos >= lz->dict_size;
302*5c4b64e6SXin LI if (lz->wrapped) {
303*5c4b64e6SXin LI lz->ppos += lz->pos;
304*5c4b64e6SXin LI lz->pos = 0;
305*5c4b64e6SXin LI }
306*5c4b64e6SXin LI lz->spos = lz->pos;
307*5c4b64e6SXin LI return 0;
308*5c4b64e6SXin LI }
309*5c4b64e6SXin LI
310*5c4b64e6SXin LI static void
lz_destroy(struct lz_decoder * lz)311*5c4b64e6SXin LI lz_destroy(struct lz_decoder *lz)
312*5c4b64e6SXin LI {
313*5c4b64e6SXin LI if (lz->fin)
314*5c4b64e6SXin LI fclose(lz->fin);
315*5c4b64e6SXin LI if (lz->fout)
316*5c4b64e6SXin LI fclose(lz->fout);
317*5c4b64e6SXin LI free(lz->obuf);
318*5c4b64e6SXin LI }
319*5c4b64e6SXin LI
320*5c4b64e6SXin LI static int
lz_create(struct lz_decoder * lz,int fin,int fdout,int dict_size)321*5c4b64e6SXin LI lz_create(struct lz_decoder *lz, int fin, int fdout, int dict_size)
322*5c4b64e6SXin LI {
323*5c4b64e6SXin LI memset(lz, 0, sizeof(*lz));
324*5c4b64e6SXin LI
325*5c4b64e6SXin LI lz->fin = fdopen(dup(fin), "r");
326*5c4b64e6SXin LI if (lz->fin == NULL)
327*5c4b64e6SXin LI goto out;
328*5c4b64e6SXin LI
329*5c4b64e6SXin LI lz->fout = fdopen(dup(fdout), "w");
330*5c4b64e6SXin LI if (lz->fout == NULL)
331*5c4b64e6SXin LI goto out;
332*5c4b64e6SXin LI
333*5c4b64e6SXin LI lz->pos = lz->ppos = lz->spos = 0;
334*5c4b64e6SXin LI lz->crc = ~0;
335*5c4b64e6SXin LI lz->dict_size = dict_size;
336*5c4b64e6SXin LI lz->wrapped = false;
337*5c4b64e6SXin LI
338*5c4b64e6SXin LI lz->obuf = malloc(dict_size);
339*5c4b64e6SXin LI if (lz->obuf == NULL)
340*5c4b64e6SXin LI goto out;
341*5c4b64e6SXin LI
342*5c4b64e6SXin LI if (lz_rd_create(&lz->rdec, lz->fin) == -1)
343*5c4b64e6SXin LI goto out;
344*5c4b64e6SXin LI return 0;
345*5c4b64e6SXin LI out:
346*5c4b64e6SXin LI lz_destroy(lz);
347*5c4b64e6SXin LI return -1;
348*5c4b64e6SXin LI }
349*5c4b64e6SXin LI
350*5c4b64e6SXin LI static uint8_t
lz_peek(const struct lz_decoder * lz,unsigned ahead)351*5c4b64e6SXin LI lz_peek(const struct lz_decoder *lz, unsigned ahead)
352*5c4b64e6SXin LI {
353*5c4b64e6SXin LI off_t diff = lz->pos - ahead - 1;
354*5c4b64e6SXin LI
355*5c4b64e6SXin LI if (diff >= 0)
356*5c4b64e6SXin LI return lz->obuf[diff];
357*5c4b64e6SXin LI
358*5c4b64e6SXin LI if (lz->wrapped)
359*5c4b64e6SXin LI return lz->obuf[lz->dict_size + diff];
360*5c4b64e6SXin LI
361*5c4b64e6SXin LI return 0;
362*5c4b64e6SXin LI }
363*5c4b64e6SXin LI
364*5c4b64e6SXin LI static void
lz_put(struct lz_decoder * lz,uint8_t b)365*5c4b64e6SXin LI lz_put(struct lz_decoder *lz, uint8_t b)
366*5c4b64e6SXin LI {
367*5c4b64e6SXin LI lz->obuf[lz->pos++] = b;
368*5c4b64e6SXin LI if (lz->dict_size == lz->pos)
369*5c4b64e6SXin LI lz_flush(lz);
370*5c4b64e6SXin LI }
371*5c4b64e6SXin LI
372*5c4b64e6SXin LI static off_t
lz_get_data_position(const struct lz_decoder * lz)373*5c4b64e6SXin LI lz_get_data_position(const struct lz_decoder *lz)
374*5c4b64e6SXin LI {
375*5c4b64e6SXin LI return lz->ppos + lz->pos;
376*5c4b64e6SXin LI }
377*5c4b64e6SXin LI
378*5c4b64e6SXin LI static unsigned
lz_get_crc(const struct lz_decoder * lz)379*5c4b64e6SXin LI lz_get_crc(const struct lz_decoder *lz)
380*5c4b64e6SXin LI {
381*5c4b64e6SXin LI return lz->crc ^ 0xffffffffU;
382*5c4b64e6SXin LI }
383*5c4b64e6SXin LI
384*5c4b64e6SXin LI static void
lz_bm_init(int * a,size_t l)385*5c4b64e6SXin LI lz_bm_init(int *a, size_t l)
386*5c4b64e6SXin LI {
387*5c4b64e6SXin LI for (size_t i = 0; i < l; i++)
388*5c4b64e6SXin LI a[i] = BIT_MODEL_INIT;
389*5c4b64e6SXin LI }
390*5c4b64e6SXin LI
391*5c4b64e6SXin LI #define LZ_BM_INIT(a) lz_bm_init(a, nitems(a))
392*5c4b64e6SXin LI #define LZ_BM_INIT2(a) do { \
393*5c4b64e6SXin LI size_t l = nitems(a[0]); \
394*5c4b64e6SXin LI for (size_t i = 0; i < nitems(a); i++) \
395*5c4b64e6SXin LI lz_bm_init(a[i], l); \
396*5c4b64e6SXin LI } while (/*CONSTCOND*/0)
397*5c4b64e6SXin LI
398*5c4b64e6SXin LI #define LZ_MODEL_INIT(a) do { \
399*5c4b64e6SXin LI a.choice1 = BIT_MODEL_INIT; \
400*5c4b64e6SXin LI a.choice2 = BIT_MODEL_INIT; \
401*5c4b64e6SXin LI LZ_BM_INIT2(a.bm_low); \
402*5c4b64e6SXin LI LZ_BM_INIT2(a.bm_mid); \
403*5c4b64e6SXin LI LZ_BM_INIT(a.bm_high); \
404*5c4b64e6SXin LI } while (/*CONSTCOND*/0)
405*5c4b64e6SXin LI
406*5c4b64e6SXin LI static bool
lz_decode_member(struct lz_decoder * lz)407*5c4b64e6SXin LI lz_decode_member(struct lz_decoder *lz)
408*5c4b64e6SXin LI {
409*5c4b64e6SXin LI int bm_literal[1 << LITERAL_CONTEXT_BITS][0x300];
410*5c4b64e6SXin LI int bm_match[LZ_STATES][POS_STATES];
411*5c4b64e6SXin LI int bm_rep[4][LZ_STATES];
412*5c4b64e6SXin LI int bm_len[LZ_STATES][POS_STATES];
413*5c4b64e6SXin LI int bm_dis_slot[LZ_STATES][1 << DIS_SLOT_BITS];
414*5c4b64e6SXin LI int bm_dis[MODELED_DISTANCES - DIS_MODEL_END + 1];
415*5c4b64e6SXin LI int bm_align[DIS_ALIGN_SIZE];
416*5c4b64e6SXin LI
417*5c4b64e6SXin LI LZ_BM_INIT2(bm_literal);
418*5c4b64e6SXin LI LZ_BM_INIT2(bm_match);
419*5c4b64e6SXin LI LZ_BM_INIT2(bm_rep);
420*5c4b64e6SXin LI LZ_BM_INIT2(bm_len);
421*5c4b64e6SXin LI LZ_BM_INIT2(bm_dis_slot);
422*5c4b64e6SXin LI LZ_BM_INIT(bm_dis);
423*5c4b64e6SXin LI LZ_BM_INIT(bm_align);
424*5c4b64e6SXin LI
425*5c4b64e6SXin LI struct lz_len_model match_len_model;
426*5c4b64e6SXin LI struct lz_len_model rep_len_model;
427*5c4b64e6SXin LI
428*5c4b64e6SXin LI LZ_MODEL_INIT(match_len_model);
429*5c4b64e6SXin LI LZ_MODEL_INIT(rep_len_model);
430*5c4b64e6SXin LI
431*5c4b64e6SXin LI struct lz_range_decoder *rd = &lz->rdec;
432*5c4b64e6SXin LI unsigned rep[4] = { 0 };
433*5c4b64e6SXin LI
434*5c4b64e6SXin LI
435*5c4b64e6SXin LI int state = 0;
436*5c4b64e6SXin LI
437*5c4b64e6SXin LI while (!feof(lz->fin) && !ferror(lz->fin)) {
438*5c4b64e6SXin LI const int pos_state = lz_get_data_position(lz) & POS_STATE_MASK;
439*5c4b64e6SXin LI // bit 1
440*5c4b64e6SXin LI if (lz_rd_decode_bit(rd, &bm_match[state][pos_state]) == 0) {
441*5c4b64e6SXin LI const uint8_t prev_byte = lz_peek(lz, 0);
442*5c4b64e6SXin LI const int literal_state =
443*5c4b64e6SXin LI prev_byte >> (8 - LITERAL_CONTEXT_BITS);
444*5c4b64e6SXin LI int *bm = bm_literal[literal_state];
445*5c4b64e6SXin LI if (lz_st_is_char(state))
446*5c4b64e6SXin LI lz_put(lz, lz_rd_decode_tree(rd, bm, 8));
447*5c4b64e6SXin LI else {
448*5c4b64e6SXin LI int peek = lz_peek(lz, rep[0]);
449*5c4b64e6SXin LI lz_put(lz, lz_rd_decode_matched(rd, bm, peek));
450*5c4b64e6SXin LI }
451*5c4b64e6SXin LI state = lz_st_get_char(state);
452*5c4b64e6SXin LI continue;
453*5c4b64e6SXin LI }
454*5c4b64e6SXin LI int len;
455*5c4b64e6SXin LI // bit 2
456*5c4b64e6SXin LI if (lz_rd_decode_bit(rd, &bm_rep[0][state]) != 0) {
457*5c4b64e6SXin LI // bit 3
458*5c4b64e6SXin LI if (lz_rd_decode_bit(rd, &bm_rep[1][state]) == 0) {
459*5c4b64e6SXin LI // bit 4
460*5c4b64e6SXin LI if (lz_rd_decode_bit(rd,
461*5c4b64e6SXin LI &bm_len[state][pos_state]) == 0)
462*5c4b64e6SXin LI {
463*5c4b64e6SXin LI state = lz_st_get_short_rep(state);
464*5c4b64e6SXin LI lz_put(lz, lz_peek(lz, rep[0]));
465*5c4b64e6SXin LI continue;
466*5c4b64e6SXin LI }
467*5c4b64e6SXin LI } else {
468*5c4b64e6SXin LI unsigned distance;
469*5c4b64e6SXin LI // bit 4
470*5c4b64e6SXin LI if (lz_rd_decode_bit(rd, &bm_rep[2][state])
471*5c4b64e6SXin LI == 0)
472*5c4b64e6SXin LI distance = rep[1];
473*5c4b64e6SXin LI else {
474*5c4b64e6SXin LI // bit 5
475*5c4b64e6SXin LI if (lz_rd_decode_bit(rd,
476*5c4b64e6SXin LI &bm_rep[3][state]) == 0)
477*5c4b64e6SXin LI distance = rep[2];
478*5c4b64e6SXin LI else {
479*5c4b64e6SXin LI distance = rep[3];
480*5c4b64e6SXin LI rep[3] = rep[2];
481*5c4b64e6SXin LI }
482*5c4b64e6SXin LI rep[2] = rep[1];
483*5c4b64e6SXin LI }
484*5c4b64e6SXin LI rep[1] = rep[0];
485*5c4b64e6SXin LI rep[0] = distance;
486*5c4b64e6SXin LI }
487*5c4b64e6SXin LI state = lz_st_get_rep(state);
488*5c4b64e6SXin LI len = MIN_MATCH_LEN +
489*5c4b64e6SXin LI lz_rd_decode_len(rd, &rep_len_model, pos_state);
490*5c4b64e6SXin LI } else {
491*5c4b64e6SXin LI rep[3] = rep[2]; rep[2] = rep[1]; rep[1] = rep[0];
492*5c4b64e6SXin LI len = MIN_MATCH_LEN +
493*5c4b64e6SXin LI lz_rd_decode_len(rd, &match_len_model, pos_state);
494*5c4b64e6SXin LI const int len_state =
495*5c4b64e6SXin LI MIN(len - MIN_MATCH_LEN, STATES - 1);
496*5c4b64e6SXin LI rep[0] = lz_rd_decode_tree(rd, bm_dis_slot[len_state],
497*5c4b64e6SXin LI DIS_SLOT_BITS);
498*5c4b64e6SXin LI if (rep[0] >= DIS_MODEL_START) {
499*5c4b64e6SXin LI const unsigned dis_slot = rep[0];
500*5c4b64e6SXin LI const int direct_bits = (dis_slot >> 1) - 1;
501*5c4b64e6SXin LI rep[0] = (2 | (dis_slot & 1)) << direct_bits;
502*5c4b64e6SXin LI if (dis_slot < DIS_MODEL_END)
503*5c4b64e6SXin LI rep[0] += lz_rd_decode_tree_reversed(rd,
504*5c4b64e6SXin LI &bm_dis[rep[0] - dis_slot],
505*5c4b64e6SXin LI direct_bits);
506*5c4b64e6SXin LI else {
507*5c4b64e6SXin LI rep[0] += lz_rd_decode(rd, direct_bits
508*5c4b64e6SXin LI - DIS_ALIGN_BITS) << DIS_ALIGN_BITS;
509*5c4b64e6SXin LI rep[0] += lz_rd_decode_tree_reversed(rd,
510*5c4b64e6SXin LI bm_align, DIS_ALIGN_BITS);
511*5c4b64e6SXin LI if (rep[0] == 0xFFFFFFFFU) {
512*5c4b64e6SXin LI lz_flush(lz);
513*5c4b64e6SXin LI return len == MIN_MATCH_LEN;
514*5c4b64e6SXin LI }
515*5c4b64e6SXin LI }
516*5c4b64e6SXin LI }
517*5c4b64e6SXin LI state = lz_st_get_match(state);
518*5c4b64e6SXin LI if (rep[0] >= lz->dict_size ||
519*5c4b64e6SXin LI (rep[0] >= lz->pos && !lz->wrapped)) {
520*5c4b64e6SXin LI lz_flush(lz);
521*5c4b64e6SXin LI return false;
522*5c4b64e6SXin LI }
523*5c4b64e6SXin LI }
524*5c4b64e6SXin LI for (int i = 0; i < len; i++)
525*5c4b64e6SXin LI lz_put(lz, lz_peek(lz, rep[0]));
526*5c4b64e6SXin LI }
527*5c4b64e6SXin LI lz_flush(lz);
528*5c4b64e6SXin LI return false;
529*5c4b64e6SXin LI }
530*5c4b64e6SXin LI
531*5c4b64e6SXin LI /*
532*5c4b64e6SXin LI * 0-3 CRC32 of the uncompressed data
533*5c4b64e6SXin LI * 4-11 size of the uncompressed data
534*5c4b64e6SXin LI * 12-19 member size including header and trailer
535*5c4b64e6SXin LI */
536*5c4b64e6SXin LI #define TRAILER_SIZE 20
537*5c4b64e6SXin LI
538*5c4b64e6SXin LI
539*5c4b64e6SXin LI static off_t
lz_decode(int fin,int fdout,unsigned dict_size,off_t * insize)540*5c4b64e6SXin LI lz_decode(int fin, int fdout, unsigned dict_size, off_t *insize)
541*5c4b64e6SXin LI {
542*5c4b64e6SXin LI struct lz_decoder lz;
543*5c4b64e6SXin LI off_t rv = -1;
544*5c4b64e6SXin LI
545*5c4b64e6SXin LI if (lz_create(&lz, fin, fdout, dict_size) == -1)
546*5c4b64e6SXin LI return -1;
547*5c4b64e6SXin LI
548*5c4b64e6SXin LI if (!lz_decode_member(&lz))
549*5c4b64e6SXin LI goto out;
550*5c4b64e6SXin LI
551*5c4b64e6SXin LI uint8_t trailer[TRAILER_SIZE];
552*5c4b64e6SXin LI
553*5c4b64e6SXin LI for(size_t i = 0; i < nitems(trailer); i++)
554*5c4b64e6SXin LI trailer[i] = (uint8_t)getc(lz.fin);
555*5c4b64e6SXin LI
556*5c4b64e6SXin LI unsigned crc = 0;
557*5c4b64e6SXin LI for (int i = 3; i >= 0; --i) {
558*5c4b64e6SXin LI crc <<= 8;
559*5c4b64e6SXin LI crc += trailer[i];
560*5c4b64e6SXin LI }
561*5c4b64e6SXin LI
562*5c4b64e6SXin LI int64_t data_size = 0;
563*5c4b64e6SXin LI for (int i = 11; i >= 4; --i) {
564*5c4b64e6SXin LI data_size <<= 8;
565*5c4b64e6SXin LI data_size += trailer[i];
566*5c4b64e6SXin LI }
567*5c4b64e6SXin LI
568*5c4b64e6SXin LI if (crc != lz_get_crc(&lz) || data_size != lz_get_data_position(&lz))
569*5c4b64e6SXin LI goto out;
570*5c4b64e6SXin LI
571*5c4b64e6SXin LI rv = 0;
572*5c4b64e6SXin LI for (int i = 19; i >= 12; --i) {
573*5c4b64e6SXin LI rv <<= 8;
574*5c4b64e6SXin LI rv += trailer[i];
575*5c4b64e6SXin LI }
576*5c4b64e6SXin LI if (insize)
577*5c4b64e6SXin LI *insize = rv;
578*5c4b64e6SXin LI #if 0
579*5c4b64e6SXin LI /* Does not work with pipes */
580*5c4b64e6SXin LI rv = ftello(lz.fout);
581*5c4b64e6SXin LI #else
582*5c4b64e6SXin LI rv = data_size;
583*5c4b64e6SXin LI #endif
584*5c4b64e6SXin LI out:
585*5c4b64e6SXin LI lz_destroy(&lz);
586*5c4b64e6SXin LI return rv;
587*5c4b64e6SXin LI }
588*5c4b64e6SXin LI
589*5c4b64e6SXin LI
590*5c4b64e6SXin LI /*
591*5c4b64e6SXin LI * 0-3 magic
592*5c4b64e6SXin LI * 4 version
593*5c4b64e6SXin LI * 5 coded dict_size
594*5c4b64e6SXin LI */
595*5c4b64e6SXin LI #define HDR_SIZE 6
596*5c4b64e6SXin LI #define MIN_DICTIONARY_SIZE (1 << 12)
597*5c4b64e6SXin LI #define MAX_DICTIONARY_SIZE (1 << 29)
598*5c4b64e6SXin LI
599*5c4b64e6SXin LI static const char hdrmagic[] = { 'L', 'Z', 'I', 'P', 1 };
600*5c4b64e6SXin LI
601*5c4b64e6SXin LI static unsigned
lz_get_dict_size(unsigned char c)602*5c4b64e6SXin LI lz_get_dict_size(unsigned char c)
603*5c4b64e6SXin LI {
604*5c4b64e6SXin LI unsigned dict_size = 1 << (c & 0x1f);
605*5c4b64e6SXin LI dict_size -= (dict_size >> 2) * ( (c >> 5) & 0x7);
606*5c4b64e6SXin LI if (dict_size < MIN_DICTIONARY_SIZE || dict_size > MAX_DICTIONARY_SIZE)
607*5c4b64e6SXin LI return 0;
608*5c4b64e6SXin LI return dict_size;
609*5c4b64e6SXin LI }
610*5c4b64e6SXin LI
611*5c4b64e6SXin LI static off_t
unlz(int fin,int fout,char * pre,size_t prelen,off_t * bytes_in)612*5c4b64e6SXin LI unlz(int fin, int fout, char *pre, size_t prelen, off_t *bytes_in)
613*5c4b64e6SXin LI {
614*5c4b64e6SXin LI if (lz_crc[0] == 0)
615*5c4b64e6SXin LI lz_crc_init();
616*5c4b64e6SXin LI
617*5c4b64e6SXin LI char header[HDR_SIZE];
618*5c4b64e6SXin LI
619*5c4b64e6SXin LI if (pre && prelen)
620*5c4b64e6SXin LI memcpy(header, pre, prelen);
621*5c4b64e6SXin LI
622*5c4b64e6SXin LI ssize_t nr = read(fin, header + prelen, sizeof(header) - prelen);
623*5c4b64e6SXin LI switch (nr) {
624*5c4b64e6SXin LI case -1:
625*5c4b64e6SXin LI return -1;
626*5c4b64e6SXin LI case 0:
627*5c4b64e6SXin LI return prelen ? -1 : 0;
628*5c4b64e6SXin LI default:
629*5c4b64e6SXin LI if ((size_t)nr != sizeof(header) - prelen)
630*5c4b64e6SXin LI return -1;
631*5c4b64e6SXin LI break;
632*5c4b64e6SXin LI }
633*5c4b64e6SXin LI
634*5c4b64e6SXin LI if (memcmp(header, hdrmagic, sizeof(hdrmagic)) != 0)
635*5c4b64e6SXin LI return -1;
636*5c4b64e6SXin LI
637*5c4b64e6SXin LI unsigned dict_size = lz_get_dict_size(header[5]);
638*5c4b64e6SXin LI if (dict_size == 0)
639*5c4b64e6SXin LI return -1;
640*5c4b64e6SXin LI
641*5c4b64e6SXin LI return lz_decode(fin, fout, dict_size, bytes_in);
642*5c4b64e6SXin LI }
643