1 /*-
2 * Copyright (c) 2008 Joerg Sonnenberger
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 /*-
27 * Copyright (c) 1985, 1986, 1992, 1993
28 * The Regents of the University of California. All rights reserved.
29 *
30 * This code is derived from software contributed to Berkeley by
31 * Diomidis Spinellis and James A. Woods, derived from original
32 * work by Spencer Thomas and Joseph Orost.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. Neither the name of the University nor the names of its contributors
43 * may be used to endorse or promote products derived from this software
44 * without specific prior written permission.
45 *
46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56 * SUCH DAMAGE.
57 */
58
59 #include "archive_platform.h"
60
61 #ifdef HAVE_ERRNO_H
62 #include <errno.h>
63 #endif
64 #ifdef HAVE_STDLIB_H
65 #include <stdlib.h>
66 #endif
67 #ifdef HAVE_STRING_H
68 #include <string.h>
69 #endif
70
71 #include "archive.h"
72 #include "archive_private.h"
73 #include "archive_write_private.h"
74
/* Number of slots in the hashtab[] and codetab[] arrays of private_data. */
#define HSIZE 69001 /* 95% occupancy */
/* Left shift applied to the input byte before it is xor-ed with the prefix code to pick a hash slot. */
#define HSHIFT 8 /* 8 - trunc(log2(HSIZE / 65536)) */
/* Number of input bytes added to in_count to schedule the next compression-ratio check. */
#define CHECK_GAP 10000 /* Ratio check interval. */

/* Largest value representable in `bits` bits. */
#define MAXCODE(bits) ((1 << (bits)) - 1)

/*
 * the next two codes should not be changed lightly, as they must not
 * lie within the contiguous general code space.
 */
/* first_free is reset to FIRST whenever the table is (re)initialized. */
#define FIRST 257 /* First free entry. */
/* Emitted through output_code() after the hash table has been wiped; resets the code width to 9 bits. */
#define CLEAR 256 /* Table clear output code. */
87
88 struct private_data {
89 int64_t in_count, out_count, checkpoint;
90
91 int code_len; /* Number of bits/code. */
92 int cur_maxcode; /* Maximum code, given n_bits. */
93 int max_maxcode; /* Should NEVER generate this code. */
94 int hashtab [HSIZE];
95 unsigned short codetab [HSIZE];
96 int first_free; /* First unused entry. */
97 int compress_ratio;
98
99 int cur_code, cur_fcode;
100
101 int bit_offset;
102 unsigned char bit_buf;
103
104 unsigned char *compressed;
105 size_t compressed_buffer_size;
106 size_t compressed_offset;
107 };
108
/*
 * Filter callbacks defined below.  archive_write_add_filter_compress()
 * installs the open callback; the open callback in turn installs the
 * write, close and free callbacks on the filter.
 */
static int archive_compressor_compress_open(struct archive_write_filter *);
static int archive_compressor_compress_write(struct archive_write_filter *,
    const void *, size_t);
static int archive_compressor_compress_close(struct archive_write_filter *);
static int archive_compressor_compress_free(struct archive_write_filter *);
114
115 #if ARCHIVE_VERSION_NUMBER < 4000000
/*
 * Older entry point, compiled only while ARCHIVE_VERSION_NUMBER is below
 * 4000000.  Calls __archive_write_filters_free() on the handle and then
 * adds the compress filter, returning the result of that addition.
 */
int
archive_write_set_compression_compress(struct archive *a)
{
	int status;

	__archive_write_filters_free(a);
	status = archive_write_add_filter_compress(a);
	return (status);
}
122 #endif
123
124 /*
125 * Add a compress filter to this write handle.
126 */
127 int
archive_write_add_filter_compress(struct archive * _a)128 archive_write_add_filter_compress(struct archive *_a)
129 {
130 struct archive_write *a = (struct archive_write *)_a;
131 struct archive_write_filter *f = __archive_write_allocate_filter(_a);
132
133 archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC,
134 ARCHIVE_STATE_NEW, "archive_write_add_filter_compress");
135 f->open = &archive_compressor_compress_open;
136 f->code = ARCHIVE_FILTER_COMPRESS;
137 f->name = "compress";
138 return (ARCHIVE_OK);
139 }
140
141 /*
142 * Setup callback.
143 */
144 static int
archive_compressor_compress_open(struct archive_write_filter * f)145 archive_compressor_compress_open(struct archive_write_filter *f)
146 {
147 struct private_data *state;
148 size_t bs = 65536, bpb;
149
150 f->code = ARCHIVE_FILTER_COMPRESS;
151 f->name = "compress";
152
153 state = calloc(1, sizeof(*state));
154 if (state == NULL) {
155 archive_set_error(f->archive, ENOMEM,
156 "Can't allocate data for compression");
157 return (ARCHIVE_FATAL);
158 }
159
160 if (f->archive->magic == ARCHIVE_WRITE_MAGIC) {
161 /* Buffer size should be a multiple number of the bytes
162 * per block for performance. */
163 bpb = archive_write_get_bytes_per_block(f->archive);
164 if (bpb > bs)
165 bs = bpb;
166 else if (bpb != 0)
167 bs -= bs % bpb;
168 }
169 state->compressed_buffer_size = bs;
170 state->compressed = malloc(state->compressed_buffer_size);
171
172 if (state->compressed == NULL) {
173 archive_set_error(f->archive, ENOMEM,
174 "Can't allocate data for compression buffer");
175 free(state);
176 return (ARCHIVE_FATAL);
177 }
178
179 f->write = archive_compressor_compress_write;
180 f->close = archive_compressor_compress_close;
181 f->free = archive_compressor_compress_free;
182
183 state->max_maxcode = 0x10000; /* Should NEVER generate this code. */
184 state->in_count = 0; /* Length of input. */
185 state->bit_buf = 0;
186 state->bit_offset = 0;
187 state->out_count = 3; /* Includes 3-byte header mojo. */
188 state->compress_ratio = 0;
189 state->checkpoint = CHECK_GAP;
190 state->code_len = 9;
191 state->cur_maxcode = MAXCODE(state->code_len);
192 state->first_free = FIRST;
193
194 memset(state->hashtab, 0xff, sizeof(state->hashtab));
195
196 /* Prime output buffer with a gzip header. */
197 state->compressed[0] = 0x1f; /* Compress */
198 state->compressed[1] = 0x9d;
199 state->compressed[2] = 0x90; /* Block mode, 16bit max */
200 state->compressed_offset = 3;
201
202 f->data = state;
203 return (0);
204 }
205
/*-
 * Output the given code.
 * Inputs:
 * 	code:	A code_len-bit integer.  End of input is not signalled
 *		through this routine; output_flush() writes the final
 *		partial byte.
 * Outputs:
 * 	Appends the bits of the code to the compressed output buffer.
 * Assumptions:
 *	Chars are 8 bits long.
 * Algorithm:
 * 	Codes are packed least significant bit first.  Complete bytes
 *	are written out immediately and the leftover bits are kept in
 *	bit_buf.  Eight codes of the same width fill exactly code_len
 *	bytes, and an unfinished group is padded to that length before
 *	the code width changes.  (The historical implementation kept a
 *	BITS character long buffer and used the VAX insv instruction
 *	to insert each code.)
 */
220
/* rmask[n] has exactly the n low-order bits set, for n = 0..8. */
static const unsigned char rmask[9] = {
	(1 << 0) - 1,
	(1 << 1) - 1,
	(1 << 2) - 1,
	(1 << 3) - 1,
	(1 << 4) - 1,
	(1 << 5) - 1,
	(1 << 6) - 1,
	(1 << 7) - 1,
	(1 << 8) - 1
};
223
224 static int
output_byte(struct archive_write_filter * f,unsigned char c)225 output_byte(struct archive_write_filter *f, unsigned char c)
226 {
227 struct private_data *state = f->data;
228
229 state->compressed[state->compressed_offset++] = c;
230 ++state->out_count;
231
232 if (state->compressed_buffer_size == state->compressed_offset) {
233 int ret = __archive_write_filter(f->next_filter,
234 state->compressed, state->compressed_buffer_size);
235 if (ret != ARCHIVE_OK)
236 return ARCHIVE_FATAL;
237 state->compressed_offset = 0;
238 }
239
240 return ARCHIVE_OK;
241 }
242
243 static int
output_code(struct archive_write_filter * f,int ocode)244 output_code(struct archive_write_filter *f, int ocode)
245 {
246 struct private_data *state = f->data;
247 int bits, ret, clear_flg, bit_offset;
248
249 clear_flg = ocode == CLEAR;
250
251 /*
252 * Since ocode is always >= 8 bits, only need to mask the first
253 * hunk on the left.
254 */
255 bit_offset = state->bit_offset % 8;
256 state->bit_buf |= (ocode << bit_offset) & 0xff;
257 output_byte(f, state->bit_buf);
258
259 bits = state->code_len - (8 - bit_offset);
260 ocode >>= 8 - bit_offset;
261 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
262 if (bits >= 8) {
263 output_byte(f, ocode & 0xff);
264 ocode >>= 8;
265 bits -= 8;
266 }
267 /* Last bits. */
268 state->bit_offset += state->code_len;
269 state->bit_buf = ocode & rmask[bits];
270 if (state->bit_offset == state->code_len * 8)
271 state->bit_offset = 0;
272
273 /*
274 * If the next entry is going to be too big for the ocode size,
275 * then increase it, if possible.
276 */
277 if (clear_flg || state->first_free > state->cur_maxcode) {
278 /*
279 * Write the whole buffer, because the input side won't
280 * discover the size increase until after it has read it.
281 */
282 if (state->bit_offset > 0) {
283 while (state->bit_offset < state->code_len * 8) {
284 ret = output_byte(f, state->bit_buf);
285 if (ret != ARCHIVE_OK)
286 return ret;
287 state->bit_offset += 8;
288 state->bit_buf = 0;
289 }
290 }
291 state->bit_buf = 0;
292 state->bit_offset = 0;
293
294 if (clear_flg) {
295 state->code_len = 9;
296 state->cur_maxcode = MAXCODE(state->code_len);
297 } else {
298 state->code_len++;
299 if (state->code_len == 16)
300 state->cur_maxcode = state->max_maxcode;
301 else
302 state->cur_maxcode = MAXCODE(state->code_len);
303 }
304 }
305
306 return (ARCHIVE_OK);
307 }
308
309 static int
output_flush(struct archive_write_filter * f)310 output_flush(struct archive_write_filter *f)
311 {
312 struct private_data *state = f->data;
313 int ret;
314
315 /* At EOF, write the rest of the buffer. */
316 if (state->bit_offset % 8) {
317 state->code_len = (state->bit_offset % 8 + 7) / 8;
318 ret = output_byte(f, state->bit_buf);
319 if (ret != ARCHIVE_OK)
320 return ret;
321 }
322
323 return (ARCHIVE_OK);
324 }
325
326 /*
327 * Write data to the compressed stream.
328 */
329 static int
archive_compressor_compress_write(struct archive_write_filter * f,const void * buff,size_t length)330 archive_compressor_compress_write(struct archive_write_filter *f,
331 const void *buff, size_t length)
332 {
333 struct private_data *state = (struct private_data *)f->data;
334 int i;
335 int ratio;
336 int c, disp, ret;
337 const unsigned char *bp;
338
339 if (length == 0)
340 return ARCHIVE_OK;
341
342 bp = buff;
343
344 if (state->in_count == 0) {
345 state->cur_code = *bp++;
346 ++state->in_count;
347 --length;
348 }
349
350 while (length--) {
351 c = *bp++;
352 state->in_count++;
353 state->cur_fcode = (c << 16) | state->cur_code;
354 i = ((c << HSHIFT) ^ state->cur_code); /* Xor hashing. */
355
356 if (state->hashtab[i] == state->cur_fcode) {
357 state->cur_code = state->codetab[i];
358 continue;
359 }
360 if (state->hashtab[i] < 0) /* Empty slot. */
361 goto nomatch;
362 /* Secondary hash (after G. Knott). */
363 if (i == 0)
364 disp = 1;
365 else
366 disp = HSIZE - i;
367 probe:
368 if ((i -= disp) < 0)
369 i += HSIZE;
370
371 if (state->hashtab[i] == state->cur_fcode) {
372 state->cur_code = state->codetab[i];
373 continue;
374 }
375 if (state->hashtab[i] >= 0)
376 goto probe;
377 nomatch:
378 ret = output_code(f, state->cur_code);
379 if (ret != ARCHIVE_OK)
380 return ret;
381 state->cur_code = c;
382 if (state->first_free < state->max_maxcode) {
383 state->codetab[i] = state->first_free++; /* code -> hashtable */
384 state->hashtab[i] = state->cur_fcode;
385 continue;
386 }
387 if (state->in_count < state->checkpoint)
388 continue;
389
390 state->checkpoint = state->in_count + CHECK_GAP;
391
392 if (state->in_count <= 0x007fffff && state->out_count != 0)
393 ratio = (int)(state->in_count * 256 / state->out_count);
394 else if ((ratio = (int)(state->out_count / 256)) == 0)
395 ratio = 0x7fffffff;
396 else
397 ratio = (int)(state->in_count / ratio);
398
399 if (ratio > state->compress_ratio)
400 state->compress_ratio = ratio;
401 else {
402 state->compress_ratio = 0;
403 memset(state->hashtab, 0xff, sizeof(state->hashtab));
404 state->first_free = FIRST;
405 ret = output_code(f, CLEAR);
406 if (ret != ARCHIVE_OK)
407 return ret;
408 }
409 }
410
411 return (ARCHIVE_OK);
412 }
413
414
415 /*
416 * Finish the compression...
417 */
418 static int
archive_compressor_compress_close(struct archive_write_filter * f)419 archive_compressor_compress_close(struct archive_write_filter *f)
420 {
421 struct private_data *state = (struct private_data *)f->data;
422 int ret;
423
424 ret = output_code(f, state->cur_code);
425 if (ret != ARCHIVE_OK)
426 return ret;
427 ret = output_flush(f);
428 if (ret != ARCHIVE_OK)
429 return ret;
430
431 /* Write the last block */
432 ret = __archive_write_filter(f->next_filter,
433 state->compressed, state->compressed_offset);
434 return (ret);
435 }
436
437 static int
archive_compressor_compress_free(struct archive_write_filter * f)438 archive_compressor_compress_free(struct archive_write_filter *f)
439 {
440 struct private_data *state = (struct private_data *)f->data;
441
442 free(state->compressed);
443 free(state);
444 return (ARCHIVE_OK);
445 }
446