1 /* fitblk.c contains minimal changes required to be compiled with zlibWrapper:
2 * - #include "zlib.h" was changed to #include "zstd_zlibwrapper.h"
3 * - writing block to stdout was disabled */
4
5 /* fitblk.c: example of fitting compressed output to a specified size
6 Not copyrighted -- provided to the public domain
7 Version 1.1 25 November 2004 Mark Adler */
8
9 /* Version history:
10 1.0 24 Nov 2004 First version
11 1.1 25 Nov 2004 Change deflateInit2() to deflateInit()
12 Use fixed-size, stack-allocated raw buffers
13 Simplify code moving compression to subroutines
14 Use assert() for internal errors
15 Add detailed description of approach
16 */
17
18 /* Approach to just fitting a requested compressed size:
19
20 fitblk performs three compression passes on a portion of the input
21 data in order to determine how much of that input will compress to
22 nearly the requested output block size. The first pass generates
23 enough deflate blocks to produce output to fill the requested
24 output size plus a specified excess amount (see the EXCESS define
25 below). The last deflate block may go quite a bit past that, but
26 is discarded. The second pass decompresses and recompresses just
27 the compressed data that fit in the requested plus excess sized
28 buffer. The deflate process is terminated after that amount of
29 input, which is less than the amount consumed on the first pass.
30 The last deflate block of the result will be of a comparable size
31 to the final product, so that the header for that deflate block and
32 the compression ratio for that block will be about the same as in
33 the final product. The third compression pass decompresses the
34 result of the second step, but only the compressed data up to the
35 requested size minus an amount to allow the compressed stream to
36 complete (see the MARGIN define below). That will result in a
37 final compressed stream whose length is less than or equal to the
38 requested size. Assuming sufficient input and a requested size
39 greater than a few hundred bytes, the shortfall will typically be
40 less than ten bytes.
41
42 If the input is short enough that the first compression completes
43 before filling the requested output size, then that compressed
44 stream is returned with no recompression.
45
46 EXCESS is chosen to be just greater than the shortfall seen in a
47 two pass approach similar to the above. That shortfall is due to
48 the last deflate block compressing more efficiently with a smaller
49 header on the second pass. EXCESS is set to be large enough so
50 that there is enough uncompressed data for the second pass to fill
51 out the requested size, and small enough so that the final deflate
52 block of the second pass will be close in size to the final deflate
53 block of the third and final pass. MARGIN is chosen to be just
54 large enough to assure that the final compression has enough room
55 to complete in all cases.
56 */
57
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include "zstd_zlibwrapper.h"
62
63 #define LOG_FITBLK(...) /*printf(__VA_ARGS__)*/
64 #define local static
65
66 /* print nastygram and leave */
quit(char * why)67 local void quit(char *why)
68 {
69 fprintf(stderr, "fitblk abort: %s\n", why);
70 exit(1);
71 }
72
73 #define RAWLEN 4096 /* intermediate uncompressed buffer size */
74
75 /* compress from file to def until provided buffer is full or end of
76 input reached; return last deflate() return value, or Z_ERRNO if
77 there was read error on the file */
partcompress(FILE * in,z_streamp def)78 local int partcompress(FILE *in, z_streamp def)
79 {
80 int ret, flush;
81 unsigned char raw[RAWLEN];
82
83 flush = Z_SYNC_FLUSH;
84 do {
85 def->avail_in = (uInt)fread(raw, 1, RAWLEN, in);
86 if (ferror(in))
87 return Z_ERRNO;
88 def->next_in = raw;
89 if (feof(in))
90 flush = Z_FINISH;
91 LOG_FITBLK("partcompress1 avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out);
92 ret = deflate(def, flush);
93 LOG_FITBLK("partcompress2 ret=%d avail_in=%d total_in=%d avail_out=%d total_out=%d\n", ret, (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out);
94 assert(ret != Z_STREAM_ERROR);
95 } while (def->avail_out != 0 && flush == Z_SYNC_FLUSH);
96 return ret;
97 }
98
99 /* recompress from inf's input to def's output; the input for inf and
100 the output for def are set in those structures before calling;
101 return last deflate() return value, or Z_MEM_ERROR if inflate()
102 was not able to allocate enough memory when it needed to */
recompress(z_streamp inf,z_streamp def)103 local int recompress(z_streamp inf, z_streamp def)
104 {
105 int ret, flush;
106 unsigned char raw[RAWLEN];
107
108 flush = Z_NO_FLUSH;
109 LOG_FITBLK("recompress start\n");
110 do {
111 /* decompress */
112 inf->avail_out = RAWLEN;
113 inf->next_out = raw;
114 LOG_FITBLK("recompress1inflate avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)inf->avail_in, (int)inf->total_in, (int)inf->avail_out, (int)inf->total_out);
115 ret = inflate(inf, Z_NO_FLUSH);
116 LOG_FITBLK("recompress2inflate avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)inf->avail_in, (int)inf->total_in, (int)inf->avail_out, (int)inf->total_out);
117 assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR &&
118 ret != Z_NEED_DICT);
119 if (ret == Z_MEM_ERROR)
120 return ret;
121
122 /* compress what was decompressed until done or no room */
123 def->avail_in = RAWLEN - inf->avail_out;
124 def->next_in = raw;
125 if (inf->avail_out != 0)
126 flush = Z_FINISH;
127 LOG_FITBLK("recompress1deflate avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out);
128 ret = deflate(def, flush);
129 LOG_FITBLK("recompress2deflate ret=%d avail_in=%d total_in=%d avail_out=%d total_out=%d\n", ret, (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out);
130 assert(ret != Z_STREAM_ERROR);
131 } while (ret != Z_STREAM_END && def->avail_out != 0);
132 return ret;
133 }
134
135 #define EXCESS 256 /* empirically determined stream overage */
136 #define MARGIN 8 /* amount to back off for completion */
137
138 /* compress from stdin to fixed-size block on stdout */
main(int argc,char ** argv)139 int main(int argc, char **argv)
140 {
141 int ret; /* return code */
142 unsigned size; /* requested fixed output block size */
143 unsigned have; /* bytes written by deflate() call */
144 unsigned char *blk; /* intermediate and final stream */
145 unsigned char *tmp; /* close to desired size stream */
146 z_stream def, inf; /* zlib deflate and inflate states */
147
148 /* get requested output size */
149 if (argc != 2)
150 quit("need one argument: size of output block");
151 ret = (int)strtol(argv[1], argv + 1, 10);
152 if (argv[1][0] != 0)
153 quit("argument must be a number");
154 if (ret < 8) /* 8 is minimum zlib stream size */
155 quit("need positive size of 8 or greater");
156 size = (unsigned)ret;
157
158 printf("zlib version %s\n", ZLIB_VERSION);
159 if (ZWRAP_isUsingZSTDcompression()) printf("zstd version %s\n", zstdVersion());
160
161 /* allocate memory for buffers and compression engine */
162 blk = (unsigned char*)malloc(size + EXCESS);
163 def.zalloc = Z_NULL;
164 def.zfree = Z_NULL;
165 def.opaque = Z_NULL;
166 ret = deflateInit(&def, Z_DEFAULT_COMPRESSION);
167 if (ret != Z_OK || blk == NULL)
168 quit("out of memory");
169
170 /* compress from stdin until output full, or no more input */
171 def.avail_out = size + EXCESS;
172 def.next_out = blk;
173 LOG_FITBLK("partcompress1 total_in=%d total_out=%d\n", (int)def.total_in, (int)def.total_out);
174 ret = partcompress(stdin, &def);
175 printf("partcompress total_in=%d total_out=%d\n", (int)def.total_in, (int)def.total_out);
176 if (ret == Z_ERRNO)
177 quit("error reading input");
178
179 /* if it all fit, then size was undersubscribed -- done! */
180 if (ret == Z_STREAM_END && def.avail_out >= EXCESS) {
181 /* write block to stdout */
182 have = size + EXCESS - def.avail_out;
183 /* if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
184 * quit("error writing output"); */
185
186 /* clean up and print results to stderr */
187 ret = deflateEnd(&def);
188 assert(ret != Z_STREAM_ERROR);
189 free(blk);
190 fprintf(stderr,
191 "%u bytes unused out of %u requested (all input)\n",
192 size - have, size);
193 return 0;
194 }
195
196 /* it didn't all fit -- set up for recompression */
197 inf.zalloc = Z_NULL;
198 inf.zfree = Z_NULL;
199 inf.opaque = Z_NULL;
200 inf.avail_in = 0;
201 inf.next_in = Z_NULL;
202 ret = inflateInit(&inf);
203 tmp = (unsigned char*)malloc(size + EXCESS);
204 if (ret != Z_OK || tmp == NULL)
205 quit("out of memory");
206 ret = deflateReset(&def);
207 assert(ret != Z_STREAM_ERROR);
208
209 /* do first recompression close to the right amount */
210 inf.avail_in = size + EXCESS;
211 inf.next_in = blk;
212 def.avail_out = size + EXCESS;
213 def.next_out = tmp;
214 LOG_FITBLK("recompress1 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out);
215 ret = recompress(&inf, &def);
216 LOG_FITBLK("recompress1 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out);
217 if (ret == Z_MEM_ERROR)
218 quit("out of memory");
219
220 /* set up for next recompression */
221 ret = inflateReset(&inf);
222 assert(ret != Z_STREAM_ERROR);
223 ret = deflateReset(&def);
224 assert(ret != Z_STREAM_ERROR);
225
226 /* do second and final recompression (third compression) */
227 inf.avail_in = size - MARGIN; /* assure stream will complete */
228 inf.next_in = tmp;
229 def.avail_out = size;
230 def.next_out = blk;
231 LOG_FITBLK("recompress2 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out);
232 ret = recompress(&inf, &def);
233 LOG_FITBLK("recompress2 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out);
234 if (ret == Z_MEM_ERROR)
235 quit("out of memory");
236 assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */
237
238 /* done -- write block to stdout */
239 have = size - def.avail_out;
240 /* if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
241 * quit("error writing output"); */
242
243 /* clean up and print results to stderr */
244 free(tmp);
245 ret = inflateEnd(&inf);
246 assert(ret != Z_STREAM_ERROR);
247 ret = deflateEnd(&def);
248 assert(ret != Z_STREAM_ERROR);
249 free(blk);
250 fprintf(stderr,
251 "%u bytes unused out of %u requested (%lu input)\n",
252 size - have, size, def.total_in);
253 return 0;
254 }
255