xref: /freebsd/sys/contrib/zstd/zlibWrapper/examples/fitblk.c (revision 5ff13fbc199bdf5f0572845351c68ee5ca828e71)
1 /* fitblk.c contains minimal changes required to be compiled with zlibWrapper:
2  * - #include "zlib.h" was changed to #include "zstd_zlibwrapper.h"
3  * - writing block to stdout was disabled                          */
4 
5 /* fitblk.c: example of fitting compressed output to a specified size
6    Not copyrighted -- provided to the public domain
7    Version 1.1  25 November 2004  Mark Adler */
8 
9 /* Version history:
10    1.0  24 Nov 2004  First version
11    1.1  25 Nov 2004  Change deflateInit2() to deflateInit()
12                      Use fixed-size, stack-allocated raw buffers
13                      Simplify code moving compression to subroutines
14                      Use assert() for internal errors
15                      Add detailed description of approach
16  */
17 
18 /* Approach to just fitting a requested compressed size:
19 
20    fitblk performs three compression passes on a portion of the input
21    data in order to determine how much of that input will compress to
22    nearly the requested output block size.  The first pass generates
23    enough deflate blocks to produce output to fill the requested
24    output size plus a specified excess amount (see the EXCESS define
25    below).  The last deflate block may go quite a bit past that, but
26    is discarded.  The second pass decompresses and recompresses just
27    the compressed data that fit in the requested plus excess sized
28    buffer.  The deflate process is terminated after that amount of
29    input, which is less than the amount consumed on the first pass.
30    The last deflate block of the result will be of a comparable size
31    to the final product, so that the header for that deflate block and
32    the compression ratio for that block will be about the same as in
33    the final product.  The third compression pass decompresses the
34    result of the second step, but only the compressed data up to the
35    requested size minus an amount to allow the compressed stream to
36    complete (see the MARGIN define below).  That will result in a
37    final compressed stream whose length is less than or equal to the
38    requested size.  Assuming sufficient input and a requested size
39    greater than a few hundred bytes, the shortfall will typically be
40    less than ten bytes.
41 
   If the input is short enough that the first compression completes
   before filling the requested output size, then that compressed
   stream is returned with no recompression.
45 
46    EXCESS is chosen to be just greater than the shortfall seen in a
47    two pass approach similar to the above.  That shortfall is due to
48    the last deflate block compressing more efficiently with a smaller
49    header on the second pass.  EXCESS is set to be large enough so
50    that there is enough uncompressed data for the second pass to fill
51    out the requested size, and small enough so that the final deflate
52    block of the second pass will be close in size to the final deflate
53    block of the third and final pass.  MARGIN is chosen to be just
54    large enough to assure that the final compression has enough room
55    to complete in all cases.
56  */
57 
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include "zstd_zlibwrapper.h"
62 
63 #define LOG_FITBLK(...)   /*printf(__VA_ARGS__)*/
64 #define local static
65 
66 /* print nastygram and leave */
quit(char * why)67 local void quit(char *why)
68 {
69     fprintf(stderr, "fitblk abort: %s\n", why);
70     exit(1);
71 }
72 
73 #define RAWLEN 4096    /* intermediate uncompressed buffer size */
74 
75 /* compress from file to def until provided buffer is full or end of
76    input reached; return last deflate() return value, or Z_ERRNO if
77    there was read error on the file */
partcompress(FILE * in,z_streamp def)78 local int partcompress(FILE *in, z_streamp def)
79 {
80     int ret, flush;
81     unsigned char raw[RAWLEN];
82 
83     flush = Z_SYNC_FLUSH;
84     do {
85         def->avail_in = (uInt)fread(raw, 1, RAWLEN, in);
86         if (ferror(in))
87             return Z_ERRNO;
88         def->next_in = raw;
89         if (feof(in))
90             flush = Z_FINISH;
91         LOG_FITBLK("partcompress1 avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out);
92         ret = deflate(def, flush);
93         LOG_FITBLK("partcompress2 ret=%d avail_in=%d total_in=%d avail_out=%d total_out=%d\n", ret, (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out);
94         assert(ret != Z_STREAM_ERROR);
95     } while (def->avail_out != 0 && flush == Z_SYNC_FLUSH);
96     return ret;
97 }
98 
99 /* recompress from inf's input to def's output; the input for inf and
100    the output for def are set in those structures before calling;
101    return last deflate() return value, or Z_MEM_ERROR if inflate()
102    was not able to allocate enough memory when it needed to */
recompress(z_streamp inf,z_streamp def)103 local int recompress(z_streamp inf, z_streamp def)
104 {
105     int ret, flush;
106     unsigned char raw[RAWLEN];
107 
108     flush = Z_NO_FLUSH;
109     LOG_FITBLK("recompress start\n");
110     do {
111         /* decompress */
112         inf->avail_out = RAWLEN;
113         inf->next_out = raw;
114         LOG_FITBLK("recompress1inflate avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)inf->avail_in, (int)inf->total_in, (int)inf->avail_out, (int)inf->total_out);
115         ret = inflate(inf, Z_NO_FLUSH);
116         LOG_FITBLK("recompress2inflate avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)inf->avail_in, (int)inf->total_in, (int)inf->avail_out, (int)inf->total_out);
117         assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR &&
118                ret != Z_NEED_DICT);
119         if (ret == Z_MEM_ERROR)
120             return ret;
121 
122         /* compress what was decompressed until done or no room */
123         def->avail_in = RAWLEN - inf->avail_out;
124         def->next_in = raw;
125         if (inf->avail_out != 0)
126             flush = Z_FINISH;
127         LOG_FITBLK("recompress1deflate avail_in=%d total_in=%d avail_out=%d total_out=%d\n", (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out);
128         ret = deflate(def, flush);
129         LOG_FITBLK("recompress2deflate ret=%d avail_in=%d total_in=%d avail_out=%d total_out=%d\n", ret, (int)def->avail_in, (int)def->total_in, (int)def->avail_out, (int)def->total_out);
130         assert(ret != Z_STREAM_ERROR);
131     } while (ret != Z_STREAM_END && def->avail_out != 0);
132     return ret;
133 }
134 
135 #define EXCESS 256      /* empirically determined stream overage */
136 #define MARGIN 8        /* amount to back off for completion */
137 
138 /* compress from stdin to fixed-size block on stdout */
main(int argc,char ** argv)139 int main(int argc, char **argv)
140 {
141     int ret;                /* return code */
142     unsigned size;          /* requested fixed output block size */
143     unsigned have;          /* bytes written by deflate() call */
144     unsigned char *blk;     /* intermediate and final stream */
145     unsigned char *tmp;     /* close to desired size stream */
146     z_stream def, inf;      /* zlib deflate and inflate states */
147 
148     /* get requested output size */
149     if (argc != 2)
150         quit("need one argument: size of output block");
151     ret = (int)strtol(argv[1], argv + 1, 10);
152     if (argv[1][0] != 0)
153         quit("argument must be a number");
154     if (ret < 8)            /* 8 is minimum zlib stream size */
155         quit("need positive size of 8 or greater");
156     size = (unsigned)ret;
157 
158     printf("zlib version %s\n", ZLIB_VERSION);
159     if (ZWRAP_isUsingZSTDcompression()) printf("zstd version %s\n", zstdVersion());
160 
161     /* allocate memory for buffers and compression engine */
162     blk = (unsigned char*)malloc(size + EXCESS);
163     def.zalloc = Z_NULL;
164     def.zfree = Z_NULL;
165     def.opaque = Z_NULL;
166     ret = deflateInit(&def, Z_DEFAULT_COMPRESSION);
167     if (ret != Z_OK || blk == NULL)
168         quit("out of memory");
169 
170     /* compress from stdin until output full, or no more input */
171     def.avail_out = size + EXCESS;
172     def.next_out = blk;
173     LOG_FITBLK("partcompress1 total_in=%d total_out=%d\n", (int)def.total_in, (int)def.total_out);
174     ret = partcompress(stdin, &def);
175     printf("partcompress total_in=%d total_out=%d\n", (int)def.total_in, (int)def.total_out);
176     if (ret == Z_ERRNO)
177         quit("error reading input");
178 
179     /* if it all fit, then size was undersubscribed -- done! */
180     if (ret == Z_STREAM_END && def.avail_out >= EXCESS) {
181         /* write block to stdout */
182         have = size + EXCESS - def.avail_out;
183    /*     if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
184     *         quit("error writing output"); */
185 
186         /* clean up and print results to stderr */
187         ret = deflateEnd(&def);
188         assert(ret != Z_STREAM_ERROR);
189         free(blk);
190         fprintf(stderr,
191                 "%u bytes unused out of %u requested (all input)\n",
192                 size - have, size);
193         return 0;
194     }
195 
196     /* it didn't all fit -- set up for recompression */
197     inf.zalloc = Z_NULL;
198     inf.zfree = Z_NULL;
199     inf.opaque = Z_NULL;
200     inf.avail_in = 0;
201     inf.next_in = Z_NULL;
202     ret = inflateInit(&inf);
203     tmp = (unsigned char*)malloc(size + EXCESS);
204     if (ret != Z_OK || tmp == NULL)
205         quit("out of memory");
206     ret = deflateReset(&def);
207     assert(ret != Z_STREAM_ERROR);
208 
209     /* do first recompression close to the right amount */
210     inf.avail_in = size + EXCESS;
211     inf.next_in = blk;
212     def.avail_out = size + EXCESS;
213     def.next_out = tmp;
214     LOG_FITBLK("recompress1 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out);
215     ret = recompress(&inf, &def);
216     LOG_FITBLK("recompress1 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out);
217     if (ret == Z_MEM_ERROR)
218         quit("out of memory");
219 
220     /* set up for next recompression */
221     ret = inflateReset(&inf);
222     assert(ret != Z_STREAM_ERROR);
223     ret = deflateReset(&def);
224     assert(ret != Z_STREAM_ERROR);
225 
226     /* do second and final recompression (third compression) */
227     inf.avail_in = size - MARGIN;   /* assure stream will complete */
228     inf.next_in = tmp;
229     def.avail_out = size;
230     def.next_out = blk;
231     LOG_FITBLK("recompress2 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out);
232     ret = recompress(&inf, &def);
233     LOG_FITBLK("recompress2 inf.total_in=%d def.total_out=%d\n", (int)inf.total_in, (int)def.total_out);
234     if (ret == Z_MEM_ERROR)
235         quit("out of memory");
236     assert(ret == Z_STREAM_END);    /* otherwise MARGIN too small */
237 
238     /* done -- write block to stdout */
239     have = size - def.avail_out;
240     /* if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
241      *     quit("error writing output"); */
242 
243     /* clean up and print results to stderr */
244     free(tmp);
245     ret = inflateEnd(&inf);
246     assert(ret != Z_STREAM_ERROR);
247     ret = deflateEnd(&def);
248     assert(ret != Z_STREAM_ERROR);
249     free(blk);
250     fprintf(stderr,
251             "%u bytes unused out of %u requested (%lu input)\n",
252             size - have, size, def.total_in);
253     return 0;
254 }
255