1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/types.h>
30 #include <sys/endian.h>
31 #include <sys/param.h>
32 #include <sys/sysctl.h>
33 #include <sys/stat.h>
34 #include <sys/uio.h>
35 #include <netinet/in.h>
36 #include <assert.h>
37 #include <ctype.h>
38 #include <err.h>
39 #include <fcntl.h>
40 #include <pthread.h>
41 #include <signal.h>
42 #include <stdint.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <unistd.h>
47
48 #include "mkuzip.h"
49 #include "mkuz_cloop.h"
50 #include "mkuz_blockcache.h"
51 #include "mkuz_lzma.h"
52 #include "mkuz_zlib.h"
53 #include "mkuz_zstd.h"
54 #include "mkuz_blk.h"
55 #include "mkuz_cfg.h"
56 #include "mkuz_conveyor.h"
57 #include "mkuz_format.h"
58 #include "mkuz_fqueue.h"
59 #include "mkuz_time.h"
60 #include "mkuz_insize.h"
61
/* Cluster size, in bytes, used when -s is not given (16 KiB). */
#define	DEFAULT_CLSTSIZE	(16 * 1024)
63
/*
 * Compression algorithms selectable on the command line.  The values are
 * used as indices into uzip_fmts[]; UZ_INVALID is the one-past-the-end
 * sentinel that the -A lookup loop stops at.
 */
enum UZ_ALGORITHM {
	UZ_ZLIB    = 0,
	UZ_LZMA    = 1,
	UZ_ZSTD    = 2,
	UZ_INVALID = 3
};
70
71 static const struct mkuz_format uzip_fmts[] = {
72 [UZ_ZLIB] = {
73 .option = "zlib",
74 .magic = CLOOP_MAGIC_ZLIB,
75 .default_sufx = DEFAULT_SUFX_ZLIB,
76 .f_compress_bound = mkuz_zlib_cbound,
77 .f_init = mkuz_zlib_init,
78 .f_compress = mkuz_zlib_compress,
79 },
80 [UZ_LZMA] = {
81 .option = "lzma",
82 .magic = CLOOP_MAGIC_LZMA,
83 .default_sufx = DEFAULT_SUFX_LZMA,
84 .f_compress_bound = mkuz_lzma_cbound,
85 .f_init = mkuz_lzma_init,
86 .f_compress = mkuz_lzma_compress,
87 },
88 [UZ_ZSTD] = {
89 .option = "zstd",
90 .magic = CLOOP_MAGIC_ZSTD,
91 .default_sufx = DEFAULT_SUFX_ZSTD,
92 .f_compress_bound = mkuz_zstd_cbound,
93 .f_init = mkuz_zstd_init,
94 .f_compress = mkuz_zstd_compress,
95 },
96 };
97
98 static struct mkuz_blk *readblock(int, u_int32_t);
99 static void usage(void) __dead2;
100 static void cleanup(void);
101
102 static char *cleanfile = NULL;
103
104 static int
cmp_blkno(const struct mkuz_blk * bp,void * p)105 cmp_blkno(const struct mkuz_blk *bp, void *p)
106 {
107 uint32_t *ap;
108
109 ap = (uint32_t *)p;
110
111 return (bp->info.blkno == *ap);
112 }
113
main(int argc,char ** argv)114 int main(int argc, char **argv)
115 {
116 struct mkuz_cfg cfs;
117 char *oname;
118 uint64_t *toc;
119 int i, io, opt, tmp;
120 struct {
121 int en;
122 FILE *f;
123 } summary;
124 struct iovec iov[2];
125 uint64_t offset, last_offset;
126 struct cloop_header hdr;
127 struct mkuz_conveyor *cvp;
128 struct mkuz_blk_info *chit;
129 size_t ncpusz, ncpu, magiclen;
130 double st, et;
131 enum UZ_ALGORITHM comp_alg;
132 int comp_level;
133
134 st = getdtime();
135
136 ncpusz = sizeof(size_t);
137 if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, NULL, 0) < 0) {
138 ncpu = 1;
139 } else if (ncpu > MAX_WORKERS_AUTO) {
140 ncpu = MAX_WORKERS_AUTO;
141 }
142
143 memset(&hdr, 0, sizeof(hdr));
144 cfs.blksz = DEFAULT_CLSTSIZE;
145 oname = NULL;
146 cfs.verbose = 0;
147 cfs.no_zcomp = 0;
148 cfs.en_dedup = 0;
149 summary.en = 0;
150 summary.f = stderr;
151 comp_alg = UZ_ZLIB;
152 comp_level = USE_DEFAULT_LEVEL;
153 cfs.nworkers = ncpu;
154 struct mkuz_blk *iblk, *oblk;
155
156 while((opt = getopt(argc, argv, "A:C:o:s:vZdLSj:")) != -1) {
157 switch(opt) {
158 case 'A':
159 for (tmp = UZ_ZLIB; tmp < UZ_INVALID; tmp++) {
160 if (strcmp(uzip_fmts[tmp].option, optarg) == 0)
161 break;
162 }
163 if (tmp == UZ_INVALID)
164 errx(1, "invalid algorithm specified: %s",
165 optarg);
166 /* Not reached */
167 comp_alg = tmp;
168 break;
169 case 'C':
170 comp_level = atoi(optarg);
171 break;
172 case 'o':
173 oname = optarg;
174 break;
175
176 case 's':
177 tmp = atoi(optarg);
178 if (tmp <= 0) {
179 errx(1, "invalid cluster size specified: %s",
180 optarg);
181 /* Not reached */
182 }
183 cfs.blksz = tmp;
184 break;
185
186 case 'v':
187 cfs.verbose = 1;
188 break;
189
190 case 'Z':
191 cfs.no_zcomp = 1;
192 break;
193
194 case 'd':
195 cfs.en_dedup = 1;
196 break;
197
198 case 'L':
199 comp_alg = UZ_LZMA;
200 break;
201
202 case 'S':
203 summary.en = 1;
204 summary.f = stdout;
205 break;
206
207 case 'j':
208 tmp = atoi(optarg);
209 if (tmp <= 0) {
210 errx(1, "invalid number of compression threads"
211 " specified: %s", optarg);
212 /* Not reached */
213 }
214 cfs.nworkers = tmp;
215 break;
216
217 default:
218 usage();
219 /* Not reached */
220 }
221 }
222 argc -= optind;
223 argv += optind;
224
225 if (argc != 1) {
226 usage();
227 /* Not reached */
228 }
229
230 cfs.handler = &uzip_fmts[comp_alg];
231
232 magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic));
233 assert(magiclen < sizeof(hdr.magic));
234
235 if (cfs.en_dedup != 0) {
236 /*
237 * Dedupe requires a version 3 format. Don't downgrade newer
238 * formats.
239 */
240 if (hdr.magic[CLOOP_OFS_VERSN] == CLOOP_MAJVER_2)
241 hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3;
242 hdr.magic[CLOOP_OFS_COMPR] =
243 tolower(hdr.magic[CLOOP_OFS_COMPR]);
244 }
245
246 if (cfs.blksz % DEV_BSIZE != 0)
247 errx(1, "cluster size should be multiple of %d", DEV_BSIZE);
248
249 cfs.cbound_blksz = cfs.handler->f_compress_bound(cfs.blksz);
250 if (cfs.cbound_blksz > MAXPHYS)
251 errx(1, "maximal compressed cluster size %zu greater than MAXPHYS %zu",
252 cfs.cbound_blksz, (size_t)MAXPHYS);
253
254 cfs.handler->f_init(&comp_level);
255 cfs.comp_level = comp_level;
256
257 cfs.iname = argv[0];
258 if (oname == NULL) {
259 asprintf(&oname, "%s%s", cfs.iname, cfs.handler->default_sufx);
260 if (oname == NULL) {
261 err(1, "can't allocate memory");
262 /* Not reached */
263 }
264 }
265
266 signal(SIGHUP, exit);
267 signal(SIGINT, exit);
268 signal(SIGTERM, exit);
269 signal(SIGXCPU, exit);
270 signal(SIGXFSZ, exit);
271 atexit(cleanup);
272
273 cfs.fdr = open(cfs.iname, O_RDONLY);
274 if (cfs.fdr < 0) {
275 err(1, "open(%s)", cfs.iname);
276 /* Not reached */
277 }
278 cfs.isize = mkuz_get_insize(&cfs);
279 if (cfs.isize < 0) {
280 errx(1, "can't determine input image size");
281 /* Not reached */
282 }
283 hdr.nblocks = cfs.isize / cfs.blksz;
284 if ((cfs.isize % cfs.blksz) != 0) {
285 if (cfs.verbose != 0)
286 fprintf(stderr, "file size is not multiple "
287 "of %d, padding data\n", cfs.blksz);
288 hdr.nblocks++;
289 }
290 toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc));
291
292 /*
293 * Initialize last+1 entry with non-heap trash. If final padding is
294 * added later, it may or may not be overwritten with an offset
295 * representing the length of the final compressed block. If not,
296 * initialize to a defined value.
297 */
298 toc[hdr.nblocks] = 0;
299
300 cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT,
301 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
302 if (cfs.fdw < 0) {
303 err(1, "open(%s)", oname);
304 /* Not reached */
305 }
306 cleanfile = oname;
307
308 /* Prepare header that we will write later when we have index ready. */
309 iov[0].iov_base = (char *)&hdr;
310 iov[0].iov_len = sizeof(hdr);
311 iov[1].iov_base = (char *)toc;
312 iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc);
313 offset = iov[0].iov_len + iov[1].iov_len;
314
315 /* Reserve space for header */
316 lseek(cfs.fdw, offset, SEEK_SET);
317
318 if (cfs.verbose != 0) {
319 fprintf(stderr, "data size %ju bytes, number of clusters "
320 "%u, index length %zu bytes\n", cfs.isize,
321 hdr.nblocks, iov[1].iov_len);
322 }
323
324 cvp = mkuz_conveyor_ctor(&cfs);
325
326 last_offset = 0;
327 iblk = oblk = NULL;
328 for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) {
329 iblk = readblock(cfs.fdr, cfs.blksz);
330 mkuz_fqueue_enq(cvp->wrk_queue, iblk);
331 if (iblk != MKUZ_BLK_EOF &&
332 (i < (cfs.nworkers * ITEMS_PER_WORKER))) {
333 continue;
334 }
335 drain:
336 oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io);
337 assert(oblk->info.blkno == (unsigned)io);
338 oblk->info.offset = offset;
339 chit = NULL;
340 if (cfs.en_dedup != 0 && oblk->info.len > 0) {
341 chit = mkuz_blkcache_regblock(cfs.fdw, oblk);
342 /*
343 * There should be at least one non-empty block
344 * between us and the backref'ed offset, otherwise
345 * we won't be able to parse that sequence correctly
346 * as it would be indistinguishible from another
347 * empty block.
348 */
349 if (chit != NULL && chit->offset == last_offset) {
350 chit = NULL;
351 }
352 }
353 if (chit != NULL) {
354 toc[io] = htobe64(chit->offset);
355 oblk->info.len = 0;
356 } else {
357 if (oblk->info.len > 0 && write(cfs.fdw, oblk->data,
358 oblk->info.len) < 0) {
359 err(1, "write(%s)", oname);
360 /* Not reached */
361 }
362 toc[io] = htobe64(offset);
363 last_offset = offset;
364 offset += oblk->info.len;
365 }
366 if (cfs.verbose != 0) {
367 fprintf(stderr, "cluster #%d, in %u bytes, "
368 "out len=%lu offset=%lu", io, cfs.blksz,
369 (u_long)oblk->info.len, (u_long)be64toh(toc[io]));
370 if (chit != NULL) {
371 fprintf(stderr, " (backref'ed to #%d)",
372 chit->blkno);
373 }
374 fprintf(stderr, "\n");
375 }
376 free(oblk);
377 io += 1;
378 if (iblk == MKUZ_BLK_EOF) {
379 if (io < i)
380 goto drain;
381 /* Last block, see if we need to add some padding */
382 if ((offset % DEV_BSIZE) == 0)
383 continue;
384 oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE));
385 oblk->info.blkno = io;
386 oblk->info.len = oblk->alen;
387 if (cfs.verbose != 0) {
388 fprintf(stderr, "padding data with %lu bytes "
389 "so that file size is multiple of %d\n",
390 (u_long)oblk->alen, DEV_BSIZE);
391 }
392 mkuz_fqueue_enq(cvp->results, oblk);
393 goto drain;
394 }
395 }
396
397 close(cfs.fdr);
398
399 if (cfs.verbose != 0 || summary.en != 0) {
400 et = getdtime();
401 fprintf(summary.f, "compressed data to %ju bytes, saved %lld "
402 "bytes, %.2f%% decrease, %.2f bytes/sec.\n", offset,
403 (long long)(cfs.isize - offset),
404 100.0 * (long long)(cfs.isize - offset) /
405 (float)cfs.isize, (float)cfs.isize / (et - st));
406 }
407
408 /* Convert to big endian */
409 hdr.blksz = htonl(cfs.blksz);
410 hdr.nblocks = htonl(hdr.nblocks);
411 /* Write headers into pre-allocated space */
412 lseek(cfs.fdw, 0, SEEK_SET);
413 if (writev(cfs.fdw, iov, 2) < 0) {
414 err(1, "writev(%s)", oname);
415 /* Not reached */
416 }
417 cleanfile = NULL;
418 close(cfs.fdw);
419
420 exit(0);
421 }
422
423 static struct mkuz_blk *
readblock(int fd,u_int32_t clstsize)424 readblock(int fd, u_int32_t clstsize)
425 {
426 int numread;
427 struct mkuz_blk *rval;
428 static int blockcnt;
429 off_t cpos;
430
431 rval = mkuz_blk_ctor(clstsize);
432
433 rval->info.blkno = blockcnt;
434 blockcnt += 1;
435 cpos = lseek(fd, 0, SEEK_CUR);
436 if (cpos < 0) {
437 err(1, "readblock: lseek() failed");
438 /* Not reached */
439 }
440 rval->info.offset = cpos;
441
442 numread = read(fd, rval->data, clstsize);
443 if (numread < 0) {
444 err(1, "readblock: read() failed");
445 /* Not reached */
446 }
447 if (numread == 0) {
448 free(rval);
449 return MKUZ_BLK_EOF;
450 }
451 rval->info.len = numread;
452 return rval;
453 }
454
/*
 * Print the command-line synopsis on stderr and exit with status 1.
 * Lists every option accepted by the getopt() string in main(), including
 * -A (compression algorithm) and -C (compression level).  Does not return.
 */
static void
usage(void)
{

	fprintf(stderr, "usage: mkuzip [-vZdLS] [-A algorithm] [-C level] "
	    "[-o outfile] [-s cluster_size] [-j ncompr] infile\n");
	exit(1);
}
463
/*
 * malloc() wrapper that never returns NULL: when malloc() yields NULL the
 * program is terminated through err() with exit status 1.
 *
 * Returns a pointer to `size' bytes of uninitialized memory that the caller
 * releases with free().
 */
void *
mkuz_safe_malloc(size_t size)
{
	void *mem;

	if ((mem = malloc(size)) == NULL) {
		err(1, "can't allocate memory");
		/* Not reached */
	}
	return mem;
}
476
/*
 * Like mkuz_safe_malloc(), but the returned memory is filled with zero
 * bytes.  Allocation failure is handled by mkuz_safe_malloc().
 */
void *
mkuz_safe_zmalloc(size_t size)
{
	void *mem;

	mem = mkuz_safe_malloc(size);
	memset(mem, 0, size);
	return mem;
}
486
487 static void
cleanup(void)488 cleanup(void)
489 {
490
491 if (cleanfile != NULL)
492 unlink(cleanfile);
493 }
494
/*
 * Test whether every byte of a memory region equals `val'.
 *
 * memory - start of the region to examine
 * val    - byte value each position is compared against
 * size   - length of the region in bytes
 *
 * Returns non-zero when all `size' bytes equal `val', 0 otherwise.  An
 * empty region (size == 0) is reported as matching and is never
 * dereferenced.
 */
int
mkuz_memvcmp(const void *memory, unsigned char val, size_t size)
{
	const unsigned char *mm;

	/*
	 * Without this guard the first byte would be read and `size - 1'
	 * would wrap around to SIZE_MAX in the memcmp() below.
	 */
	if (size == 0)
		return (1);

	mm = (const unsigned char *)memory;
	/*
	 * The first byte equals `val' and each byte equals its successor,
	 * hence all bytes equal `val'.
	 */
	return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0;
}
503