xref: /freebsd/usr.bin/mkuzip/mkuzip.c (revision 4d846d260e2b9a3d4d0a701462568268cbfe7a5b)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
287f4caa8cSMaxim Sobolev 
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/endian.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "mkuzip.h"
#include "mkuz_cloop.h"
#include "mkuz_blockcache.h"
#include "mkuz_lzma.h"
#include "mkuz_zlib.h"
#include "mkuz_zstd.h"
#include "mkuz_blk.h"
#include "mkuz_cfg.h"
#include "mkuz_conveyor.h"
#include "mkuz_format.h"
#include "mkuz_fqueue.h"
#include "mkuz_time.h"
#include "mkuz_insize.h"
648f8cb840SMaxim Sobolev 
/* Cluster size, in bytes, used when -s is not given. */
#define DEFAULT_CLSTSIZE	16384

/*
 * Compression back-ends.  The numeric values double as indices into
 * uzip_fmts[]; UZ_INVALID is the one-past-the-end sentinel used when
 * scanning that table.
 */
enum UZ_ALGORITHM {
	UZ_ZLIB = 0,
	UZ_LZMA = 1,
	UZ_ZSTD = 2,
	UZ_INVALID = 3
};
738f8cb840SMaxim Sobolev 
74eefd8f96SConrad Meyer static const struct mkuz_format uzip_fmts[] = {
75eefd8f96SConrad Meyer 	[UZ_ZLIB] = {
76eefd8f96SConrad Meyer 		.option = "zlib",
77eefd8f96SConrad Meyer 		.magic = CLOOP_MAGIC_ZLIB,
78eefd8f96SConrad Meyer 		.default_sufx = DEFAULT_SUFX_ZLIB,
79eefd8f96SConrad Meyer 		.f_compress_bound = mkuz_zlib_cbound,
80eefd8f96SConrad Meyer 		.f_init = mkuz_zlib_init,
81eefd8f96SConrad Meyer 		.f_compress = mkuz_zlib_compress,
82eefd8f96SConrad Meyer 	},
83eefd8f96SConrad Meyer 	[UZ_LZMA] = {
84eefd8f96SConrad Meyer 		.option = "lzma",
858f8cb840SMaxim Sobolev 		.magic = CLOOP_MAGIC_LZMA,
868f8cb840SMaxim Sobolev 		.default_sufx = DEFAULT_SUFX_LZMA,
87eefd8f96SConrad Meyer 		.f_compress_bound = mkuz_lzma_cbound,
88eefd8f96SConrad Meyer 		.f_init = mkuz_lzma_init,
89eefd8f96SConrad Meyer 		.f_compress = mkuz_lzma_compress,
90eefd8f96SConrad Meyer 	},
91eefd8f96SConrad Meyer 	[UZ_ZSTD] = {
92eefd8f96SConrad Meyer 		.option = "zstd",
93eefd8f96SConrad Meyer 		.magic = CLOOP_MAGIC_ZSTD,
94eefd8f96SConrad Meyer 		.default_sufx = DEFAULT_SUFX_ZSTD,
95eefd8f96SConrad Meyer 		.f_compress_bound = mkuz_zstd_cbound,
96eefd8f96SConrad Meyer 		.f_init = mkuz_zstd_init,
97eefd8f96SConrad Meyer 		.f_compress = mkuz_zstd_compress,
98eefd8f96SConrad Meyer 	},
998f8cb840SMaxim Sobolev };
1007f4caa8cSMaxim Sobolev 
/* Forward declarations of file-local helpers defined further down. */
static void cleanup(void);
static struct mkuz_blk *readblock(int fd, u_int32_t clstsize);
static void usage(void);

/* Path that cleanup() unlinks at exit; NULL means there is nothing to remove. */
static char *cleanfile = NULL;
1067f4caa8cSMaxim Sobolev 
1074fc55e3eSMaxim Sobolev static int
1084fc55e3eSMaxim Sobolev cmp_blkno(const struct mkuz_blk *bp, void *p)
1094fc55e3eSMaxim Sobolev {
1104fc55e3eSMaxim Sobolev 	uint32_t *ap;
1114fc55e3eSMaxim Sobolev 
1124fc55e3eSMaxim Sobolev 	ap = (uint32_t *)p;
1134fc55e3eSMaxim Sobolev 
1144fc55e3eSMaxim Sobolev 	return (bp->info.blkno == *ap);
1154fc55e3eSMaxim Sobolev }
1164fc55e3eSMaxim Sobolev 
1177f4caa8cSMaxim Sobolev int main(int argc, char **argv)
1187f4caa8cSMaxim Sobolev {
1194fc55e3eSMaxim Sobolev 	struct mkuz_cfg cfs;
120bc3b2c55SMaxim Sobolev 	char *oname;
1217f4caa8cSMaxim Sobolev 	uint64_t *toc;
1224fc55e3eSMaxim Sobolev 	int i, io, opt, tmp;
12362ee4b69SMaxim Sobolev 	struct {
12462ee4b69SMaxim Sobolev 		int en;
12562ee4b69SMaxim Sobolev 		FILE *f;
12662ee4b69SMaxim Sobolev 	} summary;
1277f4caa8cSMaxim Sobolev 	struct iovec iov[2];
1288f8cb840SMaxim Sobolev 	uint64_t offset, last_offset;
1298f8cb840SMaxim Sobolev 	struct cloop_header hdr;
1304fc55e3eSMaxim Sobolev 	struct mkuz_conveyor *cvp;
1314fc55e3eSMaxim Sobolev         void *c_ctx;
1324fc55e3eSMaxim Sobolev 	struct mkuz_blk_info *chit;
1330ce59aa8SAlan Somers 	size_t ncpusz, ncpu, magiclen;
1344fc55e3eSMaxim Sobolev 	double st, et;
135eefd8f96SConrad Meyer 	enum UZ_ALGORITHM comp_alg;
136eefd8f96SConrad Meyer 	int comp_level;
1374fc55e3eSMaxim Sobolev 
1384fc55e3eSMaxim Sobolev 	st = getdtime();
1394fc55e3eSMaxim Sobolev 
1404fc55e3eSMaxim Sobolev 	ncpusz = sizeof(size_t);
1414fc55e3eSMaxim Sobolev 	if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, NULL, 0) < 0) {
1424fc55e3eSMaxim Sobolev 		ncpu = 1;
1434fc55e3eSMaxim Sobolev 	} else if (ncpu > MAX_WORKERS_AUTO) {
1444fc55e3eSMaxim Sobolev 		ncpu = MAX_WORKERS_AUTO;
1454fc55e3eSMaxim Sobolev 	}
1467f4caa8cSMaxim Sobolev 
1477f4caa8cSMaxim Sobolev 	memset(&hdr, 0, sizeof(hdr));
1484fc55e3eSMaxim Sobolev 	cfs.blksz = DEFAULT_CLSTSIZE;
1497f4caa8cSMaxim Sobolev 	oname = NULL;
1504fc55e3eSMaxim Sobolev 	cfs.verbose = 0;
1514fc55e3eSMaxim Sobolev 	cfs.no_zcomp = 0;
1524fc55e3eSMaxim Sobolev 	cfs.en_dedup = 0;
15362ee4b69SMaxim Sobolev 	summary.en = 0;
15462ee4b69SMaxim Sobolev 	summary.f = stderr;
155eefd8f96SConrad Meyer 	comp_alg = UZ_ZLIB;
156eefd8f96SConrad Meyer 	comp_level = USE_DEFAULT_LEVEL;
1574fc55e3eSMaxim Sobolev 	cfs.nworkers = ncpu;
1584fc55e3eSMaxim Sobolev 	struct mkuz_blk *iblk, *oblk;
1597f4caa8cSMaxim Sobolev 
160eefd8f96SConrad Meyer 	while((opt = getopt(argc, argv, "A:C:o:s:vZdLSj:")) != -1) {
1617f4caa8cSMaxim Sobolev 		switch(opt) {
162eefd8f96SConrad Meyer 		case 'A':
163eefd8f96SConrad Meyer 			for (tmp = UZ_ZLIB; tmp < UZ_INVALID; tmp++) {
164eefd8f96SConrad Meyer 				if (strcmp(uzip_fmts[tmp].option, optarg) == 0)
165eefd8f96SConrad Meyer 					break;
166eefd8f96SConrad Meyer 			}
167eefd8f96SConrad Meyer 			if (tmp == UZ_INVALID)
168eefd8f96SConrad Meyer 				errx(1, "invalid algorithm specified: %s",
169eefd8f96SConrad Meyer 				    optarg);
170eefd8f96SConrad Meyer 				/* Not reached */
171eefd8f96SConrad Meyer 			comp_alg = tmp;
172eefd8f96SConrad Meyer 			break;
173eefd8f96SConrad Meyer 		case 'C':
174eefd8f96SConrad Meyer 			comp_level = atoi(optarg);
175eefd8f96SConrad Meyer 			break;
1767f4caa8cSMaxim Sobolev 		case 'o':
1777f4caa8cSMaxim Sobolev 			oname = optarg;
1787f4caa8cSMaxim Sobolev 			break;
1797f4caa8cSMaxim Sobolev 
1807f4caa8cSMaxim Sobolev 		case 's':
1817f4caa8cSMaxim Sobolev 			tmp = atoi(optarg);
1827f4caa8cSMaxim Sobolev 			if (tmp <= 0) {
1837f4caa8cSMaxim Sobolev 				errx(1, "invalid cluster size specified: %s",
1847f4caa8cSMaxim Sobolev 				    optarg);
1857f4caa8cSMaxim Sobolev 				/* Not reached */
1867f4caa8cSMaxim Sobolev 			}
1874fc55e3eSMaxim Sobolev 			cfs.blksz = tmp;
1887f4caa8cSMaxim Sobolev 			break;
1897f4caa8cSMaxim Sobolev 
1907f4caa8cSMaxim Sobolev 		case 'v':
1914fc55e3eSMaxim Sobolev 			cfs.verbose = 1;
1927f4caa8cSMaxim Sobolev 			break;
1937f4caa8cSMaxim Sobolev 
1948f8cb840SMaxim Sobolev 		case 'Z':
1954fc55e3eSMaxim Sobolev 			cfs.no_zcomp = 1;
1968f8cb840SMaxim Sobolev 			break;
1978f8cb840SMaxim Sobolev 
1988f8cb840SMaxim Sobolev 		case 'd':
1994fc55e3eSMaxim Sobolev 			cfs.en_dedup = 1;
2008f8cb840SMaxim Sobolev 			break;
2018f8cb840SMaxim Sobolev 
2028f8cb840SMaxim Sobolev 		case 'L':
203eefd8f96SConrad Meyer 			comp_alg = UZ_LZMA;
2048f8cb840SMaxim Sobolev 			break;
2058f8cb840SMaxim Sobolev 
206d83e0778SMaxim Sobolev 		case 'S':
20762ee4b69SMaxim Sobolev 			summary.en = 1;
20862ee4b69SMaxim Sobolev 			summary.f = stdout;
209d83e0778SMaxim Sobolev 			break;
210d83e0778SMaxim Sobolev 
2114fc55e3eSMaxim Sobolev 		case 'j':
2124fc55e3eSMaxim Sobolev 			tmp = atoi(optarg);
2134fc55e3eSMaxim Sobolev 			if (tmp <= 0) {
2144fc55e3eSMaxim Sobolev 				errx(1, "invalid number of compression threads"
2154fc55e3eSMaxim Sobolev                                     " specified: %s", optarg);
2164fc55e3eSMaxim Sobolev 				/* Not reached */
2174fc55e3eSMaxim Sobolev 			}
2184fc55e3eSMaxim Sobolev 			cfs.nworkers = tmp;
2194fc55e3eSMaxim Sobolev 			break;
2204fc55e3eSMaxim Sobolev 
2217f4caa8cSMaxim Sobolev 		default:
2227f4caa8cSMaxim Sobolev 			usage();
2237f4caa8cSMaxim Sobolev 			/* Not reached */
2247f4caa8cSMaxim Sobolev 		}
2257f4caa8cSMaxim Sobolev 	}
2267f4caa8cSMaxim Sobolev 	argc -= optind;
2277f4caa8cSMaxim Sobolev 	argv += optind;
2287f4caa8cSMaxim Sobolev 
2297f4caa8cSMaxim Sobolev 	if (argc != 1) {
2307f4caa8cSMaxim Sobolev 		usage();
2317f4caa8cSMaxim Sobolev 		/* Not reached */
2327f4caa8cSMaxim Sobolev 	}
2337f4caa8cSMaxim Sobolev 
234eefd8f96SConrad Meyer 	cfs.handler = &uzip_fmts[comp_alg];
235eefd8f96SConrad Meyer 
2360ce59aa8SAlan Somers 	magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic));
2370ce59aa8SAlan Somers 	assert(magiclen < sizeof(hdr.magic));
2388f8cb840SMaxim Sobolev 
2394fc55e3eSMaxim Sobolev 	if (cfs.en_dedup != 0) {
240eefd8f96SConrad Meyer 		/*
241eefd8f96SConrad Meyer 		 * Dedupe requires a version 3 format.  Don't downgrade newer
242eefd8f96SConrad Meyer 		 * formats.
243eefd8f96SConrad Meyer 		 */
244eefd8f96SConrad Meyer 		if (hdr.magic[CLOOP_OFS_VERSN] == CLOOP_MAJVER_2)
2458f8cb840SMaxim Sobolev 			hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3;
2468f8cb840SMaxim Sobolev 		hdr.magic[CLOOP_OFS_COMPR] =
2478f8cb840SMaxim Sobolev 		    tolower(hdr.magic[CLOOP_OFS_COMPR]);
2488f8cb840SMaxim Sobolev 	}
2498f8cb840SMaxim Sobolev 
250eefd8f96SConrad Meyer 	if (cfs.blksz % DEV_BSIZE != 0)
251eefd8f96SConrad Meyer 		errx(1, "cluster size should be multiple of %d", DEV_BSIZE);
252eefd8f96SConrad Meyer 
253eefd8f96SConrad Meyer 	cfs.cbound_blksz = cfs.handler->f_compress_bound(cfs.blksz);
254eefd8f96SConrad Meyer 	if (cfs.cbound_blksz > MAXPHYS)
255eefd8f96SConrad Meyer 		errx(1, "maximal compressed cluster size %zu greater than MAXPHYS %zu",
256eefd8f96SConrad Meyer 		    cfs.cbound_blksz, (size_t)MAXPHYS);
257eefd8f96SConrad Meyer 
258eefd8f96SConrad Meyer 	c_ctx = cfs.handler->f_init(&comp_level);
259eefd8f96SConrad Meyer 	cfs.comp_level = comp_level;
2608f8cb840SMaxim Sobolev 
261bc3b2c55SMaxim Sobolev 	cfs.iname = argv[0];
2627f4caa8cSMaxim Sobolev 	if (oname == NULL) {
263bc3b2c55SMaxim Sobolev 		asprintf(&oname, "%s%s", cfs.iname, cfs.handler->default_sufx);
2647f4caa8cSMaxim Sobolev 		if (oname == NULL) {
2657f4caa8cSMaxim Sobolev 			err(1, "can't allocate memory");
2667f4caa8cSMaxim Sobolev 			/* Not reached */
2677f4caa8cSMaxim Sobolev 		}
2687f4caa8cSMaxim Sobolev 	}
2697f4caa8cSMaxim Sobolev 
2707f4caa8cSMaxim Sobolev 	signal(SIGHUP, exit);
2717f4caa8cSMaxim Sobolev 	signal(SIGINT, exit);
2727f4caa8cSMaxim Sobolev 	signal(SIGTERM, exit);
2737f4caa8cSMaxim Sobolev 	signal(SIGXCPU, exit);
2747f4caa8cSMaxim Sobolev 	signal(SIGXFSZ, exit);
2757f4caa8cSMaxim Sobolev 	atexit(cleanup);
2767f4caa8cSMaxim Sobolev 
277bc3b2c55SMaxim Sobolev 	cfs.fdr = open(cfs.iname, O_RDONLY);
2784fc55e3eSMaxim Sobolev 	if (cfs.fdr < 0) {
279bc3b2c55SMaxim Sobolev 		err(1, "open(%s)", cfs.iname);
2807f4caa8cSMaxim Sobolev 		/* Not reached */
2817f4caa8cSMaxim Sobolev 	}
282bc3b2c55SMaxim Sobolev 	cfs.isize = mkuz_get_insize(&cfs);
283bc3b2c55SMaxim Sobolev 	if (cfs.isize < 0) {
284bc3b2c55SMaxim Sobolev 		errx(1, "can't determine input image size");
28527d0a1a4SMax Khon 		/* Not reached */
28627d0a1a4SMax Khon 	}
287bc3b2c55SMaxim Sobolev 	hdr.nblocks = cfs.isize / cfs.blksz;
288bc3b2c55SMaxim Sobolev 	if ((cfs.isize % cfs.blksz) != 0) {
2894fc55e3eSMaxim Sobolev 		if (cfs.verbose != 0)
2900b99ac63SMaxim Sobolev 			fprintf(stderr, "file size is not multiple "
2914fc55e3eSMaxim Sobolev 			"of %d, padding data\n", cfs.blksz);
2920b99ac63SMaxim Sobolev 		hdr.nblocks++;
2930b99ac63SMaxim Sobolev 	}
2948f8cb840SMaxim Sobolev 	toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc));
2957f4caa8cSMaxim Sobolev 
296eefd8f96SConrad Meyer 	/*
297eefd8f96SConrad Meyer 	 * Initialize last+1 entry with non-heap trash.  If final padding is
298eefd8f96SConrad Meyer 	 * added later, it may or may not be overwritten with an offset
299eefd8f96SConrad Meyer 	 * representing the length of the final compressed block.  If not,
300eefd8f96SConrad Meyer 	 * initialize to a defined value.
301eefd8f96SConrad Meyer 	 */
302eefd8f96SConrad Meyer 	toc[hdr.nblocks] = 0;
303eefd8f96SConrad Meyer 
3044fc55e3eSMaxim Sobolev 	cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT,
3055cf3bf70SMax Khon 		   S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
3064fc55e3eSMaxim Sobolev 	if (cfs.fdw < 0) {
307d72d8f53SPawel Jakub Dawidek 		err(1, "open(%s)", oname);
3087f4caa8cSMaxim Sobolev 		/* Not reached */
3097f4caa8cSMaxim Sobolev 	}
3107f4caa8cSMaxim Sobolev 	cleanfile = oname;
3117f4caa8cSMaxim Sobolev 
3127f4caa8cSMaxim Sobolev 	/* Prepare header that we will write later when we have index ready. */
3137f4caa8cSMaxim Sobolev 	iov[0].iov_base = (char *)&hdr;
3147f4caa8cSMaxim Sobolev 	iov[0].iov_len = sizeof(hdr);
3157f4caa8cSMaxim Sobolev 	iov[1].iov_base = (char *)toc;
3167f4caa8cSMaxim Sobolev 	iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc);
3177f4caa8cSMaxim Sobolev 	offset = iov[0].iov_len + iov[1].iov_len;
3187f4caa8cSMaxim Sobolev 
3197f4caa8cSMaxim Sobolev 	/* Reserve space for header */
3204fc55e3eSMaxim Sobolev 	lseek(cfs.fdw, offset, SEEK_SET);
3217f4caa8cSMaxim Sobolev 
3224fc55e3eSMaxim Sobolev 	if (cfs.verbose != 0) {
323ed9302fdSMaxim Sobolev 		fprintf(stderr, "data size %ju bytes, number of clusters "
324bc3b2c55SMaxim Sobolev 		    "%u, index length %zu bytes\n", cfs.isize,
3250b99ac63SMaxim Sobolev 		    hdr.nblocks, iov[1].iov_len);
3264fc55e3eSMaxim Sobolev 	}
3274fc55e3eSMaxim Sobolev 
3284fc55e3eSMaxim Sobolev 	cvp = mkuz_conveyor_ctor(&cfs);
3297f4caa8cSMaxim Sobolev 
3308f8cb840SMaxim Sobolev 	last_offset = 0;
3314fc55e3eSMaxim Sobolev         iblk = oblk = NULL;
3324fc55e3eSMaxim Sobolev 	for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) {
3334fc55e3eSMaxim Sobolev 		iblk = readblock(cfs.fdr, cfs.blksz);
3344fc55e3eSMaxim Sobolev 		mkuz_fqueue_enq(cvp->wrk_queue, iblk);
3354fc55e3eSMaxim Sobolev 		if (iblk != MKUZ_BLK_EOF &&
3364fc55e3eSMaxim Sobolev 		    (i < (cfs.nworkers * ITEMS_PER_WORKER))) {
3374fc55e3eSMaxim Sobolev 			continue;
3387f4caa8cSMaxim Sobolev 		}
3394fc55e3eSMaxim Sobolev drain:
3404fc55e3eSMaxim Sobolev 		oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io);
3414fc55e3eSMaxim Sobolev 		assert(oblk->info.blkno == (unsigned)io);
3424fc55e3eSMaxim Sobolev 		oblk->info.offset = offset;
3434fc55e3eSMaxim Sobolev 		chit = NULL;
3444fc55e3eSMaxim Sobolev 		if (cfs.en_dedup != 0 && oblk->info.len > 0) {
3454fc55e3eSMaxim Sobolev 			chit = mkuz_blkcache_regblock(cfs.fdw, oblk);
3468f8cb840SMaxim Sobolev 			/*
3478f8cb840SMaxim Sobolev 			 * There should be at least one non-empty block
3488f8cb840SMaxim Sobolev 			 * between us and the backref'ed offset, otherwise
3498f8cb840SMaxim Sobolev 			 * we won't be able to parse that sequence correctly
3508f8cb840SMaxim Sobolev 			 * as it would be indistinguishible from another
3518f8cb840SMaxim Sobolev 			 * empty block.
3528f8cb840SMaxim Sobolev 			 */
3538f8cb840SMaxim Sobolev 			if (chit != NULL && chit->offset == last_offset) {
3548f8cb840SMaxim Sobolev 				chit = NULL;
3558f8cb840SMaxim Sobolev 			}
3568f8cb840SMaxim Sobolev 		}
3578f8cb840SMaxim Sobolev 		if (chit != NULL) {
3584fc55e3eSMaxim Sobolev 			toc[io] = htobe64(chit->offset);
3594fc55e3eSMaxim Sobolev 			oblk->info.len = 0;
3608f8cb840SMaxim Sobolev 		} else {
3614fc55e3eSMaxim Sobolev 			if (oblk->info.len > 0 && write(cfs.fdw, oblk->data,
3624fc55e3eSMaxim Sobolev 			    oblk->info.len) < 0) {
363d72d8f53SPawel Jakub Dawidek 				err(1, "write(%s)", oname);
3647f4caa8cSMaxim Sobolev 				/* Not reached */
3657f4caa8cSMaxim Sobolev 			}
3664fc55e3eSMaxim Sobolev 			toc[io] = htobe64(offset);
3678f8cb840SMaxim Sobolev 			last_offset = offset;
3684fc55e3eSMaxim Sobolev 			offset += oblk->info.len;
3697f4caa8cSMaxim Sobolev 		}
3704fc55e3eSMaxim Sobolev 		if (cfs.verbose != 0) {
3718f8cb840SMaxim Sobolev 			fprintf(stderr, "cluster #%d, in %u bytes, "
3724fc55e3eSMaxim Sobolev 			    "out len=%lu offset=%lu", io, cfs.blksz,
3734fc55e3eSMaxim Sobolev 			    (u_long)oblk->info.len, (u_long)be64toh(toc[io]));
3748f8cb840SMaxim Sobolev 			if (chit != NULL) {
3758f8cb840SMaxim Sobolev 				fprintf(stderr, " (backref'ed to #%d)",
3768f8cb840SMaxim Sobolev 				    chit->blkno);
3778f8cb840SMaxim Sobolev 			}
3788f8cb840SMaxim Sobolev 			fprintf(stderr, "\n");
3794fc55e3eSMaxim Sobolev 		}
3804fc55e3eSMaxim Sobolev 		free(oblk);
3814fc55e3eSMaxim Sobolev 		io += 1;
3824fc55e3eSMaxim Sobolev 		if (iblk == MKUZ_BLK_EOF) {
3834fc55e3eSMaxim Sobolev 			if (io < i)
3844fc55e3eSMaxim Sobolev 				goto drain;
3854fc55e3eSMaxim Sobolev 			/* Last block, see if we need to add some padding */
3864fc55e3eSMaxim Sobolev 			if ((offset % DEV_BSIZE) == 0)
3874fc55e3eSMaxim Sobolev 				continue;
3884fc55e3eSMaxim Sobolev 			oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE));
3894fc55e3eSMaxim Sobolev 			oblk->info.blkno = io;
3904fc55e3eSMaxim Sobolev 			oblk->info.len = oblk->alen;
3914fc55e3eSMaxim Sobolev 			if (cfs.verbose != 0) {
3924fc55e3eSMaxim Sobolev 				fprintf(stderr, "padding data with %lu bytes "
3934fc55e3eSMaxim Sobolev 				    "so that file size is multiple of %d\n",
3944fc55e3eSMaxim Sobolev 				    (u_long)oblk->alen, DEV_BSIZE);
3954fc55e3eSMaxim Sobolev 			}
3964fc55e3eSMaxim Sobolev 			mkuz_fqueue_enq(cvp->results, oblk);
3974fc55e3eSMaxim Sobolev 			goto drain;
3988f8cb840SMaxim Sobolev 		}
3998f8cb840SMaxim Sobolev 	}
4007f4caa8cSMaxim Sobolev 
4014fc55e3eSMaxim Sobolev 	close(cfs.fdr);
4024fc55e3eSMaxim Sobolev 
4034fc55e3eSMaxim Sobolev 	if (cfs.verbose != 0 || summary.en != 0) {
4044fc55e3eSMaxim Sobolev 		et = getdtime();
40562ee4b69SMaxim Sobolev 		fprintf(summary.f, "compressed data to %ju bytes, saved %lld "
4064fc55e3eSMaxim Sobolev 		    "bytes, %.2f%% decrease, %.2f bytes/sec.\n", offset,
407bc3b2c55SMaxim Sobolev 		    (long long)(cfs.isize - offset),
408bc3b2c55SMaxim Sobolev 		    100.0 * (long long)(cfs.isize - offset) /
409bc3b2c55SMaxim Sobolev 		    (float)cfs.isize, (float)cfs.isize / (et - st));
4104fc55e3eSMaxim Sobolev 	}
4117f4caa8cSMaxim Sobolev 
4127f4caa8cSMaxim Sobolev 	/* Convert to big endian */
4134fc55e3eSMaxim Sobolev 	hdr.blksz = htonl(cfs.blksz);
4147f4caa8cSMaxim Sobolev 	hdr.nblocks = htonl(hdr.nblocks);
4157f4caa8cSMaxim Sobolev 	/* Write headers into pre-allocated space */
4164fc55e3eSMaxim Sobolev 	lseek(cfs.fdw, 0, SEEK_SET);
4174fc55e3eSMaxim Sobolev 	if (writev(cfs.fdw, iov, 2) < 0) {
418d72d8f53SPawel Jakub Dawidek 		err(1, "writev(%s)", oname);
4197f4caa8cSMaxim Sobolev 		/* Not reached */
4207f4caa8cSMaxim Sobolev 	}
4217f4caa8cSMaxim Sobolev 	cleanfile = NULL;
4224fc55e3eSMaxim Sobolev 	close(cfs.fdw);
4237f4caa8cSMaxim Sobolev 
4247f4caa8cSMaxim Sobolev 	exit(0);
4257f4caa8cSMaxim Sobolev }
4267f4caa8cSMaxim Sobolev 
4274fc55e3eSMaxim Sobolev static struct mkuz_blk *
4284fc55e3eSMaxim Sobolev readblock(int fd, u_int32_t clstsize)
4290b99ac63SMaxim Sobolev {
4307f4caa8cSMaxim Sobolev 	int numread;
4314fc55e3eSMaxim Sobolev 	struct mkuz_blk *rval;
4324fc55e3eSMaxim Sobolev 	static int blockcnt;
4334fc55e3eSMaxim Sobolev 	off_t cpos;
4347f4caa8cSMaxim Sobolev 
4354fc55e3eSMaxim Sobolev 	rval = mkuz_blk_ctor(clstsize);
4364fc55e3eSMaxim Sobolev 
4374fc55e3eSMaxim Sobolev 	rval->info.blkno = blockcnt;
4384fc55e3eSMaxim Sobolev 	blockcnt += 1;
4394fc55e3eSMaxim Sobolev 	cpos = lseek(fd, 0, SEEK_CUR);
4404fc55e3eSMaxim Sobolev 	if (cpos < 0) {
4414fc55e3eSMaxim Sobolev 		err(1, "readblock: lseek() failed");
4424fc55e3eSMaxim Sobolev 		/* Not reached */
4434fc55e3eSMaxim Sobolev 	}
4444fc55e3eSMaxim Sobolev 	rval->info.offset = cpos;
4454fc55e3eSMaxim Sobolev 
4464fc55e3eSMaxim Sobolev 	numread = read(fd, rval->data, clstsize);
4477f4caa8cSMaxim Sobolev 	if (numread < 0) {
4484fc55e3eSMaxim Sobolev 		err(1, "readblock: read() failed");
4497f4caa8cSMaxim Sobolev 		/* Not reached */
4507f4caa8cSMaxim Sobolev 	}
4517f4caa8cSMaxim Sobolev 	if (numread == 0) {
4524fc55e3eSMaxim Sobolev 		free(rval);
4534fc55e3eSMaxim Sobolev 		return MKUZ_BLK_EOF;
4547f4caa8cSMaxim Sobolev 	}
4554fc55e3eSMaxim Sobolev 	rval->info.len = numread;
4564fc55e3eSMaxim Sobolev 	return rval;
4577f4caa8cSMaxim Sobolev }
4587f4caa8cSMaxim Sobolev 
/*
 * Print the command-line synopsis to stderr and exit with status 1.
 * The synopsis lists every option accepted by the getopt() string in
 * main(), including -A (algorithm) and -C (compression level).
 */
static void
usage(void)
{

	fprintf(stderr, "usage: mkuzip [-vZdLS] [-A zlib|lzma|zstd] "
	    "[-C compression_level] [-o outfile] [-s cluster_size] "
	    "[-j ncompr] infile\n");
	exit(1);
}
4677f4caa8cSMaxim Sobolev 
/*
 * malloc() wrapper that never returns NULL: when the allocation fails the
 * program is terminated through err() with exit status 1.
 */
void *
mkuz_safe_malloc(size_t size)
{
	void *mem;

	if ((mem = malloc(size)) == NULL) {
		err(1, "can't allocate memory");
		/* Not reached */
	}
	return (mem);
}
4807f4caa8cSMaxim Sobolev 
/*
 * Like mkuz_safe_malloc(), but the returned memory is filled with zero
 * bytes.
 */
void *
mkuz_safe_zmalloc(size_t size)
{
	void *mem = mkuz_safe_malloc(size);

	memset(mem, 0, size);
	return (mem);
}
4904fc55e3eSMaxim Sobolev 
4917f4caa8cSMaxim Sobolev static void
4920b99ac63SMaxim Sobolev cleanup(void)
4930b99ac63SMaxim Sobolev {
4947f4caa8cSMaxim Sobolev 
4957f4caa8cSMaxim Sobolev 	if (cleanfile != NULL)
4967f4caa8cSMaxim Sobolev 		unlink(cleanfile);
4977f4caa8cSMaxim Sobolev }
4988f8cb840SMaxim Sobolev 
/*
 * Test whether every one of the `size' bytes at `memory' equals `val'.
 *
 * Returns 1 if they all do and 0 otherwise.  An empty region (size == 0)
 * trivially satisfies the condition and yields 1 without touching
 * `memory'; previously that case read one byte and passed SIZE_MAX to
 * memcmp().
 */
int
mkuz_memvcmp(const void *memory, unsigned char val, size_t size)
{
	const unsigned char *mm;

	if (size == 0)
		return (1);

	mm = (const unsigned char *)memory;
	/*
	 * If the first byte matches and each byte equals its successor,
	 * then all bytes match: compare the buffer with itself shifted by
	 * one.
	 */
	return (*mm == val && memcmp(mm, mm + 1, size - 1) == 0);
}
507