xref: /freebsd/usr.bin/mkuzip/mkuzip.c (revision acc1a9ef8333c798c210fa94be6af4d5fe2dd794)
1 /*
2  * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/types.h>
32 #include <sys/disk.h>
33 #include <sys/endian.h>
34 #include <sys/param.h>
35 #include <sys/stat.h>
36 #include <sys/uio.h>
37 #include <netinet/in.h>
38 #include <ctype.h>
39 #include <err.h>
40 #include <fcntl.h>
41 #include <signal.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <unistd.h>
46 
47 #include "mkuzip.h"
48 #include "mkuz_cloop.h"
49 #include "mkuz_blockcache.h"
50 #include "mkuz_zlib.h"
51 #include "mkuz_lzma.h"
52 
/*
 * Declare `<func>_t` as a pointer-to-function type that returns `rval` and
 * takes the remaining macro arguments as its parameter list.  Uses the
 * standard C99 variadic macro form rather than the GNU named-variadic
 * extension.
 */
#define DEFINE_RAW_METHOD(func, rval, ...) \
	typedef rval (*func##_t)(__VA_ARGS__)

/* Cluster size, in bytes, that main() uses unless -s overrides it. */
#define DEFAULT_CLSTSIZE	16384

/* void *(*f_init_t)(uint32_t) */
DEFINE_RAW_METHOD(f_init, void *, uint32_t);
/* void (*f_compress_t)(const char *, uint32_t *) */
DEFINE_RAW_METHOD(f_compress, void, const char *, uint32_t *);
59 
60 struct mkuz_format {
61 	const char *magic;
62 	const char *default_sufx;
63 	f_init_t f_init;
64 	f_compress_t f_compress;
65 };
66 
67 static struct mkuz_format uzip_fmt = {
68 	.magic = CLOOP_MAGIC_ZLIB,
69 	.default_sufx = DEFAULT_SUFX_ZLIB,
70 	.f_init = &mkuz_zlib_init,
71 	.f_compress = &mkuz_zlib_compress
72 };
73 
74 static struct mkuz_format ulzma_fmt = {
75         .magic = CLOOP_MAGIC_LZMA,
76         .default_sufx = DEFAULT_SUFX_LZMA,
77         .f_init = &mkuz_lzma_init,
78         .f_compress = &mkuz_lzma_compress
79 };
80 
/* File-local helpers; their definitions follow main(). */
static char *readblock(int fd, char *ibuf, u_int32_t clstsize);
static void usage(void);
static void cleanup(void);
static int  memvcmp(const void *memory, unsigned char val, size_t size);

/*
 * Path that cleanup() unlinks at exit, or NULL when there is nothing to
 * remove.  Static storage starts out as NULL.
 */
static char *cleanfile;
87 
88 int main(int argc, char **argv)
89 {
90 	char *iname, *oname, *obuf, *ibuf;
91 	uint64_t *toc;
92 	int fdr, fdw, i, opt, verbose, no_zcomp, tmp, en_dedup;
93 	struct {
94 		int en;
95 		FILE *f;
96 	} summary;
97 	struct iovec iov[2];
98 	struct stat sb;
99 	uint32_t destlen;
100 	uint64_t offset, last_offset;
101 	struct cloop_header hdr;
102 	struct mkuz_blkcache_hit *chit;
103 	const struct mkuz_format *handler;
104 
105 	memset(&hdr, 0, sizeof(hdr));
106 	hdr.blksz = DEFAULT_CLSTSIZE;
107 	oname = NULL;
108 	verbose = 0;
109 	no_zcomp = 0;
110 	en_dedup = 0;
111 	summary.en = 0;
112 	summary.f = stderr;
113 	handler = &uzip_fmt;
114 
115 	while((opt = getopt(argc, argv, "o:s:vZdLS")) != -1) {
116 		switch(opt) {
117 		case 'o':
118 			oname = optarg;
119 			break;
120 
121 		case 's':
122 			tmp = atoi(optarg);
123 			if (tmp <= 0) {
124 				errx(1, "invalid cluster size specified: %s",
125 				    optarg);
126 				/* Not reached */
127 			}
128 			hdr.blksz = tmp;
129 			break;
130 
131 		case 'v':
132 			verbose = 1;
133 			break;
134 
135 		case 'Z':
136 			no_zcomp = 1;
137 			break;
138 
139 		case 'd':
140 			en_dedup = 1;
141 			break;
142 
143 		case 'L':
144 			handler = &ulzma_fmt;
145 			break;
146 
147 		case 'S':
148 			summary.en = 1;
149 			summary.f = stdout;
150 			break;
151 
152 		default:
153 			usage();
154 			/* Not reached */
155 		}
156 	}
157 	argc -= optind;
158 	argv += optind;
159 
160 	if (argc != 1) {
161 		usage();
162 		/* Not reached */
163 	}
164 
165 	strcpy(hdr.magic, handler->magic);
166 
167 	if (en_dedup != 0) {
168 		hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3;
169 		hdr.magic[CLOOP_OFS_COMPR] =
170 		    tolower(hdr.magic[CLOOP_OFS_COMPR]);
171 	}
172 
173 	obuf = handler->f_init(hdr.blksz);
174 
175 	iname = argv[0];
176 	if (oname == NULL) {
177 		asprintf(&oname, "%s%s", iname, handler->default_sufx);
178 		if (oname == NULL) {
179 			err(1, "can't allocate memory");
180 			/* Not reached */
181 		}
182 	}
183 
184 	ibuf = mkuz_safe_malloc(hdr.blksz);
185 
186 	signal(SIGHUP, exit);
187 	signal(SIGINT, exit);
188 	signal(SIGTERM, exit);
189 	signal(SIGXCPU, exit);
190 	signal(SIGXFSZ, exit);
191 	atexit(cleanup);
192 
193 	fdr = open(iname, O_RDONLY);
194 	if (fdr < 0) {
195 		err(1, "open(%s)", iname);
196 		/* Not reached */
197 	}
198 	if (fstat(fdr, &sb) != 0) {
199 		err(1, "fstat(%s)", iname);
200 		/* Not reached */
201 	}
202 	if (S_ISCHR(sb.st_mode)) {
203 		off_t ms;
204 
205 		if (ioctl(fdr, DIOCGMEDIASIZE, &ms) < 0) {
206 			err(1, "ioctl(DIOCGMEDIASIZE)");
207 			/* Not reached */
208 		}
209 		sb.st_size = ms;
210 	} else if (!S_ISREG(sb.st_mode)) {
211 		fprintf(stderr, "%s: not a character device or regular file\n",
212 			iname);
213 		exit(1);
214 	}
215 	hdr.nblocks = sb.st_size / hdr.blksz;
216 	if ((sb.st_size % hdr.blksz) != 0) {
217 		if (verbose != 0)
218 			fprintf(stderr, "file size is not multiple "
219 			"of %d, padding data\n", hdr.blksz);
220 		hdr.nblocks++;
221 	}
222 	toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc));
223 
224 	fdw = open(oname, O_WRONLY | O_TRUNC | O_CREAT,
225 		   S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
226 	if (fdw < 0) {
227 		err(1, "open(%s)", oname);
228 		/* Not reached */
229 	}
230 	cleanfile = oname;
231 
232 	/* Prepare header that we will write later when we have index ready. */
233 	iov[0].iov_base = (char *)&hdr;
234 	iov[0].iov_len = sizeof(hdr);
235 	iov[1].iov_base = (char *)toc;
236 	iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc);
237 	offset = iov[0].iov_len + iov[1].iov_len;
238 
239 	/* Reserve space for header */
240 	lseek(fdw, offset, SEEK_SET);
241 
242 	if (verbose != 0)
243 		fprintf(stderr, "data size %ju bytes, number of clusters "
244 		    "%u, index length %zu bytes\n", sb.st_size,
245 		    hdr.nblocks, iov[1].iov_len);
246 
247 	last_offset = 0;
248 	for(i = 0; i == 0 || ibuf != NULL; i++) {
249 		ibuf = readblock(fdr, ibuf, hdr.blksz);
250 		if (ibuf != NULL) {
251 			if (no_zcomp == 0 && \
252 			    memvcmp(ibuf, '\0', hdr.blksz) != 0) {
253 				/* All zeroes block */
254 				destlen = 0;
255 			} else {
256 				handler->f_compress(ibuf, &destlen);
257 			}
258 		} else {
259 			destlen = DEV_BSIZE - (offset % DEV_BSIZE);
260 			memset(obuf, 0, destlen);
261 			if (verbose != 0)
262 				fprintf(stderr, "padding data with %lu bytes "
263 				    "so that file size is multiple of %d\n",
264 				    (u_long)destlen, DEV_BSIZE);
265 		}
266 		if (destlen > 0 && en_dedup != 0) {
267 			chit = mkuz_blkcache_regblock(fdw, i, offset, destlen,
268 			    obuf);
269 			/*
270 			 * There should be at least one non-empty block
271 			 * between us and the backref'ed offset, otherwise
272 			 * we won't be able to parse that sequence correctly
273 			 * as it would be indistinguishible from another
274 			 * empty block.
275 			 */
276 			if (chit != NULL && chit->offset == last_offset) {
277 				chit = NULL;
278 			}
279 		} else {
280 			chit = NULL;
281 		}
282 		if (chit != NULL) {
283 			toc[i] = htobe64(chit->offset);
284 		} else {
285 			if (destlen > 0 && write(fdw, obuf, destlen) < 0) {
286 				err(1, "write(%s)", oname);
287 				/* Not reached */
288 			}
289 			toc[i] = htobe64(offset);
290 			last_offset = offset;
291 			offset += destlen;
292 		}
293 		if (ibuf != NULL && verbose != 0) {
294 			fprintf(stderr, "cluster #%d, in %u bytes, "
295 			    "out len=%lu offset=%lu", i, hdr.blksz,
296 			    chit == NULL ? (u_long)destlen : 0,
297 			    (u_long)be64toh(toc[i]));
298 			if (chit != NULL) {
299 				fprintf(stderr, " (backref'ed to #%d)",
300 				    chit->blkno);
301 			}
302 			fprintf(stderr, "\n");
303 
304 		}
305 	}
306 	close(fdr);
307 
308 	if (verbose != 0 || summary.en != 0)
309 		fprintf(summary.f, "compressed data to %ju bytes, saved %lld "
310 		    "bytes, %.2f%% decrease.\n", offset,
311 		    (long long)(sb.st_size - offset),
312 		    100.0 * (long long)(sb.st_size - offset) /
313 		    (float)sb.st_size);
314 
315 	/* Convert to big endian */
316 	hdr.blksz = htonl(hdr.blksz);
317 	hdr.nblocks = htonl(hdr.nblocks);
318 	/* Write headers into pre-allocated space */
319 	lseek(fdw, 0, SEEK_SET);
320 	if (writev(fdw, iov, 2) < 0) {
321 		err(1, "writev(%s)", oname);
322 		/* Not reached */
323 	}
324 	cleanfile = NULL;
325 	close(fdw);
326 
327 	exit(0);
328 }
329 
/*
 * Read the next cluster of input.
 *
 * fd        descriptor to read from
 * ibuf      buffer of at least clstsize bytes; it is overwritten
 * clstsize  number of bytes to request
 *
 * Returns ibuf when at least one byte was read; bytes past the amount
 * actually read are zero.  Returns NULL at end of input.  A read error
 * terminates the program through err().
 */
static char *
readblock(int fd, char *ibuf, u_int32_t clstsize)
{
	/* read(2) returns ssize_t; an int could truncate large counts. */
	ssize_t numread;

	/* Pre-zero so that a short read leaves the tail zero-padded. */
	memset(ibuf, 0, clstsize);
	numread = read(fd, ibuf, clstsize);
	if (numread < 0) {
		err(1, "read() failed");
		/* Not reached */
	}
	if (numread == 0) {
		return NULL;
	}
	return ibuf;
}
346 
/*
 * Print the command synopsis on stderr and terminate with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: mkuzip [-vZdLS] [-o outfile] [-s cluster_size] "
	    "infile\n";

	fputs(synopsis, stderr);
	exit(1);
}
355 
/*
 * malloc(3) wrapper for callers that cannot recover from an allocation
 * failure: returns the new block, or terminates the program through
 * err() when malloc() returns NULL.
 */
void *
mkuz_safe_malloc(size_t size)
{
	void *mem = malloc(size);

	if (mem == NULL)
		err(1, "can't allocate memory");	/* Not reached */
	return mem;
}
368 
369 static void
370 cleanup(void)
371 {
372 
373 	if (cleanfile != NULL)
374 		unlink(cleanfile);
375 }
376 
/*
 * Check whether each of the `size` bytes starting at `memory` equals
 * `val`.
 *
 * Returns non-zero when every byte matches and 0 otherwise; note that
 * this is the opposite sense of memcmp().  An empty range (size == 0)
 * is reported as matching and is never dereferenced.
 */
static int
memvcmp(const void *memory, unsigned char val, size_t size)
{
	const unsigned char *mm;

	/* Avoid reading mm[0] and wrapping size - 1 around to SIZE_MAX. */
	if (size == 0)
		return 1;
	mm = memory;
	/*
	 * Once the first byte is known to equal val, comparing the range
	 * with itself shifted by one byte checks that every later byte
	 * equals its predecessor, and therefore val.
	 */
	return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0;
}
384 }
385