/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/types.h>
#include <sys/endian.h>
#include <sys/param.h>
#include <sys/sysctl.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "mkuzip.h"
#include "mkuz_cloop.h"
#include "mkuz_blockcache.h"
#include "mkuz_lzma.h"
#include "mkuz_zlib.h"
#include "mkuz_zstd.h"
#include "mkuz_blk.h"
#include "mkuz_cfg.h"
#include "mkuz_conveyor.h"
#include "mkuz_format.h"
#include "mkuz_fqueue.h"
#include "mkuz_time.h"
#include "mkuz_insize.h"

#define DEFAULT_CLSTSIZE	16384

enum UZ_ALGORITHM {
	UZ_ZLIB = 0,
	UZ_LZMA,
	UZ_ZSTD,
	UZ_INVALID
};

static const struct mkuz_format uzip_fmts[] = {
	[UZ_ZLIB] = {
		.option = "zlib",
		.magic = CLOOP_MAGIC_ZLIB,
		.default_sufx = DEFAULT_SUFX_ZLIB,
		.f_compress_bound = mkuz_zlib_cbound,
		.f_init = mkuz_zlib_init,
		.f_compress = mkuz_zlib_compress,
	},
	[UZ_LZMA] = {
		.option = "lzma",
		.magic = CLOOP_MAGIC_LZMA,
		.default_sufx = DEFAULT_SUFX_LZMA,
		.f_compress_bound = mkuz_lzma_cbound,
		.f_init = mkuz_lzma_init,
		.f_compress = mkuz_lzma_compress,
	},
	[UZ_ZSTD] = {
		.option = "zstd",
		.magic = CLOOP_MAGIC_ZSTD,
		.default_sufx = DEFAULT_SUFX_ZSTD,
		.f_compress_bound = mkuz_zstd_cbound,
		.f_init = mkuz_zstd_init,
		.f_compress = mkuz_zstd_compress,
	},
};

static struct mkuz_blk *readblock(int, u_int32_t);
static void usage(void) __dead2;
static void cleanup(void);

static char *cleanfile = NULL;

static int
cmp_blkno(const struct mkuz_blk *bp, void *p)
{
	uint32_t *ap;

	ap = (uint32_t *)p;

	return (bp->info.blkno == *ap);
}

int main(int argc, char **argv)
{
	struct mkuz_cfg cfs;
	char *oname;
	uint64_t *toc;
	int i, io, opt, tmp;
	struct {
		int en;
		FILE *f;
	} summary;
	struct iovec iov[2];
	uint64_t offset, last_offset;
	struct cloop_header hdr;
	struct mkuz_conveyor *cvp;
	struct mkuz_blk_info *chit;
	size_t ncpusz, ncpu, magiclen;
	double st, et;
	enum UZ_ALGORITHM comp_alg;
	int comp_level;

	st = getdtime();

	ncpusz = sizeof(size_t);
	if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, NULL, 0) < 0) {
		ncpu = 1;
	} else if (ncpu > MAX_WORKERS_AUTO) {
		ncpu = MAX_WORKERS_AUTO;
	}

	memset(&hdr, 0, sizeof(hdr));
	cfs.blksz = DEFAULT_CLSTSIZE;
	oname = NULL;
	cfs.verbose = 0;
	cfs.no_zcomp = 0;
	cfs.en_dedup = 0;
	summary.en = 0;
	summary.f = stderr;
	comp_alg = UZ_ZLIB;
	comp_level = USE_DEFAULT_LEVEL;
	cfs.nworkers = ncpu;
	struct mkuz_blk *iblk, *oblk;

	while((opt = getopt(argc, argv, "A:C:o:s:vZdLSj:")) != -1) {
		switch(opt) {
		case 'A':
			for (tmp = UZ_ZLIB; tmp < UZ_INVALID; tmp++) {
				if (strcmp(uzip_fmts[tmp].option, optarg) == 0)
					break;
			}
			if (tmp == UZ_INVALID)
				errx(1, "invalid algorithm specified: %s",
				    optarg);
				/* Not reached */
			comp_alg = tmp;
			break;
		case 'C':
			comp_level = atoi(optarg);
			break;
		case 'o':
			oname = optarg;
			break;

		case 's':
			tmp = atoi(optarg);
			if (tmp <= 0) {
				errx(1, "invalid cluster size specified: %s",
				    optarg);
				/* Not reached */
			}
			cfs.blksz = tmp;
			break;

		case 'v':
			cfs.verbose = 1;
			break;

		case 'Z':
			cfs.no_zcomp = 1;
			break;

		case 'd':
			cfs.en_dedup = 1;
			break;

		case 'L':
			comp_alg = UZ_LZMA;
			break;

		case 'S':
			summary.en = 1;
			summary.f = stdout;
			break;

		case 'j':
			tmp = atoi(optarg);
			if (tmp <= 0) {
				errx(1, "invalid number of compression threads"
				    " specified: %s", optarg);
				/* Not reached */
			}
			cfs.nworkers = tmp;
			break;

		default:
			usage();
			/* Not reached */
		}
	}
	argc -= optind;
	argv += optind;

	if (argc != 1) {
		usage();
		/* Not reached */
	}

	cfs.handler = &uzip_fmts[comp_alg];

	magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic));
	assert(magiclen < sizeof(hdr.magic));

	if (cfs.en_dedup != 0) {
		/*
		 * Dedupe requires a version 3 format.  Don't downgrade newer
		 * formats.
		 */
		if (hdr.magic[CLOOP_OFS_VERSN] == CLOOP_MAJVER_2)
			hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3;
		hdr.magic[CLOOP_OFS_COMPR] =
		    tolower(hdr.magic[CLOOP_OFS_COMPR]);
	}

	if (cfs.blksz % DEV_BSIZE != 0)
		errx(1, "cluster size should be multiple of %d", DEV_BSIZE);

	cfs.cbound_blksz = cfs.handler->f_compress_bound(cfs.blksz);
	if (cfs.cbound_blksz > MAXPHYS)
		errx(1, "maximal compressed cluster size %zu greater than MAXPHYS %zu",
		    cfs.cbound_blksz, (size_t)MAXPHYS);

	cfs.handler->f_init(&comp_level);
	cfs.comp_level = comp_level;

	cfs.iname = argv[0];
	if (oname == NULL) {
		asprintf(&oname, "%s%s", cfs.iname, cfs.handler->default_sufx);
		if (oname == NULL) {
			err(1, "can't allocate memory");
			/* Not reached */
		}
	}

	signal(SIGHUP, exit);
	signal(SIGINT, exit);
	signal(SIGTERM, exit);
	signal(SIGXCPU, exit);
	signal(SIGXFSZ, exit);
	atexit(cleanup);

	cfs.fdr = open(cfs.iname, O_RDONLY);
	if (cfs.fdr < 0) {
		err(1, "open(%s)", cfs.iname);
		/* Not reached */
	}
	cfs.isize = mkuz_get_insize(&cfs);
	if (cfs.isize < 0) {
		errx(1, "can't determine input image size");
		/* Not reached */
	}
	hdr.nblocks = cfs.isize / cfs.blksz;
	if ((cfs.isize % cfs.blksz) != 0) {
		if (cfs.verbose != 0)
			fprintf(stderr, "file size is not multiple "
			    "of %d, padding data\n", cfs.blksz);
		hdr.nblocks++;
	}
	toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc));

	/*
	 * Initialize last+1 entry with non-heap trash.  If final padding is
	 * added later, it may or may not be overwritten with an offset
	 * representing the length of the final compressed block.  If not,
	 * initialize to a defined value.
	 */
	toc[hdr.nblocks] = 0;

	cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT,
	    S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
	if (cfs.fdw < 0) {
		err(1, "open(%s)", oname);
		/* Not reached */
	}
	cleanfile = oname;

	/* Prepare header that we will write later when we have index ready. */
	iov[0].iov_base = (char *)&hdr;
	iov[0].iov_len = sizeof(hdr);
	iov[1].iov_base = (char *)toc;
	iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc);
	offset = iov[0].iov_len + iov[1].iov_len;

	/* Reserve space for header */
	lseek(cfs.fdw, offset, SEEK_SET);

	if (cfs.verbose != 0) {
		fprintf(stderr, "data size %ju bytes, number of clusters "
		    "%u, index length %zu bytes\n", cfs.isize,
		    hdr.nblocks, iov[1].iov_len);
	}

	cvp = mkuz_conveyor_ctor(&cfs);

	last_offset = 0;
	iblk = oblk = NULL;
	for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) {
		iblk = readblock(cfs.fdr, cfs.blksz);
		mkuz_fqueue_enq(cvp->wrk_queue, iblk);
		if (iblk != MKUZ_BLK_EOF &&
		    (i < (cfs.nworkers * ITEMS_PER_WORKER))) {
			continue;
		}
drain:
		oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io);
		assert(oblk->info.blkno == (unsigned)io);
		oblk->info.offset = offset;
		chit = NULL;
		if (cfs.en_dedup != 0 && oblk->info.len > 0) {
			chit = mkuz_blkcache_regblock(cfs.fdw, oblk);
			/*
			 * There should be at least one non-empty block
			 * between us and the backref'ed offset, otherwise
			 * we won't be able to parse that sequence correctly
			 * as it would be indistinguishable from another
			 * empty block.
			 */
			if (chit != NULL && chit->offset == last_offset) {
				chit = NULL;
			}
		}
		if (chit != NULL) {
			toc[io] = htobe64(chit->offset);
			oblk->info.len = 0;
		} else {
			if (oblk->info.len > 0 && write(cfs.fdw, oblk->data,
			    oblk->info.len) < 0) {
				err(1, "write(%s)", oname);
				/* Not reached */
			}
			toc[io] = htobe64(offset);
			last_offset = offset;
			offset += oblk->info.len;
		}
		if (cfs.verbose != 0) {
			fprintf(stderr, "cluster #%d, in %u bytes, "
			    "out len=%lu offset=%lu", io, cfs.blksz,
			    (u_long)oblk->info.len, (u_long)be64toh(toc[io]));
			if (chit != NULL) {
				fprintf(stderr, " (backref'ed to #%d)",
				    chit->blkno);
			}
			fprintf(stderr, "\n");
		}
		free(oblk);
		io += 1;
		if (iblk == MKUZ_BLK_EOF) {
			if (io < i)
				goto drain;
			/* Last block, see if we need to add some padding */
			if ((offset % DEV_BSIZE) == 0)
				continue;
			oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE));
			oblk->info.blkno = io;
			oblk->info.len = oblk->alen;
			if (cfs.verbose != 0) {
				fprintf(stderr, "padding data with %lu bytes "
				    "so that file size is multiple of %d\n",
				    (u_long)oblk->alen, DEV_BSIZE);
			}
			mkuz_fqueue_enq(cvp->results, oblk);
			goto drain;
		}
	}

	close(cfs.fdr);

	if (cfs.verbose != 0 || summary.en != 0) {
		et = getdtime();
		fprintf(summary.f, "compressed data to %ju bytes, saved %lld "
		    "bytes, %.2f%% decrease, %.2f bytes/sec.\n", offset,
		    (long long)(cfs.isize - offset),
		    100.0 * (long long)(cfs.isize - offset) /
		    (float)cfs.isize, (float)cfs.isize / (et - st));
	}

	/* Convert to big endian */
	hdr.blksz = htonl(cfs.blksz);
	hdr.nblocks = htonl(hdr.nblocks);
	/* Write headers into pre-allocated space */
	lseek(cfs.fdw, 0, SEEK_SET);
	if (writev(cfs.fdw, iov, 2) < 0) {
		err(1, "writev(%s)", oname);
		/* Not reached */
	}
	cleanfile = NULL;
	close(cfs.fdw);

	exit(0);
}

static struct mkuz_blk *
readblock(int fd, u_int32_t clstsize)
{
	int numread;
	struct mkuz_blk *rval;
	static int blockcnt;
	off_t cpos;

	rval = mkuz_blk_ctor(clstsize);

	rval->info.blkno = blockcnt;
	blockcnt += 1;
	cpos = lseek(fd, 0, SEEK_CUR);
	if (cpos < 0) {
		err(1, "readblock: lseek() failed");
		/* Not reached */
	}
	rval->info.offset = cpos;

	numread = read(fd, rval->data, clstsize);
	if (numread < 0) {
		err(1, "readblock: read() failed");
		/* Not reached */
	}
	if (numread == 0) {
		free(rval);
		return MKUZ_BLK_EOF;
	}
	rval->info.len = numread;
	return rval;
}

static void
usage(void)
{

	fprintf(stderr, "usage: mkuzip [-vZdLS] [-A compression_alg] "
	    "[-C compression_level] [-o outfile] [-s cluster_size] "
	    "[-j ncompr] infile\n");
	exit(1);
}

void *
mkuz_safe_malloc(size_t size)
{
	void *retval;

	retval = malloc(size);
	if (retval == NULL) {
		err(1, "can't allocate memory");
		/* Not reached */
	}
	return retval;
}

void *
mkuz_safe_zmalloc(size_t size)
{
	void *retval;

	retval = mkuz_safe_malloc(size);
	bzero(retval, size);
	return retval;
}

static void
cleanup(void)
{

	if (cleanfile != NULL)
		unlink(cleanfile);
}

/*
 * Return non-zero if every byte in the buffer equals val: the first byte
 * must match val and the buffer must compare equal to itself shifted by
 * one byte.
 */
int
mkuz_memvcmp(const void *memory, unsigned char val, size_t size)
{
	const u_char *mm;

	mm = (const u_char *)memory;
	return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0;
}