1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/types.h> 33 #include <sys/endian.h> 34 #include <sys/param.h> 35 #include <sys/sysctl.h> 36 #include <sys/stat.h> 37 #include <sys/uio.h> 38 #include <netinet/in.h> 39 #include <assert.h> 40 #include <ctype.h> 41 #include <err.h> 42 #include <fcntl.h> 43 #include <pthread.h> 44 #include <signal.h> 45 #include <stdint.h> 46 #include <stdio.h> 47 #include <stdlib.h> 48 #include <string.h> 49 #include <unistd.h> 50 51 #include "mkuzip.h" 52 #include "mkuz_cloop.h" 53 #include "mkuz_blockcache.h" 54 #include "mkuz_lzma.h" 55 #include "mkuz_zlib.h" 56 #include "mkuz_zstd.h" 57 #include "mkuz_blk.h" 58 #include "mkuz_cfg.h" 59 #include "mkuz_conveyor.h" 60 #include "mkuz_format.h" 61 #include "mkuz_fqueue.h" 62 #include "mkuz_time.h" 63 #include "mkuz_insize.h" 64 65 #define DEFAULT_CLSTSIZE 16384 66 67 enum UZ_ALGORITHM { 68 UZ_ZLIB = 0, 69 UZ_LZMA, 70 UZ_ZSTD, 71 UZ_INVALID 72 }; 73 74 static const struct mkuz_format uzip_fmts[] = { 75 [UZ_ZLIB] = { 76 .option = "zlib", 77 .magic = CLOOP_MAGIC_ZLIB, 78 .default_sufx = DEFAULT_SUFX_ZLIB, 79 .f_compress_bound = mkuz_zlib_cbound, 80 .f_init = mkuz_zlib_init, 81 .f_compress = mkuz_zlib_compress, 82 }, 83 [UZ_LZMA] = { 84 .option = "lzma", 85 .magic = CLOOP_MAGIC_LZMA, 86 .default_sufx = DEFAULT_SUFX_LZMA, 87 .f_compress_bound = mkuz_lzma_cbound, 88 .f_init = mkuz_lzma_init, 89 .f_compress = mkuz_lzma_compress, 90 }, 91 [UZ_ZSTD] = { 92 .option = "zstd", 93 .magic = CLOOP_MAGIC_ZSTD, 94 .default_sufx = DEFAULT_SUFX_ZSTD, 95 .f_compress_bound = mkuz_zstd_cbound, 96 .f_init = mkuz_zstd_init, 97 .f_compress = mkuz_zstd_compress, 98 }, 99 }; 100 101 static struct mkuz_blk *readblock(int, u_int32_t); 102 static void usage(void); 103 static void cleanup(void); 104 105 static char *cleanfile = NULL; 106 107 static int 108 cmp_blkno(const struct mkuz_blk *bp, void *p) 109 { 110 uint32_t *ap; 111 112 ap = (uint32_t *)p; 113 114 return (bp->info.blkno == *ap); 115 } 116 117 int main(int argc, char **argv) 118 { 119 struct mkuz_cfg cfs; 120 char *oname; 121 uint64_t *toc; 122 int i, io, opt, tmp; 123 struct { 124 int en; 125 FILE *f; 126 } summary; 127 struct iovec iov[2]; 128 uint64_t offset, last_offset; 129 struct cloop_header hdr; 130 struct mkuz_conveyor *cvp; 131 void *c_ctx; 132 struct mkuz_blk_info *chit; 133 size_t ncpusz, ncpu, magiclen; 134 double st, et; 135 enum UZ_ALGORITHM comp_alg; 136 int comp_level; 137 138 st = getdtime(); 139 140 ncpusz = sizeof(size_t); 141 if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, NULL, 0) < 0) { 142 ncpu = 1; 143 } else if (ncpu > MAX_WORKERS_AUTO) { 144 ncpu = MAX_WORKERS_AUTO; 145 } 146 147 memset(&hdr, 0, sizeof(hdr)); 148 cfs.blksz = DEFAULT_CLSTSIZE; 149 oname = NULL; 150 cfs.verbose = 0; 151 cfs.no_zcomp = 0; 152 cfs.en_dedup = 0; 153 summary.en = 0; 154 summary.f = stderr; 155 comp_alg = UZ_ZLIB; 156 comp_level = USE_DEFAULT_LEVEL; 157 cfs.nworkers = ncpu; 158 struct mkuz_blk *iblk, *oblk; 159 160 while((opt = getopt(argc, argv, "A:C:o:s:vZdLSj:")) != -1) { 161 switch(opt) { 162 case 'A': 163 for (tmp = UZ_ZLIB; tmp < UZ_INVALID; tmp++) { 164 if (strcmp(uzip_fmts[tmp].option, optarg) == 0) 165 break; 166 } 167 if (tmp == UZ_INVALID) 168 errx(1, "invalid algorithm specified: %s", 169 optarg); 170 /* Not reached */ 171 comp_alg = tmp; 172 break; 173 case 'C': 174 comp_level = atoi(optarg); 175 break; 176 case 'o': 177 oname = optarg; 178 break; 179 180 case 's': 181 tmp = atoi(optarg); 182 if (tmp <= 0) { 183 errx(1, "invalid cluster size specified: %s", 184 optarg); 185 /* Not reached */ 186 } 187 cfs.blksz = tmp; 188 break; 189 190 case 'v': 191 cfs.verbose = 1; 192 break; 193 194 case 'Z': 195 cfs.no_zcomp = 1; 196 break; 197 198 case 'd': 199 cfs.en_dedup = 1; 200 break; 201 202 case 'L': 203 comp_alg = UZ_LZMA; 204 break; 205 206 case 'S': 207 summary.en = 1; 208 summary.f = stdout; 209 break; 210 211 case 'j': 212 tmp = atoi(optarg); 213 if (tmp <= 0) { 214 errx(1, "invalid number of compression threads" 215 " specified: %s", optarg); 216 /* Not reached */ 217 } 218 cfs.nworkers = tmp; 219 break; 220 221 default: 222 usage(); 223 /* Not reached */ 224 } 225 } 226 argc -= optind; 227 argv += optind; 228 229 if (argc != 1) { 230 usage(); 231 /* Not reached */ 232 } 233 234 cfs.handler = &uzip_fmts[comp_alg]; 235 236 magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic)); 237 assert(magiclen < sizeof(hdr.magic)); 238 239 if (cfs.en_dedup != 0) { 240 /* 241 * Dedupe requires a version 3 format. Don't downgrade newer 242 * formats. 243 */ 244 if (hdr.magic[CLOOP_OFS_VERSN] == CLOOP_MAJVER_2) 245 hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3; 246 hdr.magic[CLOOP_OFS_COMPR] = 247 tolower(hdr.magic[CLOOP_OFS_COMPR]); 248 } 249 250 if (cfs.blksz % DEV_BSIZE != 0) 251 errx(1, "cluster size should be multiple of %d", DEV_BSIZE); 252 253 cfs.cbound_blksz = cfs.handler->f_compress_bound(cfs.blksz); 254 if (cfs.cbound_blksz > MAXPHYS) 255 errx(1, "maximal compressed cluster size %zu greater than MAXPHYS %zu", 256 cfs.cbound_blksz, (size_t)MAXPHYS); 257 258 c_ctx = cfs.handler->f_init(&comp_level); 259 cfs.comp_level = comp_level; 260 261 cfs.iname = argv[0]; 262 if (oname == NULL) { 263 asprintf(&oname, "%s%s", cfs.iname, cfs.handler->default_sufx); 264 if (oname == NULL) { 265 err(1, "can't allocate memory"); 266 /* Not reached */ 267 } 268 } 269 270 signal(SIGHUP, exit); 271 signal(SIGINT, exit); 272 signal(SIGTERM, exit); 273 signal(SIGXCPU, exit); 274 signal(SIGXFSZ, exit); 275 atexit(cleanup); 276 277 cfs.fdr = open(cfs.iname, O_RDONLY); 278 if (cfs.fdr < 0) { 279 err(1, "open(%s)", cfs.iname); 280 /* Not reached */ 281 } 282 cfs.isize = mkuz_get_insize(&cfs); 283 if (cfs.isize < 0) { 284 errx(1, "can't determine input image size"); 285 /* Not reached */ 286 } 287 hdr.nblocks = cfs.isize / cfs.blksz; 288 if ((cfs.isize % cfs.blksz) != 0) { 289 if (cfs.verbose != 0) 290 fprintf(stderr, "file size is not multiple " 291 "of %d, padding data\n", cfs.blksz); 292 hdr.nblocks++; 293 } 294 toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc)); 295 296 /* 297 * Initialize last+1 entry with non-heap trash. If final padding is 298 * added later, it may or may not be overwritten with an offset 299 * representing the length of the final compressed block. If not, 300 * initialize to a defined value. 301 */ 302 toc[hdr.nblocks] = 0; 303 304 cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT, 305 S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); 306 if (cfs.fdw < 0) { 307 err(1, "open(%s)", oname); 308 /* Not reached */ 309 } 310 cleanfile = oname; 311 312 /* Prepare header that we will write later when we have index ready. */ 313 iov[0].iov_base = (char *)&hdr; 314 iov[0].iov_len = sizeof(hdr); 315 iov[1].iov_base = (char *)toc; 316 iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc); 317 offset = iov[0].iov_len + iov[1].iov_len; 318 319 /* Reserve space for header */ 320 lseek(cfs.fdw, offset, SEEK_SET); 321 322 if (cfs.verbose != 0) { 323 fprintf(stderr, "data size %ju bytes, number of clusters " 324 "%u, index length %zu bytes\n", cfs.isize, 325 hdr.nblocks, iov[1].iov_len); 326 } 327 328 cvp = mkuz_conveyor_ctor(&cfs); 329 330 last_offset = 0; 331 iblk = oblk = NULL; 332 for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) { 333 iblk = readblock(cfs.fdr, cfs.blksz); 334 mkuz_fqueue_enq(cvp->wrk_queue, iblk); 335 if (iblk != MKUZ_BLK_EOF && 336 (i < (cfs.nworkers * ITEMS_PER_WORKER))) { 337 continue; 338 } 339 drain: 340 oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io); 341 assert(oblk->info.blkno == (unsigned)io); 342 oblk->info.offset = offset; 343 chit = NULL; 344 if (cfs.en_dedup != 0 && oblk->info.len > 0) { 345 chit = mkuz_blkcache_regblock(cfs.fdw, oblk); 346 /* 347 * There should be at least one non-empty block 348 * between us and the backref'ed offset, otherwise 349 * we won't be able to parse that sequence correctly 350 * as it would be indistinguishible from another 351 * empty block. 352 */ 353 if (chit != NULL && chit->offset == last_offset) { 354 chit = NULL; 355 } 356 } 357 if (chit != NULL) { 358 toc[io] = htobe64(chit->offset); 359 oblk->info.len = 0; 360 } else { 361 if (oblk->info.len > 0 && write(cfs.fdw, oblk->data, 362 oblk->info.len) < 0) { 363 err(1, "write(%s)", oname); 364 /* Not reached */ 365 } 366 toc[io] = htobe64(offset); 367 last_offset = offset; 368 offset += oblk->info.len; 369 } 370 if (cfs.verbose != 0) { 371 fprintf(stderr, "cluster #%d, in %u bytes, " 372 "out len=%lu offset=%lu", io, cfs.blksz, 373 (u_long)oblk->info.len, (u_long)be64toh(toc[io])); 374 if (chit != NULL) { 375 fprintf(stderr, " (backref'ed to #%d)", 376 chit->blkno); 377 } 378 fprintf(stderr, "\n"); 379 } 380 free(oblk); 381 io += 1; 382 if (iblk == MKUZ_BLK_EOF) { 383 if (io < i) 384 goto drain; 385 /* Last block, see if we need to add some padding */ 386 if ((offset % DEV_BSIZE) == 0) 387 continue; 388 oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE)); 389 oblk->info.blkno = io; 390 oblk->info.len = oblk->alen; 391 if (cfs.verbose != 0) { 392 fprintf(stderr, "padding data with %lu bytes " 393 "so that file size is multiple of %d\n", 394 (u_long)oblk->alen, DEV_BSIZE); 395 } 396 mkuz_fqueue_enq(cvp->results, oblk); 397 goto drain; 398 } 399 } 400 401 close(cfs.fdr); 402 403 if (cfs.verbose != 0 || summary.en != 0) { 404 et = getdtime(); 405 fprintf(summary.f, "compressed data to %ju bytes, saved %lld " 406 "bytes, %.2f%% decrease, %.2f bytes/sec.\n", offset, 407 (long long)(cfs.isize - offset), 408 100.0 * (long long)(cfs.isize - offset) / 409 (float)cfs.isize, (float)cfs.isize / (et - st)); 410 } 411 412 /* Convert to big endian */ 413 hdr.blksz = htonl(cfs.blksz); 414 hdr.nblocks = htonl(hdr.nblocks); 415 /* Write headers into pre-allocated space */ 416 lseek(cfs.fdw, 0, SEEK_SET); 417 if (writev(cfs.fdw, iov, 2) < 0) { 418 err(1, "writev(%s)", oname); 419 /* Not reached */ 420 } 421 cleanfile = NULL; 422 close(cfs.fdw); 423 424 exit(0); 425 } 426 427 static struct mkuz_blk * 428 readblock(int fd, u_int32_t clstsize) 429 { 430 int numread; 431 struct mkuz_blk *rval; 432 static int blockcnt; 433 off_t cpos; 434 435 rval = mkuz_blk_ctor(clstsize); 436 437 rval->info.blkno = blockcnt; 438 blockcnt += 1; 439 cpos = lseek(fd, 0, SEEK_CUR); 440 if (cpos < 0) { 441 err(1, "readblock: lseek() failed"); 442 /* Not reached */ 443 } 444 rval->info.offset = cpos; 445 446 numread = read(fd, rval->data, clstsize); 447 if (numread < 0) { 448 err(1, "readblock: read() failed"); 449 /* Not reached */ 450 } 451 if (numread == 0) { 452 free(rval); 453 return MKUZ_BLK_EOF; 454 } 455 rval->info.len = numread; 456 return rval; 457 } 458 459 static void 460 usage(void) 461 { 462 463 fprintf(stderr, "usage: mkuzip [-vZdLS] [-o outfile] [-s cluster_size] " 464 "[-j ncompr] infile\n"); 465 exit(1); 466 } 467 468 void * 469 mkuz_safe_malloc(size_t size) 470 { 471 void *retval; 472 473 retval = malloc(size); 474 if (retval == NULL) { 475 err(1, "can't allocate memory"); 476 /* Not reached */ 477 } 478 return retval; 479 } 480 481 void * 482 mkuz_safe_zmalloc(size_t size) 483 { 484 void *retval; 485 486 retval = mkuz_safe_malloc(size); 487 bzero(retval, size); 488 return retval; 489 } 490 491 static void 492 cleanup(void) 493 { 494 495 if (cleanfile != NULL) 496 unlink(cleanfile); 497 } 498 499 int 500 mkuz_memvcmp(const void *memory, unsigned char val, size_t size) 501 { 502 const u_char *mm; 503 504 mm = (const u_char *)memory; 505 return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0; 506 } 507