1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 #include <sys/types.h> 31 #include <sys/endian.h> 32 #include <sys/param.h> 33 #include <sys/sysctl.h> 34 #include <sys/stat.h> 35 #include <sys/uio.h> 36 #include <netinet/in.h> 37 #include <assert.h> 38 #include <ctype.h> 39 #include <err.h> 40 #include <fcntl.h> 41 #include <pthread.h> 42 #include <signal.h> 43 #include <stdint.h> 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <string.h> 47 #include <unistd.h> 48 49 #include "mkuzip.h" 50 #include "mkuz_cloop.h" 51 #include "mkuz_blockcache.h" 52 #include "mkuz_lzma.h" 53 #include "mkuz_zlib.h" 54 #include "mkuz_zstd.h" 55 #include "mkuz_blk.h" 56 #include "mkuz_cfg.h" 57 #include "mkuz_conveyor.h" 58 #include "mkuz_format.h" 59 #include "mkuz_fqueue.h" 60 #include "mkuz_time.h" 61 #include "mkuz_insize.h" 62 63 #define DEFAULT_CLSTSIZE 16384 64 65 enum UZ_ALGORITHM { 66 UZ_ZLIB = 0, 67 UZ_LZMA, 68 UZ_ZSTD, 69 UZ_INVALID 70 }; 71 72 static const struct mkuz_format uzip_fmts[] = { 73 [UZ_ZLIB] = { 74 .option = "zlib", 75 .magic = CLOOP_MAGIC_ZLIB, 76 .default_sufx = DEFAULT_SUFX_ZLIB, 77 .f_compress_bound = mkuz_zlib_cbound, 78 .f_init = mkuz_zlib_init, 79 .f_compress = mkuz_zlib_compress, 80 }, 81 [UZ_LZMA] = { 82 .option = "lzma", 83 .magic = CLOOP_MAGIC_LZMA, 84 .default_sufx = DEFAULT_SUFX_LZMA, 85 .f_compress_bound = mkuz_lzma_cbound, 86 .f_init = mkuz_lzma_init, 87 .f_compress = mkuz_lzma_compress, 88 }, 89 [UZ_ZSTD] = { 90 .option = "zstd", 91 .magic = CLOOP_MAGIC_ZSTD, 92 .default_sufx = DEFAULT_SUFX_ZSTD, 93 .f_compress_bound = mkuz_zstd_cbound, 94 .f_init = mkuz_zstd_init, 95 .f_compress = mkuz_zstd_compress, 96 }, 97 }; 98 99 static struct mkuz_blk *readblock(int, u_int32_t); 100 static void usage(void) __dead2; 101 static void cleanup(void); 102 103 static char *cleanfile = NULL; 104 105 static int 106 cmp_blkno(const struct mkuz_blk *bp, void *p) 107 { 108 uint32_t *ap; 109 110 ap = (uint32_t *)p; 111 112 return (bp->info.blkno == *ap); 113 } 114 115 int main(int argc, char **argv) 116 { 117 struct mkuz_cfg cfs; 118 char *oname; 119 uint64_t *toc; 120 int i, io, opt, tmp; 121 struct { 122 int en; 123 FILE *f; 124 } summary; 125 struct iovec iov[2]; 126 uint64_t offset, last_offset; 127 struct cloop_header hdr; 128 struct mkuz_conveyor *cvp; 129 struct mkuz_blk_info *chit; 130 size_t ncpusz, ncpu, magiclen; 131 double st, et; 132 enum UZ_ALGORITHM comp_alg; 133 int comp_level; 134 135 st = getdtime(); 136 137 ncpusz = sizeof(size_t); 138 if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, NULL, 0) < 0) { 139 ncpu = 1; 140 } else if (ncpu > MAX_WORKERS_AUTO) { 141 ncpu = MAX_WORKERS_AUTO; 142 } 143 144 memset(&hdr, 0, sizeof(hdr)); 145 cfs.blksz = DEFAULT_CLSTSIZE; 146 oname = NULL; 147 cfs.verbose = 0; 148 cfs.no_zcomp = 0; 149 cfs.en_dedup = 0; 150 summary.en = 0; 151 summary.f = stderr; 152 comp_alg = UZ_ZLIB; 153 comp_level = USE_DEFAULT_LEVEL; 154 cfs.nworkers = ncpu; 155 struct mkuz_blk *iblk, *oblk; 156 157 while((opt = getopt(argc, argv, "A:C:o:s:vZdLSj:")) != -1) { 158 switch(opt) { 159 case 'A': 160 for (tmp = UZ_ZLIB; tmp < UZ_INVALID; tmp++) { 161 if (strcmp(uzip_fmts[tmp].option, optarg) == 0) 162 break; 163 } 164 if (tmp == UZ_INVALID) 165 errx(1, "invalid algorithm specified: %s", 166 optarg); 167 /* Not reached */ 168 comp_alg = tmp; 169 break; 170 case 'C': 171 comp_level = atoi(optarg); 172 break; 173 case 'o': 174 oname = optarg; 175 break; 176 177 case 's': 178 tmp = atoi(optarg); 179 if (tmp <= 0) { 180 errx(1, "invalid cluster size specified: %s", 181 optarg); 182 /* Not reached */ 183 } 184 cfs.blksz = tmp; 185 break; 186 187 case 'v': 188 cfs.verbose = 1; 189 break; 190 191 case 'Z': 192 cfs.no_zcomp = 1; 193 break; 194 195 case 'd': 196 cfs.en_dedup = 1; 197 break; 198 199 case 'L': 200 comp_alg = UZ_LZMA; 201 break; 202 203 case 'S': 204 summary.en = 1; 205 summary.f = stdout; 206 break; 207 208 case 'j': 209 tmp = atoi(optarg); 210 if (tmp <= 0) { 211 errx(1, "invalid number of compression threads" 212 " specified: %s", optarg); 213 /* Not reached */ 214 } 215 cfs.nworkers = tmp; 216 break; 217 218 default: 219 usage(); 220 /* Not reached */ 221 } 222 } 223 argc -= optind; 224 argv += optind; 225 226 if (argc != 1) { 227 usage(); 228 /* Not reached */ 229 } 230 231 cfs.handler = &uzip_fmts[comp_alg]; 232 233 magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic)); 234 assert(magiclen < sizeof(hdr.magic)); 235 236 if (cfs.en_dedup != 0) { 237 /* 238 * Dedupe requires a version 3 format. Don't downgrade newer 239 * formats. 240 */ 241 if (hdr.magic[CLOOP_OFS_VERSN] == CLOOP_MAJVER_2) 242 hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3; 243 hdr.magic[CLOOP_OFS_COMPR] = 244 tolower(hdr.magic[CLOOP_OFS_COMPR]); 245 } 246 247 if (cfs.blksz % DEV_BSIZE != 0) 248 errx(1, "cluster size should be multiple of %d", DEV_BSIZE); 249 250 cfs.cbound_blksz = cfs.handler->f_compress_bound(cfs.blksz); 251 if (cfs.cbound_blksz > MAXPHYS) 252 errx(1, "maximal compressed cluster size %zu greater than MAXPHYS %zu", 253 cfs.cbound_blksz, (size_t)MAXPHYS); 254 255 cfs.handler->f_init(&comp_level); 256 cfs.comp_level = comp_level; 257 258 cfs.iname = argv[0]; 259 if (oname == NULL) { 260 asprintf(&oname, "%s%s", cfs.iname, cfs.handler->default_sufx); 261 if (oname == NULL) { 262 err(1, "can't allocate memory"); 263 /* Not reached */ 264 } 265 } 266 267 signal(SIGHUP, exit); 268 signal(SIGINT, exit); 269 signal(SIGTERM, exit); 270 signal(SIGXCPU, exit); 271 signal(SIGXFSZ, exit); 272 atexit(cleanup); 273 274 cfs.fdr = open(cfs.iname, O_RDONLY); 275 if (cfs.fdr < 0) { 276 err(1, "open(%s)", cfs.iname); 277 /* Not reached */ 278 } 279 cfs.isize = mkuz_get_insize(&cfs); 280 if (cfs.isize < 0) { 281 errx(1, "can't determine input image size"); 282 /* Not reached */ 283 } 284 hdr.nblocks = cfs.isize / cfs.blksz; 285 if ((cfs.isize % cfs.blksz) != 0) { 286 if (cfs.verbose != 0) 287 fprintf(stderr, "file size is not multiple " 288 "of %d, padding data\n", cfs.blksz); 289 hdr.nblocks++; 290 } 291 toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc)); 292 293 /* 294 * Initialize last+1 entry with non-heap trash. If final padding is 295 * added later, it may or may not be overwritten with an offset 296 * representing the length of the final compressed block. If not, 297 * initialize to a defined value. 298 */ 299 toc[hdr.nblocks] = 0; 300 301 cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT, 302 S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); 303 if (cfs.fdw < 0) { 304 err(1, "open(%s)", oname); 305 /* Not reached */ 306 } 307 cleanfile = oname; 308 309 /* Prepare header that we will write later when we have index ready. */ 310 iov[0].iov_base = (char *)&hdr; 311 iov[0].iov_len = sizeof(hdr); 312 iov[1].iov_base = (char *)toc; 313 iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc); 314 offset = iov[0].iov_len + iov[1].iov_len; 315 316 /* Reserve space for header */ 317 lseek(cfs.fdw, offset, SEEK_SET); 318 319 if (cfs.verbose != 0) { 320 fprintf(stderr, "data size %ju bytes, number of clusters " 321 "%u, index length %zu bytes\n", cfs.isize, 322 hdr.nblocks, iov[1].iov_len); 323 } 324 325 cvp = mkuz_conveyor_ctor(&cfs); 326 327 last_offset = 0; 328 iblk = oblk = NULL; 329 for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) { 330 iblk = readblock(cfs.fdr, cfs.blksz); 331 mkuz_fqueue_enq(cvp->wrk_queue, iblk); 332 if (iblk != MKUZ_BLK_EOF && 333 (i < (cfs.nworkers * ITEMS_PER_WORKER))) { 334 continue; 335 } 336 drain: 337 oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io); 338 assert(oblk->info.blkno == (unsigned)io); 339 oblk->info.offset = offset; 340 chit = NULL; 341 if (cfs.en_dedup != 0 && oblk->info.len > 0) { 342 chit = mkuz_blkcache_regblock(cfs.fdw, oblk); 343 /* 344 * There should be at least one non-empty block 345 * between us and the backref'ed offset, otherwise 346 * we won't be able to parse that sequence correctly 347 * as it would be indistinguishible from another 348 * empty block. 349 */ 350 if (chit != NULL && chit->offset == last_offset) { 351 chit = NULL; 352 } 353 } 354 if (chit != NULL) { 355 toc[io] = htobe64(chit->offset); 356 oblk->info.len = 0; 357 } else { 358 if (oblk->info.len > 0 && write(cfs.fdw, oblk->data, 359 oblk->info.len) < 0) { 360 err(1, "write(%s)", oname); 361 /* Not reached */ 362 } 363 toc[io] = htobe64(offset); 364 last_offset = offset; 365 offset += oblk->info.len; 366 } 367 if (cfs.verbose != 0) { 368 fprintf(stderr, "cluster #%d, in %u bytes, " 369 "out len=%lu offset=%lu", io, cfs.blksz, 370 (u_long)oblk->info.len, (u_long)be64toh(toc[io])); 371 if (chit != NULL) { 372 fprintf(stderr, " (backref'ed to #%d)", 373 chit->blkno); 374 } 375 fprintf(stderr, "\n"); 376 } 377 free(oblk); 378 io += 1; 379 if (iblk == MKUZ_BLK_EOF) { 380 if (io < i) 381 goto drain; 382 /* Last block, see if we need to add some padding */ 383 if ((offset % DEV_BSIZE) == 0) 384 continue; 385 oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE)); 386 oblk->info.blkno = io; 387 oblk->info.len = oblk->alen; 388 if (cfs.verbose != 0) { 389 fprintf(stderr, "padding data with %lu bytes " 390 "so that file size is multiple of %d\n", 391 (u_long)oblk->alen, DEV_BSIZE); 392 } 393 mkuz_fqueue_enq(cvp->results, oblk); 394 goto drain; 395 } 396 } 397 398 close(cfs.fdr); 399 400 if (cfs.verbose != 0 || summary.en != 0) { 401 et = getdtime(); 402 fprintf(summary.f, "compressed data to %ju bytes, saved %lld " 403 "bytes, %.2f%% decrease, %.2f bytes/sec.\n", offset, 404 (long long)(cfs.isize - offset), 405 100.0 * (long long)(cfs.isize - offset) / 406 (float)cfs.isize, (float)cfs.isize / (et - st)); 407 } 408 409 /* Convert to big endian */ 410 hdr.blksz = htonl(cfs.blksz); 411 hdr.nblocks = htonl(hdr.nblocks); 412 /* Write headers into pre-allocated space */ 413 lseek(cfs.fdw, 0, SEEK_SET); 414 if (writev(cfs.fdw, iov, 2) < 0) { 415 err(1, "writev(%s)", oname); 416 /* Not reached */ 417 } 418 cleanfile = NULL; 419 close(cfs.fdw); 420 421 exit(0); 422 } 423 424 static struct mkuz_blk * 425 readblock(int fd, u_int32_t clstsize) 426 { 427 int numread; 428 struct mkuz_blk *rval; 429 static int blockcnt; 430 off_t cpos; 431 432 rval = mkuz_blk_ctor(clstsize); 433 434 rval->info.blkno = blockcnt; 435 blockcnt += 1; 436 cpos = lseek(fd, 0, SEEK_CUR); 437 if (cpos < 0) { 438 err(1, "readblock: lseek() failed"); 439 /* Not reached */ 440 } 441 rval->info.offset = cpos; 442 443 numread = read(fd, rval->data, clstsize); 444 if (numread < 0) { 445 err(1, "readblock: read() failed"); 446 /* Not reached */ 447 } 448 if (numread == 0) { 449 free(rval); 450 return MKUZ_BLK_EOF; 451 } 452 rval->info.len = numread; 453 return rval; 454 } 455 456 static void 457 usage(void) 458 { 459 460 fprintf(stderr, "usage: mkuzip [-vZdLS] [-o outfile] [-s cluster_size] " 461 "[-j ncompr] infile\n"); 462 exit(1); 463 } 464 465 void * 466 mkuz_safe_malloc(size_t size) 467 { 468 void *retval; 469 470 retval = malloc(size); 471 if (retval == NULL) { 472 err(1, "can't allocate memory"); 473 /* Not reached */ 474 } 475 return retval; 476 } 477 478 void * 479 mkuz_safe_zmalloc(size_t size) 480 { 481 void *retval; 482 483 retval = mkuz_safe_malloc(size); 484 bzero(retval, size); 485 return retval; 486 } 487 488 static void 489 cleanup(void) 490 { 491 492 if (cleanfile != NULL) 493 unlink(cleanfile); 494 } 495 496 int 497 mkuz_memvcmp(const void *memory, unsigned char val, size_t size) 498 { 499 const u_char *mm; 500 501 mm = (const u_char *)memory; 502 return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0; 503 } 504