1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/types.h> 33 #include <sys/endian.h> 34 #include <sys/param.h> 35 #include <sys/sysctl.h> 36 #include <sys/stat.h> 37 #include <sys/uio.h> 38 #include <netinet/in.h> 39 #include <assert.h> 40 #include <ctype.h> 41 #include <err.h> 42 #include <fcntl.h> 43 #include <pthread.h> 44 #include <signal.h> 45 #include <stdint.h> 46 #include <stdio.h> 47 #include <stdlib.h> 48 #include <string.h> 49 #include <unistd.h> 50 51 #include "mkuzip.h" 52 #include "mkuz_cloop.h" 53 #include "mkuz_blockcache.h" 54 #include "mkuz_lzma.h" 55 #include "mkuz_zlib.h" 56 #include "mkuz_zstd.h" 57 #include "mkuz_blk.h" 58 #include "mkuz_cfg.h" 59 #include "mkuz_conveyor.h" 60 #include "mkuz_format.h" 61 #include "mkuz_fqueue.h" 62 #include "mkuz_time.h" 63 #include "mkuz_insize.h" 64 65 #define DEFAULT_CLSTSIZE 16384 66 67 enum UZ_ALGORITHM { 68 UZ_ZLIB = 0, 69 UZ_LZMA, 70 UZ_ZSTD, 71 UZ_INVALID 72 }; 73 74 static const struct mkuz_format uzip_fmts[] = { 75 [UZ_ZLIB] = { 76 .option = "zlib", 77 .magic = CLOOP_MAGIC_ZLIB, 78 .default_sufx = DEFAULT_SUFX_ZLIB, 79 .f_compress_bound = mkuz_zlib_cbound, 80 .f_init = mkuz_zlib_init, 81 .f_compress = mkuz_zlib_compress, 82 }, 83 [UZ_LZMA] = { 84 .option = "lzma", 85 .magic = CLOOP_MAGIC_LZMA, 86 .default_sufx = DEFAULT_SUFX_LZMA, 87 .f_compress_bound = mkuz_lzma_cbound, 88 .f_init = mkuz_lzma_init, 89 .f_compress = mkuz_lzma_compress, 90 }, 91 [UZ_ZSTD] = { 92 .option = "zstd", 93 .magic = CLOOP_MAGIC_ZSTD, 94 .default_sufx = DEFAULT_SUFX_ZSTD, 95 .f_compress_bound = mkuz_zstd_cbound, 96 .f_init = mkuz_zstd_init, 97 .f_compress = mkuz_zstd_compress, 98 }, 99 }; 100 101 static struct mkuz_blk *readblock(int, u_int32_t); 102 static void usage(void); 103 static void cleanup(void); 104 105 static char *cleanfile = NULL; 106 107 static int 108 cmp_blkno(const struct mkuz_blk *bp, void *p) 109 { 110 uint32_t *ap; 111 112 ap = (uint32_t *)p; 113 114 return (bp->info.blkno == *ap); 115 } 116 117 int main(int argc, char **argv) 118 { 119 struct mkuz_cfg cfs; 120 char *oname; 121 uint64_t *toc; 122 int i, io, opt, tmp; 123 struct { 124 int en; 125 FILE *f; 126 } summary; 127 struct iovec iov[2]; 128 uint64_t offset, last_offset; 129 struct cloop_header hdr; 130 struct mkuz_conveyor *cvp; 131 struct mkuz_blk_info *chit; 132 size_t ncpusz, ncpu, magiclen; 133 double st, et; 134 enum UZ_ALGORITHM comp_alg; 135 int comp_level; 136 137 st = getdtime(); 138 139 ncpusz = sizeof(size_t); 140 if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, NULL, 0) < 0) { 141 ncpu = 1; 142 } else if (ncpu > MAX_WORKERS_AUTO) { 143 ncpu = MAX_WORKERS_AUTO; 144 } 145 146 memset(&hdr, 0, sizeof(hdr)); 147 cfs.blksz = DEFAULT_CLSTSIZE; 148 oname = NULL; 149 cfs.verbose = 0; 150 cfs.no_zcomp = 0; 151 cfs.en_dedup = 0; 152 summary.en = 0; 153 summary.f = stderr; 154 comp_alg = UZ_ZLIB; 155 comp_level = USE_DEFAULT_LEVEL; 156 cfs.nworkers = ncpu; 157 struct mkuz_blk *iblk, *oblk; 158 159 while((opt = getopt(argc, argv, "A:C:o:s:vZdLSj:")) != -1) { 160 switch(opt) { 161 case 'A': 162 for (tmp = UZ_ZLIB; tmp < UZ_INVALID; tmp++) { 163 if (strcmp(uzip_fmts[tmp].option, optarg) == 0) 164 break; 165 } 166 if (tmp == UZ_INVALID) 167 errx(1, "invalid algorithm specified: %s", 168 optarg); 169 /* Not reached */ 170 comp_alg = tmp; 171 break; 172 case 'C': 173 comp_level = atoi(optarg); 174 break; 175 case 'o': 176 oname = optarg; 177 break; 178 179 case 's': 180 tmp = atoi(optarg); 181 if (tmp <= 0) { 182 errx(1, "invalid cluster size specified: %s", 183 optarg); 184 /* Not reached */ 185 } 186 cfs.blksz = tmp; 187 break; 188 189 case 'v': 190 cfs.verbose = 1; 191 break; 192 193 case 'Z': 194 cfs.no_zcomp = 1; 195 break; 196 197 case 'd': 198 cfs.en_dedup = 1; 199 break; 200 201 case 'L': 202 comp_alg = UZ_LZMA; 203 break; 204 205 case 'S': 206 summary.en = 1; 207 summary.f = stdout; 208 break; 209 210 case 'j': 211 tmp = atoi(optarg); 212 if (tmp <= 0) { 213 errx(1, "invalid number of compression threads" 214 " specified: %s", optarg); 215 /* Not reached */ 216 } 217 cfs.nworkers = tmp; 218 break; 219 220 default: 221 usage(); 222 /* Not reached */ 223 } 224 } 225 argc -= optind; 226 argv += optind; 227 228 if (argc != 1) { 229 usage(); 230 /* Not reached */ 231 } 232 233 cfs.handler = &uzip_fmts[comp_alg]; 234 235 magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic)); 236 assert(magiclen < sizeof(hdr.magic)); 237 238 if (cfs.en_dedup != 0) { 239 /* 240 * Dedupe requires a version 3 format. Don't downgrade newer 241 * formats. 242 */ 243 if (hdr.magic[CLOOP_OFS_VERSN] == CLOOP_MAJVER_2) 244 hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3; 245 hdr.magic[CLOOP_OFS_COMPR] = 246 tolower(hdr.magic[CLOOP_OFS_COMPR]); 247 } 248 249 if (cfs.blksz % DEV_BSIZE != 0) 250 errx(1, "cluster size should be multiple of %d", DEV_BSIZE); 251 252 cfs.cbound_blksz = cfs.handler->f_compress_bound(cfs.blksz); 253 if (cfs.cbound_blksz > MAXPHYS) 254 errx(1, "maximal compressed cluster size %zu greater than MAXPHYS %zu", 255 cfs.cbound_blksz, (size_t)MAXPHYS); 256 257 cfs.handler->f_init(&comp_level); 258 cfs.comp_level = comp_level; 259 260 cfs.iname = argv[0]; 261 if (oname == NULL) { 262 asprintf(&oname, "%s%s", cfs.iname, cfs.handler->default_sufx); 263 if (oname == NULL) { 264 err(1, "can't allocate memory"); 265 /* Not reached */ 266 } 267 } 268 269 signal(SIGHUP, exit); 270 signal(SIGINT, exit); 271 signal(SIGTERM, exit); 272 signal(SIGXCPU, exit); 273 signal(SIGXFSZ, exit); 274 atexit(cleanup); 275 276 cfs.fdr = open(cfs.iname, O_RDONLY); 277 if (cfs.fdr < 0) { 278 err(1, "open(%s)", cfs.iname); 279 /* Not reached */ 280 } 281 cfs.isize = mkuz_get_insize(&cfs); 282 if (cfs.isize < 0) { 283 errx(1, "can't determine input image size"); 284 /* Not reached */ 285 } 286 hdr.nblocks = cfs.isize / cfs.blksz; 287 if ((cfs.isize % cfs.blksz) != 0) { 288 if (cfs.verbose != 0) 289 fprintf(stderr, "file size is not multiple " 290 "of %d, padding data\n", cfs.blksz); 291 hdr.nblocks++; 292 } 293 toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc)); 294 295 /* 296 * Initialize last+1 entry with non-heap trash. If final padding is 297 * added later, it may or may not be overwritten with an offset 298 * representing the length of the final compressed block. If not, 299 * initialize to a defined value. 300 */ 301 toc[hdr.nblocks] = 0; 302 303 cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT, 304 S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); 305 if (cfs.fdw < 0) { 306 err(1, "open(%s)", oname); 307 /* Not reached */ 308 } 309 cleanfile = oname; 310 311 /* Prepare header that we will write later when we have index ready. */ 312 iov[0].iov_base = (char *)&hdr; 313 iov[0].iov_len = sizeof(hdr); 314 iov[1].iov_base = (char *)toc; 315 iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc); 316 offset = iov[0].iov_len + iov[1].iov_len; 317 318 /* Reserve space for header */ 319 lseek(cfs.fdw, offset, SEEK_SET); 320 321 if (cfs.verbose != 0) { 322 fprintf(stderr, "data size %ju bytes, number of clusters " 323 "%u, index length %zu bytes\n", cfs.isize, 324 hdr.nblocks, iov[1].iov_len); 325 } 326 327 cvp = mkuz_conveyor_ctor(&cfs); 328 329 last_offset = 0; 330 iblk = oblk = NULL; 331 for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) { 332 iblk = readblock(cfs.fdr, cfs.blksz); 333 mkuz_fqueue_enq(cvp->wrk_queue, iblk); 334 if (iblk != MKUZ_BLK_EOF && 335 (i < (cfs.nworkers * ITEMS_PER_WORKER))) { 336 continue; 337 } 338 drain: 339 oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io); 340 assert(oblk->info.blkno == (unsigned)io); 341 oblk->info.offset = offset; 342 chit = NULL; 343 if (cfs.en_dedup != 0 && oblk->info.len > 0) { 344 chit = mkuz_blkcache_regblock(cfs.fdw, oblk); 345 /* 346 * There should be at least one non-empty block 347 * between us and the backref'ed offset, otherwise 348 * we won't be able to parse that sequence correctly 349 * as it would be indistinguishible from another 350 * empty block. 351 */ 352 if (chit != NULL && chit->offset == last_offset) { 353 chit = NULL; 354 } 355 } 356 if (chit != NULL) { 357 toc[io] = htobe64(chit->offset); 358 oblk->info.len = 0; 359 } else { 360 if (oblk->info.len > 0 && write(cfs.fdw, oblk->data, 361 oblk->info.len) < 0) { 362 err(1, "write(%s)", oname); 363 /* Not reached */ 364 } 365 toc[io] = htobe64(offset); 366 last_offset = offset; 367 offset += oblk->info.len; 368 } 369 if (cfs.verbose != 0) { 370 fprintf(stderr, "cluster #%d, in %u bytes, " 371 "out len=%lu offset=%lu", io, cfs.blksz, 372 (u_long)oblk->info.len, (u_long)be64toh(toc[io])); 373 if (chit != NULL) { 374 fprintf(stderr, " (backref'ed to #%d)", 375 chit->blkno); 376 } 377 fprintf(stderr, "\n"); 378 } 379 free(oblk); 380 io += 1; 381 if (iblk == MKUZ_BLK_EOF) { 382 if (io < i) 383 goto drain; 384 /* Last block, see if we need to add some padding */ 385 if ((offset % DEV_BSIZE) == 0) 386 continue; 387 oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE)); 388 oblk->info.blkno = io; 389 oblk->info.len = oblk->alen; 390 if (cfs.verbose != 0) { 391 fprintf(stderr, "padding data with %lu bytes " 392 "so that file size is multiple of %d\n", 393 (u_long)oblk->alen, DEV_BSIZE); 394 } 395 mkuz_fqueue_enq(cvp->results, oblk); 396 goto drain; 397 } 398 } 399 400 close(cfs.fdr); 401 402 if (cfs.verbose != 0 || summary.en != 0) { 403 et = getdtime(); 404 fprintf(summary.f, "compressed data to %ju bytes, saved %lld " 405 "bytes, %.2f%% decrease, %.2f bytes/sec.\n", offset, 406 (long long)(cfs.isize - offset), 407 100.0 * (long long)(cfs.isize - offset) / 408 (float)cfs.isize, (float)cfs.isize / (et - st)); 409 } 410 411 /* Convert to big endian */ 412 hdr.blksz = htonl(cfs.blksz); 413 hdr.nblocks = htonl(hdr.nblocks); 414 /* Write headers into pre-allocated space */ 415 lseek(cfs.fdw, 0, SEEK_SET); 416 if (writev(cfs.fdw, iov, 2) < 0) { 417 err(1, "writev(%s)", oname); 418 /* Not reached */ 419 } 420 cleanfile = NULL; 421 close(cfs.fdw); 422 423 exit(0); 424 } 425 426 static struct mkuz_blk * 427 readblock(int fd, u_int32_t clstsize) 428 { 429 int numread; 430 struct mkuz_blk *rval; 431 static int blockcnt; 432 off_t cpos; 433 434 rval = mkuz_blk_ctor(clstsize); 435 436 rval->info.blkno = blockcnt; 437 blockcnt += 1; 438 cpos = lseek(fd, 0, SEEK_CUR); 439 if (cpos < 0) { 440 err(1, "readblock: lseek() failed"); 441 /* Not reached */ 442 } 443 rval->info.offset = cpos; 444 445 numread = read(fd, rval->data, clstsize); 446 if (numread < 0) { 447 err(1, "readblock: read() failed"); 448 /* Not reached */ 449 } 450 if (numread == 0) { 451 free(rval); 452 return MKUZ_BLK_EOF; 453 } 454 rval->info.len = numread; 455 return rval; 456 } 457 458 static void 459 usage(void) 460 { 461 462 fprintf(stderr, "usage: mkuzip [-vZdLS] [-o outfile] [-s cluster_size] " 463 "[-j ncompr] infile\n"); 464 exit(1); 465 } 466 467 void * 468 mkuz_safe_malloc(size_t size) 469 { 470 void *retval; 471 472 retval = malloc(size); 473 if (retval == NULL) { 474 err(1, "can't allocate memory"); 475 /* Not reached */ 476 } 477 return retval; 478 } 479 480 void * 481 mkuz_safe_zmalloc(size_t size) 482 { 483 void *retval; 484 485 retval = mkuz_safe_malloc(size); 486 bzero(retval, size); 487 return retval; 488 } 489 490 static void 491 cleanup(void) 492 { 493 494 if (cleanfile != NULL) 495 unlink(cleanfile); 496 } 497 498 int 499 mkuz_memvcmp(const void *memory, unsigned char val, size_t size) 500 { 501 const u_char *mm; 502 503 mm = (const u_char *)memory; 504 return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0; 505 } 506