1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/types.h> 33 #include <sys/endian.h> 34 #include <sys/param.h> 35 #include <sys/sysctl.h> 36 #include <sys/stat.h> 37 #include <sys/uio.h> 38 #include <netinet/in.h> 39 #include <assert.h> 40 #include <ctype.h> 41 #include <err.h> 42 #include <fcntl.h> 43 #include <pthread.h> 44 #include <signal.h> 45 #include <stdint.h> 46 #include <stdio.h> 47 #include <stdlib.h> 48 #include <string.h> 49 #include <unistd.h> 50 51 #include "mkuzip.h" 52 #include "mkuz_cloop.h" 53 #include "mkuz_blockcache.h" 54 #include "mkuz_zlib.h" 55 #include "mkuz_lzma.h" 56 #include "mkuz_blk.h" 57 #include "mkuz_cfg.h" 58 #include "mkuz_conveyor.h" 59 #include "mkuz_format.h" 60 #include "mkuz_fqueue.h" 61 #include "mkuz_time.h" 62 #include "mkuz_insize.h" 63 64 #define DEFAULT_CLSTSIZE 16384 65 66 static struct mkuz_format uzip_fmt = { 67 .magic = CLOOP_MAGIC_ZLIB, 68 .default_sufx = DEFAULT_SUFX_ZLIB, 69 .f_init = &mkuz_zlib_init, 70 .f_compress = &mkuz_zlib_compress 71 }; 72 73 static struct mkuz_format ulzma_fmt = { 74 .magic = CLOOP_MAGIC_LZMA, 75 .default_sufx = DEFAULT_SUFX_LZMA, 76 .f_init = &mkuz_lzma_init, 77 .f_compress = &mkuz_lzma_compress 78 }; 79 80 static struct mkuz_blk *readblock(int, u_int32_t); 81 static void usage(void); 82 static void cleanup(void); 83 84 static char *cleanfile = NULL; 85 86 static int 87 cmp_blkno(const struct mkuz_blk *bp, void *p) 88 { 89 uint32_t *ap; 90 91 ap = (uint32_t *)p; 92 93 return (bp->info.blkno == *ap); 94 } 95 96 int main(int argc, char **argv) 97 { 98 struct mkuz_cfg cfs; 99 char *oname; 100 uint64_t *toc; 101 int i, io, opt, tmp; 102 struct { 103 int en; 104 FILE *f; 105 } summary; 106 struct iovec iov[2]; 107 uint64_t offset, last_offset; 108 struct cloop_header hdr; 109 struct mkuz_conveyor *cvp; 110 void *c_ctx; 111 struct mkuz_blk_info *chit; 112 size_t ncpusz, ncpu, magiclen; 113 double st, et; 114 115 st = getdtime(); 116 117 ncpusz = sizeof(size_t); 118 if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, NULL, 0) < 0) { 119 ncpu = 1; 120 } else if (ncpu > MAX_WORKERS_AUTO) { 121 ncpu = MAX_WORKERS_AUTO; 122 } 123 124 memset(&hdr, 0, sizeof(hdr)); 125 cfs.blksz = DEFAULT_CLSTSIZE; 126 oname = NULL; 127 cfs.verbose = 0; 128 cfs.no_zcomp = 0; 129 cfs.en_dedup = 0; 130 summary.en = 0; 131 summary.f = stderr; 132 cfs.handler = &uzip_fmt; 133 cfs.nworkers = ncpu; 134 struct mkuz_blk *iblk, *oblk; 135 136 while((opt = getopt(argc, argv, "o:s:vZdLSj:")) != -1) { 137 switch(opt) { 138 case 'o': 139 oname = optarg; 140 break; 141 142 case 's': 143 tmp = atoi(optarg); 144 if (tmp <= 0) { 145 errx(1, "invalid cluster size specified: %s", 146 optarg); 147 /* Not reached */ 148 } 149 cfs.blksz = tmp; 150 break; 151 152 case 'v': 153 cfs.verbose = 1; 154 break; 155 156 case 'Z': 157 cfs.no_zcomp = 1; 158 break; 159 160 case 'd': 161 cfs.en_dedup = 1; 162 break; 163 164 case 'L': 165 cfs.handler = &ulzma_fmt; 166 break; 167 168 case 'S': 169 summary.en = 1; 170 summary.f = stdout; 171 break; 172 173 case 'j': 174 tmp = atoi(optarg); 175 if (tmp <= 0) { 176 errx(1, "invalid number of compression threads" 177 " specified: %s", optarg); 178 /* Not reached */ 179 } 180 cfs.nworkers = tmp; 181 break; 182 183 default: 184 usage(); 185 /* Not reached */ 186 } 187 } 188 argc -= optind; 189 argv += optind; 190 191 if (argc != 1) { 192 usage(); 193 /* Not reached */ 194 } 195 196 magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic)); 197 assert(magiclen < sizeof(hdr.magic)); 198 199 if (cfs.en_dedup != 0) { 200 hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3; 201 hdr.magic[CLOOP_OFS_COMPR] = 202 tolower(hdr.magic[CLOOP_OFS_COMPR]); 203 } 204 205 c_ctx = cfs.handler->f_init(cfs.blksz); 206 207 cfs.iname = argv[0]; 208 if (oname == NULL) { 209 asprintf(&oname, "%s%s", cfs.iname, cfs.handler->default_sufx); 210 if (oname == NULL) { 211 err(1, "can't allocate memory"); 212 /* Not reached */ 213 } 214 } 215 216 signal(SIGHUP, exit); 217 signal(SIGINT, exit); 218 signal(SIGTERM, exit); 219 signal(SIGXCPU, exit); 220 signal(SIGXFSZ, exit); 221 atexit(cleanup); 222 223 cfs.fdr = open(cfs.iname, O_RDONLY); 224 if (cfs.fdr < 0) { 225 err(1, "open(%s)", cfs.iname); 226 /* Not reached */ 227 } 228 cfs.isize = mkuz_get_insize(&cfs); 229 if (cfs.isize < 0) { 230 errx(1, "can't determine input image size"); 231 /* Not reached */ 232 } 233 hdr.nblocks = cfs.isize / cfs.blksz; 234 if ((cfs.isize % cfs.blksz) != 0) { 235 if (cfs.verbose != 0) 236 fprintf(stderr, "file size is not multiple " 237 "of %d, padding data\n", cfs.blksz); 238 hdr.nblocks++; 239 } 240 toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc)); 241 242 cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT, 243 S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); 244 if (cfs.fdw < 0) { 245 err(1, "open(%s)", oname); 246 /* Not reached */ 247 } 248 cleanfile = oname; 249 250 /* Prepare header that we will write later when we have index ready. */ 251 iov[0].iov_base = (char *)&hdr; 252 iov[0].iov_len = sizeof(hdr); 253 iov[1].iov_base = (char *)toc; 254 iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc); 255 offset = iov[0].iov_len + iov[1].iov_len; 256 257 /* Reserve space for header */ 258 lseek(cfs.fdw, offset, SEEK_SET); 259 260 if (cfs.verbose != 0) { 261 fprintf(stderr, "data size %ju bytes, number of clusters " 262 "%u, index length %zu bytes\n", cfs.isize, 263 hdr.nblocks, iov[1].iov_len); 264 } 265 266 cvp = mkuz_conveyor_ctor(&cfs); 267 268 last_offset = 0; 269 iblk = oblk = NULL; 270 for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) { 271 iblk = readblock(cfs.fdr, cfs.blksz); 272 mkuz_fqueue_enq(cvp->wrk_queue, iblk); 273 if (iblk != MKUZ_BLK_EOF && 274 (i < (cfs.nworkers * ITEMS_PER_WORKER))) { 275 continue; 276 } 277 drain: 278 oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io); 279 assert(oblk->info.blkno == (unsigned)io); 280 oblk->info.offset = offset; 281 chit = NULL; 282 if (cfs.en_dedup != 0 && oblk->info.len > 0) { 283 chit = mkuz_blkcache_regblock(cfs.fdw, oblk); 284 /* 285 * There should be at least one non-empty block 286 * between us and the backref'ed offset, otherwise 287 * we won't be able to parse that sequence correctly 288 * as it would be indistinguishible from another 289 * empty block. 290 */ 291 if (chit != NULL && chit->offset == last_offset) { 292 chit = NULL; 293 } 294 } 295 if (chit != NULL) { 296 toc[io] = htobe64(chit->offset); 297 oblk->info.len = 0; 298 } else { 299 if (oblk->info.len > 0 && write(cfs.fdw, oblk->data, 300 oblk->info.len) < 0) { 301 err(1, "write(%s)", oname); 302 /* Not reached */ 303 } 304 toc[io] = htobe64(offset); 305 last_offset = offset; 306 offset += oblk->info.len; 307 } 308 if (cfs.verbose != 0) { 309 fprintf(stderr, "cluster #%d, in %u bytes, " 310 "out len=%lu offset=%lu", io, cfs.blksz, 311 (u_long)oblk->info.len, (u_long)be64toh(toc[io])); 312 if (chit != NULL) { 313 fprintf(stderr, " (backref'ed to #%d)", 314 chit->blkno); 315 } 316 fprintf(stderr, "\n"); 317 } 318 free(oblk); 319 io += 1; 320 if (iblk == MKUZ_BLK_EOF) { 321 if (io < i) 322 goto drain; 323 /* Last block, see if we need to add some padding */ 324 if ((offset % DEV_BSIZE) == 0) 325 continue; 326 oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE)); 327 oblk->info.blkno = io; 328 oblk->info.len = oblk->alen; 329 if (cfs.verbose != 0) { 330 fprintf(stderr, "padding data with %lu bytes " 331 "so that file size is multiple of %d\n", 332 (u_long)oblk->alen, DEV_BSIZE); 333 } 334 mkuz_fqueue_enq(cvp->results, oblk); 335 goto drain; 336 } 337 } 338 339 close(cfs.fdr); 340 341 if (cfs.verbose != 0 || summary.en != 0) { 342 et = getdtime(); 343 fprintf(summary.f, "compressed data to %ju bytes, saved %lld " 344 "bytes, %.2f%% decrease, %.2f bytes/sec.\n", offset, 345 (long long)(cfs.isize - offset), 346 100.0 * (long long)(cfs.isize - offset) / 347 (float)cfs.isize, (float)cfs.isize / (et - st)); 348 } 349 350 /* Convert to big endian */ 351 hdr.blksz = htonl(cfs.blksz); 352 hdr.nblocks = htonl(hdr.nblocks); 353 /* Write headers into pre-allocated space */ 354 lseek(cfs.fdw, 0, SEEK_SET); 355 if (writev(cfs.fdw, iov, 2) < 0) { 356 err(1, "writev(%s)", oname); 357 /* Not reached */ 358 } 359 cleanfile = NULL; 360 close(cfs.fdw); 361 362 exit(0); 363 } 364 365 static struct mkuz_blk * 366 readblock(int fd, u_int32_t clstsize) 367 { 368 int numread; 369 struct mkuz_blk *rval; 370 static int blockcnt; 371 off_t cpos; 372 373 rval = mkuz_blk_ctor(clstsize); 374 375 rval->info.blkno = blockcnt; 376 blockcnt += 1; 377 cpos = lseek(fd, 0, SEEK_CUR); 378 if (cpos < 0) { 379 err(1, "readblock: lseek() failed"); 380 /* Not reached */ 381 } 382 rval->info.offset = cpos; 383 384 numread = read(fd, rval->data, clstsize); 385 if (numread < 0) { 386 err(1, "readblock: read() failed"); 387 /* Not reached */ 388 } 389 if (numread == 0) { 390 free(rval); 391 return MKUZ_BLK_EOF; 392 } 393 rval->info.len = numread; 394 return rval; 395 } 396 397 static void 398 usage(void) 399 { 400 401 fprintf(stderr, "usage: mkuzip [-vZdLS] [-o outfile] [-s cluster_size] " 402 "[-j ncompr] infile\n"); 403 exit(1); 404 } 405 406 void * 407 mkuz_safe_malloc(size_t size) 408 { 409 void *retval; 410 411 retval = malloc(size); 412 if (retval == NULL) { 413 err(1, "can't allocate memory"); 414 /* Not reached */ 415 } 416 return retval; 417 } 418 419 void * 420 mkuz_safe_zmalloc(size_t size) 421 { 422 void *retval; 423 424 retval = mkuz_safe_malloc(size); 425 bzero(retval, size); 426 return retval; 427 } 428 429 static void 430 cleanup(void) 431 { 432 433 if (cleanfile != NULL) 434 unlink(cleanfile); 435 } 436 437 int 438 mkuz_memvcmp(const void *memory, unsigned char val, size_t size) 439 { 440 const u_char *mm; 441 442 mm = (const u_char *)memory; 443 return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0; 444 } 445