1 /* 2 * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/types.h> 31 #include <sys/disk.h> 32 #include <sys/endian.h> 33 #include <sys/param.h> 34 #include <sys/sysctl.h> 35 #include <sys/stat.h> 36 #include <sys/uio.h> 37 #include <netinet/in.h> 38 #include <assert.h> 39 #include <ctype.h> 40 #include <err.h> 41 #include <fcntl.h> 42 #include <pthread.h> 43 #include <signal.h> 44 #include <stdint.h> 45 #include <stdio.h> 46 #include <stdlib.h> 47 #include <string.h> 48 #include <unistd.h> 49 50 #include "mkuzip.h" 51 #include "mkuz_cloop.h" 52 #include "mkuz_blockcache.h" 53 #include "mkuz_zlib.h" 54 #include "mkuz_lzma.h" 55 #include "mkuz_blk.h" 56 #include "mkuz_cfg.h" 57 #include "mkuz_conveyor.h" 58 #include "mkuz_format.h" 59 #include "mkuz_fqueue.h" 60 #include "mkuz_time.h" 61 62 #define DEFAULT_CLSTSIZE 16384 63 64 static struct mkuz_format uzip_fmt = { 65 .magic = CLOOP_MAGIC_ZLIB, 66 .default_sufx = DEFAULT_SUFX_ZLIB, 67 .f_init = &mkuz_zlib_init, 68 .f_compress = &mkuz_zlib_compress 69 }; 70 71 static struct mkuz_format ulzma_fmt = { 72 .magic = CLOOP_MAGIC_LZMA, 73 .default_sufx = DEFAULT_SUFX_LZMA, 74 .f_init = &mkuz_lzma_init, 75 .f_compress = &mkuz_lzma_compress 76 }; 77 78 static struct mkuz_blk *readblock(int, u_int32_t); 79 static void usage(void); 80 static void cleanup(void); 81 82 static char *cleanfile = NULL; 83 84 static int 85 cmp_blkno(const struct mkuz_blk *bp, void *p) 86 { 87 uint32_t *ap; 88 89 ap = (uint32_t *)p; 90 91 return (bp->info.blkno == *ap); 92 } 93 94 int main(int argc, char **argv) 95 { 96 struct mkuz_cfg cfs; 97 char *iname, *oname; 98 uint64_t *toc; 99 int i, io, opt, tmp; 100 struct { 101 int en; 102 FILE *f; 103 } summary; 104 struct iovec iov[2]; 105 struct stat sb; 106 uint64_t offset, last_offset; 107 struct cloop_header hdr; 108 struct mkuz_conveyor *cvp; 109 void *c_ctx; 110 struct mkuz_blk_info *chit; 111 size_t ncpusz, ncpu, magiclen; 112 double st, et; 113 114 st = getdtime(); 115 116 ncpusz = sizeof(size_t); 117 if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, NULL, 0) < 0) { 118 ncpu = 1; 119 } else if (ncpu > MAX_WORKERS_AUTO) { 120 ncpu = MAX_WORKERS_AUTO; 121 } 122 123 memset(&hdr, 0, sizeof(hdr)); 124 cfs.blksz = DEFAULT_CLSTSIZE; 125 oname = NULL; 126 cfs.verbose = 0; 127 cfs.no_zcomp = 0; 128 cfs.en_dedup = 0; 129 summary.en = 0; 130 summary.f = stderr; 131 cfs.handler = &uzip_fmt; 132 cfs.nworkers = ncpu; 133 struct mkuz_blk *iblk, *oblk; 134 135 while((opt = getopt(argc, argv, "o:s:vZdLSj:")) != -1) { 136 switch(opt) { 137 case 'o': 138 oname = optarg; 139 break; 140 141 case 's': 142 tmp = atoi(optarg); 143 if (tmp <= 0) { 144 errx(1, "invalid cluster size specified: %s", 145 optarg); 146 /* Not reached */ 147 } 148 cfs.blksz = tmp; 149 break; 150 151 case 'v': 152 cfs.verbose = 1; 153 break; 154 155 case 'Z': 156 cfs.no_zcomp = 1; 157 break; 158 159 case 'd': 160 cfs.en_dedup = 1; 161 break; 162 163 case 'L': 164 cfs.handler = &ulzma_fmt; 165 break; 166 167 case 'S': 168 summary.en = 1; 169 summary.f = stdout; 170 break; 171 172 case 'j': 173 tmp = atoi(optarg); 174 if (tmp <= 0) { 175 errx(1, "invalid number of compression threads" 176 " specified: %s", optarg); 177 /* Not reached */ 178 } 179 cfs.nworkers = tmp; 180 break; 181 182 default: 183 usage(); 184 /* Not reached */ 185 } 186 } 187 argc -= optind; 188 argv += optind; 189 190 if (argc != 1) { 191 usage(); 192 /* Not reached */ 193 } 194 195 magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic)); 196 assert(magiclen < sizeof(hdr.magic)); 197 198 if (cfs.en_dedup != 0) { 199 hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3; 200 hdr.magic[CLOOP_OFS_COMPR] = 201 tolower(hdr.magic[CLOOP_OFS_COMPR]); 202 } 203 204 c_ctx = cfs.handler->f_init(cfs.blksz); 205 206 iname = argv[0]; 207 if (oname == NULL) { 208 asprintf(&oname, "%s%s", iname, cfs.handler->default_sufx); 209 if (oname == NULL) { 210 err(1, "can't allocate memory"); 211 /* Not reached */ 212 } 213 } 214 215 signal(SIGHUP, exit); 216 signal(SIGINT, exit); 217 signal(SIGTERM, exit); 218 signal(SIGXCPU, exit); 219 signal(SIGXFSZ, exit); 220 atexit(cleanup); 221 222 cfs.fdr = open(iname, O_RDONLY); 223 if (cfs.fdr < 0) { 224 err(1, "open(%s)", iname); 225 /* Not reached */ 226 } 227 if (fstat(cfs.fdr, &sb) != 0) { 228 err(1, "fstat(%s)", iname); 229 /* Not reached */ 230 } 231 if (S_ISCHR(sb.st_mode)) { 232 off_t ms; 233 234 if (ioctl(cfs.fdr, DIOCGMEDIASIZE, &ms) < 0) { 235 err(1, "ioctl(DIOCGMEDIASIZE)"); 236 /* Not reached */ 237 } 238 sb.st_size = ms; 239 } else if (!S_ISREG(sb.st_mode)) { 240 fprintf(stderr, "%s: not a character device or regular file\n", 241 iname); 242 exit(1); 243 } 244 hdr.nblocks = sb.st_size / cfs.blksz; 245 if ((sb.st_size % cfs.blksz) != 0) { 246 if (cfs.verbose != 0) 247 fprintf(stderr, "file size is not multiple " 248 "of %d, padding data\n", cfs.blksz); 249 hdr.nblocks++; 250 } 251 toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc)); 252 253 cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT, 254 S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); 255 if (cfs.fdw < 0) { 256 err(1, "open(%s)", oname); 257 /* Not reached */ 258 } 259 cleanfile = oname; 260 261 /* Prepare header that we will write later when we have index ready. */ 262 iov[0].iov_base = (char *)&hdr; 263 iov[0].iov_len = sizeof(hdr); 264 iov[1].iov_base = (char *)toc; 265 iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc); 266 offset = iov[0].iov_len + iov[1].iov_len; 267 268 /* Reserve space for header */ 269 lseek(cfs.fdw, offset, SEEK_SET); 270 271 if (cfs.verbose != 0) { 272 fprintf(stderr, "data size %ju bytes, number of clusters " 273 "%u, index length %zu bytes\n", sb.st_size, 274 hdr.nblocks, iov[1].iov_len); 275 } 276 277 cvp = mkuz_conveyor_ctor(&cfs); 278 279 last_offset = 0; 280 iblk = oblk = NULL; 281 for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) { 282 iblk = readblock(cfs.fdr, cfs.blksz); 283 mkuz_fqueue_enq(cvp->wrk_queue, iblk); 284 if (iblk != MKUZ_BLK_EOF && 285 (i < (cfs.nworkers * ITEMS_PER_WORKER))) { 286 continue; 287 } 288 drain: 289 oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io); 290 assert(oblk->info.blkno == (unsigned)io); 291 oblk->info.offset = offset; 292 chit = NULL; 293 if (cfs.en_dedup != 0 && oblk->info.len > 0) { 294 chit = mkuz_blkcache_regblock(cfs.fdw, oblk); 295 /* 296 * There should be at least one non-empty block 297 * between us and the backref'ed offset, otherwise 298 * we won't be able to parse that sequence correctly 299 * as it would be indistinguishible from another 300 * empty block. 301 */ 302 if (chit != NULL && chit->offset == last_offset) { 303 chit = NULL; 304 } 305 } 306 if (chit != NULL) { 307 toc[io] = htobe64(chit->offset); 308 oblk->info.len = 0; 309 } else { 310 if (oblk->info.len > 0 && write(cfs.fdw, oblk->data, 311 oblk->info.len) < 0) { 312 err(1, "write(%s)", oname); 313 /* Not reached */ 314 } 315 toc[io] = htobe64(offset); 316 last_offset = offset; 317 offset += oblk->info.len; 318 } 319 if (cfs.verbose != 0) { 320 fprintf(stderr, "cluster #%d, in %u bytes, " 321 "out len=%lu offset=%lu", io, cfs.blksz, 322 (u_long)oblk->info.len, (u_long)be64toh(toc[io])); 323 if (chit != NULL) { 324 fprintf(stderr, " (backref'ed to #%d)", 325 chit->blkno); 326 } 327 fprintf(stderr, "\n"); 328 } 329 free(oblk); 330 io += 1; 331 if (iblk == MKUZ_BLK_EOF) { 332 if (io < i) 333 goto drain; 334 /* Last block, see if we need to add some padding */ 335 if ((offset % DEV_BSIZE) == 0) 336 continue; 337 oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE)); 338 oblk->info.blkno = io; 339 oblk->info.len = oblk->alen; 340 if (cfs.verbose != 0) { 341 fprintf(stderr, "padding data with %lu bytes " 342 "so that file size is multiple of %d\n", 343 (u_long)oblk->alen, DEV_BSIZE); 344 } 345 mkuz_fqueue_enq(cvp->results, oblk); 346 goto drain; 347 } 348 } 349 350 close(cfs.fdr); 351 352 if (cfs.verbose != 0 || summary.en != 0) { 353 et = getdtime(); 354 fprintf(summary.f, "compressed data to %ju bytes, saved %lld " 355 "bytes, %.2f%% decrease, %.2f bytes/sec.\n", offset, 356 (long long)(sb.st_size - offset), 357 100.0 * (long long)(sb.st_size - offset) / 358 (float)sb.st_size, (float)sb.st_size / (et - st)); 359 } 360 361 /* Convert to big endian */ 362 hdr.blksz = htonl(cfs.blksz); 363 hdr.nblocks = htonl(hdr.nblocks); 364 /* Write headers into pre-allocated space */ 365 lseek(cfs.fdw, 0, SEEK_SET); 366 if (writev(cfs.fdw, iov, 2) < 0) { 367 err(1, "writev(%s)", oname); 368 /* Not reached */ 369 } 370 cleanfile = NULL; 371 close(cfs.fdw); 372 373 exit(0); 374 } 375 376 static struct mkuz_blk * 377 readblock(int fd, u_int32_t clstsize) 378 { 379 int numread; 380 struct mkuz_blk *rval; 381 static int blockcnt; 382 off_t cpos; 383 384 rval = mkuz_blk_ctor(clstsize); 385 386 rval->info.blkno = blockcnt; 387 blockcnt += 1; 388 cpos = lseek(fd, 0, SEEK_CUR); 389 if (cpos < 0) { 390 err(1, "readblock: lseek() failed"); 391 /* Not reached */ 392 } 393 rval->info.offset = cpos; 394 395 numread = read(fd, rval->data, clstsize); 396 if (numread < 0) { 397 err(1, "readblock: read() failed"); 398 /* Not reached */ 399 } 400 if (numread == 0) { 401 free(rval); 402 return MKUZ_BLK_EOF; 403 } 404 rval->info.len = numread; 405 return rval; 406 } 407 408 static void 409 usage(void) 410 { 411 412 fprintf(stderr, "usage: mkuzip [-vZdLS] [-o outfile] [-s cluster_size] " 413 "[-j ncompr] infile\n"); 414 exit(1); 415 } 416 417 void * 418 mkuz_safe_malloc(size_t size) 419 { 420 void *retval; 421 422 retval = malloc(size); 423 if (retval == NULL) { 424 err(1, "can't allocate memory"); 425 /* Not reached */ 426 } 427 return retval; 428 } 429 430 void * 431 mkuz_safe_zmalloc(size_t size) 432 { 433 void *retval; 434 435 retval = mkuz_safe_malloc(size); 436 bzero(retval, size); 437 return retval; 438 } 439 440 static void 441 cleanup(void) 442 { 443 444 if (cleanfile != NULL) 445 unlink(cleanfile); 446 } 447 448 int 449 mkuz_memvcmp(const void *memory, unsigned char val, size_t size) 450 { 451 const u_char *mm; 452 453 mm = (const u_char *)memory; 454 return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0; 455 } 456