1 /* 2 * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include <sys/types.h> 32 #include <sys/disk.h> 33 #include <sys/endian.h> 34 #include <sys/param.h> 35 #include <sys/stat.h> 36 #include <sys/uio.h> 37 #include <netinet/in.h> 38 #include <ctype.h> 39 #include <err.h> 40 #include <fcntl.h> 41 #include <signal.h> 42 #include <stdio.h> 43 #include <stdlib.h> 44 #include <string.h> 45 #include <unistd.h> 46 47 #include "mkuzip.h" 48 #include "mkuz_cloop.h" 49 #include "mkuz_blockcache.h" 50 #include "mkuz_zlib.h" 51 #include "mkuz_lzma.h" 52 53 #define DEFINE_RAW_METHOD(func, rval, args...) typedef rval (*func##_t)(args) 54 55 #define DEFAULT_CLSTSIZE 16384 56 57 DEFINE_RAW_METHOD(f_init, void *, uint32_t); 58 DEFINE_RAW_METHOD(f_compress, void, const char *, uint32_t *); 59 60 struct mkuz_format { 61 const char *magic; 62 const char *default_sufx; 63 f_init_t f_init; 64 f_compress_t f_compress; 65 }; 66 67 static struct mkuz_format uzip_fmt = { 68 .magic = CLOOP_MAGIC_ZLIB, 69 .default_sufx = DEFAULT_SUFX_ZLIB, 70 .f_init = &mkuz_zlib_init, 71 .f_compress = &mkuz_zlib_compress 72 }; 73 74 static struct mkuz_format ulzma_fmt = { 75 .magic = CLOOP_MAGIC_LZMA, 76 .default_sufx = DEFAULT_SUFX_LZMA, 77 .f_init = &mkuz_lzma_init, 78 .f_compress = &mkuz_lzma_compress 79 }; 80 81 static char *readblock(int, char *, u_int32_t); 82 static void usage(void); 83 static void cleanup(void); 84 static int memvcmp(const void *, unsigned char, size_t); 85 86 static char *cleanfile = NULL; 87 88 int main(int argc, char **argv) 89 { 90 char *iname, *oname, *obuf, *ibuf; 91 uint64_t *toc; 92 int fdr, fdw, i, opt, verbose, no_zcomp, tmp, en_dedup; 93 struct { 94 int en; 95 FILE *f; 96 } summary; 97 struct iovec iov[2]; 98 struct stat sb; 99 uint32_t destlen; 100 uint64_t offset, last_offset; 101 struct cloop_header hdr; 102 struct mkuz_blkcache_hit *chit; 103 const struct mkuz_format *handler; 104 105 memset(&hdr, 0, sizeof(hdr)); 106 hdr.blksz = DEFAULT_CLSTSIZE; 107 oname = NULL; 108 verbose = 0; 109 no_zcomp = 0; 110 en_dedup = 0; 111 summary.en = 0; 112 summary.f = stderr; 113 handler = &uzip_fmt; 114 115 while((opt = getopt(argc, argv, "o:s:vZdLS")) != -1) { 116 switch(opt) { 117 case 'o': 118 oname = optarg; 119 break; 120 121 case 's': 122 tmp = atoi(optarg); 123 if (tmp <= 0) { 124 errx(1, "invalid cluster size specified: %s", 125 optarg); 126 /* Not reached */ 127 } 128 hdr.blksz = tmp; 129 break; 130 131 case 'v': 132 verbose = 1; 133 break; 134 135 case 'Z': 136 no_zcomp = 1; 137 break; 138 139 case 'd': 140 en_dedup = 1; 141 break; 142 143 case 'L': 144 handler = &ulzma_fmt; 145 break; 146 147 case 'S': 148 summary.en = 1; 149 summary.f = stdout; 150 break; 151 152 default: 153 usage(); 154 /* Not reached */ 155 } 156 } 157 argc -= optind; 158 argv += optind; 159 160 if (argc != 1) { 161 usage(); 162 /* Not reached */ 163 } 164 165 strcpy(hdr.magic, handler->magic); 166 167 if (en_dedup != 0) { 168 hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3; 169 hdr.magic[CLOOP_OFS_COMPR] = 170 tolower(hdr.magic[CLOOP_OFS_COMPR]); 171 } 172 173 obuf = handler->f_init(hdr.blksz); 174 175 iname = argv[0]; 176 if (oname == NULL) { 177 asprintf(&oname, "%s%s", iname, handler->default_sufx); 178 if (oname == NULL) { 179 err(1, "can't allocate memory"); 180 /* Not reached */ 181 } 182 } 183 184 ibuf = mkuz_safe_malloc(hdr.blksz); 185 186 signal(SIGHUP, exit); 187 signal(SIGINT, exit); 188 signal(SIGTERM, exit); 189 signal(SIGXCPU, exit); 190 signal(SIGXFSZ, exit); 191 atexit(cleanup); 192 193 fdr = open(iname, O_RDONLY); 194 if (fdr < 0) { 195 err(1, "open(%s)", iname); 196 /* Not reached */ 197 } 198 if (fstat(fdr, &sb) != 0) { 199 err(1, "fstat(%s)", iname); 200 /* Not reached */ 201 } 202 if (S_ISCHR(sb.st_mode)) { 203 off_t ms; 204 205 if (ioctl(fdr, DIOCGMEDIASIZE, &ms) < 0) { 206 err(1, "ioctl(DIOCGMEDIASIZE)"); 207 /* Not reached */ 208 } 209 sb.st_size = ms; 210 } else if (!S_ISREG(sb.st_mode)) { 211 fprintf(stderr, "%s: not a character device or regular file\n", 212 iname); 213 exit(1); 214 } 215 hdr.nblocks = sb.st_size / hdr.blksz; 216 if ((sb.st_size % hdr.blksz) != 0) { 217 if (verbose != 0) 218 fprintf(stderr, "file size is not multiple " 219 "of %d, padding data\n", hdr.blksz); 220 hdr.nblocks++; 221 } 222 toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc)); 223 224 fdw = open(oname, O_WRONLY | O_TRUNC | O_CREAT, 225 S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); 226 if (fdw < 0) { 227 err(1, "open(%s)", oname); 228 /* Not reached */ 229 } 230 cleanfile = oname; 231 232 /* Prepare header that we will write later when we have index ready. */ 233 iov[0].iov_base = (char *)&hdr; 234 iov[0].iov_len = sizeof(hdr); 235 iov[1].iov_base = (char *)toc; 236 iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc); 237 offset = iov[0].iov_len + iov[1].iov_len; 238 239 /* Reserve space for header */ 240 lseek(fdw, offset, SEEK_SET); 241 242 if (verbose != 0) 243 fprintf(stderr, "data size %ju bytes, number of clusters " 244 "%u, index length %zu bytes\n", sb.st_size, 245 hdr.nblocks, iov[1].iov_len); 246 247 last_offset = 0; 248 for(i = 0; i == 0 || ibuf != NULL; i++) { 249 ibuf = readblock(fdr, ibuf, hdr.blksz); 250 if (ibuf != NULL) { 251 if (no_zcomp == 0 && \ 252 memvcmp(ibuf, '\0', hdr.blksz) != 0) { 253 /* All zeroes block */ 254 destlen = 0; 255 } else { 256 handler->f_compress(ibuf, &destlen); 257 } 258 } else { 259 destlen = DEV_BSIZE - (offset % DEV_BSIZE); 260 memset(obuf, 0, destlen); 261 if (verbose != 0) 262 fprintf(stderr, "padding data with %lu bytes " 263 "so that file size is multiple of %d\n", 264 (u_long)destlen, DEV_BSIZE); 265 } 266 if (destlen > 0 && en_dedup != 0) { 267 chit = mkuz_blkcache_regblock(fdw, i, offset, destlen, 268 obuf); 269 /* 270 * There should be at least one non-empty block 271 * between us and the backref'ed offset, otherwise 272 * we won't be able to parse that sequence correctly 273 * as it would be indistinguishible from another 274 * empty block. 275 */ 276 if (chit != NULL && chit->offset == last_offset) { 277 chit = NULL; 278 } 279 } else { 280 chit = NULL; 281 } 282 if (chit != NULL) { 283 toc[i] = htobe64(chit->offset); 284 } else { 285 if (destlen > 0 && write(fdw, obuf, destlen) < 0) { 286 err(1, "write(%s)", oname); 287 /* Not reached */ 288 } 289 toc[i] = htobe64(offset); 290 last_offset = offset; 291 offset += destlen; 292 } 293 if (ibuf != NULL && verbose != 0) { 294 fprintf(stderr, "cluster #%d, in %u bytes, " 295 "out len=%lu offset=%lu", i, hdr.blksz, 296 chit == NULL ? (u_long)destlen : 0, 297 (u_long)be64toh(toc[i])); 298 if (chit != NULL) { 299 fprintf(stderr, " (backref'ed to #%d)", 300 chit->blkno); 301 } 302 fprintf(stderr, "\n"); 303 304 } 305 } 306 close(fdr); 307 308 if (verbose != 0 || summary.en != 0) 309 fprintf(summary.f, "compressed data to %ju bytes, saved %lld " 310 "bytes, %.2f%% decrease.\n", offset, 311 (long long)(sb.st_size - offset), 312 100.0 * (long long)(sb.st_size - offset) / 313 (float)sb.st_size); 314 315 /* Convert to big endian */ 316 hdr.blksz = htonl(hdr.blksz); 317 hdr.nblocks = htonl(hdr.nblocks); 318 /* Write headers into pre-allocated space */ 319 lseek(fdw, 0, SEEK_SET); 320 if (writev(fdw, iov, 2) < 0) { 321 err(1, "writev(%s)", oname); 322 /* Not reached */ 323 } 324 cleanfile = NULL; 325 close(fdw); 326 327 exit(0); 328 } 329 330 static char * 331 readblock(int fd, char *ibuf, u_int32_t clstsize) 332 { 333 int numread; 334 335 bzero(ibuf, clstsize); 336 numread = read(fd, ibuf, clstsize); 337 if (numread < 0) { 338 err(1, "read() failed"); 339 /* Not reached */ 340 } 341 if (numread == 0) { 342 return NULL; 343 } 344 return ibuf; 345 } 346 347 static void 348 usage(void) 349 { 350 351 fprintf(stderr, "usage: mkuzip [-vZdLS] [-o outfile] [-s cluster_size] " 352 "infile\n"); 353 exit(1); 354 } 355 356 void * 357 mkuz_safe_malloc(size_t size) 358 { 359 void *retval; 360 361 retval = malloc(size); 362 if (retval == NULL) { 363 err(1, "can't allocate memory"); 364 /* Not reached */ 365 } 366 return retval; 367 } 368 369 static void 370 cleanup(void) 371 { 372 373 if (cleanfile != NULL) 374 unlink(cleanfile); 375 } 376 377 static int 378 memvcmp(const void *memory, unsigned char val, size_t size) 379 { 380 const u_char *mm; 381 382 mm = (const u_char *)memory; 383 return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0; 384 } 385