1 /* 2 * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include <sys/types.h> 32 #include <sys/disk.h> 33 #include <sys/endian.h> 34 #include <sys/param.h> 35 #include <sys/stat.h> 36 #include <sys/uio.h> 37 #include <netinet/in.h> 38 #include <ctype.h> 39 #include <err.h> 40 #include <fcntl.h> 41 #include <signal.h> 42 #include <stdio.h> 43 #include <stdlib.h> 44 #include <string.h> 45 #include <unistd.h> 46 47 #include "mkuzip.h" 48 #include "mkuz_cloop.h" 49 #include "mkuz_blockcache.h" 50 #include "mkuz_zlib.h" 51 #include "mkuz_lzma.h" 52 53 #define DEFINE_RAW_METHOD(func, rval, args...) typedef rval (*func##_t)(args) 54 55 #define DEFAULT_CLSTSIZE 16384 56 57 DEFINE_RAW_METHOD(f_init, void *, uint32_t); 58 DEFINE_RAW_METHOD(f_compress, void, const char *, uint32_t *); 59 60 struct mkuz_format { 61 const char *magic; 62 const char *default_sufx; 63 f_init_t f_init; 64 f_compress_t f_compress; 65 }; 66 67 static struct mkuz_format uzip_fmt = { 68 .magic = CLOOP_MAGIC_ZLIB, 69 .default_sufx = DEFAULT_SUFX_ZLIB, 70 .f_init = &mkuz_zlib_init, 71 .f_compress = &mkuz_zlib_compress 72 }; 73 74 static struct mkuz_format ulzma_fmt = { 75 .magic = CLOOP_MAGIC_LZMA, 76 .default_sufx = DEFAULT_SUFX_LZMA, 77 .f_init = &mkuz_lzma_init, 78 .f_compress = &mkuz_lzma_compress 79 }; 80 81 static char *readblock(int, char *, u_int32_t); 82 static void usage(void); 83 static void cleanup(void); 84 static int memvcmp(const void *, unsigned char, size_t); 85 86 static char *cleanfile = NULL; 87 88 int main(int argc, char **argv) 89 { 90 char *iname, *oname, *obuf, *ibuf; 91 uint64_t *toc; 92 int fdr, fdw, i, opt, verbose, no_zcomp, tmp, en_dedup; 93 struct iovec iov[2]; 94 struct stat sb; 95 uint32_t destlen; 96 uint64_t offset, last_offset; 97 struct cloop_header hdr; 98 struct mkuz_blkcache_hit *chit; 99 const struct mkuz_format *handler; 100 101 memset(&hdr, 0, sizeof(hdr)); 102 hdr.blksz = DEFAULT_CLSTSIZE; 103 oname = NULL; 104 verbose = 0; 105 no_zcomp = 0; 106 en_dedup = 0; 107 handler = &uzip_fmt; 108 109 while((opt = getopt(argc, argv, "o:s:vZdL")) != -1) { 110 switch(opt) { 111 case 'o': 112 oname = optarg; 113 break; 114 115 case 's': 116 tmp = atoi(optarg); 117 if (tmp <= 0) { 118 errx(1, "invalid cluster size specified: %s", 119 optarg); 120 /* Not reached */ 121 } 122 hdr.blksz = tmp; 123 break; 124 125 case 'v': 126 verbose = 1; 127 break; 128 129 case 'Z': 130 no_zcomp = 1; 131 break; 132 133 case 'd': 134 en_dedup = 1; 135 break; 136 137 case 'L': 138 handler = &ulzma_fmt; 139 break; 140 141 default: 142 usage(); 143 /* Not reached */ 144 } 145 } 146 argc -= optind; 147 argv += optind; 148 149 if (argc != 1) { 150 usage(); 151 /* Not reached */ 152 } 153 154 strcpy(hdr.magic, handler->magic); 155 156 if (en_dedup != 0) { 157 hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3; 158 hdr.magic[CLOOP_OFS_COMPR] = 159 tolower(hdr.magic[CLOOP_OFS_COMPR]); 160 } 161 162 obuf = handler->f_init(hdr.blksz); 163 164 iname = argv[0]; 165 if (oname == NULL) { 166 asprintf(&oname, "%s%s", iname, handler->default_sufx); 167 if (oname == NULL) { 168 err(1, "can't allocate memory"); 169 /* Not reached */ 170 } 171 } 172 173 ibuf = mkuz_safe_malloc(hdr.blksz); 174 175 signal(SIGHUP, exit); 176 signal(SIGINT, exit); 177 signal(SIGTERM, exit); 178 signal(SIGXCPU, exit); 179 signal(SIGXFSZ, exit); 180 atexit(cleanup); 181 182 fdr = open(iname, O_RDONLY); 183 if (fdr < 0) { 184 err(1, "open(%s)", iname); 185 /* Not reached */ 186 } 187 if (fstat(fdr, &sb) != 0) { 188 err(1, "fstat(%s)", iname); 189 /* Not reached */ 190 } 191 if (S_ISCHR(sb.st_mode)) { 192 off_t ms; 193 194 if (ioctl(fdr, DIOCGMEDIASIZE, &ms) < 0) { 195 err(1, "ioctl(DIOCGMEDIASIZE)"); 196 /* Not reached */ 197 } 198 sb.st_size = ms; 199 } else if (!S_ISREG(sb.st_mode)) { 200 fprintf(stderr, "%s: not a character device or regular file\n", 201 iname); 202 exit(1); 203 } 204 hdr.nblocks = sb.st_size / hdr.blksz; 205 if ((sb.st_size % hdr.blksz) != 0) { 206 if (verbose != 0) 207 fprintf(stderr, "file size is not multiple " 208 "of %d, padding data\n", hdr.blksz); 209 hdr.nblocks++; 210 } 211 toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc)); 212 213 fdw = open(oname, O_WRONLY | O_TRUNC | O_CREAT, 214 S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); 215 if (fdw < 0) { 216 err(1, "open(%s)", oname); 217 /* Not reached */ 218 } 219 cleanfile = oname; 220 221 /* Prepare header that we will write later when we have index ready. */ 222 iov[0].iov_base = (char *)&hdr; 223 iov[0].iov_len = sizeof(hdr); 224 iov[1].iov_base = (char *)toc; 225 iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc); 226 offset = iov[0].iov_len + iov[1].iov_len; 227 228 /* Reserve space for header */ 229 lseek(fdw, offset, SEEK_SET); 230 231 if (verbose != 0) 232 fprintf(stderr, "data size %ju bytes, number of clusters " 233 "%u, index length %zu bytes\n", sb.st_size, 234 hdr.nblocks, iov[1].iov_len); 235 236 last_offset = 0; 237 for(i = 0; i == 0 || ibuf != NULL; i++) { 238 ibuf = readblock(fdr, ibuf, hdr.blksz); 239 if (ibuf != NULL) { 240 if (no_zcomp == 0 && \ 241 memvcmp(ibuf, '\0', hdr.blksz) != 0) { 242 /* All zeroes block */ 243 destlen = 0; 244 } else { 245 handler->f_compress(ibuf, &destlen); 246 } 247 } else { 248 destlen = DEV_BSIZE - (offset % DEV_BSIZE); 249 memset(obuf, 0, destlen); 250 if (verbose != 0) 251 fprintf(stderr, "padding data with %lu bytes " 252 "so that file size is multiple of %d\n", 253 (u_long)destlen, DEV_BSIZE); 254 } 255 if (destlen > 0 && en_dedup != 0) { 256 chit = mkuz_blkcache_regblock(fdw, i, offset, destlen, 257 obuf); 258 /* 259 * There should be at least one non-empty block 260 * between us and the backref'ed offset, otherwise 261 * we won't be able to parse that sequence correctly 262 * as it would be indistinguishible from another 263 * empty block. 264 */ 265 if (chit != NULL && chit->offset == last_offset) { 266 chit = NULL; 267 } 268 } else { 269 chit = NULL; 270 } 271 if (chit != NULL) { 272 toc[i] = htobe64(chit->offset); 273 } else { 274 if (destlen > 0 && write(fdw, obuf, destlen) < 0) { 275 err(1, "write(%s)", oname); 276 /* Not reached */ 277 } 278 toc[i] = htobe64(offset); 279 last_offset = offset; 280 offset += destlen; 281 } 282 if (ibuf != NULL && verbose != 0) { 283 fprintf(stderr, "cluster #%d, in %u bytes, " 284 "out len=%lu offset=%lu", i, hdr.blksz, 285 chit == NULL ? (u_long)destlen : 0, 286 (u_long)be64toh(toc[i])); 287 if (chit != NULL) { 288 fprintf(stderr, " (backref'ed to #%d)", 289 chit->blkno); 290 } 291 fprintf(stderr, "\n"); 292 293 } 294 } 295 close(fdr); 296 297 if (verbose != 0) 298 fprintf(stderr, "compressed data to %ju bytes, saved %lld " 299 "bytes, %.2f%% decrease.\n", offset, 300 (long long)(sb.st_size - offset), 301 100.0 * (long long)(sb.st_size - offset) / 302 (float)sb.st_size); 303 304 /* Convert to big endian */ 305 hdr.blksz = htonl(hdr.blksz); 306 hdr.nblocks = htonl(hdr.nblocks); 307 /* Write headers into pre-allocated space */ 308 lseek(fdw, 0, SEEK_SET); 309 if (writev(fdw, iov, 2) < 0) { 310 err(1, "writev(%s)", oname); 311 /* Not reached */ 312 } 313 cleanfile = NULL; 314 close(fdw); 315 316 exit(0); 317 } 318 319 static char * 320 readblock(int fd, char *ibuf, u_int32_t clstsize) 321 { 322 int numread; 323 324 bzero(ibuf, clstsize); 325 numread = read(fd, ibuf, clstsize); 326 if (numread < 0) { 327 err(1, "read() failed"); 328 /* Not reached */ 329 } 330 if (numread == 0) { 331 return NULL; 332 } 333 return ibuf; 334 } 335 336 static void 337 usage(void) 338 { 339 340 fprintf(stderr, "usage: mkuzip [-vZdL] [-o outfile] [-s cluster_size] " 341 "infile\n"); 342 exit(1); 343 } 344 345 void * 346 mkuz_safe_malloc(size_t size) 347 { 348 void *retval; 349 350 retval = malloc(size); 351 if (retval == NULL) { 352 err(1, "can't allocate memory"); 353 /* Not reached */ 354 } 355 return retval; 356 } 357 358 static void 359 cleanup(void) 360 { 361 362 if (cleanfile != NULL) 363 unlink(cleanfile); 364 } 365 366 static int 367 memvcmp(const void *memory, unsigned char val, size_t size) 368 { 369 const u_char *mm; 370 371 mm = (const u_char *)memory; 372 return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0; 373 } 374