1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 /* 33 * Huffman encoding program 34 * Usage: pack [[ -f ] [ - ] filename ... ] filename ... 35 * - option: enable/disable listing of statistics 36 */ 37 38 #include <stdio.h> 39 #include <sys/types.h> 40 #include <sys/stat.h> 41 #include <unistd.h> 42 #include <locale.h> 43 #include <stdarg.h> 44 #include <errno.h> 45 #include <sys/isa_defs.h> 46 #include <stdlib.h> 47 #include <limits.h> 48 #include <sys/param.h> 49 #include <fcntl.h> 50 #include <utime.h> 51 #include <string.h> 52 #include <dirent.h> 53 #include <unistd.h> 54 55 #undef lint 56 57 #define END 256 58 #define PACKED 017436 /* <US><RS> - Unlikely value */ 59 #define SUF0 '.' 60 #define SUF1 'z' 61 62 struct stat status, ostatus; 63 static struct utimbuf u_times; 64 65 /* union for overlaying a long int with a set of four characters */ 66 union FOUR { 67 struct { long int lng; } lint; 68 struct { char c0, c1, c2, c3; } chars; 69 }; 70 71 /* character counters */ 72 long count [END+1]; 73 union FOUR insize; 74 long outsize; 75 long dictsize; 76 int diffbytes; 77 78 /* i/o stuff */ 79 char vflag = 0; 80 int force = 0; /* allow forced packing for consistency in directory */ 81 82 static char filename [MAXPATHLEN]; 83 static int max_name; 84 static int max_path = MAXPATHLEN; 85 86 int infile; /* unpacked file */ 87 int outfile; /* packed file */ 88 char inbuff [BUFSIZ]; 89 char outbuff [BUFSIZ+4]; 90 91 /* variables associated with the tree */ 92 int maxlev; 93 int levcount [25]; 94 int lastnode; 95 int parent [2*END+1]; 96 97 /* variables associated with the encoding process */ 98 char length [END+1]; 99 long bits [END+1]; 100 union FOUR mask; 101 long inc; 102 #if defined(_LITTLE_ENDIAN) 103 char *maskshuff[4] = {&(mask.chars.c3), 104 &(mask.chars.c2), 105 &(mask.chars.c1), 106 &(mask.chars.c0)}; 107 #elif defined(_BIG_ENDIAN) 108 char *maskshuff[4] = {&(mask.chars.c0), 109 &(mask.chars.c1), 110 &(mask.chars.c2), 111 &(mask.chars.c3)}; 112 #else 113 #error Unknown byte ordering! 114 #endif 115 116 /* the heap */ 117 int n; 118 struct heap { 119 long int count; 120 int node; 121 } heap [END+2]; 122 #define hmove(a, b) {(b).count = (a).count; (b).node = (a).node; } 123 124 static void heapify(int i); 125 static int mv_xattrs(int, int, char *, int); 126 127 /* gather character frequency statistics */ 128 /* return 1 if successful, 0 otherwise */ 129 int 130 input(char *source) 131 { 132 register int i; 133 for (i = 0; i < END; i++) 134 count[i] = 0; 135 while ((i = read(infile, inbuff, BUFSIZ)) > 0) 136 while (i > 0) 137 count[inbuff[--i]&0377] += 2; 138 if (i == 0) 139 return (1); 140 fprintf(stderr, gettext( 141 "pack: %s: read error - file unchanged: "), source); 142 perror(""); 143 return (0); 144 } 145 146 /* encode the current file */ 147 /* return 1 if successful, 0 otherwise */ 148 int 149 output(char *source) 150 { 151 int c, i, inleft; 152 char *inp; 153 register char **q, *outp; 154 register int bitsleft; 155 long temp; 156 157 /* output ``PACKED'' header */ 158 outbuff[0] = 037; /* ascii US */ 159 outbuff[1] = 036; /* ascii RS */ 160 /* output the length and the dictionary */ 161 temp = insize.lint.lng; 162 for (i = 5; i >= 2; i--) { 163 outbuff[i] = (char)(temp & 0377); 164 temp >>= 8; 165 } 166 outp = &outbuff[6]; 167 *outp++ = maxlev; 168 for (i = 1; i < maxlev; i++) 169 *outp++ = levcount[i]; 170 *outp++ = levcount[maxlev]-2; 171 for (i = 1; i <= maxlev; i++) 172 for (c = 0; c < END; c++) 173 if (length[c] == i) 174 *outp++ = c; 175 dictsize = outp-&outbuff[0]; 176 177 /* output the text */ 178 lseek(infile, 0L, 0); 179 outsize = 0; 180 bitsleft = 8; 181 inleft = 0; 182 do { 183 if (inleft <= 0) { 184 inleft = read(infile, inp = &inbuff[0], BUFSIZ); 185 if (inleft < 0) { 186 fprintf(stderr, gettext( 187 "pack: %s: read error - file unchanged: "), 188 source); 189 perror(""); 190 return (0); 191 } 192 } 193 c = (--inleft < 0) ? END : (*inp++ & 0377); 194 mask.lint.lng = bits[c]<<bitsleft; 195 q = &maskshuff[0]; 196 if (bitsleft == 8) 197 *outp = **q++; 198 else 199 *outp |= **q++; 200 bitsleft -= length[c]; 201 while (bitsleft < 0) { 202 *++outp = **q++; 203 bitsleft += 8; 204 } 205 if (outp >= &outbuff[BUFSIZ]) { 206 if (write(outfile, outbuff, BUFSIZ) != BUFSIZ) { 207 wrerr: fprintf(stderr, gettext( 208 "pack: %s.z: write error - file unchanged: "), 209 source); 210 perror(""); 211 return (0); 212 } 213 ((union FOUR *)outbuff)->lint.lng = 214 ((union FOUR *)&outbuff[BUFSIZ])->lint.lng; 215 outp -= BUFSIZ; 216 outsize += BUFSIZ; 217 } 218 } while (c != END); 219 if (bitsleft < 8) 220 outp++; 221 c = outp-outbuff; 222 if (write(outfile, outbuff, c) != c) 223 goto wrerr; 224 outsize += c; 225 return (1); 226 } 227 228 /* makes a heap out of heap[i],...,heap[n] */ 229 void 230 heapify(int i) 231 { 232 register int k; 233 int lastparent; 234 struct heap heapsubi; 235 hmove(heap[i], heapsubi); 236 lastparent = n/2; 237 while (i <= lastparent) { 238 k = 2*i; 239 if (heap[k].count > heap[k+1].count && k < n) 240 k++; 241 if (heapsubi.count < heap[k].count) 242 break; 243 hmove(heap[k], heap[i]); 244 i = k; 245 } 246 hmove(heapsubi, heap[i]); 247 } 248 249 /* return 1 after successful packing, 0 otherwise */ 250 int 251 packfile(char *source) 252 { 253 register int c, i, p; 254 long bitsout; 255 256 /* gather frequency statistics */ 257 if (input(source) == 0) 258 return (0); 259 260 /* put occurring chars in heap with their counts */ 261 diffbytes = -1; 262 count[END] = 1; 263 insize.lint.lng = n = 0; 264 for (i = END; i >= 0; i--) { 265 parent[i] = 0; 266 if (count[i] > 0) { 267 diffbytes++; 268 insize.lint.lng += count[i]; 269 heap[++n].count = count[i]; 270 heap[n].node = i; 271 } 272 } 273 if (diffbytes == 1) { 274 fprintf(stderr, gettext( 275 "pack: %s: trivial file - file unchanged\n"), source); 276 return (0); 277 } 278 insize.lint.lng >>= 1; 279 for (i = n/2; i >= 1; i--) 280 heapify(i); 281 282 /* build Huffman tree */ 283 lastnode = END; 284 while (n > 1) { 285 parent[heap[1].node] = ++lastnode; 286 inc = heap[1].count; 287 hmove(heap[n], heap[1]); 288 n--; 289 heapify(1); 290 parent[heap[1].node] = lastnode; 291 heap[1].node = lastnode; 292 heap[1].count += inc; 293 heapify(1); 294 } 295 parent[lastnode] = 0; 296 297 /* assign lengths to encoding for each character */ 298 bitsout = maxlev = 0; 299 for (i = 1; i <= 24; i++) 300 levcount[i] = 0; 301 for (i = 0; i <= END; i++) { 302 c = 0; 303 for (p = parent[i]; p != 0; p = parent[p]) 304 c++; 305 levcount[c]++; 306 length[i] = c; 307 if (c > maxlev) 308 maxlev = c; 309 bitsout += c*(count[i]>>1); 310 } 311 if (maxlev > 24) { 312 /* can't occur unless insize.lint.lng >= 2**24 */ 313 fprintf(stderr, gettext( 314 "pack: %s: Huffman tree has too many levels - file unchanged\n"), 315 source); 316 return (0); 317 } 318 319 /* don't bother if no compression results */ 320 outsize = ((bitsout+7)>>3)+6+maxlev+diffbytes; 321 if ((insize.lint.lng+BUFSIZ-1)/BUFSIZ <= 322 (outsize+BUFSIZ-1)/BUFSIZ && !force) { 323 printf(gettext( 324 "pack: %s: no saving - file unchanged\n"), source); 325 return (0); 326 } 327 328 /* compute bit patterns for each character */ 329 inc = 1L << 24; 330 inc >>= maxlev; 331 mask.lint.lng = 0; 332 for (i = maxlev; i > 0; i--) { 333 for (c = 0; c <= END; c++) 334 if (length[c] == i) { 335 bits[c] = mask.lint.lng; 336 mask.lint.lng += inc; 337 } 338 mask.lint.lng &= ~inc; 339 inc <<= 1; 340 } 341 342 return (output(source)); 343 } 344 345 int 346 main(int argc, char *argv[]) 347 { 348 extern int optind; 349 register int i; 350 register char *cp; 351 int k, sep, errflg = 0; 352 int c; 353 int fcount = 0; /* count failures */ 354 355 (void) setlocale(LC_ALL, ""); 356 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ 357 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */ 358 #endif 359 (void) textdomain(TEXT_DOMAIN); 360 361 while ((c = getopt(argc, argv, "f-")) != EOF) { 362 if (c == 'f') 363 force++; 364 else 365 ++errflg; 366 } 367 /* 368 * Check for invalid option. Also check for missing 369 * file operand, ie: "pack" or "pack -". 370 */ 371 argc -= optind; 372 argv = &argv[optind]; 373 if (errflg || argc < 1 || 374 (argc == 1 && argv[0][0] == '-' && argv[0][1] == '\0')) { 375 fprintf(stderr, gettext( 376 "usage: pack [-f] [-] file...\n")); 377 if (argc < 1 || 378 (argc == 1 && argv[0][0] == '-' && 379 argv[0][1] == '\0')) { 380 /* 381 * return 1 for usage error when no file was specified 382 */ 383 return (1); 384 } 385 } 386 /* loop through the file names */ 387 for (k = 0; k < argc; k++) { 388 if (argv[k][0] == '-' && argv[k][1] == '\0') { 389 vflag = 1 - vflag; 390 continue; 391 } 392 fcount++; /* increase failure count - expect the worst */ 393 if (errflg) { 394 /* 395 * invalid option; just count the number of files not 396 * packed 397 */ 398 continue; 399 } 400 /* remove any .z suffix the user may have added */ 401 for (cp = argv[k]; *cp != '\0'; ++cp) 402 ; 403 if (cp[-1] == SUF1 && cp[-2] == SUF0) { 404 *cp-- = '\0'; *cp-- = '\0'; *cp = '\0'; 405 } 406 sep = -1; cp = filename; 407 max_name = pathconf(argv[k], _PC_NAME_MAX); 408 if (max_name == -1) { 409 /* pathname invalid or no limit on length of filename */ 410 max_name = _POSIX_NAME_MAX; 411 } 412 /* copy argv[k] to filename and count chars in base name */ 413 for (i = 0; i < (MAXPATHLEN-3) && (*cp = argv[k][i]); i++) 414 if (*cp++ == '/') sep = i; 415 if ((infile = open(filename, 0)) < 0) { 416 fprintf(stderr, gettext( 417 "pack: %s: cannot open: "), filename); 418 perror(""); 419 continue; 420 } 421 if (i >= (MAXPATHLEN-3) || (i-sep) > (max_name - 1)) { 422 fprintf(stderr, gettext( 423 "pack: %s: file name too long\n"), argv[k]); 424 continue; 425 } 426 fstat(infile, &status); 427 if (status.st_mode&S_IFDIR) { 428 fprintf(stderr, gettext( 429 "pack: %s: cannot pack a directory\n"), 430 argv[k]); 431 goto closein; 432 } 433 if (status.st_size == 0) { 434 fprintf(stderr, gettext( 435 "pack: %s: cannot pack a zero length file\n"), 436 argv[k]); 437 goto closein; 438 } 439 if (status.st_nlink != 1) { 440 fprintf(stderr, gettext( 441 "pack: %s: has links\n"), 442 argv[k]); 443 goto closein; 444 } 445 *cp++ = SUF0; *cp++ = SUF1; *cp = '\0'; 446 if (stat(filename, &ostatus) != -1) { 447 fprintf(stderr, gettext( 448 "pack: %s: already exists\n"), filename); 449 goto closein; 450 } 451 if ((outfile = creat(filename, status.st_mode)) < 0) { 452 fprintf(stderr, gettext( 453 "pack: %s: cannot create: "), filename); 454 perror(""); 455 goto closein; 456 } 457 458 if (packfile(argv[k]) && 459 ((pathconf(argv[k], _PC_XATTR_EXISTS) != 1) || 460 (mv_xattrs(infile, outfile, 461 argv[k], 0) == 0))) { 462 if (unlink(argv[k]) != 0) { 463 fprintf(stderr, gettext( 464 "pack: %s: cannot unlink: "), 465 argv[k]); 466 perror(""); 467 } 468 printf(gettext( 469 "pack: %s: %.1f%% Compression\n"), 470 argv[k], 471 ((double)(-outsize+(insize.lint.lng))/(double)insize.lint.lng)*100); 472 /* output statistics */ 473 if (vflag) { 474 printf(gettext("\tfrom %ld to %ld bytes\n"), 475 insize.lint.lng, outsize); 476 printf(gettext( 477 "\tHuffman tree has %d levels below root\n"), 478 maxlev); 479 printf(gettext( 480 "\t%d distinct bytes in input\n"), 481 diffbytes); 482 printf(gettext( 483 "\tdictionary overhead = %ld bytes\n"), 484 dictsize); 485 printf(gettext( 486 "\teffective entropy = %.2f bits/byte\n"), 487 ((double)outsize / (double)insize.lint.lng) * 8); 488 printf(gettext( 489 "\tasymptotic entropy = %.2f bits/byte\n"), 490 ((double)(outsize-dictsize) / 491 (double)insize.lint.lng) * 8); 492 } 493 u_times.actime = status.st_atime; 494 u_times.modtime = status.st_mtime; 495 if (utime(filename, &u_times) != 0) { 496 errflg++; 497 fprintf(stderr, 498 gettext( 499 "pack: cannot change times on %s: "), 500 filename); 501 perror(""); 502 } 503 if (chmod(filename, status.st_mode) != 0) { 504 errflg++; 505 fprintf(stderr, 506 gettext( 507 "pack: can't change mode to %o on %s: "), 508 status.st_mode, filename); 509 perror(""); 510 } 511 chown(filename, status.st_uid, status.st_gid); 512 if (!errflg) 513 fcount--; /* success after all */ 514 } else { 515 if (pathconf(filename, _PC_XATTR_EXISTS) == 1) { 516 (void) mv_xattrs(outfile, infile, filename, 1); 517 } 518 unlink(filename); 519 } 520 closein: close(outfile); 521 close(infile); 522 } 523 return (fcount); 524 } 525 526 /* 527 * mv_xattrs - move (via renameat) all of the extended attributes 528 * associated with the file referenced by infd to the file 529 * referenced by outfd. The infile and silent arguments are 530 * provided for error message processing. This function 531 * returns 0 on success and -1 on error. 532 */ 533 static int 534 mv_xattrs(int infd, int outfd, char *infile, int silent) 535 { 536 int indfd, outdfd, tmpfd; 537 DIR *dirp = NULL; 538 struct dirent *dp = NULL; 539 int error = 0; 540 char *etext; 541 542 indfd = outdfd = tmpfd = -1; 543 544 if ((indfd = openat(infd, ".", O_RDONLY|O_XATTR)) == -1) { 545 etext = gettext("cannot open source"); 546 error = -1; 547 goto out; 548 } 549 550 if ((outdfd = openat(outfd, ".", O_RDONLY|O_XATTR)) == -1) { 551 etext = gettext("cannot open target"); 552 error = -1; 553 goto out; 554 } 555 556 if ((tmpfd = dup(indfd)) == -1) { 557 etext = gettext("cannot dup descriptor"); 558 error = -1; 559 goto out; 560 561 } 562 if ((dirp = fdopendir(tmpfd)) == NULL) { 563 etext = gettext("cannot access source"); 564 error = -1; 565 goto out; 566 } 567 568 while (dp = readdir(dirp)) { 569 if ((dp->d_name[0] == '.' && dp->d_name[1] == '\0') || 570 (dp->d_name[0] == '.' && dp->d_name[1] == '.' && 571 dp->d_name[2] == '\0')) 572 continue; 573 if ((renameat(indfd, dp->d_name, outdfd, dp->d_name)) == -1) { 574 etext = dp->d_name; 575 error = -1; 576 goto out; 577 } 578 } 579 out: 580 if (error == -1 && silent == 0) { 581 fprintf(stderr, gettext( 582 "pack: %s: cannot move extended attributes, "), 583 infile); 584 perror(etext); 585 } 586 if (dirp) 587 closedir(dirp); 588 if (indfd != -1) 589 close(indfd); 590 if (outdfd != -1) 591 close(outdfd); 592 return (error); 593 } 594