1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 30 /* All Rights Reserved */ 31 32 33 /* 34 * Huffman encoding program 35 * Usage: pack [[ -f ] [ - ] filename ... ] filename ... 36 * - option: enable/disable listing of statistics 37 */ 38 39 #include <stdio.h> 40 #include <sys/types.h> 41 #include <sys/stat.h> 42 #include <unistd.h> 43 #include <locale.h> 44 #include <stdarg.h> 45 #include <errno.h> 46 #include <sys/isa_defs.h> 47 #include <stdlib.h> 48 #include <limits.h> 49 #include <sys/param.h> 50 #include <fcntl.h> 51 #include <utime.h> 52 #include <string.h> 53 #include <dirent.h> 54 #include <unistd.h> 55 56 #undef lint 57 58 #define END 256 59 #define PACKED 017436 /* <US><RS> - Unlikely value */ 60 #define SUF0 '.' 61 #define SUF1 'z' 62 63 struct stat status, ostatus; 64 static struct utimbuf u_times; 65 66 /* union for overlaying a long int with a set of four characters */ 67 union FOUR { 68 struct { long int lng; } lint; 69 struct { char c0, c1, c2, c3; } chars; 70 }; 71 72 /* character counters */ 73 long count [END+1]; 74 union FOUR insize; 75 long outsize; 76 long dictsize; 77 int diffbytes; 78 79 /* i/o stuff */ 80 char vflag = 0; 81 int force = 0; /* allow forced packing for consistency in directory */ 82 83 static char filename [MAXPATHLEN]; 84 static int max_name; 85 static int max_path = MAXPATHLEN; 86 87 int infile; /* unpacked file */ 88 int outfile; /* packed file */ 89 char inbuff [BUFSIZ]; 90 char outbuff [BUFSIZ+4]; 91 92 /* variables associated with the tree */ 93 int maxlev; 94 int levcount [25]; 95 int lastnode; 96 int parent [2*END+1]; 97 98 /* variables associated with the encoding process */ 99 char length [END+1]; 100 long bits [END+1]; 101 union FOUR mask; 102 long inc; 103 #if defined(_LITTLE_ENDIAN) 104 char *maskshuff[4] = {&(mask.chars.c3), 105 &(mask.chars.c2), 106 &(mask.chars.c1), 107 &(mask.chars.c0)}; 108 #elif defined(_BIG_ENDIAN) 109 char *maskshuff[4] = {&(mask.chars.c0), 110 &(mask.chars.c1), 111 &(mask.chars.c2), 112 &(mask.chars.c3)}; 113 #else 114 #error Unknown byte ordering! 115 #endif 116 117 /* the heap */ 118 int n; 119 struct heap { 120 long int count; 121 int node; 122 } heap [END+2]; 123 #define hmove(a, b) {(b).count = (a).count; (b).node = (a).node; } 124 125 static void heapify(int i); 126 static int mv_xattrs(int, int, char *, int); 127 128 /* gather character frequency statistics */ 129 /* return 1 if successful, 0 otherwise */ 130 input(char *source) 131 { 132 register int i; 133 for (i = 0; i < END; i++) 134 count[i] = 0; 135 while ((i = read(infile, inbuff, BUFSIZ)) > 0) 136 while (i > 0) 137 count[inbuff[--i]&0377] += 2; 138 if (i == 0) 139 return (1); 140 fprintf(stderr, gettext( 141 "pack: %s: read error - file unchanged: "), source); 142 perror(""); 143 return (0); 144 } 145 146 /* encode the current file */ 147 /* return 1 if successful, 0 otherwise */ 148 output(char *source) 149 { 150 int c, i, inleft; 151 char *inp; 152 register char **q, *outp; 153 register int bitsleft; 154 long temp; 155 156 /* output ``PACKED'' header */ 157 outbuff[0] = 037; /* ascii US */ 158 outbuff[1] = 036; /* ascii RS */ 159 /* output the length and the dictionary */ 160 temp = insize.lint.lng; 161 for (i = 5; i >= 2; i--) { 162 outbuff[i] = (char)(temp & 0377); 163 temp >>= 8; 164 } 165 outp = &outbuff[6]; 166 *outp++ = maxlev; 167 for (i = 1; i < maxlev; i++) 168 *outp++ = levcount[i]; 169 *outp++ = levcount[maxlev]-2; 170 for (i = 1; i <= maxlev; i++) 171 for (c = 0; c < END; c++) 172 if (length[c] == i) 173 *outp++ = c; 174 dictsize = outp-&outbuff[0]; 175 176 /* output the text */ 177 lseek(infile, 0L, 0); 178 outsize = 0; 179 bitsleft = 8; 180 inleft = 0; 181 do { 182 if (inleft <= 0) { 183 inleft = read(infile, inp = &inbuff[0], BUFSIZ); 184 if (inleft < 0) { 185 fprintf(stderr, gettext( 186 "pack: %s: read error - file unchanged: "), 187 source); 188 perror(""); 189 return (0); 190 } 191 } 192 c = (--inleft < 0) ? END : (*inp++ & 0377); 193 mask.lint.lng = bits[c]<<bitsleft; 194 q = &maskshuff[0]; 195 if (bitsleft == 8) 196 *outp = **q++; 197 else 198 *outp |= **q++; 199 bitsleft -= length[c]; 200 while (bitsleft < 0) { 201 *++outp = **q++; 202 bitsleft += 8; 203 } 204 if (outp >= &outbuff[BUFSIZ]) { 205 if (write(outfile, outbuff, BUFSIZ) != BUFSIZ) { 206 wrerr: fprintf(stderr, gettext( 207 "pack: %s.z: write error - file unchanged: "), 208 source); 209 perror(""); 210 return (0); 211 } 212 ((union FOUR *)outbuff)->lint.lng = 213 ((union FOUR *)&outbuff[BUFSIZ])->lint.lng; 214 outp -= BUFSIZ; 215 outsize += BUFSIZ; 216 } 217 } while (c != END); 218 if (bitsleft < 8) 219 outp++; 220 c = outp-outbuff; 221 if (write(outfile, outbuff, c) != c) 222 goto wrerr; 223 outsize += c; 224 return (1); 225 } 226 227 /* makes a heap out of heap[i],...,heap[n] */ 228 void 229 heapify(int i) 230 { 231 register int k; 232 int lastparent; 233 struct heap heapsubi; 234 hmove(heap[i], heapsubi); 235 lastparent = n/2; 236 while (i <= lastparent) { 237 k = 2*i; 238 if (heap[k].count > heap[k+1].count && k < n) 239 k++; 240 if (heapsubi.count < heap[k].count) 241 break; 242 hmove(heap[k], heap[i]); 243 i = k; 244 } 245 hmove(heapsubi, heap[i]); 246 } 247 248 /* return 1 after successful packing, 0 otherwise */ 249 int 250 packfile(char *source) 251 { 252 register int c, i, p; 253 long bitsout; 254 255 /* gather frequency statistics */ 256 if (input(source) == 0) 257 return (0); 258 259 /* put occurring chars in heap with their counts */ 260 diffbytes = -1; 261 count[END] = 1; 262 insize.lint.lng = n = 0; 263 for (i = END; i >= 0; i--) { 264 parent[i] = 0; 265 if (count[i] > 0) { 266 diffbytes++; 267 insize.lint.lng += count[i]; 268 heap[++n].count = count[i]; 269 heap[n].node = i; 270 } 271 } 272 if (diffbytes == 1) { 273 fprintf(stderr, gettext( 274 "pack: %s: trivial file - file unchanged\n"), source); 275 return (0); 276 } 277 insize.lint.lng >>= 1; 278 for (i = n/2; i >= 1; i--) 279 heapify(i); 280 281 /* build Huffman tree */ 282 lastnode = END; 283 while (n > 1) { 284 parent[heap[1].node] = ++lastnode; 285 inc = heap[1].count; 286 hmove(heap[n], heap[1]); 287 n--; 288 heapify(1); 289 parent[heap[1].node] = lastnode; 290 heap[1].node = lastnode; 291 heap[1].count += inc; 292 heapify(1); 293 } 294 parent[lastnode] = 0; 295 296 /* assign lengths to encoding for each character */ 297 bitsout = maxlev = 0; 298 for (i = 1; i <= 24; i++) 299 levcount[i] = 0; 300 for (i = 0; i <= END; i++) { 301 c = 0; 302 for (p = parent[i]; p != 0; p = parent[p]) 303 c++; 304 levcount[c]++; 305 length[i] = c; 306 if (c > maxlev) 307 maxlev = c; 308 bitsout += c*(count[i]>>1); 309 } 310 if (maxlev > 24) { 311 /* can't occur unless insize.lint.lng >= 2**24 */ 312 fprintf(stderr, gettext( 313 "pack: %s: Huffman tree has too many levels - file unchanged\n"), 314 source); 315 return (0); 316 } 317 318 /* don't bother if no compression results */ 319 outsize = ((bitsout+7)>>3)+6+maxlev+diffbytes; 320 if ((insize.lint.lng+BUFSIZ-1)/BUFSIZ <= 321 (outsize+BUFSIZ-1)/BUFSIZ && !force) { 322 printf(gettext( 323 "pack: %s: no saving - file unchanged\n"), source); 324 return (0); 325 } 326 327 /* compute bit patterns for each character */ 328 inc = 1L << 24; 329 inc >>= maxlev; 330 mask.lint.lng = 0; 331 for (i = maxlev; i > 0; i--) { 332 for (c = 0; c <= END; c++) 333 if (length[c] == i) { 334 bits[c] = mask.lint.lng; 335 mask.lint.lng += inc; 336 } 337 mask.lint.lng &= ~inc; 338 inc <<= 1; 339 } 340 341 return (output(source)); 342 } 343 344 main(int argc, char *argv[]) 345 { 346 extern int optind; 347 register int i; 348 register char *cp; 349 int k, sep, errflg = 0; 350 int c; 351 int fcount = 0; /* count failures */ 352 353 (void) setlocale(LC_ALL, ""); 354 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ 355 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */ 356 #endif 357 (void) textdomain(TEXT_DOMAIN); 358 359 while ((c = getopt(argc, argv, "f-")) != EOF) { 360 if (c == 'f') 361 force++; 362 else 363 ++errflg; 364 } 365 /* 366 * Check for invalid option. Also check for missing 367 * file operand, ie: "pack" or "pack -". 368 */ 369 argc -= optind; 370 argv = &argv[optind]; 371 if (errflg || argc < 1 || 372 (argc == 1 && argv[0][0] == '-' && argv[0][1] == '\0')) { 373 fprintf(stderr, gettext( 374 "usage: pack [-f] [-] file...\n")); 375 if (argc < 1 || 376 (argc == 1 && argv[0][0] == '-' && 377 argv[0][1] == '\0')) { 378 /* 379 * return 1 for usage error when no file was specified 380 */ 381 return (1); 382 } 383 } 384 /* loop through the file names */ 385 for (k = 0; k < argc; k++) { 386 if (argv[k][0] == '-' && argv[k][1] == '\0') { 387 vflag = 1 - vflag; 388 continue; 389 } 390 fcount++; /* increase failure count - expect the worst */ 391 if (errflg) { 392 /* 393 * invalid option; just count the number of files not 394 * packed 395 */ 396 continue; 397 } 398 /* remove any .z suffix the user may have added */ 399 for (cp = argv[k]; *cp != '\0'; ++cp) 400 ; 401 if (cp[-1] == SUF1 && cp[-2] == SUF0) { 402 *cp-- = '\0'; *cp-- = '\0'; *cp = '\0'; 403 } 404 sep = -1; cp = filename; 405 max_name = pathconf(argv[k], _PC_NAME_MAX); 406 if (max_name == -1) { 407 /* pathname invalid or no limit on length of filename */ 408 max_name = _POSIX_NAME_MAX; 409 } 410 /* copy argv[k] to filename and count chars in base name */ 411 for (i = 0; i < (MAXPATHLEN-3) && (*cp = argv[k][i]); i++) 412 if (*cp++ == '/') sep = i; 413 if ((infile = open(filename, 0)) < 0) { 414 fprintf(stderr, gettext( 415 "pack: %s: cannot open: "), filename); 416 perror(""); 417 continue; 418 } 419 if (i >= (MAXPATHLEN-3) || (i-sep) > (max_name - 1)) { 420 fprintf(stderr, gettext( 421 "pack: %s: file name too long\n"), argv[k]); 422 continue; 423 } 424 fstat(infile, &status); 425 if (status.st_mode&S_IFDIR) { 426 fprintf(stderr, gettext( 427 "pack: %s: cannot pack a directory\n"), 428 argv[k]); 429 goto closein; 430 } 431 if (status.st_size == 0) { 432 fprintf(stderr, gettext( 433 "pack: %s: cannot pack a zero length file\n"), 434 argv[k]); 435 goto closein; 436 } 437 if (status.st_nlink != 1) { 438 fprintf(stderr, gettext( 439 "pack: %s: has links\n"), 440 argv[k]); 441 goto closein; 442 } 443 *cp++ = SUF0; *cp++ = SUF1; *cp = '\0'; 444 if (stat(filename, &ostatus) != -1) { 445 fprintf(stderr, gettext( 446 "pack: %s: already exists\n"), filename); 447 goto closein; 448 } 449 if ((outfile = creat(filename, status.st_mode)) < 0) { 450 fprintf(stderr, gettext( 451 "pack: %s: cannot create: "), filename); 452 perror(""); 453 goto closein; 454 } 455 456 if (packfile(argv[k]) && 457 ((pathconf(argv[k], _PC_XATTR_EXISTS) != 1) || 458 (mv_xattrs(infile, outfile, 459 argv[k], 0) == 0))) { 460 if (unlink(argv[k]) != 0) { 461 fprintf(stderr, gettext( 462 "pack: %s: cannot unlink: "), 463 argv[k]); 464 perror(""); 465 } 466 printf(gettext( 467 "pack: %s: %.1f%% Compression\n"), 468 argv[k], 469 ((double)(-outsize+(insize.lint.lng))/(double)insize.lint.lng)*100); 470 /* output statistics */ 471 if (vflag) { 472 printf(gettext("\tfrom %ld to %ld bytes\n"), 473 insize.lint.lng, outsize); 474 printf(gettext( 475 "\tHuffman tree has %d levels below root\n"), 476 maxlev); 477 printf(gettext( 478 "\t%d distinct bytes in input\n"), 479 diffbytes); 480 printf(gettext( 481 "\tdictionary overhead = %ld bytes\n"), 482 dictsize); 483 printf(gettext( 484 "\teffective entropy = %.2f bits/byte\n"), 485 ((double)outsize / (double)insize.lint.lng) * 8); 486 printf(gettext( 487 "\tasymptotic entropy = %.2f bits/byte\n"), 488 ((double)(outsize-dictsize) / 489 (double)insize.lint.lng) * 8); 490 } 491 u_times.actime = status.st_atime; 492 u_times.modtime = status.st_mtime; 493 if (utime(filename, &u_times) != 0) { 494 errflg++; 495 fprintf(stderr, 496 gettext( 497 "pack: cannot change times on %s: "), 498 filename); 499 perror(""); 500 } 501 if (chmod(filename, status.st_mode) != 0) { 502 errflg++; 503 fprintf(stderr, 504 gettext( 505 "pack: can't change mode to %o on %s: "), 506 status.st_mode, filename); 507 perror(""); 508 } 509 chown(filename, status.st_uid, status.st_gid); 510 if (!errflg) 511 fcount--; /* success after all */ 512 } else { 513 if (pathconf(filename, _PC_XATTR_EXISTS) == 1) { 514 (void) mv_xattrs(outfile, infile, filename, 1); 515 } 516 unlink(filename); 517 } 518 closein: close(outfile); 519 close(infile); 520 } 521 return (fcount); 522 } 523 524 /* 525 * mv_xattrs - move (via renameat) all of the extended attributes 526 * associated with the file referenced by infd to the file 527 * referenced by outfd. The infile and silent arguments are 528 * provided for error message processing. This function 529 * returns 0 on success and -1 on error. 530 */ 531 static int 532 mv_xattrs(int infd, int outfd, char *infile, int silent) 533 { 534 int indfd, outdfd, tmpfd; 535 DIR *dirp = NULL; 536 struct dirent *dp = NULL; 537 int error = 0; 538 char *etext; 539 540 indfd = outdfd = tmpfd = -1; 541 542 if ((indfd = openat(infd, ".", O_RDONLY|O_XATTR)) == -1) { 543 etext = gettext("cannot open source"); 544 error = -1; 545 goto out; 546 } 547 548 if ((outdfd = openat(outfd, ".", O_RDONLY|O_XATTR)) == -1) { 549 etext = gettext("cannot open target"); 550 error = -1; 551 goto out; 552 } 553 554 if ((tmpfd = dup(indfd)) == -1) { 555 etext = gettext("cannot dup descriptor"); 556 error = -1; 557 goto out; 558 559 } 560 if ((dirp = fdopendir(tmpfd)) == NULL) { 561 etext = gettext("cannot access source"); 562 error = -1; 563 goto out; 564 } 565 566 while (dp = readdir(dirp)) { 567 if ((dp->d_name[0] == '.' && dp->d_name[1] == '\0') || 568 (dp->d_name[0] == '.' && dp->d_name[1] == '.' && 569 dp->d_name[2] == '\0')) 570 continue; 571 if ((renameat(indfd, dp->d_name, outdfd, dp->d_name)) == -1) { 572 etext = dp->d_name; 573 error = -1; 574 goto out; 575 } 576 } 577 out: 578 if (error == -1 && silent == 0) { 579 fprintf(stderr, gettext( 580 "pack: %s: cannot move extended attributes, "), 581 infile); 582 perror(etext); 583 } 584 if (dirp) 585 closedir(dirp); 586 if (indfd != -1) 587 close(indfd); 588 if (outdfd != -1) 589 close(outdfd); 590 return (error); 591 } 592