1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 /* 33 * Huffman encoding program 34 * Usage: pack [[ -f ] [ - ] filename ... ] filename ... 35 * - option: enable/disable listing of statistics 36 */ 37 38 #include <stdio.h> 39 #include <sys/types.h> 40 #include <sys/stat.h> 41 #include <unistd.h> 42 #include <locale.h> 43 #include <stdarg.h> 44 #include <errno.h> 45 #include <sys/isa_defs.h> 46 #include <stdlib.h> 47 #include <limits.h> 48 #include <sys/param.h> 49 #include <fcntl.h> 50 #include <utime.h> 51 #include <string.h> 52 #include <dirent.h> 53 #include <unistd.h> 54 #include <sys/acl.h> 55 #include <aclutils.h> 56 57 #undef lint 58 59 #define END 256 60 #define PACKED 017436 /* <US><RS> - Unlikely value */ 61 #define SUF0 '.' 62 #define SUF1 'z' 63 64 struct stat status, ostatus; 65 static struct utimbuf u_times; 66 67 /* union for overlaying a long int with a set of four characters */ 68 union FOUR { 69 struct { long int lng; } lint; 70 struct { char c0, c1, c2, c3; } chars; 71 }; 72 73 /* character counters */ 74 long count [END+1]; 75 union FOUR insize; 76 long outsize; 77 long dictsize; 78 int diffbytes; 79 80 /* i/o stuff */ 81 char vflag = 0; 82 int force = 0; /* allow forced packing for consistency in directory */ 83 84 static char filename [MAXPATHLEN]; 85 static int max_name; 86 static int max_path = MAXPATHLEN; 87 88 int infile; /* unpacked file */ 89 int outfile; /* packed file */ 90 char inbuff [BUFSIZ]; 91 char outbuff [BUFSIZ+4]; 92 93 /* variables associated with the tree */ 94 int maxlev; 95 int levcount [25]; 96 int lastnode; 97 int parent [2*END+1]; 98 99 /* variables associated with the encoding process */ 100 char length [END+1]; 101 long bits [END+1]; 102 union FOUR mask; 103 long inc; 104 #if defined(_LITTLE_ENDIAN) 105 char *maskshuff[4] = {&(mask.chars.c3), 106 &(mask.chars.c2), 107 &(mask.chars.c1), 108 &(mask.chars.c0)}; 109 #elif defined(_BIG_ENDIAN) 110 char *maskshuff[4] = {&(mask.chars.c0), 111 &(mask.chars.c1), 112 &(mask.chars.c2), 113 &(mask.chars.c3)}; 114 #else 115 #error Unknown byte ordering! 116 #endif 117 118 /* the heap */ 119 int n; 120 struct heap { 121 long int count; 122 int node; 123 } heap [END+2]; 124 #define hmove(a, b) {(b).count = (a).count; (b).node = (a).node; } 125 126 static void heapify(int i); 127 static int mv_xattrs(int, int, char *, int); 128 129 /* gather character frequency statistics */ 130 /* return 1 if successful, 0 otherwise */ 131 int 132 input(char *source) 133 { 134 register int i; 135 for (i = 0; i < END; i++) 136 count[i] = 0; 137 while ((i = read(infile, inbuff, BUFSIZ)) > 0) 138 while (i > 0) 139 count[inbuff[--i]&0377] += 2; 140 if (i == 0) 141 return (1); 142 fprintf(stderr, gettext( 143 "pack: %s: read error - file unchanged: "), source); 144 perror(""); 145 return (0); 146 } 147 148 /* encode the current file */ 149 /* return 1 if successful, 0 otherwise */ 150 int 151 output(char *source) 152 { 153 int c, i, inleft; 154 char *inp; 155 register char **q, *outp; 156 register int bitsleft; 157 long temp; 158 159 /* output ``PACKED'' header */ 160 outbuff[0] = 037; /* ascii US */ 161 outbuff[1] = 036; /* ascii RS */ 162 /* output the length and the dictionary */ 163 temp = insize.lint.lng; 164 for (i = 5; i >= 2; i--) { 165 outbuff[i] = (char)(temp & 0377); 166 temp >>= 8; 167 } 168 outp = &outbuff[6]; 169 *outp++ = maxlev; 170 for (i = 1; i < maxlev; i++) 171 *outp++ = levcount[i]; 172 *outp++ = levcount[maxlev]-2; 173 for (i = 1; i <= maxlev; i++) 174 for (c = 0; c < END; c++) 175 if (length[c] == i) 176 *outp++ = c; 177 dictsize = outp-&outbuff[0]; 178 179 /* output the text */ 180 lseek(infile, 0L, 0); 181 outsize = 0; 182 bitsleft = 8; 183 inleft = 0; 184 do { 185 if (inleft <= 0) { 186 inleft = read(infile, inp = &inbuff[0], BUFSIZ); 187 if (inleft < 0) { 188 fprintf(stderr, gettext( 189 "pack: %s: read error - file unchanged: "), 190 source); 191 perror(""); 192 return (0); 193 } 194 } 195 c = (--inleft < 0) ? END : (*inp++ & 0377); 196 mask.lint.lng = bits[c]<<bitsleft; 197 q = &maskshuff[0]; 198 if (bitsleft == 8) 199 *outp = **q++; 200 else 201 *outp |= **q++; 202 bitsleft -= length[c]; 203 while (bitsleft < 0) { 204 *++outp = **q++; 205 bitsleft += 8; 206 } 207 if (outp >= &outbuff[BUFSIZ]) { 208 if (write(outfile, outbuff, BUFSIZ) != BUFSIZ) { 209 wrerr: fprintf(stderr, gettext( 210 "pack: %s.z: write error - file unchanged: "), 211 source); 212 perror(""); 213 return (0); 214 } 215 ((union FOUR *)outbuff)->lint.lng = 216 ((union FOUR *)&outbuff[BUFSIZ])->lint.lng; 217 outp -= BUFSIZ; 218 outsize += BUFSIZ; 219 } 220 } while (c != END); 221 if (bitsleft < 8) 222 outp++; 223 c = outp-outbuff; 224 if (write(outfile, outbuff, c) != c) 225 goto wrerr; 226 outsize += c; 227 return (1); 228 } 229 230 /* makes a heap out of heap[i],...,heap[n] */ 231 void 232 heapify(int i) 233 { 234 register int k; 235 int lastparent; 236 struct heap heapsubi; 237 hmove(heap[i], heapsubi); 238 lastparent = n/2; 239 while (i <= lastparent) { 240 k = 2*i; 241 if (heap[k].count > heap[k+1].count && k < n) 242 k++; 243 if (heapsubi.count < heap[k].count) 244 break; 245 hmove(heap[k], heap[i]); 246 i = k; 247 } 248 hmove(heapsubi, heap[i]); 249 } 250 251 /* return 1 after successful packing, 0 otherwise */ 252 int 253 packfile(char *source) 254 { 255 register int c, i, p; 256 long bitsout; 257 258 /* gather frequency statistics */ 259 if (input(source) == 0) 260 return (0); 261 262 /* put occurring chars in heap with their counts */ 263 diffbytes = -1; 264 count[END] = 1; 265 insize.lint.lng = n = 0; 266 for (i = END; i >= 0; i--) { 267 parent[i] = 0; 268 if (count[i] > 0) { 269 diffbytes++; 270 insize.lint.lng += count[i]; 271 heap[++n].count = count[i]; 272 heap[n].node = i; 273 } 274 } 275 if (diffbytes == 1) { 276 fprintf(stderr, gettext( 277 "pack: %s: trivial file - file unchanged\n"), source); 278 return (0); 279 } 280 insize.lint.lng >>= 1; 281 for (i = n/2; i >= 1; i--) 282 heapify(i); 283 284 /* build Huffman tree */ 285 lastnode = END; 286 while (n > 1) { 287 parent[heap[1].node] = ++lastnode; 288 inc = heap[1].count; 289 hmove(heap[n], heap[1]); 290 n--; 291 heapify(1); 292 parent[heap[1].node] = lastnode; 293 heap[1].node = lastnode; 294 heap[1].count += inc; 295 heapify(1); 296 } 297 parent[lastnode] = 0; 298 299 /* assign lengths to encoding for each character */ 300 bitsout = maxlev = 0; 301 for (i = 1; i <= 24; i++) 302 levcount[i] = 0; 303 for (i = 0; i <= END; i++) { 304 c = 0; 305 for (p = parent[i]; p != 0; p = parent[p]) 306 c++; 307 levcount[c]++; 308 length[i] = c; 309 if (c > maxlev) 310 maxlev = c; 311 bitsout += c*(count[i]>>1); 312 } 313 if (maxlev > 24) { 314 /* can't occur unless insize.lint.lng >= 2**24 */ 315 fprintf(stderr, gettext( 316 "pack: %s: Huffman tree has too many levels - file unchanged\n"), 317 source); 318 return (0); 319 } 320 321 /* don't bother if no compression results */ 322 outsize = ((bitsout+7)>>3)+6+maxlev+diffbytes; 323 if ((insize.lint.lng+BUFSIZ-1)/BUFSIZ <= 324 (outsize+BUFSIZ-1)/BUFSIZ && !force) { 325 printf(gettext( 326 "pack: %s: no saving - file unchanged\n"), source); 327 return (0); 328 } 329 330 /* compute bit patterns for each character */ 331 inc = 1L << 24; 332 inc >>= maxlev; 333 mask.lint.lng = 0; 334 for (i = maxlev; i > 0; i--) { 335 for (c = 0; c <= END; c++) 336 if (length[c] == i) { 337 bits[c] = mask.lint.lng; 338 mask.lint.lng += inc; 339 } 340 mask.lint.lng &= ~inc; 341 inc <<= 1; 342 } 343 344 return (output(source)); 345 } 346 347 int 348 main(int argc, char *argv[]) 349 { 350 extern int optind; 351 register int i; 352 register char *cp; 353 int k, sep, errflg = 0; 354 int c; 355 int error; 356 int fcount = 0; /* count failures */ 357 acl_t *aclp = NULL; 358 359 (void) setlocale(LC_ALL, ""); 360 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ 361 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */ 362 #endif 363 (void) textdomain(TEXT_DOMAIN); 364 365 while ((c = getopt(argc, argv, "f-")) != EOF) { 366 if (c == 'f') 367 force++; 368 else 369 ++errflg; 370 } 371 /* 372 * Check for invalid option. Also check for missing 373 * file operand, ie: "pack" or "pack -". 374 */ 375 argc -= optind; 376 argv = &argv[optind]; 377 if (errflg || argc < 1 || 378 (argc == 1 && argv[0][0] == '-' && argv[0][1] == '\0')) { 379 fprintf(stderr, gettext( 380 "usage: pack [-f] [-] file...\n")); 381 if (argc < 1 || 382 (argc == 1 && argv[0][0] == '-' && 383 argv[0][1] == '\0')) { 384 /* 385 * return 1 for usage error when no file was specified 386 */ 387 return (1); 388 } 389 } 390 /* loop through the file names */ 391 for (k = 0; k < argc; k++) { 392 if (argv[k][0] == '-' && argv[k][1] == '\0') { 393 vflag = 1 - vflag; 394 continue; 395 } 396 fcount++; /* increase failure count - expect the worst */ 397 if (errflg) { 398 /* 399 * invalid option; just count the number of files not 400 * packed 401 */ 402 continue; 403 } 404 /* remove any .z suffix the user may have added */ 405 for (cp = argv[k]; *cp != '\0'; ++cp) 406 ; 407 if (cp[-1] == SUF1 && cp[-2] == SUF0) { 408 *cp-- = '\0'; *cp-- = '\0'; *cp = '\0'; 409 } 410 sep = -1; cp = filename; 411 max_name = pathconf(argv[k], _PC_NAME_MAX); 412 if (max_name == -1) { 413 /* pathname invalid or no limit on length of filename */ 414 max_name = _POSIX_NAME_MAX; 415 } 416 /* copy argv[k] to filename and count chars in base name */ 417 for (i = 0; i < (MAXPATHLEN-3) && (*cp = argv[k][i]); i++) 418 if (*cp++ == '/') sep = i; 419 if ((infile = open(filename, 0)) < 0) { 420 fprintf(stderr, gettext( 421 "pack: %s: cannot open: "), filename); 422 perror(""); 423 continue; 424 } 425 if (i >= (MAXPATHLEN-3) || (i-sep) > (max_name - 1)) { 426 fprintf(stderr, gettext( 427 "pack: %s: file name too long\n"), argv[k]); 428 continue; 429 } 430 fstat(infile, &status); 431 if (status.st_mode&S_IFDIR) { 432 fprintf(stderr, gettext( 433 "pack: %s: cannot pack a directory\n"), 434 argv[k]); 435 goto closein; 436 } 437 if (status.st_size == 0) { 438 fprintf(stderr, gettext( 439 "pack: %s: cannot pack a zero length file\n"), 440 argv[k]); 441 goto closein; 442 } 443 if (status.st_nlink != 1) { 444 fprintf(stderr, gettext( 445 "pack: %s: has links\n"), 446 argv[k]); 447 goto closein; 448 } 449 *cp++ = SUF0; *cp++ = SUF1; *cp = '\0'; 450 if (stat(filename, &ostatus) != -1) { 451 fprintf(stderr, gettext( 452 "pack: %s: already exists\n"), filename); 453 goto closein; 454 } 455 456 if ((outfile = creat(filename, status.st_mode)) < 0) { 457 fprintf(stderr, gettext( 458 "pack: %s: cannot create: "), filename); 459 perror(""); 460 goto closein; 461 } 462 463 error = facl_get(infile, ACL_NO_TRIVIAL, &aclp); 464 465 if (error != 0) { 466 fprintf(stderr, gettext( 467 "pack: %s: cannot retrieve ACL: %s\n"), argv[k], 468 acl_strerror(error)); 469 } 470 if (packfile(argv[k]) && 471 ((pathconf(argv[k], _PC_XATTR_EXISTS) != 1) || 472 (mv_xattrs(infile, outfile, 473 argv[k], 0) == 0))) { 474 if (unlink(argv[k]) != 0) { 475 fprintf(stderr, gettext( 476 "pack: %s: cannot unlink: "), 477 argv[k]); 478 perror(""); 479 } 480 printf(gettext( 481 "pack: %s: %.1f%% Compression\n"), 482 argv[k], 483 ((double)(-outsize+(insize.lint.lng))/(double)insize.lint.lng)*100); 484 /* output statistics */ 485 if (vflag) { 486 printf(gettext("\tfrom %ld to %ld bytes\n"), 487 insize.lint.lng, outsize); 488 printf(gettext( 489 "\tHuffman tree has %d levels below root\n"), 490 maxlev); 491 printf(gettext( 492 "\t%d distinct bytes in input\n"), 493 diffbytes); 494 printf(gettext( 495 "\tdictionary overhead = %ld bytes\n"), 496 dictsize); 497 printf(gettext( 498 "\teffective entropy = %.2f bits/byte\n"), 499 ((double)outsize / (double)insize.lint.lng) * 8); 500 printf(gettext( 501 "\tasymptotic entropy = %.2f bits/byte\n"), 502 ((double)(outsize-dictsize) / 503 (double)insize.lint.lng) * 8); 504 } 505 u_times.actime = status.st_atime; 506 u_times.modtime = status.st_mtime; 507 if (utime(filename, &u_times) != 0) { 508 errflg++; 509 fprintf(stderr, 510 gettext( 511 "pack: cannot change times on %s: "), 512 filename); 513 perror(""); 514 } 515 if (chmod(filename, status.st_mode) != 0) { 516 errflg++; 517 fprintf(stderr, 518 gettext( 519 "pack: can't change mode to %o on %s: "), 520 status.st_mode, filename); 521 perror(""); 522 } 523 chown(filename, status.st_uid, status.st_gid); 524 if (aclp && (facl_set(outfile, aclp) < 0)) { 525 fprintf(stderr, gettext( 526 "pack: %s: failed to set acl entries\n"), 527 filename); 528 perror(""); 529 } 530 if (!errflg) 531 fcount--; /* success after all */ 532 } else { 533 if (pathconf(filename, _PC_XATTR_EXISTS) == 1) { 534 (void) mv_xattrs(outfile, infile, filename, 1); 535 } 536 unlink(filename); 537 } 538 539 if (aclp) 540 acl_free(aclp); 541 542 closein: close(outfile); 543 close(infile); 544 } 545 return (fcount); 546 } 547 548 /* 549 * mv_xattrs - move (via renameat) all of the extended attributes 550 * associated with the file referenced by infd to the file 551 * referenced by outfd. The infile and silent arguments are 552 * provided for error message processing. This function 553 * returns 0 on success and -1 on error. 554 */ 555 static int 556 mv_xattrs(int infd, int outfd, char *infile, int silent) 557 { 558 int indfd, outdfd, tmpfd; 559 DIR *dirp = NULL; 560 struct dirent *dp = NULL; 561 int error = 0; 562 char *etext; 563 564 indfd = outdfd = tmpfd = -1; 565 566 if ((indfd = openat(infd, ".", O_RDONLY|O_XATTR)) == -1) { 567 etext = gettext("cannot open source"); 568 error = -1; 569 goto out; 570 } 571 572 if ((outdfd = openat(outfd, ".", O_RDONLY|O_XATTR)) == -1) { 573 etext = gettext("cannot open target"); 574 error = -1; 575 goto out; 576 } 577 578 if ((tmpfd = dup(indfd)) == -1) { 579 etext = gettext("cannot dup descriptor"); 580 error = -1; 581 goto out; 582 583 } 584 if ((dirp = fdopendir(tmpfd)) == NULL) { 585 etext = gettext("cannot access source"); 586 error = -1; 587 goto out; 588 } 589 590 while (dp = readdir(dirp)) { 591 if ((dp->d_name[0] == '.' && dp->d_name[1] == '\0') || 592 (dp->d_name[0] == '.' && dp->d_name[1] == '.' && 593 dp->d_name[2] == '\0')) 594 continue; 595 if ((renameat(indfd, dp->d_name, outdfd, dp->d_name)) == -1) { 596 etext = dp->d_name; 597 error = -1; 598 goto out; 599 } 600 } 601 out: 602 if (error == -1 && silent == 0) { 603 fprintf(stderr, gettext( 604 "pack: %s: cannot move extended attributes, "), 605 infile); 606 perror(etext); 607 } 608 if (dirp) 609 closedir(dirp); 610 if (indfd != -1) 611 close(indfd); 612 if (outdfd != -1) 613 close(outdfd); 614 return (error); 615 } 616