xref: /illumos-gate/usr/src/cmd/pack/pack.c (revision 20a7641f9918de8574b8b3b47dbe35c4bfc78df1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 /*
30  *	Huffman encoding program
31  *	Usage:	pack [[ -f ] [ - ] [-/] filename ... ] filename ...
32  *		- option: enable/disable listing of statistics
33  */
34 
35 #include <locale.h>
36 #include <stdarg.h>
37 #include <sys/isa_defs.h>
38 #include <sys/param.h>
39 #include <utime.h>
40 #include <sys/acl.h>
41 #include <aclutils.h>
42 #include <libcmdutils.h>
43 
44 #undef lint
45 
46 #define	END	256
47 #define	PACKED 017436 /* <US><RS> - Unlikely value */
48 #define	SUF0	'.'
49 #define	SUF1	'z'
50 
51 struct stat status, ostatus;
52 static struct utimbuf u_times;
53 
54 /* union for overlaying a long int with a set of four characters */
55 union FOUR {
56 	struct { long int lng; } lint;
57 	struct { char c0, c1, c2, c3; } chars;
58 };
59 
60 /* character counters */
61 long	count [END+1];
62 union	FOUR insize;
63 long	outsize;
64 long	dictsize;
65 int	diffbytes;
66 
67 /* i/o stuff */
68 char	vflag = 0;
69 int	force = 0;	/* allow forced packing for consistency in directory */
70 
71 static	char filename [MAXPATHLEN];
72 static int max_name;
73 
74 int	infile;		/* unpacked file */
75 int	outfile;	/* packed file */
76 char	inbuff [BUFSIZ];
77 char	outbuff [BUFSIZ+4];
78 
79 /* variables associated with the tree */
80 int	maxlev;
81 int	levcount [25];
82 int	lastnode;
83 int	parent [2*END+1];
84 
85 /* variables associated with the encoding process */
86 char	length [END+1];
87 long	bits [END+1];
88 union	FOUR mask;
89 long	inc;
90 #if defined(_LITTLE_ENDIAN)
91 char	*maskshuff[4]  = {&(mask.chars.c3),
92 			    &(mask.chars.c2),
93 			    &(mask.chars.c1),
94 			    &(mask.chars.c0)};
95 #elif defined(_BIG_ENDIAN)
96 char	*maskshuff[4]  = {&(mask.chars.c0),
97 			    &(mask.chars.c1),
98 			    &(mask.chars.c2),
99 			    &(mask.chars.c3)};
100 #else
101 #error Unknown byte ordering!
102 #endif
103 
104 /* the heap */
105 int	n;
106 struct	heap {
107 	long int count;
108 	int node;
109 } heap [END+2];
110 #define	hmove(a, b) {(b).count = (a).count; (b).node = (a).node; }
111 
112 static void heapify(int i);
113 
114 /* Extended system attribute support */
115 
116 static int saflg = 0;
117 
118 
119 /* gather character frequency statistics */
120 /* return 1 if successful, 0 otherwise */
121 int
122 input(char *source)
123 {
124 	register int i;
125 	for (i = 0; i < END; i++)
126 		count[i] = 0;
127 	while ((i = read(infile, inbuff, BUFSIZ)) > 0)
128 		while (i > 0)
129 			count[inbuff[--i]&0377] += 2;
130 	if (i == 0)
131 		return (1);
132 	(void) fprintf(stderr, gettext(
133 	    "pack: %s: read error - file unchanged: "), source);
134 	perror("");
135 	return (0);
136 }
137 
138 /* encode the current file */
139 /* return 1 if successful, 0 otherwise */
140 int
141 output(char *source)
142 {
143 	int c, i, inleft;
144 	char *inp;
145 	register char **q, *outp;
146 	register int bitsleft;
147 	long temp;
148 
149 	/* output ``PACKED'' header */
150 	outbuff[0] = 037; 	/* ascii US */
151 	outbuff[1] = 036; 	/* ascii RS */
152 	/* output the length and the dictionary */
153 	temp = insize.lint.lng;
154 	for (i = 5; i >= 2; i--) {
155 		outbuff[i] =  (char)(temp & 0377);
156 		temp >>= 8;
157 	}
158 	outp = &outbuff[6];
159 	*outp++ = maxlev;
160 	for (i = 1; i < maxlev; i++)
161 		*outp++ = levcount[i];
162 	*outp++ = levcount[maxlev]-2;
163 	for (i = 1; i <= maxlev; i++)
164 		for (c = 0; c < END; c++)
165 			if (length[c] == i)
166 				*outp++ = c;
167 	dictsize = outp-&outbuff[0];
168 
169 	/* output the text */
170 	(void) lseek(infile, 0L, 0);
171 	outsize = 0;
172 	bitsleft = 8;
173 	inleft = 0;
174 	do {
175 		if (inleft <= 0) {
176 			inleft = read(infile, inp = &inbuff[0], BUFSIZ);
177 			if (inleft < 0) {
178 				(void) fprintf(stderr, gettext(
179 				    "pack: %s: read error - file unchanged: "),
180 				    source);
181 				perror("");
182 				return (0);
183 			}
184 		}
185 		c = (--inleft < 0) ? END : (*inp++ & 0377);
186 		mask.lint.lng = bits[c]<<bitsleft;
187 		q = &maskshuff[0];
188 		if (bitsleft == 8)
189 			*outp = **q++;
190 		else
191 			*outp |= **q++;
192 		bitsleft -= length[c];
193 		while (bitsleft < 0) {
194 			*++outp = **q++;
195 			bitsleft += 8;
196 		}
197 		if (outp >= &outbuff[BUFSIZ]) {
198 			if (write(outfile, outbuff, BUFSIZ) != BUFSIZ) {
199 wrerr:				(void) fprintf(stderr, gettext(
200 				    "pack: %s.z: write error - "
201 				    "file unchanged: "), source);
202 				perror("");
203 				return (0);
204 			}
205 			((union FOUR *)outbuff)->lint.lng =
206 			    ((union FOUR *)&outbuff[BUFSIZ])->lint.lng;
207 			outp -= BUFSIZ;
208 			outsize += BUFSIZ;
209 		}
210 	} while (c != END);
211 	if (bitsleft < 8)
212 		outp++;
213 	c = outp-outbuff;
214 	if (write(outfile, outbuff, c) != c)
215 		goto wrerr;
216 	outsize += c;
217 	return (1);
218 }
219 
220 /* makes a heap out of heap[i],...,heap[n] */
221 void
222 heapify(int i)
223 {
224 	register int k;
225 	int lastparent;
226 	struct heap heapsubi;
227 	hmove(heap[i], heapsubi);
228 	lastparent = n/2;
229 	while (i <= lastparent) {
230 		k = 2*i;
231 		if (heap[k].count > heap[k+1].count && k < n)
232 			k++;
233 		if (heapsubi.count < heap[k].count)
234 			break;
235 		hmove(heap[k], heap[i]);
236 		i = k;
237 	}
238 	hmove(heapsubi, heap[i]);
239 }
240 
241 /* return 1 after successful packing, 0 otherwise */
242 int
243 packfile(char *source)
244 {
245 	register int c, i, p;
246 	long bitsout;
247 
248 	/* gather frequency statistics */
249 	if (input(source) == 0)
250 		return (0);
251 
252 	/* put occurring chars in heap with their counts */
253 	diffbytes = -1;
254 	count[END] = 1;
255 	insize.lint.lng = n = 0;
256 	for (i = END; i >= 0; i--) {
257 		parent[i] = 0;
258 		if (count[i] > 0) {
259 			diffbytes++;
260 			insize.lint.lng += count[i];
261 			heap[++n].count = count[i];
262 			heap[n].node = i;
263 		}
264 	}
265 	if (diffbytes == 1) {
266 		(void) fprintf(stderr, gettext(
267 		    "pack: %s: trivial file - file unchanged\n"), source);
268 		return (0);
269 	}
270 	insize.lint.lng >>= 1;
271 	for (i = n/2; i >= 1; i--)
272 		heapify(i);
273 
274 	/* build Huffman tree */
275 	lastnode = END;
276 	while (n > 1) {
277 		parent[heap[1].node] = ++lastnode;
278 		inc = heap[1].count;
279 		hmove(heap[n], heap[1]);
280 		n--;
281 		heapify(1);
282 		parent[heap[1].node] = lastnode;
283 		heap[1].node = lastnode;
284 		heap[1].count += inc;
285 		heapify(1);
286 	}
287 	parent[lastnode] = 0;
288 
289 	/* assign lengths to encoding for each character */
290 	bitsout = maxlev = 0;
291 	for (i = 1; i <= 24; i++)
292 		levcount[i] = 0;
293 	for (i = 0; i <= END; i++) {
294 		c = 0;
295 		for (p = parent[i]; p != 0; p = parent[p])
296 			c++;
297 		levcount[c]++;
298 		length[i] = c;
299 		if (c > maxlev)
300 			maxlev = c;
301 		bitsout += c*(count[i]>>1);
302 	}
303 	if (maxlev > 24) {
304 		/* can't occur unless insize.lint.lng >= 2**24 */
305 		(void) fprintf(stderr, gettext(
306 		    "pack: %s: Huffman tree has too many levels - "
307 		    "file unchanged\n"), source);
308 		return (0);
309 	}
310 
311 	/* don't bother if no compression results */
312 	outsize = ((bitsout+7)>>3)+6+maxlev+diffbytes;
313 	if ((insize.lint.lng+BUFSIZ-1)/BUFSIZ <=
314 	    (outsize+BUFSIZ-1)/BUFSIZ && !force) {
315 		(void) printf(gettext(
316 		    "pack: %s: no saving - file unchanged\n"), source);
317 		return (0);
318 	}
319 
320 	/* compute bit patterns for each character */
321 	inc = 1L << 24;
322 	inc >>= maxlev;
323 	mask.lint.lng = 0;
324 	for (i = maxlev; i > 0; i--) {
325 		for (c = 0; c <= END; c++)
326 			if (length[c] == i) {
327 				bits[c] = mask.lint.lng;
328 				mask.lint.lng += inc;
329 			}
330 		mask.lint.lng &= ~inc;
331 		inc <<= 1;
332 	}
333 
334 	return (output(source));
335 }
336 
337 int
338 main(int argc, char *argv[])
339 {
340 	extern  int optind;
341 	register int i;
342 	register char *cp;
343 	int k, sep, errflg = 0;
344 	int c;
345 	int error;
346 	int fcount = 0; /* count failures */
347 	acl_t *aclp = NULL;
348 	char *progname;
349 	int sattr_exist = 0;
350 	int xattr_exist = 0;
351 
352 	(void) setlocale(LC_ALL, "");
353 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
354 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't */
355 #endif
356 	(void) textdomain(TEXT_DOMAIN);
357 
358 	if (progname = strrchr(argv[0], '/'))
359 		++progname;
360 	else
361 		progname = argv[0];
362 
363 	while ((c = getopt(argc, argv, "f-/")) != EOF) {
364 		if (c == 'f')
365 			force++;
366 		else if (c == '/')
367 			saflg++;
368 		else
369 			++errflg;
370 	}
371 	/*
372 	 * Check for invalid option.  Also check for missing
373 	 * file operand, ie: "pack" or "pack -".
374 	 */
375 	argc -= optind;
376 	argv = &argv[optind];
377 	if (errflg || argc < 1 ||
378 	    (argc == 1 && (argv[0][0] == '-' || argv[0][0] == '/' &&
379 	    argv[0][1] == '\0'))) {
380 		(void) fprintf(stderr, gettext(
381 		    "usage: pack [-f] [-] [-/] file...\n"));
382 		if (argc < 1 ||
383 		    (argc == 1 && (argv[0][0] == '-' || argv[0][0] == '/') &&
384 		    argv[0][1] == '\0')) {
385 			/*
386 			 * return 1 for usage error when no file was specified
387 			 */
388 			return (1);
389 		}
390 	}
391 
392 	/* loop through the file names */
393 	for (k = 0; k < argc; k++) {
394 		if (argv[k][0] == '-' && argv[k][1] == '\0') {
395 			vflag = 1 - vflag;
396 			continue;
397 		}
398 		fcount++; /* increase failure count - expect the worst */
399 		if (errflg) {
400 			/*
401 			 * invalid option; just count the number of files not
402 			 * packed
403 			 */
404 			continue;
405 		}
406 		/* remove any .z suffix the user may have added */
407 		for (cp = argv[k]; *cp != '\0'; ++cp)
408 			;
409 		if (cp[-1] == SUF1 && cp[-2] == SUF0) {
410 			*cp-- = '\0'; *cp-- = '\0'; *cp = '\0';
411 		}
412 		sep = -1;  cp = filename;
413 		max_name = pathconf(argv[k], _PC_NAME_MAX);
414 		if (max_name == -1) {
415 			/* pathname invalid or no limit on length of filename */
416 			max_name = _POSIX_NAME_MAX;
417 		}
418 		/* copy argv[k] to filename and count chars in base name */
419 		for (i = 0; i < (MAXPATHLEN-3) && (*cp = argv[k][i]); i++)
420 			if (*cp++ == '/') sep = i;
421 		if ((infile = open(filename, 0)) < 0) {
422 			(void) fprintf(stderr, gettext(
423 			    "pack: %s: cannot open: "), filename);
424 			perror("");
425 			continue;
426 		}
427 		if (i >= (MAXPATHLEN-3) || (i-sep) > (max_name - 1)) {
428 			(void) fprintf(stderr, gettext(
429 			    "pack: %s: file name too long\n"), argv[k]);
430 			continue;
431 		}
432 		(void) fstat(infile, &status);
433 		if (S_ISDIR(status.st_mode)) {
434 			(void) fprintf(stderr, gettext(
435 			    "pack: %s: cannot pack a directory\n"),
436 			    argv[k]);
437 			goto closein;
438 		}
439 		if (status.st_size == 0) {
440 			(void) fprintf(stderr, gettext(
441 			    "pack: %s: cannot pack a zero length file\n"),
442 			    argv[k]);
443 			goto closein;
444 		}
445 		if (status.st_nlink != 1) {
446 			(void) fprintf(stderr, gettext(
447 			    "pack: %s: has links\n"),
448 			    argv[k]);
449 			goto closein;
450 		}
451 		*cp++ = SUF0;  *cp++ = SUF1;  *cp = '\0';
452 		if (stat(filename, &ostatus) != -1) {
453 			(void) fprintf(stderr, gettext(
454 			    "pack: %s: already exists\n"), filename);
455 			goto closein;
456 		}
457 		if ((outfile = creat(filename, status.st_mode | O_RDONLY))
458 		    < 0) {
459 			(void) fprintf(stderr, gettext(
460 			    "pack: %s: cannot create: "), filename);
461 			perror("");
462 			goto closein;
463 		}
464 
465 		error = facl_get(infile, ACL_NO_TRIVIAL, &aclp);
466 
467 		if (error != 0) {
468 			(void) fprintf(stderr, gettext(
469 			    "pack: %s: cannot retrieve ACL: %s\n"), argv[k],
470 			    acl_strerror(error));
471 		}
472 
473 		if (packfile(argv[k])) {
474 			if (pathconf(argv[k], _PC_XATTR_EXISTS) == 1)
475 				xattr_exist = 1;
476 			if (saflg && sysattr_support(argv[k],
477 			    _PC_SATTR_EXISTS) == 1)
478 				sattr_exist = 1;
479 			if (sattr_exist || xattr_exist) {
480 				if (mv_xattrs(progname, argv[k], filename,
481 				    sattr_exist, 0) != 0) {
482 				/* Move attributes back ... */
483 					xattr_exist = 0;
484 					sattr_exist = 0;
485 					if (pathconf(filename,
486 					    _PC_XATTR_EXISTS) == 1)
487 						xattr_exist = 1;
488 					if (saflg && sysattr_support(filename,
489 					    _PC_SATTR_EXISTS) == 1)
490 						sattr_exist = 1;
491 					if (xattr_exist || sattr_exist) {
492 						(void) mv_xattrs(progname,
493 						    filename, argv[k],
494 						    sattr_exist, 1);
495 						(void) unlink(filename);
496 						goto out;
497 					}
498 				} else {
499 					errno = 0;
500 					if (unlink(argv[k]) != 0) {
501 						(void) fprintf(stderr, gettext(
502 						    "pack: %s :cannot unlink:"),
503 						    argv[k]);
504 						if (errno == EPERM)
505 							perror("No permission");
506 						else
507 							perror("");
508 					}
509 				}
510 			} else {
511 				errno = 0;
512 				if (unlink(argv[k]) != 0) {
513 					(void) fprintf(stderr, gettext(
514 					    "pack: %s :cannot unlink"),
515 					    argv[k]);
516 					if (errno == EPERM)
517 						perror("No permission");
518 					else
519 						perror("");
520 				}
521 			}
522 			(void) printf(gettext(
523 			    "pack: %s: %.1f%% Compression\n"),
524 			    argv[k],
525 			    ((double)(-outsize+(insize.lint.lng))/
526 			    (double)insize.lint.lng)*100);
527 			/* output statistics */
528 			if (vflag) {
529 				(void) printf(gettext(
530 				    "\tfrom %ld to %ld bytes\n"),
531 				    insize.lint.lng, outsize);
532 				(void) printf(gettext(
533 				    "\tHuffman tree has %d levels below "
534 				    "root\n"), maxlev);
535 				(void) printf(gettext(
536 				    "\t%d distinct bytes in input\n"),
537 				    diffbytes);
538 				(void) printf(gettext(
539 				    "\tdictionary overhead = %ld bytes\n"),
540 				    dictsize);
541 				(void) printf(gettext(
542 				    "\teffective  entropy  = %.2f bits/byte\n"),
543 				    ((double)outsize / (double)insize.lint.lng)
544 				    * 8);
545 				(void) printf(gettext(
546 				    "\tasymptotic entropy  = %.2f bits/byte\n"),
547 				    ((double)(outsize-dictsize) /
548 				    (double)insize.lint.lng) * 8);
549 			}
550 
551 			u_times.actime = status.st_atime;
552 			u_times.modtime = status.st_mtime;
553 			if (utime(filename, &u_times) != 0) {
554 				errflg++;
555 				(void) fprintf(stderr,
556 				    gettext(
557 				    "pack: cannot change times on %s: "),
558 				    filename);
559 				perror("");
560 			}
561 			if (chmod(filename, status.st_mode) != 0) {
562 				errflg++;
563 				(void) fprintf(stderr,
564 				    gettext(
565 				    "pack: can't change mode to %o on %s: "),
566 				    status.st_mode, filename);
567 				perror("");
568 			}
569 			(void) chown(filename, status.st_uid, status.st_gid);
570 			if (aclp && (facl_set(outfile, aclp) < 0)) {
571 				(void) fprintf(stderr, gettext(
572 				    "pack: %s: failed to set acl entries\n"),
573 				    filename);
574 				perror("");
575 			}
576 			if (!errflg)
577 				fcount--;  /* success after all */
578 
579 		}
580 out:
581 		if (aclp) {
582 			acl_free(aclp);
583 			aclp = NULL;
584 		}
585 
586 closein:	(void) close(outfile);
587 		(void) close(infile);
588 	}
589 	return (fcount);
590 }
591