xref: /illumos-gate/usr/src/cmd/pack/pack.c (revision 24da5b34f49324ed742a340010ed5bd3d4e06625)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
30 
31 /*
32  *	Huffman encoding program
33  *	Usage:	pack [[ -f ] [ - ] filename ... ] filename ...
34  *		- option: enable/disable listing of statistics
35  */
36 
37 #include <stdio.h>
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #include <unistd.h>
41 #include <locale.h>
42 #include <stdarg.h>
43 #include <errno.h>
44 #include <sys/isa_defs.h>
45 #include <stdlib.h>
46 #include <limits.h>
47 #include <sys/param.h>
48 #include <fcntl.h>
49 #include <utime.h>
50 #include <string.h>
51 #include <dirent.h>
52 #include <unistd.h>
53 #include <sys/acl.h>
54 #include <aclutils.h>
55 
56 #undef lint
57 
58 #define	END	256
59 #define	PACKED 017436 /* <US><RS> - Unlikely value */
60 #define	SUF0	'.'
61 #define	SUF1	'z'
62 
63 struct stat status, ostatus;
64 static struct utimbuf u_times;
65 
66 /* union for overlaying a long int with a set of four characters */
67 union FOUR {
68 	struct { long int lng; } lint;
69 	struct { char c0, c1, c2, c3; } chars;
70 };
71 
72 /* character counters */
73 long	count [END+1];
74 union	FOUR insize;
75 long	outsize;
76 long	dictsize;
77 int	diffbytes;
78 
79 /* i/o stuff */
80 char	vflag = 0;
81 int	force = 0;	/* allow forced packing for consistency in directory */
82 
83 static	char filename [MAXPATHLEN];
84 static int max_name;
85 static int max_path = MAXPATHLEN;
86 
87 int	infile;		/* unpacked file */
88 int	outfile;	/* packed file */
89 char	inbuff [BUFSIZ];
90 char	outbuff [BUFSIZ+4];
91 
92 /* variables associated with the tree */
93 int	maxlev;
94 int	levcount [25];
95 int	lastnode;
96 int	parent [2*END+1];
97 
98 /* variables associated with the encoding process */
99 char	length [END+1];
100 long	bits [END+1];
101 union	FOUR mask;
102 long	inc;
103 #if defined(_LITTLE_ENDIAN)
104 char	*maskshuff[4]  = {&(mask.chars.c3),
105 			    &(mask.chars.c2),
106 			    &(mask.chars.c1),
107 			    &(mask.chars.c0)};
108 #elif defined(_BIG_ENDIAN)
109 char	*maskshuff[4]  = {&(mask.chars.c0),
110 			    &(mask.chars.c1),
111 			    &(mask.chars.c2),
112 			    &(mask.chars.c3)};
113 #else
114 #error Unknown byte ordering!
115 #endif
116 
117 /* the heap */
118 int	n;
119 struct	heap {
120 	long int count;
121 	int node;
122 } heap [END+2];
123 #define	hmove(a, b) {(b).count = (a).count; (b).node = (a).node; }
124 
125 static void heapify(int i);
126 static int mv_xattrs(int, int, char *, int);
127 
128 /* gather character frequency statistics */
129 /* return 1 if successful, 0 otherwise */
130 int
131 input(char *source)
132 {
133 	register int i;
134 	for (i = 0; i < END; i++)
135 		count[i] = 0;
136 	while ((i = read(infile, inbuff, BUFSIZ)) > 0)
137 		while (i > 0)
138 			count[inbuff[--i]&0377] += 2;
139 	if (i == 0)
140 		return (1);
141 	fprintf(stderr, gettext(
142 		"pack: %s: read error - file unchanged: "), source);
143 	perror("");
144 	return (0);
145 }
146 
147 /* encode the current file */
148 /* return 1 if successful, 0 otherwise */
149 int
150 output(char *source)
151 {
152 	int c, i, inleft;
153 	char *inp;
154 	register char **q, *outp;
155 	register int bitsleft;
156 	long temp;
157 
158 	/* output ``PACKED'' header */
159 	outbuff[0] = 037; 	/* ascii US */
160 	outbuff[1] = 036; 	/* ascii RS */
161 	/* output the length and the dictionary */
162 	temp = insize.lint.lng;
163 	for (i = 5; i >= 2; i--) {
164 		outbuff[i] =  (char)(temp & 0377);
165 		temp >>= 8;
166 	}
167 	outp = &outbuff[6];
168 	*outp++ = maxlev;
169 	for (i = 1; i < maxlev; i++)
170 		*outp++ = levcount[i];
171 	*outp++ = levcount[maxlev]-2;
172 	for (i = 1; i <= maxlev; i++)
173 		for (c = 0; c < END; c++)
174 			if (length[c] == i)
175 				*outp++ = c;
176 	dictsize = outp-&outbuff[0];
177 
178 	/* output the text */
179 	lseek(infile, 0L, 0);
180 	outsize = 0;
181 	bitsleft = 8;
182 	inleft = 0;
183 	do {
184 		if (inleft <= 0) {
185 			inleft = read(infile, inp = &inbuff[0], BUFSIZ);
186 			if (inleft < 0) {
187 				fprintf(stderr, gettext(
188 				    "pack: %s: read error - file unchanged: "),
189 					    source);
190 				perror("");
191 				return (0);
192 			}
193 		}
194 		c = (--inleft < 0) ? END : (*inp++ & 0377);
195 		mask.lint.lng = bits[c]<<bitsleft;
196 		q = &maskshuff[0];
197 		if (bitsleft == 8)
198 			*outp = **q++;
199 		else
200 			*outp |= **q++;
201 		bitsleft -= length[c];
202 		while (bitsleft < 0) {
203 			*++outp = **q++;
204 			bitsleft += 8;
205 		}
206 		if (outp >= &outbuff[BUFSIZ]) {
207 			if (write(outfile, outbuff, BUFSIZ) != BUFSIZ) {
208 wrerr:				fprintf(stderr, gettext(
209 				"pack: %s.z: write error - file unchanged: "),
210 					source);
211 				perror("");
212 				return (0);
213 			}
214 			((union FOUR *)outbuff)->lint.lng =
215 				    ((union FOUR *)&outbuff[BUFSIZ])->lint.lng;
216 			outp -= BUFSIZ;
217 			outsize += BUFSIZ;
218 		}
219 	} while (c != END);
220 	if (bitsleft < 8)
221 		outp++;
222 	c = outp-outbuff;
223 	if (write(outfile, outbuff, c) != c)
224 		goto wrerr;
225 	outsize += c;
226 	return (1);
227 }
228 
229 /* makes a heap out of heap[i],...,heap[n] */
230 void
231 heapify(int i)
232 {
233 	register int k;
234 	int lastparent;
235 	struct heap heapsubi;
236 	hmove(heap[i], heapsubi);
237 	lastparent = n/2;
238 	while (i <= lastparent) {
239 		k = 2*i;
240 		if (heap[k].count > heap[k+1].count && k < n)
241 			k++;
242 		if (heapsubi.count < heap[k].count)
243 			break;
244 		hmove(heap[k], heap[i]);
245 		i = k;
246 	}
247 	hmove(heapsubi, heap[i]);
248 }
249 
250 /* return 1 after successful packing, 0 otherwise */
251 int
252 packfile(char *source)
253 {
254 	register int c, i, p;
255 	long bitsout;
256 
257 	/* gather frequency statistics */
258 	if (input(source) == 0)
259 		return (0);
260 
261 	/* put occurring chars in heap with their counts */
262 	diffbytes = -1;
263 	count[END] = 1;
264 	insize.lint.lng = n = 0;
265 	for (i = END; i >= 0; i--) {
266 		parent[i] = 0;
267 		if (count[i] > 0) {
268 			diffbytes++;
269 			insize.lint.lng += count[i];
270 			heap[++n].count = count[i];
271 			heap[n].node = i;
272 		}
273 	}
274 	if (diffbytes == 1) {
275 		fprintf(stderr, gettext(
276 			"pack: %s: trivial file - file unchanged\n"), source);
277 		return (0);
278 	}
279 	insize.lint.lng >>= 1;
280 	for (i = n/2; i >= 1; i--)
281 		heapify(i);
282 
283 	/* build Huffman tree */
284 	lastnode = END;
285 	while (n > 1) {
286 		parent[heap[1].node] = ++lastnode;
287 		inc = heap[1].count;
288 		hmove(heap[n], heap[1]);
289 		n--;
290 		heapify(1);
291 		parent[heap[1].node] = lastnode;
292 		heap[1].node = lastnode;
293 		heap[1].count += inc;
294 		heapify(1);
295 	}
296 	parent[lastnode] = 0;
297 
298 	/* assign lengths to encoding for each character */
299 	bitsout = maxlev = 0;
300 	for (i = 1; i <= 24; i++)
301 		levcount[i] = 0;
302 	for (i = 0; i <= END; i++) {
303 		c = 0;
304 		for (p = parent[i]; p != 0; p = parent[p])
305 			c++;
306 		levcount[c]++;
307 		length[i] = c;
308 		if (c > maxlev)
309 			maxlev = c;
310 		bitsout += c*(count[i]>>1);
311 	}
312 	if (maxlev > 24) {
313 		/* can't occur unless insize.lint.lng >= 2**24 */
314 		fprintf(stderr, gettext(
315 	"pack: %s: Huffman tree has too many levels - file unchanged\n"),
316 			source);
317 		return (0);
318 	}
319 
320 	/* don't bother if no compression results */
321 	outsize = ((bitsout+7)>>3)+6+maxlev+diffbytes;
322 	if ((insize.lint.lng+BUFSIZ-1)/BUFSIZ <=
323 				    (outsize+BUFSIZ-1)/BUFSIZ && !force) {
324 		printf(gettext(
325 			"pack: %s: no saving - file unchanged\n"), source);
326 		return (0);
327 	}
328 
329 	/* compute bit patterns for each character */
330 	inc = 1L << 24;
331 	inc >>= maxlev;
332 	mask.lint.lng = 0;
333 	for (i = maxlev; i > 0; i--) {
334 		for (c = 0; c <= END; c++)
335 			if (length[c] == i) {
336 				bits[c] = mask.lint.lng;
337 				mask.lint.lng += inc;
338 			}
339 		mask.lint.lng &= ~inc;
340 		inc <<= 1;
341 	}
342 
343 	return (output(source));
344 }
345 
346 int
347 main(int argc, char *argv[])
348 {
349 	extern  int optind;
350 	register int i;
351 	register char *cp;
352 	int k, sep, errflg = 0;
353 	int c;
354 	int error;
355 	int fcount = 0; /* count failures */
356 	acl_t *aclp = NULL;
357 
358 	(void) setlocale(LC_ALL, "");
359 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
360 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't */
361 #endif
362 	(void) textdomain(TEXT_DOMAIN);
363 
364 	while ((c = getopt(argc, argv, "f-")) != EOF) {
365 		if (c == 'f')
366 			force++;
367 		else
368 			++errflg;
369 	}
370 	/*
371 	 * Check for invalid option.  Also check for missing
372 	 * file operand, ie: "pack" or "pack -".
373 	 */
374 	argc -= optind;
375 	argv = &argv[optind];
376 	if (errflg || argc < 1 ||
377 		(argc == 1 && argv[0][0] == '-' && argv[0][1] == '\0')) {
378 		fprintf(stderr, gettext(
379 			"usage: pack [-f] [-] file...\n"));
380 		if (argc < 1 ||
381 			(argc == 1 && argv[0][0] == '-' &&
382 				argv[0][1] == '\0')) {
383 			/*
384 			 * return 1 for usage error when no file was specified
385 			 */
386 			return (1);
387 		}
388 	}
389 	/* loop through the file names */
390 	for (k = 0; k < argc; k++) {
391 		if (argv[k][0] == '-' && argv[k][1] == '\0') {
392 			vflag = 1 - vflag;
393 			continue;
394 		}
395 		fcount++; /* increase failure count - expect the worst */
396 		if (errflg) {
397 			/*
398 			 * invalid option; just count the number of files not
399 			 * packed
400 			 */
401 			continue;
402 		}
403 		/* remove any .z suffix the user may have added */
404 		for (cp = argv[k]; *cp != '\0'; ++cp)
405 			;
406 		if (cp[-1] == SUF1 && cp[-2] == SUF0) {
407 			*cp-- = '\0'; *cp-- = '\0'; *cp = '\0';
408 		}
409 		sep = -1;  cp = filename;
410 		max_name = pathconf(argv[k], _PC_NAME_MAX);
411 		if (max_name == -1) {
412 			/* pathname invalid or no limit on length of filename */
413 			max_name = _POSIX_NAME_MAX;
414 		}
415 		/* copy argv[k] to filename and count chars in base name */
416 		for (i = 0; i < (MAXPATHLEN-3) && (*cp = argv[k][i]); i++)
417 			if (*cp++ == '/') sep = i;
418 		if ((infile = open(filename, 0)) < 0) {
419 			fprintf(stderr, gettext(
420 				"pack: %s: cannot open: "), filename);
421 			perror("");
422 			continue;
423 		}
424 		if (i >= (MAXPATHLEN-3) || (i-sep) > (max_name - 1)) {
425 			fprintf(stderr, gettext(
426 				"pack: %s: file name too long\n"), argv[k]);
427 			continue;
428 		}
429 		fstat(infile, &status);
430 		if (S_ISDIR(status.st_mode)) {
431 			fprintf(stderr, gettext(
432 				"pack: %s: cannot pack a directory\n"),
433 				    argv[k]);
434 			goto closein;
435 		}
436 		if (status.st_size == 0) {
437 			fprintf(stderr, gettext(
438 				"pack: %s: cannot pack a zero length file\n"),
439 				argv[k]);
440 			goto closein;
441 		}
442 		if (status.st_nlink != 1) {
443 			fprintf(stderr, gettext(
444 				"pack: %s: has links\n"),
445 				argv[k]);
446 			goto closein;
447 		}
448 		*cp++ = SUF0;  *cp++ = SUF1;  *cp = '\0';
449 		if (stat(filename, &ostatus) != -1) {
450 			fprintf(stderr, gettext(
451 				"pack: %s: already exists\n"), filename);
452 			goto closein;
453 		}
454 
455 		if ((outfile = creat(filename, status.st_mode)) < 0) {
456 			fprintf(stderr, gettext(
457 				"pack: %s: cannot create: "), filename);
458 			perror("");
459 			goto closein;
460 		}
461 
462 		error = facl_get(infile, ACL_NO_TRIVIAL, &aclp);
463 
464 		if (error != 0) {
465 			fprintf(stderr, gettext(
466 			    "pack: %s: cannot retrieve ACL: %s\n"), argv[k],
467 			    acl_strerror(error));
468 		}
469 		if (packfile(argv[k]) &&
470 		    ((pathconf(argv[k], _PC_XATTR_EXISTS) != 1) ||
471 				(mv_xattrs(infile, outfile,
472 					argv[k], 0) == 0))) {
473 			if (unlink(argv[k]) != 0) {
474 				fprintf(stderr, gettext(
475 					"pack: %s: cannot unlink: "),
476 					argv[k]);
477 				perror("");
478 			}
479 			printf(gettext(
480 				"pack: %s: %.1f%% Compression\n"),
481 				argv[k],
482 	    ((double)(-outsize+(insize.lint.lng))/(double)insize.lint.lng)*100);
483 			/* output statistics */
484 			if (vflag) {
485 				printf(gettext("\tfrom %ld to %ld bytes\n"),
486 					insize.lint.lng, outsize);
487 				printf(gettext(
488 				"\tHuffman tree has %d levels below root\n"),
489 				    maxlev);
490 				printf(gettext(
491 					"\t%d distinct bytes in input\n"),
492 					diffbytes);
493 				printf(gettext(
494 					"\tdictionary overhead = %ld bytes\n"),
495 					dictsize);
496 				printf(gettext(
497 				    "\teffective  entropy  = %.2f bits/byte\n"),
498 			    ((double)outsize / (double)insize.lint.lng) * 8);
499 				printf(gettext(
500 				    "\tasymptotic entropy  = %.2f bits/byte\n"),
501 					((double)(outsize-dictsize) /
502 					(double)insize.lint.lng) * 8);
503 			}
504 			u_times.actime = status.st_atime;
505 			u_times.modtime = status.st_mtime;
506 			if (utime(filename, &u_times) != 0) {
507 				errflg++;
508 				fprintf(stderr,
509 					gettext(
510 					"pack: cannot change times on %s: "),
511 					filename);
512 				perror("");
513 			}
514 			if (chmod(filename, status.st_mode) != 0) {
515 				errflg++;
516 				fprintf(stderr,
517 					gettext(
518 				"pack: can't change mode to %o on %s: "),
519 					status.st_mode, filename);
520 				perror("");
521 			}
522 			chown(filename, status.st_uid, status.st_gid);
523 			if (aclp && (facl_set(outfile, aclp) < 0)) {
524 				fprintf(stderr, gettext(
525 				    "pack: %s: failed to set acl entries\n"),
526 				    filename);
527 				perror("");
528 			}
529 			if (!errflg)
530 				fcount--;  /* success after all */
531 		} else {
532 			if (pathconf(filename, _PC_XATTR_EXISTS) == 1) {
533 				(void) mv_xattrs(outfile, infile, filename, 1);
534 			}
535 			unlink(filename);
536 		}
537 
538 		if (aclp) {
539 			acl_free(aclp);
540 			aclp = NULL;
541 		}
542 
543 closein:	close(outfile);
544 		close(infile);
545 	}
546 	return (fcount);
547 }
548 
549 /*
550  * mv_xattrs - move (via renameat) all of the extended attributes
551  *	associated with the file referenced by infd to the file
552  *	referenced by outfd.  The infile and silent arguments are
553  *	provided for error message processing.  This function
554  *	returns 0 on success and -1 on error.
555  */
556 static int
557 mv_xattrs(int infd, int outfd, char *infile, int silent)
558 {
559 	int indfd, outdfd, tmpfd;
560 	DIR *dirp = NULL;
561 	struct dirent *dp = NULL;
562 	int error = 0;
563 	char *etext;
564 
565 	indfd = outdfd = tmpfd = -1;
566 
567 	if ((indfd = openat(infd, ".", O_RDONLY|O_XATTR)) == -1) {
568 		etext = gettext("cannot open source");
569 		error = -1;
570 		goto out;
571 	}
572 
573 	if ((outdfd = openat(outfd, ".", O_RDONLY|O_XATTR)) == -1) {
574 		etext = gettext("cannot open target");
575 		error = -1;
576 		goto out;
577 	}
578 
579 	if ((tmpfd = dup(indfd)) == -1) {
580 		etext = gettext("cannot dup descriptor");
581 		error = -1;
582 		goto out;
583 
584 	}
585 	if ((dirp = fdopendir(tmpfd)) == NULL) {
586 		etext = gettext("cannot access source");
587 		error = -1;
588 		goto out;
589 	}
590 
591 	while (dp = readdir(dirp)) {
592 		if ((dp->d_name[0] == '.' && dp->d_name[1] == '\0') ||
593 		    (dp->d_name[0] == '.' && dp->d_name[1] == '.' &&
594 		    dp->d_name[2] == '\0'))
595 			continue;
596 		if ((renameat(indfd, dp->d_name, outdfd, dp->d_name)) == -1) {
597 			etext = dp->d_name;
598 			error = -1;
599 			goto out;
600 		}
601 	}
602 out:
603 	if (error == -1 && silent == 0) {
604 		fprintf(stderr, gettext(
605 			"pack: %s: cannot move extended attributes, "),
606 			infile);
607 		perror(etext);
608 	}
609 	if (dirp)
610 		closedir(dirp);
611 	if (indfd != -1)
612 		close(indfd);
613 	if (outdfd != -1)
614 		close(outdfd);
615 	return (error);
616 }
617