xref: /illumos-gate/usr/src/cmd/pack/pack.c (revision d5dbd18d69de8954ab5ceb588e99d43fc9b21d46)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*
33  *	Huffman encoding program
34  *	Usage:	pack [[ -f ] [ - ] filename ... ] filename ...
35  *		- option: enable/disable listing of statistics
36  */
37 
38 #include <stdio.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <unistd.h>
42 #include <locale.h>
43 #include <stdarg.h>
44 #include <errno.h>
45 #include <sys/isa_defs.h>
46 #include <stdlib.h>
47 #include <limits.h>
48 #include <sys/param.h>
49 #include <fcntl.h>
50 #include <utime.h>
51 #include <string.h>
52 #include <dirent.h>
53 #include <unistd.h>
54 
55 #undef lint
56 
57 #define	END	256
58 #define	PACKED 017436 /* <US><RS> - Unlikely value */
59 #define	SUF0	'.'
60 #define	SUF1	'z'
61 
62 struct stat status, ostatus;
63 static struct utimbuf u_times;
64 
65 /* union for overlaying a long int with a set of four characters */
66 union FOUR {
67 	struct { long int lng; } lint;
68 	struct { char c0, c1, c2, c3; } chars;
69 };
70 
71 /* character counters */
72 long	count [END+1];
73 union	FOUR insize;
74 long	outsize;
75 long	dictsize;
76 int	diffbytes;
77 
78 /* i/o stuff */
79 char	vflag = 0;
80 int	force = 0;	/* allow forced packing for consistency in directory */
81 
82 static	char filename [MAXPATHLEN];
83 static int max_name;
84 static int max_path = MAXPATHLEN;
85 
86 int	infile;		/* unpacked file */
87 int	outfile;	/* packed file */
88 char	inbuff [BUFSIZ];
89 char	outbuff [BUFSIZ+4];
90 
91 /* variables associated with the tree */
92 int	maxlev;
93 int	levcount [25];
94 int	lastnode;
95 int	parent [2*END+1];
96 
97 /* variables associated with the encoding process */
98 char	length [END+1];
99 long	bits [END+1];
100 union	FOUR mask;
101 long	inc;
102 #if defined(_LITTLE_ENDIAN)
103 char	*maskshuff[4]  = {&(mask.chars.c3),
104 			    &(mask.chars.c2),
105 			    &(mask.chars.c1),
106 			    &(mask.chars.c0)};
107 #elif defined(_BIG_ENDIAN)
108 char	*maskshuff[4]  = {&(mask.chars.c0),
109 			    &(mask.chars.c1),
110 			    &(mask.chars.c2),
111 			    &(mask.chars.c3)};
112 #else
113 #error Unknown byte ordering!
114 #endif
115 
116 /* the heap */
117 int	n;
118 struct	heap {
119 	long int count;
120 	int node;
121 } heap [END+2];
122 #define	hmove(a, b) {(b).count = (a).count; (b).node = (a).node; }
123 
124 static void heapify(int i);
125 static int mv_xattrs(int, int, char *, int);
126 
127 /* gather character frequency statistics */
128 /* return 1 if successful, 0 otherwise */
129 int
130 input(char *source)
131 {
132 	register int i;
133 	for (i = 0; i < END; i++)
134 		count[i] = 0;
135 	while ((i = read(infile, inbuff, BUFSIZ)) > 0)
136 		while (i > 0)
137 			count[inbuff[--i]&0377] += 2;
138 	if (i == 0)
139 		return (1);
140 	fprintf(stderr, gettext(
141 		"pack: %s: read error - file unchanged: "), source);
142 	perror("");
143 	return (0);
144 }
145 
146 /* encode the current file */
147 /* return 1 if successful, 0 otherwise */
148 int
149 output(char *source)
150 {
151 	int c, i, inleft;
152 	char *inp;
153 	register char **q, *outp;
154 	register int bitsleft;
155 	long temp;
156 
157 	/* output ``PACKED'' header */
158 	outbuff[0] = 037; 	/* ascii US */
159 	outbuff[1] = 036; 	/* ascii RS */
160 	/* output the length and the dictionary */
161 	temp = insize.lint.lng;
162 	for (i = 5; i >= 2; i--) {
163 		outbuff[i] =  (char)(temp & 0377);
164 		temp >>= 8;
165 	}
166 	outp = &outbuff[6];
167 	*outp++ = maxlev;
168 	for (i = 1; i < maxlev; i++)
169 		*outp++ = levcount[i];
170 	*outp++ = levcount[maxlev]-2;
171 	for (i = 1; i <= maxlev; i++)
172 		for (c = 0; c < END; c++)
173 			if (length[c] == i)
174 				*outp++ = c;
175 	dictsize = outp-&outbuff[0];
176 
177 	/* output the text */
178 	lseek(infile, 0L, 0);
179 	outsize = 0;
180 	bitsleft = 8;
181 	inleft = 0;
182 	do {
183 		if (inleft <= 0) {
184 			inleft = read(infile, inp = &inbuff[0], BUFSIZ);
185 			if (inleft < 0) {
186 				fprintf(stderr, gettext(
187 				    "pack: %s: read error - file unchanged: "),
188 					    source);
189 				perror("");
190 				return (0);
191 			}
192 		}
193 		c = (--inleft < 0) ? END : (*inp++ & 0377);
194 		mask.lint.lng = bits[c]<<bitsleft;
195 		q = &maskshuff[0];
196 		if (bitsleft == 8)
197 			*outp = **q++;
198 		else
199 			*outp |= **q++;
200 		bitsleft -= length[c];
201 		while (bitsleft < 0) {
202 			*++outp = **q++;
203 			bitsleft += 8;
204 		}
205 		if (outp >= &outbuff[BUFSIZ]) {
206 			if (write(outfile, outbuff, BUFSIZ) != BUFSIZ) {
207 wrerr:				fprintf(stderr, gettext(
208 				"pack: %s.z: write error - file unchanged: "),
209 					source);
210 				perror("");
211 				return (0);
212 			}
213 			((union FOUR *)outbuff)->lint.lng =
214 				    ((union FOUR *)&outbuff[BUFSIZ])->lint.lng;
215 			outp -= BUFSIZ;
216 			outsize += BUFSIZ;
217 		}
218 	} while (c != END);
219 	if (bitsleft < 8)
220 		outp++;
221 	c = outp-outbuff;
222 	if (write(outfile, outbuff, c) != c)
223 		goto wrerr;
224 	outsize += c;
225 	return (1);
226 }
227 
228 /* makes a heap out of heap[i],...,heap[n] */
229 void
230 heapify(int i)
231 {
232 	register int k;
233 	int lastparent;
234 	struct heap heapsubi;
235 	hmove(heap[i], heapsubi);
236 	lastparent = n/2;
237 	while (i <= lastparent) {
238 		k = 2*i;
239 		if (heap[k].count > heap[k+1].count && k < n)
240 			k++;
241 		if (heapsubi.count < heap[k].count)
242 			break;
243 		hmove(heap[k], heap[i]);
244 		i = k;
245 	}
246 	hmove(heapsubi, heap[i]);
247 }
248 
249 /* return 1 after successful packing, 0 otherwise */
250 int
251 packfile(char *source)
252 {
253 	register int c, i, p;
254 	long bitsout;
255 
256 	/* gather frequency statistics */
257 	if (input(source) == 0)
258 		return (0);
259 
260 	/* put occurring chars in heap with their counts */
261 	diffbytes = -1;
262 	count[END] = 1;
263 	insize.lint.lng = n = 0;
264 	for (i = END; i >= 0; i--) {
265 		parent[i] = 0;
266 		if (count[i] > 0) {
267 			diffbytes++;
268 			insize.lint.lng += count[i];
269 			heap[++n].count = count[i];
270 			heap[n].node = i;
271 		}
272 	}
273 	if (diffbytes == 1) {
274 		fprintf(stderr, gettext(
275 			"pack: %s: trivial file - file unchanged\n"), source);
276 		return (0);
277 	}
278 	insize.lint.lng >>= 1;
279 	for (i = n/2; i >= 1; i--)
280 		heapify(i);
281 
282 	/* build Huffman tree */
283 	lastnode = END;
284 	while (n > 1) {
285 		parent[heap[1].node] = ++lastnode;
286 		inc = heap[1].count;
287 		hmove(heap[n], heap[1]);
288 		n--;
289 		heapify(1);
290 		parent[heap[1].node] = lastnode;
291 		heap[1].node = lastnode;
292 		heap[1].count += inc;
293 		heapify(1);
294 	}
295 	parent[lastnode] = 0;
296 
297 	/* assign lengths to encoding for each character */
298 	bitsout = maxlev = 0;
299 	for (i = 1; i <= 24; i++)
300 		levcount[i] = 0;
301 	for (i = 0; i <= END; i++) {
302 		c = 0;
303 		for (p = parent[i]; p != 0; p = parent[p])
304 			c++;
305 		levcount[c]++;
306 		length[i] = c;
307 		if (c > maxlev)
308 			maxlev = c;
309 		bitsout += c*(count[i]>>1);
310 	}
311 	if (maxlev > 24) {
312 		/* can't occur unless insize.lint.lng >= 2**24 */
313 		fprintf(stderr, gettext(
314 	"pack: %s: Huffman tree has too many levels - file unchanged\n"),
315 			source);
316 		return (0);
317 	}
318 
319 	/* don't bother if no compression results */
320 	outsize = ((bitsout+7)>>3)+6+maxlev+diffbytes;
321 	if ((insize.lint.lng+BUFSIZ-1)/BUFSIZ <=
322 				    (outsize+BUFSIZ-1)/BUFSIZ && !force) {
323 		printf(gettext(
324 			"pack: %s: no saving - file unchanged\n"), source);
325 		return (0);
326 	}
327 
328 	/* compute bit patterns for each character */
329 	inc = 1L << 24;
330 	inc >>= maxlev;
331 	mask.lint.lng = 0;
332 	for (i = maxlev; i > 0; i--) {
333 		for (c = 0; c <= END; c++)
334 			if (length[c] == i) {
335 				bits[c] = mask.lint.lng;
336 				mask.lint.lng += inc;
337 			}
338 		mask.lint.lng &= ~inc;
339 		inc <<= 1;
340 	}
341 
342 	return (output(source));
343 }
344 
345 int
346 main(int argc, char *argv[])
347 {
348 	extern  int optind;
349 	register int i;
350 	register char *cp;
351 	int k, sep, errflg = 0;
352 	int c;
353 	int fcount = 0; /* count failures */
354 
355 	(void) setlocale(LC_ALL, "");
356 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
357 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't */
358 #endif
359 	(void) textdomain(TEXT_DOMAIN);
360 
361 	while ((c = getopt(argc, argv, "f-")) != EOF) {
362 		if (c == 'f')
363 			force++;
364 		else
365 			++errflg;
366 	}
367 	/*
368 	 * Check for invalid option.  Also check for missing
369 	 * file operand, ie: "pack" or "pack -".
370 	 */
371 	argc -= optind;
372 	argv = &argv[optind];
373 	if (errflg || argc < 1 ||
374 		(argc == 1 && argv[0][0] == '-' && argv[0][1] == '\0')) {
375 		fprintf(stderr, gettext(
376 			"usage: pack [-f] [-] file...\n"));
377 		if (argc < 1 ||
378 			(argc == 1 && argv[0][0] == '-' &&
379 				argv[0][1] == '\0')) {
380 			/*
381 			 * return 1 for usage error when no file was specified
382 			 */
383 			return (1);
384 		}
385 	}
386 	/* loop through the file names */
387 	for (k = 0; k < argc; k++) {
388 		if (argv[k][0] == '-' && argv[k][1] == '\0') {
389 			vflag = 1 - vflag;
390 			continue;
391 		}
392 		fcount++; /* increase failure count - expect the worst */
393 		if (errflg) {
394 			/*
395 			 * invalid option; just count the number of files not
396 			 * packed
397 			 */
398 			continue;
399 		}
400 		/* remove any .z suffix the user may have added */
401 		for (cp = argv[k]; *cp != '\0'; ++cp)
402 			;
403 		if (cp[-1] == SUF1 && cp[-2] == SUF0) {
404 			*cp-- = '\0'; *cp-- = '\0'; *cp = '\0';
405 		}
406 		sep = -1;  cp = filename;
407 		max_name = pathconf(argv[k], _PC_NAME_MAX);
408 		if (max_name == -1) {
409 			/* pathname invalid or no limit on length of filename */
410 			max_name = _POSIX_NAME_MAX;
411 		}
412 		/* copy argv[k] to filename and count chars in base name */
413 		for (i = 0; i < (MAXPATHLEN-3) && (*cp = argv[k][i]); i++)
414 			if (*cp++ == '/') sep = i;
415 		if ((infile = open(filename, 0)) < 0) {
416 			fprintf(stderr, gettext(
417 				"pack: %s: cannot open: "), filename);
418 			perror("");
419 			continue;
420 		}
421 		if (i >= (MAXPATHLEN-3) || (i-sep) > (max_name - 1)) {
422 			fprintf(stderr, gettext(
423 				"pack: %s: file name too long\n"), argv[k]);
424 			continue;
425 		}
426 		fstat(infile, &status);
427 		if (status.st_mode&S_IFDIR) {
428 			fprintf(stderr, gettext(
429 				"pack: %s: cannot pack a directory\n"),
430 				    argv[k]);
431 			goto closein;
432 		}
433 		if (status.st_size == 0) {
434 			fprintf(stderr, gettext(
435 				"pack: %s: cannot pack a zero length file\n"),
436 				argv[k]);
437 			goto closein;
438 		}
439 		if (status.st_nlink != 1) {
440 			fprintf(stderr, gettext(
441 				"pack: %s: has links\n"),
442 				argv[k]);
443 			goto closein;
444 		}
445 		*cp++ = SUF0;  *cp++ = SUF1;  *cp = '\0';
446 		if (stat(filename, &ostatus) != -1) {
447 			fprintf(stderr, gettext(
448 				"pack: %s: already exists\n"), filename);
449 			goto closein;
450 		}
451 		if ((outfile = creat(filename, status.st_mode)) < 0) {
452 			fprintf(stderr, gettext(
453 				"pack: %s: cannot create: "), filename);
454 			perror("");
455 			goto closein;
456 		}
457 
458 		if (packfile(argv[k]) &&
459 		    ((pathconf(argv[k], _PC_XATTR_EXISTS) != 1) ||
460 				(mv_xattrs(infile, outfile,
461 					argv[k], 0) == 0))) {
462 			if (unlink(argv[k]) != 0) {
463 				fprintf(stderr, gettext(
464 					"pack: %s: cannot unlink: "),
465 					argv[k]);
466 				perror("");
467 			}
468 			printf(gettext(
469 				"pack: %s: %.1f%% Compression\n"),
470 				argv[k],
471 	    ((double)(-outsize+(insize.lint.lng))/(double)insize.lint.lng)*100);
472 			/* output statistics */
473 			if (vflag) {
474 				printf(gettext("\tfrom %ld to %ld bytes\n"),
475 					insize.lint.lng, outsize);
476 				printf(gettext(
477 				"\tHuffman tree has %d levels below root\n"),
478 				    maxlev);
479 				printf(gettext(
480 					"\t%d distinct bytes in input\n"),
481 					diffbytes);
482 				printf(gettext(
483 					"\tdictionary overhead = %ld bytes\n"),
484 					dictsize);
485 				printf(gettext(
486 				    "\teffective  entropy  = %.2f bits/byte\n"),
487 			    ((double)outsize / (double)insize.lint.lng) * 8);
488 				printf(gettext(
489 				    "\tasymptotic entropy  = %.2f bits/byte\n"),
490 					((double)(outsize-dictsize) /
491 					(double)insize.lint.lng) * 8);
492 			}
493 			u_times.actime = status.st_atime;
494 			u_times.modtime = status.st_mtime;
495 			if (utime(filename, &u_times) != 0) {
496 				errflg++;
497 				fprintf(stderr,
498 					gettext(
499 					"pack: cannot change times on %s: "),
500 					filename);
501 				perror("");
502 			}
503 			if (chmod(filename, status.st_mode) != 0) {
504 				errflg++;
505 				fprintf(stderr,
506 					gettext(
507 				"pack: can't change mode to %o on %s: "),
508 					status.st_mode, filename);
509 				perror("");
510 			}
511 			chown(filename, status.st_uid, status.st_gid);
512 			if (!errflg)
513 				fcount--;  /* success after all */
514 		} else {
515 			if (pathconf(filename, _PC_XATTR_EXISTS) == 1) {
516 				(void) mv_xattrs(outfile, infile, filename, 1);
517 			}
518 			unlink(filename);
519 		}
520 closein:	close(outfile);
521 		close(infile);
522 	}
523 	return (fcount);
524 }
525 
526 /*
527  * mv_xattrs - move (via renameat) all of the extended attributes
528  *	associated with the file referenced by infd to the file
529  *	referenced by outfd.  The infile and silent arguments are
530  *	provided for error message processing.  This function
531  *	returns 0 on success and -1 on error.
532  */
533 static int
534 mv_xattrs(int infd, int outfd, char *infile, int silent)
535 {
536 	int indfd, outdfd, tmpfd;
537 	DIR *dirp = NULL;
538 	struct dirent *dp = NULL;
539 	int error = 0;
540 	char *etext;
541 
542 	indfd = outdfd = tmpfd = -1;
543 
544 	if ((indfd = openat(infd, ".", O_RDONLY|O_XATTR)) == -1) {
545 		etext = gettext("cannot open source");
546 		error = -1;
547 		goto out;
548 	}
549 
550 	if ((outdfd = openat(outfd, ".", O_RDONLY|O_XATTR)) == -1) {
551 		etext = gettext("cannot open target");
552 		error = -1;
553 		goto out;
554 	}
555 
556 	if ((tmpfd = dup(indfd)) == -1) {
557 		etext = gettext("cannot dup descriptor");
558 		error = -1;
559 		goto out;
560 
561 	}
562 	if ((dirp = fdopendir(tmpfd)) == NULL) {
563 		etext = gettext("cannot access source");
564 		error = -1;
565 		goto out;
566 	}
567 
568 	while (dp = readdir(dirp)) {
569 		if ((dp->d_name[0] == '.' && dp->d_name[1] == '\0') ||
570 		    (dp->d_name[0] == '.' && dp->d_name[1] == '.' &&
571 		    dp->d_name[2] == '\0'))
572 			continue;
573 		if ((renameat(indfd, dp->d_name, outdfd, dp->d_name)) == -1) {
574 			etext = dp->d_name;
575 			error = -1;
576 			goto out;
577 		}
578 	}
579 out:
580 	if (error == -1 && silent == 0) {
581 		fprintf(stderr, gettext(
582 			"pack: %s: cannot move extended attributes, "),
583 			infile);
584 		perror(etext);
585 	}
586 	if (dirp)
587 		closedir(dirp);
588 	if (indfd != -1)
589 		close(indfd);
590 	if (outdfd != -1)
591 		close(outdfd);
592 	return (error);
593 }
594