xref: /freebsd/contrib/libarchive/tar/read.c (revision 3823d5e198425b4f5e5a80267d195769d1063773)
1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "bsdtar_platform.h"
27 __FBSDID("$FreeBSD$");
28 
29 #ifdef HAVE_SYS_TYPES_H
30 #include <sys/types.h>
31 #endif
32 #ifdef HAVE_SYS_PARAM_H
33 #include <sys/param.h>
34 #endif
35 #ifdef HAVE_SYS_STAT_H
36 #include <sys/stat.h>
37 #endif
38 
39 #ifdef HAVE_ERRNO_H
40 #include <errno.h>
41 #endif
42 #ifdef HAVE_GRP_H
43 #include <grp.h>
44 #endif
45 #ifdef HAVE_LIMITS_H
46 #include <limits.h>
47 #endif
48 #ifdef HAVE_PWD_H
49 #include <pwd.h>
50 #endif
51 #ifdef HAVE_STDINT_H
52 #include <stdint.h>
53 #endif
54 #include <stdio.h>
55 #ifdef HAVE_STDLIB_H
56 #include <stdlib.h>
57 #endif
58 #ifdef HAVE_STRING_H
59 #include <string.h>
60 #endif
61 #ifdef HAVE_TIME_H
62 #include <time.h>
63 #endif
64 #ifdef HAVE_UNISTD_H
65 #include <unistd.h>
66 #endif
67 
68 #include "bsdtar.h"
69 #include "err.h"
70 
/*
 * Cookie handed to progress_func() through the extract progress
 * callback.  It bundles everything the callback reads when it
 * prints a status report.
 */
struct progress_data {
	/* Global bsdtar state; the callback consults its verbose flag. */
	struct bsdtar *bsdtar;
	/* Archive being read; may be NULL, in which case totals are skipped. */
	struct archive *archive;
	/* Entry currently being processed; may be NULL. */
	struct archive_entry *entry;
};
76 
77 static void	list_item_verbose(struct bsdtar *, FILE *,
78 		    struct archive_entry *);
79 static void	read_archive(struct bsdtar *bsdtar, char mode, struct archive *);
80 static int unmatched_inclusions_warn(struct archive *matching, const char *);
81 
82 
83 void
84 tar_mode_t(struct bsdtar *bsdtar)
85 {
86 	read_archive(bsdtar, 't', NULL);
87 	if (unmatched_inclusions_warn(bsdtar->matching,
88 	    "Not found in archive") != 0)
89 		bsdtar->return_value = 1;
90 }
91 
92 void
93 tar_mode_x(struct bsdtar *bsdtar)
94 {
95 	struct archive *writer;
96 
97 	writer = archive_write_disk_new();
98 	if (writer == NULL)
99 		lafe_errc(1, ENOMEM, "Cannot allocate disk writer object");
100 	if (!bsdtar->option_numeric_owner)
101 		archive_write_disk_set_standard_lookup(writer);
102 	archive_write_disk_set_options(writer, bsdtar->extract_flags);
103 
104 	read_archive(bsdtar, 'x', writer);
105 
106 	if (unmatched_inclusions_warn(bsdtar->matching,
107 	    "Not found in archive") != 0)
108 		bsdtar->return_value = 1;
109 	archive_write_free(writer);
110 }
111 
112 static void
113 progress_func(void *cookie)
114 {
115 	struct progress_data *progress_data = cookie;
116 	struct bsdtar *bsdtar = progress_data->bsdtar;
117 	struct archive *a = progress_data->archive;
118 	struct archive_entry *entry = progress_data->entry;
119 	uint64_t comp, uncomp;
120 	int compression;
121 
122 	if (!need_report())
123 		return;
124 
125 	if (bsdtar->verbose)
126 		fprintf(stderr, "\n");
127 	if (a != NULL) {
128 		comp = archive_filter_bytes(a, -1);
129 		uncomp = archive_filter_bytes(a, 0);
130 		if (comp > uncomp)
131 			compression = 0;
132 		else
133 			compression = (int)((uncomp - comp) * 100 / uncomp);
134 		fprintf(stderr,
135 		    "In: %s bytes, compression %d%%;",
136 		    tar_i64toa(comp), compression);
137 		fprintf(stderr, "  Out: %d files, %s bytes\n",
138 		    archive_file_count(a), tar_i64toa(uncomp));
139 	}
140 	if (entry != NULL) {
141 		safe_fprintf(stderr, "Current: %s",
142 		    archive_entry_pathname(entry));
143 		fprintf(stderr, " (%s bytes)\n",
144 		    tar_i64toa(archive_entry_size(entry)));
145 	}
146 }
147 
148 /*
149  * Handle 'x' and 't' modes.
150  */
151 static void
152 read_archive(struct bsdtar *bsdtar, char mode, struct archive *writer)
153 {
154 	struct progress_data	progress_data;
155 	FILE			 *out;
156 	struct archive		 *a;
157 	struct archive_entry	 *entry;
158 	const char		 *reader_options;
159 	int			  r;
160 
161 	while (*bsdtar->argv) {
162 		if (archive_match_include_pattern(bsdtar->matching,
163 		    *bsdtar->argv) != ARCHIVE_OK)
164 			lafe_errc(1, 0, "Error inclusion pattern: %s",
165 			    archive_error_string(bsdtar->matching));
166 		bsdtar->argv++;
167 	}
168 
169 	if (bsdtar->names_from_file != NULL)
170 		if (archive_match_include_pattern_from_file(
171 		    bsdtar->matching, bsdtar->names_from_file,
172 		    bsdtar->option_null) != ARCHIVE_OK)
173 			lafe_errc(1, 0, "Error inclusion pattern: %s",
174 			    archive_error_string(bsdtar->matching));
175 
176 	a = archive_read_new();
177 	if (cset_read_support_filter_program(bsdtar->cset, a) == 0)
178 		archive_read_support_filter_all(a);
179 	archive_read_support_format_all(a);
180 
181 	reader_options = getenv(ENV_READER_OPTIONS);
182 	if (reader_options != NULL) {
183 		char *p;
184 		/* Set default read options. */
185 		p = malloc(sizeof(IGNORE_WRONG_MODULE_NAME)
186 		    + strlen(reader_options) + 1);
187 		if (p == NULL)
188 			lafe_errc(1, errno, "Out of memory");
189 		/* Prepend magic code to ignore options for
190 		 * a format or  modules which are not added to
191 		 *  the archive read object. */
192 		strncpy(p, IGNORE_WRONG_MODULE_NAME,
193 		    sizeof(IGNORE_WRONG_MODULE_NAME) -1);
194 		strcpy(p + sizeof(IGNORE_WRONG_MODULE_NAME) -1, reader_options);
195 		r = archive_read_set_options(a, p);
196 		free(p);
197 		if (r == ARCHIVE_FATAL)
198 			lafe_errc(1, 0, "%s", archive_error_string(a));
199 		else
200 			archive_clear_error(a);
201 	}
202 	if (ARCHIVE_OK != archive_read_set_options(a, bsdtar->option_options))
203 		lafe_errc(1, 0, "%s", archive_error_string(a));
204 	if (archive_read_open_filename(a, bsdtar->filename,
205 					bsdtar->bytes_per_block))
206 		lafe_errc(1, 0, "Error opening archive: %s",
207 		    archive_error_string(a));
208 
209 	do_chdir(bsdtar);
210 
211 	if (mode == 'x') {
212 		/* Set an extract callback so that we can handle SIGINFO. */
213 		progress_data.bsdtar = bsdtar;
214 		progress_data.archive = a;
215 		archive_read_extract_set_progress_callback(a, progress_func,
216 		    &progress_data);
217 	}
218 
219 	if (mode == 'x' && bsdtar->option_chroot) {
220 #if HAVE_CHROOT
221 		if (chroot(".") != 0)
222 			lafe_errc(1, errno, "Can't chroot to \".\"");
223 #else
224 		lafe_errc(1, 0,
225 		    "chroot isn't supported on this platform");
226 #endif
227 	}
228 
229 	for (;;) {
230 		/* Support --fast-read option */
231 		if (bsdtar->option_fast_read &&
232 		    archive_match_path_unmatched_inclusions(bsdtar->matching) == 0)
233 			break;
234 
235 		r = archive_read_next_header(a, &entry);
236 		progress_data.entry = entry;
237 		if (r == ARCHIVE_EOF)
238 			break;
239 		if (r < ARCHIVE_OK)
240 			lafe_warnc(0, "%s", archive_error_string(a));
241 		if (r <= ARCHIVE_WARN)
242 			bsdtar->return_value = 1;
243 		if (r == ARCHIVE_RETRY) {
244 			/* Retryable error: try again */
245 			lafe_warnc(0, "Retrying...");
246 			continue;
247 		}
248 		if (r == ARCHIVE_FATAL)
249 			break;
250 
251 		if (bsdtar->uid >= 0) {
252 			archive_entry_set_uid(entry, bsdtar->uid);
253 			archive_entry_set_uname(entry, NULL);
254 		}
255 		if (bsdtar->gid >= 0) {
256 			archive_entry_set_gid(entry, bsdtar->gid);
257 			archive_entry_set_gname(entry, NULL);
258 		}
259 		if (bsdtar->uname)
260 			archive_entry_set_uname(entry, bsdtar->uname);
261 		if (bsdtar->gname)
262 			archive_entry_set_gname(entry, bsdtar->gname);
263 
264 		/*
265 		 * Note that pattern exclusions are checked before
266 		 * pathname rewrites are handled.  This gives more
267 		 * control over exclusions, since rewrites always lose
268 		 * information.  (For example, consider a rewrite
269 		 * s/foo[0-9]/foo/.  If we check exclusions after the
270 		 * rewrite, there would be no way to exclude foo1/bar
271 		 * while allowing foo2/bar.)
272 		 */
273 		if (archive_match_excluded(bsdtar->matching, entry))
274 			continue; /* Excluded by a pattern test. */
275 
276 		if (mode == 't') {
277 			/* Perversely, gtar uses -O to mean "send to stderr"
278 			 * when used with -t. */
279 			out = bsdtar->option_stdout ? stderr : stdout;
280 
281 			/*
282 			 * TODO: Provide some reasonable way to
283 			 * preview rewrites.  gtar always displays
284 			 * the unedited path in -t output, which means
285 			 * you cannot easily preview rewrites.
286 			 */
287 			if (bsdtar->verbose < 2)
288 				safe_fprintf(out, "%s",
289 				    archive_entry_pathname(entry));
290 			else
291 				list_item_verbose(bsdtar, out, entry);
292 			fflush(out);
293 			r = archive_read_data_skip(a);
294 			if (r == ARCHIVE_WARN) {
295 				fprintf(out, "\n");
296 				lafe_warnc(0, "%s",
297 				    archive_error_string(a));
298 			}
299 			if (r == ARCHIVE_RETRY) {
300 				fprintf(out, "\n");
301 				lafe_warnc(0, "%s",
302 				    archive_error_string(a));
303 			}
304 			if (r == ARCHIVE_FATAL) {
305 				fprintf(out, "\n");
306 				lafe_warnc(0, "%s",
307 				    archive_error_string(a));
308 				bsdtar->return_value = 1;
309 				break;
310 			}
311 			fprintf(out, "\n");
312 		} else {
313 			/* Note: some rewrite failures prevent extraction. */
314 			if (edit_pathname(bsdtar, entry))
315 				continue; /* Excluded by a rewrite failure. */
316 
317 			if (bsdtar->option_interactive &&
318 			    !yes("extract '%s'", archive_entry_pathname(entry)))
319 				continue;
320 
321 			/*
322 			 * Format here is from SUSv2, including the
323 			 * deferred '\n'.
324 			 */
325 			if (bsdtar->verbose) {
326 				safe_fprintf(stderr, "x %s",
327 				    archive_entry_pathname(entry));
328 				fflush(stderr);
329 			}
330 
331 			/* TODO siginfo_printinfo(bsdtar, 0); */
332 
333 			if (bsdtar->option_stdout)
334 				r = archive_read_data_into_fd(a, 1);
335 			else
336 				r = archive_read_extract2(a, entry, writer);
337 			if (r != ARCHIVE_OK) {
338 				if (!bsdtar->verbose)
339 					safe_fprintf(stderr, "%s",
340 					    archive_entry_pathname(entry));
341 				safe_fprintf(stderr, ": %s",
342 				    archive_error_string(a));
343 				if (!bsdtar->verbose)
344 					fprintf(stderr, "\n");
345 				bsdtar->return_value = 1;
346 			}
347 			if (bsdtar->verbose)
348 				fprintf(stderr, "\n");
349 			if (r == ARCHIVE_FATAL)
350 				break;
351 		}
352 	}
353 
354 
355 	r = archive_read_close(a);
356 	if (r != ARCHIVE_OK)
357 		lafe_warnc(0, "%s", archive_error_string(a));
358 	if (r <= ARCHIVE_WARN)
359 		bsdtar->return_value = 1;
360 
361 	if (bsdtar->verbose > 2)
362 		fprintf(stdout, "Archive Format: %s,  Compression: %s\n",
363 		    archive_format_name(a), archive_filter_name(a, 0));
364 
365 	archive_read_free(a);
366 }
367 
368 
369 /*
370  * Display information about the current file.
371  *
372  * The format here roughly duplicates the output of 'ls -l'.
373  * This is based on SUSv2, where 'tar tv' is documented as
374  * listing additional information in an "unspecified format,"
375  * and 'pax -l' is documented as using the same format as 'ls -l'.
376  */
377 static void
378 list_item_verbose(struct bsdtar *bsdtar, FILE *out, struct archive_entry *entry)
379 {
380 	char			 tmp[100];
381 	size_t			 w;
382 	const char		*p;
383 	const char		*fmt;
384 	time_t			 tim;
385 	static time_t		 now;
386 
387 	/*
388 	 * We avoid collecting the entire list in memory at once by
389 	 * listing things as we see them.  However, that also means we can't
390 	 * just pre-compute the field widths.  Instead, we start with guesses
391 	 * and just widen them as necessary.  These numbers are completely
392 	 * arbitrary.
393 	 */
394 	if (!bsdtar->u_width) {
395 		bsdtar->u_width = 6;
396 		bsdtar->gs_width = 13;
397 	}
398 	if (!now)
399 		time(&now);
400 	fprintf(out, "%s %d ",
401 	    archive_entry_strmode(entry),
402 	    archive_entry_nlink(entry));
403 
404 	/* Use uname if it's present, else uid. */
405 	p = archive_entry_uname(entry);
406 	if ((p == NULL) || (*p == '\0')) {
407 		sprintf(tmp, "%lu ",
408 		    (unsigned long)archive_entry_uid(entry));
409 		p = tmp;
410 	}
411 	w = strlen(p);
412 	if (w > bsdtar->u_width)
413 		bsdtar->u_width = w;
414 	fprintf(out, "%-*s ", (int)bsdtar->u_width, p);
415 
416 	/* Use gname if it's present, else gid. */
417 	p = archive_entry_gname(entry);
418 	if (p != NULL && p[0] != '\0') {
419 		fprintf(out, "%s", p);
420 		w = strlen(p);
421 	} else {
422 		sprintf(tmp, "%lu",
423 		    (unsigned long)archive_entry_gid(entry));
424 		w = strlen(tmp);
425 		fprintf(out, "%s", tmp);
426 	}
427 
428 	/*
429 	 * Print device number or file size, right-aligned so as to make
430 	 * total width of group and devnum/filesize fields be gs_width.
431 	 * If gs_width is too small, grow it.
432 	 */
433 	if (archive_entry_filetype(entry) == AE_IFCHR
434 	    || archive_entry_filetype(entry) == AE_IFBLK) {
435 		sprintf(tmp, "%lu,%lu",
436 		    (unsigned long)archive_entry_rdevmajor(entry),
437 		    (unsigned long)archive_entry_rdevminor(entry));
438 	} else {
439 		strcpy(tmp, tar_i64toa(archive_entry_size(entry)));
440 	}
441 	if (w + strlen(tmp) >= bsdtar->gs_width)
442 		bsdtar->gs_width = w+strlen(tmp)+1;
443 	fprintf(out, "%*s", (int)(bsdtar->gs_width - w), tmp);
444 
445 	/* Format the time using 'ls -l' conventions. */
446 	tim = archive_entry_mtime(entry);
447 #define	HALF_YEAR (time_t)365 * 86400 / 2
448 #if defined(_WIN32) && !defined(__CYGWIN__)
449 #define	DAY_FMT  "%d"  /* Windows' strftime function does not support %e format. */
450 #else
451 #define	DAY_FMT  "%e"  /* Day number without leading zeros */
452 #endif
453 	if (tim < now - HALF_YEAR || tim > now + HALF_YEAR)
454 		fmt = bsdtar->day_first ? DAY_FMT " %b  %Y" : "%b " DAY_FMT "  %Y";
455 	else
456 		fmt = bsdtar->day_first ? DAY_FMT " %b %H:%M" : "%b " DAY_FMT " %H:%M";
457 	strftime(tmp, sizeof(tmp), fmt, localtime(&tim));
458 	fprintf(out, " %s ", tmp);
459 	safe_fprintf(out, "%s", archive_entry_pathname(entry));
460 
461 	/* Extra information for links. */
462 	if (archive_entry_hardlink(entry)) /* Hard link */
463 		safe_fprintf(out, " link to %s",
464 		    archive_entry_hardlink(entry));
465 	else if (archive_entry_symlink(entry)) /* Symbolic link */
466 		safe_fprintf(out, " -> %s", archive_entry_symlink(entry));
467 }
468 
469 static int
470 unmatched_inclusions_warn(struct archive *matching, const char *msg)
471 {
472 	const char *p;
473 	int r;
474 
475 	if (matching == NULL)
476 		return (0);
477 
478 	while ((r = archive_match_path_unmatched_inclusions_next(
479 	    matching, &p)) == ARCHIVE_OK)
480 		lafe_warnc(0, "%s: %s", p, msg);
481 	if (r == ARCHIVE_FATAL)
482 		lafe_errc(1, errno, "Out of memory");
483 
484 	return (archive_match_path_unmatched_inclusions(matching));
485 }
486