xref: /freebsd/contrib/libarchive/tar/util.c (revision caf54c4f6ce7f1612a528170c1aa3c3fe4f8b153)
1*caf54c4fSMartin Matuska /*-
2*caf54c4fSMartin Matuska  * Copyright (c) 2003-2007 Tim Kientzle
3*caf54c4fSMartin Matuska  * All rights reserved.
4*caf54c4fSMartin Matuska  *
5*caf54c4fSMartin Matuska  * Redistribution and use in source and binary forms, with or without
6*caf54c4fSMartin Matuska  * modification, are permitted provided that the following conditions
7*caf54c4fSMartin Matuska  * are met:
8*caf54c4fSMartin Matuska  * 1. Redistributions of source code must retain the above copyright
9*caf54c4fSMartin Matuska  *    notice, this list of conditions and the following disclaimer.
10*caf54c4fSMartin Matuska  * 2. Redistributions in binary form must reproduce the above copyright
11*caf54c4fSMartin Matuska  *    notice, this list of conditions and the following disclaimer in the
12*caf54c4fSMartin Matuska  *    documentation and/or other materials provided with the distribution.
13*caf54c4fSMartin Matuska  *
14*caf54c4fSMartin Matuska  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15*caf54c4fSMartin Matuska  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16*caf54c4fSMartin Matuska  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17*caf54c4fSMartin Matuska  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18*caf54c4fSMartin Matuska  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19*caf54c4fSMartin Matuska  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20*caf54c4fSMartin Matuska  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21*caf54c4fSMartin Matuska  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22*caf54c4fSMartin Matuska  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23*caf54c4fSMartin Matuska  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24*caf54c4fSMartin Matuska  */
25*caf54c4fSMartin Matuska 
26*caf54c4fSMartin Matuska #include "bsdtar_platform.h"
27*caf54c4fSMartin Matuska __FBSDID("$FreeBSD: src/usr.bin/tar/util.c,v 1.23 2008/12/15 06:00:25 kientzle Exp $");
28*caf54c4fSMartin Matuska 
29*caf54c4fSMartin Matuska #ifdef HAVE_SYS_STAT_H
30*caf54c4fSMartin Matuska #include <sys/stat.h>
31*caf54c4fSMartin Matuska #endif
32*caf54c4fSMartin Matuska #ifdef HAVE_SYS_TYPES_H
33*caf54c4fSMartin Matuska #include <sys/types.h>  /* Linux doesn't define mode_t, etc. in sys/stat.h. */
34*caf54c4fSMartin Matuska #endif
35*caf54c4fSMartin Matuska #include <ctype.h>
36*caf54c4fSMartin Matuska #ifdef HAVE_ERRNO_H
37*caf54c4fSMartin Matuska #include <errno.h>
38*caf54c4fSMartin Matuska #endif
39*caf54c4fSMartin Matuska #ifdef HAVE_IO_H
40*caf54c4fSMartin Matuska #include <io.h>
41*caf54c4fSMartin Matuska #endif
42*caf54c4fSMartin Matuska #ifdef HAVE_STDARG_H
43*caf54c4fSMartin Matuska #include <stdarg.h>
44*caf54c4fSMartin Matuska #endif
45*caf54c4fSMartin Matuska #ifdef HAVE_STDINT_H
46*caf54c4fSMartin Matuska #include <stdint.h>
47*caf54c4fSMartin Matuska #endif
48*caf54c4fSMartin Matuska #include <stdio.h>
49*caf54c4fSMartin Matuska #ifdef HAVE_STDLIB_H
50*caf54c4fSMartin Matuska #include <stdlib.h>
51*caf54c4fSMartin Matuska #endif
52*caf54c4fSMartin Matuska #ifdef HAVE_STRING_H
53*caf54c4fSMartin Matuska #include <string.h>
54*caf54c4fSMartin Matuska #endif
55*caf54c4fSMartin Matuska #ifdef HAVE_WCTYPE_H
56*caf54c4fSMartin Matuska #include <wctype.h>
57*caf54c4fSMartin Matuska #else
58*caf54c4fSMartin Matuska /* If we don't have wctype, we need to hack up some version of iswprint(). */
59*caf54c4fSMartin Matuska #define iswprint isprint
60*caf54c4fSMartin Matuska #endif
61*caf54c4fSMartin Matuska 
62*caf54c4fSMartin Matuska #include "bsdtar.h"
63*caf54c4fSMartin Matuska #include "err.h"
64*caf54c4fSMartin Matuska 
65*caf54c4fSMartin Matuska static size_t	bsdtar_expand_char(char *, size_t, char);
66*caf54c4fSMartin Matuska static const char *strip_components(const char *path, int elements);
67*caf54c4fSMartin Matuska 
68*caf54c4fSMartin Matuska #if defined(_WIN32) && !defined(__CYGWIN__)
69*caf54c4fSMartin Matuska #define read _read
70*caf54c4fSMartin Matuska #endif
71*caf54c4fSMartin Matuska 
72*caf54c4fSMartin Matuska /* TODO:  Hack up a version of mbtowc for platforms with no wide
73*caf54c4fSMartin Matuska  * character support at all.  I think the following might suffice,
74*caf54c4fSMartin Matuska  * but it needs careful testing.
75*caf54c4fSMartin Matuska  * #if !HAVE_MBTOWC
76*caf54c4fSMartin Matuska  * #define mbtowc(wcp, p, n) ((*wcp = *p), 1)
77*caf54c4fSMartin Matuska  * #endif
78*caf54c4fSMartin Matuska  */
79*caf54c4fSMartin Matuska 
/*
 * Print a printf()-style message to 'f', taking care with any
 * non-printable characters: characters that are not printable in the
 * current locale, and every backslash, are written as C-style escapes
 * by bsdtar_expand_char().
 *
 * Note that we use a stack-allocated buffer to receive the formatted
 * string if we can.  This is partly performance (avoiding a call to
 * malloc()), partly out of expedience (we have to call vsnprintf()
 * before malloc() anyway to find out how big a buffer we need; we may
 * as well point that first call at a small local buffer in case it
 * works), but mostly for safety (so we can use this to print messages
 * about out-of-memory conditions).
 *
 * If formatting fails or the heap buffer cannot be allocated, whatever
 * text made it into the buffer is printed (possibly truncated).
 */

void
safe_fprintf(FILE *f, const char *fmt, ...)
{
	/*
	 * Free space guaranteed in outbuff before each character is
	 * decoded.  One multibyte character can expand to four output
	 * bytes per input byte, plus the NUL written by the octal escape.
	 */
	enum { OUTBUFF_RESERVE = 128 };
	char fmtbuff_stack[256]; /* Place to format the printf() string. */
	char outbuff[512]; /* Buffer for outgoing characters. */
	char *fmtbuff_heap; /* If fmtbuff_stack is too small, we use calloc */
	char *fmtbuff;  /* Pointer to fmtbuff_stack or fmtbuff_heap. */
	int fmtbuff_length;
	int length, n;
	va_list ap;
	const char *p;
	unsigned i;
	wchar_t wc;
	char try_wc;

	/* Use a stack-allocated buffer if we can, for speed and safety. */
	fmtbuff_heap = NULL;
	fmtbuff_length = (int)sizeof(fmtbuff_stack);
	fmtbuff = fmtbuff_stack;

	/* Zero the buffer so it holds a valid string even if vsnprintf()
	 * fails without writing a terminator. */
	memset(fmtbuff_stack, 0, sizeof(fmtbuff_stack));

	/* Try formatting into the stack buffer. */
	va_start(ap, fmt);
	length = vsnprintf(fmtbuff, (size_t)fmtbuff_length, fmt, ap);
	va_end(ap);

	/* If the result was too large, allocate a buffer on the heap. */
	if (length >= fmtbuff_length) {
		fmtbuff_length = length + 1;
		fmtbuff_heap = calloc(1, (size_t)fmtbuff_length);

		/* Reformat the result into the heap buffer if we can. */
		if (fmtbuff_heap != NULL) {
			fmtbuff = fmtbuff_heap;
			va_start(ap, fmt);
			length = vsnprintf(fmtbuff, (size_t)fmtbuff_length,
			    fmt, ap);
			va_end(ap);
		} else {
			/* Leave fmtbuff pointing to the truncated
			 * string in fmtbuff_stack. */
			fmtbuff_length = (int)sizeof(fmtbuff_stack);
		}
	}

	/*
	 * A negative length means vsnprintf() reported an error; a length
	 * that does not fit means the text was truncated.  Either way,
	 * force termination and print what is actually in the buffer
	 * rather than trusting the reported length.
	 */
	if (length < 0 || length >= fmtbuff_length) {
		fmtbuff[fmtbuff_length - 1] = '\0';
		length = (int)strlen(fmtbuff);
	}

	/* Note: mbrtowc() has a cleaner API, but mbtowc() seems a bit
	 * more portable, so we use that here instead. */
	(void)mbtowc(NULL, NULL, 1); /* Reset the shift state. */

	/* Write data, expanding unprintable characters. */
	p = fmtbuff;
	i = 0;
	try_wc = 1;
	while (*p != '\0') {

		/* Convert to wide char, test if the wide
		 * char is printable in the current locale.
		 * Requiring n > 0 guarantees that p advances. */
		if (try_wc && (n = mbtowc(&wc, p, (size_t)length)) > 0) {
			length -= n;
			if (iswprint(wc) && wc != L'\\') {
				/* Printable, copy the bytes through. */
				while (n-- > 0)
					outbuff[i++] = *p++;
			} else {
				/* Not printable, format the bytes. */
				while (n-- > 0)
					i += (unsigned)bsdtar_expand_char(
					    outbuff, i, *p++);
			}
		} else {
			/* After any conversion failure, don't bother
			 * trying to convert the rest. */
			i += (unsigned)bsdtar_expand_char(outbuff, i, *p++);
			try_wc = 0;
		}

		/* If our output buffer is nearly full, dump it and keep
		 * going. */
		if (i > (sizeof(outbuff) - OUTBUFF_RESERVE)) {
			outbuff[i] = '\0';
			fprintf(f, "%s", outbuff);
			i = 0;
		}
	}
	outbuff[i] = '\0';
	fprintf(f, "%s", outbuff);

	/* Release the heap-based formatting buffer, if any. */
	free(fmtbuff_heap);
}
180*caf54c4fSMartin Matuska 
/*
 * Render one byte as printable ASCII.
 *
 * Writes the rendering of 'c' into 'buff' starting at index 'offset'
 * and returns the number of characters produced:
 *   - a printable byte other than backslash is copied as-is (1);
 *   - backslash and the control characters that have a C escape are
 *     written as a two-character escape such as "\n" or "\\" (2);
 *   - anything else becomes a backslash followed by three octal
 *     digits (4), and a NUL is stored just past the digits.
 * The caller must make sure 'buff' has room for the result.
 */
static size_t
bsdtar_expand_char(char *buff, size_t offset, char c)
{
	char *out = buff + offset;
	char letter = '\0';
	unsigned value;

	/* Ordinary printable byte: pass it through untouched. */
	if (c != '\\' && isprint((unsigned char)c)) {
		out[0] = c;
		return (1);
	}

	/* Everything else starts with a backslash. */
	out[0] = '\\';
	switch (c) {
	case '\a': letter = 'a'; break;
	case '\b': letter = 'b'; break;
	case '\f': letter = 'f'; break;
	case '\n': letter = 'n'; break;
#if '\r' != '\n'
	/* On some platforms, \n and \r are the same. */
	case '\r': letter = 'r'; break;
#endif
	case '\t': letter = 't'; break;
	case '\v': letter = 'v'; break;
	case '\\': letter = '\\'; break;
	default: break;
	}
	if (letter != '\0') {
		out[1] = letter;
		return (2);
	}

	/* No symbolic escape: emit the byte value as three octal digits,
	 * NUL-terminated. */
	value = (unsigned)(0xFF & (int)c);
	out[1] = (char)('0' + ((value >> 6) & 7));
	out[2] = (char)('0' + ((value >> 3) & 7));
	out[3] = (char)('0' + (value & 7));
	out[4] = '\0';
	return (4);
}
213*caf54c4fSMartin Matuska 
/*
 * Ask the user a yes/no question.
 *
 * The printf()-style prompt is written to stderr followed by
 * " (y/N)? ", and the reply is read from file descriptor 2.
 * NOTE(review): presumably descriptor 2 is used because stdin may be
 * carrying archive data -- confirm.
 *
 * Returns 1 only if the first non-whitespace character of the reply is
 * 'y' or 'Y'; returns 0 for any other reply, an empty reply, or a
 * failed read.
 */
int
yes(const char *fmt, ...)
{
	char answer[32];
	const char *cp;
	ssize_t nread;
	va_list args;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
	fprintf(stderr, " (y/N)? ");
	fflush(stderr);

	nread = read(2, answer, sizeof(answer) - 1);
	if (nread <= 0)
		return (0);
	answer[nread] = 0;

	/* Skip leading whitespace, then judge the first real character. */
	cp = answer;
	while (*cp != '\0' && isspace((unsigned char)*cp))
		cp++;

	return (*cp == 'y' || *cp == 'Y');
}
248*caf54c4fSMartin Matuska 
249*caf54c4fSMartin Matuska /*-
250*caf54c4fSMartin Matuska  * The logic here for -C <dir> attempts to avoid
251*caf54c4fSMartin Matuska  * chdir() as long as possible.  For example:
252*caf54c4fSMartin Matuska  * "-C /foo -C /bar file"          needs chdir("/bar") but not chdir("/foo")
253*caf54c4fSMartin Matuska  * "-C /foo -C bar file"           needs chdir("/foo/bar")
254*caf54c4fSMartin Matuska  * "-C /foo -C bar /file1"         does not need chdir()
255*caf54c4fSMartin Matuska  * "-C /foo -C bar /file1 file2"   needs chdir("/foo/bar") before file2
256*caf54c4fSMartin Matuska  *
257*caf54c4fSMartin Matuska  * The only correct way to handle this is to record a "pending" chdir
258*caf54c4fSMartin Matuska  * request and combine multiple requests intelligently until we
259*caf54c4fSMartin Matuska  * need to process a non-absolute file.  set_chdir() adds the new dir
260*caf54c4fSMartin Matuska  * to the pending list; do_chdir() actually executes any pending chdir.
261*caf54c4fSMartin Matuska  *
262*caf54c4fSMartin Matuska  * This way, programs that build tar command lines don't have to worry
263*caf54c4fSMartin Matuska  * about -C with non-existent directories; such requests will only
264*caf54c4fSMartin Matuska  * fail if the directory must be accessed.
265*caf54c4fSMartin Matuska  *
266*caf54c4fSMartin Matuska  * TODO: Make this handle Windows paths correctly.
267*caf54c4fSMartin Matuska  */
268*caf54c4fSMartin Matuska void
269*caf54c4fSMartin Matuska set_chdir(struct bsdtar *bsdtar, const char *newdir)
270*caf54c4fSMartin Matuska {
271*caf54c4fSMartin Matuska 	if (newdir[0] == '/') {
272*caf54c4fSMartin Matuska 		/* The -C /foo -C /bar case; dump first one. */
273*caf54c4fSMartin Matuska 		free(bsdtar->pending_chdir);
274*caf54c4fSMartin Matuska 		bsdtar->pending_chdir = NULL;
275*caf54c4fSMartin Matuska 	}
276*caf54c4fSMartin Matuska 	if (bsdtar->pending_chdir == NULL)
277*caf54c4fSMartin Matuska 		/* Easy case: no previously-saved dir. */
278*caf54c4fSMartin Matuska 		bsdtar->pending_chdir = strdup(newdir);
279*caf54c4fSMartin Matuska 	else {
280*caf54c4fSMartin Matuska 		/* The -C /foo -C bar case; concatenate */
281*caf54c4fSMartin Matuska 		char *old_pending = bsdtar->pending_chdir;
282*caf54c4fSMartin Matuska 		size_t old_len = strlen(old_pending);
283*caf54c4fSMartin Matuska 		bsdtar->pending_chdir = malloc(old_len + strlen(newdir) + 2);
284*caf54c4fSMartin Matuska 		if (old_pending[old_len - 1] == '/')
285*caf54c4fSMartin Matuska 			old_pending[old_len - 1] = '\0';
286*caf54c4fSMartin Matuska 		if (bsdtar->pending_chdir != NULL)
287*caf54c4fSMartin Matuska 			sprintf(bsdtar->pending_chdir, "%s/%s",
288*caf54c4fSMartin Matuska 			    old_pending, newdir);
289*caf54c4fSMartin Matuska 		free(old_pending);
290*caf54c4fSMartin Matuska 	}
291*caf54c4fSMartin Matuska 	if (bsdtar->pending_chdir == NULL)
292*caf54c4fSMartin Matuska 		lafe_errc(1, errno, "No memory");
293*caf54c4fSMartin Matuska }
294*caf54c4fSMartin Matuska 
295*caf54c4fSMartin Matuska void
296*caf54c4fSMartin Matuska do_chdir(struct bsdtar *bsdtar)
297*caf54c4fSMartin Matuska {
298*caf54c4fSMartin Matuska 	if (bsdtar->pending_chdir == NULL)
299*caf54c4fSMartin Matuska 		return;
300*caf54c4fSMartin Matuska 
301*caf54c4fSMartin Matuska 	if (chdir(bsdtar->pending_chdir) != 0) {
302*caf54c4fSMartin Matuska 		lafe_errc(1, 0, "could not chdir to '%s'\n",
303*caf54c4fSMartin Matuska 		    bsdtar->pending_chdir);
304*caf54c4fSMartin Matuska 	}
305*caf54c4fSMartin Matuska 	free(bsdtar->pending_chdir);
306*caf54c4fSMartin Matuska 	bsdtar->pending_chdir = NULL;
307*caf54c4fSMartin Matuska }
308*caf54c4fSMartin Matuska 
/* True if 'ch' separates path elements ('\\' counts on Windows ONLY). */
static int
strip_is_separator(char ch)
{
#if defined(_WIN32) && !defined(__CYGWIN__)
	if (ch == '\\')
		return (1);
#endif
	return (ch == '/');
}

/*
 * Drop the first 'elements' path elements from 'p'.
 *
 * Returns a pointer into 'p' just past the removed elements and any
 * separators that follow them, or NULL when the path is too short or
 * nothing but separators would remain.
 */
static const char *
strip_components(const char *p, int elements)
{
	const char *cursor = p;
	int remaining = elements;

	/* Consume characters until enough separators have gone by. */
	while (remaining > 0) {
		char ch = *cursor++;

		if (ch == '\0')
			return (NULL);	/* Path is too short, skip it. */
		if (strip_is_separator(ch))
			remaining--;
	}

	/* Swallow any further separators.  This covers short paths with
	 * extra trailing separators, and stopping in the middle of a
	 * duplicate "//" run (which would otherwise leave what looks like
	 * an absolute path). */
	while (strip_is_separator(*cursor))
		cursor++;

	return (*cursor == '\0' ? NULL : cursor);
}
347*caf54c4fSMartin Matuska 
348*caf54c4fSMartin Matuska /*
349*caf54c4fSMartin Matuska  * Handle --strip-components and any future path-rewriting options.
350*caf54c4fSMartin Matuska  * Returns non-zero if the pathname should not be extracted.
351*caf54c4fSMartin Matuska  *
352*caf54c4fSMartin Matuska  * TODO: Support pax-style regex path rewrites.
353*caf54c4fSMartin Matuska  */
354*caf54c4fSMartin Matuska int
355*caf54c4fSMartin Matuska edit_pathname(struct bsdtar *bsdtar, struct archive_entry *entry)
356*caf54c4fSMartin Matuska {
357*caf54c4fSMartin Matuska 	const char *name = archive_entry_pathname(entry);
358*caf54c4fSMartin Matuska #if HAVE_REGEX_H
359*caf54c4fSMartin Matuska 	char *subst_name;
360*caf54c4fSMartin Matuska 	int r;
361*caf54c4fSMartin Matuska #endif
362*caf54c4fSMartin Matuska 
363*caf54c4fSMartin Matuska #if HAVE_REGEX_H
364*caf54c4fSMartin Matuska 	r = apply_substitution(bsdtar, name, &subst_name, 0);
365*caf54c4fSMartin Matuska 	if (r == -1) {
366*caf54c4fSMartin Matuska 		lafe_warnc(0, "Invalid substitution, skipping entry");
367*caf54c4fSMartin Matuska 		return 1;
368*caf54c4fSMartin Matuska 	}
369*caf54c4fSMartin Matuska 	if (r == 1) {
370*caf54c4fSMartin Matuska 		archive_entry_copy_pathname(entry, subst_name);
371*caf54c4fSMartin Matuska 		if (*subst_name == '\0') {
372*caf54c4fSMartin Matuska 			free(subst_name);
373*caf54c4fSMartin Matuska 			return -1;
374*caf54c4fSMartin Matuska 		} else
375*caf54c4fSMartin Matuska 			free(subst_name);
376*caf54c4fSMartin Matuska 		name = archive_entry_pathname(entry);
377*caf54c4fSMartin Matuska 	}
378*caf54c4fSMartin Matuska 
379*caf54c4fSMartin Matuska 	if (archive_entry_hardlink(entry)) {
380*caf54c4fSMartin Matuska 		r = apply_substitution(bsdtar, archive_entry_hardlink(entry), &subst_name, 1);
381*caf54c4fSMartin Matuska 		if (r == -1) {
382*caf54c4fSMartin Matuska 			lafe_warnc(0, "Invalid substitution, skipping entry");
383*caf54c4fSMartin Matuska 			return 1;
384*caf54c4fSMartin Matuska 		}
385*caf54c4fSMartin Matuska 		if (r == 1) {
386*caf54c4fSMartin Matuska 			archive_entry_copy_hardlink(entry, subst_name);
387*caf54c4fSMartin Matuska 			free(subst_name);
388*caf54c4fSMartin Matuska 		}
389*caf54c4fSMartin Matuska 	}
390*caf54c4fSMartin Matuska 	if (archive_entry_symlink(entry) != NULL) {
391*caf54c4fSMartin Matuska 		r = apply_substitution(bsdtar, archive_entry_symlink(entry), &subst_name, 1);
392*caf54c4fSMartin Matuska 		if (r == -1) {
393*caf54c4fSMartin Matuska 			lafe_warnc(0, "Invalid substitution, skipping entry");
394*caf54c4fSMartin Matuska 			return 1;
395*caf54c4fSMartin Matuska 		}
396*caf54c4fSMartin Matuska 		if (r == 1) {
397*caf54c4fSMartin Matuska 			archive_entry_copy_symlink(entry, subst_name);
398*caf54c4fSMartin Matuska 			free(subst_name);
399*caf54c4fSMartin Matuska 		}
400*caf54c4fSMartin Matuska 	}
401*caf54c4fSMartin Matuska #endif
402*caf54c4fSMartin Matuska 
403*caf54c4fSMartin Matuska 	/* Strip leading dir names as per --strip-components option. */
404*caf54c4fSMartin Matuska 	if (bsdtar->strip_components > 0) {
405*caf54c4fSMartin Matuska 		const char *linkname = archive_entry_hardlink(entry);
406*caf54c4fSMartin Matuska 
407*caf54c4fSMartin Matuska 		name = strip_components(name, bsdtar->strip_components);
408*caf54c4fSMartin Matuska 		if (name == NULL)
409*caf54c4fSMartin Matuska 			return (1);
410*caf54c4fSMartin Matuska 
411*caf54c4fSMartin Matuska 		if (linkname != NULL) {
412*caf54c4fSMartin Matuska 			linkname = strip_components(linkname,
413*caf54c4fSMartin Matuska 			    bsdtar->strip_components);
414*caf54c4fSMartin Matuska 			if (linkname == NULL)
415*caf54c4fSMartin Matuska 				return (1);
416*caf54c4fSMartin Matuska 			archive_entry_copy_hardlink(entry, linkname);
417*caf54c4fSMartin Matuska 		}
418*caf54c4fSMartin Matuska 	}
419*caf54c4fSMartin Matuska 
420*caf54c4fSMartin Matuska 	/* By default, don't write or restore absolute pathnames. */
421*caf54c4fSMartin Matuska 	if (!bsdtar->option_absolute_paths) {
422*caf54c4fSMartin Matuska 		const char *rp, *p = name;
423*caf54c4fSMartin Matuska 		int slashonly = 1;
424*caf54c4fSMartin Matuska 
425*caf54c4fSMartin Matuska 		/* Remove leading "//./" or "//?/" or "//?/UNC/"
426*caf54c4fSMartin Matuska 		 * (absolute path prefixes used by Windows API) */
427*caf54c4fSMartin Matuska 		if ((p[0] == '/' || p[0] == '\\') &&
428*caf54c4fSMartin Matuska 		    (p[1] == '/' || p[1] == '\\') &&
429*caf54c4fSMartin Matuska 		    (p[2] == '.' || p[2] == '?') &&
430*caf54c4fSMartin Matuska 		    (p[3] == '/' || p[3] == '\\'))
431*caf54c4fSMartin Matuska 		{
432*caf54c4fSMartin Matuska 			if (p[2] == '?' &&
433*caf54c4fSMartin Matuska 			    (p[4] == 'U' || p[4] == 'u') &&
434*caf54c4fSMartin Matuska 			    (p[5] == 'N' || p[5] == 'n') &&
435*caf54c4fSMartin Matuska 			    (p[6] == 'C' || p[6] == 'c') &&
436*caf54c4fSMartin Matuska 			    (p[7] == '/' || p[7] == '\\'))
437*caf54c4fSMartin Matuska 				p += 8;
438*caf54c4fSMartin Matuska 			else
439*caf54c4fSMartin Matuska 				p += 4;
440*caf54c4fSMartin Matuska 			slashonly = 0;
441*caf54c4fSMartin Matuska 		}
442*caf54c4fSMartin Matuska 		do {
443*caf54c4fSMartin Matuska 			rp = p;
444*caf54c4fSMartin Matuska 			/* Remove leading drive letter from archives created
445*caf54c4fSMartin Matuska 			 * on Windows. */
446*caf54c4fSMartin Matuska 			if (((p[0] >= 'a' && p[0] <= 'z') ||
447*caf54c4fSMartin Matuska 			     (p[0] >= 'A' && p[0] <= 'Z')) &&
448*caf54c4fSMartin Matuska 				 p[1] == ':') {
449*caf54c4fSMartin Matuska 				p += 2;
450*caf54c4fSMartin Matuska 				slashonly = 0;
451*caf54c4fSMartin Matuska 			}
452*caf54c4fSMartin Matuska 			/* Remove leading "/../", "//", etc. */
453*caf54c4fSMartin Matuska 			while (p[0] == '/' || p[0] == '\\') {
454*caf54c4fSMartin Matuska 				if (p[1] == '.' && p[2] == '.' &&
455*caf54c4fSMartin Matuska 					(p[3] == '/' || p[3] == '\\')) {
456*caf54c4fSMartin Matuska 					p += 3; /* Remove "/..", leave "/"
457*caf54c4fSMartin Matuska 							 * for next pass. */
458*caf54c4fSMartin Matuska 					slashonly = 0;
459*caf54c4fSMartin Matuska 				} else
460*caf54c4fSMartin Matuska 					p += 1; /* Remove "/". */
461*caf54c4fSMartin Matuska 			}
462*caf54c4fSMartin Matuska 		} while (rp != p);
463*caf54c4fSMartin Matuska 
464*caf54c4fSMartin Matuska 		if (p != name && !bsdtar->warned_lead_slash) {
465*caf54c4fSMartin Matuska 			/* Generate a warning the first time this happens. */
466*caf54c4fSMartin Matuska 			if (slashonly)
467*caf54c4fSMartin Matuska 				lafe_warnc(0,
468*caf54c4fSMartin Matuska 				    "Removing leading '%c' from member names",
469*caf54c4fSMartin Matuska 				    name[0]);
470*caf54c4fSMartin Matuska 			else
471*caf54c4fSMartin Matuska 				lafe_warnc(0,
472*caf54c4fSMartin Matuska 				    "Removing leading drive letter from "
473*caf54c4fSMartin Matuska 				    "member names");
474*caf54c4fSMartin Matuska 			bsdtar->warned_lead_slash = 1;
475*caf54c4fSMartin Matuska 		}
476*caf54c4fSMartin Matuska 
477*caf54c4fSMartin Matuska 		/* Special case: Stripping everything yields ".". */
478*caf54c4fSMartin Matuska 		if (*p == '\0')
479*caf54c4fSMartin Matuska 			name = ".";
480*caf54c4fSMartin Matuska 		else
481*caf54c4fSMartin Matuska 			name = p;
482*caf54c4fSMartin Matuska 	} else {
483*caf54c4fSMartin Matuska 		/* Strip redundant leading '/' characters. */
484*caf54c4fSMartin Matuska 		while (name[0] == '/' && name[1] == '/')
485*caf54c4fSMartin Matuska 			name++;
486*caf54c4fSMartin Matuska 	}
487*caf54c4fSMartin Matuska 
488*caf54c4fSMartin Matuska 	/* Safely replace name in archive_entry. */
489*caf54c4fSMartin Matuska 	if (name != archive_entry_pathname(entry)) {
490*caf54c4fSMartin Matuska 		char *q = strdup(name);
491*caf54c4fSMartin Matuska 		archive_entry_copy_pathname(entry, q);
492*caf54c4fSMartin Matuska 		free(q);
493*caf54c4fSMartin Matuska 	}
494*caf54c4fSMartin Matuska 	return (0);
495*caf54c4fSMartin Matuska }
496*caf54c4fSMartin Matuska 
/*
 * It would be nice to just use printf() for formatting large numbers,
 * but the compatibility problems are quite a headache.  Hence the
 * following simple utility function.
 *
 * Formats 'n0' as a signed decimal string and returns a pointer into
 * a static buffer, so the result is overwritten by the next call.
 */
const char *
tar_i64toa(int64_t n0)
{
	static char buff[24];
	/*
	 * Take the magnitude in unsigned arithmetic: negating INT64_MIN
	 * as a signed value would overflow, whereas unsigned negation is
	 * well defined and yields the right magnitude.
	 */
	uint64_t n = n0 < 0 ? (uint64_t)0 - (uint64_t)n0 : (uint64_t)n0;
	char *p = buff + sizeof(buff);

	/* Build the digits backwards from the end of the buffer. */
	*--p = '\0';
	do {
		*--p = (char)('0' + (int)(n % 10));
		n /= 10;
	} while (n > 0);
	if (n0 < 0)
		*--p = '-';
	return p;
}
518*caf54c4fSMartin Matuska 
/*
 * Like strcmp(), but try to be a little more aware of the fact that
 * we're comparing two paths.  Right now, it just handles leading
 * "./" and trailing '/' specially, so that "a/b/" == "./a/b"
 *
 * Returns 0 when the paths are considered equal; otherwise the
 * difference between the first pair of differing bytes, compared as
 * unsigned char.
 *
 * TODO: Make this better, so that "./a//b/./c/" == "a/b/c"
 * TODO: After this works, push it down into libarchive.
 * TODO: Publish the path normalization routines in libarchive so
 * that bsdtar can normalize paths and use fast strcmp() instead
 * of this.
 *
 * Note: This is currently only used within write.c, so should
 * not handle \ path separators.
 */

int
pathcmp(const char *a, const char *b)
{
	const char *lhs = a;
	const char *rhs = b;

	/* Ignore a leading "./" unless it is the whole string. */
	if (lhs[0] == '.' && lhs[1] == '/' && lhs[2] != '\0')
		lhs += 2;
	if (rhs[0] == '.' && rhs[1] == '/' && rhs[2] != '\0')
		rhs += 2;

	/* Walk the common prefix; reaching the end of both means equal. */
	for (; *lhs == *rhs; lhs++, rhs++) {
		if (*lhs == '\0')
			return (0);
	}

	/* A lone trailing '/' on exactly one side does not count as a
	 * difference. */
	if (*rhs == '\0') {
		if (lhs[0] == '/' && lhs[1] == '\0')
			return (0);
	} else if (*lhs == '\0') {
		if (rhs[0] == '/' && rhs[1] == '\0')
			return (0);
	}

	/* They're really different, return the correct sign. */
	return (*(const unsigned char *)lhs - *(const unsigned char *)rhs);
}
560