1*caf54c4fSMartin Matuska /*- 2*caf54c4fSMartin Matuska * Copyright (c) 2003-2007 Tim Kientzle 3*caf54c4fSMartin Matuska * All rights reserved. 4*caf54c4fSMartin Matuska * 5*caf54c4fSMartin Matuska * Redistribution and use in source and binary forms, with or without 6*caf54c4fSMartin Matuska * modification, are permitted provided that the following conditions 7*caf54c4fSMartin Matuska * are met: 8*caf54c4fSMartin Matuska * 1. Redistributions of source code must retain the above copyright 9*caf54c4fSMartin Matuska * notice, this list of conditions and the following disclaimer. 10*caf54c4fSMartin Matuska * 2. Redistributions in binary form must reproduce the above copyright 11*caf54c4fSMartin Matuska * notice, this list of conditions and the following disclaimer in the 12*caf54c4fSMartin Matuska * documentation and/or other materials provided with the distribution. 13*caf54c4fSMartin Matuska * 14*caf54c4fSMartin Matuska * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15*caf54c4fSMartin Matuska * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16*caf54c4fSMartin Matuska * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17*caf54c4fSMartin Matuska * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18*caf54c4fSMartin Matuska * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19*caf54c4fSMartin Matuska * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20*caf54c4fSMartin Matuska * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21*caf54c4fSMartin Matuska * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22*caf54c4fSMartin Matuska * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23*caf54c4fSMartin Matuska * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24*caf54c4fSMartin Matuska */ 25*caf54c4fSMartin Matuska 26*caf54c4fSMartin Matuska #include "bsdtar_platform.h" 27*caf54c4fSMartin Matuska __FBSDID("$FreeBSD: src/usr.bin/tar/util.c,v 1.23 2008/12/15 06:00:25 kientzle Exp $"); 28*caf54c4fSMartin Matuska 29*caf54c4fSMartin Matuska #ifdef HAVE_SYS_STAT_H 30*caf54c4fSMartin Matuska #include <sys/stat.h> 31*caf54c4fSMartin Matuska #endif 32*caf54c4fSMartin Matuska #ifdef HAVE_SYS_TYPES_H 33*caf54c4fSMartin Matuska #include <sys/types.h> /* Linux doesn't define mode_t, etc. in sys/stat.h. */ 34*caf54c4fSMartin Matuska #endif 35*caf54c4fSMartin Matuska #include <ctype.h> 36*caf54c4fSMartin Matuska #ifdef HAVE_ERRNO_H 37*caf54c4fSMartin Matuska #include <errno.h> 38*caf54c4fSMartin Matuska #endif 39*caf54c4fSMartin Matuska #ifdef HAVE_IO_H 40*caf54c4fSMartin Matuska #include <io.h> 41*caf54c4fSMartin Matuska #endif 42*caf54c4fSMartin Matuska #ifdef HAVE_STDARG_H 43*caf54c4fSMartin Matuska #include <stdarg.h> 44*caf54c4fSMartin Matuska #endif 45*caf54c4fSMartin Matuska #ifdef HAVE_STDINT_H 46*caf54c4fSMartin Matuska #include <stdint.h> 47*caf54c4fSMartin Matuska #endif 48*caf54c4fSMartin Matuska #include <stdio.h> 49*caf54c4fSMartin Matuska #ifdef HAVE_STDLIB_H 50*caf54c4fSMartin Matuska #include <stdlib.h> 51*caf54c4fSMartin Matuska #endif 52*caf54c4fSMartin Matuska #ifdef HAVE_STRING_H 53*caf54c4fSMartin Matuska #include <string.h> 54*caf54c4fSMartin Matuska #endif 55*caf54c4fSMartin Matuska #ifdef HAVE_WCTYPE_H 56*caf54c4fSMartin Matuska #include <wctype.h> 57*caf54c4fSMartin Matuska #else 58*caf54c4fSMartin Matuska /* If we don't have wctype, we need to hack up some version of iswprint(). */ 59*caf54c4fSMartin Matuska #define iswprint isprint 60*caf54c4fSMartin Matuska #endif 61*caf54c4fSMartin Matuska 62*caf54c4fSMartin Matuska #include "bsdtar.h" 63*caf54c4fSMartin Matuska #include "err.h" 64*caf54c4fSMartin Matuska 65*caf54c4fSMartin Matuska static size_t bsdtar_expand_char(char *, size_t, char); 66*caf54c4fSMartin Matuska static const char *strip_components(const char *path, int elements); 67*caf54c4fSMartin Matuska 68*caf54c4fSMartin Matuska #if defined(_WIN32) && !defined(__CYGWIN__) 69*caf54c4fSMartin Matuska #define read _read 70*caf54c4fSMartin Matuska #endif 71*caf54c4fSMartin Matuska 72*caf54c4fSMartin Matuska /* TODO: Hack up a version of mbtowc for platforms with no wide 73*caf54c4fSMartin Matuska * character support at all. I think the following might suffice, 74*caf54c4fSMartin Matuska * but it needs careful testing. 75*caf54c4fSMartin Matuska * #if !HAVE_MBTOWC 76*caf54c4fSMartin Matuska * #define mbtowc(wcp, p, n) ((*wcp = *p), 1) 77*caf54c4fSMartin Matuska * #endif 78*caf54c4fSMartin Matuska */ 79*caf54c4fSMartin Matuska 80*caf54c4fSMartin Matuska /* 81*caf54c4fSMartin Matuska * Print a string, taking care with any non-printable characters. 82*caf54c4fSMartin Matuska * 83*caf54c4fSMartin Matuska * Note that we use a stack-allocated buffer to receive the formatted 84*caf54c4fSMartin Matuska * string if we can. This is partly performance (avoiding a call to 85*caf54c4fSMartin Matuska * malloc()), partly out of expedience (we have to call vsnprintf() 86*caf54c4fSMartin Matuska * before malloc() anyway to find out how big a buffer we need; we may 87*caf54c4fSMartin Matuska * as well point that first call at a small local buffer in case it 88*caf54c4fSMartin Matuska * works), but mostly for safety (so we can use this to print messages 89*caf54c4fSMartin Matuska * about out-of-memory conditions). 90*caf54c4fSMartin Matuska */ 91*caf54c4fSMartin Matuska 92*caf54c4fSMartin Matuska void 93*caf54c4fSMartin Matuska safe_fprintf(FILE *f, const char *fmt, ...) 94*caf54c4fSMartin Matuska { 95*caf54c4fSMartin Matuska char fmtbuff_stack[256]; /* Place to format the printf() string. */ 96*caf54c4fSMartin Matuska char outbuff[256]; /* Buffer for outgoing characters. */ 97*caf54c4fSMartin Matuska char *fmtbuff_heap; /* If fmtbuff_stack is too small, we use malloc */ 98*caf54c4fSMartin Matuska char *fmtbuff; /* Pointer to fmtbuff_stack or fmtbuff_heap. */ 99*caf54c4fSMartin Matuska int fmtbuff_length; 100*caf54c4fSMartin Matuska int length, n; 101*caf54c4fSMartin Matuska va_list ap; 102*caf54c4fSMartin Matuska const char *p; 103*caf54c4fSMartin Matuska unsigned i; 104*caf54c4fSMartin Matuska wchar_t wc; 105*caf54c4fSMartin Matuska char try_wc; 106*caf54c4fSMartin Matuska 107*caf54c4fSMartin Matuska /* Use a stack-allocated buffer if we can, for speed and safety. */ 108*caf54c4fSMartin Matuska fmtbuff_heap = NULL; 109*caf54c4fSMartin Matuska fmtbuff_length = sizeof(fmtbuff_stack); 110*caf54c4fSMartin Matuska fmtbuff = fmtbuff_stack; 111*caf54c4fSMartin Matuska 112*caf54c4fSMartin Matuska /* Try formatting into the stack buffer. */ 113*caf54c4fSMartin Matuska va_start(ap, fmt); 114*caf54c4fSMartin Matuska length = vsnprintf(fmtbuff, fmtbuff_length, fmt, ap); 115*caf54c4fSMartin Matuska va_end(ap); 116*caf54c4fSMartin Matuska 117*caf54c4fSMartin Matuska /* If the result was too large, allocate a buffer on the heap. */ 118*caf54c4fSMartin Matuska if (length >= fmtbuff_length) { 119*caf54c4fSMartin Matuska fmtbuff_length = length+1; 120*caf54c4fSMartin Matuska fmtbuff_heap = malloc(fmtbuff_length); 121*caf54c4fSMartin Matuska 122*caf54c4fSMartin Matuska /* Reformat the result into the heap buffer if we can. */ 123*caf54c4fSMartin Matuska if (fmtbuff_heap != NULL) { 124*caf54c4fSMartin Matuska fmtbuff = fmtbuff_heap; 125*caf54c4fSMartin Matuska va_start(ap, fmt); 126*caf54c4fSMartin Matuska length = vsnprintf(fmtbuff, fmtbuff_length, fmt, ap); 127*caf54c4fSMartin Matuska va_end(ap); 128*caf54c4fSMartin Matuska } else { 129*caf54c4fSMartin Matuska /* Leave fmtbuff pointing to the truncated 130*caf54c4fSMartin Matuska * string in fmtbuff_stack. */ 131*caf54c4fSMartin Matuska length = sizeof(fmtbuff_stack) - 1; 132*caf54c4fSMartin Matuska } 133*caf54c4fSMartin Matuska } 134*caf54c4fSMartin Matuska 135*caf54c4fSMartin Matuska /* Note: mbrtowc() has a cleaner API, but mbtowc() seems a bit 136*caf54c4fSMartin Matuska * more portable, so we use that here instead. */ 137*caf54c4fSMartin Matuska n = mbtowc(NULL, NULL, 1); /* Reset the shift state. */ 138*caf54c4fSMartin Matuska 139*caf54c4fSMartin Matuska /* Write data, expanding unprintable characters. */ 140*caf54c4fSMartin Matuska p = fmtbuff; 141*caf54c4fSMartin Matuska i = 0; 142*caf54c4fSMartin Matuska try_wc = 1; 143*caf54c4fSMartin Matuska while (*p != '\0') { 144*caf54c4fSMartin Matuska 145*caf54c4fSMartin Matuska /* Convert to wide char, test if the wide 146*caf54c4fSMartin Matuska * char is printable in the current locale. */ 147*caf54c4fSMartin Matuska if (try_wc && (n = mbtowc(&wc, p, length)) != -1) { 148*caf54c4fSMartin Matuska length -= n; 149*caf54c4fSMartin Matuska if (iswprint(wc) && wc != L'\\') { 150*caf54c4fSMartin Matuska /* Printable, copy the bytes through. */ 151*caf54c4fSMartin Matuska while (n-- > 0) 152*caf54c4fSMartin Matuska outbuff[i++] = *p++; 153*caf54c4fSMartin Matuska } else { 154*caf54c4fSMartin Matuska /* Not printable, format the bytes. */ 155*caf54c4fSMartin Matuska while (n-- > 0) 156*caf54c4fSMartin Matuska i += (unsigned)bsdtar_expand_char( 157*caf54c4fSMartin Matuska outbuff, i, *p++); 158*caf54c4fSMartin Matuska } 159*caf54c4fSMartin Matuska } else { 160*caf54c4fSMartin Matuska /* After any conversion failure, don't bother 161*caf54c4fSMartin Matuska * trying to convert the rest. */ 162*caf54c4fSMartin Matuska i += (unsigned)bsdtar_expand_char(outbuff, i, *p++); 163*caf54c4fSMartin Matuska try_wc = 0; 164*caf54c4fSMartin Matuska } 165*caf54c4fSMartin Matuska 166*caf54c4fSMartin Matuska /* If our output buffer is full, dump it and keep going. */ 167*caf54c4fSMartin Matuska if (i > (sizeof(outbuff) - 20)) { 168*caf54c4fSMartin Matuska outbuff[i] = '\0'; 169*caf54c4fSMartin Matuska fprintf(f, "%s", outbuff); 170*caf54c4fSMartin Matuska i = 0; 171*caf54c4fSMartin Matuska } 172*caf54c4fSMartin Matuska } 173*caf54c4fSMartin Matuska outbuff[i] = '\0'; 174*caf54c4fSMartin Matuska fprintf(f, "%s", outbuff); 175*caf54c4fSMartin Matuska 176*caf54c4fSMartin Matuska /* If we allocated a heap-based formatting buffer, free it now. */ 177*caf54c4fSMartin Matuska if (fmtbuff_heap != NULL) 178*caf54c4fSMartin Matuska free(fmtbuff_heap); 179*caf54c4fSMartin Matuska } 180*caf54c4fSMartin Matuska 181*caf54c4fSMartin Matuska /* 182*caf54c4fSMartin Matuska * Render an arbitrary sequence of bytes into printable ASCII characters. 183*caf54c4fSMartin Matuska */ 184*caf54c4fSMartin Matuska static size_t 185*caf54c4fSMartin Matuska bsdtar_expand_char(char *buff, size_t offset, char c) 186*caf54c4fSMartin Matuska { 187*caf54c4fSMartin Matuska size_t i = offset; 188*caf54c4fSMartin Matuska 189*caf54c4fSMartin Matuska if (isprint((unsigned char)c) && c != '\\') 190*caf54c4fSMartin Matuska buff[i++] = c; 191*caf54c4fSMartin Matuska else { 192*caf54c4fSMartin Matuska buff[i++] = '\\'; 193*caf54c4fSMartin Matuska switch (c) { 194*caf54c4fSMartin Matuska case '\a': buff[i++] = 'a'; break; 195*caf54c4fSMartin Matuska case '\b': buff[i++] = 'b'; break; 196*caf54c4fSMartin Matuska case '\f': buff[i++] = 'f'; break; 197*caf54c4fSMartin Matuska case '\n': buff[i++] = 'n'; break; 198*caf54c4fSMartin Matuska #if '\r' != '\n' 199*caf54c4fSMartin Matuska /* On some platforms, \n and \r are the same. */ 200*caf54c4fSMartin Matuska case '\r': buff[i++] = 'r'; break; 201*caf54c4fSMartin Matuska #endif 202*caf54c4fSMartin Matuska case '\t': buff[i++] = 't'; break; 203*caf54c4fSMartin Matuska case '\v': buff[i++] = 'v'; break; 204*caf54c4fSMartin Matuska case '\\': buff[i++] = '\\'; break; 205*caf54c4fSMartin Matuska default: 206*caf54c4fSMartin Matuska sprintf(buff + i, "%03o", 0xFF & (int)c); 207*caf54c4fSMartin Matuska i += 3; 208*caf54c4fSMartin Matuska } 209*caf54c4fSMartin Matuska } 210*caf54c4fSMartin Matuska 211*caf54c4fSMartin Matuska return (i - offset); 212*caf54c4fSMartin Matuska } 213*caf54c4fSMartin Matuska 214*caf54c4fSMartin Matuska int 215*caf54c4fSMartin Matuska yes(const char *fmt, ...) 216*caf54c4fSMartin Matuska { 217*caf54c4fSMartin Matuska char buff[32]; 218*caf54c4fSMartin Matuska char *p; 219*caf54c4fSMartin Matuska ssize_t l; 220*caf54c4fSMartin Matuska 221*caf54c4fSMartin Matuska va_list ap; 222*caf54c4fSMartin Matuska va_start(ap, fmt); 223*caf54c4fSMartin Matuska vfprintf(stderr, fmt, ap); 224*caf54c4fSMartin Matuska va_end(ap); 225*caf54c4fSMartin Matuska fprintf(stderr, " (y/N)? "); 226*caf54c4fSMartin Matuska fflush(stderr); 227*caf54c4fSMartin Matuska 228*caf54c4fSMartin Matuska l = read(2, buff, sizeof(buff) - 1); 229*caf54c4fSMartin Matuska if (l <= 0) 230*caf54c4fSMartin Matuska return (0); 231*caf54c4fSMartin Matuska buff[l] = 0; 232*caf54c4fSMartin Matuska 233*caf54c4fSMartin Matuska for (p = buff; *p != '\0'; p++) { 234*caf54c4fSMartin Matuska if (isspace((unsigned char)*p)) 235*caf54c4fSMartin Matuska continue; 236*caf54c4fSMartin Matuska switch(*p) { 237*caf54c4fSMartin Matuska case 'y': case 'Y': 238*caf54c4fSMartin Matuska return (1); 239*caf54c4fSMartin Matuska case 'n': case 'N': 240*caf54c4fSMartin Matuska return (0); 241*caf54c4fSMartin Matuska default: 242*caf54c4fSMartin Matuska return (0); 243*caf54c4fSMartin Matuska } 244*caf54c4fSMartin Matuska } 245*caf54c4fSMartin Matuska 246*caf54c4fSMartin Matuska return (0); 247*caf54c4fSMartin Matuska } 248*caf54c4fSMartin Matuska 249*caf54c4fSMartin Matuska /*- 250*caf54c4fSMartin Matuska * The logic here for -C <dir> attempts to avoid 251*caf54c4fSMartin Matuska * chdir() as long as possible. For example: 252*caf54c4fSMartin Matuska * "-C /foo -C /bar file" needs chdir("/bar") but not chdir("/foo") 253*caf54c4fSMartin Matuska * "-C /foo -C bar file" needs chdir("/foo/bar") 254*caf54c4fSMartin Matuska * "-C /foo -C bar /file1" does not need chdir() 255*caf54c4fSMartin Matuska * "-C /foo -C bar /file1 file2" needs chdir("/foo/bar") before file2 256*caf54c4fSMartin Matuska * 257*caf54c4fSMartin Matuska * The only correct way to handle this is to record a "pending" chdir 258*caf54c4fSMartin Matuska * request and combine multiple requests intelligently until we 259*caf54c4fSMartin Matuska * need to process a non-absolute file. set_chdir() adds the new dir 260*caf54c4fSMartin Matuska * to the pending list; do_chdir() actually executes any pending chdir. 261*caf54c4fSMartin Matuska * 262*caf54c4fSMartin Matuska * This way, programs that build tar command lines don't have to worry 263*caf54c4fSMartin Matuska * about -C with non-existent directories; such requests will only 264*caf54c4fSMartin Matuska * fail if the directory must be accessed. 265*caf54c4fSMartin Matuska * 266*caf54c4fSMartin Matuska * TODO: Make this handle Windows paths correctly. 267*caf54c4fSMartin Matuska */ 268*caf54c4fSMartin Matuska void 269*caf54c4fSMartin Matuska set_chdir(struct bsdtar *bsdtar, const char *newdir) 270*caf54c4fSMartin Matuska { 271*caf54c4fSMartin Matuska if (newdir[0] == '/') { 272*caf54c4fSMartin Matuska /* The -C /foo -C /bar case; dump first one. */ 273*caf54c4fSMartin Matuska free(bsdtar->pending_chdir); 274*caf54c4fSMartin Matuska bsdtar->pending_chdir = NULL; 275*caf54c4fSMartin Matuska } 276*caf54c4fSMartin Matuska if (bsdtar->pending_chdir == NULL) 277*caf54c4fSMartin Matuska /* Easy case: no previously-saved dir. */ 278*caf54c4fSMartin Matuska bsdtar->pending_chdir = strdup(newdir); 279*caf54c4fSMartin Matuska else { 280*caf54c4fSMartin Matuska /* The -C /foo -C bar case; concatenate */ 281*caf54c4fSMartin Matuska char *old_pending = bsdtar->pending_chdir; 282*caf54c4fSMartin Matuska size_t old_len = strlen(old_pending); 283*caf54c4fSMartin Matuska bsdtar->pending_chdir = malloc(old_len + strlen(newdir) + 2); 284*caf54c4fSMartin Matuska if (old_pending[old_len - 1] == '/') 285*caf54c4fSMartin Matuska old_pending[old_len - 1] = '\0'; 286*caf54c4fSMartin Matuska if (bsdtar->pending_chdir != NULL) 287*caf54c4fSMartin Matuska sprintf(bsdtar->pending_chdir, "%s/%s", 288*caf54c4fSMartin Matuska old_pending, newdir); 289*caf54c4fSMartin Matuska free(old_pending); 290*caf54c4fSMartin Matuska } 291*caf54c4fSMartin Matuska if (bsdtar->pending_chdir == NULL) 292*caf54c4fSMartin Matuska lafe_errc(1, errno, "No memory"); 293*caf54c4fSMartin Matuska } 294*caf54c4fSMartin Matuska 295*caf54c4fSMartin Matuska void 296*caf54c4fSMartin Matuska do_chdir(struct bsdtar *bsdtar) 297*caf54c4fSMartin Matuska { 298*caf54c4fSMartin Matuska if (bsdtar->pending_chdir == NULL) 299*caf54c4fSMartin Matuska return; 300*caf54c4fSMartin Matuska 301*caf54c4fSMartin Matuska if (chdir(bsdtar->pending_chdir) != 0) { 302*caf54c4fSMartin Matuska lafe_errc(1, 0, "could not chdir to '%s'\n", 303*caf54c4fSMartin Matuska bsdtar->pending_chdir); 304*caf54c4fSMartin Matuska } 305*caf54c4fSMartin Matuska free(bsdtar->pending_chdir); 306*caf54c4fSMartin Matuska bsdtar->pending_chdir = NULL; 307*caf54c4fSMartin Matuska } 308*caf54c4fSMartin Matuska 309*caf54c4fSMartin Matuska static const char * 310*caf54c4fSMartin Matuska strip_components(const char *p, int elements) 311*caf54c4fSMartin Matuska { 312*caf54c4fSMartin Matuska /* Skip as many elements as necessary. */ 313*caf54c4fSMartin Matuska while (elements > 0) { 314*caf54c4fSMartin Matuska switch (*p++) { 315*caf54c4fSMartin Matuska case '/': 316*caf54c4fSMartin Matuska #if defined(_WIN32) && !defined(__CYGWIN__) 317*caf54c4fSMartin Matuska case '\\': /* Support \ path sep on Windows ONLY. */ 318*caf54c4fSMartin Matuska #endif 319*caf54c4fSMartin Matuska elements--; 320*caf54c4fSMartin Matuska break; 321*caf54c4fSMartin Matuska case '\0': 322*caf54c4fSMartin Matuska /* Path is too short, skip it. */ 323*caf54c4fSMartin Matuska return (NULL); 324*caf54c4fSMartin Matuska } 325*caf54c4fSMartin Matuska } 326*caf54c4fSMartin Matuska 327*caf54c4fSMartin Matuska /* Skip any / characters. This handles short paths that have 328*caf54c4fSMartin Matuska * additional / termination. This also handles the case where 329*caf54c4fSMartin Matuska * the logic above stops in the middle of a duplicate // 330*caf54c4fSMartin Matuska * sequence (which would otherwise get converted to an 331*caf54c4fSMartin Matuska * absolute path). */ 332*caf54c4fSMartin Matuska for (;;) { 333*caf54c4fSMartin Matuska switch (*p) { 334*caf54c4fSMartin Matuska case '/': 335*caf54c4fSMartin Matuska #if defined(_WIN32) && !defined(__CYGWIN__) 336*caf54c4fSMartin Matuska case '\\': /* Support \ path sep on Windows ONLY. */ 337*caf54c4fSMartin Matuska #endif 338*caf54c4fSMartin Matuska ++p; 339*caf54c4fSMartin Matuska break; 340*caf54c4fSMartin Matuska case '\0': 341*caf54c4fSMartin Matuska return (NULL); 342*caf54c4fSMartin Matuska default: 343*caf54c4fSMartin Matuska return (p); 344*caf54c4fSMartin Matuska } 345*caf54c4fSMartin Matuska } 346*caf54c4fSMartin Matuska } 347*caf54c4fSMartin Matuska 348*caf54c4fSMartin Matuska /* 349*caf54c4fSMartin Matuska * Handle --strip-components and any future path-rewriting options. 350*caf54c4fSMartin Matuska * Returns non-zero if the pathname should not be extracted. 351*caf54c4fSMartin Matuska * 352*caf54c4fSMartin Matuska * TODO: Support pax-style regex path rewrites. 353*caf54c4fSMartin Matuska */ 354*caf54c4fSMartin Matuska int 355*caf54c4fSMartin Matuska edit_pathname(struct bsdtar *bsdtar, struct archive_entry *entry) 356*caf54c4fSMartin Matuska { 357*caf54c4fSMartin Matuska const char *name = archive_entry_pathname(entry); 358*caf54c4fSMartin Matuska #if HAVE_REGEX_H 359*caf54c4fSMartin Matuska char *subst_name; 360*caf54c4fSMartin Matuska int r; 361*caf54c4fSMartin Matuska #endif 362*caf54c4fSMartin Matuska 363*caf54c4fSMartin Matuska #if HAVE_REGEX_H 364*caf54c4fSMartin Matuska r = apply_substitution(bsdtar, name, &subst_name, 0); 365*caf54c4fSMartin Matuska if (r == -1) { 366*caf54c4fSMartin Matuska lafe_warnc(0, "Invalid substitution, skipping entry"); 367*caf54c4fSMartin Matuska return 1; 368*caf54c4fSMartin Matuska } 369*caf54c4fSMartin Matuska if (r == 1) { 370*caf54c4fSMartin Matuska archive_entry_copy_pathname(entry, subst_name); 371*caf54c4fSMartin Matuska if (*subst_name == '\0') { 372*caf54c4fSMartin Matuska free(subst_name); 373*caf54c4fSMartin Matuska return -1; 374*caf54c4fSMartin Matuska } else 375*caf54c4fSMartin Matuska free(subst_name); 376*caf54c4fSMartin Matuska name = archive_entry_pathname(entry); 377*caf54c4fSMartin Matuska } 378*caf54c4fSMartin Matuska 379*caf54c4fSMartin Matuska if (archive_entry_hardlink(entry)) { 380*caf54c4fSMartin Matuska r = apply_substitution(bsdtar, archive_entry_hardlink(entry), &subst_name, 1); 381*caf54c4fSMartin Matuska if (r == -1) { 382*caf54c4fSMartin Matuska lafe_warnc(0, "Invalid substitution, skipping entry"); 383*caf54c4fSMartin Matuska return 1; 384*caf54c4fSMartin Matuska } 385*caf54c4fSMartin Matuska if (r == 1) { 386*caf54c4fSMartin Matuska archive_entry_copy_hardlink(entry, subst_name); 387*caf54c4fSMartin Matuska free(subst_name); 388*caf54c4fSMartin Matuska } 389*caf54c4fSMartin Matuska } 390*caf54c4fSMartin Matuska if (archive_entry_symlink(entry) != NULL) { 391*caf54c4fSMartin Matuska r = apply_substitution(bsdtar, archive_entry_symlink(entry), &subst_name, 1); 392*caf54c4fSMartin Matuska if (r == -1) { 393*caf54c4fSMartin Matuska lafe_warnc(0, "Invalid substitution, skipping entry"); 394*caf54c4fSMartin Matuska return 1; 395*caf54c4fSMartin Matuska } 396*caf54c4fSMartin Matuska if (r == 1) { 397*caf54c4fSMartin Matuska archive_entry_copy_symlink(entry, subst_name); 398*caf54c4fSMartin Matuska free(subst_name); 399*caf54c4fSMartin Matuska } 400*caf54c4fSMartin Matuska } 401*caf54c4fSMartin Matuska #endif 402*caf54c4fSMartin Matuska 403*caf54c4fSMartin Matuska /* Strip leading dir names as per --strip-components option. */ 404*caf54c4fSMartin Matuska if (bsdtar->strip_components > 0) { 405*caf54c4fSMartin Matuska const char *linkname = archive_entry_hardlink(entry); 406*caf54c4fSMartin Matuska 407*caf54c4fSMartin Matuska name = strip_components(name, bsdtar->strip_components); 408*caf54c4fSMartin Matuska if (name == NULL) 409*caf54c4fSMartin Matuska return (1); 410*caf54c4fSMartin Matuska 411*caf54c4fSMartin Matuska if (linkname != NULL) { 412*caf54c4fSMartin Matuska linkname = strip_components(linkname, 413*caf54c4fSMartin Matuska bsdtar->strip_components); 414*caf54c4fSMartin Matuska if (linkname == NULL) 415*caf54c4fSMartin Matuska return (1); 416*caf54c4fSMartin Matuska archive_entry_copy_hardlink(entry, linkname); 417*caf54c4fSMartin Matuska } 418*caf54c4fSMartin Matuska } 419*caf54c4fSMartin Matuska 420*caf54c4fSMartin Matuska /* By default, don't write or restore absolute pathnames. */ 421*caf54c4fSMartin Matuska if (!bsdtar->option_absolute_paths) { 422*caf54c4fSMartin Matuska const char *rp, *p = name; 423*caf54c4fSMartin Matuska int slashonly = 1; 424*caf54c4fSMartin Matuska 425*caf54c4fSMartin Matuska /* Remove leading "//./" or "//?/" or "//?/UNC/" 426*caf54c4fSMartin Matuska * (absolute path prefixes used by Windows API) */ 427*caf54c4fSMartin Matuska if ((p[0] == '/' || p[0] == '\\') && 428*caf54c4fSMartin Matuska (p[1] == '/' || p[1] == '\\') && 429*caf54c4fSMartin Matuska (p[2] == '.' || p[2] == '?') && 430*caf54c4fSMartin Matuska (p[3] == '/' || p[3] == '\\')) 431*caf54c4fSMartin Matuska { 432*caf54c4fSMartin Matuska if (p[2] == '?' && 433*caf54c4fSMartin Matuska (p[4] == 'U' || p[4] == 'u') && 434*caf54c4fSMartin Matuska (p[5] == 'N' || p[5] == 'n') && 435*caf54c4fSMartin Matuska (p[6] == 'C' || p[6] == 'c') && 436*caf54c4fSMartin Matuska (p[7] == '/' || p[7] == '\\')) 437*caf54c4fSMartin Matuska p += 8; 438*caf54c4fSMartin Matuska else 439*caf54c4fSMartin Matuska p += 4; 440*caf54c4fSMartin Matuska slashonly = 0; 441*caf54c4fSMartin Matuska } 442*caf54c4fSMartin Matuska do { 443*caf54c4fSMartin Matuska rp = p; 444*caf54c4fSMartin Matuska /* Remove leading drive letter from archives created 445*caf54c4fSMartin Matuska * on Windows. */ 446*caf54c4fSMartin Matuska if (((p[0] >= 'a' && p[0] <= 'z') || 447*caf54c4fSMartin Matuska (p[0] >= 'A' && p[0] <= 'Z')) && 448*caf54c4fSMartin Matuska p[1] == ':') { 449*caf54c4fSMartin Matuska p += 2; 450*caf54c4fSMartin Matuska slashonly = 0; 451*caf54c4fSMartin Matuska } 452*caf54c4fSMartin Matuska /* Remove leading "/../", "//", etc. */ 453*caf54c4fSMartin Matuska while (p[0] == '/' || p[0] == '\\') { 454*caf54c4fSMartin Matuska if (p[1] == '.' && p[2] == '.' && 455*caf54c4fSMartin Matuska (p[3] == '/' || p[3] == '\\')) { 456*caf54c4fSMartin Matuska p += 3; /* Remove "/..", leave "/" 457*caf54c4fSMartin Matuska * for next pass. */ 458*caf54c4fSMartin Matuska slashonly = 0; 459*caf54c4fSMartin Matuska } else 460*caf54c4fSMartin Matuska p += 1; /* Remove "/". */ 461*caf54c4fSMartin Matuska } 462*caf54c4fSMartin Matuska } while (rp != p); 463*caf54c4fSMartin Matuska 464*caf54c4fSMartin Matuska if (p != name && !bsdtar->warned_lead_slash) { 465*caf54c4fSMartin Matuska /* Generate a warning the first time this happens. */ 466*caf54c4fSMartin Matuska if (slashonly) 467*caf54c4fSMartin Matuska lafe_warnc(0, 468*caf54c4fSMartin Matuska "Removing leading '%c' from member names", 469*caf54c4fSMartin Matuska name[0]); 470*caf54c4fSMartin Matuska else 471*caf54c4fSMartin Matuska lafe_warnc(0, 472*caf54c4fSMartin Matuska "Removing leading drive letter from " 473*caf54c4fSMartin Matuska "member names"); 474*caf54c4fSMartin Matuska bsdtar->warned_lead_slash = 1; 475*caf54c4fSMartin Matuska } 476*caf54c4fSMartin Matuska 477*caf54c4fSMartin Matuska /* Special case: Stripping everything yields ".". */ 478*caf54c4fSMartin Matuska if (*p == '\0') 479*caf54c4fSMartin Matuska name = "."; 480*caf54c4fSMartin Matuska else 481*caf54c4fSMartin Matuska name = p; 482*caf54c4fSMartin Matuska } else { 483*caf54c4fSMartin Matuska /* Strip redundant leading '/' characters. */ 484*caf54c4fSMartin Matuska while (name[0] == '/' && name[1] == '/') 485*caf54c4fSMartin Matuska name++; 486*caf54c4fSMartin Matuska } 487*caf54c4fSMartin Matuska 488*caf54c4fSMartin Matuska /* Safely replace name in archive_entry. */ 489*caf54c4fSMartin Matuska if (name != archive_entry_pathname(entry)) { 490*caf54c4fSMartin Matuska char *q = strdup(name); 491*caf54c4fSMartin Matuska archive_entry_copy_pathname(entry, q); 492*caf54c4fSMartin Matuska free(q); 493*caf54c4fSMartin Matuska } 494*caf54c4fSMartin Matuska return (0); 495*caf54c4fSMartin Matuska } 496*caf54c4fSMartin Matuska 497*caf54c4fSMartin Matuska /* 498*caf54c4fSMartin Matuska * It would be nice to just use printf() for formatting large numbers, 499*caf54c4fSMartin Matuska * but the compatibility problems are quite a headache. Hence the 500*caf54c4fSMartin Matuska * following simple utility function. 501*caf54c4fSMartin Matuska */ 502*caf54c4fSMartin Matuska const char * 503*caf54c4fSMartin Matuska tar_i64toa(int64_t n0) 504*caf54c4fSMartin Matuska { 505*caf54c4fSMartin Matuska static char buff[24]; 506*caf54c4fSMartin Matuska int64_t n = n0 < 0 ? -n0 : n0; 507*caf54c4fSMartin Matuska char *p = buff + sizeof(buff); 508*caf54c4fSMartin Matuska 509*caf54c4fSMartin Matuska *--p = '\0'; 510*caf54c4fSMartin Matuska do { 511*caf54c4fSMartin Matuska *--p = '0' + (int)(n % 10); 512*caf54c4fSMartin Matuska n /= 10; 513*caf54c4fSMartin Matuska } while (n > 0); 514*caf54c4fSMartin Matuska if (n0 < 0) 515*caf54c4fSMartin Matuska *--p = '-'; 516*caf54c4fSMartin Matuska return p; 517*caf54c4fSMartin Matuska } 518*caf54c4fSMartin Matuska 519*caf54c4fSMartin Matuska /* 520*caf54c4fSMartin Matuska * Like strcmp(), but try to be a little more aware of the fact that 521*caf54c4fSMartin Matuska * we're comparing two paths. Right now, it just handles leading 522*caf54c4fSMartin Matuska * "./" and trailing '/' specially, so that "a/b/" == "./a/b" 523*caf54c4fSMartin Matuska * 524*caf54c4fSMartin Matuska * TODO: Make this better, so that "./a//b/./c/" == "a/b/c" 525*caf54c4fSMartin Matuska * TODO: After this works, push it down into libarchive. 526*caf54c4fSMartin Matuska * TODO: Publish the path normalization routines in libarchive so 527*caf54c4fSMartin Matuska * that bsdtar can normalize paths and use fast strcmp() instead 528*caf54c4fSMartin Matuska * of this. 529*caf54c4fSMartin Matuska * 530*caf54c4fSMartin Matuska * Note: This is currently only used within write.c, so should 531*caf54c4fSMartin Matuska * not handle \ path separators. 532*caf54c4fSMartin Matuska */ 533*caf54c4fSMartin Matuska 534*caf54c4fSMartin Matuska int 535*caf54c4fSMartin Matuska pathcmp(const char *a, const char *b) 536*caf54c4fSMartin Matuska { 537*caf54c4fSMartin Matuska /* Skip leading './' */ 538*caf54c4fSMartin Matuska if (a[0] == '.' && a[1] == '/' && a[2] != '\0') 539*caf54c4fSMartin Matuska a += 2; 540*caf54c4fSMartin Matuska if (b[0] == '.' && b[1] == '/' && b[2] != '\0') 541*caf54c4fSMartin Matuska b += 2; 542*caf54c4fSMartin Matuska /* Find the first difference, or return (0) if none. */ 543*caf54c4fSMartin Matuska while (*a == *b) { 544*caf54c4fSMartin Matuska if (*a == '\0') 545*caf54c4fSMartin Matuska return (0); 546*caf54c4fSMartin Matuska a++; 547*caf54c4fSMartin Matuska b++; 548*caf54c4fSMartin Matuska } 549*caf54c4fSMartin Matuska /* 550*caf54c4fSMartin Matuska * If one ends in '/' and the other one doesn't, 551*caf54c4fSMartin Matuska * they're the same. 552*caf54c4fSMartin Matuska */ 553*caf54c4fSMartin Matuska if (a[0] == '/' && a[1] == '\0' && b[0] == '\0') 554*caf54c4fSMartin Matuska return (0); 555*caf54c4fSMartin Matuska if (a[0] == '\0' && b[0] == '/' && b[1] == '\0') 556*caf54c4fSMartin Matuska return (0); 557*caf54c4fSMartin Matuska /* They're really different, return the correct sign. */ 558*caf54c4fSMartin Matuska return (*(const unsigned char *)a - *(const unsigned char *)b); 559*caf54c4fSMartin Matuska } 560