1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1992 Keith Muller. 5 * Copyright (c) 1992, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Keith Muller of the University of California, San Diego. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <sys/types.h> 37 #include <sys/stat.h> 38 #include <sys/time.h> 39 #include <sys/resource.h> 40 #include <err.h> 41 #include <errno.h> 42 #include <fcntl.h> 43 #include <locale.h> 44 #include <paths.h> 45 #include <signal.h> 46 #include <stdio.h> 47 #include <stdlib.h> 48 #include <string.h> 49 #include "pax.h" 50 #include "extern.h" 51 static int gen_init(void); 52 53 /* 54 * PAX main routines, general globals and some simple start up routines 55 */ 56 57 /* 58 * Variables that can be accessed by any routine within pax 59 */ 60 int act = DEFOP; /* read/write/append/copy */ 61 FSUB *frmt = NULL; /* archive format type */ 62 int cflag; /* match all EXCEPT pattern/file */ 63 int cwdfd; /* starting cwd */ 64 int dflag; /* directory member match only */ 65 int iflag; /* interactive file/archive rename */ 66 int kflag; /* do not overwrite existing files */ 67 int lflag; /* use hard links when possible */ 68 int nflag; /* select first archive member match */ 69 int tflag; /* restore access time after read */ 70 int uflag; /* ignore older modification time files */ 71 int vflag; /* produce verbose output */ 72 int Dflag; /* same as uflag except for inode change time */ 73 int Hflag; /* follow command line symlinks (write only) */ 74 int Lflag; /* follow symlinks when writing */ 75 int Oflag; /* limit to single volume */ 76 int Xflag; /* archive files with same device id only */ 77 int Yflag; /* same as Dflg except after name mode */ 78 int Zflag; /* same as uflg except after name mode */ 79 int vfpart; /* is partial verbose output in progress */ 80 int patime = 1; /* preserve file access time */ 81 int pmtime = 1; /* preserve file modification times */ 82 int nodirs; /* do not create directories as needed */ 83 int pmode; /* preserve file mode bits */ 84 int pids; /* preserve file uid/gid */ 85 int rmleadslash = 0; /* remove leading '/' from pathnames */ 86 int exit_val; /* exit value */ 87 int docrc; /* check/create file crc */ 88 char *dirptr; /* destination dir in a copy */ 89 const char *argv0; /* root of argv[0] */ 90 sigset_t s_mask; /* signal mask for cleanup critical sect */ 91 FILE *listf; /* file pointer to print file list to */ 92 char *tempfile; /* tempfile to use for mkstemp(3) */ 93 char *tempbase; /* basename of tempfile to use for mkstemp(3) */ 94 95 /* 96 * PAX - Portable Archive Interchange 97 * 98 * A utility to read, write, and write lists of the members of archive 99 * files and copy directory hierarchies. A variety of archive formats 100 * are supported (some are described in POSIX 1003.1 10.1): 101 * 102 * ustar - 10.1.1 extended tar interchange format 103 * cpio - 10.1.2 extended cpio interchange format 104 * tar - old BSD 4.3 tar format 105 * binary cpio - old cpio with binary header format 106 * sysVR4 cpio - with and without CRC 107 * 108 * This version is a superset of IEEE Std 1003.2b-d3 109 * 110 * Summary of Extensions to the IEEE Standard: 111 * 112 * 1 READ ENHANCEMENTS 113 * 1.1 Operations which read archives will continue to operate even when 114 * processing archives which may be damaged, truncated, or fail to meet 115 * format specs in several different ways. Damaged sections of archives 116 * are detected and avoided if possible. Attempts will be made to resync 117 * archive read operations even with badly damaged media. 118 * 1.2 Blocksize requirements are not strictly enforced on archive read. 119 * Tapes which have variable sized records can be read without errors. 120 * 1.3 The user can specify via the non-standard option flag -E if error 121 * resync operation should stop on a media error, try a specified number 122 * of times to correct, or try to correct forever. 123 * 1.4 Sparse files (lseek holes) stored on the archive (but stored with blocks 124 * of all zeros will be restored with holes appropriate for the target 125 * file system 126 * 1.5 The user is notified whenever something is found during archive 127 * read operations which violates spec (but the read will continue). 128 * 1.6 Multiple archive volumes can be read and may span over different 129 * archive devices 130 * 1.7 Rigidly restores all file attributes exactly as they are stored on the 131 * archive. 132 * 1.8 Modification change time ranges can be specified via multiple -T 133 * options. These allow a user to select files whose modification time 134 * lies within a specific time range. 135 * 1.9 Files can be selected based on owner (user name or uid) via one or more 136 * -U options. 137 * 1.10 Files can be selected based on group (group name or gid) via one o 138 * more -G options. 139 * 1.11 File modification time can be checked against existing file after 140 * name modification (-Z) 141 * 142 * 2 WRITE ENHANCEMENTS 143 * 2.1 Write operation will stop instead of allowing a user to create a flawed 144 * flawed archive (due to any problem). 145 * 2.2 Archives written by pax are forced to strictly conform to both the 146 * archive and pax the specific format specifications. 147 * 2.3 Blocking size and format is rigidly enforced on writes. 148 * 2.4 Formats which may exhibit header overflow problems (they have fields 149 * too small for large file systems, such as inode number storage), use 150 * routines designed to repair this problem. These techniques still 151 * conform to both pax and format specifications, but no longer truncate 152 * these fields. This removes any restrictions on using these archive 153 * formats on large file systems. 154 * 2.5 Multiple archive volumes can be written and may span over different 155 * archive devices 156 * 2.6 A archive volume record limit allows the user to specify the number 157 * of bytes stored on an archive volume. When reached the user is 158 * prompted for the next archive volume. This is specified with the 159 * non-standard -B flag. The limit is rounded up to the next blocksize. 160 * 2.7 All archive padding during write use zero filled sections. This makes 161 * it much easier to pull data out of flawed archive during read 162 * operations. 163 * 2.8 Access time reset with the -t applies to all file nodes (including 164 * directories). 165 * 2.9 Symbolic links can be followed with -L (optional in the spec). 166 * 2.10 Modification or inode change time ranges can be specified via 167 * multiple -T options. These allow a user to select files whose 168 * modification or inode change time lies within a specific time range. 169 * 2.11 Files can be selected based on owner (user name or uid) via one or more 170 * -U options. 171 * 2.12 Files can be selected based on group (group name or gid) via one o 172 * more -G options. 173 * 2.13 Symlinks which appear on the command line can be followed (without 174 * following other symlinks; -H flag) 175 * 176 * 3 COPY ENHANCEMENTS 177 * 3.1 Sparse files (lseek holes) can be copied without expanding the holes 178 * into zero filled blocks. The file copy is created with holes which are 179 * appropriate for the target file system 180 * 3.2 Access time as well as modification time on copied file trees can be 181 * preserved with the appropriate -p options. 182 * 3.3 Access time reset with the -t applies to all file nodes (including 183 * directories). 184 * 3.4 Symbolic links can be followed with -L (optional in the spec). 185 * 3.5 Modification or inode change time ranges can be specified via 186 * multiple -T options. These allow a user to select files whose 187 * modification or inode change time lies within a specific time range. 188 * 3.6 Files can be selected based on owner (user name or uid) via one or more 189 * -U options. 190 * 3.7 Files can be selected based on group (group name or gid) via one o 191 * more -G options. 192 * 3.8 Symlinks which appear on the command line can be followed (without 193 * following other symlinks; -H flag) 194 * 3.9 File inode change time can be checked against existing file before 195 * name modification (-D) 196 * 3.10 File inode change time can be checked against existing file after 197 * name modification (-Y) 198 * 3.11 File modification time can be checked against existing file after 199 * name modification (-Z) 200 * 201 * 4 GENERAL ENHANCEMENTS 202 * 4.1 Internal structure is designed to isolate format dependent and 203 * independent functions. Formats are selected via a format driver table. 204 * This encourages the addition of new archive formats by only having to 205 * write those routines which id, read and write the archive header. 206 */ 207 208 /* 209 * main() 210 * parse options, set up and operate as specified by the user. 211 * any operational flaw will set exit_val to non-zero 212 * Return: 0 if ok, 1 otherwise 213 */ 214 215 int 216 main(int argc, char *argv[]) 217 { 218 const char *tmpdir; 219 size_t tdlen; 220 221 (void) setlocale(LC_ALL, ""); 222 listf = stderr; 223 /* 224 * Keep a reference to cwd, so we can always come back home. 225 */ 226 cwdfd = open(".", O_RDONLY | O_CLOEXEC); 227 if (cwdfd < 0) { 228 syswarn(0, errno, "Can't open current working directory."); 229 return(exit_val); 230 } 231 232 /* 233 * Where should we put temporary files? 234 */ 235 if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') 236 tmpdir = _PATH_TMP; 237 tdlen = strlen(tmpdir); 238 while (tdlen > 0 && tmpdir[tdlen - 1] == '/') 239 tdlen--; 240 tempfile = malloc(tdlen + 1 + sizeof(_TFILE_BASE)); 241 if (tempfile == NULL) { 242 paxwarn(1, "Cannot allocate memory for temp file name."); 243 return(exit_val); 244 } 245 if (tdlen) 246 memcpy(tempfile, tmpdir, tdlen); 247 tempbase = tempfile + tdlen; 248 *tempbase++ = '/'; 249 250 /* 251 * parse options, determine operational mode, general init 252 */ 253 options(argc, argv); 254 if ((gen_init() < 0) || (tty_init() < 0)) 255 return(exit_val); 256 257 /* 258 * select a primary operation mode 259 */ 260 switch (act) { 261 case EXTRACT: 262 extract(); 263 break; 264 case ARCHIVE: 265 archive(); 266 break; 267 case APPND: 268 if (gzip_program != NULL) 269 err(1, "can not gzip while appending"); 270 append(); 271 break; 272 case COPY: 273 copy(); 274 break; 275 default: 276 case LIST: 277 list(); 278 break; 279 } 280 return(exit_val); 281 } 282 283 /* 284 * sig_cleanup() 285 * when interrupted we try to do whatever delayed processing we can. 286 * This is not critical, but we really ought to limit our damage when we 287 * are aborted by the user. 288 * Return: 289 * never.... 290 */ 291 292 void 293 sig_cleanup(int which_sig) 294 { 295 /* 296 * restore modes and times for any dirs we may have created 297 * or any dirs we may have read. Set vflag and vfpart so the user 298 * will clearly see the message on a line by itself. 299 */ 300 vflag = vfpart = 1; 301 if (which_sig == SIGXCPU) 302 paxwarn(0, "Cpu time limit reached, cleaning up."); 303 else 304 paxwarn(0, "Signal caught, cleaning up."); 305 306 ar_close(); 307 proc_dir(); 308 if (tflag) 309 atdir_end(); 310 exit(1); 311 } 312 313 /* 314 * setup_sig() 315 * set a signal to be caught, but only if it isn't being ignored already 316 */ 317 318 static int 319 setup_sig(int sig, const struct sigaction *n_hand) 320 { 321 struct sigaction o_hand; 322 323 if (sigaction(sig, NULL, &o_hand) < 0) 324 return (-1); 325 326 if (o_hand.sa_handler == SIG_IGN) 327 return (0); 328 329 return (sigaction(sig, n_hand, NULL)); 330 } 331 332 /* 333 * gen_init() 334 * general setup routines. Not all are required, but they really help 335 * when dealing with a medium to large sized archives. 336 */ 337 338 static int 339 gen_init(void) 340 { 341 struct rlimit reslimit; 342 struct sigaction n_hand; 343 344 /* 345 * Really needed to handle large archives. We can run out of memory for 346 * internal tables really fast when we have a whole lot of files... 347 */ 348 if (getrlimit(RLIMIT_DATA , &reslimit) == 0){ 349 reslimit.rlim_cur = reslimit.rlim_max; 350 (void)setrlimit(RLIMIT_DATA , &reslimit); 351 } 352 353 /* 354 * should file size limits be waived? if the os limits us, this is 355 * needed if we want to write a large archive 356 */ 357 if (getrlimit(RLIMIT_FSIZE , &reslimit) == 0){ 358 reslimit.rlim_cur = reslimit.rlim_max; 359 (void)setrlimit(RLIMIT_FSIZE , &reslimit); 360 } 361 362 /* 363 * increase the size the stack can grow to 364 */ 365 if (getrlimit(RLIMIT_STACK , &reslimit) == 0){ 366 reslimit.rlim_cur = reslimit.rlim_max; 367 (void)setrlimit(RLIMIT_STACK , &reslimit); 368 } 369 370 /* 371 * not really needed, but doesn't hurt 372 */ 373 if (getrlimit(RLIMIT_RSS , &reslimit) == 0){ 374 reslimit.rlim_cur = reslimit.rlim_max; 375 (void)setrlimit(RLIMIT_RSS , &reslimit); 376 } 377 378 /* 379 * signal handling to reset stored directory times and modes. Since 380 * we deal with broken pipes via failed writes we ignore it. We also 381 * deal with any file size limit through failed writes. Cpu time 382 * limits are caught and a cleanup is forced. 383 */ 384 if ((sigemptyset(&s_mask) < 0) || (sigaddset(&s_mask, SIGTERM) < 0) || 385 (sigaddset(&s_mask,SIGINT) < 0)||(sigaddset(&s_mask,SIGHUP) < 0) || 386 (sigaddset(&s_mask,SIGPIPE) < 0)||(sigaddset(&s_mask,SIGQUIT)<0) || 387 (sigaddset(&s_mask,SIGXCPU) < 0)||(sigaddset(&s_mask,SIGXFSZ)<0)) { 388 paxwarn(1, "Unable to set up signal mask"); 389 return(-1); 390 } 391 memset(&n_hand, 0, sizeof n_hand); 392 n_hand.sa_mask = s_mask; 393 n_hand.sa_flags = 0; 394 n_hand.sa_handler = sig_cleanup; 395 396 if (setup_sig(SIGHUP, &n_hand) || 397 setup_sig(SIGTERM, &n_hand) || 398 setup_sig(SIGINT, &n_hand) || 399 setup_sig(SIGQUIT, &n_hand) || 400 setup_sig(SIGXCPU, &n_hand)) 401 goto out; 402 403 n_hand.sa_handler = SIG_IGN; 404 if ((sigaction(SIGPIPE, &n_hand, NULL) < 0) || 405 (sigaction(SIGXFSZ, &n_hand, NULL) < 0)) 406 goto out; 407 return(0); 408 409 out: 410 syswarn(1, errno, "Unable to set up signal handler"); 411 return(-1); 412 } 413