///////////////////////////////////////////////////////////////////////////////
//
/// \file       file_io.c
/// \brief      File opening, unlinking, and closing
//
//  Author:     Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#include "private.h"

#include <fcntl.h>

#ifdef TUKLIB_DOSLIKE
#	include <io.h>
#else
#	include <poll.h>
/// True if a failure to set the owner with fchown() should be reported.
/// io_init() sets this to true only when the effective user ID is 0.
static bool warn_fchown;
#endif

#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
#	include <sys/time.h>
#elif defined(HAVE__FUTIME)
#	include <sys/utime.h>
#elif defined(HAVE_UTIME)
#	include <utime.h>
#endif

#ifdef HAVE_CAPSICUM
#	ifdef HAVE_SYS_CAPSICUM_H
#		include <sys/capsicum.h>
#	else
#		include <sys/capability.h>
#	endif
#endif

#include "tuklib_open_stdxxx.h"

// O_BINARY and O_NOCTTY are OR'ed into the open() flags unconditionally
// in this file. Where the system headers don't define them, define them
// as zero so that they have no effect.
#ifndef O_BINARY
#	define O_BINARY 0
#endif

#ifndef O_NOCTTY
#	define O_NOCTTY 0
#endif

// Using this macro to silence a warning from gcc -Wlogical-op.
#if EAGAIN == EWOULDBLOCK
#	define IS_EAGAIN_OR_EWOULDBLOCK(e) ((e) == EAGAIN)
#else
#	define IS_EAGAIN_OR_EWOULDBLOCK(e) \
		((e) == EAGAIN || (e) == EWOULDBLOCK)
#endif


/// Return values of io_wait()
typedef enum {
	IO_WAIT_MORE,    // Reading or writing is possible.
	IO_WAIT_ERROR,   // Error or user_abort
	IO_WAIT_TIMEOUT, // poll() timed out
} io_wait_ret;


/// If true, try to create sparse files when decompressing.
static bool try_sparse = true;

#ifdef ENABLE_SANDBOX
/// True if the conditions for sandboxing (described in main()) have been met.
static bool sandbox_allowed = false;
#endif

#ifndef TUKLIB_DOSLIKE
/// File status flags of standard input. This is used by io_open_src()
/// and io_close_src().
77 static int stdin_flags; 78 static bool restore_stdin_flags = false; 79 80 /// Original file status flags of standard output. This is used by 81 /// io_open_dest() and io_close_dest() to save and restore the flags. 82 static int stdout_flags; 83 static bool restore_stdout_flags = false; 84 85 /// Self-pipe used together with the user_abort variable to avoid 86 /// race conditions with signal handling. 87 static int user_abort_pipe[2]; 88 #endif 89 90 91 static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size); 92 93 94 extern void 95 io_init(void) 96 { 97 // Make sure that stdin, stdout, and stderr are connected to 98 // a valid file descriptor. Exit immediately with exit code ERROR 99 // if we cannot make the file descriptors valid. Maybe we should 100 // print an error message, but our stderr could be screwed anyway. 101 tuklib_open_stdxxx(E_ERROR); 102 103 #ifndef TUKLIB_DOSLIKE 104 // If fchown() fails setting the owner, we warn about it only if 105 // we are root. 106 warn_fchown = geteuid() == 0; 107 108 // Create a pipe for the self-pipe trick. 109 if (pipe(user_abort_pipe)) 110 message_fatal(_("Error creating a pipe: %s"), 111 strerror(errno)); 112 113 // Make both ends of the pipe non-blocking. 114 for (unsigned i = 0; i < 2; ++i) { 115 int flags = fcntl(user_abort_pipe[i], F_GETFL); 116 if (flags == -1 || fcntl(user_abort_pipe[i], F_SETFL, 117 flags | O_NONBLOCK) == -1) 118 message_fatal(_("Error creating a pipe: %s"), 119 strerror(errno)); 120 } 121 #endif 122 123 #ifdef __DJGPP__ 124 // Avoid doing useless things when statting files. 125 // This isn't important but doesn't hurt. 126 _djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE; 127 #endif 128 129 return; 130 } 131 132 133 #ifndef TUKLIB_DOSLIKE 134 extern void 135 io_write_to_user_abort_pipe(void) 136 { 137 // If the write() fails, it's probably due to the pipe being full. 138 // Failing in that case is fine. 
If the reason is something else, 139 // there's not much we can do since this is called in a signal 140 // handler. So ignore the errors and try to avoid warnings with 141 // GCC and glibc when _FORTIFY_SOURCE=2 is used. 142 uint8_t b = '\0'; 143 const ssize_t ret = write(user_abort_pipe[1], &b, 1); 144 (void)ret; 145 return; 146 } 147 #endif 148 149 150 extern void 151 io_no_sparse(void) 152 { 153 try_sparse = false; 154 return; 155 } 156 157 158 #ifdef ENABLE_SANDBOX 159 extern void 160 io_allow_sandbox(void) 161 { 162 sandbox_allowed = true; 163 return; 164 } 165 166 167 /// Enables operating-system-specific sandbox if it is possible. 168 /// src_fd is the file descriptor of the input file. 169 static void 170 io_sandbox_enter(int src_fd) 171 { 172 if (!sandbox_allowed) { 173 // This message is more often annoying than useful so 174 // it's commented out. It can be useful when developing 175 // the sandboxing code. 176 //message(V_DEBUG, _("Sandbox is disabled due " 177 // "to incompatible command line arguments")); 178 return; 179 } 180 181 const char dummy_str[] = "x"; 182 183 // Try to ensure that both libc and xz locale files have been 184 // loaded when NLS is enabled. 185 snprintf(NULL, 0, "%s%s", _(dummy_str), strerror(EINVAL)); 186 187 // Try to ensure that iconv data files needed for handling multibyte 188 // characters have been loaded. This is needed at least with glibc. 189 tuklib_mbstr_width(dummy_str, NULL); 190 191 #ifdef HAVE_CAPSICUM 192 // Capsicum needs FreeBSD 10.0 or later. 
193 cap_rights_t rights; 194 195 if (cap_enter()) 196 goto error; 197 198 if (cap_rights_limit(src_fd, cap_rights_init(&rights, 199 CAP_EVENT, CAP_FCNTL, CAP_LOOKUP, CAP_READ, CAP_SEEK))) 200 goto error; 201 202 if (src_fd != STDIN_FILENO && cap_rights_limit( 203 STDIN_FILENO, cap_rights_clear(&rights))) 204 goto error; 205 206 if (cap_rights_limit(STDOUT_FILENO, cap_rights_init(&rights, 207 CAP_EVENT, CAP_FCNTL, CAP_FSTAT, CAP_LOOKUP, 208 CAP_WRITE, CAP_SEEK))) 209 goto error; 210 211 if (cap_rights_limit(STDERR_FILENO, cap_rights_init(&rights, 212 CAP_WRITE))) 213 goto error; 214 215 if (cap_rights_limit(user_abort_pipe[0], cap_rights_init(&rights, 216 CAP_EVENT))) 217 goto error; 218 219 if (cap_rights_limit(user_abort_pipe[1], cap_rights_init(&rights, 220 CAP_WRITE))) 221 goto error; 222 223 #elif defined(HAVE_PLEDGE) 224 // pledge() was introduced in OpenBSD 5.9. 225 // 226 // main() unconditionally calls pledge() with fairly relaxed 227 // promises which work in all situations. Here we make the 228 // sandbox more strict. 229 if (pledge("stdio", "")) 230 goto error; 231 232 (void)src_fd; 233 234 #else 235 # error ENABLE_SANDBOX is defined but no sandboxing method was found. 236 #endif 237 238 // This message is annoying in xz -lvv. 239 //message(V_DEBUG, _("Sandbox was successfully enabled")); 240 return; 241 242 error: 243 #ifdef HAVE_CAPSICUM 244 // If a kernel is configured without capability mode support or 245 // used in an emulator that does not implement the capability 246 // system calls, then the Capsicum system calls will fail and set 247 // errno to ENOSYS. In that case xz will silently run without 248 // the sandbox. 
249 if (errno == ENOSYS) 250 return; 251 #endif 252 message_fatal(_("Failed to enable the sandbox")); 253 } 254 #endif // ENABLE_SANDBOX 255 256 257 #ifndef TUKLIB_DOSLIKE 258 /// \brief Waits for input or output to become available or for a signal 259 /// 260 /// This uses the self-pipe trick to avoid a race condition that can occur 261 /// if a signal is caught after user_abort has been checked but before e.g. 262 /// read() has been called. In that situation read() could block unless 263 /// non-blocking I/O is used. With non-blocking I/O something like select() 264 /// or poll() is needed to avoid a busy-wait loop, and the same race condition 265 /// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in 266 /// POSIX) but neither is portable enough in 2013. The self-pipe trick is 267 /// old and very portable. 268 static io_wait_ret 269 io_wait(file_pair *pair, int timeout, bool is_reading) 270 { 271 struct pollfd pfd[2]; 272 273 if (is_reading) { 274 pfd[0].fd = pair->src_fd; 275 pfd[0].events = POLLIN; 276 } else { 277 pfd[0].fd = pair->dest_fd; 278 pfd[0].events = POLLOUT; 279 } 280 281 pfd[1].fd = user_abort_pipe[0]; 282 pfd[1].events = POLLIN; 283 284 while (true) { 285 const int ret = poll(pfd, 2, timeout); 286 287 if (user_abort) 288 return IO_WAIT_ERROR; 289 290 if (ret == -1) { 291 if (errno == EINTR || errno == EAGAIN) 292 continue; 293 294 message_error(_("%s: poll() failed: %s"), 295 is_reading ? pair->src_name 296 : pair->dest_name, 297 strerror(errno)); 298 return IO_WAIT_ERROR; 299 } 300 301 if (ret == 0) 302 return IO_WAIT_TIMEOUT; 303 304 if (pfd[0].revents != 0) 305 return IO_WAIT_MORE; 306 } 307 } 308 #endif 309 310 311 /// \brief Unlink a file 312 /// 313 /// This tries to verify that the file being unlinked really is the file that 314 /// we want to unlink by verifying device and inode numbers. 
There's still 315 /// a small unavoidable race, but this is much better than nothing (the file 316 /// could have been moved/replaced even hours earlier). 317 static void 318 io_unlink(const char *name, const struct stat *known_st) 319 { 320 #if defined(TUKLIB_DOSLIKE) 321 // On DOS-like systems, st_ino is meaningless, so don't bother 322 // testing it. Just silence a compiler warning. 323 (void)known_st; 324 #else 325 struct stat new_st; 326 327 // If --force was used, use stat() instead of lstat(). This way 328 // (de)compressing symlinks works correctly. However, it also means 329 // that xz cannot detect if a regular file foo is renamed to bar 330 // and then a symlink foo -> bar is created. Because of stat() 331 // instead of lstat(), xz will think that foo hasn't been replaced 332 // with another file. Thus, xz will remove foo even though it no 333 // longer is the same file that xz used when it started compressing. 334 // Probably it's not too bad though, so this doesn't need a more 335 // complex fix. 336 const int stat_ret = opt_force 337 ? stat(name, &new_st) : lstat(name, &new_st); 338 339 if (stat_ret 340 # ifdef __VMS 341 // st_ino is an array, and we don't want to 342 // compare st_dev at all. 343 || memcmp(&new_st.st_ino, &known_st->st_ino, 344 sizeof(new_st.st_ino)) != 0 345 # else 346 // Typical POSIX-like system 347 || new_st.st_dev != known_st->st_dev 348 || new_st.st_ino != known_st->st_ino 349 # endif 350 ) 351 // TRANSLATORS: When compression or decompression finishes, 352 // and xz is going to remove the source file, xz first checks 353 // if the source file still exists, and if it does, does its 354 // device and inode numbers match what xz saw when it opened 355 // the source file. If these checks fail, this message is 356 // shown, %s being the filename, and the file is not deleted. 
357 // The check for device and inode numbers is there, because 358 // it is possible that the user has put a new file in place 359 // of the original file, and in that case it obviously 360 // shouldn't be removed. 361 message_warning(_("%s: File seems to have been moved, " 362 "not removing"), name); 363 else 364 #endif 365 // There's a race condition between lstat() and unlink() 366 // but at least we have tried to avoid removing wrong file. 367 if (unlink(name)) 368 message_warning(_("%s: Cannot remove: %s"), 369 name, strerror(errno)); 370 371 return; 372 } 373 374 375 /// \brief Copies owner/group and permissions 376 /// 377 /// \todo ACL and EA support 378 /// 379 static void 380 io_copy_attrs(const file_pair *pair) 381 { 382 // Skip chown and chmod on Windows. 383 #ifndef TUKLIB_DOSLIKE 384 // This function is more tricky than you may think at first. 385 // Blindly copying permissions may permit users to access the 386 // destination file who didn't have permission to access the 387 // source file. 388 389 // Try changing the owner of the file. If we aren't root or the owner 390 // isn't already us, fchown() probably doesn't succeed. We warn 391 // about failing fchown() only if we are root. 392 if (fchown(pair->dest_fd, pair->src_st.st_uid, (gid_t)(-1)) 393 && warn_fchown) 394 message_warning(_("%s: Cannot set the file owner: %s"), 395 pair->dest_name, strerror(errno)); 396 397 mode_t mode; 398 399 // With BSD semantics the new dest file may have a group that 400 // does not belong to the user. If the src file has the same gid 401 // nothing has to be done. Nevertheless OpenBSD fchown(2) fails 402 // in this case which seems to be POSIX compliant. As there is 403 // nothing to do, skip the system call. 
404 if (pair->dest_st.st_gid != pair->src_st.st_gid 405 && fchown(pair->dest_fd, (uid_t)(-1), 406 pair->src_st.st_gid)) { 407 message_warning(_("%s: Cannot set the file group: %s"), 408 pair->dest_name, strerror(errno)); 409 // We can still safely copy some additional permissions: 410 // `group' must be at least as strict as `other' and 411 // also vice versa. 412 // 413 // NOTE: After this, the owner of the source file may 414 // get additional permissions. This shouldn't be too bad, 415 // because the owner would have had permission to chmod 416 // the original file anyway. 417 mode = ((pair->src_st.st_mode & 0070) >> 3) 418 & (pair->src_st.st_mode & 0007); 419 mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode; 420 } else { 421 // Drop the setuid, setgid, and sticky bits. 422 mode = pair->src_st.st_mode & 0777; 423 } 424 425 if (fchmod(pair->dest_fd, mode)) 426 message_warning(_("%s: Cannot set the file permissions: %s"), 427 pair->dest_name, strerror(errno)); 428 #endif 429 430 // Copy the timestamps. We have several possible ways to do this, of 431 // which some are better in both security and precision. 432 // 433 // First, get the nanosecond part of the timestamps. As of writing, 434 // it's not standardized by POSIX, and there are several names for 435 // the same thing in struct stat. 
436 long atime_nsec; 437 long mtime_nsec; 438 439 # if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) 440 // GNU and Solaris 441 atime_nsec = pair->src_st.st_atim.tv_nsec; 442 mtime_nsec = pair->src_st.st_mtim.tv_nsec; 443 444 # elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) 445 // BSD 446 atime_nsec = pair->src_st.st_atimespec.tv_nsec; 447 mtime_nsec = pair->src_st.st_mtimespec.tv_nsec; 448 449 # elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC) 450 // GNU and BSD without extensions 451 atime_nsec = pair->src_st.st_atimensec; 452 mtime_nsec = pair->src_st.st_mtimensec; 453 454 # elif defined(HAVE_STRUCT_STAT_ST_UATIME) 455 // Tru64 456 atime_nsec = pair->src_st.st_uatime * 1000; 457 mtime_nsec = pair->src_st.st_umtime * 1000; 458 459 # elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC) 460 // UnixWare 461 atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec; 462 mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec; 463 464 # else 465 // Safe fallback 466 atime_nsec = 0; 467 mtime_nsec = 0; 468 # endif 469 470 // Construct a structure to hold the timestamps and call appropriate 471 // function to set the timestamps. 472 #if defined(HAVE_FUTIMENS) 473 // Use nanosecond precision. 474 struct timespec tv[2]; 475 tv[0].tv_sec = pair->src_st.st_atime; 476 tv[0].tv_nsec = atime_nsec; 477 tv[1].tv_sec = pair->src_st.st_mtime; 478 tv[1].tv_nsec = mtime_nsec; 479 480 (void)futimens(pair->dest_fd, tv); 481 482 #elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) 483 // Use microsecond precision. 484 struct timeval tv[2]; 485 tv[0].tv_sec = pair->src_st.st_atime; 486 tv[0].tv_usec = atime_nsec / 1000; 487 tv[1].tv_sec = pair->src_st.st_mtime; 488 tv[1].tv_usec = mtime_nsec / 1000; 489 490 # if defined(HAVE_FUTIMES) 491 (void)futimes(pair->dest_fd, tv); 492 # elif defined(HAVE_FUTIMESAT) 493 (void)futimesat(pair->dest_fd, NULL, tv); 494 # else 495 // Argh, no function to use a file descriptor to set the timestamp. 
496 (void)utimes(pair->dest_name, tv); 497 # endif 498 499 #elif defined(HAVE__FUTIME) 500 // Use one-second precision with Windows-specific _futime(). 501 // We could use utime() too except that for some reason the 502 // timestamp will get reset at close(). With _futime() it works. 503 // This struct cannot be const as _futime() takes a non-const pointer. 504 struct _utimbuf buf = { 505 .actime = pair->src_st.st_atime, 506 .modtime = pair->src_st.st_mtime, 507 }; 508 509 // Avoid warnings. 510 (void)atime_nsec; 511 (void)mtime_nsec; 512 513 (void)_futime(pair->dest_fd, &buf); 514 515 #elif defined(HAVE_UTIME) 516 // Use one-second precision. utime() doesn't support using file 517 // descriptor either. Some systems have broken utime() prototype 518 // so don't make this const. 519 struct utimbuf buf = { 520 .actime = pair->src_st.st_atime, 521 .modtime = pair->src_st.st_mtime, 522 }; 523 524 // Avoid warnings. 525 (void)atime_nsec; 526 (void)mtime_nsec; 527 528 (void)utime(pair->dest_name, &buf); 529 #endif 530 531 return; 532 } 533 534 535 /// Opens the source file. Returns false on success, true on error. 536 static bool 537 io_open_src_real(file_pair *pair) 538 { 539 // There's nothing to open when reading from stdin. 540 if (pair->src_name == stdin_filename) { 541 pair->src_fd = STDIN_FILENO; 542 #ifdef TUKLIB_DOSLIKE 543 setmode(STDIN_FILENO, O_BINARY); 544 #else 545 // Try to set stdin to non-blocking mode. It won't work 546 // e.g. on OpenBSD if stdout is e.g. /dev/null. In such 547 // case we proceed as if stdin were non-blocking anyway 548 // (in case of /dev/null it will be in practice). The 549 // same applies to stdout in io_open_dest_real(). 
550 stdin_flags = fcntl(STDIN_FILENO, F_GETFL); 551 if (stdin_flags == -1) { 552 message_error(_("Error getting the file status flags " 553 "from standard input: %s"), 554 strerror(errno)); 555 return true; 556 } 557 558 if ((stdin_flags & O_NONBLOCK) == 0 559 && fcntl(STDIN_FILENO, F_SETFL, 560 stdin_flags | O_NONBLOCK) != -1) 561 restore_stdin_flags = true; 562 #endif 563 #ifdef HAVE_POSIX_FADVISE 564 // It will fail if stdin is a pipe and that's fine. 565 (void)posix_fadvise(STDIN_FILENO, 0, 0, 566 opt_mode == MODE_LIST 567 ? POSIX_FADV_RANDOM 568 : POSIX_FADV_SEQUENTIAL); 569 #endif 570 return false; 571 } 572 573 // Symlinks are not followed unless writing to stdout or --force 574 // or --keep was used. 575 const bool follow_symlinks 576 = opt_stdout || opt_force || opt_keep_original; 577 578 // We accept only regular files if we are writing the output 579 // to disk too. bzip2 allows overriding this with --force but 580 // gzip and xz don't. 581 const bool reg_files_only = !opt_stdout; 582 583 // Flags for open() 584 int flags = O_RDONLY | O_BINARY | O_NOCTTY; 585 586 #ifndef TUKLIB_DOSLIKE 587 // Use non-blocking I/O: 588 // - It prevents blocking when opening FIFOs and some other 589 // special files, which is good if we want to accept only 590 // regular files. 591 // - It can help avoiding some race conditions with signal handling. 592 flags |= O_NONBLOCK; 593 #endif 594 595 #if defined(O_NOFOLLOW) 596 if (!follow_symlinks) 597 flags |= O_NOFOLLOW; 598 #elif !defined(TUKLIB_DOSLIKE) 599 // Some POSIX-like systems lack O_NOFOLLOW (it's not required 600 // by POSIX). Check for symlinks with a separate lstat() on 601 // these systems. 
602 if (!follow_symlinks) { 603 struct stat st; 604 if (lstat(pair->src_name, &st)) { 605 message_error(_("%s: %s"), pair->src_name, 606 strerror(errno)); 607 return true; 608 609 } else if (S_ISLNK(st.st_mode)) { 610 message_warning(_("%s: Is a symbolic link, " 611 "skipping"), pair->src_name); 612 return true; 613 } 614 } 615 #else 616 // Avoid warnings. 617 (void)follow_symlinks; 618 #endif 619 620 // Try to open the file. Signals have been blocked so EINTR shouldn't 621 // be possible. 622 pair->src_fd = open(pair->src_name, flags); 623 624 if (pair->src_fd == -1) { 625 // Signals (that have a signal handler) have been blocked. 626 assert(errno != EINTR); 627 628 #ifdef O_NOFOLLOW 629 // Give an understandable error message if the reason 630 // for failing was that the file was a symbolic link. 631 // 632 // Note that at least Linux, OpenBSD, Solaris, and Darwin 633 // use ELOOP to indicate that O_NOFOLLOW was the reason 634 // that open() failed. Because there may be 635 // directories in the pathname, ELOOP may occur also 636 // because of a symlink loop in the directory part. 637 // So ELOOP doesn't tell us what actually went wrong, 638 // and this stupidity went into POSIX-1.2008 too. 639 // 640 // FreeBSD associates EMLINK with O_NOFOLLOW and 641 // Tru64 uses ENOTSUP. We use these directly here 642 // and skip the lstat() call and the associated race. 643 // I want to hear if there are other kernels that 644 // fail with something else than ELOOP with O_NOFOLLOW. 
645 bool was_symlink = false; 646 647 # if defined(__FreeBSD__) || defined(__DragonFly__) 648 if (errno == EMLINK) 649 was_symlink = true; 650 651 # elif defined(__digital__) && defined(__unix__) 652 if (errno == ENOTSUP) 653 was_symlink = true; 654 655 # elif defined(__NetBSD__) 656 if (errno == EFTYPE) 657 was_symlink = true; 658 659 # else 660 if (errno == ELOOP && !follow_symlinks) { 661 const int saved_errno = errno; 662 struct stat st; 663 if (lstat(pair->src_name, &st) == 0 664 && S_ISLNK(st.st_mode)) 665 was_symlink = true; 666 667 errno = saved_errno; 668 } 669 # endif 670 671 if (was_symlink) 672 message_warning(_("%s: Is a symbolic link, " 673 "skipping"), pair->src_name); 674 else 675 #endif 676 // Something else than O_NOFOLLOW failing 677 // (assuming that the race conditions didn't 678 // confuse us). 679 message_error(_("%s: %s"), pair->src_name, 680 strerror(errno)); 681 682 return true; 683 } 684 685 // Stat the source file. We need the result also when we copy 686 // the permissions, and when unlinking. 687 // 688 // NOTE: Use stat() instead of fstat() with DJGPP, because 689 // then we have a better chance to get st_ino value that can 690 // be used in io_open_dest_real() to prevent overwriting the 691 // source file. 692 #ifdef __DJGPP__ 693 if (stat(pair->src_name, &pair->src_st)) 694 goto error_msg; 695 #else 696 if (fstat(pair->src_fd, &pair->src_st)) 697 goto error_msg; 698 #endif 699 700 if (S_ISDIR(pair->src_st.st_mode)) { 701 message_warning(_("%s: Is a directory, skipping"), 702 pair->src_name); 703 goto error; 704 } 705 706 if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) { 707 message_warning(_("%s: Not a regular file, skipping"), 708 pair->src_name); 709 goto error; 710 } 711 712 #ifndef TUKLIB_DOSLIKE 713 if (reg_files_only && !opt_force && !opt_keep_original) { 714 if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) { 715 // gzip rejects setuid and setgid files even 716 // when --force was used. 
bzip2 doesn't check 717 // for them, but calls fchown() after fchmod(), 718 // and many systems automatically drop setuid 719 // and setgid bits there. 720 // 721 // We accept setuid and setgid files if 722 // --force or --keep was used. We drop these bits 723 // explicitly in io_copy_attr(). 724 message_warning(_("%s: File has setuid or " 725 "setgid bit set, skipping"), 726 pair->src_name); 727 goto error; 728 } 729 730 if (pair->src_st.st_mode & S_ISVTX) { 731 message_warning(_("%s: File has sticky bit " 732 "set, skipping"), 733 pair->src_name); 734 goto error; 735 } 736 737 if (pair->src_st.st_nlink > 1) { 738 message_warning(_("%s: Input file has more " 739 "than one hard link, " 740 "skipping"), pair->src_name); 741 goto error; 742 } 743 } 744 745 // If it is something else than a regular file, wait until 746 // there is input available. This way reading from FIFOs 747 // will work when open() is used with O_NONBLOCK. 748 if (!S_ISREG(pair->src_st.st_mode)) { 749 signals_unblock(); 750 const io_wait_ret ret = io_wait(pair, -1, true); 751 signals_block(); 752 753 if (ret != IO_WAIT_MORE) 754 goto error; 755 } 756 #endif 757 758 #ifdef HAVE_POSIX_FADVISE 759 // It will fail with some special files like FIFOs but that is fine. 760 (void)posix_fadvise(pair->src_fd, 0, 0, 761 opt_mode == MODE_LIST 762 ? POSIX_FADV_RANDOM 763 : POSIX_FADV_SEQUENTIAL); 764 #endif 765 766 return false; 767 768 error_msg: 769 message_error(_("%s: %s"), pair->src_name, strerror(errno)); 770 error: 771 (void)close(pair->src_fd); 772 return true; 773 } 774 775 776 extern file_pair * 777 io_open_src(const char *src_name) 778 { 779 if (src_name[0] == '\0') { 780 message_error(_("Empty filename, skipping")); 781 return NULL; 782 } 783 784 // Since we have only one file open at a time, we can use 785 // a statically allocated structure. 786 static file_pair pair; 787 788 // This implicitly also initializes src_st.st_size to zero 789 // which is expected to be <= 0 by default. 
fstat() isn't 790 // called when reading from standard input but src_st.st_size 791 // is still read. 792 pair = (file_pair){ 793 .src_name = src_name, 794 .dest_name = NULL, 795 .src_fd = -1, 796 .dest_fd = -1, 797 .src_eof = false, 798 .src_has_seen_input = false, 799 .flush_needed = false, 800 .dest_try_sparse = false, 801 .dest_pending_sparse = 0, 802 }; 803 804 // Block the signals, for which we have a custom signal handler, so 805 // that we don't need to worry about EINTR. 806 signals_block(); 807 const bool error = io_open_src_real(&pair); 808 signals_unblock(); 809 810 #ifdef ENABLE_SANDBOX 811 if (!error) 812 io_sandbox_enter(pair.src_fd); 813 #endif 814 815 return error ? NULL : &pair; 816 } 817 818 819 /// \brief Closes source file of the file_pair structure 820 /// 821 /// \param pair File whose src_fd should be closed 822 /// \param success If true, the file will be removed from the disk if 823 /// closing succeeds and --keep hasn't been used. 824 static void 825 io_close_src(file_pair *pair, bool success) 826 { 827 #ifndef TUKLIB_DOSLIKE 828 if (restore_stdin_flags) { 829 assert(pair->src_fd == STDIN_FILENO); 830 831 restore_stdin_flags = false; 832 833 if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1) 834 message_error(_("Error restoring the status flags " 835 "to standard input: %s"), 836 strerror(errno)); 837 } 838 #endif 839 840 if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) { 841 // Close the file before possibly unlinking it. On DOS-like 842 // systems this is always required since unlinking will fail 843 // if the file is open. On POSIX systems it usually works 844 // to unlink open files, but in some cases it doesn't and 845 // one gets EBUSY in errno. 846 // 847 // xz 5.2.2 and older unlinked the file before closing it 848 // (except on DOS-like systems). The old code didn't handle 849 // EBUSY and could fail e.g. on some CIFS shares. 
The 850 // advantage of unlinking before closing is negligible 851 // (avoids a race between close() and stat()/lstat() and 852 // unlink()), so let's keep this simple. 853 (void)close(pair->src_fd); 854 855 if (success && !opt_keep_original) 856 io_unlink(pair->src_name, &pair->src_st); 857 } 858 859 return; 860 } 861 862 863 static bool 864 io_open_dest_real(file_pair *pair) 865 { 866 if (opt_stdout || pair->src_fd == STDIN_FILENO) { 867 // We don't modify or free() this. 868 pair->dest_name = (char *)"(stdout)"; 869 pair->dest_fd = STDOUT_FILENO; 870 #ifdef TUKLIB_DOSLIKE 871 setmode(STDOUT_FILENO, O_BINARY); 872 #else 873 // Try to set O_NONBLOCK if it isn't already set. 874 // If it fails, we assume that stdout is non-blocking 875 // in practice. See the comments in io_open_src_real() 876 // for similar situation with stdin. 877 // 878 // NOTE: O_APPEND may be unset later in this function 879 // and it relies on stdout_flags being set here. 880 stdout_flags = fcntl(STDOUT_FILENO, F_GETFL); 881 if (stdout_flags == -1) { 882 message_error(_("Error getting the file status flags " 883 "from standard output: %s"), 884 strerror(errno)); 885 return true; 886 } 887 888 if ((stdout_flags & O_NONBLOCK) == 0 889 && fcntl(STDOUT_FILENO, F_SETFL, 890 stdout_flags | O_NONBLOCK) != -1) 891 restore_stdout_flags = true; 892 #endif 893 } else { 894 pair->dest_name = suffix_get_dest_name(pair->src_name); 895 if (pair->dest_name == NULL) 896 return true; 897 898 #ifdef __DJGPP__ 899 struct stat st; 900 if (stat(pair->dest_name, &st) == 0) { 901 // Check that it isn't a special file like "prn". 902 if (st.st_dev == -1) { 903 message_error("%s: Refusing to write to " 904 "a DOS special file", 905 pair->dest_name); 906 free(pair->dest_name); 907 return true; 908 } 909 910 // Check that we aren't overwriting the source file. 
911 if (st.st_dev == pair->src_st.st_dev 912 && st.st_ino == pair->src_st.st_ino) { 913 message_error("%s: Output file is the same " 914 "as the input file", 915 pair->dest_name); 916 free(pair->dest_name); 917 return true; 918 } 919 } 920 #endif 921 922 // If --force was used, unlink the target file first. 923 if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { 924 message_error(_("%s: Cannot remove: %s"), 925 pair->dest_name, strerror(errno)); 926 free(pair->dest_name); 927 return true; 928 } 929 930 // Open the file. 931 int flags = O_WRONLY | O_BINARY | O_NOCTTY 932 | O_CREAT | O_EXCL; 933 #ifndef TUKLIB_DOSLIKE 934 flags |= O_NONBLOCK; 935 #endif 936 const mode_t mode = S_IRUSR | S_IWUSR; 937 pair->dest_fd = open(pair->dest_name, flags, mode); 938 939 if (pair->dest_fd == -1) { 940 message_error(_("%s: %s"), pair->dest_name, 941 strerror(errno)); 942 free(pair->dest_name); 943 return true; 944 } 945 } 946 947 if (fstat(pair->dest_fd, &pair->dest_st)) { 948 // If fstat() really fails, we have a safe fallback here. 949 #if defined(__VMS) 950 pair->dest_st.st_ino[0] = 0; 951 pair->dest_st.st_ino[1] = 0; 952 pair->dest_st.st_ino[2] = 0; 953 #else 954 pair->dest_st.st_dev = 0; 955 pair->dest_st.st_ino = 0; 956 #endif 957 } 958 #if defined(TUKLIB_DOSLIKE) && !defined(__DJGPP__) 959 // Check that the output file is a regular file. We open with O_EXCL 960 // but that doesn't prevent open()/_open() on Windows from opening 961 // files like "con" or "nul". 962 // 963 // With DJGPP this check is done with stat() even before opening 964 // the output file. That method or a variant of it doesn't work on 965 // Windows because on Windows stat()/_stat64() sets st.st_mode so 966 // that S_ISREG(st.st_mode) will be true even for special files. 967 // With fstat()/_fstat64() it works. 
968 else if (pair->dest_fd != STDOUT_FILENO 969 && !S_ISREG(pair->dest_st.st_mode)) { 970 message_error("%s: Destination is not a regular file", 971 pair->dest_name); 972 973 // dest_fd needs to be reset to -1 to keep io_close() working. 974 (void)close(pair->dest_fd); 975 pair->dest_fd = -1; 976 977 free(pair->dest_name); 978 return true; 979 } 980 #elif !defined(TUKLIB_DOSLIKE) 981 else if (try_sparse && opt_mode == MODE_DECOMPRESS) { 982 // When writing to standard output, we need to be extra 983 // careful: 984 // - It may be connected to something else than 985 // a regular file. 986 // - We aren't necessarily writing to a new empty file 987 // or to the end of an existing file. 988 // - O_APPEND may be active. 989 // 990 // TODO: I'm keeping this disabled for DOS-like systems 991 // for now. FAT doesn't support sparse files, but NTFS 992 // does, so maybe this should be enabled on Windows after 993 // some testing. 994 if (pair->dest_fd == STDOUT_FILENO) { 995 if (!S_ISREG(pair->dest_st.st_mode)) 996 return false; 997 998 if (stdout_flags & O_APPEND) { 999 // Creating a sparse file is not possible 1000 // when O_APPEND is active (it's used by 1001 // shell's >> redirection). As I understand 1002 // it, it is safe to temporarily disable 1003 // O_APPEND in xz, because if someone 1004 // happened to write to the same file at the 1005 // same time, results would be bad anyway 1006 // (users shouldn't assume that xz uses any 1007 // specific block size when writing data). 1008 // 1009 // The write position may be something else 1010 // than the end of the file, so we must fix 1011 // it to start writing at the end of the file 1012 // to imitate O_APPEND. 1013 if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1) 1014 return false; 1015 1016 // Construct the new file status flags. 1017 // If O_NONBLOCK was set earlier in this 1018 // function, it must be kept here too. 
int flags = stdout_flags & ~O_APPEND;
				if (restore_stdout_flags)
					flags |= O_NONBLOCK;

				// If this fcntl() fails, we continue but won't
				// try to create sparse output. The original
				// flags will still be restored if needed (to
				// unset O_NONBLOCK) when the file is finished.
				if (fcntl(STDOUT_FILENO, F_SETFL, flags) == -1)
					return false;

				// Disabling O_APPEND succeeded. Mark
				// that the flags should be restored
				// in io_close_dest(). (This may have already
				// been set when enabling O_NONBLOCK.)
				restore_stdout_flags = true;

			} else if (lseek(STDOUT_FILENO, 0, SEEK_CUR)
					!= pair->dest_st.st_size) {
				// Writing won't start exactly at the end
				// of the file. We cannot use sparse output,
				// because it would probably corrupt the file.
				return false;
			}
		}

		pair->dest_try_sparse = true;
	}
#endif

	return false;
}


/// \brief      Opens the destination file of the file_pair structure
///
/// Thin wrapper that calls io_open_dest_real() between signals_block()
/// and signals_unblock().
///
/// \param      pair    File pair passed through to io_open_dest_real()
///
/// \return     The value returned by io_open_dest_real(): true if an error
///             occurred, false otherwise.
extern bool
io_open_dest(file_pair *pair)
{
	signals_block();
	const bool ret = io_open_dest_real(pair);
	signals_unblock();
	return ret;
}


/// \brief      Closes destination file of the file_pair structure
///
/// \param      pair    File whose dest_fd should be closed
/// \param      success If false, the file will be removed from the disk.
///
/// \return     If closing succeeds (or there is no destination file to
///             close), false is returned. On error, an error message is
///             printed and true is returned.
static bool
io_close_dest(file_pair *pair, bool success)
{
#ifndef TUKLIB_DOSLIKE
	// If io_open_dest() has modified the file status flags of standard
	// output (disabled O_APPEND and/or enabled O_NONBLOCK), restore
	// the original flags here.
	if (restore_stdout_flags) {
		assert(pair->dest_fd == STDOUT_FILENO);

		restore_stdout_flags = false;

		if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) {
			message_error(_("Error restoring the O_APPEND flag "
					"to standard output: %s"),
					strerror(errno));
			return true;
		}
	}
#endif

	// Nothing to close: the destination was never opened or it is
	// standard output, which is left open.
	if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO)
		return false;

	if (close(pair->dest_fd)) {
		message_error(_("%s: Closing the file failed: %s"),
				pair->dest_name, strerror(errno));

		// Closing destination file failed, so we cannot trust its
		// contents. Get rid of junk:
		io_unlink(pair->dest_name, &pair->dest_st);
		free(pair->dest_name);
		return true;
	}

	// If the operation using this file wasn't successful, we get rid
	// of the junk file.
	if (!success)
		io_unlink(pair->dest_name, &pair->dest_st);

	free(pair->dest_name);

	return false;
}


/// \brief      Finishes the output file and closes both files of the pair
///
/// If the operation succeeded and a sparse hole is still pending at the end
/// of the output, the output file is first extended to its final size.
/// Then, between signals_block() and signals_unblock(), the file attributes
/// are copied with io_copy_attrs() when there is a destination file other
/// than standard output, the destination is closed with io_close_dest(),
/// and finally io_close_src() is called for the source file.
///
/// \param      pair    File pair to close
/// \param      success True if the operation using the pair succeeded.
///                     It is changed to false locally if finishing or
///                     closing the destination fails, and the resulting
///                     value is passed on to io_close_src().
extern void
io_close(file_pair *pair, bool success)
{
	// Take care of sparseness at the end of the output file.
	if (success && pair->dest_try_sparse
			&& pair->dest_pending_sparse > 0) {
		// Seek forward one byte less than the size of the pending
		// hole, then write one zero-byte. This way the file grows
		// to its correct size. An alternative would be to use
		// ftruncate() but that isn't portable enough (e.g. it
		// doesn't work with FAT on Linux; FAT isn't that important
		// since it doesn't support sparse files anyway, but we don't
		// want to create corrupt files on it).
		if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1,
				SEEK_CUR) == -1) {
			message_error(_("%s: Seeking failed when trying "
					"to create a sparse file: %s"),
					pair->dest_name, strerror(errno));
			success = false;
		} else {
			const uint8_t zero[1] = { '\0' };
			if (io_write_buf(pair, zero, 1))
				success = false;
		}
	}

	signals_block();

	// Copy the file attributes. We need to skip this if destination
	// file isn't open or it is standard output.
	if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO)
		io_copy_attrs(pair);

	// Close the destination first. If it fails, we must not remove
	// the source file!
	if (io_close_dest(pair, success))
		success = false;

	// Close the source file, and unlink it if the operation using this
	// file pair was successful and we haven't requested to keep the
	// source file.
	io_close_src(pair, success);

	signals_unblock();

	return;
}


/// \brief      Moves the source file position backwards
///
/// \param      pair        File pair whose src_fd is repositioned
/// \param      rewind_size Number of bytes to seek backwards from the
///                         current position. It must not exceed
///                         IO_BUFFER_SIZE. If it is zero, no seek is done.
extern void
io_fix_src_pos(file_pair *pair, size_t rewind_size)
{
	assert(rewind_size <= IO_BUFFER_SIZE);

	if (rewind_size > 0) {
		// This doesn't need to work on unseekable file descriptors,
		// so just ignore possible errors.
		(void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR);
	}

	return;
}


/// \brief      Reads from the source file into a buffer
///
/// read() is called repeatedly until size bytes have been read, end of
/// input is reached, waiting for more input times out, or an error occurs.
///
/// \param      pair    File pair whose src_fd is read
/// \param      buf     Buffer where the data is stored
/// \param      size    Number of bytes to read; at most IO_BUFFER_SIZE
///
/// \return     Number of bytes stored in buf. This is less than size if
///             end of input was reached (pair->src_eof is set to true) or
///             if io_wait() timed out (pair->flush_needed is set to true).
///             SIZE_MAX is returned on error and when read() was
///             interrupted while user_abort is set.
extern size_t
io_read(file_pair *pair, io_buf *buf, size_t size)
{
	assert(size <= IO_BUFFER_SIZE);

	size_t pos = 0;

	while (pos < size) {
		const ssize_t amount = read(
				pair->src_fd, buf->u8 + pos, size - pos);

		if (amount == 0) {
			pair->src_eof = true;
			break;
		}

		if (amount == -1) {
			// Interrupted by a signal: stop if the user wants
			// to abort, otherwise retry the read().
			if (errno == EINTR) {
				if (user_abort)
					return SIZE_MAX;

				continue;
			}

#ifndef TUKLIB_DOSLIKE
			if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) {
				// Disable the flush-timeout if no input has
				// been seen since the previous flush and thus
				// there would be nothing to flush after the
				// timeout expires (avoids busy waiting).
				const int timeout = pair->src_has_seen_input
						? mytime_get_flush_timeout()
						: -1;

				switch (io_wait(pair, timeout, true)) {
				case IO_WAIT_MORE:
					continue;

				case IO_WAIT_ERROR:
					return SIZE_MAX;

				case IO_WAIT_TIMEOUT:
					// Return what has been read so far
					// and mark that a flush is needed.
					pair->flush_needed = true;
					return pos;

				default:
					message_bug();
				}
			}
#endif

			message_error(_("%s: Read error: %s"),
					pair->src_name, strerror(errno));

			return SIZE_MAX;
		}

		pos += (size_t)(amount);

		// Record the time of the first input seen since
		// src_has_seen_input was last false.
		if (!pair->src_has_seen_input) {
			pair->src_has_seen_input = true;
			mytime_set_flush_time();
		}
	}

	return pos;
}


/// \brief      Seeks to the given absolute position in the source file
///
/// \param      pair    File pair whose src_fd is repositioned
/// \param      pos     Offset from the beginning of the file. It must not
///                     be greater than pair->src_st.st_size; otherwise
///                     message_bug() is called.
///
/// \return     On success, pair->src_eof is set to false and false is
///             returned. On error, an error message is printed and true
///             is returned.
extern bool
io_seek_src(file_pair *pair, uint64_t pos)
{
	// Caller must not attempt to seek past the end of the input file
	// (seeking to 100 in a 100-byte file is seeking to the end of
	// the file, not past the end of the file, and thus that is allowed).
	//
	// This also validates that pos can be safely cast to off_t.
	if (pos > (uint64_t)(pair->src_st.st_size))
		message_bug();

	if (lseek(pair->src_fd, (off_t)(pos), SEEK_SET) == -1) {
		message_error(_("%s: Error seeking the file: %s"),
				pair->src_name, strerror(errno));
		return true;
	}

	pair->src_eof = false;

	return false;
}


/// \brief      Reads exactly size bytes from the given source file position
///
/// \param      pair    File pair whose src_fd is read
/// \param      buf     Buffer where the data is stored
/// \param      size    Number of bytes that must be read
/// \param      pos     Offset from the beginning of the file; passed to
///                     io_seek_src()
///
/// \return     False if exactly size bytes were read. True if seeking or
///             reading failed or if fewer than size bytes were available
///             (an "Unexpected end of file" error is printed in the
///             last case).
extern bool
io_pread(file_pair *pair, io_buf *buf, size_t size, uint64_t pos)
{
	// Using lseek() and read() is more portable than pread() and
	// for us it is as good as real pread().
	if (io_seek_src(pair, pos))
		return true;

	const size_t amount = io_read(pair, buf, size);
	if (amount == SIZE_MAX)
		return true;

	if (amount != size) {
		message_error(_("%s: Unexpected end of file"),
				pair->src_name);
		return true;
	}

	return false;
}


/// \brief      Checks if every element of buf->u64 is zero
///
/// \param      buf     Buffer to check
///
/// \return     True if all elements of buf->u64 are zero, false otherwise.
static bool
is_sparse(const io_buf *buf)
{
	// NOTE(review): presumably buf->u64 views the same IO_BUFFER_SIZE
	// bytes as buf->u8, so this assertion ensures that the uint64_t
	// elements cover the buffer exactly -- confirm against the
	// definition of io_buf.
	assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0);

	for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i)
		if (buf->u64[i] != 0)
			return false;

	return true;
}


/// \brief      Writes a buffer to the destination file
///
/// write() is called repeatedly until all of the data has been written.
/// Short writes are continued from where they stopped.
///
/// \param      pair    File pair whose dest_fd is written
/// \param      buf     Data to write
/// \param      size    Number of bytes to write; at most IO_BUFFER_SIZE
///
/// \return     False if everything was written. True if writing failed,
///             if write() was interrupted while user_abort is set, or if
///             io_wait() returned something else than IO_WAIT_MORE.
///             An error message is printed here only for write() errors
///             other than EPIPE.
static bool
io_write_buf(file_pair *pair, const uint8_t *buf, size_t size)
{
	assert(size <= IO_BUFFER_SIZE);

	while (size > 0) {
		const ssize_t amount = write(pair->dest_fd, buf, size);
		if (amount == -1) {
			// Interrupted by a signal: stop if the user wants
			// to abort, otherwise retry the write().
			if (errno == EINTR) {
				if (user_abort)
					return true;

				continue;
			}

#ifndef TUKLIB_DOSLIKE
			// No timeout is used when waiting for the
			// destination to become writable.
			if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) {
				if (io_wait(pair, -1, false) == IO_WAIT_MORE)
					continue;

				return true;
			}
#endif

			// Handle broken pipe specially. gzip and bzip2
			// don't print anything on SIGPIPE. In addition,
			// gzip --quiet uses exit status 2 (warning) on
			// broken pipe instead of whatever raise(SIGPIPE)
			// would make it return. It is there to hide "Broken
			// pipe" message on some old shells (probably old
			// GNU bash).
			//
			// We don't do anything special with --quiet, which
			// is what bzip2 does too. If we get SIGPIPE, we
			// will handle it like other signals by setting
			// user_abort, and get EPIPE here.
			if (errno != EPIPE)
				message_error(_("%s: Write error: %s"),
					pair->dest_name, strerror(errno));

			return true;
		}

		// Skip over the part that was successfully written.
		buf += (size_t)(amount);
		size -= (size_t)(amount);
	}

	return false;
}


/// \brief      Writes a buffer to the destination file, possibly sparsely
///
/// If pair->dest_try_sparse is true, a full-sized buffer that contains
/// only zeros is not written. Its size is added to
/// pair->dest_pending_sparse instead, and the accumulated hole is skipped
/// with lseek() before the next data that is actually written.
///
/// \param      pair    File pair whose dest_fd is written
/// \param      buf     Data to write
/// \param      size    Number of bytes to write; at most IO_BUFFER_SIZE
///
/// \return     False on success (including when the data was only recorded
///             as a pending hole). True if seeking over a pending hole
///             failed (an error message is printed) or if io_write_buf()
///             returned true.
extern bool
io_write(file_pair *pair, const io_buf *buf, size_t size)
{
	assert(size <= IO_BUFFER_SIZE);

	if (pair->dest_try_sparse) {
		// Check if the block is sparse (contains only zeros). If it
		// is sparse, we just store the amount and return. We will
		// take care of actually skipping over the hole when we hit
		// the next data block or close the file.
		//
		// Since io_close() requires that dest_pending_sparse > 0
		// if the file ends with sparse block, we must also return
		// if size == 0 to avoid doing the lseek().
		if (size == IO_BUFFER_SIZE) {
			// Even if the block was sparse, treat it as non-sparse
			// if the pending sparse amount is large compared to
			// the size of off_t. In practice this only matters
			// on 32-bit systems where off_t isn't always 64 bits.
			const off_t pending_max
				= (off_t)(1) << (sizeof(off_t) * CHAR_BIT - 2);
			if (is_sparse(buf) && pair->dest_pending_sparse
					< pending_max) {
				pair->dest_pending_sparse += (off_t)(size);
				return false;
			}
		} else if (size == 0) {
			return false;
		}

		// This is not a sparse block. If we have a pending hole,
		// skip it now.
		if (pair->dest_pending_sparse > 0) {
			if (lseek(pair->dest_fd, pair->dest_pending_sparse,
					SEEK_CUR) == -1) {
				message_error(_("%s: Seeking failed when "
						"trying to create a sparse "
						"file: %s"), pair->dest_name,
						strerror(errno));
				return true;
			}

			pair->dest_pending_sparse = 0;
		}
	}

	return io_write_buf(pair, buf->u8, size);
}