///////////////////////////////////////////////////////////////////////////////
//
/// \file       file_io.c
/// \brief      File opening, unlinking, and closing
//
//  Author:     Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#include "private.h"

#include <fcntl.h>

#ifdef TUKLIB_DOSLIKE
#	include <io.h>
#else
#	include <poll.h>
/// True if a failing fchown() of the file owner should produce a warning.
/// io_init() sets this to true when the effective user ID is 0, and
/// io_copy_attrs() reads it.
static bool warn_fchown;
#endif

// Pick the header that declares whichever timestamp-setting function
// io_copy_attrs() is going to use.
#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
#	include <sys/time.h>
#elif defined(HAVE__FUTIME)
#	include <sys/utime.h>
#elif defined(HAVE_UTIME)
#	include <utime.h>
#endif

// Capsicum is used by io_sandbox_enter(). The header name depends on
// what the build system detected.
#ifdef HAVE_CAPSICUM
#	ifdef HAVE_SYS_CAPSICUM_H
#		include <sys/capsicum.h>
#	else
#		include <sys/capability.h>
#	endif
#endif

#include "tuklib_open_stdxxx.h"

// Let the open() flags below be used unconditionally: on systems that
// don't define them they become no-ops.
#ifndef O_BINARY
#	define O_BINARY 0
#endif

#ifndef O_NOCTTY
#	define O_NOCTTY 0
#endif

// Using this macro to silence a warning from gcc -Wlogical-op.
#if EAGAIN == EWOULDBLOCK
#	define IS_EAGAIN_OR_EWOULDBLOCK(e) ((e) == EAGAIN)
#else
#	define IS_EAGAIN_OR_EWOULDBLOCK(e) \
		((e) == EAGAIN || (e) == EWOULDBLOCK)
#endif


/// Return values of io_wait()
typedef enum {
	IO_WAIT_MORE,    // Reading or writing is possible.
	IO_WAIT_ERROR,   // Error or user_abort
	IO_WAIT_TIMEOUT, // poll() timed out
} io_wait_ret;


/// If true, try to create sparse files when decompressing.
static bool try_sparse = true;

#ifdef ENABLE_SANDBOX
/// True if the conditions for sandboxing (described in main()) have been met.
static bool sandbox_allowed = false;
#endif

#ifndef TUKLIB_DOSLIKE
/// File status flags of standard input. This is used by io_open_src()
/// and io_close_src().
77 static int stdin_flags; 78 static bool restore_stdin_flags = false; 79 80 /// Original file status flags of standard output. This is used by 81 /// io_open_dest() and io_close_dest() to save and restore the flags. 82 static int stdout_flags; 83 static bool restore_stdout_flags = false; 84 85 /// Self-pipe used together with the user_abort variable to avoid 86 /// race conditions with signal handling. 87 static int user_abort_pipe[2]; 88 #endif 89 90 91 static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size); 92 93 94 extern void 95 io_init(void) 96 { 97 // Make sure that stdin, stdout, and stderr are connected to 98 // a valid file descriptor. Exit immediately with exit code ERROR 99 // if we cannot make the file descriptors valid. Maybe we should 100 // print an error message, but our stderr could be screwed anyway. 101 tuklib_open_stdxxx(E_ERROR); 102 103 #ifndef TUKLIB_DOSLIKE 104 // If fchown() fails setting the owner, we warn about it only if 105 // we are root. 106 warn_fchown = geteuid() == 0; 107 108 // Create a pipe for the self-pipe trick. 109 if (pipe(user_abort_pipe)) 110 message_fatal(_("Error creating a pipe: %s"), 111 strerror(errno)); 112 113 // Make both ends of the pipe non-blocking. 114 for (unsigned i = 0; i < 2; ++i) { 115 int flags = fcntl(user_abort_pipe[i], F_GETFL); 116 if (flags == -1 || fcntl(user_abort_pipe[i], F_SETFL, 117 flags | O_NONBLOCK) == -1) 118 message_fatal(_("Error creating a pipe: %s"), 119 strerror(errno)); 120 } 121 #endif 122 123 #ifdef __DJGPP__ 124 // Avoid doing useless things when statting files. 125 // This isn't important but doesn't hurt. 126 _djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE; 127 #endif 128 129 return; 130 } 131 132 133 #ifndef TUKLIB_DOSLIKE 134 extern void 135 io_write_to_user_abort_pipe(void) 136 { 137 // If the write() fails, it's probably due to the pipe being full. 138 // Failing in that case is fine. 
If the reason is something else, 139 // there's not much we can do since this is called in a signal 140 // handler. So ignore the errors and try to avoid warnings with 141 // GCC and glibc when _FORTIFY_SOURCE=2 is used. 142 uint8_t b = '\0'; 143 const int ret = write(user_abort_pipe[1], &b, 1); 144 (void)ret; 145 return; 146 } 147 #endif 148 149 150 extern void 151 io_no_sparse(void) 152 { 153 try_sparse = false; 154 return; 155 } 156 157 158 #ifdef ENABLE_SANDBOX 159 extern void 160 io_allow_sandbox(void) 161 { 162 sandbox_allowed = true; 163 return; 164 } 165 166 167 /// Enables operating-system-specific sandbox if it is possible. 168 /// src_fd is the file descriptor of the input file. 169 static void 170 io_sandbox_enter(int src_fd) 171 { 172 if (!sandbox_allowed) { 173 message(V_DEBUG, _("Sandbox is disabled due " 174 "to incompatible command line arguments")); 175 return; 176 } 177 178 const char dummy_str[] = "x"; 179 180 // Try to ensure that both libc and xz locale files have been 181 // loaded when NLS is enabled. 182 snprintf(NULL, 0, "%s%s", _(dummy_str), strerror(EINVAL)); 183 184 // Try to ensure that iconv data files needed for handling multibyte 185 // characters have been loaded. This is needed at least with glibc. 186 tuklib_mbstr_width(dummy_str, NULL); 187 188 #ifdef HAVE_CAPSICUM 189 // Capsicum needs FreeBSD 10.0 or later. 
190 cap_rights_t rights; 191 192 if (cap_rights_limit(src_fd, cap_rights_init(&rights, 193 CAP_EVENT, CAP_FCNTL, CAP_LOOKUP, CAP_READ, CAP_SEEK))) 194 goto error; 195 196 if (cap_rights_limit(STDOUT_FILENO, cap_rights_init(&rights, 197 CAP_EVENT, CAP_FCNTL, CAP_FSTAT, CAP_LOOKUP, 198 CAP_WRITE, CAP_SEEK))) 199 goto error; 200 201 if (cap_rights_limit(user_abort_pipe[0], cap_rights_init(&rights, 202 CAP_EVENT))) 203 goto error; 204 205 if (cap_rights_limit(user_abort_pipe[1], cap_rights_init(&rights, 206 CAP_WRITE))) 207 goto error; 208 209 if (cap_enter()) 210 goto error; 211 212 #else 213 # error ENABLE_SANDBOX is defined but no sandboxing method was found. 214 #endif 215 216 message(V_DEBUG, _("Sandbox was successfully enabled")); 217 return; 218 219 error: 220 message(V_DEBUG, _("Failed to enable the sandbox")); 221 } 222 #endif // ENABLE_SANDBOX 223 224 225 #ifndef TUKLIB_DOSLIKE 226 /// \brief Waits for input or output to become available or for a signal 227 /// 228 /// This uses the self-pipe trick to avoid a race condition that can occur 229 /// if a signal is caught after user_abort has been checked but before e.g. 230 /// read() has been called. In that situation read() could block unless 231 /// non-blocking I/O is used. With non-blocking I/O something like select() 232 /// or poll() is needed to avoid a busy-wait loop, and the same race condition 233 /// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in 234 /// POSIX) but neither is portable enough in 2013. The self-pipe trick is 235 /// old and very portable. 
// pair:       File pair whose src_fd or dest_fd is polled
// timeout:    Timeout passed as is to poll()
// is_reading: If true, wait for src_fd to become readable;
//             otherwise wait for dest_fd to become writable.
//
// Returns IO_WAIT_MORE when the polled file descriptor has events,
// IO_WAIT_TIMEOUT when poll() timed out (flush_needed is set to true
// in that case), and IO_WAIT_ERROR if user_abort is set or poll() failed.
static io_wait_ret
io_wait(file_pair *pair, int timeout, bool is_reading)
{
	struct pollfd pfd[2];

	if (is_reading) {
		pfd[0].fd = pair->src_fd;
		pfd[0].events = POLLIN;
	} else {
		pfd[0].fd = pair->dest_fd;
		pfd[0].events = POLLOUT;
	}

	// The read end of the self-pipe becomes readable when the signal
	// handler has called io_write_to_user_abort_pipe().
	pfd[1].fd = user_abort_pipe[0];
	pfd[1].events = POLLIN;

	while (true) {
		const int ret = poll(pfd, 2, timeout);

		// Check this first so that an abort wins over whatever
		// poll() happened to return.
		if (user_abort)
			return IO_WAIT_ERROR;

		if (ret == -1) {
			if (errno == EINTR || errno == EAGAIN)
				continue;

			message_error(_("%s: poll() failed: %s"),
					is_reading ? pair->src_name
						: pair->dest_name,
					strerror(errno));
			return IO_WAIT_ERROR;
		}

		if (ret == 0) {
			assert(opt_flush_timeout != 0);
			flush_needed = true;
			return IO_WAIT_TIMEOUT;
		}

		if (pfd[0].revents != 0)
			return IO_WAIT_MORE;
	}
}
#endif


/// \brief      Unlink a file
///
/// This tries to verify that the file being unlinked really is the file that
/// we want to unlink by verifying device and inode numbers. There's still
/// a small unavoidable race, but this is much better than nothing (the file
/// could have been moved/replaced even hours earlier).
///
/// \param      name      Name of the file to remove
/// \param      known_st  Status of the file as it was seen when it was
///                       opened; not used on DOS-like systems
static void
io_unlink(const char *name, const struct stat *known_st)
{
#if defined(TUKLIB_DOSLIKE)
	// On DOS-like systems, st_ino is meaningless, so don't bother
	// testing it. Just silence a compiler warning.
	(void)known_st;
#else
	struct stat new_st;

	// If --force was used, use stat() instead of lstat(). This way
	// (de)compressing symlinks works correctly. However, it also means
	// that xz cannot detect if a regular file foo is renamed to bar
	// and then a symlink foo -> bar is created. Because of stat()
	// instead of lstat(), xz will think that foo hasn't been replaced
	// with another file. Thus, xz will remove foo even though it no
	// longer is the same file that xz used when it started compressing.
	// Probably it's not too bad though, so this doesn't need a more
	// complex fix.
	const int stat_ret = opt_force
			? stat(name, &new_st) : lstat(name, &new_st);

	if (stat_ret
#	ifdef __VMS
			// st_ino is an array, and we don't want to
			// compare st_dev at all.
			|| memcmp(&new_st.st_ino, &known_st->st_ino,
				sizeof(new_st.st_ino)) != 0
#	else
			// Typical POSIX-like system
			|| new_st.st_dev != known_st->st_dev
			|| new_st.st_ino != known_st->st_ino
#	endif
			)
		// TRANSLATORS: When compression or decompression finishes,
		// and xz is going to remove the source file, xz first checks
		// if the source file still exists, and if it does, does its
		// device and inode numbers match what xz saw when it opened
		// the source file. If these checks fail, this message is
		// shown, %s being the filename, and the file is not deleted.
		// The check for device and inode numbers is there, because
		// it is possible that the user has put a new file in place
		// of the original file, and in that case it obviously
		// shouldn't be removed.
		message_error(_("%s: File seems to have been moved, "
				"not removing"), name);
	else
#endif
		// There's a race condition between lstat() and unlink()
		// but at least we have tried to avoid removing wrong file.
		if (unlink(name))
			message_error(_("%s: Cannot remove: %s"),
					name, strerror(errno));

	return;
}


/// \brief      Copies owner/group and permissions
///
/// The timestamps of the source file are copied too. Failures to set the
/// owner, group, or permissions produce at most a warning; failures to
/// set the timestamps are silently ignored.
///
/// \todo       ACL and EA support
///
static void
io_copy_attrs(const file_pair *pair)
{
	// Skip chown and chmod on Windows.
#ifndef TUKLIB_DOSLIKE
	// This function is more tricky than you may think at first.
	// Blindly copying permissions may permit users to access the
	// destination file who didn't have permission to access the
	// source file.

	// Try changing the owner of the file. If we aren't root or the owner
	// isn't already us, fchown() probably doesn't succeed. We warn
	// about failing fchown() only if we are root.
	if (fchown(pair->dest_fd, pair->src_st.st_uid, -1) && warn_fchown)
		message_warning(_("%s: Cannot set the file owner: %s"),
				pair->dest_name, strerror(errno));

	mode_t mode;

	if (fchown(pair->dest_fd, -1, pair->src_st.st_gid)) {
		message_warning(_("%s: Cannot set the file group: %s"),
				pair->dest_name, strerror(errno));
		// We can still safely copy some additional permissions:
		// `group' must be at least as strict as `other' and
		// also vice versa.
		//
		// NOTE: After this, the owner of the source file may
		// get additional permissions. This shouldn't be too bad,
		// because the owner would have had permission to chmod
		// the original file anyway.
		//
		// That is, both `group' and `other' get only the bits
		// that are set for both of them in the source file.
		mode = ((pair->src_st.st_mode & 0070) >> 3)
				& (pair->src_st.st_mode & 0007);
		mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode;
	} else {
		// Drop the setuid, setgid, and sticky bits.
		mode = pair->src_st.st_mode & 0777;
	}

	if (fchmod(pair->dest_fd, mode))
		message_warning(_("%s: Cannot set the file permissions: %s"),
				pair->dest_name, strerror(errno));
#endif

	// Copy the timestamps. We have several possible ways to do this, of
	// which some are better in both security and precision.
	//
	// First, get the nanosecond part of the timestamps. As of writing,
	// it's not standardized by POSIX, and there are several names for
	// the same thing in struct stat.
	long atime_nsec;
	long mtime_nsec;

#	if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC)
	// GNU and Solaris
	atime_nsec = pair->src_st.st_atim.tv_nsec;
	mtime_nsec = pair->src_st.st_mtim.tv_nsec;

#	elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC)
	// BSD
	atime_nsec = pair->src_st.st_atimespec.tv_nsec;
	mtime_nsec = pair->src_st.st_mtimespec.tv_nsec;

#	elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC)
	// GNU and BSD without extensions
	atime_nsec = pair->src_st.st_atimensec;
	mtime_nsec = pair->src_st.st_mtimensec;

#	elif defined(HAVE_STRUCT_STAT_ST_UATIME)
	// Tru64
	atime_nsec = pair->src_st.st_uatime * 1000;
	mtime_nsec = pair->src_st.st_umtime * 1000;

#	elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC)
	// UnixWare
	atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec;
	mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec;

#	else
	// Safe fallback
	atime_nsec = 0;
	mtime_nsec = 0;
#	endif

	// Construct a structure to hold the timestamps and call appropriate
	// function to set the timestamps.
#if defined(HAVE_FUTIMENS)
	// Use nanosecond precision.
	struct timespec tv[2];
	tv[0].tv_sec = pair->src_st.st_atime;
	tv[0].tv_nsec = atime_nsec;
	tv[1].tv_sec = pair->src_st.st_mtime;
	tv[1].tv_nsec = mtime_nsec;

	(void)futimens(pair->dest_fd, tv);

#elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
	// Use microsecond precision.
	struct timeval tv[2];
	tv[0].tv_sec = pair->src_st.st_atime;
	tv[0].tv_usec = atime_nsec / 1000;
	tv[1].tv_sec = pair->src_st.st_mtime;
	tv[1].tv_usec = mtime_nsec / 1000;

#	if defined(HAVE_FUTIMES)
	(void)futimes(pair->dest_fd, tv);
#	elif defined(HAVE_FUTIMESAT)
	(void)futimesat(pair->dest_fd, NULL, tv);
#	else
	// Argh, no function to use a file descriptor to set the timestamp.
	(void)utimes(pair->dest_name, tv);
#	endif

#elif defined(HAVE__FUTIME)
	// Use one-second precision with Windows-specific _futime().
	// We could use utime() too except that for some reason the
	// timestamp will get reset at close(). With _futime() it works.
	// This struct cannot be const as _futime() takes a non-const pointer.
	struct _utimbuf buf = {
		.actime = pair->src_st.st_atime,
		.modtime = pair->src_st.st_mtime,
	};

	// Avoid warnings.
	(void)atime_nsec;
	(void)mtime_nsec;

	(void)_futime(pair->dest_fd, &buf);

#elif defined(HAVE_UTIME)
	// Use one-second precision. utime() doesn't support using file
	// descriptor either. Some systems have broken utime() prototype
	// so don't make this const.
	struct utimbuf buf = {
		.actime = pair->src_st.st_atime,
		.modtime = pair->src_st.st_mtime,
	};

	// Avoid warnings.
	(void)atime_nsec;
	(void)mtime_nsec;

	(void)utime(pair->dest_name, &buf);
#endif

	return;
}


/// Opens the source file. Returns false on success, true on error.
///
/// On success pair->src_fd is set. Unless the source is standard input,
/// pair->src_st is filled too. When an error is detected after open()
/// has succeeded, the file descriptor is closed before returning.
static bool
io_open_src_real(file_pair *pair)
{
	// There's nothing to open when reading from stdin.
	if (pair->src_name == stdin_filename) {
		pair->src_fd = STDIN_FILENO;
#ifdef TUKLIB_DOSLIKE
		setmode(STDIN_FILENO, O_BINARY);
#else
		// Try to set stdin to non-blocking mode. It won't work
		// e.g. on OpenBSD if stdout is e.g. /dev/null. In such
		// case we proceed as if stdin were non-blocking anyway
		// (in case of /dev/null it will be in practice). The
		// same applies to stdout in io_open_dest_real().
		stdin_flags = fcntl(STDIN_FILENO, F_GETFL);
		if (stdin_flags == -1) {
			message_error(_("Error getting the file status flags "
					"from standard input: %s"),
					strerror(errno));
			return true;
		}

		// Remember to restore the flags in io_close_src() only
		// if we really changed them.
		if ((stdin_flags & O_NONBLOCK) == 0
				&& fcntl(STDIN_FILENO, F_SETFL,
					stdin_flags | O_NONBLOCK) != -1)
			restore_stdin_flags = true;
#endif
#ifdef HAVE_POSIX_FADVISE
		// It will fail if stdin is a pipe and that's fine.
		(void)posix_fadvise(STDIN_FILENO, 0, 0,
				opt_mode == MODE_LIST
					? POSIX_FADV_RANDOM
					: POSIX_FADV_SEQUENTIAL);
#endif
		return false;
	}

	// Symlinks are not followed unless writing to stdout or --force
	// was used.
	const bool follow_symlinks = opt_stdout || opt_force;

	// We accept only regular files if we are writing the output
	// to disk too. bzip2 allows overriding this with --force but
	// gzip and xz don't.
	const bool reg_files_only = !opt_stdout;

	// Flags for open()
	int flags = O_RDONLY | O_BINARY | O_NOCTTY;

#ifndef TUKLIB_DOSLIKE
	// Use non-blocking I/O:
	//   - It prevents blocking when opening FIFOs and some other
	//     special files, which is good if we want to accept only
	//     regular files.
	//   - It can help avoiding some race conditions with signal handling.
	flags |= O_NONBLOCK;
#endif

#if defined(O_NOFOLLOW)
	if (!follow_symlinks)
		flags |= O_NOFOLLOW;
#elif !defined(TUKLIB_DOSLIKE)
	// Some POSIX-like systems lack O_NOFOLLOW (it's not required
	// by POSIX). Check for symlinks with a separate lstat() on
	// these systems.
	if (!follow_symlinks) {
		struct stat st;
		if (lstat(pair->src_name, &st)) {
			message_error("%s: %s", pair->src_name,
					strerror(errno));
			return true;

		} else if (S_ISLNK(st.st_mode)) {
			message_warning(_("%s: Is a symbolic link, "
					"skipping"), pair->src_name);
			return true;
		}
	}
#else
	// Avoid warnings.
	(void)follow_symlinks;
#endif

	// Try to open the file. Signals have been blocked so EINTR shouldn't
	// be possible.
	pair->src_fd = open(pair->src_name, flags);

	if (pair->src_fd == -1) {
		// Signals (that have a signal handler) have been blocked.
		assert(errno != EINTR);

#ifdef O_NOFOLLOW
		// Give an understandable error message if the reason
		// for failing was that the file was a symbolic link.
		//
		// Note that at least Linux, OpenBSD, Solaris, and Darwin
		// use ELOOP to indicate that O_NOFOLLOW was the reason
		// that open() failed. Because there may be
		// directories in the pathname, ELOOP may occur also
		// because of a symlink loop in the directory part.
		// So ELOOP doesn't tell us what actually went wrong,
		// and this stupidity went into POSIX-1.2008 too.
		//
		// FreeBSD associates EMLINK with O_NOFOLLOW and
		// Tru64 uses ENOTSUP. We use these directly here
		// and skip the lstat() call and the associated race.
		// I want to hear if there are other kernels that
		// fail with something else than ELOOP with O_NOFOLLOW.
		bool was_symlink = false;

#	if defined(__FreeBSD__) || defined(__DragonFly__)
		if (errno == EMLINK)
			was_symlink = true;

#	elif defined(__digital__) && defined(__unix__)
		if (errno == ENOTSUP)
			was_symlink = true;

#	elif defined(__NetBSD__)
		if (errno == EFTYPE)
			was_symlink = true;

#	else
		if (errno == ELOOP && !follow_symlinks) {
			// lstat() may change errno, which is still needed
			// for the generic error message below.
			const int saved_errno = errno;
			struct stat st;
			if (lstat(pair->src_name, &st) == 0
					&& S_ISLNK(st.st_mode))
				was_symlink = true;

			errno = saved_errno;
		}
#	endif

		if (was_symlink)
			message_warning(_("%s: Is a symbolic link, "
					"skipping"), pair->src_name);
		else
#endif
			// Something else than O_NOFOLLOW failing
			// (assuming that the race conditions didn't
			// confuse us).
			message_error("%s: %s", pair->src_name,
					strerror(errno));

		return true;
	}

	// Stat the source file. We need the result also when we copy
	// the permissions, and when unlinking.
	//
	// NOTE: Use stat() instead of fstat() with DJGPP, because
	// then we have a better chance to get st_ino value that can
	// be used in io_open_dest_real() to prevent overwriting the
	// source file.
#ifdef __DJGPP__
	if (stat(pair->src_name, &pair->src_st))
		goto error_msg;
#else
	if (fstat(pair->src_fd, &pair->src_st))
		goto error_msg;
#endif

	if (S_ISDIR(pair->src_st.st_mode)) {
		message_warning(_("%s: Is a directory, skipping"),
				pair->src_name);
		goto error;
	}

	if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) {
		message_warning(_("%s: Not a regular file, skipping"),
				pair->src_name);
		goto error;
	}

#ifndef TUKLIB_DOSLIKE
	if (reg_files_only && !opt_force) {
		if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) {
			// gzip rejects setuid and setgid files even
			// when --force was used. bzip2 doesn't check
			// for them, but calls fchown() after fchmod(),
			// and many systems automatically drop setuid
			// and setgid bits there.
			//
			// We accept setuid and setgid files if
			// --force was used. We drop these bits
			// explicitly in io_copy_attrs().
			message_warning(_("%s: File has setuid or "
					"setgid bit set, skipping"),
					pair->src_name);
			goto error;
		}

		if (pair->src_st.st_mode & S_ISVTX) {
			message_warning(_("%s: File has sticky bit "
					"set, skipping"),
					pair->src_name);
			goto error;
		}

		if (pair->src_st.st_nlink > 1) {
			message_warning(_("%s: Input file has more "
					"than one hard link, "
					"skipping"), pair->src_name);
			goto error;
		}
	}

	// If it is something else than a regular file, wait until
	// there is input available. This way reading from FIFOs
	// will work when open() is used with O_NONBLOCK.
	if (!S_ISREG(pair->src_st.st_mode)) {
		// The caller has blocked the signals. They have to be
		// unblocked for the duration of the wait.
		signals_unblock();
		const io_wait_ret ret = io_wait(pair, -1, true);
		signals_block();

		if (ret != IO_WAIT_MORE)
			goto error;
	}
#endif

#ifdef HAVE_POSIX_FADVISE
	// It will fail with some special files like FIFOs but that is fine.
	(void)posix_fadvise(pair->src_fd, 0, 0,
			opt_mode == MODE_LIST
				? POSIX_FADV_RANDOM
				: POSIX_FADV_SEQUENTIAL);
#endif

	return false;

error_msg:
	message_error("%s: %s", pair->src_name, strerror(errno));
error:
	(void)close(pair->src_fd);
	return true;
}


/// \brief      Opens the source file
///
/// \param      src_name  Name of the file to open
///
/// \return     Pointer to a statically allocated file_pair on success.
///             NULL is returned if src_name is an empty filename or
///             if opening the file fails. The same structure is reused
///             on every call.
extern file_pair *
io_open_src(const char *src_name)
{
	if (is_empty_filename(src_name))
		return NULL;

	// Since we have only one file open at a time, we can use
	// a statically allocated structure.
	static file_pair pair;

	pair = (file_pair){
		.src_name = src_name,
		.dest_name = NULL,
		.src_fd = -1,
		.dest_fd = -1,
		.src_eof = false,
		.dest_try_sparse = false,
		.dest_pending_sparse = 0,
	};

	// Block the signals, for which we have a custom signal handler, so
	// that we don't need to worry about EINTR.
	signals_block();
	const bool error = io_open_src_real(&pair);
	signals_unblock();

#ifdef ENABLE_SANDBOX
	if (!error)
		io_sandbox_enter(pair.src_fd);
#endif

	return error ? NULL : &pair;
}


/// \brief      Closes source file of the file_pair structure
///
/// \param      pair    File whose src_fd should be closed
/// \param      success If true, the file will be removed from the disk if
///                     closing succeeds and --keep hasn't been used.
static void
io_close_src(file_pair *pair, bool success)
{
#ifndef TUKLIB_DOSLIKE
	// If io_open_src_real() set O_NONBLOCK on standard input,
	// restore the original file status flags.
	if (restore_stdin_flags) {
		assert(pair->src_fd == STDIN_FILENO);

		restore_stdin_flags = false;

		if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1)
			message_error(_("Error restoring the status flags "
					"to standard input: %s"),
					strerror(errno));
	}
#endif

	if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) {
		// Close the file before possibly unlinking it. On DOS-like
		// systems this is always required since unlinking will fail
		// if the file is open. On POSIX systems it usually works
		// to unlink open files, but in some cases it doesn't and
		// one gets EBUSY in errno.
		//
		// xz 5.2.2 and older unlinked the file before closing it
		// (except on DOS-like systems). The old code didn't handle
		// EBUSY and could fail e.g. on some CIFS shares. The
		// advantage of unlinking before closing is negligible
		// (avoids a race between close() and stat()/lstat() and
		// unlink()), so let's keep this simple.
		(void)close(pair->src_fd);

		if (success && !opt_keep_original)
			io_unlink(pair->src_name, &pair->src_st);
	}

	return;
}


/// Opens the destination file. Returns false on success, true on error.
///
/// The destination is standard output if opt_stdout is set or if the
/// source is standard input. Otherwise the name is obtained from
/// suffix_get_dest_name() and a new file is created with O_EXCL.
/// If an error occurs after the name has been obtained, the name
/// is freed before returning.
static bool
io_open_dest_real(file_pair *pair)
{
	if (opt_stdout || pair->src_fd == STDIN_FILENO) {
		// We don't modify or free() this.
		pair->dest_name = (char *)"(stdout)";
		pair->dest_fd = STDOUT_FILENO;
#ifdef TUKLIB_DOSLIKE
		setmode(STDOUT_FILENO, O_BINARY);
#else
		// Try to set O_NONBLOCK if it isn't already set.
		// If it fails, we assume that stdout is non-blocking
		// in practice. See the comments in io_open_src_real()
		// for similar situation with stdin.
		//
		// NOTE: O_APPEND may be unset later in this function
		// and it relies on stdout_flags being set here.
		stdout_flags = fcntl(STDOUT_FILENO, F_GETFL);
		if (stdout_flags == -1) {
			message_error(_("Error getting the file status flags "
					"from standard output: %s"),
					strerror(errno));
			return true;
		}

		if ((stdout_flags & O_NONBLOCK) == 0
				&& fcntl(STDOUT_FILENO, F_SETFL,
					stdout_flags | O_NONBLOCK) != -1)
				restore_stdout_flags = true;
#endif
	} else {
		pair->dest_name = suffix_get_dest_name(pair->src_name);
		if (pair->dest_name == NULL)
			return true;

#ifdef __DJGPP__
		struct stat st;
		if (stat(pair->dest_name, &st) == 0) {
			// Check that it isn't a special file like "prn".
			if (st.st_dev == -1) {
				message_error("%s: Refusing to write to "
						"a DOS special file",
						pair->dest_name);
				free(pair->dest_name);
				return true;
			}

			// Check that we aren't overwriting the source file.
			if (st.st_dev == pair->src_st.st_dev
					&& st.st_ino == pair->src_st.st_ino) {
				message_error("%s: Output file is the same "
						"as the input file",
						pair->dest_name);
				free(pair->dest_name);
				return true;
			}
		}
#endif

		// If --force was used, unlink the target file first.
		if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
			message_error(_("%s: Cannot remove: %s"),
					pair->dest_name, strerror(errno));
			free(pair->dest_name);
			return true;
		}

		// Open the file.
		int flags = O_WRONLY | O_BINARY | O_NOCTTY
				| O_CREAT | O_EXCL;
#ifndef TUKLIB_DOSLIKE
		flags |= O_NONBLOCK;
#endif
		// Create the file readable and writable by the owner only.
		// io_copy_attrs() sets the final permissions later.
		const mode_t mode = S_IRUSR | S_IWUSR;
		pair->dest_fd = open(pair->dest_name, flags, mode);

		if (pair->dest_fd == -1) {
			message_error("%s: %s", pair->dest_name,
					strerror(errno));
			free(pair->dest_name);
			return true;
		}
	}

#ifndef TUKLIB_DOSLIKE
	// dest_st isn't used on DOS-like systems except as a dummy
	// argument to io_unlink(), so don't fstat() on such systems.
	if (fstat(pair->dest_fd, &pair->dest_st)) {
		// If fstat() really fails, we have a safe fallback here.
#	if defined(__VMS)
		pair->dest_st.st_ino[0] = 0;
		pair->dest_st.st_ino[1] = 0;
		pair->dest_st.st_ino[2] = 0;
#	else
		pair->dest_st.st_dev = 0;
		pair->dest_st.st_ino = 0;
#	endif
	} else if (try_sparse && opt_mode == MODE_DECOMPRESS) {
		// When writing to standard output, we need to be extra
		// careful:
		//  - It may be connected to something else than
		//    a regular file.
		//  - We aren't necessarily writing to a new empty file
		//    or to the end of an existing file.
		//  - O_APPEND may be active.
		//
		// TODO: I'm keeping this disabled for DOS-like systems
		// for now. FAT doesn't support sparse files, but NTFS
		// does, so maybe this should be enabled on Windows after
		// some testing.
		if (pair->dest_fd == STDOUT_FILENO) {
			if (!S_ISREG(pair->dest_st.st_mode))
				return false;

			if (stdout_flags & O_APPEND) {
				// Creating a sparse file is not possible
				// when O_APPEND is active (it's used by
				// shell's >> redirection). As I understand
				// it, it is safe to temporarily disable
				// O_APPEND in xz, because if someone
				// happened to write to the same file at the
				// same time, results would be bad anyway
				// (users shouldn't assume that xz uses any
				// specific block size when writing data).
				//
				// The write position may be something else
				// than the end of the file, so we must fix
				// it to start writing at the end of the file
				// to imitate O_APPEND.
				if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1)
					return false;

				// Construct the new file status flags.
				// If O_NONBLOCK was set earlier in this
				// function, it must be kept here too.
				int flags = stdout_flags & ~O_APPEND;
				if (restore_stdout_flags)
					flags |= O_NONBLOCK;

				// If this fcntl() fails, we continue but won't
				// try to create sparse output. The original
				// flags will still be restored if needed (to
				// unset O_NONBLOCK) when the file is finished.
				if (fcntl(STDOUT_FILENO, F_SETFL, flags) == -1)
					return false;

				// Disabling O_APPEND succeeded. Mark
				// that the flags should be restored
				// in io_close_dest(). (This may have already
				// been set when enabling O_NONBLOCK.)
				restore_stdout_flags = true;

			} else if (lseek(STDOUT_FILENO, 0, SEEK_CUR)
					!= pair->dest_st.st_size) {
				// Writing won't start exactly at the end
				// of the file. We cannot use sparse output,
				// because it would probably corrupt the file.
				return false;
			}
		}

		pair->dest_try_sparse = true;
	}
#endif

	return false;
}


/// \brief      Opens the destination file
///
/// This calls io_open_dest_real() with the signals blocked.
///
/// \return     False on success, true on error.
extern bool
io_open_dest(file_pair *pair)
{
	signals_block();
	const bool ret = io_open_dest_real(pair);
	signals_unblock();
	return ret;
}


/// \brief      Closes destination file of the file_pair structure
///
/// \param      pair    File whose dest_fd should be closed
/// \param      success If false, the file will be removed from the disk.
///
/// \return     False if closing succeeds or if there was nothing to close
///             (dest_fd is -1 or standard output). On error, true is
///             returned and an error message has been printed.
static bool
io_close_dest(file_pair *pair, bool success)
{
#ifndef TUKLIB_DOSLIKE
	// If io_open_dest() has disabled O_APPEND, restore it here.
	// The same flag is set also when only O_NONBLOCK was enabled.
	if (restore_stdout_flags) {
		assert(pair->dest_fd == STDOUT_FILENO);

		restore_stdout_flags = false;

		if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) {
			message_error(_("Error restoring the O_APPEND flag "
					"to standard output: %s"),
					strerror(errno));
			return true;
		}
	}
#endif

	if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO)
		return false;

	if (close(pair->dest_fd)) {
		message_error(_("%s: Closing the file failed: %s"),
				pair->dest_name, strerror(errno));

		// Closing destination file failed, so we cannot trust its
		// contents. Get rid of junk:
		io_unlink(pair->dest_name, &pair->dest_st);
		free(pair->dest_name);
		return true;
	}

	// If the operation using this file wasn't successful, we get rid
	// of the junk file.
	if (!success)
		io_unlink(pair->dest_name, &pair->dest_st);

	free(pair->dest_name);

	return false;
}


/// \brief      Closes the file descriptors of the file pair
///
/// Finishes a possible pending hole at the end of a sparse output file,
/// copies the file attributes, and closes the destination and source
/// files. The source file can get removed only if everything before
/// that has succeeded.
///
/// \param      pair    File pair to close
/// \param      success True if the operation using the file pair was
///                     successful
extern void
io_close(file_pair *pair, bool success)
{
	// Take care of sparseness at the end of the output file.
	if (success && pair->dest_try_sparse
			&& pair->dest_pending_sparse > 0) {
		// Seek forward one byte less than the size of the pending
		// hole, then write one zero-byte. This way the file grows
		// to its correct size. An alternative would be to use
		// ftruncate() but that isn't portable enough (e.g. it
		// doesn't work with FAT on Linux; FAT isn't that important
		// since it doesn't support sparse files anyway, but we don't
		// want to create corrupt files on it).
		if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1,
				SEEK_CUR) == -1) {
			message_error(_("%s: Seeking failed when trying "
					"to create a sparse file: %s"),
					pair->dest_name, strerror(errno));
			success = false;
		} else {
			const uint8_t zero[1] = { '\0' };
			if (io_write_buf(pair, zero, 1))
				success = false;
		}
	}

	signals_block();

	// Copy the file attributes. We need to skip this if destination
	// file isn't open or it is standard output.
	if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO)
		io_copy_attrs(pair);

	// Close the destination first. If it fails, we must not remove
	// the source file!
	if (io_close_dest(pair, success))
		success = false;

	// Close the source file, and unlink it if the operation using this
	// file pair was successful and we haven't requested to keep the
	// source file.
	io_close_src(pair, success);

	signals_unblock();

	return;
}


/// \brief      Moves the read position of the source file backwards
///
/// \param      pair         File pair whose src_fd is to be repositioned
/// \param      rewind_size  Number of bytes to seek backwards from the
///                          current position; at most IO_BUFFER_SIZE
extern void
io_fix_src_pos(file_pair *pair, size_t rewind_size)
{
	assert(rewind_size <= IO_BUFFER_SIZE);

	if (rewind_size > 0) {
		// This doesn't need to work on unseekable file descriptors,
		// so just ignore possible errors.
		(void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR);
	}

	return;
}


/// \brief      Reads from the source file to a buffer
///
/// \param      pair       File pair having the source file open
/// \param      buf_union  Destination buffer
/// \param      size       Size of the buffer in bytes
///
/// \return     On success, the number of bytes read is returned. It is
///             less than size if the end of the file was reached (then
///             pair->src_eof is set to true) or if waiting for more
///             input timed out. On error or user abort, SIZE_MAX is
///             returned.
extern size_t
io_read(file_pair *pair, io_buf *buf_union, size_t size)
{
	// We use small buffers here.
	assert(size < SSIZE_MAX);

	uint8_t *buf = buf_union->u8;
	size_t left = size;

	while (left > 0) {
		const ssize_t amount = read(pair->src_fd, buf, left);

		if (amount == 0) {
			pair->src_eof = true;
			break;
		}

		if (amount == -1) {
			if (errno == EINTR) {
				if (user_abort)
					return SIZE_MAX;

				continue;
			}

#ifndef TUKLIB_DOSLIKE
			// With non-blocking I/O, wait until there is more
			// input, the flush timeout expires, or the user
			// aborts the operation.
			if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) {
				const io_wait_ret ret = io_wait(pair,
						mytime_get_flush_timeout(),
						true);
				switch (ret) {
				case IO_WAIT_MORE:
					continue;

				case IO_WAIT_ERROR:
					return SIZE_MAX;

				case IO_WAIT_TIMEOUT:
					return size - left;

				default:
					message_bug();
				}
			}
#endif

			message_error(_("%s: Read error: %s"),
					pair->src_name, strerror(errno));

			return SIZE_MAX;
		}

		buf += (size_t)(amount);
		left -= (size_t)(amount);
	}

	return size - left;
}


extern bool
io_pread(file_pair *pair, io_buf *buf, size_t size, off_t pos)
{
	// Using lseek() and read() is more portable than pread() and
	// for us it is as good as real pread().
1176 if (lseek(pair->src_fd, pos, SEEK_SET) != pos) { 1177 message_error(_("%s: Error seeking the file: %s"), 1178 pair->src_name, strerror(errno)); 1179 return true; 1180 } 1181 1182 const size_t amount = io_read(pair, buf, size); 1183 if (amount == SIZE_MAX) 1184 return true; 1185 1186 if (amount != size) { 1187 message_error(_("%s: Unexpected end of file"), 1188 pair->src_name); 1189 return true; 1190 } 1191 1192 return false; 1193 } 1194 1195 1196 static bool 1197 is_sparse(const io_buf *buf) 1198 { 1199 assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0); 1200 1201 for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i) 1202 if (buf->u64[i] != 0) 1203 return false; 1204 1205 return true; 1206 } 1207 1208 1209 static bool 1210 io_write_buf(file_pair *pair, const uint8_t *buf, size_t size) 1211 { 1212 assert(size < SSIZE_MAX); 1213 1214 while (size > 0) { 1215 const ssize_t amount = write(pair->dest_fd, buf, size); 1216 if (amount == -1) { 1217 if (errno == EINTR) { 1218 if (user_abort) 1219 return true; 1220 1221 continue; 1222 } 1223 1224 #ifndef TUKLIB_DOSLIKE 1225 if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) { 1226 if (io_wait(pair, -1, false) == IO_WAIT_MORE) 1227 continue; 1228 1229 return true; 1230 } 1231 #endif 1232 1233 // Handle broken pipe specially. gzip and bzip2 1234 // don't print anything on SIGPIPE. In addition, 1235 // gzip --quiet uses exit status 2 (warning) on 1236 // broken pipe instead of whatever raise(SIGPIPE) 1237 // would make it return. It is there to hide "Broken 1238 // pipe" message on some old shells (probably old 1239 // GNU bash). 1240 // 1241 // We don't do anything special with --quiet, which 1242 // is what bzip2 does too. If we get SIGPIPE, we 1243 // will handle it like other signals by setting 1244 // user_abort, and get EPIPE here. 
1245 if (errno != EPIPE) 1246 message_error(_("%s: Write error: %s"), 1247 pair->dest_name, strerror(errno)); 1248 1249 return true; 1250 } 1251 1252 buf += (size_t)(amount); 1253 size -= (size_t)(amount); 1254 } 1255 1256 return false; 1257 } 1258 1259 1260 extern bool 1261 io_write(file_pair *pair, const io_buf *buf, size_t size) 1262 { 1263 assert(size <= IO_BUFFER_SIZE); 1264 1265 if (pair->dest_try_sparse) { 1266 // Check if the block is sparse (contains only zeros). If it 1267 // sparse, we just store the amount and return. We will take 1268 // care of actually skipping over the hole when we hit the 1269 // next data block or close the file. 1270 // 1271 // Since io_close() requires that dest_pending_sparse > 0 1272 // if the file ends with sparse block, we must also return 1273 // if size == 0 to avoid doing the lseek(). 1274 if (size == IO_BUFFER_SIZE) { 1275 if (is_sparse(buf)) { 1276 pair->dest_pending_sparse += size; 1277 return false; 1278 } 1279 } else if (size == 0) { 1280 return false; 1281 } 1282 1283 // This is not a sparse block. If we have a pending hole, 1284 // skip it now. 1285 if (pair->dest_pending_sparse > 0) { 1286 if (lseek(pair->dest_fd, pair->dest_pending_sparse, 1287 SEEK_CUR) == -1) { 1288 message_error(_("%s: Seeking failed when " 1289 "trying to create a sparse " 1290 "file: %s"), pair->dest_name, 1291 strerror(errno)); 1292 return true; 1293 } 1294 1295 pair->dest_pending_sparse = 0; 1296 } 1297 } 1298 1299 return io_write_buf(pair, buf->u8, size); 1300 } 1301