///////////////////////////////////////////////////////////////////////////////
//
/// \file       file_io.c
/// \brief      File opening, unlinking, and closing
//
//  Author:     Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#include "private.h"

#include <fcntl.h>

#ifdef TUKLIB_DOSLIKE
#	include <io.h>
#else
#	include <poll.h>
/// If true, a warning is shown when fchown() fails to set the owner of
/// the destination file. io_init() sets this to true only when the
/// effective user ID is zero.
static bool warn_fchown;
#endif

// Pick the header that declares the timestamp-setting function
// that io_copy_attrs() will use.
#if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
#	include <sys/time.h>
#elif defined(HAVE__FUTIME)
#	include <sys/utime.h>
#elif defined(HAVE_UTIME)
#	include <utime.h>
#endif

#ifdef HAVE_CAPSICUM
#	ifdef HAVE_SYS_CAPSICUM_H
#		include <sys/capsicum.h>
#	else
#		include <sys/capability.h>
#	endif
#endif

#include "tuklib_open_stdxxx.h"

// Define the flags as zero where they don't exist so that they can be
// ORed unconditionally into the open() flags.
#ifndef O_BINARY
#	define O_BINARY 0
#endif

#ifndef O_NOCTTY
#	define O_NOCTTY 0
#endif

// Using this macro to silence a warning from gcc -Wlogical-op.
#if EAGAIN == EWOULDBLOCK
#	define IS_EAGAIN_OR_EWOULDBLOCK(e) ((e) == EAGAIN)
#else
#	define IS_EAGAIN_OR_EWOULDBLOCK(e) \
		((e) == EAGAIN || (e) == EWOULDBLOCK)
#endif


/// Return values of io_wait()
typedef enum {
	IO_WAIT_MORE,    // Reading or writing is possible.
	IO_WAIT_ERROR,   // Error or user_abort
	IO_WAIT_TIMEOUT, // poll() timed out
} io_wait_ret;


/// If true, try to create sparse files when decompressing.
/// io_no_sparse() sets this to false.
static bool try_sparse = true;

#ifdef ENABLE_SANDBOX
/// True if the conditions for sandboxing (described in main()) have been met.
/// io_allow_sandbox() sets this to true.
static bool sandbox_allowed = false;
#endif

#ifndef TUKLIB_DOSLIKE
/// File status flags of standard input. This is used by io_open_src()
/// and io_close_src().
77 static int stdin_flags; 78 static bool restore_stdin_flags = false; 79 80 /// Original file status flags of standard output. This is used by 81 /// io_open_dest() and io_close_dest() to save and restore the flags. 82 static int stdout_flags; 83 static bool restore_stdout_flags = false; 84 85 /// Self-pipe used together with the user_abort variable to avoid 86 /// race conditions with signal handling. 87 static int user_abort_pipe[2]; 88 #endif 89 90 91 static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size); 92 93 94 extern void 95 io_init(void) 96 { 97 // Make sure that stdin, stdout, and stderr are connected to 98 // a valid file descriptor. Exit immediately with exit code ERROR 99 // if we cannot make the file descriptors valid. Maybe we should 100 // print an error message, but our stderr could be screwed anyway. 101 tuklib_open_stdxxx(E_ERROR); 102 103 #ifndef TUKLIB_DOSLIKE 104 // If fchown() fails setting the owner, we warn about it only if 105 // we are root. 106 warn_fchown = geteuid() == 0; 107 108 // Create a pipe for the self-pipe trick. 109 if (pipe(user_abort_pipe)) 110 message_fatal(_("Error creating a pipe: %s"), 111 strerror(errno)); 112 113 // Make both ends of the pipe non-blocking. 114 for (unsigned i = 0; i < 2; ++i) { 115 int flags = fcntl(user_abort_pipe[i], F_GETFL); 116 if (flags == -1 || fcntl(user_abort_pipe[i], F_SETFL, 117 flags | O_NONBLOCK) == -1) 118 message_fatal(_("Error creating a pipe: %s"), 119 strerror(errno)); 120 } 121 #endif 122 123 #ifdef __DJGPP__ 124 // Avoid doing useless things when statting files. 125 // This isn't important but doesn't hurt. 126 _djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE; 127 #endif 128 129 return; 130 } 131 132 133 #ifndef TUKLIB_DOSLIKE 134 extern void 135 io_write_to_user_abort_pipe(void) 136 { 137 // If the write() fails, it's probably due to the pipe being full. 138 // Failing in that case is fine. 
If the reason is something else, 139 // there's not much we can do since this is called in a signal 140 // handler. So ignore the errors and try to avoid warnings with 141 // GCC and glibc when _FORTIFY_SOURCE=2 is used. 142 uint8_t b = '\0'; 143 const int ret = write(user_abort_pipe[1], &b, 1); 144 (void)ret; 145 return; 146 } 147 #endif 148 149 150 extern void 151 io_no_sparse(void) 152 { 153 try_sparse = false; 154 return; 155 } 156 157 158 #ifdef ENABLE_SANDBOX 159 extern void 160 io_allow_sandbox(void) 161 { 162 sandbox_allowed = true; 163 return; 164 } 165 166 167 /// Enables operating-system-specific sandbox if it is possible. 168 /// src_fd is the file descriptor of the input file. 169 static void 170 io_sandbox_enter(int src_fd) 171 { 172 if (!sandbox_allowed) { 173 // This message is more often annoying than useful so 174 // it's commented out. It can be useful when developing 175 // the sandboxing code. 176 //message(V_DEBUG, _("Sandbox is disabled due " 177 // "to incompatible command line arguments")); 178 return; 179 } 180 181 const char dummy_str[] = "x"; 182 183 // Try to ensure that both libc and xz locale files have been 184 // loaded when NLS is enabled. 185 snprintf(NULL, 0, "%s%s", _(dummy_str), strerror(EINVAL)); 186 187 // Try to ensure that iconv data files needed for handling multibyte 188 // characters have been loaded. This is needed at least with glibc. 189 tuklib_mbstr_width(dummy_str, NULL); 190 191 #ifdef HAVE_CAPSICUM 192 // Capsicum needs FreeBSD 10.0 or later. 
193 cap_rights_t rights; 194 195 if (cap_rights_limit(src_fd, cap_rights_init(&rights, 196 CAP_EVENT, CAP_FCNTL, CAP_LOOKUP, CAP_READ, CAP_SEEK)) < 0 && 197 errno != ENOSYS) 198 goto error; 199 200 if (cap_rights_limit(STDOUT_FILENO, cap_rights_init(&rights, 201 CAP_EVENT, CAP_FCNTL, CAP_FSTAT, CAP_LOOKUP, 202 CAP_WRITE, CAP_SEEK)) < 0 && errno != ENOSYS) 203 goto error; 204 205 if (cap_rights_limit(user_abort_pipe[0], cap_rights_init(&rights, 206 CAP_EVENT)) < 0 && errno != ENOSYS) 207 goto error; 208 209 if (cap_rights_limit(user_abort_pipe[1], cap_rights_init(&rights, 210 CAP_WRITE)) < 0 && errno != ENOSYS) 211 goto error; 212 213 if (cap_enter() < 0 && errno != ENOSYS) 214 goto error; 215 216 #elif defined(HAVE_PLEDGE) 217 // pledge() was introduced in OpenBSD 5.9. 218 // 219 // main() unconditionally calls pledge() with fairly relaxed 220 // promises which work in all situations. Here we make the 221 // sandbox more strict. 222 if (pledge("stdio", "")) 223 goto error; 224 225 (void)src_fd; 226 227 #else 228 # error ENABLE_SANDBOX is defined but no sandboxing method was found. 229 #endif 230 231 // This message is annoying in xz -lvv. 232 //message(V_DEBUG, _("Sandbox was successfully enabled")); 233 return; 234 235 error: 236 message_fatal(_("Failed to enable the sandbox")); 237 } 238 #endif // ENABLE_SANDBOX 239 240 241 #ifndef TUKLIB_DOSLIKE 242 /// \brief Waits for input or output to become available or for a signal 243 /// 244 /// This uses the self-pipe trick to avoid a race condition that can occur 245 /// if a signal is caught after user_abort has been checked but before e.g. 246 /// read() has been called. In that situation read() could block unless 247 /// non-blocking I/O is used. With non-blocking I/O something like select() 248 /// or poll() is needed to avoid a busy-wait loop, and the same race condition 249 /// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in 250 /// POSIX) but neither is portable enough in 2013. 
/// The self-pipe trick is old and very portable.
///
/// \param      pair        File pair; src_fd is polled if is_reading is
///                         true, otherwise dest_fd is polled
/// \param      timeout     Timeout that is passed to poll() as is
/// \param      is_reading  true to wait for input, false to wait for
///                         output
///
/// \return     IO_WAIT_MORE if poll() reported events on the file
///             descriptor, IO_WAIT_TIMEOUT if poll() timed out, or
///             IO_WAIT_ERROR if user_abort was set or poll() failed
///             (an error message is shown if poll() failed).
static io_wait_ret
io_wait(file_pair *pair, int timeout, bool is_reading)
{
	struct pollfd pfd[2];

	if (is_reading) {
		pfd[0].fd = pair->src_fd;
		pfd[0].events = POLLIN;
	} else {
		pfd[0].fd = pair->dest_fd;
		pfd[0].events = POLLOUT;
	}

	// The read end of the self-pipe becomes readable when
	// io_write_to_user_abort_pipe() has been called.
	pfd[1].fd = user_abort_pipe[0];
	pfd[1].events = POLLIN;

	while (true) {
		const int ret = poll(pfd, 2, timeout);

		// Check this first so that a user abort takes precedence
		// over whatever poll() returned.
		if (user_abort)
			return IO_WAIT_ERROR;

		if (ret == -1) {
			if (errno == EINTR || errno == EAGAIN)
				continue;

			message_error(_("%s: poll() failed: %s"),
					is_reading ? pair->src_name
						: pair->dest_name,
					strerror(errno));
			return IO_WAIT_ERROR;
		}

		if (ret == 0)
			return IO_WAIT_TIMEOUT;

		if (pfd[0].revents != 0)
			return IO_WAIT_MORE;
	}
}
#endif


/// \brief      Unlink a file
///
/// This tries to verify that the file being unlinked really is the file that
/// we want to unlink by verifying device and inode numbers. There's still
/// a small unavoidable race, but this is much better than nothing (the file
/// could have been moved/replaced even hours earlier).
///
/// \param      name        Name of the file to unlink
/// \param      known_st    Previously obtained stat information whose
///                         device and inode numbers are compared against
///                         the file that currently exists as name. This
///                         is ignored on DOS-like systems.
static void
io_unlink(const char *name, const struct stat *known_st)
{
#if defined(TUKLIB_DOSLIKE)
	// On DOS-like systems, st_ino is meaningless, so don't bother
	// testing it. Just silence a compiler warning.
	(void)known_st;
#else
	struct stat new_st;

	// If --force was used, use stat() instead of lstat(). This way
	// (de)compressing symlinks works correctly. However, it also means
	// that xz cannot detect if a regular file foo is renamed to bar
	// and then a symlink foo -> bar is created. Because of stat()
	// instead of lstat(), xz will think that foo hasn't been replaced
	// with another file. Thus, xz will remove foo even though it no
	// longer is the same file that xz used when it started compressing.
	// Probably it's not too bad though, so this doesn't need a more
	// complex fix.
	const int stat_ret = opt_force
			? stat(name, &new_st) : lstat(name, &new_st);

	if (stat_ret
#	ifdef __VMS
			// st_ino is an array, and we don't want to
			// compare st_dev at all.
			|| memcmp(&new_st.st_ino, &known_st->st_ino,
				sizeof(new_st.st_ino)) != 0
#	else
			// Typical POSIX-like system
			|| new_st.st_dev != known_st->st_dev
			|| new_st.st_ino != known_st->st_ino
#	endif
			)
		// TRANSLATORS: When compression or decompression finishes,
		// and xz is going to remove the source file, xz first checks
		// if the source file still exists, and if it does, does its
		// device and inode numbers match what xz saw when it opened
		// the source file. If these checks fail, this message is
		// shown, %s being the filename, and the file is not deleted.
		// The check for device and inode numbers is there, because
		// it is possible that the user has put a new file in place
		// of the original file, and in that case it obviously
		// shouldn't be removed.
		message_warning(_("%s: File seems to have been moved, "
				"not removing"), name);
	else
#endif
		// NOTE: On non-DOS-like systems this statement is the body
		// of the "else" above; on DOS-like systems it is executed
		// unconditionally.
		//
		// There's a race condition between lstat() and unlink()
		// but at least we have tried to avoid removing wrong file.
		if (unlink(name))
			message_warning(_("%s: Cannot remove: %s"),
					name, strerror(errno));

	return;
}


/// \brief      Copies owner/group and permissions
///
/// The timestamps are copied too. Failures to set the owner (only when
/// warn_fchown is true), group, or permissions result in a warning;
/// failures to set the timestamps are silently ignored.
///
/// \param      pair    File pair; the attributes in src_st are applied
///                     to dest_fd (or to dest_name if no function that
///                     takes a file descriptor is available for setting
///                     the timestamps)
///
/// \todo       ACL and EA support
///
static void
io_copy_attrs(const file_pair *pair)
{
	// Skip chown and chmod on Windows.
#ifndef TUKLIB_DOSLIKE
	// This function is more tricky than you may think at first.
	// Blindly copying permissions may permit users to access the
	// destination file who didn't have permission to access the
	// source file.

	// Try changing the owner of the file. If we aren't root or the owner
	// isn't already us, fchown() probably doesn't succeed. We warn
	// about failing fchown() only if we are root.
	if (fchown(pair->dest_fd, pair->src_st.st_uid, (gid_t)(-1))
			&& warn_fchown)
		message_warning(_("%s: Cannot set the file owner: %s"),
				pair->dest_name, strerror(errno));

	mode_t mode;

	// With BSD semantics the new dest file may have a group that
	// does not belong to the user. If the src file has the same gid
	// nothing has to be done. Nevertheless OpenBSD fchown(2) fails
	// in this case which seems to be POSIX compliant. As there is
	// nothing to do, skip the system call.
	if (pair->dest_st.st_gid != pair->src_st.st_gid
			&& fchown(pair->dest_fd, (uid_t)(-1),
				pair->src_st.st_gid)) {
		message_warning(_("%s: Cannot set the file group: %s"),
				pair->dest_name, strerror(errno));
		// We can still safely copy some additional permissions:
		// `group' must be at least as strict as `other' and
		// also vice versa.
		//
		// NOTE: After this, the owner of the source file may
		// get additional permissions. This shouldn't be too bad,
		// because the owner would have had permission to chmod
		// the original file anyway.
		//
		// That is, both `group' and `other' get only those
		// permission bits that are set for both of them in
		// the source file.
		mode = ((pair->src_st.st_mode & 0070) >> 3)
				& (pair->src_st.st_mode & 0007);
		mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode;
	} else {
		// Drop the setuid, setgid, and sticky bits.
		mode = pair->src_st.st_mode & 0777;
	}

	if (fchmod(pair->dest_fd, mode))
		message_warning(_("%s: Cannot set the file permissions: %s"),
				pair->dest_name, strerror(errno));
#endif

	// Copy the timestamps. We have several possible ways to do this, of
	// which some are better in both security and precision.
	//
	// First, get the nanosecond part of the timestamps. As of writing,
	// it's not standardized by POSIX, and there are several names for
	// the same thing in struct stat.
	long atime_nsec;
	long mtime_nsec;

#	if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC)
	// GNU and Solaris
	atime_nsec = pair->src_st.st_atim.tv_nsec;
	mtime_nsec = pair->src_st.st_mtim.tv_nsec;

#	elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC)
	// BSD
	atime_nsec = pair->src_st.st_atimespec.tv_nsec;
	mtime_nsec = pair->src_st.st_mtimespec.tv_nsec;

#	elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC)
	// GNU and BSD without extensions
	atime_nsec = pair->src_st.st_atimensec;
	mtime_nsec = pair->src_st.st_mtimensec;

#	elif defined(HAVE_STRUCT_STAT_ST_UATIME)
	// Tru64
	atime_nsec = pair->src_st.st_uatime * 1000;
	mtime_nsec = pair->src_st.st_umtime * 1000;

#	elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC)
	// UnixWare
	atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec;
	mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec;

#	else
	// Safe fallback
	atime_nsec = 0;
	mtime_nsec = 0;
#	endif

	// Construct a structure to hold the timestamps and call appropriate
	// function to set the timestamps.
#if defined(HAVE_FUTIMENS)
	// Use nanosecond precision.
	struct timespec tv[2];
	tv[0].tv_sec = pair->src_st.st_atime;
	tv[0].tv_nsec = atime_nsec;
	tv[1].tv_sec = pair->src_st.st_mtime;
	tv[1].tv_nsec = mtime_nsec;

	(void)futimens(pair->dest_fd, tv);

#elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
	// Use microsecond precision.
	struct timeval tv[2];
	tv[0].tv_sec = pair->src_st.st_atime;
	tv[0].tv_usec = atime_nsec / 1000;
	tv[1].tv_sec = pair->src_st.st_mtime;
	tv[1].tv_usec = mtime_nsec / 1000;

#	if defined(HAVE_FUTIMES)
	(void)futimes(pair->dest_fd, tv);
#	elif defined(HAVE_FUTIMESAT)
	(void)futimesat(pair->dest_fd, NULL, tv);
#	else
	// Argh, no function to use a file descriptor to set the timestamp.
	(void)utimes(pair->dest_name, tv);
#	endif

#elif defined(HAVE__FUTIME)
	// Use one-second precision with Windows-specific _futime().
	// We could use utime() too except that for some reason the
	// timestamp will get reset at close(). With _futime() it works.
	// This struct cannot be const as _futime() takes a non-const pointer.
	struct _utimbuf buf = {
		.actime = pair->src_st.st_atime,
		.modtime = pair->src_st.st_mtime,
	};

	// Avoid warnings.
	(void)atime_nsec;
	(void)mtime_nsec;

	(void)_futime(pair->dest_fd, &buf);

#elif defined(HAVE_UTIME)
	// Use one-second precision. utime() doesn't support using file
	// descriptor either. Some systems have broken utime() prototype
	// so don't make this const.
	struct utimbuf buf = {
		.actime = pair->src_st.st_atime,
		.modtime = pair->src_st.st_mtime,
	};

	// Avoid warnings.
	(void)atime_nsec;
	(void)mtime_nsec;

	(void)utime(pair->dest_name, &buf);
#endif

	return;
}


/// Opens the source file. Returns false on success, true on error.
///
/// pair->src_name must have been set by the caller. On success,
/// pair->src_fd is set and, unless reading from standard input,
/// pair->src_st is filled in. A message is shown on every failure path.
static bool
io_open_src_real(file_pair *pair)
{
	// There's nothing to open when reading from stdin.
	// (This compares the pointers, not the string contents.)
	if (pair->src_name == stdin_filename) {
		pair->src_fd = STDIN_FILENO;
#ifdef TUKLIB_DOSLIKE
		setmode(STDIN_FILENO, O_BINARY);
#else
		// Try to set stdin to non-blocking mode. It won't work
		// e.g. on OpenBSD if stdout is e.g. /dev/null. In such
		// case we proceed as if stdin were non-blocking anyway
		// (in case of /dev/null it will be in practice). The
		// same applies to stdout in io_open_dest_real().
		stdin_flags = fcntl(STDIN_FILENO, F_GETFL);
		if (stdin_flags == -1) {
			message_error(_("Error getting the file status flags "
					"from standard input: %s"),
					strerror(errno));
			return true;
		}

		// Remember to restore the flags in io_close_src() only
		// if we actually managed to change them.
		if ((stdin_flags & O_NONBLOCK) == 0
				&& fcntl(STDIN_FILENO, F_SETFL,
					stdin_flags | O_NONBLOCK) != -1)
			restore_stdin_flags = true;
#endif
#ifdef HAVE_POSIX_FADVISE
		// It will fail if stdin is a pipe and that's fine.
		(void)posix_fadvise(STDIN_FILENO, 0, 0,
				opt_mode == MODE_LIST
					? POSIX_FADV_RANDOM
					: POSIX_FADV_SEQUENTIAL);
#endif
		return false;
	}

	// Symlinks are not followed unless writing to stdout or --force
	// or --keep was used.
	const bool follow_symlinks
			= opt_stdout || opt_force || opt_keep_original;

	// We accept only regular files if we are writing the output
	// to disk too. bzip2 allows overriding this with --force but
	// gzip and xz don't.
	const bool reg_files_only = !opt_stdout;

	// Flags for open()
	int flags = O_RDONLY | O_BINARY | O_NOCTTY;

#ifndef TUKLIB_DOSLIKE
	// Use non-blocking I/O:
	//   - It prevents blocking when opening FIFOs and some other
	//     special files, which is good if we want to accept only
	//     regular files.
	//   - It can help avoiding some race conditions with signal handling.
	flags |= O_NONBLOCK;
#endif

#if defined(O_NOFOLLOW)
	if (!follow_symlinks)
		flags |= O_NOFOLLOW;
#elif !defined(TUKLIB_DOSLIKE)
	// Some POSIX-like systems lack O_NOFOLLOW (it's not required
	// by POSIX). Check for symlinks with a separate lstat() on
	// these systems.
	if (!follow_symlinks) {
		struct stat st;
		if (lstat(pair->src_name, &st)) {
			message_error("%s: %s", pair->src_name,
					strerror(errno));
			return true;

		} else if (S_ISLNK(st.st_mode)) {
			message_warning(_("%s: Is a symbolic link, "
					"skipping"), pair->src_name);
			return true;
		}
	}
#else
	// Avoid warnings.
	(void)follow_symlinks;
#endif

	// Try to open the file. Signals have been blocked so EINTR shouldn't
	// be possible.
	pair->src_fd = open(pair->src_name, flags);

	if (pair->src_fd == -1) {
		// Signals (that have a signal handler) have been blocked.
		assert(errno != EINTR);

#ifdef O_NOFOLLOW
		// Give an understandable error message if the reason
		// for failing was that the file was a symbolic link.
		//
		// Note that at least Linux, OpenBSD, Solaris, and Darwin
		// use ELOOP to indicate that O_NOFOLLOW was the reason
		// that open() failed. Because there may be
		// directories in the pathname, ELOOP may occur also
		// because of a symlink loop in the directory part.
		// So ELOOP doesn't tell us what actually went wrong,
		// and this stupidity went into POSIX-1.2008 too.
		//
		// FreeBSD associates EMLINK with O_NOFOLLOW and
		// Tru64 uses ENOTSUP. We use these directly here
		// and skip the lstat() call and the associated race.
		// I want to hear if there are other kernels that
		// fail with something else than ELOOP with O_NOFOLLOW.
		bool was_symlink = false;

#	if defined(__FreeBSD__) || defined(__DragonFly__)
		if (errno == EMLINK)
			was_symlink = true;

#	elif defined(__digital__) && defined(__unix__)
		if (errno == ENOTSUP)
			was_symlink = true;

#	elif defined(__NetBSD__)
		if (errno == EFTYPE)
			was_symlink = true;

#	else
		if (errno == ELOOP && !follow_symlinks) {
			// lstat() may change errno, but the errno from
			// open() is needed for the error message below.
			const int saved_errno = errno;
			struct stat st;
			if (lstat(pair->src_name, &st) == 0
					&& S_ISLNK(st.st_mode))
				was_symlink = true;

			errno = saved_errno;
		}
#	endif

		if (was_symlink)
			message_warning(_("%s: Is a symbolic link, "
					"skipping"), pair->src_name);
		else
#endif
			// Something else than O_NOFOLLOW failing
			// (assuming that the race conditions didn't
			// confuse us).
			message_error("%s: %s", pair->src_name,
					strerror(errno));

		return true;
	}

	// Stat the source file. We need the result also when we copy
	// the permissions, and when unlinking.
	//
	// NOTE: Use stat() instead of fstat() with DJGPP, because
	// then we have a better chance to get st_ino value that can
	// be used in io_open_dest_real() to prevent overwriting the
	// source file.
#ifdef __DJGPP__
	if (stat(pair->src_name, &pair->src_st))
		goto error_msg;
#else
	if (fstat(pair->src_fd, &pair->src_st))
		goto error_msg;
#endif

	if (S_ISDIR(pair->src_st.st_mode)) {
		message_warning(_("%s: Is a directory, skipping"),
				pair->src_name);
		goto error;
	}

	if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) {
		message_warning(_("%s: Not a regular file, skipping"),
				pair->src_name);
		goto error;
	}

#ifndef TUKLIB_DOSLIKE
	if (reg_files_only && !opt_force && !opt_keep_original) {
		if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) {
			// gzip rejects setuid and setgid files even
			// when --force was used. bzip2 doesn't check
			// for them, but calls fchown() after fchmod(),
			// and many systems automatically drop setuid
			// and setgid bits there.
			//
			// We accept setuid and setgid files if
			// --force or --keep was used. We drop these bits
			// explicitly in io_copy_attrs().
			message_warning(_("%s: File has setuid or "
					"setgid bit set, skipping"),
					pair->src_name);
			goto error;
		}

		if (pair->src_st.st_mode & S_ISVTX) {
			message_warning(_("%s: File has sticky bit "
					"set, skipping"),
					pair->src_name);
			goto error;
		}

		if (pair->src_st.st_nlink > 1) {
			message_warning(_("%s: Input file has more "
					"than one hard link, "
					"skipping"), pair->src_name);
			goto error;
		}
	}

	// If it is something else than a regular file, wait until
	// there is input available. This way reading from FIFOs
	// will work when open() is used with O_NONBLOCK.
	if (!S_ISREG(pair->src_st.st_mode)) {
		signals_unblock();
		const io_wait_ret ret = io_wait(pair, -1, true);
		signals_block();

		if (ret != IO_WAIT_MORE)
			goto error;
	}
#endif

#ifdef HAVE_POSIX_FADVISE
	// It will fail with some special files like FIFOs but that is fine.
	(void)posix_fadvise(pair->src_fd, 0, 0,
			opt_mode == MODE_LIST
				? POSIX_FADV_RANDOM
				: POSIX_FADV_SEQUENTIAL);
#endif

	return false;

error_msg:
	message_error("%s: %s", pair->src_name, strerror(errno));
error:
	(void)close(pair->src_fd);
	return true;
}


/// \brief      Opens the source file
///
/// If sandboxing support has been built in, io_sandbox_enter() is called
/// after the file has been opened successfully.
///
/// \param      src_name    Name of the input file. Passing the
///                         stdin_filename pointer makes this read from
///                         standard input.
///
/// \return     On success, a pointer to a statically allocated file_pair
///             is returned; the same structure is reused by every call.
///             On error, or if src_name is an empty string, a message
///             is shown and NULL is returned.
extern file_pair *
io_open_src(const char *src_name)
{
	if (src_name[0] == '\0') {
		message_error(_("Empty filename, skipping"));
		return NULL;
	}

	// Since we have only one file open at a time, we can use
	// a statically allocated structure.
	static file_pair pair;

	// This implicitly also initializes src_st.st_size to zero
	// which is expected to be <= 0 by default. fstat() isn't
	// called when reading from standard input but src_st.st_size
	// is still read.
	pair = (file_pair){
		.src_name = src_name,
		.dest_name = NULL,
		.src_fd = -1,
		.dest_fd = -1,
		.src_eof = false,
		.src_has_seen_input = false,
		.flush_needed = false,
		.dest_try_sparse = false,
		.dest_pending_sparse = 0,
	};

	// Block the signals, for which we have a custom signal handler, so
	// that we don't need to worry about EINTR.
	signals_block();
	const bool error = io_open_src_real(&pair);
	signals_unblock();

#ifdef ENABLE_SANDBOX
	if (!error)
		io_sandbox_enter(pair.src_fd);
#endif

	return error ? NULL : &pair;
}


/// \brief      Closes source file of the file_pair structure
///
/// If the file status flags of standard input were modified in
/// io_open_src_real(), they are restored here.
///
/// \param      pair    File whose src_fd should be closed
/// \param      success If true, the file will be removed from the disk if
///                     closing succeeds and --keep hasn't been used.
static void
io_close_src(file_pair *pair, bool success)
{
#ifndef TUKLIB_DOSLIKE
	if (restore_stdin_flags) {
		assert(pair->src_fd == STDIN_FILENO);

		restore_stdin_flags = false;

		if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1)
			message_error(_("Error restoring the status flags "
					"to standard input: %s"),
					strerror(errno));
	}
#endif

	if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) {
		// Close the file before possibly unlinking it. On DOS-like
		// systems this is always required since unlinking will fail
		// if the file is open. On POSIX systems it usually works
		// to unlink open files, but in some cases it doesn't and
		// one gets EBUSY in errno.
		//
		// xz 5.2.2 and older unlinked the file before closing it
		// (except on DOS-like systems). The old code didn't handle
		// EBUSY and could fail e.g. on some CIFS shares. The
		// advantage of unlinking before closing is negligible
		// (avoids a race between close() and stat()/lstat() and
		// unlink()), so let's keep this simple.
		(void)close(pair->src_fd);

		if (success && !opt_keep_original)
			io_unlink(pair->src_name, &pair->src_st);
	}

	return;
}


/// Opens the destination file. Returns false on success, true on error.
///
/// Standard output is used if opt_stdout is set or the input is standard
/// input; otherwise the name from suffix_get_dest_name() is created with
/// O_EXCL. On success, pair->dest_name and pair->dest_fd have been set and,
/// on non-DOS-like systems, pair->dest_st and pair->dest_try_sparse too.
static bool
io_open_dest_real(file_pair *pair)
{
	if (opt_stdout || pair->src_fd == STDIN_FILENO) {
		// We don't modify or free() this.
		pair->dest_name = (char *)"(stdout)";
		pair->dest_fd = STDOUT_FILENO;
#ifdef TUKLIB_DOSLIKE
		setmode(STDOUT_FILENO, O_BINARY);
#else
		// Try to set O_NONBLOCK if it isn't already set.
		// If it fails, we assume that stdout is non-blocking
		// in practice. See the comments in io_open_src_real()
		// for similar situation with stdin.
		//
		// NOTE: O_APPEND may be unset later in this function
		// and it relies on stdout_flags being set here.
		stdout_flags = fcntl(STDOUT_FILENO, F_GETFL);
		if (stdout_flags == -1) {
			message_error(_("Error getting the file status flags "
					"from standard output: %s"),
					strerror(errno));
			return true;
		}

		if ((stdout_flags & O_NONBLOCK) == 0
				&& fcntl(STDOUT_FILENO, F_SETFL,
					stdout_flags | O_NONBLOCK) != -1)
			restore_stdout_flags = true;
#endif
	} else {
		pair->dest_name = suffix_get_dest_name(pair->src_name);
		if (pair->dest_name == NULL)
			return true;

#ifdef __DJGPP__
		struct stat st;
		if (stat(pair->dest_name, &st) == 0) {
			// Check that it isn't a special file like "prn".
			if (st.st_dev == -1) {
				message_error("%s: Refusing to write to "
						"a DOS special file",
						pair->dest_name);
				free(pair->dest_name);
				return true;
			}

			// Check that we aren't overwriting the source file.
			if (st.st_dev == pair->src_st.st_dev
					&& st.st_ino == pair->src_st.st_ino) {
				message_error("%s: Output file is the same "
						"as the input file",
						pair->dest_name);
				free(pair->dest_name);
				return true;
			}
		}
#endif

		// If --force was used, unlink the target file first.
		if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
			message_error(_("%s: Cannot remove: %s"),
					pair->dest_name, strerror(errno));
			free(pair->dest_name);
			return true;
		}

		// Open the file.
		int flags = O_WRONLY | O_BINARY | O_NOCTTY
				| O_CREAT | O_EXCL;
#ifndef TUKLIB_DOSLIKE
		flags |= O_NONBLOCK;
#endif
		// Create the file readable and writable by the owner only.
		// io_copy_attrs() sets the final permissions later.
		const mode_t mode = S_IRUSR | S_IWUSR;
		pair->dest_fd = open(pair->dest_name, flags, mode);

		if (pair->dest_fd == -1) {
			message_error("%s: %s", pair->dest_name,
					strerror(errno));
			free(pair->dest_name);
			return true;
		}
	}

#ifndef TUKLIB_DOSLIKE
	// dest_st isn't used on DOS-like systems except as a dummy
	// argument to io_unlink(), so don't fstat() on such systems.
	if (fstat(pair->dest_fd, &pair->dest_st)) {
		// If fstat() really fails, we have a safe fallback here.
#	if defined(__VMS)
		pair->dest_st.st_ino[0] = 0;
		pair->dest_st.st_ino[1] = 0;
		pair->dest_st.st_ino[2] = 0;
#	else
		pair->dest_st.st_dev = 0;
		pair->dest_st.st_ino = 0;
#	endif
	} else if (try_sparse && opt_mode == MODE_DECOMPRESS) {
		// When writing to standard output, we need to be extra
		// careful:
		//  - It may be connected to something else than
		//    a regular file.
		//  - We aren't necessarily writing to a new empty file
		//    or to the end of an existing file.
		//  - O_APPEND may be active.
		//
		// TODO: I'm keeping this disabled for DOS-like systems
		// for now. FAT doesn't support sparse files, but NTFS
		// does, so maybe this should be enabled on Windows after
		// some testing.
		if (pair->dest_fd == STDOUT_FILENO) {
			if (!S_ISREG(pair->dest_st.st_mode))
				return false;

			if (stdout_flags & O_APPEND) {
				// Creating a sparse file is not possible
				// when O_APPEND is active (it's used by
				// shell's >> redirection). As I understand
				// it, it is safe to temporarily disable
				// O_APPEND in xz, because if someone
				// happened to write to the same file at the
				// same time, results would be bad anyway
				// (users shouldn't assume that xz uses any
				// specific block size when writing data).
				//
				// The write position may be something else
				// than the end of the file, so we must fix
				// it to start writing at the end of the file
				// to imitate O_APPEND.
				if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1)
					return false;

				// Construct the new file status flags.
				// If O_NONBLOCK was set earlier in this
				// function, it must be kept here too.
				int flags = stdout_flags & ~O_APPEND;
				if (restore_stdout_flags)
					flags |= O_NONBLOCK;

				// If this fcntl() fails, we continue but won't
				// try to create sparse output. The original
				// flags will still be restored if needed (to
				// unset O_NONBLOCK) when the file is finished.
				if (fcntl(STDOUT_FILENO, F_SETFL, flags) == -1)
					return false;

				// Disabling O_APPEND succeeded. Mark
				// that the flags should be restored
				// in io_close_dest(). (This may have already
				// been set when enabling O_NONBLOCK.)
				restore_stdout_flags = true;

			} else if (lseek(STDOUT_FILENO, 0, SEEK_CUR)
					!= pair->dest_st.st_size) {
				// Writing won't start exactly at the end
				// of the file. We cannot use sparse output,
				// because it would probably corrupt the file.
				return false;
			}
		}

		pair->dest_try_sparse = true;
	}
#endif

	return false;
}


/// \brief      Opens the destination file
///
/// This calls io_open_dest_real() between signals_block() and
/// signals_unblock().
///
/// \param      pair    File pair whose source file has already been opened
///
/// \return     False on success, true on error.
extern bool
io_open_dest(file_pair *pair)
{
	signals_block();
	const bool ret = io_open_dest_real(pair);
	signals_unblock();
	return ret;
}


/// \brief      Closes destination file of the file_pair structure
///
/// \param      pair    File whose dest_fd should be closed
/// \param      success If false, the file will be removed from the disk.
///
/// \return     False if closing succeeds. On error, true is returned and
///             error message printed.
static bool
io_close_dest(file_pair *pair, bool success)
{
#ifndef TUKLIB_DOSLIKE
	// If io_open_dest() has modified the file status flags of standard
	// output (enabled O_NONBLOCK or disabled O_APPEND), restore the
	// original flags here.
	if (restore_stdout_flags) {
		assert(pair->dest_fd == STDOUT_FILENO);

		restore_stdout_flags = false;

		if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) {
			message_error(_("Error restoring the O_APPEND flag "
					"to standard output: %s"),
					strerror(errno));
			return true;
		}
	}
#endif

	// Standard output is never closed here, and its name
	// must not be passed to free().
	if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO)
		return false;

	if (close(pair->dest_fd)) {
		message_error(_("%s: Closing the file failed: %s"),
				pair->dest_name, strerror(errno));

		// Closing destination file failed, so we cannot trust its
		// contents. Get rid of junk:
		io_unlink(pair->dest_name, &pair->dest_st);
		free(pair->dest_name);
		return true;
	}

	// If the operation using this file wasn't successful, we get rid
	// of the junk file.
	if (!success)
		io_unlink(pair->dest_name, &pair->dest_st);

	free(pair->dest_name);

	return false;
}


/// \brief      Closes the file descriptors and frees possible allocated memory
///
/// Before closing, the pending hole at the end of a sparse output file is
/// written out and, if the destination is a file other than standard
/// output, the file attributes are copied from the source file to it.
///
/// \param      pair    File pair to close
/// \param      success If true, the source file is removed after both
///                     files have been closed successfully unless --keep
///                     was used. If false, the destination file is
///                     removed instead (unless it is standard output).
extern void
io_close(file_pair *pair, bool success)
{
	// Take care of sparseness at the end of the output file.
	if (success && pair->dest_try_sparse
			&& pair->dest_pending_sparse > 0) {
		// Seek forward one byte less than the size of the pending
		// hole, then write one zero-byte. This way the file grows
		// to its correct size. An alternative would be to use
		// ftruncate() but that isn't portable enough (e.g. it
		// doesn't work with FAT on Linux; FAT isn't that important
		// since it doesn't support sparse files anyway, but we don't
		// want to create corrupt files on it).
		if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1,
				SEEK_CUR) == -1) {
			message_error(_("%s: Seeking failed when trying "
					"to create a sparse file: %s"),
					pair->dest_name, strerror(errno));
			success = false;
		} else {
			const uint8_t zero[1] = { '\0' };
			if (io_write_buf(pair, zero, 1))
				success = false;
		}
	}

	signals_block();

	// Copy the file attributes. We need to skip this if destination
	// file isn't open or it is standard output.
	if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO)
		io_copy_attrs(pair);

	// Close the destination first. If it fails, we must not remove
	// the source file!
	if (io_close_dest(pair, success))
		success = false;

	// Close the source file, and unlink it if the operation using this
	// file pair was successful and we haven't requested to keep the
	// source file.
	io_close_src(pair, success);

	signals_unblock();

	return;
}


/// \brief      Seeks the source file backwards from the current position
///
/// \param      pair        File pair whose src_fd is to be repositioned
/// \param      rewind_size Number of bytes to seek backwards; must not
///                         exceed IO_BUFFER_SIZE. Zero means that
///                         nothing is done.
///
/// Errors from lseek() are ignored.
extern void
io_fix_src_pos(file_pair *pair, size_t rewind_size)
{
	assert(rewind_size <= IO_BUFFER_SIZE);

	if (rewind_size > 0) {
		// This doesn't need to work on unseekable file descriptors,
		// so just ignore possible errors.
		(void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR);
	}

	return;
}


/// \brief      Reads from the source file to a buffer
///
/// \param      pair    File pair having the source file open for reading
/// \param      buf     Destination buffer to hold the read data
/// \param      size    Number of bytes to try to read; must be less
///                     than SSIZE_MAX
///
/// \return     Number of bytes read. This is less than size if the end
///             of the input was reached (pair->src_eof is set to true) or
///             if the flush timeout expired while waiting for more input
///             (pair->flush_needed is set to true). On a read error (a
///             message is shown) or if the user has requested aborting,
///             SIZE_MAX is returned.
extern size_t
io_read(file_pair *pair, io_buf *buf, size_t size)
{
	// We use small buffers here.
	assert(size < SSIZE_MAX);

	size_t pos = 0;

	while (pos < size) {
		const ssize_t amount = read(
				pair->src_fd, buf->u8 + pos, size - pos);

		if (amount == 0) {
			pair->src_eof = true;
			break;
		}

		if (amount == -1) {
			if (errno == EINTR) {
				if (user_abort)
					return SIZE_MAX;

				continue;
			}

#ifndef TUKLIB_DOSLIKE
			if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) {
				// Disable the flush-timeout if no input has
				// been seen since the previous flush and thus
				// there would be nothing to flush after the
				// timeout expires (avoids busy waiting).
				const int timeout = pair->src_has_seen_input
						? mytime_get_flush_timeout()
						: -1;

				switch (io_wait(pair, timeout, true)) {
				case IO_WAIT_MORE:
					continue;

				case IO_WAIT_ERROR:
					return SIZE_MAX;

				case IO_WAIT_TIMEOUT:
					pair->flush_needed = true;
					return pos;

				default:
					message_bug();
				}
			}
#endif

			message_error(_("%s: Read error: %s"),
					pair->src_name, strerror(errno));

			return SIZE_MAX;
		}

		pos += (size_t)(amount);

		// The first input after a flush (or after opening the
		// file) starts the flush timer.
		if (!pair->src_has_seen_input) {
			pair->src_has_seen_input = true;
			mytime_set_flush_time();
		}
	}

	return pos;
}


extern bool
io_seek_src(file_pair *pair, uint64_t pos)
{
	// Caller must not attempt to seek past the end of the input file
	// (seeking to 100 in a 100-byte file is seeking to the end of
	// the file, not past the end of the file, and thus that is allowed).
	//
	// This also validates that pos can be safely cast to off_t.
1219 if (pos > (uint64_t)(pair->src_st.st_size)) 1220 message_bug(); 1221 1222 if (lseek(pair->src_fd, (off_t)(pos), SEEK_SET) == -1) { 1223 message_error(_("%s: Error seeking the file: %s"), 1224 pair->src_name, strerror(errno)); 1225 return true; 1226 } 1227 1228 pair->src_eof = false; 1229 1230 return false; 1231 } 1232 1233 1234 extern bool 1235 io_pread(file_pair *pair, io_buf *buf, size_t size, uint64_t pos) 1236 { 1237 // Using lseek() and read() is more portable than pread() and 1238 // for us it is as good as real pread(). 1239 if (io_seek_src(pair, pos)) 1240 return true; 1241 1242 const size_t amount = io_read(pair, buf, size); 1243 if (amount == SIZE_MAX) 1244 return true; 1245 1246 if (amount != size) { 1247 message_error(_("%s: Unexpected end of file"), 1248 pair->src_name); 1249 return true; 1250 } 1251 1252 return false; 1253 } 1254 1255 1256 static bool 1257 is_sparse(const io_buf *buf) 1258 { 1259 assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0); 1260 1261 for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i) 1262 if (buf->u64[i] != 0) 1263 return false; 1264 1265 return true; 1266 } 1267 1268 1269 static bool 1270 io_write_buf(file_pair *pair, const uint8_t *buf, size_t size) 1271 { 1272 assert(size < SSIZE_MAX); 1273 1274 while (size > 0) { 1275 const ssize_t amount = write(pair->dest_fd, buf, size); 1276 if (amount == -1) { 1277 if (errno == EINTR) { 1278 if (user_abort) 1279 return true; 1280 1281 continue; 1282 } 1283 1284 #ifndef TUKLIB_DOSLIKE 1285 if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) { 1286 if (io_wait(pair, -1, false) == IO_WAIT_MORE) 1287 continue; 1288 1289 return true; 1290 } 1291 #endif 1292 1293 // Handle broken pipe specially. gzip and bzip2 1294 // don't print anything on SIGPIPE. In addition, 1295 // gzip --quiet uses exit status 2 (warning) on 1296 // broken pipe instead of whatever raise(SIGPIPE) 1297 // would make it return. It is there to hide "Broken 1298 // pipe" message on some old shells (probably old 1299 // GNU bash). 
1300 // 1301 // We don't do anything special with --quiet, which 1302 // is what bzip2 does too. If we get SIGPIPE, we 1303 // will handle it like other signals by setting 1304 // user_abort, and get EPIPE here. 1305 if (errno != EPIPE) 1306 message_error(_("%s: Write error: %s"), 1307 pair->dest_name, strerror(errno)); 1308 1309 return true; 1310 } 1311 1312 buf += (size_t)(amount); 1313 size -= (size_t)(amount); 1314 } 1315 1316 return false; 1317 } 1318 1319 1320 extern bool 1321 io_write(file_pair *pair, const io_buf *buf, size_t size) 1322 { 1323 assert(size <= IO_BUFFER_SIZE); 1324 1325 if (pair->dest_try_sparse) { 1326 // Check if the block is sparse (contains only zeros). If it 1327 // sparse, we just store the amount and return. We will take 1328 // care of actually skipping over the hole when we hit the 1329 // next data block or close the file. 1330 // 1331 // Since io_close() requires that dest_pending_sparse > 0 1332 // if the file ends with sparse block, we must also return 1333 // if size == 0 to avoid doing the lseek(). 1334 if (size == IO_BUFFER_SIZE) { 1335 // Even if the block was sparse, treat it as non-sparse 1336 // if the pending sparse amount is large compared to 1337 // the size of off_t. In practice this only matters 1338 // on 32-bit systems where off_t isn't always 64 bits. 1339 const off_t pending_max 1340 = (off_t)(1) << (sizeof(off_t) * CHAR_BIT - 2); 1341 if (is_sparse(buf) && pair->dest_pending_sparse 1342 < pending_max) { 1343 pair->dest_pending_sparse += (off_t)(size); 1344 return false; 1345 } 1346 } else if (size == 0) { 1347 return false; 1348 } 1349 1350 // This is not a sparse block. If we have a pending hole, 1351 // skip it now. 
1352 if (pair->dest_pending_sparse > 0) { 1353 if (lseek(pair->dest_fd, pair->dest_pending_sparse, 1354 SEEK_CUR) == -1) { 1355 message_error(_("%s: Seeking failed when " 1356 "trying to create a sparse " 1357 "file: %s"), pair->dest_name, 1358 strerror(errno)); 1359 return true; 1360 } 1361 1362 pair->dest_pending_sparse = 0; 1363 } 1364 } 1365 1366 return io_write_buf(pair, buf->u8, size); 1367 } 1368