1 // SPDX-License-Identifier: 0BSD 2 3 /////////////////////////////////////////////////////////////////////////////// 4 // 5 /// \file file_io.c 6 /// \brief File opening, unlinking, and closing 7 // 8 // Author: Lasse Collin 9 // 10 /////////////////////////////////////////////////////////////////////////////// 11 12 #include "private.h" 13 14 #include <fcntl.h> 15 16 #ifdef TUKLIB_DOSLIKE 17 # include <io.h> 18 #else 19 # include <poll.h> 20 static bool warn_fchown; 21 #endif 22 23 #if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) 24 # include <sys/time.h> 25 #elif defined(HAVE__FUTIME) 26 # include <sys/utime.h> 27 #elif defined(HAVE_UTIME) 28 # include <utime.h> 29 #endif 30 31 #include "tuklib_open_stdxxx.h" 32 33 #ifdef _MSC_VER 34 # ifdef _WIN64 35 typedef __int64 ssize_t; 36 # else 37 typedef int ssize_t; 38 # endif 39 40 typedef int mode_t; 41 # define S_IRUSR _S_IREAD 42 # define S_IWUSR _S_IWRITE 43 44 # define setmode _setmode 45 # define open _open 46 # define close _close 47 # define lseek _lseeki64 48 # define unlink _unlink 49 50 // The casts are to silence warnings. 51 // The sizes are known to be small enough. 52 # define read(fd, buf, size) _read(fd, buf, (unsigned int)(size)) 53 # define write(fd, buf, size) _write(fd, buf, (unsigned int)(size)) 54 55 # define S_ISDIR(m) (((m) & _S_IFMT) == _S_IFDIR) 56 # define S_ISREG(m) (((m) & _S_IFMT) == _S_IFREG) 57 #endif 58 59 #ifndef O_BINARY 60 # define O_BINARY 0 61 #endif 62 63 #ifndef O_NOCTTY 64 # define O_NOCTTY 0 65 #endif 66 67 // Using this macro to silence a warning from gcc -Wlogical-op. 68 #if EAGAIN == EWOULDBLOCK 69 # define IS_EAGAIN_OR_EWOULDBLOCK(e) ((e) == EAGAIN) 70 #else 71 # define IS_EAGAIN_OR_EWOULDBLOCK(e) \ 72 ((e) == EAGAIN || (e) == EWOULDBLOCK) 73 #endif 74 75 76 typedef enum { 77 IO_WAIT_MORE, // Reading or writing is possible. 
78 IO_WAIT_ERROR, // Error or user_abort 79 IO_WAIT_TIMEOUT, // poll() timed out 80 } io_wait_ret; 81 82 83 /// If true, try to create sparse files when decompressing. 84 static bool try_sparse = true; 85 86 #ifndef TUKLIB_DOSLIKE 87 /// File status flags of standard input. This is used by io_open_src() 88 /// and io_close_src(). 89 static int stdin_flags; 90 static bool restore_stdin_flags = false; 91 92 /// Original file status flags of standard output. This is used by 93 /// io_open_dest() and io_close_dest() to save and restore the flags. 94 static int stdout_flags; 95 static bool restore_stdout_flags = false; 96 97 /// Self-pipe used together with the user_abort variable to avoid 98 /// race conditions with signal handling. 99 static int user_abort_pipe[2]; 100 #endif 101 102 103 static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size); 104 105 106 extern void 107 io_init(void) 108 { 109 // Make sure that stdin, stdout, and stderr are connected to 110 // a valid file descriptor. Exit immediately with exit code ERROR 111 // if we cannot make the file descriptors valid. Maybe we should 112 // print an error message, but our stderr could be screwed anyway. 113 tuklib_open_stdxxx(E_ERROR); 114 115 #ifndef TUKLIB_DOSLIKE 116 // If fchown() fails setting the owner, we warn about it only if 117 // we are root. 118 warn_fchown = geteuid() == 0; 119 120 // Create a pipe for the self-pipe trick. 121 if (pipe(user_abort_pipe)) 122 message_fatal(_("Error creating a pipe: %s"), 123 strerror(errno)); 124 125 // Make both ends of the pipe non-blocking. 126 for (unsigned i = 0; i < 2; ++i) { 127 int flags = fcntl(user_abort_pipe[i], F_GETFL); 128 if (flags == -1 || fcntl(user_abort_pipe[i], F_SETFL, 129 flags | O_NONBLOCK) == -1) 130 message_fatal(_("Error creating a pipe: %s"), 131 strerror(errno)); 132 } 133 #endif 134 135 #ifdef __DJGPP__ 136 // Avoid doing useless things when statting files. 137 // This isn't important but doesn't hurt. 
138 _djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE; 139 #endif 140 141 return; 142 } 143 144 145 #ifndef TUKLIB_DOSLIKE 146 extern void 147 io_write_to_user_abort_pipe(void) 148 { 149 // If the write() fails, it's probably due to the pipe being full. 150 // Failing in that case is fine. If the reason is something else, 151 // there's not much we can do since this is called in a signal 152 // handler. So ignore the errors and try to avoid warnings with 153 // GCC and glibc when _FORTIFY_SOURCE=2 is used. 154 uint8_t b = '\0'; 155 const ssize_t ret = write(user_abort_pipe[1], &b, 1); 156 (void)ret; 157 return; 158 } 159 #endif 160 161 162 extern void 163 io_no_sparse(void) 164 { 165 try_sparse = false; 166 return; 167 } 168 169 170 #ifndef TUKLIB_DOSLIKE 171 /// \brief Waits for input or output to become available or for a signal 172 /// 173 /// This uses the self-pipe trick to avoid a race condition that can occur 174 /// if a signal is caught after user_abort has been checked but before e.g. 175 /// read() has been called. In that situation read() could block unless 176 /// non-blocking I/O is used. With non-blocking I/O something like select() 177 /// or poll() is needed to avoid a busy-wait loop, and the same race condition 178 /// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in 179 /// POSIX) but neither is portable enough in 2013. The self-pipe trick is 180 /// old and very portable. 
181 static io_wait_ret 182 io_wait(file_pair *pair, int timeout, bool is_reading) 183 { 184 struct pollfd pfd[2]; 185 186 if (is_reading) { 187 pfd[0].fd = pair->src_fd; 188 pfd[0].events = POLLIN; 189 } else { 190 pfd[0].fd = pair->dest_fd; 191 pfd[0].events = POLLOUT; 192 } 193 194 pfd[1].fd = user_abort_pipe[0]; 195 pfd[1].events = POLLIN; 196 197 while (true) { 198 const int ret = poll(pfd, 2, timeout); 199 200 if (user_abort) 201 return IO_WAIT_ERROR; 202 203 if (ret == -1) { 204 if (errno == EINTR || errno == EAGAIN) 205 continue; 206 207 message_error(_("%s: poll() failed: %s"), 208 is_reading ? pair->src_name 209 : pair->dest_name, 210 strerror(errno)); 211 return IO_WAIT_ERROR; 212 } 213 214 if (ret == 0) 215 return IO_WAIT_TIMEOUT; 216 217 if (pfd[0].revents != 0) 218 return IO_WAIT_MORE; 219 } 220 } 221 #endif 222 223 224 /// \brief Unlink a file 225 /// 226 /// This tries to verify that the file being unlinked really is the file that 227 /// we want to unlink by verifying device and inode numbers. There's still 228 /// a small unavoidable race, but this is much better than nothing (the file 229 /// could have been moved/replaced even hours earlier). 230 static void 231 io_unlink(const char *name, const struct stat *known_st) 232 { 233 #if defined(TUKLIB_DOSLIKE) 234 // On DOS-like systems, st_ino is meaningless, so don't bother 235 // testing it. Just silence a compiler warning. 236 (void)known_st; 237 #else 238 struct stat new_st; 239 240 // If --force was used, use stat() instead of lstat(). This way 241 // (de)compressing symlinks works correctly. However, it also means 242 // that xz cannot detect if a regular file foo is renamed to bar 243 // and then a symlink foo -> bar is created. Because of stat() 244 // instead of lstat(), xz will think that foo hasn't been replaced 245 // with another file. Thus, xz will remove foo even though it no 246 // longer is the same file that xz used when it started compressing. 
247 // Probably it's not too bad though, so this doesn't need a more 248 // complex fix. 249 const int stat_ret = opt_force 250 ? stat(name, &new_st) : lstat(name, &new_st); 251 252 if (stat_ret 253 # ifdef __VMS 254 // st_ino is an array, and we don't want to 255 // compare st_dev at all. 256 || memcmp(&new_st.st_ino, &known_st->st_ino, 257 sizeof(new_st.st_ino)) != 0 258 # else 259 // Typical POSIX-like system 260 || new_st.st_dev != known_st->st_dev 261 || new_st.st_ino != known_st->st_ino 262 # endif 263 ) 264 // TRANSLATORS: When compression or decompression finishes, 265 // and xz is going to remove the source file, xz first checks 266 // if the source file still exists, and if it does, does its 267 // device and inode numbers match what xz saw when it opened 268 // the source file. If these checks fail, this message is 269 // shown, %s being the filename, and the file is not deleted. 270 // The check for device and inode numbers is there, because 271 // it is possible that the user has put a new file in place 272 // of the original file, and in that case it obviously 273 // shouldn't be removed. 274 message_warning(_("%s: File seems to have been moved, " 275 "not removing"), name); 276 else 277 #endif 278 // There's a race condition between lstat() and unlink() 279 // but at least we have tried to avoid removing wrong file. 280 if (unlink(name)) 281 message_warning(_("%s: Cannot remove: %s"), 282 name, strerror(errno)); 283 284 return; 285 } 286 287 288 /// \brief Copies owner/group and permissions 289 /// 290 /// \todo ACL and EA support 291 /// 292 static void 293 io_copy_attrs(const file_pair *pair) 294 { 295 // Skip chown and chmod on Windows. 296 #ifndef TUKLIB_DOSLIKE 297 // This function is more tricky than you may think at first. 298 // Blindly copying permissions may permit users to access the 299 // destination file who didn't have permission to access the 300 // source file. 301 302 // Try changing the owner of the file. 
If we aren't root or the owner 303 // isn't already us, fchown() probably doesn't succeed. We warn 304 // about failing fchown() only if we are root. 305 if (fchown(pair->dest_fd, pair->src_st.st_uid, (gid_t)(-1)) 306 && warn_fchown) 307 message_warning(_("%s: Cannot set the file owner: %s"), 308 pair->dest_name, strerror(errno)); 309 310 mode_t mode; 311 312 // With BSD semantics the new dest file may have a group that 313 // does not belong to the user. If the src file has the same gid 314 // nothing has to be done. Nevertheless OpenBSD fchown(2) fails 315 // in this case which seems to be POSIX compliant. As there is 316 // nothing to do, skip the system call. 317 if (pair->dest_st.st_gid != pair->src_st.st_gid 318 && fchown(pair->dest_fd, (uid_t)(-1), 319 pair->src_st.st_gid)) { 320 message_warning(_("%s: Cannot set the file group: %s"), 321 pair->dest_name, strerror(errno)); 322 // We can still safely copy some additional permissions: 323 // 'group' must be at least as strict as 'other' and 324 // also vice versa. 325 // 326 // NOTE: After this, the owner of the source file may 327 // get additional permissions. This shouldn't be too bad, 328 // because the owner would have had permission to chmod 329 // the original file anyway. 330 mode = ((pair->src_st.st_mode & 0070) >> 3) 331 & (pair->src_st.st_mode & 0007); 332 mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode; 333 } else { 334 // Drop the setuid, setgid, and sticky bits. 335 mode = pair->src_st.st_mode & 0777; 336 } 337 338 if (fchmod(pair->dest_fd, mode)) 339 message_warning(_("%s: Cannot set the file permissions: %s"), 340 pair->dest_name, strerror(errno)); 341 #endif 342 343 // Copy the timestamps. We have several possible ways to do this, of 344 // which some are better in both security and precision. 345 // 346 // First, get the nanosecond part of the timestamps. As of writing, 347 // it's not standardized by POSIX, and there are several names for 348 // the same thing in struct stat. 
349 long atime_nsec; 350 long mtime_nsec; 351 352 # if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) 353 // GNU and Solaris 354 atime_nsec = pair->src_st.st_atim.tv_nsec; 355 mtime_nsec = pair->src_st.st_mtim.tv_nsec; 356 357 # elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) 358 // BSD 359 atime_nsec = pair->src_st.st_atimespec.tv_nsec; 360 mtime_nsec = pair->src_st.st_mtimespec.tv_nsec; 361 362 # elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC) 363 // GNU and BSD without extensions 364 atime_nsec = pair->src_st.st_atimensec; 365 mtime_nsec = pair->src_st.st_mtimensec; 366 367 # elif defined(HAVE_STRUCT_STAT_ST_UATIME) 368 // Tru64 369 atime_nsec = pair->src_st.st_uatime * 1000; 370 mtime_nsec = pair->src_st.st_umtime * 1000; 371 372 # elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC) 373 // UnixWare 374 atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec; 375 mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec; 376 377 # else 378 // Safe fallback 379 atime_nsec = 0; 380 mtime_nsec = 0; 381 # endif 382 383 // Construct a structure to hold the timestamps and call appropriate 384 // function to set the timestamps. 385 #if defined(HAVE_FUTIMENS) 386 // Use nanosecond precision. 387 struct timespec tv[2]; 388 tv[0].tv_sec = pair->src_st.st_atime; 389 tv[0].tv_nsec = atime_nsec; 390 tv[1].tv_sec = pair->src_st.st_mtime; 391 tv[1].tv_nsec = mtime_nsec; 392 393 (void)futimens(pair->dest_fd, tv); 394 395 #elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) 396 // Use microsecond precision. 397 struct timeval tv[2]; 398 tv[0].tv_sec = pair->src_st.st_atime; 399 tv[0].tv_usec = atime_nsec / 1000; 400 tv[1].tv_sec = pair->src_st.st_mtime; 401 tv[1].tv_usec = mtime_nsec / 1000; 402 403 # if defined(HAVE_FUTIMES) 404 (void)futimes(pair->dest_fd, tv); 405 # elif defined(HAVE_FUTIMESAT) 406 (void)futimesat(pair->dest_fd, NULL, tv); 407 # else 408 // Argh, no function to use a file descriptor to set the timestamp. 
409 (void)utimes(pair->dest_name, tv); 410 # endif 411 412 #elif defined(HAVE__FUTIME) 413 // Use one-second precision with Windows-specific _futime(). 414 // We could use utime() too except that for some reason the 415 // timestamp will get reset at close(). With _futime() it works. 416 // This struct cannot be const as _futime() takes a non-const pointer. 417 struct _utimbuf buf = { 418 .actime = pair->src_st.st_atime, 419 .modtime = pair->src_st.st_mtime, 420 }; 421 422 // Avoid warnings. 423 (void)atime_nsec; 424 (void)mtime_nsec; 425 426 (void)_futime(pair->dest_fd, &buf); 427 428 #elif defined(HAVE_UTIME) 429 // Use one-second precision. utime() doesn't support using file 430 // descriptor either. Some systems have broken utime() prototype 431 // so don't make this const. 432 struct utimbuf buf = { 433 .actime = pair->src_st.st_atime, 434 .modtime = pair->src_st.st_mtime, 435 }; 436 437 // Avoid warnings. 438 (void)atime_nsec; 439 (void)mtime_nsec; 440 441 (void)utime(pair->dest_name, &buf); 442 #endif 443 444 return; 445 } 446 447 448 /// Opens the source file. Returns false on success, true on error. 449 static bool 450 io_open_src_real(file_pair *pair) 451 { 452 // There's nothing to open when reading from stdin. 453 if (pair->src_name == stdin_filename) { 454 pair->src_fd = STDIN_FILENO; 455 #ifdef TUKLIB_DOSLIKE 456 setmode(STDIN_FILENO, O_BINARY); 457 #else 458 // Try to set stdin to non-blocking mode. It won't work 459 // e.g. on OpenBSD if stdout is e.g. /dev/null. In such 460 // case we proceed as if stdin were non-blocking anyway 461 // (in case of /dev/null it will be in practice). The 462 // same applies to stdout in io_open_dest_real(). 
463 stdin_flags = fcntl(STDIN_FILENO, F_GETFL); 464 if (stdin_flags == -1) { 465 message_error(_("Error getting the file status flags " 466 "from standard input: %s"), 467 strerror(errno)); 468 return true; 469 } 470 471 if ((stdin_flags & O_NONBLOCK) == 0 472 && fcntl(STDIN_FILENO, F_SETFL, 473 stdin_flags | O_NONBLOCK) != -1) 474 restore_stdin_flags = true; 475 #endif 476 #ifdef HAVE_POSIX_FADVISE 477 // It will fail if stdin is a pipe and that's fine. 478 (void)posix_fadvise(STDIN_FILENO, 0, 0, 479 opt_mode == MODE_LIST 480 ? POSIX_FADV_RANDOM 481 : POSIX_FADV_SEQUENTIAL); 482 #endif 483 return false; 484 } 485 486 // Symlinks are not followed unless writing to stdout or --force 487 // or --keep was used. 488 const bool follow_symlinks 489 = opt_stdout || opt_force || opt_keep_original; 490 491 // We accept only regular files if we are writing the output 492 // to disk too. bzip2 allows overriding this with --force but 493 // gzip and xz don't. 494 const bool reg_files_only = !opt_stdout; 495 496 // Flags for open() 497 int flags = O_RDONLY | O_BINARY | O_NOCTTY; 498 499 #ifndef TUKLIB_DOSLIKE 500 // Use non-blocking I/O: 501 // - It prevents blocking when opening FIFOs and some other 502 // special files, which is good if we want to accept only 503 // regular files. 504 // - It can help avoiding some race conditions with signal handling. 505 flags |= O_NONBLOCK; 506 #endif 507 508 #if defined(O_NOFOLLOW) 509 if (!follow_symlinks) 510 flags |= O_NOFOLLOW; 511 #elif !defined(TUKLIB_DOSLIKE) 512 // Some POSIX-like systems lack O_NOFOLLOW (it's not required 513 // by POSIX). Check for symlinks with a separate lstat() on 514 // these systems. 
515 if (!follow_symlinks) { 516 struct stat st; 517 if (lstat(pair->src_name, &st)) { 518 message_error(_("%s: %s"), pair->src_name, 519 strerror(errno)); 520 return true; 521 522 } else if (S_ISLNK(st.st_mode)) { 523 message_warning(_("%s: Is a symbolic link, " 524 "skipping"), pair->src_name); 525 return true; 526 } 527 } 528 #else 529 // Avoid warnings. 530 (void)follow_symlinks; 531 #endif 532 533 // Try to open the file. Signals have been blocked so EINTR shouldn't 534 // be possible. 535 pair->src_fd = open(pair->src_name, flags); 536 537 if (pair->src_fd == -1) { 538 // Signals (that have a signal handler) have been blocked. 539 assert(errno != EINTR); 540 541 #ifdef O_NOFOLLOW 542 // Give an understandable error message if the reason 543 // for failing was that the file was a symbolic link. 544 // 545 // Note that at least Linux, OpenBSD, Solaris, and Darwin 546 // use ELOOP to indicate that O_NOFOLLOW was the reason 547 // that open() failed. Because there may be 548 // directories in the pathname, ELOOP may occur also 549 // because of a symlink loop in the directory part. 550 // So ELOOP doesn't tell us what actually went wrong, 551 // and this stupidity went into POSIX-1.2008 too. 552 // 553 // FreeBSD associates EMLINK with O_NOFOLLOW and 554 // Tru64 uses ENOTSUP. We use these directly here 555 // and skip the lstat() call and the associated race. 556 // I want to hear if there are other kernels that 557 // fail with something else than ELOOP with O_NOFOLLOW. 
558 bool was_symlink = false; 559 560 # if defined(__FreeBSD__) || defined(__DragonFly__) 561 if (errno == EMLINK) 562 was_symlink = true; 563 564 # elif defined(__digital__) && defined(__unix__) 565 if (errno == ENOTSUP) 566 was_symlink = true; 567 568 # elif defined(__NetBSD__) 569 if (errno == EFTYPE) 570 was_symlink = true; 571 572 # else 573 if (errno == ELOOP && !follow_symlinks) { 574 const int saved_errno = errno; 575 struct stat st; 576 if (lstat(pair->src_name, &st) == 0 577 && S_ISLNK(st.st_mode)) 578 was_symlink = true; 579 580 errno = saved_errno; 581 } 582 # endif 583 584 if (was_symlink) 585 message_warning(_("%s: Is a symbolic link, " 586 "skipping"), pair->src_name); 587 else 588 #endif 589 // Something else than O_NOFOLLOW failing 590 // (assuming that the race conditions didn't 591 // confuse us). 592 message_error(_("%s: %s"), pair->src_name, 593 strerror(errno)); 594 595 return true; 596 } 597 598 // Stat the source file. We need the result also when we copy 599 // the permissions, and when unlinking. 600 // 601 // NOTE: Use stat() instead of fstat() with DJGPP, because 602 // then we have a better chance to get st_ino value that can 603 // be used in io_open_dest_real() to prevent overwriting the 604 // source file. 605 #ifdef __DJGPP__ 606 if (stat(pair->src_name, &pair->src_st)) 607 goto error_msg; 608 #else 609 if (fstat(pair->src_fd, &pair->src_st)) 610 goto error_msg; 611 #endif 612 613 if (S_ISDIR(pair->src_st.st_mode)) { 614 message_warning(_("%s: Is a directory, skipping"), 615 pair->src_name); 616 goto error; 617 } 618 619 if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) { 620 message_warning(_("%s: Not a regular file, skipping"), 621 pair->src_name); 622 goto error; 623 } 624 625 #ifndef TUKLIB_DOSLIKE 626 if (reg_files_only && !opt_force && !opt_keep_original) { 627 if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) { 628 // gzip rejects setuid and setgid files even 629 // when --force was used. 
bzip2 doesn't check 630 // for them, but calls fchown() after fchmod(), 631 // and many systems automatically drop setuid 632 // and setgid bits there. 633 // 634 // We accept setuid and setgid files if 635 // --force or --keep was used. We drop these bits 636 // explicitly in io_copy_attr(). 637 message_warning(_("%s: File has setuid or " 638 "setgid bit set, skipping"), 639 pair->src_name); 640 goto error; 641 } 642 643 if (pair->src_st.st_mode & S_ISVTX) { 644 message_warning(_("%s: File has sticky bit " 645 "set, skipping"), 646 pair->src_name); 647 goto error; 648 } 649 650 if (pair->src_st.st_nlink > 1) { 651 message_warning(_("%s: Input file has more " 652 "than one hard link, " 653 "skipping"), pair->src_name); 654 goto error; 655 } 656 } 657 658 // If it is something else than a regular file, wait until 659 // there is input available. This way reading from FIFOs 660 // will work when open() is used with O_NONBLOCK. 661 if (!S_ISREG(pair->src_st.st_mode)) { 662 signals_unblock(); 663 const io_wait_ret ret = io_wait(pair, -1, true); 664 signals_block(); 665 666 if (ret != IO_WAIT_MORE) 667 goto error; 668 } 669 #endif 670 671 #ifdef HAVE_POSIX_FADVISE 672 // It will fail with some special files like FIFOs but that is fine. 673 (void)posix_fadvise(pair->src_fd, 0, 0, 674 opt_mode == MODE_LIST 675 ? POSIX_FADV_RANDOM 676 : POSIX_FADV_SEQUENTIAL); 677 #endif 678 679 return false; 680 681 error_msg: 682 message_error(_("%s: %s"), pair->src_name, strerror(errno)); 683 error: 684 (void)close(pair->src_fd); 685 return true; 686 } 687 688 689 extern file_pair * 690 io_open_src(const char *src_name) 691 { 692 if (src_name[0] == '\0') { 693 message_error(_("Empty filename, skipping")); 694 return NULL; 695 } 696 697 // Since we have only one file open at a time, we can use 698 // a statically allocated structure. 699 static file_pair pair; 700 701 // This implicitly also initializes src_st.st_size to zero 702 // which is expected to be <= 0 by default. 
fstat() isn't 703 // called when reading from standard input but src_st.st_size 704 // is still read. 705 pair = (file_pair){ 706 .src_name = src_name, 707 .dest_name = NULL, 708 .src_fd = -1, 709 .dest_fd = -1, 710 .src_eof = false, 711 .src_has_seen_input = false, 712 .flush_needed = false, 713 .dest_try_sparse = false, 714 .dest_pending_sparse = 0, 715 }; 716 717 // Block the signals, for which we have a custom signal handler, so 718 // that we don't need to worry about EINTR. 719 signals_block(); 720 const bool error = io_open_src_real(&pair); 721 signals_unblock(); 722 723 #ifdef ENABLE_SANDBOX 724 if (!error) 725 sandbox_enable_strict_if_allowed(pair.src_fd, 726 user_abort_pipe[0], user_abort_pipe[1]); 727 #endif 728 729 return error ? NULL : &pair; 730 } 731 732 733 /// \brief Closes source file of the file_pair structure 734 /// 735 /// \param pair File whose src_fd should be closed 736 /// \param success If true, the file will be removed from the disk if 737 /// closing succeeds and --keep hasn't been used. 738 static void 739 io_close_src(file_pair *pair, bool success) 740 { 741 #ifndef TUKLIB_DOSLIKE 742 if (restore_stdin_flags) { 743 assert(pair->src_fd == STDIN_FILENO); 744 745 restore_stdin_flags = false; 746 747 if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1) 748 message_error(_("Error restoring the status flags " 749 "to standard input: %s"), 750 strerror(errno)); 751 } 752 #endif 753 754 if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) { 755 // Close the file before possibly unlinking it. On DOS-like 756 // systems this is always required since unlinking will fail 757 // if the file is open. On POSIX systems it usually works 758 // to unlink open files, but in some cases it doesn't and 759 // one gets EBUSY in errno. 760 // 761 // xz 5.2.2 and older unlinked the file before closing it 762 // (except on DOS-like systems). The old code didn't handle 763 // EBUSY and could fail e.g. on some CIFS shares. 
The 764 // advantage of unlinking before closing is negligible 765 // (avoids a race between close() and stat()/lstat() and 766 // unlink()), so let's keep this simple. 767 (void)close(pair->src_fd); 768 769 if (success && !opt_keep_original) 770 io_unlink(pair->src_name, &pair->src_st); 771 } 772 773 return; 774 } 775 776 777 static bool 778 io_open_dest_real(file_pair *pair) 779 { 780 if (opt_stdout || pair->src_fd == STDIN_FILENO) { 781 // We don't modify or free() this. 782 pair->dest_name = (char *)"(stdout)"; 783 pair->dest_fd = STDOUT_FILENO; 784 #ifdef TUKLIB_DOSLIKE 785 setmode(STDOUT_FILENO, O_BINARY); 786 #else 787 // Try to set O_NONBLOCK if it isn't already set. 788 // If it fails, we assume that stdout is non-blocking 789 // in practice. See the comments in io_open_src_real() 790 // for similar situation with stdin. 791 // 792 // NOTE: O_APPEND may be unset later in this function 793 // and it relies on stdout_flags being set here. 794 stdout_flags = fcntl(STDOUT_FILENO, F_GETFL); 795 if (stdout_flags == -1) { 796 message_error(_("Error getting the file status flags " 797 "from standard output: %s"), 798 strerror(errno)); 799 return true; 800 } 801 802 if ((stdout_flags & O_NONBLOCK) == 0 803 && fcntl(STDOUT_FILENO, F_SETFL, 804 stdout_flags | O_NONBLOCK) != -1) 805 restore_stdout_flags = true; 806 #endif 807 } else { 808 pair->dest_name = suffix_get_dest_name(pair->src_name); 809 if (pair->dest_name == NULL) 810 return true; 811 812 #ifdef __DJGPP__ 813 struct stat st; 814 if (stat(pair->dest_name, &st) == 0) { 815 // Check that it isn't a special file like "prn". 816 if (st.st_dev == -1) { 817 message_error("%s: Refusing to write to " 818 "a DOS special file", 819 pair->dest_name); 820 free(pair->dest_name); 821 return true; 822 } 823 824 // Check that we aren't overwriting the source file. 
825 if (st.st_dev == pair->src_st.st_dev 826 && st.st_ino == pair->src_st.st_ino) { 827 message_error("%s: Output file is the same " 828 "as the input file", 829 pair->dest_name); 830 free(pair->dest_name); 831 return true; 832 } 833 } 834 #endif 835 836 // If --force was used, unlink the target file first. 837 if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { 838 message_error(_("%s: Cannot remove: %s"), 839 pair->dest_name, strerror(errno)); 840 free(pair->dest_name); 841 return true; 842 } 843 844 // Open the file. 845 int flags = O_WRONLY | O_BINARY | O_NOCTTY 846 | O_CREAT | O_EXCL; 847 #ifndef TUKLIB_DOSLIKE 848 flags |= O_NONBLOCK; 849 #endif 850 const mode_t mode = S_IRUSR | S_IWUSR; 851 pair->dest_fd = open(pair->dest_name, flags, mode); 852 853 if (pair->dest_fd == -1) { 854 message_error(_("%s: %s"), pair->dest_name, 855 strerror(errno)); 856 free(pair->dest_name); 857 return true; 858 } 859 } 860 861 if (fstat(pair->dest_fd, &pair->dest_st)) { 862 // If fstat() really fails, we have a safe fallback here. 863 #if defined(__VMS) 864 pair->dest_st.st_ino[0] = 0; 865 pair->dest_st.st_ino[1] = 0; 866 pair->dest_st.st_ino[2] = 0; 867 #else 868 pair->dest_st.st_dev = 0; 869 pair->dest_st.st_ino = 0; 870 #endif 871 } 872 #if defined(TUKLIB_DOSLIKE) && !defined(__DJGPP__) 873 // Check that the output file is a regular file. We open with O_EXCL 874 // but that doesn't prevent open()/_open() on Windows from opening 875 // files like "con" or "nul". 876 // 877 // With DJGPP this check is done with stat() even before opening 878 // the output file. That method or a variant of it doesn't work on 879 // Windows because on Windows stat()/_stat64() sets st.st_mode so 880 // that S_ISREG(st.st_mode) will be true even for special files. 881 // With fstat()/_fstat64() it works. 
882 else if (pair->dest_fd != STDOUT_FILENO 883 && !S_ISREG(pair->dest_st.st_mode)) { 884 message_error("%s: Destination is not a regular file", 885 pair->dest_name); 886 887 // dest_fd needs to be reset to -1 to keep io_close() working. 888 (void)close(pair->dest_fd); 889 pair->dest_fd = -1; 890 891 free(pair->dest_name); 892 return true; 893 } 894 #elif !defined(TUKLIB_DOSLIKE) 895 else if (try_sparse && opt_mode == MODE_DECOMPRESS) { 896 // When writing to standard output, we need to be extra 897 // careful: 898 // - It may be connected to something else than 899 // a regular file. 900 // - We aren't necessarily writing to a new empty file 901 // or to the end of an existing file. 902 // - O_APPEND may be active. 903 // 904 // TODO: I'm keeping this disabled for DOS-like systems 905 // for now. FAT doesn't support sparse files, but NTFS 906 // does, so maybe this should be enabled on Windows after 907 // some testing. 908 if (pair->dest_fd == STDOUT_FILENO) { 909 if (!S_ISREG(pair->dest_st.st_mode)) 910 return false; 911 912 if (stdout_flags & O_APPEND) { 913 // Creating a sparse file is not possible 914 // when O_APPEND is active (it's used by 915 // shell's >> redirection). As I understand 916 // it, it is safe to temporarily disable 917 // O_APPEND in xz, because if someone 918 // happened to write to the same file at the 919 // same time, results would be bad anyway 920 // (users shouldn't assume that xz uses any 921 // specific block size when writing data). 922 // 923 // The write position may be something else 924 // than the end of the file, so we must fix 925 // it to start writing at the end of the file 926 // to imitate O_APPEND. 927 if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1) 928 return false; 929 930 // Construct the new file status flags. 931 // If O_NONBLOCK was set earlier in this 932 // function, it must be kept here too. 
int flags = stdout_flags & ~O_APPEND;
				if (restore_stdout_flags)
					flags |= O_NONBLOCK;

				// If this fcntl() fails, we continue but won't
				// try to create sparse output. The original
				// flags will still be restored if needed (to
				// unset O_NONBLOCK) when the file is finished.
				if (fcntl(STDOUT_FILENO, F_SETFL, flags) == -1)
					return false;

				// Disabling O_APPEND succeeded. Mark
				// that the flags should be restored
				// in io_close_dest(). (This may have already
				// been set when enabling O_NONBLOCK.)
				restore_stdout_flags = true;

			} else if (lseek(STDOUT_FILENO, 0, SEEK_CUR)
					!= pair->dest_st.st_size) {
				// Writing won't start exactly at the end
				// of the file. We cannot use sparse output,
				// because it would probably corrupt the file.
				return false;
			}
		}

		// All checks passed: io_write() may now turn all-zero
		// blocks into holes.
		pair->dest_try_sparse = true;
	}
#endif

	return false;
}


/// \brief      Opens the destination file of the file_pair structure
///
/// Thin wrapper that calls io_open_dest_real() between signals_block()
/// and signals_unblock().
///
/// \param      pair    File pair whose destination should be opened
///
/// \return     The return value of io_open_dest_real(), unmodified.
///             NOTE(review): the visible success paths of that function
///             return false, so true presumably indicates an error;
///             confirm against the full function.
extern bool
io_open_dest(file_pair *pair)
{
	signals_block();
	const bool ret = io_open_dest_real(pair);
	signals_unblock();
	return ret;
}


/// \brief      Closes destination file of the file_pair structure
///
/// If the file status flags of standard output were modified when the
/// destination was opened, they are restored first. A real destination
/// file (not standard output) is then closed, and it is unlinked if
/// either close() fails or success is false. pair->dest_name is freed
/// whenever a real destination file was closed or a close was attempted.
///
/// \param      pair    File whose dest_fd should be closed
/// \param      success If false, the file will be removed from the disk.
///
/// \return     False if closing succeeds or there was nothing to close
///             (dest_fd is -1 or standard output). On error, true is
///             returned and an error message has been printed.
static bool
io_close_dest(file_pair *pair, bool success)
{
#ifndef TUKLIB_DOSLIKE
	// If io_open_dest() has modified the file status flags of standard
	// output (disabled O_APPEND and/or enabled O_NONBLOCK), restore
	// the original flags here.
	if (restore_stdout_flags) {
		assert(pair->dest_fd == STDOUT_FILENO);

		// Clear the marker before calling fcntl() so that the
		// restore is attempted only once even if it fails.
		restore_stdout_flags = false;

		if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) {
			message_error(_("Error restoring the O_APPEND flag "
					"to standard output: %s"),
					strerror(errno));
			return true;
		}
	}
#endif

	// Standard output is never closed here, and -1 means that
	// there is no destination file open.
	if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO)
		return false;

	if (close(pair->dest_fd)) {
		message_error(_("%s: Closing the file failed: %s"),
				pair->dest_name, strerror(errno));

		// Closing destination file failed, so we cannot trust its
		// contents. Get rid of junk:
		io_unlink(pair->dest_name, &pair->dest_st);
		free(pair->dest_name);
		return true;
	}

	// If the operation using this file wasn't successful, we get rid
	// of the junk file.
	if (!success)
		io_unlink(pair->dest_name, &pair->dest_st);

	free(pair->dest_name);

	return false;
}


/// \brief      Finishes and closes both files of the file_pair structure
///
/// Materializes a pending hole at the end of a sparse destination file,
/// copies the file attributes to a real destination file, closes the
/// destination and then closes the source. Any failure along the way
/// turns success into false for the remaining steps, so that the
/// destination is removed by io_close_dest() and io_close_src() is told
/// that the operation failed.
///
/// \param      pair    File pair to close
/// \param      success True if the operation that used this file pair
///                     succeeded
extern void
io_close(file_pair *pair, bool success)
{
	// Take care of sparseness at the end of the output file.
	if (success && pair->dest_try_sparse
			&& pair->dest_pending_sparse > 0) {
		// Seek forward one byte less than the size of the pending
		// hole, then write one zero-byte. This way the file grows
		// to its correct size. An alternative would be to use
		// ftruncate() but that isn't portable enough (e.g. it
		// doesn't work with FAT on Linux; FAT isn't that important
		// since it doesn't support sparse files anyway, but we don't
		// want to create corrupt files on it).
		if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1,
				SEEK_CUR) == -1) {
			message_error(_("%s: Seeking failed when trying "
					"to create a sparse file: %s"),
					pair->dest_name, strerror(errno));
			success = false;
		} else {
			// io_write_buf() is used instead of io_write() so
			// that the zero byte is really written and not
			// handled as sparse data again.
			const uint8_t zero[1] = { '\0' };
			if (io_write_buf(pair, zero, 1))
				success = false;
		}
	}

	signals_block();

	// Copy the file attributes. We need to skip this if destination
	// file isn't open or it is standard output.
	if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO)
		io_copy_attrs(pair);

	// Close the destination first. If it fails, we must not remove
	// the source file!
	if (io_close_dest(pair, success))
		success = false;

	// Close the source file, and unlink it if the operation using this
	// file pair was successful and we haven't requested to keep the
	// source file.
	io_close_src(pair, success);

	signals_unblock();

	return;
}


/// \brief      Moves the read position of the source file backwards
///
/// \param      pair            File pair whose src_fd is repositioned
/// \param      rewind_size     Number of bytes to seek backwards from the
///                             current position; must not exceed
///                             IO_BUFFER_SIZE. Zero means no seek is done.
extern void
io_fix_src_pos(file_pair *pair, size_t rewind_size)
{
	assert(rewind_size <= IO_BUFFER_SIZE);

	if (rewind_size > 0) {
		// This doesn't need to work on unseekable file descriptors,
		// so just ignore possible errors.
		(void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR);
	}

	return;
}


/// \brief      Reads from the source file into a buffer
///
/// Keeps calling read() until size bytes have been read, the end of the
/// input is reached, the flush timeout expires, or an error occurs.
///
/// \param      pair    File pair whose src_fd is read
/// \param      buf     Destination buffer
/// \param      size    Number of bytes to read; must not exceed
///                     IO_BUFFER_SIZE
///
/// \return     Number of bytes stored in buf. This is less than size if
///             the end of the input was reached (pair->src_eof is set to
///             true) or if io_wait() timed out (pair->flush_needed is set
///             to true). SIZE_MAX is returned on a read error (a message
///             has been printed), when io_wait() reports IO_WAIT_ERROR,
///             and when read() was interrupted and user_abort is set.
extern size_t
io_read(file_pair *pair, io_buf *buf, size_t size)
{
	assert(size <= IO_BUFFER_SIZE);

	size_t pos = 0;

	while (pos < size) {
		const ssize_t amount = read(
				pair->src_fd, buf->u8 + pos, size - pos);

		if (amount == 0) {
			pair->src_eof = true;
			break;
		}

		if (amount == -1) {
			// An interrupted read() is retried unless the user
			// has requested the program to stop.
			if (errno == EINTR) {
				if (user_abort)
					return SIZE_MAX;

				continue;
			}

#ifndef TUKLIB_DOSLIKE
			if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) {
				// Disable the flush-timeout if no input has
				// been seen since the previous flush and thus
				// there would be nothing to flush after the
				// timeout expires (avoids busy waiting).
				const int timeout = pair->src_has_seen_input
						? mytime_get_flush_timeout()
						: -1;

				// NOTE(review): the last argument presumably
				// selects waiting for readability (true) as
				// opposed to writability (false); confirm
				// against io_wait().
				switch (io_wait(pair, timeout, true)) {
				case IO_WAIT_MORE:
					continue;

				case IO_WAIT_ERROR:
					return SIZE_MAX;

				case IO_WAIT_TIMEOUT:
					// Return what has been read so far
					// and ask the caller to flush.
					pair->flush_needed = true;
					return pos;

				default:
					message_bug();
				}
			}
#endif

			message_error(_("%s: Read error: %s"),
					pair->src_name, strerror(errno));

			return SIZE_MAX;
		}

		pos += (size_t)(amount);

		// Record when the first input since the previous flush
		// arrived; the timeout calculation above depends on it.
		if (!pair->src_has_seen_input) {
			pair->src_has_seen_input = true;
			mytime_set_flush_time();
		}
	}

	return pos;
}


/// \brief      Seeks to an absolute position in the source file
///
/// \param      pair    File pair whose src_fd is repositioned
/// \param      pos     Offset from the beginning of the file; must not
///                     be greater than pair->src_st.st_size
///
/// \return     False on success, in which case pair->src_eof has also
///             been reset to false. True if lseek() failed; an error
///             message has been printed.
extern bool
io_seek_src(file_pair *pair, uint64_t pos)
{
	// Caller must not attempt to seek past the end of the input file
	// (seeking to 100 in a 100-byte file is seeking to the end of
	// the file, not past the end of the file, and thus that is allowed).
	//
	// This also validates that pos can be safely cast to off_t.
	if (pos > (uint64_t)(pair->src_st.st_size))
		message_bug();

	if (lseek(pair->src_fd, (off_t)(pos), SEEK_SET) == -1) {
		message_error(_("%s: Error seeking the file: %s"),
				pair->src_name, strerror(errno));
		return true;
	}

	pair->src_eof = false;

	return false;
}


/// \brief      Reads exactly size bytes from a given source file position
///
/// \param      pair    File pair whose src_fd is read
/// \param      buf     Destination buffer
/// \param      size    Number of bytes to read
/// \param      pos     Offset from the beginning of the file
///
/// \return     False if size bytes were read. True if seeking or reading
///             failed or if fewer than size bytes were available; an
///             error message has been printed in the seek-failure and
///             short-read cases.
extern bool
io_pread(file_pair *pair, io_buf *buf, size_t size, uint64_t pos)
{
	// Using lseek() and read() is more portable than pread() and
	// for us it is as good as real pread().
	if (io_seek_src(pair, pos))
		return true;

	const size_t amount = io_read(pair, buf, size);
	if (amount == SIZE_MAX)
		return true;

	// A short read isn't acceptable here because the caller asked
	// for a specific range of the file.
	if (amount != size) {
		message_error(_("%s: Unexpected end of file"),
				pair->src_name);
		return true;
	}

	return false;
}


/// \brief      Tests if a whole I/O buffer contains only zero bytes
///
/// Every element of buf->u64 is checked regardless of how much of the
/// buffer is in use, so this is meaningful only for a completely filled
/// buffer (io_write() calls this only when size == IO_BUFFER_SIZE).
///
/// \return     True if all elements of buf->u64 are zero.
static bool
is_sparse(const io_buf *buf)
{
	assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0);

	for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i)
		if (buf->u64[i] != 0)
			return false;

	return true;
}


/// \brief      Writes a buffer to the destination file
///
/// Calls write() repeatedly until all of the data has been written.
/// No sparse file handling is done here.
///
/// \param      pair    File pair whose dest_fd is written
/// \param      buf     Data to write
/// \param      size    Number of bytes to write; must not exceed
///                     IO_BUFFER_SIZE
///
/// \return     False if everything was written. True on error or when
///             write() was interrupted and user_abort is set. An error
///             message is printed for write errors other than EPIPE.
static bool
io_write_buf(file_pair *pair, const uint8_t *buf, size_t size)
{
	assert(size <= IO_BUFFER_SIZE);

	while (size > 0) {
		const ssize_t amount = write(pair->dest_fd, buf, size);
		if (amount == -1) {
			// An interrupted write() is retried unless the user
			// has requested the program to stop.
			if (errno == EINTR) {
				if (user_abort)
					return true;

				continue;
			}

#ifndef TUKLIB_DOSLIKE
			if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) {
				// Wait without a timeout (-1). Anything
				// other than IO_WAIT_MORE is treated as
				// a failure.
				if (io_wait(pair, -1, false) == IO_WAIT_MORE)
					continue;

				return true;
			}
#endif

			// Handle broken pipe specially. gzip and bzip2
			// don't print anything on SIGPIPE. In addition,
			// gzip --quiet uses exit status 2 (warning) on
			// broken pipe instead of whatever raise(SIGPIPE)
			// would make it return. It is there to hide "Broken
			// pipe" message on some old shells (probably old
			// GNU bash).
			//
			// We don't do anything special with --quiet, which
			// is what bzip2 does too. If we get SIGPIPE, we
			// will handle it like other signals by setting
			// user_abort, and get EPIPE here.
			if (errno != EPIPE)
				message_error(_("%s: Write error: %s"),
						pair->dest_name, strerror(errno));

			return true;
		}

		// Partial write: advance past the bytes that were written
		// and try again with the rest.
		buf += (size_t)(amount);
		size -= (size_t)(amount);
	}

	return false;
}


/// \brief      Writes a buffer to the destination file, creating holes
///             for all-zero blocks when sparse output is enabled
///
/// \param      pair    File pair whose dest_fd is written
/// \param      buf     Data to write
/// \param      size    Number of bytes to write; must not exceed
///                     IO_BUFFER_SIZE
///
/// \return     False on success, including the cases where the data was
///             only recorded as a pending hole or size was zero with
///             sparse output enabled. True if seeking over a pending hole
///             or writing the data failed.
extern bool
io_write(file_pair *pair, const io_buf *buf, size_t size)
{
	assert(size <= IO_BUFFER_SIZE);

	if (pair->dest_try_sparse) {
		// Check if the block is sparse (contains only zeros). If it
		// is sparse, we just store the amount and return. We will
		// take care of actually skipping over the hole when we hit
		// the next data block or close the file.
		//
		// Since io_close() requires that dest_pending_sparse > 0
		// if the file ends with sparse block, we must also return
		// if size == 0 to avoid doing the lseek().
		if (size == IO_BUFFER_SIZE) {
			// Even if the block was sparse, treat it as non-sparse
			// if the pending sparse amount is large compared to
			// the size of off_t. In practice this only matters
			// on 32-bit systems where off_t isn't always 64 bits.
			const off_t pending_max
				= (off_t)(1) << (sizeof(off_t) * CHAR_BIT - 2);
			if (is_sparse(buf) && pair->dest_pending_sparse
					< pending_max) {
				pair->dest_pending_sparse += (off_t)(size);
				return false;
			}
		} else if (size == 0) {
			return false;
		}

		// This is not a sparse block. If we have a pending hole,
		// skip it now.
		if (pair->dest_pending_sparse > 0) {
			if (lseek(pair->dest_fd, pair->dest_pending_sparse,
					SEEK_CUR) == -1) {
				message_error(_("%s: Seeking failed when "
						"trying to create a sparse "
						"file: %s"), pair->dest_name,
						strerror(errno));
				return true;
			}

			pair->dest_pending_sparse = 0;
		}
	}

	return io_write_buf(pair, buf->u8, size);
}