1 /////////////////////////////////////////////////////////////////////////////// 2 // 3 /// \file file_io.c 4 /// \brief File opening, unlinking, and closing 5 // 6 // Author: Lasse Collin 7 // 8 // This file has been put into the public domain. 9 // You can do whatever you want with this file. 10 // 11 /////////////////////////////////////////////////////////////////////////////// 12 13 #include "private.h" 14 15 #include <fcntl.h> 16 17 #ifdef TUKLIB_DOSLIKE 18 # include <io.h> 19 #else 20 # include <poll.h> 21 static bool warn_fchown; 22 #endif 23 24 #if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) 25 # include <sys/time.h> 26 #elif defined(HAVE_UTIME) 27 # include <utime.h> 28 #endif 29 30 #include "tuklib_open_stdxxx.h" 31 32 #ifndef O_BINARY 33 # define O_BINARY 0 34 #endif 35 36 #ifndef O_NOCTTY 37 # define O_NOCTTY 0 38 #endif 39 40 41 typedef enum { 42 IO_WAIT_MORE, // Reading or writing is possible. 43 IO_WAIT_ERROR, // Error or user_abort 44 IO_WAIT_TIMEOUT, // poll() timed out 45 } io_wait_ret; 46 47 48 /// If true, try to create sparse files when decompressing. 49 static bool try_sparse = true; 50 51 #ifndef TUKLIB_DOSLIKE 52 /// File status flags of standard input. This is used by io_open_src() 53 /// and io_close_src(). 54 static int stdin_flags; 55 static bool restore_stdin_flags = false; 56 57 /// Original file status flags of standard output. This is used by 58 /// io_open_dest() and io_close_dest() to save and restore the flags. 59 static int stdout_flags; 60 static bool restore_stdout_flags = false; 61 62 /// Self-pipe used together with the user_abort variable to avoid 63 /// race conditions with signal handling. 64 static int user_abort_pipe[2]; 65 #endif 66 67 68 static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size); 69 70 71 extern void 72 io_init(void) 73 { 74 // Make sure that stdin, stdout, and stderr are connected to 75 // a valid file descriptor. Exit immediately with exit code ERROR 76 // if we cannot make the file descriptors valid. Maybe we should 77 // print an error message, but our stderr could be screwed anyway. 78 tuklib_open_stdxxx(E_ERROR); 79 80 #ifndef TUKLIB_DOSLIKE 81 // If fchown() fails setting the owner, we warn about it only if 82 // we are root. 83 warn_fchown = geteuid() == 0; 84 85 // Create a pipe for the self-pipe trick. If pipe2() is available, 86 // we can avoid the fcntl() calls. 87 # ifdef HAVE_PIPE2 88 if (pipe2(user_abort_pipe, O_NONBLOCK)) 89 message_fatal(_("Error creating a pipe: %s"), 90 strerror(errno)); 91 # else 92 if (pipe(user_abort_pipe)) 93 message_fatal(_("Error creating a pipe: %s"), 94 strerror(errno)); 95 96 // Make both ends of the pipe non-blocking. 97 for (unsigned i = 0; i < 2; ++i) { 98 int flags = fcntl(user_abort_pipe[i], F_GETFL); 99 if (flags == -1 || fcntl(user_abort_pipe[i], F_SETFL, 100 flags | O_NONBLOCK) == -1) 101 message_fatal(_("Error creating a pipe: %s"), 102 strerror(errno)); 103 } 104 # endif 105 #endif 106 107 #ifdef __DJGPP__ 108 // Avoid doing useless things when statting files. 109 // This isn't important but doesn't hurt. 110 _djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE; 111 #endif 112 113 return; 114 } 115 116 117 #ifndef TUKLIB_DOSLIKE 118 extern void 119 io_write_to_user_abort_pipe(void) 120 { 121 // If the write() fails, it's probably due to the pipe being full. 122 // Failing in that case is fine. If the reason is something else, 123 // there's not much we can do since this is called in a signal 124 // handler. So ignore the errors and try to avoid warnings with 125 // GCC and glibc when _FORTIFY_SOURCE=2 is used. 126 uint8_t b = '\0'; 127 const int ret = write(user_abort_pipe[1], &b, 1); 128 (void)ret; 129 return; 130 } 131 #endif 132 133 134 extern void 135 io_no_sparse(void) 136 { 137 try_sparse = false; 138 return; 139 } 140 141 142 #ifndef TUKLIB_DOSLIKE 143 /// \brief Waits for input or output to become available or for a signal 144 /// 145 /// This uses the self-pipe trick to avoid a race condition that can occur 146 /// if a signal is caught after user_abort has been checked but before e.g. 147 /// read() has been called. In that situation read() could block unless 148 /// non-blocking I/O is used. With non-blocking I/O something like select() 149 /// or poll() is needed to avoid a busy-wait loop, and the same race condition 150 /// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in 151 /// POSIX) but neither is portable enough in 2013. The self-pipe trick is 152 /// old and very portable. 153 static io_wait_ret 154 io_wait(file_pair *pair, int timeout, bool is_reading) 155 { 156 struct pollfd pfd[2]; 157 158 if (is_reading) { 159 pfd[0].fd = pair->src_fd; 160 pfd[0].events = POLLIN; 161 } else { 162 pfd[0].fd = pair->dest_fd; 163 pfd[0].events = POLLOUT; 164 } 165 166 pfd[1].fd = user_abort_pipe[0]; 167 pfd[1].events = POLLIN; 168 169 while (true) { 170 const int ret = poll(pfd, 2, timeout); 171 172 if (user_abort) 173 return IO_WAIT_ERROR; 174 175 if (ret == -1) { 176 if (errno == EINTR || errno == EAGAIN) 177 continue; 178 179 message_error(_("%s: poll() failed: %s"), 180 is_reading ? pair->src_name 181 : pair->dest_name, 182 strerror(errno)); 183 return IO_WAIT_ERROR; 184 } 185 186 if (ret == 0) { 187 assert(opt_flush_timeout != 0); 188 flush_needed = true; 189 return IO_WAIT_TIMEOUT; 190 } 191 192 if (pfd[0].revents != 0) 193 return IO_WAIT_MORE; 194 } 195 } 196 #endif 197 198 199 /// \brief Unlink a file 200 /// 201 /// This tries to verify that the file being unlinked really is the file that 202 /// we want to unlink by verifying device and inode numbers. There's still 203 /// a small unavoidable race, but this is much better than nothing (the file 204 /// could have been moved/replaced even hours earlier). 205 static void 206 io_unlink(const char *name, const struct stat *known_st) 207 { 208 #if defined(TUKLIB_DOSLIKE) 209 // On DOS-like systems, st_ino is meaningless, so don't bother 210 // testing it. Just silence a compiler warning. 211 (void)known_st; 212 #else 213 struct stat new_st; 214 215 // If --force was used, use stat() instead of lstat(). This way 216 // (de)compressing symlinks works correctly. However, it also means 217 // that xz cannot detect if a regular file foo is renamed to bar 218 // and then a symlink foo -> bar is created. Because of stat() 219 // instead of lstat(), xz will think that foo hasn't been replaced 220 // with another file. Thus, xz will remove foo even though it no 221 // longer is the same file that xz used when it started compressing. 222 // Probably it's not too bad though, so this doesn't need a more 223 // complex fix. 224 const int stat_ret = opt_force 225 ? stat(name, &new_st) : lstat(name, &new_st); 226 227 if (stat_ret 228 # ifdef __VMS 229 // st_ino is an array, and we don't want to 230 // compare st_dev at all. 231 || memcmp(&new_st.st_ino, &known_st->st_ino, 232 sizeof(new_st.st_ino)) != 0 233 # else 234 // Typical POSIX-like system 235 || new_st.st_dev != known_st->st_dev 236 || new_st.st_ino != known_st->st_ino 237 # endif 238 ) 239 // TRANSLATORS: When compression or decompression finishes, 240 // and xz is going to remove the source file, xz first checks 241 // if the source file still exists, and if it does, does its 242 // device and inode numbers match what xz saw when it opened 243 // the source file. If these checks fail, this message is 244 // shown, %s being the filename, and the file is not deleted. 245 // The check for device and inode numbers is there, because 246 // it is possible that the user has put a new file in place 247 // of the original file, and in that case it obviously 248 // shouldn't be removed. 249 message_error(_("%s: File seems to have been moved, " 250 "not removing"), name); 251 else 252 #endif 253 // There's a race condition between lstat() and unlink() 254 // but at least we have tried to avoid removing wrong file. 255 if (unlink(name)) 256 message_error(_("%s: Cannot remove: %s"), 257 name, strerror(errno)); 258 259 return; 260 } 261 262 263 /// \brief Copies owner/group and permissions 264 /// 265 /// \todo ACL and EA support 266 /// 267 static void 268 io_copy_attrs(const file_pair *pair) 269 { 270 // Skip chown and chmod on Windows. 271 #ifndef TUKLIB_DOSLIKE 272 // This function is more tricky than you may think at first. 273 // Blindly copying permissions may permit users to access the 274 // destination file who didn't have permission to access the 275 // source file. 276 277 // Try changing the owner of the file. If we aren't root or the owner 278 // isn't already us, fchown() probably doesn't succeed. We warn 279 // about failing fchown() only if we are root. 280 if (fchown(pair->dest_fd, pair->src_st.st_uid, -1) && warn_fchown) 281 message_warning(_("%s: Cannot set the file owner: %s"), 282 pair->dest_name, strerror(errno)); 283 284 mode_t mode; 285 286 if (fchown(pair->dest_fd, -1, pair->src_st.st_gid)) { 287 message_warning(_("%s: Cannot set the file group: %s"), 288 pair->dest_name, strerror(errno)); 289 // We can still safely copy some additional permissions: 290 // `group' must be at least as strict as `other' and 291 // also vice versa. 292 // 293 // NOTE: After this, the owner of the source file may 294 // get additional permissions. This shouldn't be too bad, 295 // because the owner would have had permission to chmod 296 // the original file anyway. 297 mode = ((pair->src_st.st_mode & 0070) >> 3) 298 & (pair->src_st.st_mode & 0007); 299 mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode; 300 } else { 301 // Drop the setuid, setgid, and sticky bits. 302 mode = pair->src_st.st_mode & 0777; 303 } 304 305 if (fchmod(pair->dest_fd, mode)) 306 message_warning(_("%s: Cannot set the file permissions: %s"), 307 pair->dest_name, strerror(errno)); 308 #endif 309 310 // Copy the timestamps. We have several possible ways to do this, of 311 // which some are better in both security and precision. 312 // 313 // First, get the nanosecond part of the timestamps. As of writing, 314 // it's not standardized by POSIX, and there are several names for 315 // the same thing in struct stat. 316 long atime_nsec; 317 long mtime_nsec; 318 319 # if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) 320 // GNU and Solaris 321 atime_nsec = pair->src_st.st_atim.tv_nsec; 322 mtime_nsec = pair->src_st.st_mtim.tv_nsec; 323 324 # elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) 325 // BSD 326 atime_nsec = pair->src_st.st_atimespec.tv_nsec; 327 mtime_nsec = pair->src_st.st_mtimespec.tv_nsec; 328 329 # elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC) 330 // GNU and BSD without extensions 331 atime_nsec = pair->src_st.st_atimensec; 332 mtime_nsec = pair->src_st.st_mtimensec; 333 334 # elif defined(HAVE_STRUCT_STAT_ST_UATIME) 335 // Tru64 336 atime_nsec = pair->src_st.st_uatime * 1000; 337 mtime_nsec = pair->src_st.st_umtime * 1000; 338 339 # elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC) 340 // UnixWare 341 atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec; 342 mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec; 343 344 # else 345 // Safe fallback 346 atime_nsec = 0; 347 mtime_nsec = 0; 348 # endif 349 350 // Construct a structure to hold the timestamps and call appropriate 351 // function to set the timestamps. 352 #if defined(HAVE_FUTIMENS) 353 // Use nanosecond precision. 354 struct timespec tv[2]; 355 tv[0].tv_sec = pair->src_st.st_atime; 356 tv[0].tv_nsec = atime_nsec; 357 tv[1].tv_sec = pair->src_st.st_mtime; 358 tv[1].tv_nsec = mtime_nsec; 359 360 (void)futimens(pair->dest_fd, tv); 361 362 #elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) 363 // Use microsecond precision. 364 struct timeval tv[2]; 365 tv[0].tv_sec = pair->src_st.st_atime; 366 tv[0].tv_usec = atime_nsec / 1000; 367 tv[1].tv_sec = pair->src_st.st_mtime; 368 tv[1].tv_usec = mtime_nsec / 1000; 369 370 # if defined(HAVE_FUTIMES) 371 (void)futimes(pair->dest_fd, tv); 372 # elif defined(HAVE_FUTIMESAT) 373 (void)futimesat(pair->dest_fd, NULL, tv); 374 # else 375 // Argh, no function to use a file descriptor to set the timestamp. 376 (void)utimes(pair->dest_name, tv); 377 # endif 378 379 #elif defined(HAVE_UTIME) 380 // Use one-second precision. utime() doesn't support using file 381 // descriptor either. Some systems have broken utime() prototype 382 // so don't make this const. 383 struct utimbuf buf = { 384 .actime = pair->src_st.st_atime, 385 .modtime = pair->src_st.st_mtime, 386 }; 387 388 // Avoid warnings. 389 (void)atime_nsec; 390 (void)mtime_nsec; 391 392 (void)utime(pair->dest_name, &buf); 393 #endif 394 395 return; 396 } 397 398 399 /// Opens the source file. Returns false on success, true on error. 400 static bool 401 io_open_src_real(file_pair *pair) 402 { 403 // There's nothing to open when reading from stdin. 404 if (pair->src_name == stdin_filename) { 405 pair->src_fd = STDIN_FILENO; 406 #ifdef TUKLIB_DOSLIKE 407 setmode(STDIN_FILENO, O_BINARY); 408 #else 409 // Try to set stdin to non-blocking mode. It won't work 410 // e.g. on OpenBSD if stdout is e.g. /dev/null. In such 411 // case we proceed as if stdin were non-blocking anyway 412 // (in case of /dev/null it will be in practice). The 413 // same applies to stdout in io_open_dest_real(). 414 stdin_flags = fcntl(STDIN_FILENO, F_GETFL); 415 if (stdin_flags == -1) { 416 message_error(_("Error getting the file status flags " 417 "from standard input: %s"), 418 strerror(errno)); 419 return true; 420 } 421 422 if ((stdin_flags & O_NONBLOCK) == 0 423 && fcntl(STDIN_FILENO, F_SETFL, 424 stdin_flags | O_NONBLOCK) != -1) 425 restore_stdin_flags = true; 426 #endif 427 #ifdef HAVE_POSIX_FADVISE 428 // It will fail if stdin is a pipe and that's fine. 429 (void)posix_fadvise(STDIN_FILENO, 0, 0, POSIX_FADV_SEQUENTIAL); 430 #endif 431 return false; 432 } 433 434 // Symlinks are not followed unless writing to stdout or --force 435 // was used. 436 const bool follow_symlinks = opt_stdout || opt_force; 437 438 // We accept only regular files if we are writing the output 439 // to disk too. bzip2 allows overriding this with --force but 440 // gzip and xz don't. 441 const bool reg_files_only = !opt_stdout; 442 443 // Flags for open() 444 int flags = O_RDONLY | O_BINARY | O_NOCTTY; 445 446 #ifndef TUKLIB_DOSLIKE 447 // Use non-blocking I/O: 448 // - It prevents blocking when opening FIFOs and some other 449 // special files, which is good if we want to accept only 450 // regular files. 451 // - It can help avoiding some race conditions with signal handling. 452 flags |= O_NONBLOCK; 453 #endif 454 455 #if defined(O_NOFOLLOW) 456 if (!follow_symlinks) 457 flags |= O_NOFOLLOW; 458 #elif !defined(TUKLIB_DOSLIKE) 459 // Some POSIX-like systems lack O_NOFOLLOW (it's not required 460 // by POSIX). Check for symlinks with a separate lstat() on 461 // these systems. 462 if (!follow_symlinks) { 463 struct stat st; 464 if (lstat(pair->src_name, &st)) { 465 message_error("%s: %s", pair->src_name, 466 strerror(errno)); 467 return true; 468 469 } else if (S_ISLNK(st.st_mode)) { 470 message_warning(_("%s: Is a symbolic link, " 471 "skipping"), pair->src_name); 472 return true; 473 } 474 } 475 #else 476 // Avoid warnings. 477 (void)follow_symlinks; 478 #endif 479 480 // Try to open the file. Signals have been blocked so EINTR shouldn't 481 // be possible. 482 pair->src_fd = open(pair->src_name, flags); 483 484 if (pair->src_fd == -1) { 485 // Signals (that have a signal handler) have been blocked. 486 assert(errno != EINTR); 487 488 #ifdef O_NOFOLLOW 489 // Give an understandable error message if the reason 490 // for failing was that the file was a symbolic link. 491 // 492 // Note that at least Linux, OpenBSD, Solaris, and Darwin 493 // use ELOOP to indicate that O_NOFOLLOW was the reason 494 // that open() failed. Because there may be 495 // directories in the pathname, ELOOP may occur also 496 // because of a symlink loop in the directory part. 497 // So ELOOP doesn't tell us what actually went wrong, 498 // and this stupidity went into POSIX-1.2008 too. 499 // 500 // FreeBSD associates EMLINK with O_NOFOLLOW and 501 // Tru64 uses ENOTSUP. We use these directly here 502 // and skip the lstat() call and the associated race. 503 // I want to hear if there are other kernels that 504 // fail with something else than ELOOP with O_NOFOLLOW. 505 bool was_symlink = false; 506 507 # if defined(__FreeBSD__) || defined(__DragonFly__) 508 if (errno == EMLINK) 509 was_symlink = true; 510 511 # elif defined(__digital__) && defined(__unix__) 512 if (errno == ENOTSUP) 513 was_symlink = true; 514 515 # elif defined(__NetBSD__) 516 if (errno == EFTYPE) 517 was_symlink = true; 518 519 # else 520 if (errno == ELOOP && !follow_symlinks) { 521 const int saved_errno = errno; 522 struct stat st; 523 if (lstat(pair->src_name, &st) == 0 524 && S_ISLNK(st.st_mode)) 525 was_symlink = true; 526 527 errno = saved_errno; 528 } 529 # endif 530 531 if (was_symlink) 532 message_warning(_("%s: Is a symbolic link, " 533 "skipping"), pair->src_name); 534 else 535 #endif 536 // Something else than O_NOFOLLOW failing 537 // (assuming that the race conditions didn't 538 // confuse us). 539 message_error("%s: %s", pair->src_name, 540 strerror(errno)); 541 542 return true; 543 } 544 545 // Stat the source file. We need the result also when we copy 546 // the permissions, and when unlinking. 547 // 548 // NOTE: Use stat() instead of fstat() with DJGPP, because 549 // then we have a better chance to get st_ino value that can 550 // be used in io_open_dest_real() to prevent overwriting the 551 // source file. 552 #ifdef __DJGPP__ 553 if (stat(pair->src_name, &pair->src_st)) 554 goto error_msg; 555 #else 556 if (fstat(pair->src_fd, &pair->src_st)) 557 goto error_msg; 558 #endif 559 560 if (S_ISDIR(pair->src_st.st_mode)) { 561 message_warning(_("%s: Is a directory, skipping"), 562 pair->src_name); 563 goto error; 564 } 565 566 if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) { 567 message_warning(_("%s: Not a regular file, skipping"), 568 pair->src_name); 569 goto error; 570 } 571 572 #ifndef TUKLIB_DOSLIKE 573 if (reg_files_only && !opt_force) { 574 if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) { 575 // gzip rejects setuid and setgid files even 576 // when --force was used. bzip2 doesn't check 577 // for them, but calls fchown() after fchmod(), 578 // and many systems automatically drop setuid 579 // and setgid bits there. 580 // 581 // We accept setuid and setgid files if 582 // --force was used. We drop these bits 583 // explicitly in io_copy_attr(). 584 message_warning(_("%s: File has setuid or " 585 "setgid bit set, skipping"), 586 pair->src_name); 587 goto error; 588 } 589 590 if (pair->src_st.st_mode & S_ISVTX) { 591 message_warning(_("%s: File has sticky bit " 592 "set, skipping"), 593 pair->src_name); 594 goto error; 595 } 596 597 if (pair->src_st.st_nlink > 1) { 598 message_warning(_("%s: Input file has more " 599 "than one hard link, " 600 "skipping"), pair->src_name); 601 goto error; 602 } 603 } 604 605 // If it is something else than a regular file, wait until 606 // there is input available. This way reading from FIFOs 607 // will work when open() is used with O_NONBLOCK. 608 if (!S_ISREG(pair->src_st.st_mode)) { 609 signals_unblock(); 610 const io_wait_ret ret = io_wait(pair, -1, true); 611 signals_block(); 612 613 if (ret != IO_WAIT_MORE) 614 goto error; 615 } 616 #endif 617 618 #ifdef HAVE_POSIX_FADVISE 619 // It will fail with some special files like FIFOs but that is fine. 620 (void)posix_fadvise(pair->src_fd, 0, 0, POSIX_FADV_SEQUENTIAL); 621 #endif 622 623 return false; 624 625 error_msg: 626 message_error("%s: %s", pair->src_name, strerror(errno)); 627 error: 628 (void)close(pair->src_fd); 629 return true; 630 } 631 632 633 extern file_pair * 634 io_open_src(const char *src_name) 635 { 636 if (is_empty_filename(src_name)) 637 return NULL; 638 639 // Since we have only one file open at a time, we can use 640 // a statically allocated structure. 641 static file_pair pair; 642 643 pair = (file_pair){ 644 .src_name = src_name, 645 .dest_name = NULL, 646 .src_fd = -1, 647 .dest_fd = -1, 648 .src_eof = false, 649 .dest_try_sparse = false, 650 .dest_pending_sparse = 0, 651 }; 652 653 // Block the signals, for which we have a custom signal handler, so 654 // that we don't need to worry about EINTR. 655 signals_block(); 656 const bool error = io_open_src_real(&pair); 657 signals_unblock(); 658 659 return error ? NULL : &pair; 660 } 661 662 663 /// \brief Closes source file of the file_pair structure 664 /// 665 /// \param pair File whose src_fd should be closed 666 /// \param success If true, the file will be removed from the disk if 667 /// closing succeeds and --keep hasn't been used. 668 static void 669 io_close_src(file_pair *pair, bool success) 670 { 671 #ifndef TUKLIB_DOSLIKE 672 if (restore_stdin_flags) { 673 assert(pair->src_fd == STDIN_FILENO); 674 675 restore_stdin_flags = false; 676 677 if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1) 678 message_error(_("Error restoring the status flags " 679 "to standard input: %s"), 680 strerror(errno)); 681 } 682 #endif 683 684 if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) { 685 #ifdef TUKLIB_DOSLIKE 686 (void)close(pair->src_fd); 687 #endif 688 689 // If we are going to unlink(), do it before closing the file. 690 // This way there's no risk that someone replaces the file and 691 // happens to get same inode number, which would make us 692 // unlink() wrong file. 693 // 694 // NOTE: DOS-like systems are an exception to this, because 695 // they don't allow unlinking files that are open. *sigh* 696 if (success && !opt_keep_original) 697 io_unlink(pair->src_name, &pair->src_st); 698 699 #ifndef TUKLIB_DOSLIKE 700 (void)close(pair->src_fd); 701 #endif 702 } 703 704 return; 705 } 706 707 708 static bool 709 io_open_dest_real(file_pair *pair) 710 { 711 if (opt_stdout || pair->src_fd == STDIN_FILENO) { 712 // We don't modify or free() this. 713 pair->dest_name = (char *)"(stdout)"; 714 pair->dest_fd = STDOUT_FILENO; 715 #ifdef TUKLIB_DOSLIKE 716 setmode(STDOUT_FILENO, O_BINARY); 717 #else 718 // Try to set O_NONBLOCK if it isn't already set. 719 // If it fails, we assume that stdout is non-blocking 720 // in practice. See the comments in io_open_src_real() 721 // for similar situation with stdin. 722 // 723 // NOTE: O_APPEND may be unset later in this function 724 // and it relies on stdout_flags being set here. 725 stdout_flags = fcntl(STDOUT_FILENO, F_GETFL); 726 if (stdout_flags == -1) { 727 message_error(_("Error getting the file status flags " 728 "from standard output: %s"), 729 strerror(errno)); 730 return true; 731 } 732 733 if ((stdout_flags & O_NONBLOCK) == 0 734 && fcntl(STDOUT_FILENO, F_SETFL, 735 stdout_flags | O_NONBLOCK) != -1) 736 restore_stdout_flags = true; 737 #endif 738 } else { 739 pair->dest_name = suffix_get_dest_name(pair->src_name); 740 if (pair->dest_name == NULL) 741 return true; 742 743 #ifdef __DJGPP__ 744 struct stat st; 745 if (stat(pair->dest_name, &st) == 0) { 746 // Check that it isn't a special file like "prn". 747 if (st.st_dev == -1) { 748 message_error("%s: Refusing to write to " 749 "a DOS special file", 750 pair->dest_name); 751 free(pair->dest_name); 752 return true; 753 } 754 755 // Check that we aren't overwriting the source file. 756 if (st.st_dev == pair->src_st.st_dev 757 && st.st_ino == pair->src_st.st_ino) { 758 message_error("%s: Output file is the same " 759 "as the input file", 760 pair->dest_name); 761 free(pair->dest_name); 762 return true; 763 } 764 } 765 #endif 766 767 // If --force was used, unlink the target file first. 768 if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { 769 message_error(_("%s: Cannot remove: %s"), 770 pair->dest_name, strerror(errno)); 771 free(pair->dest_name); 772 return true; 773 } 774 775 // Open the file. 776 int flags = O_WRONLY | O_BINARY | O_NOCTTY 777 | O_CREAT | O_EXCL; 778 #ifndef TUKLIB_DOSLIKE 779 flags |= O_NONBLOCK; 780 #endif 781 const mode_t mode = S_IRUSR | S_IWUSR; 782 pair->dest_fd = open(pair->dest_name, flags, mode); 783 784 if (pair->dest_fd == -1) { 785 message_error("%s: %s", pair->dest_name, 786 strerror(errno)); 787 free(pair->dest_name); 788 return true; 789 } 790 } 791 792 #ifndef TUKLIB_DOSLIKE 793 // dest_st isn't used on DOS-like systems except as a dummy 794 // argument to io_unlink(), so don't fstat() on such systems. 795 if (fstat(pair->dest_fd, &pair->dest_st)) { 796 // If fstat() really fails, we have a safe fallback here. 797 # if defined(__VMS) 798 pair->dest_st.st_ino[0] = 0; 799 pair->dest_st.st_ino[1] = 0; 800 pair->dest_st.st_ino[2] = 0; 801 # else 802 pair->dest_st.st_dev = 0; 803 pair->dest_st.st_ino = 0; 804 # endif 805 } else if (try_sparse && opt_mode == MODE_DECOMPRESS) { 806 // When writing to standard output, we need to be extra 807 // careful: 808 // - It may be connected to something else than 809 // a regular file. 810 // - We aren't necessarily writing to a new empty file 811 // or to the end of an existing file. 812 // - O_APPEND may be active. 813 // 814 // TODO: I'm keeping this disabled for DOS-like systems 815 // for now. FAT doesn't support sparse files, but NTFS 816 // does, so maybe this should be enabled on Windows after 817 // some testing. 818 if (pair->dest_fd == STDOUT_FILENO) { 819 if (!S_ISREG(pair->dest_st.st_mode)) 820 return false; 821 822 if (stdout_flags & O_APPEND) { 823 // Creating a sparse file is not possible 824 // when O_APPEND is active (it's used by 825 // shell's >> redirection). As I understand 826 // it, it is safe to temporarily disable 827 // O_APPEND in xz, because if someone 828 // happened to write to the same file at the 829 // same time, results would be bad anyway 830 // (users shouldn't assume that xz uses any 831 // specific block size when writing data). 832 // 833 // The write position may be something else 834 // than the end of the file, so we must fix 835 // it to start writing at the end of the file 836 // to imitate O_APPEND. 837 if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1) 838 return false; 839 840 // Construct the new file status flags. 841 // If O_NONBLOCK was set earlier in this 842 // function, it must be kept here too. 843 int flags = stdout_flags & ~O_APPEND; 844 if (restore_stdout_flags) 845 flags |= O_NONBLOCK; 846 847 // If this fcntl() fails, we continue but won't 848 // try to create sparse output. The original 849 // flags will still be restored if needed (to 850 // unset O_NONBLOCK) when the file is finished. 851 if (fcntl(STDOUT_FILENO, F_SETFL, flags) == -1) 852 return false; 853 854 // Disabling O_APPEND succeeded. Mark 855 // that the flags should be restored 856 // in io_close_dest(). (This may have already 857 // been set when enabling O_NONBLOCK.) 858 restore_stdout_flags = true; 859 860 } else if (lseek(STDOUT_FILENO, 0, SEEK_CUR) 861 != pair->dest_st.st_size) { 862 // Writing won't start exactly at the end 863 // of the file. We cannot use sparse output, 864 // because it would probably corrupt the file. 865 return false; 866 } 867 } 868 869 pair->dest_try_sparse = true; 870 } 871 #endif 872 873 return false; 874 } 875 876 877 extern bool 878 io_open_dest(file_pair *pair) 879 { 880 signals_block(); 881 const bool ret = io_open_dest_real(pair); 882 signals_unblock(); 883 return ret; 884 } 885 886 887 /// \brief Closes destination file of the file_pair structure 888 /// 889 /// \param pair File whose dest_fd should be closed 890 /// \param success If false, the file will be removed from the disk. 891 /// 892 /// \return Zero if closing succeeds. On error, -1 is returned and 893 /// error message printed. 894 static bool 895 io_close_dest(file_pair *pair, bool success) 896 { 897 #ifndef TUKLIB_DOSLIKE 898 // If io_open_dest() has disabled O_APPEND, restore it here. 899 if (restore_stdout_flags) { 900 assert(pair->dest_fd == STDOUT_FILENO); 901 902 restore_stdout_flags = false; 903 904 if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) { 905 message_error(_("Error restoring the O_APPEND flag " 906 "to standard output: %s"), 907 strerror(errno)); 908 return true; 909 } 910 } 911 #endif 912 913 if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO) 914 return false; 915 916 if (close(pair->dest_fd)) { 917 message_error(_("%s: Closing the file failed: %s"), 918 pair->dest_name, strerror(errno)); 919 920 // Closing destination file failed, so we cannot trust its 921 // contents. Get rid of junk: 922 io_unlink(pair->dest_name, &pair->dest_st); 923 free(pair->dest_name); 924 return true; 925 } 926 927 // If the operation using this file wasn't successful, we git rid 928 // of the junk file. 929 if (!success) 930 io_unlink(pair->dest_name, &pair->dest_st); 931 932 free(pair->dest_name); 933 934 return false; 935 } 936 937 938 extern void 939 io_close(file_pair *pair, bool success) 940 { 941 // Take care of sparseness at the end of the output file. 942 if (success && pair->dest_try_sparse 943 && pair->dest_pending_sparse > 0) { 944 // Seek forward one byte less than the size of the pending 945 // hole, then write one zero-byte. This way the file grows 946 // to its correct size. An alternative would be to use 947 // ftruncate() but that isn't portable enough (e.g. it 948 // doesn't work with FAT on Linux; FAT isn't that important 949 // since it doesn't support sparse files anyway, but we don't 950 // want to create corrupt files on it). 951 if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1, 952 SEEK_CUR) == -1) { 953 message_error(_("%s: Seeking failed when trying " 954 "to create a sparse file: %s"), 955 pair->dest_name, strerror(errno)); 956 success = false; 957 } else { 958 const uint8_t zero[1] = { '\0' }; 959 if (io_write_buf(pair, zero, 1)) 960 success = false; 961 } 962 } 963 964 signals_block(); 965 966 // Copy the file attributes. We need to skip this if destination 967 // file isn't open or it is standard output. 968 if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO) 969 io_copy_attrs(pair); 970 971 // Close the destination first. If it fails, we must not remove 972 // the source file! 973 if (io_close_dest(pair, success)) 974 success = false; 975 976 // Close the source file, and unlink it if the operation using this 977 // file pair was successful and we haven't requested to keep the 978 // source file. 979 io_close_src(pair, success); 980 981 signals_unblock(); 982 983 return; 984 } 985 986 987 extern void 988 io_fix_src_pos(file_pair *pair, size_t rewind_size) 989 { 990 assert(rewind_size <= IO_BUFFER_SIZE); 991 992 if (rewind_size > 0) { 993 // This doesn't need to work on unseekable file descriptors, 994 // so just ignore possible errors. 995 (void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR); 996 } 997 998 return; 999 } 1000 1001 1002 extern size_t 1003 io_read(file_pair *pair, io_buf *buf_union, size_t size) 1004 { 1005 // We use small buffers here. 1006 assert(size < SSIZE_MAX); 1007 1008 uint8_t *buf = buf_union->u8; 1009 size_t left = size; 1010 1011 while (left > 0) { 1012 const ssize_t amount = read(pair->src_fd, buf, left); 1013 1014 if (amount == 0) { 1015 pair->src_eof = true; 1016 break; 1017 } 1018 1019 if (amount == -1) { 1020 if (errno == EINTR) { 1021 if (user_abort) 1022 return SIZE_MAX; 1023 1024 continue; 1025 } 1026 1027 #ifndef TUKLIB_DOSLIKE 1028 if (errno == EAGAIN || errno == EWOULDBLOCK) { 1029 const io_wait_ret ret = io_wait(pair, 1030 mytime_get_flush_timeout(), 1031 true); 1032 switch (ret) { 1033 case IO_WAIT_MORE: 1034 continue; 1035 1036 case IO_WAIT_ERROR: 1037 return SIZE_MAX; 1038 1039 case IO_WAIT_TIMEOUT: 1040 return size - left; 1041 1042 default: 1043 message_bug(); 1044 } 1045 } 1046 #endif 1047 1048 message_error(_("%s: Read error: %s"), 1049 pair->src_name, strerror(errno)); 1050 1051 return SIZE_MAX; 1052 } 1053 1054 buf += (size_t)(amount); 1055 left -= (size_t)(amount); 1056 } 1057 1058 return size - left; 1059 } 1060 1061 1062 extern bool 1063 io_pread(file_pair *pair, io_buf *buf, size_t size, off_t pos) 1064 { 1065 // Using lseek() and read() is more portable than pread() and 1066 // for us it is as good as real pread(). 1067 if (lseek(pair->src_fd, pos, SEEK_SET) != pos) { 1068 message_error(_("%s: Error seeking the file: %s"), 1069 pair->src_name, strerror(errno)); 1070 return true; 1071 } 1072 1073 const size_t amount = io_read(pair, buf, size); 1074 if (amount == SIZE_MAX) 1075 return true; 1076 1077 if (amount != size) { 1078 message_error(_("%s: Unexpected end of file"), 1079 pair->src_name); 1080 return true; 1081 } 1082 1083 return false; 1084 } 1085 1086 1087 static bool 1088 is_sparse(const io_buf *buf) 1089 { 1090 assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0); 1091 1092 for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i) 1093 if (buf->u64[i] != 0) 1094 return false; 1095 1096 return true; 1097 } 1098 1099 1100 static bool 1101 io_write_buf(file_pair *pair, const uint8_t *buf, size_t size) 1102 { 1103 assert(size < SSIZE_MAX); 1104 1105 while (size > 0) { 1106 const ssize_t amount = write(pair->dest_fd, buf, size); 1107 if (amount == -1) { 1108 if (errno == EINTR) { 1109 if (user_abort) 1110 return true; 1111 1112 continue; 1113 } 1114 1115 #ifndef TUKLIB_DOSLIKE 1116 if (errno == EAGAIN || errno == EWOULDBLOCK) { 1117 if (io_wait(pair, -1, false) == IO_WAIT_MORE) 1118 continue; 1119 1120 return true; 1121 } 1122 #endif 1123 1124 // Handle broken pipe specially. gzip and bzip2 1125 // don't print anything on SIGPIPE. In addition, 1126 // gzip --quiet uses exit status 2 (warning) on 1127 // broken pipe instead of whatever raise(SIGPIPE) 1128 // would make it return. It is there to hide "Broken 1129 // pipe" message on some old shells (probably old 1130 // GNU bash). 1131 // 1132 // We don't do anything special with --quiet, which 1133 // is what bzip2 does too. If we get SIGPIPE, we 1134 // will handle it like other signals by setting 1135 // user_abort, and get EPIPE here. 1136 if (errno != EPIPE) 1137 message_error(_("%s: Write error: %s"), 1138 pair->dest_name, strerror(errno)); 1139 1140 return true; 1141 } 1142 1143 buf += (size_t)(amount); 1144 size -= (size_t)(amount); 1145 } 1146 1147 return false; 1148 } 1149 1150 1151 extern bool 1152 io_write(file_pair *pair, const io_buf *buf, size_t size) 1153 { 1154 assert(size <= IO_BUFFER_SIZE); 1155 1156 if (pair->dest_try_sparse) { 1157 // Check if the block is sparse (contains only zeros). If it 1158 // sparse, we just store the amount and return. We will take 1159 // care of actually skipping over the hole when we hit the 1160 // next data block or close the file. 1161 // 1162 // Since io_close() requires that dest_pending_sparse > 0 1163 // if the file ends with sparse block, we must also return 1164 // if size == 0 to avoid doing the lseek(). 1165 if (size == IO_BUFFER_SIZE) { 1166 if (is_sparse(buf)) { 1167 pair->dest_pending_sparse += size; 1168 return false; 1169 } 1170 } else if (size == 0) { 1171 return false; 1172 } 1173 1174 // This is not a sparse block. If we have a pending hole, 1175 // skip it now. 1176 if (pair->dest_pending_sparse > 0) { 1177 if (lseek(pair->dest_fd, pair->dest_pending_sparse, 1178 SEEK_CUR) == -1) { 1179 message_error(_("%s: Seeking failed when " 1180 "trying to create a sparse " 1181 "file: %s"), pair->dest_name, 1182 strerror(errno)); 1183 return true; 1184 } 1185 1186 pair->dest_pending_sparse = 0; 1187 } 1188 } 1189 1190 return io_write_buf(pair, buf->u8, size); 1191 } 1192