1 /////////////////////////////////////////////////////////////////////////////// 2 // 3 /// \file file_io.c 4 /// \brief File opening, unlinking, and closing 5 // 6 // Author: Lasse Collin 7 // 8 // This file has been put into the public domain. 9 // You can do whatever you want with this file. 10 // 11 /////////////////////////////////////////////////////////////////////////////// 12 13 #include "private.h" 14 15 #include <fcntl.h> 16 17 #ifdef TUKLIB_DOSLIKE 18 # include <io.h> 19 #else 20 # include <poll.h> 21 static bool warn_fchown; 22 #endif 23 24 #if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) 25 # include <sys/time.h> 26 #elif defined(HAVE_UTIME) 27 # include <utime.h> 28 #endif 29 30 #include "tuklib_open_stdxxx.h" 31 32 #ifndef O_BINARY 33 # define O_BINARY 0 34 #endif 35 36 #ifndef O_NOCTTY 37 # define O_NOCTTY 0 38 #endif 39 40 41 typedef enum { 42 IO_WAIT_MORE, // Reading or writing is possible. 43 IO_WAIT_ERROR, // Error or user_abort 44 IO_WAIT_TIMEOUT, // poll() timed out 45 } io_wait_ret; 46 47 48 /// If true, try to create sparse files when decompressing. 49 static bool try_sparse = true; 50 51 #ifndef TUKLIB_DOSLIKE 52 /// File status flags of standard input. This is used by io_open_src() 53 /// and io_close_src(). 54 static int stdin_flags; 55 static bool restore_stdin_flags = false; 56 57 /// Original file status flags of standard output. This is used by 58 /// io_open_dest() and io_close_dest() to save and restore the flags. 59 static int stdout_flags; 60 static bool restore_stdout_flags = false; 61 62 /// Self-pipe used together with the user_abort variable to avoid 63 /// race conditions with signal handling. 64 static int user_abort_pipe[2]; 65 #endif 66 67 68 static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size); 69 70 71 extern void 72 io_init(void) 73 { 74 // Make sure that stdin, stdout, and stderr are connected to 75 // a valid file descriptor. Exit immediately with exit code ERROR 76 // if we cannot make the file descriptors valid. Maybe we should 77 // print an error message, but our stderr could be screwed anyway. 78 tuklib_open_stdxxx(E_ERROR); 79 80 #ifndef TUKLIB_DOSLIKE 81 // If fchown() fails setting the owner, we warn about it only if 82 // we are root. 83 warn_fchown = geteuid() == 0; 84 85 if (pipe(user_abort_pipe) 86 || fcntl(user_abort_pipe[0], F_SETFL, O_NONBLOCK) 87 == -1 88 || fcntl(user_abort_pipe[1], F_SETFL, O_NONBLOCK) 89 == -1) 90 message_fatal(_("Error creating a pipe: %s"), 91 strerror(errno)); 92 #endif 93 94 #ifdef __DJGPP__ 95 // Avoid doing useless things when statting files. 96 // This isn't important but doesn't hurt. 97 _djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE; 98 #endif 99 100 return; 101 } 102 103 104 #ifndef TUKLIB_DOSLIKE 105 extern void 106 io_write_to_user_abort_pipe(void) 107 { 108 // If the write() fails, it's probably due to the pipe being full. 109 // Failing in that case is fine. If the reason is something else, 110 // there's not much we can do since this is called in a signal 111 // handler. So ignore the errors and try to avoid warnings with 112 // GCC and glibc when _FORTIFY_SOURCE=2 is used. 113 uint8_t b = '\0'; 114 const int ret = write(user_abort_pipe[1], &b, 1); 115 (void)ret; 116 return; 117 } 118 #endif 119 120 121 extern void 122 io_no_sparse(void) 123 { 124 try_sparse = false; 125 return; 126 } 127 128 129 #ifndef TUKLIB_DOSLIKE 130 /// \brief Waits for input or output to become available or for a signal 131 /// 132 /// This uses the self-pipe trick to avoid a race condition that can occur 133 /// if a signal is caught after user_abort has been checked but before e.g. 134 /// read() has been called. In that situation read() could block unless 135 /// non-blocking I/O is used. With non-blocking I/O something like select() 136 /// or poll() is needed to avoid a busy-wait loop, and the same race condition 137 /// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in 138 /// POSIX) but neither is portable enough in 2013. The self-pipe trick is 139 /// old and very portable. 140 static io_wait_ret 141 io_wait(file_pair *pair, int timeout, bool is_reading) 142 { 143 struct pollfd pfd[2]; 144 145 if (is_reading) { 146 pfd[0].fd = pair->src_fd; 147 pfd[0].events = POLLIN; 148 } else { 149 pfd[0].fd = pair->dest_fd; 150 pfd[0].events = POLLOUT; 151 } 152 153 pfd[1].fd = user_abort_pipe[0]; 154 pfd[1].events = POLLIN; 155 156 while (true) { 157 const int ret = poll(pfd, 2, timeout); 158 159 if (user_abort) 160 return IO_WAIT_ERROR; 161 162 if (ret == -1) { 163 if (errno == EINTR || errno == EAGAIN) 164 continue; 165 166 message_error(_("%s: poll() failed: %s"), 167 is_reading ? pair->src_name 168 : pair->dest_name, 169 strerror(errno)); 170 return IO_WAIT_ERROR; 171 } 172 173 if (ret == 0) { 174 assert(opt_flush_timeout != 0); 175 flush_needed = true; 176 return IO_WAIT_TIMEOUT; 177 } 178 179 if (pfd[0].revents != 0) 180 return IO_WAIT_MORE; 181 } 182 } 183 #endif 184 185 186 /// \brief Unlink a file 187 /// 188 /// This tries to verify that the file being unlinked really is the file that 189 /// we want to unlink by verifying device and inode numbers. There's still 190 /// a small unavoidable race, but this is much better than nothing (the file 191 /// could have been moved/replaced even hours earlier). 192 static void 193 io_unlink(const char *name, const struct stat *known_st) 194 { 195 #if defined(TUKLIB_DOSLIKE) 196 // On DOS-like systems, st_ino is meaningless, so don't bother 197 // testing it. Just silence a compiler warning. 198 (void)known_st; 199 #else 200 struct stat new_st; 201 202 // If --force was used, use stat() instead of lstat(). This way 203 // (de)compressing symlinks works correctly. However, it also means 204 // that xz cannot detect if a regular file foo is renamed to bar 205 // and then a symlink foo -> bar is created. Because of stat() 206 // instead of lstat(), xz will think that foo hasn't been replaced 207 // with another file. Thus, xz will remove foo even though it no 208 // longer is the same file that xz used when it started compressing. 209 // Probably it's not too bad though, so this doesn't need a more 210 // complex fix. 211 const int stat_ret = opt_force 212 ? stat(name, &new_st) : lstat(name, &new_st); 213 214 if (stat_ret 215 # ifdef __VMS 216 // st_ino is an array, and we don't want to 217 // compare st_dev at all. 218 || memcmp(&new_st.st_ino, &known_st->st_ino, 219 sizeof(new_st.st_ino)) != 0 220 # else 221 // Typical POSIX-like system 222 || new_st.st_dev != known_st->st_dev 223 || new_st.st_ino != known_st->st_ino 224 # endif 225 ) 226 // TRANSLATORS: When compression or decompression finishes, 227 // and xz is going to remove the source file, xz first checks 228 // if the source file still exists, and if it does, does its 229 // device and inode numbers match what xz saw when it opened 230 // the source file. If these checks fail, this message is 231 // shown, %s being the filename, and the file is not deleted. 232 // The check for device and inode numbers is there, because 233 // it is possible that the user has put a new file in place 234 // of the original file, and in that case it obviously 235 // shouldn't be removed. 236 message_error(_("%s: File seems to have been moved, " 237 "not removing"), name); 238 else 239 #endif 240 // There's a race condition between lstat() and unlink() 241 // but at least we have tried to avoid removing wrong file. 242 if (unlink(name)) 243 message_error(_("%s: Cannot remove: %s"), 244 name, strerror(errno)); 245 246 return; 247 } 248 249 250 /// \brief Copies owner/group and permissions 251 /// 252 /// \todo ACL and EA support 253 /// 254 static void 255 io_copy_attrs(const file_pair *pair) 256 { 257 // Skip chown and chmod on Windows. 258 #ifndef TUKLIB_DOSLIKE 259 // This function is more tricky than you may think at first. 260 // Blindly copying permissions may permit users to access the 261 // destination file who didn't have permission to access the 262 // source file. 263 264 // Try changing the owner of the file. If we aren't root or the owner 265 // isn't already us, fchown() probably doesn't succeed. We warn 266 // about failing fchown() only if we are root. 267 if (fchown(pair->dest_fd, pair->src_st.st_uid, -1) && warn_fchown) 268 message_warning(_("%s: Cannot set the file owner: %s"), 269 pair->dest_name, strerror(errno)); 270 271 mode_t mode; 272 273 if (fchown(pair->dest_fd, -1, pair->src_st.st_gid)) { 274 message_warning(_("%s: Cannot set the file group: %s"), 275 pair->dest_name, strerror(errno)); 276 // We can still safely copy some additional permissions: 277 // `group' must be at least as strict as `other' and 278 // also vice versa. 279 // 280 // NOTE: After this, the owner of the source file may 281 // get additional permissions. This shouldn't be too bad, 282 // because the owner would have had permission to chmod 283 // the original file anyway. 284 mode = ((pair->src_st.st_mode & 0070) >> 3) 285 & (pair->src_st.st_mode & 0007); 286 mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode; 287 } else { 288 // Drop the setuid, setgid, and sticky bits. 289 mode = pair->src_st.st_mode & 0777; 290 } 291 292 if (fchmod(pair->dest_fd, mode)) 293 message_warning(_("%s: Cannot set the file permissions: %s"), 294 pair->dest_name, strerror(errno)); 295 #endif 296 297 // Copy the timestamps. We have several possible ways to do this, of 298 // which some are better in both security and precision. 299 // 300 // First, get the nanosecond part of the timestamps. As of writing, 301 // it's not standardized by POSIX, and there are several names for 302 // the same thing in struct stat. 303 long atime_nsec; 304 long mtime_nsec; 305 306 # if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) 307 // GNU and Solaris 308 atime_nsec = pair->src_st.st_atim.tv_nsec; 309 mtime_nsec = pair->src_st.st_mtim.tv_nsec; 310 311 # elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) 312 // BSD 313 atime_nsec = pair->src_st.st_atimespec.tv_nsec; 314 mtime_nsec = pair->src_st.st_mtimespec.tv_nsec; 315 316 # elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC) 317 // GNU and BSD without extensions 318 atime_nsec = pair->src_st.st_atimensec; 319 mtime_nsec = pair->src_st.st_mtimensec; 320 321 # elif defined(HAVE_STRUCT_STAT_ST_UATIME) 322 // Tru64 323 atime_nsec = pair->src_st.st_uatime * 1000; 324 mtime_nsec = pair->src_st.st_umtime * 1000; 325 326 # elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC) 327 // UnixWare 328 atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec; 329 mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec; 330 331 # else 332 // Safe fallback 333 atime_nsec = 0; 334 mtime_nsec = 0; 335 # endif 336 337 // Construct a structure to hold the timestamps and call appropriate 338 // function to set the timestamps. 339 #if defined(HAVE_FUTIMENS) 340 // Use nanosecond precision. 341 struct timespec tv[2]; 342 tv[0].tv_sec = pair->src_st.st_atime; 343 tv[0].tv_nsec = atime_nsec; 344 tv[1].tv_sec = pair->src_st.st_mtime; 345 tv[1].tv_nsec = mtime_nsec; 346 347 (void)futimens(pair->dest_fd, tv); 348 349 #elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) 350 // Use microsecond precision. 351 struct timeval tv[2]; 352 tv[0].tv_sec = pair->src_st.st_atime; 353 tv[0].tv_usec = atime_nsec / 1000; 354 tv[1].tv_sec = pair->src_st.st_mtime; 355 tv[1].tv_usec = mtime_nsec / 1000; 356 357 # if defined(HAVE_FUTIMES) 358 (void)futimes(pair->dest_fd, tv); 359 # elif defined(HAVE_FUTIMESAT) 360 (void)futimesat(pair->dest_fd, NULL, tv); 361 # else 362 // Argh, no function to use a file descriptor to set the timestamp. 363 (void)utimes(pair->dest_name, tv); 364 # endif 365 366 #elif defined(HAVE_UTIME) 367 // Use one-second precision. utime() doesn't support using file 368 // descriptor either. Some systems have broken utime() prototype 369 // so don't make this const. 370 struct utimbuf buf = { 371 .actime = pair->src_st.st_atime, 372 .modtime = pair->src_st.st_mtime, 373 }; 374 375 // Avoid warnings. 376 (void)atime_nsec; 377 (void)mtime_nsec; 378 379 (void)utime(pair->dest_name, &buf); 380 #endif 381 382 return; 383 } 384 385 386 /// Opens the source file. Returns false on success, true on error. 387 static bool 388 io_open_src_real(file_pair *pair) 389 { 390 // There's nothing to open when reading from stdin. 391 if (pair->src_name == stdin_filename) { 392 pair->src_fd = STDIN_FILENO; 393 #ifdef TUKLIB_DOSLIKE 394 setmode(STDIN_FILENO, O_BINARY); 395 #else 396 // Enable O_NONBLOCK for stdin. 397 stdin_flags = fcntl(STDIN_FILENO, F_GETFL); 398 if (stdin_flags == -1) { 399 message_error(_("Error getting the file status flags " 400 "from standard input: %s"), 401 strerror(errno)); 402 return true; 403 } 404 405 if ((stdin_flags & O_NONBLOCK) == 0) { 406 if (fcntl(STDIN_FILENO, F_SETFL, 407 stdin_flags | O_NONBLOCK) == -1) { 408 message_error(_("Error setting O_NONBLOCK " 409 "on standard input: %s"), 410 strerror(errno)); 411 return true; 412 } 413 414 restore_stdin_flags = true; 415 } 416 #endif 417 #ifdef HAVE_POSIX_FADVISE 418 // It will fail if stdin is a pipe and that's fine. 419 (void)posix_fadvise(STDIN_FILENO, 0, 0, POSIX_FADV_SEQUENTIAL); 420 #endif 421 return false; 422 } 423 424 // Symlinks are not followed unless writing to stdout or --force 425 // was used. 426 const bool follow_symlinks = opt_stdout || opt_force; 427 428 // We accept only regular files if we are writing the output 429 // to disk too. bzip2 allows overriding this with --force but 430 // gzip and xz don't. 431 const bool reg_files_only = !opt_stdout; 432 433 // Flags for open() 434 int flags = O_RDONLY | O_BINARY | O_NOCTTY; 435 436 #ifndef TUKLIB_DOSLIKE 437 // Use non-blocking I/O: 438 // - It prevents blocking when opening FIFOs and some other 439 // special files, which is good if we want to accept only 440 // regular files. 441 // - It can help avoiding some race conditions with signal handling. 442 flags |= O_NONBLOCK; 443 #endif 444 445 #if defined(O_NOFOLLOW) 446 if (!follow_symlinks) 447 flags |= O_NOFOLLOW; 448 #elif !defined(TUKLIB_DOSLIKE) 449 // Some POSIX-like systems lack O_NOFOLLOW (it's not required 450 // by POSIX). Check for symlinks with a separate lstat() on 451 // these systems. 452 if (!follow_symlinks) { 453 struct stat st; 454 if (lstat(pair->src_name, &st)) { 455 message_error("%s: %s", pair->src_name, 456 strerror(errno)); 457 return true; 458 459 } else if (S_ISLNK(st.st_mode)) { 460 message_warning(_("%s: Is a symbolic link, " 461 "skipping"), pair->src_name); 462 return true; 463 } 464 } 465 #else 466 // Avoid warnings. 467 (void)follow_symlinks; 468 #endif 469 470 // Try to open the file. Signals have been blocked so EINTR shouldn't 471 // be possible. 472 pair->src_fd = open(pair->src_name, flags); 473 474 if (pair->src_fd == -1) { 475 // Signals (that have a signal handler) have been blocked. 476 assert(errno != EINTR); 477 478 #ifdef O_NOFOLLOW 479 // Give an understandable error message if the reason 480 // for failing was that the file was a symbolic link. 481 // 482 // Note that at least Linux, OpenBSD, Solaris, and Darwin 483 // use ELOOP to indicate that O_NOFOLLOW was the reason 484 // that open() failed. Because there may be 485 // directories in the pathname, ELOOP may occur also 486 // because of a symlink loop in the directory part. 487 // So ELOOP doesn't tell us what actually went wrong, 488 // and this stupidity went into POSIX-1.2008 too. 489 // 490 // FreeBSD associates EMLINK with O_NOFOLLOW and 491 // Tru64 uses ENOTSUP. We use these directly here 492 // and skip the lstat() call and the associated race. 493 // I want to hear if there are other kernels that 494 // fail with something else than ELOOP with O_NOFOLLOW. 495 bool was_symlink = false; 496 497 # if defined(__FreeBSD__) || defined(__DragonFly__) 498 if (errno == EMLINK) 499 was_symlink = true; 500 501 # elif defined(__digital__) && defined(__unix__) 502 if (errno == ENOTSUP) 503 was_symlink = true; 504 505 # elif defined(__NetBSD__) 506 if (errno == EFTYPE) 507 was_symlink = true; 508 509 # else 510 if (errno == ELOOP && !follow_symlinks) { 511 const int saved_errno = errno; 512 struct stat st; 513 if (lstat(pair->src_name, &st) == 0 514 && S_ISLNK(st.st_mode)) 515 was_symlink = true; 516 517 errno = saved_errno; 518 } 519 # endif 520 521 if (was_symlink) 522 message_warning(_("%s: Is a symbolic link, " 523 "skipping"), pair->src_name); 524 else 525 #endif 526 // Something else than O_NOFOLLOW failing 527 // (assuming that the race conditions didn't 528 // confuse us). 529 message_error("%s: %s", pair->src_name, 530 strerror(errno)); 531 532 return true; 533 } 534 535 // Stat the source file. We need the result also when we copy 536 // the permissions, and when unlinking. 537 // 538 // NOTE: Use stat() instead of fstat() with DJGPP, because 539 // then we have a better chance to get st_ino value that can 540 // be used in io_open_dest_real() to prevent overwriting the 541 // source file. 542 #ifdef __DJGPP__ 543 if (stat(pair->src_name, &pair->src_st)) 544 goto error_msg; 545 #else 546 if (fstat(pair->src_fd, &pair->src_st)) 547 goto error_msg; 548 #endif 549 550 if (S_ISDIR(pair->src_st.st_mode)) { 551 message_warning(_("%s: Is a directory, skipping"), 552 pair->src_name); 553 goto error; 554 } 555 556 if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) { 557 message_warning(_("%s: Not a regular file, skipping"), 558 pair->src_name); 559 goto error; 560 } 561 562 #ifndef TUKLIB_DOSLIKE 563 if (reg_files_only && !opt_force) { 564 if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) { 565 // gzip rejects setuid and setgid files even 566 // when --force was used. bzip2 doesn't check 567 // for them, but calls fchown() after fchmod(), 568 // and many systems automatically drop setuid 569 // and setgid bits there. 570 // 571 // We accept setuid and setgid files if 572 // --force was used. We drop these bits 573 // explicitly in io_copy_attr(). 574 message_warning(_("%s: File has setuid or " 575 "setgid bit set, skipping"), 576 pair->src_name); 577 goto error; 578 } 579 580 if (pair->src_st.st_mode & S_ISVTX) { 581 message_warning(_("%s: File has sticky bit " 582 "set, skipping"), 583 pair->src_name); 584 goto error; 585 } 586 587 if (pair->src_st.st_nlink > 1) { 588 message_warning(_("%s: Input file has more " 589 "than one hard link, " 590 "skipping"), pair->src_name); 591 goto error; 592 } 593 } 594 595 // If it is something else than a regular file, wait until 596 // there is input available. This way reading from FIFOs 597 // will work when open() is used with O_NONBLOCK. 598 if (!S_ISREG(pair->src_st.st_mode)) { 599 signals_unblock(); 600 const io_wait_ret ret = io_wait(pair, -1, true); 601 signals_block(); 602 603 if (ret != IO_WAIT_MORE) 604 goto error; 605 } 606 #endif 607 608 #ifdef HAVE_POSIX_FADVISE 609 // It will fail with some special files like FIFOs but that is fine. 610 (void)posix_fadvise(pair->src_fd, 0, 0, POSIX_FADV_SEQUENTIAL); 611 #endif 612 613 return false; 614 615 error_msg: 616 message_error("%s: %s", pair->src_name, strerror(errno)); 617 error: 618 (void)close(pair->src_fd); 619 return true; 620 } 621 622 623 extern file_pair * 624 io_open_src(const char *src_name) 625 { 626 if (is_empty_filename(src_name)) 627 return NULL; 628 629 // Since we have only one file open at a time, we can use 630 // a statically allocated structure. 631 static file_pair pair; 632 633 pair = (file_pair){ 634 .src_name = src_name, 635 .dest_name = NULL, 636 .src_fd = -1, 637 .dest_fd = -1, 638 .src_eof = false, 639 .dest_try_sparse = false, 640 .dest_pending_sparse = 0, 641 }; 642 643 // Block the signals, for which we have a custom signal handler, so 644 // that we don't need to worry about EINTR. 645 signals_block(); 646 const bool error = io_open_src_real(&pair); 647 signals_unblock(); 648 649 return error ? NULL : &pair; 650 } 651 652 653 /// \brief Closes source file of the file_pair structure 654 /// 655 /// \param pair File whose src_fd should be closed 656 /// \param success If true, the file will be removed from the disk if 657 /// closing succeeds and --keep hasn't been used. 658 static void 659 io_close_src(file_pair *pair, bool success) 660 { 661 #ifndef TUKLIB_DOSLIKE 662 if (restore_stdin_flags) { 663 assert(pair->src_fd == STDIN_FILENO); 664 665 restore_stdin_flags = false; 666 667 if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1) 668 message_error(_("Error restoring the status flags " 669 "to standard input: %s"), 670 strerror(errno)); 671 } 672 #endif 673 674 if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) { 675 #ifdef TUKLIB_DOSLIKE 676 (void)close(pair->src_fd); 677 #endif 678 679 // If we are going to unlink(), do it before closing the file. 680 // This way there's no risk that someone replaces the file and 681 // happens to get same inode number, which would make us 682 // unlink() wrong file. 683 // 684 // NOTE: DOS-like systems are an exception to this, because 685 // they don't allow unlinking files that are open. *sigh* 686 if (success && !opt_keep_original) 687 io_unlink(pair->src_name, &pair->src_st); 688 689 #ifndef TUKLIB_DOSLIKE 690 (void)close(pair->src_fd); 691 #endif 692 } 693 694 return; 695 } 696 697 698 static bool 699 io_open_dest_real(file_pair *pair) 700 { 701 if (opt_stdout || pair->src_fd == STDIN_FILENO) { 702 // We don't modify or free() this. 703 pair->dest_name = (char *)"(stdout)"; 704 pair->dest_fd = STDOUT_FILENO; 705 #ifdef TUKLIB_DOSLIKE 706 setmode(STDOUT_FILENO, O_BINARY); 707 #else 708 // Set O_NONBLOCK if it isn't already set. 709 // 710 // NOTE: O_APPEND may be unset later in this function 711 // and it relies on stdout_flags being set here. 712 stdout_flags = fcntl(STDOUT_FILENO, F_GETFL); 713 if (stdout_flags == -1) { 714 message_error(_("Error getting the file status flags " 715 "from standard output: %s"), 716 strerror(errno)); 717 return true; 718 } 719 720 if ((stdout_flags & O_NONBLOCK) == 0) { 721 if (fcntl(STDOUT_FILENO, F_SETFL, 722 stdout_flags | O_NONBLOCK) == -1) { 723 message_error(_("Error setting O_NONBLOCK " 724 "on standard output: %s"), 725 strerror(errno)); 726 return true; 727 } 728 729 restore_stdout_flags = true; 730 } 731 #endif 732 } else { 733 pair->dest_name = suffix_get_dest_name(pair->src_name); 734 if (pair->dest_name == NULL) 735 return true; 736 737 #ifdef __DJGPP__ 738 struct stat st; 739 if (stat(pair->dest_name, &st) == 0) { 740 // Check that it isn't a special file like "prn". 741 if (st.st_dev == -1) { 742 message_error("%s: Refusing to write to " 743 "a DOS special file", 744 pair->dest_name); 745 return true; 746 } 747 748 // Check that we aren't overwriting the source file. 749 if (st.st_dev == pair->src_st.st_dev 750 && st.st_ino == pair->src_st.st_ino) { 751 message_error("%s: Output file is the same " 752 "as the input file", 753 pair->dest_name); 754 return true; 755 } 756 } 757 #endif 758 759 // If --force was used, unlink the target file first. 760 if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { 761 message_error(_("%s: Cannot remove: %s"), 762 pair->dest_name, strerror(errno)); 763 free(pair->dest_name); 764 return true; 765 } 766 767 // Open the file. 768 int flags = O_WRONLY | O_BINARY | O_NOCTTY 769 | O_CREAT | O_EXCL; 770 #ifndef TUKLIB_DOSLIKE 771 flags |= O_NONBLOCK; 772 #endif 773 const mode_t mode = S_IRUSR | S_IWUSR; 774 pair->dest_fd = open(pair->dest_name, flags, mode); 775 776 if (pair->dest_fd == -1) { 777 message_error("%s: %s", pair->dest_name, 778 strerror(errno)); 779 free(pair->dest_name); 780 return true; 781 } 782 } 783 784 #ifndef TUKLIB_DOSLIKE 785 // dest_st isn't used on DOS-like systems except as a dummy 786 // argument to io_unlink(), so don't fstat() on such systems. 787 if (fstat(pair->dest_fd, &pair->dest_st)) { 788 // If fstat() really fails, we have a safe fallback here. 789 # if defined(__VMS) 790 pair->dest_st.st_ino[0] = 0; 791 pair->dest_st.st_ino[1] = 0; 792 pair->dest_st.st_ino[2] = 0; 793 # else 794 pair->dest_st.st_dev = 0; 795 pair->dest_st.st_ino = 0; 796 # endif 797 } else if (try_sparse && opt_mode == MODE_DECOMPRESS) { 798 // When writing to standard output, we need to be extra 799 // careful: 800 // - It may be connected to something else than 801 // a regular file. 802 // - We aren't necessarily writing to a new empty file 803 // or to the end of an existing file. 804 // - O_APPEND may be active. 805 // 806 // TODO: I'm keeping this disabled for DOS-like systems 807 // for now. FAT doesn't support sparse files, but NTFS 808 // does, so maybe this should be enabled on Windows after 809 // some testing. 810 if (pair->dest_fd == STDOUT_FILENO) { 811 if (!S_ISREG(pair->dest_st.st_mode)) 812 return false; 813 814 if (stdout_flags & O_APPEND) { 815 // Creating a sparse file is not possible 816 // when O_APPEND is active (it's used by 817 // shell's >> redirection). As I understand 818 // it, it is safe to temporarily disable 819 // O_APPEND in xz, because if someone 820 // happened to write to the same file at the 821 // same time, results would be bad anyway 822 // (users shouldn't assume that xz uses any 823 // specific block size when writing data). 824 // 825 // The write position may be something else 826 // than the end of the file, so we must fix 827 // it to start writing at the end of the file 828 // to imitate O_APPEND. 829 if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1) 830 return false; 831 832 // O_NONBLOCK was set earlier in this function 833 // so it must be kept here too. If this 834 // fcntl() call fails, we continue but won't 835 // try to create sparse output. The original 836 // flags will still be restored if needed (to 837 // unset O_NONBLOCK) when the file is finished. 838 if (fcntl(STDOUT_FILENO, F_SETFL, 839 (stdout_flags | O_NONBLOCK) 840 & ~O_APPEND) == -1) 841 return false; 842 843 // Disabling O_APPEND succeeded. Mark 844 // that the flags should be restored 845 // in io_close_dest(). This quite likely was 846 // already set when enabling O_NONBLOCK but 847 // just in case O_NONBLOCK was already set, 848 // set this again here. 849 restore_stdout_flags = true; 850 851 } else if (lseek(STDOUT_FILENO, 0, SEEK_CUR) 852 != pair->dest_st.st_size) { 853 // Writing won't start exactly at the end 854 // of the file. We cannot use sparse output, 855 // because it would probably corrupt the file. 856 return false; 857 } 858 } 859 860 pair->dest_try_sparse = true; 861 } 862 #endif 863 864 return false; 865 } 866 867 868 extern bool 869 io_open_dest(file_pair *pair) 870 { 871 signals_block(); 872 const bool ret = io_open_dest_real(pair); 873 signals_unblock(); 874 return ret; 875 } 876 877 878 /// \brief Closes destination file of the file_pair structure 879 /// 880 /// \param pair File whose dest_fd should be closed 881 /// \param success If false, the file will be removed from the disk. 882 /// 883 /// \return Zero if closing succeeds. On error, -1 is returned and 884 /// error message printed. 885 static bool 886 io_close_dest(file_pair *pair, bool success) 887 { 888 #ifndef TUKLIB_DOSLIKE 889 // If io_open_dest() has disabled O_APPEND, restore it here. 890 if (restore_stdout_flags) { 891 assert(pair->dest_fd == STDOUT_FILENO); 892 893 restore_stdout_flags = false; 894 895 if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) { 896 message_error(_("Error restoring the O_APPEND flag " 897 "to standard output: %s"), 898 strerror(errno)); 899 return true; 900 } 901 } 902 #endif 903 904 if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO) 905 return false; 906 907 if (close(pair->dest_fd)) { 908 message_error(_("%s: Closing the file failed: %s"), 909 pair->dest_name, strerror(errno)); 910 911 // Closing destination file failed, so we cannot trust its 912 // contents. Get rid of junk: 913 io_unlink(pair->dest_name, &pair->dest_st); 914 free(pair->dest_name); 915 return true; 916 } 917 918 // If the operation using this file wasn't successful, we git rid 919 // of the junk file. 920 if (!success) 921 io_unlink(pair->dest_name, &pair->dest_st); 922 923 free(pair->dest_name); 924 925 return false; 926 } 927 928 929 extern void 930 io_close(file_pair *pair, bool success) 931 { 932 // Take care of sparseness at the end of the output file. 933 if (success && pair->dest_try_sparse 934 && pair->dest_pending_sparse > 0) { 935 // Seek forward one byte less than the size of the pending 936 // hole, then write one zero-byte. This way the file grows 937 // to its correct size. An alternative would be to use 938 // ftruncate() but that isn't portable enough (e.g. it 939 // doesn't work with FAT on Linux; FAT isn't that important 940 // since it doesn't support sparse files anyway, but we don't 941 // want to create corrupt files on it). 942 if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1, 943 SEEK_CUR) == -1) { 944 message_error(_("%s: Seeking failed when trying " 945 "to create a sparse file: %s"), 946 pair->dest_name, strerror(errno)); 947 success = false; 948 } else { 949 const uint8_t zero[1] = { '\0' }; 950 if (io_write_buf(pair, zero, 1)) 951 success = false; 952 } 953 } 954 955 signals_block(); 956 957 // Copy the file attributes. We need to skip this if destination 958 // file isn't open or it is standard output. 959 if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO) 960 io_copy_attrs(pair); 961 962 // Close the destination first. If it fails, we must not remove 963 // the source file! 964 if (io_close_dest(pair, success)) 965 success = false; 966 967 // Close the source file, and unlink it if the operation using this 968 // file pair was successful and we haven't requested to keep the 969 // source file. 970 io_close_src(pair, success); 971 972 signals_unblock(); 973 974 return; 975 } 976 977 978 extern void 979 io_fix_src_pos(file_pair *pair, size_t rewind_size) 980 { 981 assert(rewind_size <= IO_BUFFER_SIZE); 982 983 if (rewind_size > 0) { 984 // This doesn't need to work on unseekable file descriptors, 985 // so just ignore possible errors. 986 (void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR); 987 } 988 989 return; 990 } 991 992 993 extern size_t 994 io_read(file_pair *pair, io_buf *buf_union, size_t size) 995 { 996 // We use small buffers here. 997 assert(size < SSIZE_MAX); 998 999 uint8_t *buf = buf_union->u8; 1000 size_t left = size; 1001 1002 while (left > 0) { 1003 const ssize_t amount = read(pair->src_fd, buf, left); 1004 1005 if (amount == 0) { 1006 pair->src_eof = true; 1007 break; 1008 } 1009 1010 if (amount == -1) { 1011 if (errno == EINTR) { 1012 if (user_abort) 1013 return SIZE_MAX; 1014 1015 continue; 1016 } 1017 1018 #ifndef TUKLIB_DOSLIKE 1019 if (errno == EAGAIN || errno == EWOULDBLOCK) { 1020 const io_wait_ret ret = io_wait(pair, 1021 mytime_get_flush_timeout(), 1022 true); 1023 switch (ret) { 1024 case IO_WAIT_MORE: 1025 continue; 1026 1027 case IO_WAIT_ERROR: 1028 return SIZE_MAX; 1029 1030 case IO_WAIT_TIMEOUT: 1031 return size - left; 1032 1033 default: 1034 message_bug(); 1035 } 1036 } 1037 #endif 1038 1039 message_error(_("%s: Read error: %s"), 1040 pair->src_name, strerror(errno)); 1041 1042 return SIZE_MAX; 1043 } 1044 1045 buf += (size_t)(amount); 1046 left -= (size_t)(amount); 1047 } 1048 1049 return size - left; 1050 } 1051 1052 1053 extern bool 1054 io_pread(file_pair *pair, io_buf *buf, size_t size, off_t pos) 1055 { 1056 // Using lseek() and read() is more portable than pread() and 1057 // for us it is as good as real pread(). 1058 if (lseek(pair->src_fd, pos, SEEK_SET) != pos) { 1059 message_error(_("%s: Error seeking the file: %s"), 1060 pair->src_name, strerror(errno)); 1061 return true; 1062 } 1063 1064 const size_t amount = io_read(pair, buf, size); 1065 if (amount == SIZE_MAX) 1066 return true; 1067 1068 if (amount != size) { 1069 message_error(_("%s: Unexpected end of file"), 1070 pair->src_name); 1071 return true; 1072 } 1073 1074 return false; 1075 } 1076 1077 1078 static bool 1079 is_sparse(const io_buf *buf) 1080 { 1081 assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0); 1082 1083 for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i) 1084 if (buf->u64[i] != 0) 1085 return false; 1086 1087 return true; 1088 } 1089 1090 1091 static bool 1092 io_write_buf(file_pair *pair, const uint8_t *buf, size_t size) 1093 { 1094 assert(size < SSIZE_MAX); 1095 1096 while (size > 0) { 1097 const ssize_t amount = write(pair->dest_fd, buf, size); 1098 if (amount == -1) { 1099 if (errno == EINTR) { 1100 if (user_abort) 1101 return true; 1102 1103 continue; 1104 } 1105 1106 #ifndef TUKLIB_DOSLIKE 1107 if (errno == EAGAIN || errno == EWOULDBLOCK) { 1108 if (io_wait(pair, -1, false) == IO_WAIT_MORE) 1109 continue; 1110 1111 return true; 1112 } 1113 #endif 1114 1115 // Handle broken pipe specially. gzip and bzip2 1116 // don't print anything on SIGPIPE. In addition, 1117 // gzip --quiet uses exit status 2 (warning) on 1118 // broken pipe instead of whatever raise(SIGPIPE) 1119 // would make it return. It is there to hide "Broken 1120 // pipe" message on some old shells (probably old 1121 // GNU bash). 1122 // 1123 // We don't do anything special with --quiet, which 1124 // is what bzip2 does too. If we get SIGPIPE, we 1125 // will handle it like other signals by setting 1126 // user_abort, and get EPIPE here. 1127 if (errno != EPIPE) 1128 message_error(_("%s: Write error: %s"), 1129 pair->dest_name, strerror(errno)); 1130 1131 return true; 1132 } 1133 1134 buf += (size_t)(amount); 1135 size -= (size_t)(amount); 1136 } 1137 1138 return false; 1139 } 1140 1141 1142 extern bool 1143 io_write(file_pair *pair, const io_buf *buf, size_t size) 1144 { 1145 assert(size <= IO_BUFFER_SIZE); 1146 1147 if (pair->dest_try_sparse) { 1148 // Check if the block is sparse (contains only zeros). If it 1149 // sparse, we just store the amount and return. We will take 1150 // care of actually skipping over the hole when we hit the 1151 // next data block or close the file. 1152 // 1153 // Since io_close() requires that dest_pending_sparse > 0 1154 // if the file ends with sparse block, we must also return 1155 // if size == 0 to avoid doing the lseek(). 1156 if (size == IO_BUFFER_SIZE) { 1157 if (is_sparse(buf)) { 1158 pair->dest_pending_sparse += size; 1159 return false; 1160 } 1161 } else if (size == 0) { 1162 return false; 1163 } 1164 1165 // This is not a sparse block. If we have a pending hole, 1166 // skip it now. 1167 if (pair->dest_pending_sparse > 0) { 1168 if (lseek(pair->dest_fd, pair->dest_pending_sparse, 1169 SEEK_CUR) == -1) { 1170 message_error(_("%s: Seeking failed when " 1171 "trying to create a sparse " 1172 "file: %s"), pair->dest_name, 1173 strerror(errno)); 1174 return true; 1175 } 1176 1177 pair->dest_pending_sparse = 0; 1178 } 1179 } 1180 1181 return io_write_buf(pair, buf->u8, size); 1182 } 1183