1 /////////////////////////////////////////////////////////////////////////////// 2 // 3 /// \file file_io.c 4 /// \brief File opening, unlinking, and closing 5 // 6 // Author: Lasse Collin 7 // 8 // This file has been put into the public domain. 9 // You can do whatever you want with this file. 10 // 11 /////////////////////////////////////////////////////////////////////////////// 12 13 #include "private.h" 14 15 #include <fcntl.h> 16 17 #ifdef TUKLIB_DOSLIKE 18 # include <io.h> 19 #else 20 # include <poll.h> 21 static bool warn_fchown; 22 #endif 23 24 #if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) 25 # include <sys/time.h> 26 #elif defined(HAVE_UTIME) 27 # include <utime.h> 28 #endif 29 30 #include "tuklib_open_stdxxx.h" 31 32 #ifndef O_BINARY 33 # define O_BINARY 0 34 #endif 35 36 #ifndef O_NOCTTY 37 # define O_NOCTTY 0 38 #endif 39 40 41 typedef enum { 42 IO_WAIT_MORE, // Reading or writing is possible. 43 IO_WAIT_ERROR, // Error or user_abort 44 IO_WAIT_TIMEOUT, // poll() timed out 45 } io_wait_ret; 46 47 48 /// If true, try to create sparse files when decompressing. 49 static bool try_sparse = true; 50 51 #ifndef TUKLIB_DOSLIKE 52 /// File status flags of standard input. This is used by io_open_src() 53 /// and io_close_src(). 54 static int stdin_flags; 55 static bool restore_stdin_flags = false; 56 57 /// Original file status flags of standard output. This is used by 58 /// io_open_dest() and io_close_dest() to save and restore the flags. 59 static int stdout_flags; 60 static bool restore_stdout_flags = false; 61 62 /// Self-pipe used together with the user_abort variable to avoid 63 /// race conditions with signal handling. 64 static int user_abort_pipe[2]; 65 #endif 66 67 68 static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size); 69 70 71 extern void 72 io_init(void) 73 { 74 // Make sure that stdin, stdout, and stderr are connected to 75 // a valid file descriptor. Exit immediately with exit code ERROR 76 // if we cannot make the file descriptors valid. Maybe we should 77 // print an error message, but our stderr could be screwed anyway. 78 tuklib_open_stdxxx(E_ERROR); 79 80 #ifndef TUKLIB_DOSLIKE 81 // If fchown() fails setting the owner, we warn about it only if 82 // we are root. 83 warn_fchown = geteuid() == 0; 84 85 // Create a pipe for the self-pipe trick. 86 if (pipe(user_abort_pipe)) 87 message_fatal(_("Error creating a pipe: %s"), 88 strerror(errno)); 89 90 // Make both ends of the pipe non-blocking. 91 for (unsigned i = 0; i < 2; ++i) { 92 int flags = fcntl(user_abort_pipe[i], F_GETFL); 93 if (flags == -1 || fcntl(user_abort_pipe[i], F_SETFL, 94 flags | O_NONBLOCK) == -1) 95 message_fatal(_("Error creating a pipe: %s"), 96 strerror(errno)); 97 } 98 #endif 99 100 #ifdef __DJGPP__ 101 // Avoid doing useless things when statting files. 102 // This isn't important but doesn't hurt. 103 _djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE; 104 #endif 105 106 return; 107 } 108 109 110 #ifndef TUKLIB_DOSLIKE 111 extern void 112 io_write_to_user_abort_pipe(void) 113 { 114 // If the write() fails, it's probably due to the pipe being full. 115 // Failing in that case is fine. If the reason is something else, 116 // there's not much we can do since this is called in a signal 117 // handler. So ignore the errors and try to avoid warnings with 118 // GCC and glibc when _FORTIFY_SOURCE=2 is used. 119 uint8_t b = '\0'; 120 const int ret = write(user_abort_pipe[1], &b, 1); 121 (void)ret; 122 return; 123 } 124 #endif 125 126 127 extern void 128 io_no_sparse(void) 129 { 130 try_sparse = false; 131 return; 132 } 133 134 135 #ifndef TUKLIB_DOSLIKE 136 /// \brief Waits for input or output to become available or for a signal 137 /// 138 /// This uses the self-pipe trick to avoid a race condition that can occur 139 /// if a signal is caught after user_abort has been checked but before e.g. 140 /// read() has been called. In that situation read() could block unless 141 /// non-blocking I/O is used. With non-blocking I/O something like select() 142 /// or poll() is needed to avoid a busy-wait loop, and the same race condition 143 /// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in 144 /// POSIX) but neither is portable enough in 2013. The self-pipe trick is 145 /// old and very portable. 146 static io_wait_ret 147 io_wait(file_pair *pair, int timeout, bool is_reading) 148 { 149 struct pollfd pfd[2]; 150 151 if (is_reading) { 152 pfd[0].fd = pair->src_fd; 153 pfd[0].events = POLLIN; 154 } else { 155 pfd[0].fd = pair->dest_fd; 156 pfd[0].events = POLLOUT; 157 } 158 159 pfd[1].fd = user_abort_pipe[0]; 160 pfd[1].events = POLLIN; 161 162 while (true) { 163 const int ret = poll(pfd, 2, timeout); 164 165 if (user_abort) 166 return IO_WAIT_ERROR; 167 168 if (ret == -1) { 169 if (errno == EINTR || errno == EAGAIN) 170 continue; 171 172 message_error(_("%s: poll() failed: %s"), 173 is_reading ? pair->src_name 174 : pair->dest_name, 175 strerror(errno)); 176 return IO_WAIT_ERROR; 177 } 178 179 if (ret == 0) { 180 assert(opt_flush_timeout != 0); 181 flush_needed = true; 182 return IO_WAIT_TIMEOUT; 183 } 184 185 if (pfd[0].revents != 0) 186 return IO_WAIT_MORE; 187 } 188 } 189 #endif 190 191 192 /// \brief Unlink a file 193 /// 194 /// This tries to verify that the file being unlinked really is the file that 195 /// we want to unlink by verifying device and inode numbers. There's still 196 /// a small unavoidable race, but this is much better than nothing (the file 197 /// could have been moved/replaced even hours earlier). 198 static void 199 io_unlink(const char *name, const struct stat *known_st) 200 { 201 #if defined(TUKLIB_DOSLIKE) 202 // On DOS-like systems, st_ino is meaningless, so don't bother 203 // testing it. Just silence a compiler warning. 204 (void)known_st; 205 #else 206 struct stat new_st; 207 208 // If --force was used, use stat() instead of lstat(). This way 209 // (de)compressing symlinks works correctly. However, it also means 210 // that xz cannot detect if a regular file foo is renamed to bar 211 // and then a symlink foo -> bar is created. Because of stat() 212 // instead of lstat(), xz will think that foo hasn't been replaced 213 // with another file. Thus, xz will remove foo even though it no 214 // longer is the same file that xz used when it started compressing. 215 // Probably it's not too bad though, so this doesn't need a more 216 // complex fix. 217 const int stat_ret = opt_force 218 ? stat(name, &new_st) : lstat(name, &new_st); 219 220 if (stat_ret 221 # ifdef __VMS 222 // st_ino is an array, and we don't want to 223 // compare st_dev at all. 224 || memcmp(&new_st.st_ino, &known_st->st_ino, 225 sizeof(new_st.st_ino)) != 0 226 # else 227 // Typical POSIX-like system 228 || new_st.st_dev != known_st->st_dev 229 || new_st.st_ino != known_st->st_ino 230 # endif 231 ) 232 // TRANSLATORS: When compression or decompression finishes, 233 // and xz is going to remove the source file, xz first checks 234 // if the source file still exists, and if it does, does its 235 // device and inode numbers match what xz saw when it opened 236 // the source file. If these checks fail, this message is 237 // shown, %s being the filename, and the file is not deleted. 238 // The check for device and inode numbers is there, because 239 // it is possible that the user has put a new file in place 240 // of the original file, and in that case it obviously 241 // shouldn't be removed. 242 message_error(_("%s: File seems to have been moved, " 243 "not removing"), name); 244 else 245 #endif 246 // There's a race condition between lstat() and unlink() 247 // but at least we have tried to avoid removing wrong file. 248 if (unlink(name)) 249 message_error(_("%s: Cannot remove: %s"), 250 name, strerror(errno)); 251 252 return; 253 } 254 255 256 /// \brief Copies owner/group and permissions 257 /// 258 /// \todo ACL and EA support 259 /// 260 static void 261 io_copy_attrs(const file_pair *pair) 262 { 263 // Skip chown and chmod on Windows. 264 #ifndef TUKLIB_DOSLIKE 265 // This function is more tricky than you may think at first. 266 // Blindly copying permissions may permit users to access the 267 // destination file who didn't have permission to access the 268 // source file. 269 270 // Try changing the owner of the file. If we aren't root or the owner 271 // isn't already us, fchown() probably doesn't succeed. We warn 272 // about failing fchown() only if we are root. 273 if (fchown(pair->dest_fd, pair->src_st.st_uid, -1) && warn_fchown) 274 message_warning(_("%s: Cannot set the file owner: %s"), 275 pair->dest_name, strerror(errno)); 276 277 mode_t mode; 278 279 if (fchown(pair->dest_fd, -1, pair->src_st.st_gid)) { 280 message_warning(_("%s: Cannot set the file group: %s"), 281 pair->dest_name, strerror(errno)); 282 // We can still safely copy some additional permissions: 283 // `group' must be at least as strict as `other' and 284 // also vice versa. 285 // 286 // NOTE: After this, the owner of the source file may 287 // get additional permissions. This shouldn't be too bad, 288 // because the owner would have had permission to chmod 289 // the original file anyway. 290 mode = ((pair->src_st.st_mode & 0070) >> 3) 291 & (pair->src_st.st_mode & 0007); 292 mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode; 293 } else { 294 // Drop the setuid, setgid, and sticky bits. 295 mode = pair->src_st.st_mode & 0777; 296 } 297 298 if (fchmod(pair->dest_fd, mode)) 299 message_warning(_("%s: Cannot set the file permissions: %s"), 300 pair->dest_name, strerror(errno)); 301 #endif 302 303 // Copy the timestamps. We have several possible ways to do this, of 304 // which some are better in both security and precision. 305 // 306 // First, get the nanosecond part of the timestamps. As of writing, 307 // it's not standardized by POSIX, and there are several names for 308 // the same thing in struct stat. 309 long atime_nsec; 310 long mtime_nsec; 311 312 # if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) 313 // GNU and Solaris 314 atime_nsec = pair->src_st.st_atim.tv_nsec; 315 mtime_nsec = pair->src_st.st_mtim.tv_nsec; 316 317 # elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) 318 // BSD 319 atime_nsec = pair->src_st.st_atimespec.tv_nsec; 320 mtime_nsec = pair->src_st.st_mtimespec.tv_nsec; 321 322 # elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC) 323 // GNU and BSD without extensions 324 atime_nsec = pair->src_st.st_atimensec; 325 mtime_nsec = pair->src_st.st_mtimensec; 326 327 # elif defined(HAVE_STRUCT_STAT_ST_UATIME) 328 // Tru64 329 atime_nsec = pair->src_st.st_uatime * 1000; 330 mtime_nsec = pair->src_st.st_umtime * 1000; 331 332 # elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC) 333 // UnixWare 334 atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec; 335 mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec; 336 337 # else 338 // Safe fallback 339 atime_nsec = 0; 340 mtime_nsec = 0; 341 # endif 342 343 // Construct a structure to hold the timestamps and call appropriate 344 // function to set the timestamps. 345 #if defined(HAVE_FUTIMENS) 346 // Use nanosecond precision. 347 struct timespec tv[2]; 348 tv[0].tv_sec = pair->src_st.st_atime; 349 tv[0].tv_nsec = atime_nsec; 350 tv[1].tv_sec = pair->src_st.st_mtime; 351 tv[1].tv_nsec = mtime_nsec; 352 353 (void)futimens(pair->dest_fd, tv); 354 355 #elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) 356 // Use microsecond precision. 357 struct timeval tv[2]; 358 tv[0].tv_sec = pair->src_st.st_atime; 359 tv[0].tv_usec = atime_nsec / 1000; 360 tv[1].tv_sec = pair->src_st.st_mtime; 361 tv[1].tv_usec = mtime_nsec / 1000; 362 363 # if defined(HAVE_FUTIMES) 364 (void)futimes(pair->dest_fd, tv); 365 # elif defined(HAVE_FUTIMESAT) 366 (void)futimesat(pair->dest_fd, NULL, tv); 367 # else 368 // Argh, no function to use a file descriptor to set the timestamp. 369 (void)utimes(pair->dest_name, tv); 370 # endif 371 372 #elif defined(HAVE_UTIME) 373 // Use one-second precision. utime() doesn't support using file 374 // descriptor either. Some systems have broken utime() prototype 375 // so don't make this const. 376 struct utimbuf buf = { 377 .actime = pair->src_st.st_atime, 378 .modtime = pair->src_st.st_mtime, 379 }; 380 381 // Avoid warnings. 382 (void)atime_nsec; 383 (void)mtime_nsec; 384 385 (void)utime(pair->dest_name, &buf); 386 #endif 387 388 return; 389 } 390 391 392 /// Opens the source file. Returns false on success, true on error. 393 static bool 394 io_open_src_real(file_pair *pair) 395 { 396 // There's nothing to open when reading from stdin. 397 if (pair->src_name == stdin_filename) { 398 pair->src_fd = STDIN_FILENO; 399 #ifdef TUKLIB_DOSLIKE 400 setmode(STDIN_FILENO, O_BINARY); 401 #else 402 // Try to set stdin to non-blocking mode. It won't work 403 // e.g. on OpenBSD if stdout is e.g. /dev/null. In such 404 // case we proceed as if stdin were non-blocking anyway 405 // (in case of /dev/null it will be in practice). The 406 // same applies to stdout in io_open_dest_real(). 407 stdin_flags = fcntl(STDIN_FILENO, F_GETFL); 408 if (stdin_flags == -1) { 409 message_error(_("Error getting the file status flags " 410 "from standard input: %s"), 411 strerror(errno)); 412 return true; 413 } 414 415 if ((stdin_flags & O_NONBLOCK) == 0 416 && fcntl(STDIN_FILENO, F_SETFL, 417 stdin_flags | O_NONBLOCK) != -1) 418 restore_stdin_flags = true; 419 #endif 420 #ifdef HAVE_POSIX_FADVISE 421 // It will fail if stdin is a pipe and that's fine. 422 (void)posix_fadvise(STDIN_FILENO, 0, 0, POSIX_FADV_SEQUENTIAL); 423 #endif 424 return false; 425 } 426 427 // Symlinks are not followed unless writing to stdout or --force 428 // was used. 429 const bool follow_symlinks = opt_stdout || opt_force; 430 431 // We accept only regular files if we are writing the output 432 // to disk too. bzip2 allows overriding this with --force but 433 // gzip and xz don't. 434 const bool reg_files_only = !opt_stdout; 435 436 // Flags for open() 437 int flags = O_RDONLY | O_BINARY | O_NOCTTY; 438 439 #ifndef TUKLIB_DOSLIKE 440 // Use non-blocking I/O: 441 // - It prevents blocking when opening FIFOs and some other 442 // special files, which is good if we want to accept only 443 // regular files. 444 // - It can help avoiding some race conditions with signal handling. 445 flags |= O_NONBLOCK; 446 #endif 447 448 #if defined(O_NOFOLLOW) 449 if (!follow_symlinks) 450 flags |= O_NOFOLLOW; 451 #elif !defined(TUKLIB_DOSLIKE) 452 // Some POSIX-like systems lack O_NOFOLLOW (it's not required 453 // by POSIX). Check for symlinks with a separate lstat() on 454 // these systems. 455 if (!follow_symlinks) { 456 struct stat st; 457 if (lstat(pair->src_name, &st)) { 458 message_error("%s: %s", pair->src_name, 459 strerror(errno)); 460 return true; 461 462 } else if (S_ISLNK(st.st_mode)) { 463 message_warning(_("%s: Is a symbolic link, " 464 "skipping"), pair->src_name); 465 return true; 466 } 467 } 468 #else 469 // Avoid warnings. 470 (void)follow_symlinks; 471 #endif 472 473 // Try to open the file. Signals have been blocked so EINTR shouldn't 474 // be possible. 475 pair->src_fd = open(pair->src_name, flags); 476 477 if (pair->src_fd == -1) { 478 // Signals (that have a signal handler) have been blocked. 479 assert(errno != EINTR); 480 481 #ifdef O_NOFOLLOW 482 // Give an understandable error message if the reason 483 // for failing was that the file was a symbolic link. 484 // 485 // Note that at least Linux, OpenBSD, Solaris, and Darwin 486 // use ELOOP to indicate that O_NOFOLLOW was the reason 487 // that open() failed. Because there may be 488 // directories in the pathname, ELOOP may occur also 489 // because of a symlink loop in the directory part. 490 // So ELOOP doesn't tell us what actually went wrong, 491 // and this stupidity went into POSIX-1.2008 too. 492 // 493 // FreeBSD associates EMLINK with O_NOFOLLOW and 494 // Tru64 uses ENOTSUP. We use these directly here 495 // and skip the lstat() call and the associated race. 496 // I want to hear if there are other kernels that 497 // fail with something else than ELOOP with O_NOFOLLOW. 498 bool was_symlink = false; 499 500 # if defined(__FreeBSD__) || defined(__DragonFly__) 501 if (errno == EMLINK) 502 was_symlink = true; 503 504 # elif defined(__digital__) && defined(__unix__) 505 if (errno == ENOTSUP) 506 was_symlink = true; 507 508 # elif defined(__NetBSD__) 509 if (errno == EFTYPE) 510 was_symlink = true; 511 512 # else 513 if (errno == ELOOP && !follow_symlinks) { 514 const int saved_errno = errno; 515 struct stat st; 516 if (lstat(pair->src_name, &st) == 0 517 && S_ISLNK(st.st_mode)) 518 was_symlink = true; 519 520 errno = saved_errno; 521 } 522 # endif 523 524 if (was_symlink) 525 message_warning(_("%s: Is a symbolic link, " 526 "skipping"), pair->src_name); 527 else 528 #endif 529 // Something else than O_NOFOLLOW failing 530 // (assuming that the race conditions didn't 531 // confuse us). 532 message_error("%s: %s", pair->src_name, 533 strerror(errno)); 534 535 return true; 536 } 537 538 // Stat the source file. We need the result also when we copy 539 // the permissions, and when unlinking. 540 // 541 // NOTE: Use stat() instead of fstat() with DJGPP, because 542 // then we have a better chance to get st_ino value that can 543 // be used in io_open_dest_real() to prevent overwriting the 544 // source file. 545 #ifdef __DJGPP__ 546 if (stat(pair->src_name, &pair->src_st)) 547 goto error_msg; 548 #else 549 if (fstat(pair->src_fd, &pair->src_st)) 550 goto error_msg; 551 #endif 552 553 if (S_ISDIR(pair->src_st.st_mode)) { 554 message_warning(_("%s: Is a directory, skipping"), 555 pair->src_name); 556 goto error; 557 } 558 559 if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) { 560 message_warning(_("%s: Not a regular file, skipping"), 561 pair->src_name); 562 goto error; 563 } 564 565 #ifndef TUKLIB_DOSLIKE 566 if (reg_files_only && !opt_force) { 567 if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) { 568 // gzip rejects setuid and setgid files even 569 // when --force was used. bzip2 doesn't check 570 // for them, but calls fchown() after fchmod(), 571 // and many systems automatically drop setuid 572 // and setgid bits there. 573 // 574 // We accept setuid and setgid files if 575 // --force was used. We drop these bits 576 // explicitly in io_copy_attr(). 577 message_warning(_("%s: File has setuid or " 578 "setgid bit set, skipping"), 579 pair->src_name); 580 goto error; 581 } 582 583 if (pair->src_st.st_mode & S_ISVTX) { 584 message_warning(_("%s: File has sticky bit " 585 "set, skipping"), 586 pair->src_name); 587 goto error; 588 } 589 590 if (pair->src_st.st_nlink > 1) { 591 message_warning(_("%s: Input file has more " 592 "than one hard link, " 593 "skipping"), pair->src_name); 594 goto error; 595 } 596 } 597 598 // If it is something else than a regular file, wait until 599 // there is input available. This way reading from FIFOs 600 // will work when open() is used with O_NONBLOCK. 601 if (!S_ISREG(pair->src_st.st_mode)) { 602 signals_unblock(); 603 const io_wait_ret ret = io_wait(pair, -1, true); 604 signals_block(); 605 606 if (ret != IO_WAIT_MORE) 607 goto error; 608 } 609 #endif 610 611 #ifdef HAVE_POSIX_FADVISE 612 // It will fail with some special files like FIFOs but that is fine. 613 (void)posix_fadvise(pair->src_fd, 0, 0, POSIX_FADV_SEQUENTIAL); 614 #endif 615 616 return false; 617 618 error_msg: 619 message_error("%s: %s", pair->src_name, strerror(errno)); 620 error: 621 (void)close(pair->src_fd); 622 return true; 623 } 624 625 626 extern file_pair * 627 io_open_src(const char *src_name) 628 { 629 if (is_empty_filename(src_name)) 630 return NULL; 631 632 // Since we have only one file open at a time, we can use 633 // a statically allocated structure. 634 static file_pair pair; 635 636 pair = (file_pair){ 637 .src_name = src_name, 638 .dest_name = NULL, 639 .src_fd = -1, 640 .dest_fd = -1, 641 .src_eof = false, 642 .dest_try_sparse = false, 643 .dest_pending_sparse = 0, 644 }; 645 646 // Block the signals, for which we have a custom signal handler, so 647 // that we don't need to worry about EINTR. 648 signals_block(); 649 const bool error = io_open_src_real(&pair); 650 signals_unblock(); 651 652 return error ? NULL : &pair; 653 } 654 655 656 /// \brief Closes source file of the file_pair structure 657 /// 658 /// \param pair File whose src_fd should be closed 659 /// \param success If true, the file will be removed from the disk if 660 /// closing succeeds and --keep hasn't been used. 661 static void 662 io_close_src(file_pair *pair, bool success) 663 { 664 #ifndef TUKLIB_DOSLIKE 665 if (restore_stdin_flags) { 666 assert(pair->src_fd == STDIN_FILENO); 667 668 restore_stdin_flags = false; 669 670 if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1) 671 message_error(_("Error restoring the status flags " 672 "to standard input: %s"), 673 strerror(errno)); 674 } 675 #endif 676 677 if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) { 678 #ifdef TUKLIB_DOSLIKE 679 (void)close(pair->src_fd); 680 #endif 681 682 // If we are going to unlink(), do it before closing the file. 683 // This way there's no risk that someone replaces the file and 684 // happens to get same inode number, which would make us 685 // unlink() wrong file. 686 // 687 // NOTE: DOS-like systems are an exception to this, because 688 // they don't allow unlinking files that are open. *sigh* 689 if (success && !opt_keep_original) 690 io_unlink(pair->src_name, &pair->src_st); 691 692 #ifndef TUKLIB_DOSLIKE 693 (void)close(pair->src_fd); 694 #endif 695 } 696 697 return; 698 } 699 700 701 static bool 702 io_open_dest_real(file_pair *pair) 703 { 704 if (opt_stdout || pair->src_fd == STDIN_FILENO) { 705 // We don't modify or free() this. 706 pair->dest_name = (char *)"(stdout)"; 707 pair->dest_fd = STDOUT_FILENO; 708 #ifdef TUKLIB_DOSLIKE 709 setmode(STDOUT_FILENO, O_BINARY); 710 #else 711 // Try to set O_NONBLOCK if it isn't already set. 712 // If it fails, we assume that stdout is non-blocking 713 // in practice. See the comments in io_open_src_real() 714 // for similar situation with stdin. 715 // 716 // NOTE: O_APPEND may be unset later in this function 717 // and it relies on stdout_flags being set here. 718 stdout_flags = fcntl(STDOUT_FILENO, F_GETFL); 719 if (stdout_flags == -1) { 720 message_error(_("Error getting the file status flags " 721 "from standard output: %s"), 722 strerror(errno)); 723 return true; 724 } 725 726 if ((stdout_flags & O_NONBLOCK) == 0 727 && fcntl(STDOUT_FILENO, F_SETFL, 728 stdout_flags | O_NONBLOCK) != -1) 729 restore_stdout_flags = true; 730 #endif 731 } else { 732 pair->dest_name = suffix_get_dest_name(pair->src_name); 733 if (pair->dest_name == NULL) 734 return true; 735 736 #ifdef __DJGPP__ 737 struct stat st; 738 if (stat(pair->dest_name, &st) == 0) { 739 // Check that it isn't a special file like "prn". 740 if (st.st_dev == -1) { 741 message_error("%s: Refusing to write to " 742 "a DOS special file", 743 pair->dest_name); 744 free(pair->dest_name); 745 return true; 746 } 747 748 // Check that we aren't overwriting the source file. 749 if (st.st_dev == pair->src_st.st_dev 750 && st.st_ino == pair->src_st.st_ino) { 751 message_error("%s: Output file is the same " 752 "as the input file", 753 pair->dest_name); 754 free(pair->dest_name); 755 return true; 756 } 757 } 758 #endif 759 760 // If --force was used, unlink the target file first. 761 if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { 762 message_error(_("%s: Cannot remove: %s"), 763 pair->dest_name, strerror(errno)); 764 free(pair->dest_name); 765 return true; 766 } 767 768 // Open the file. 769 int flags = O_WRONLY | O_BINARY | O_NOCTTY 770 | O_CREAT | O_EXCL; 771 #ifndef TUKLIB_DOSLIKE 772 flags |= O_NONBLOCK; 773 #endif 774 const mode_t mode = S_IRUSR | S_IWUSR; 775 pair->dest_fd = open(pair->dest_name, flags, mode); 776 777 if (pair->dest_fd == -1) { 778 message_error("%s: %s", pair->dest_name, 779 strerror(errno)); 780 free(pair->dest_name); 781 return true; 782 } 783 } 784 785 #ifndef TUKLIB_DOSLIKE 786 // dest_st isn't used on DOS-like systems except as a dummy 787 // argument to io_unlink(), so don't fstat() on such systems. 788 if (fstat(pair->dest_fd, &pair->dest_st)) { 789 // If fstat() really fails, we have a safe fallback here. 790 # if defined(__VMS) 791 pair->dest_st.st_ino[0] = 0; 792 pair->dest_st.st_ino[1] = 0; 793 pair->dest_st.st_ino[2] = 0; 794 # else 795 pair->dest_st.st_dev = 0; 796 pair->dest_st.st_ino = 0; 797 # endif 798 } else if (try_sparse && opt_mode == MODE_DECOMPRESS) { 799 // When writing to standard output, we need to be extra 800 // careful: 801 // - It may be connected to something else than 802 // a regular file. 803 // - We aren't necessarily writing to a new empty file 804 // or to the end of an existing file. 805 // - O_APPEND may be active. 806 // 807 // TODO: I'm keeping this disabled for DOS-like systems 808 // for now. FAT doesn't support sparse files, but NTFS 809 // does, so maybe this should be enabled on Windows after 810 // some testing. 811 if (pair->dest_fd == STDOUT_FILENO) { 812 if (!S_ISREG(pair->dest_st.st_mode)) 813 return false; 814 815 if (stdout_flags & O_APPEND) { 816 // Creating a sparse file is not possible 817 // when O_APPEND is active (it's used by 818 // shell's >> redirection). As I understand 819 // it, it is safe to temporarily disable 820 // O_APPEND in xz, because if someone 821 // happened to write to the same file at the 822 // same time, results would be bad anyway 823 // (users shouldn't assume that xz uses any 824 // specific block size when writing data). 825 // 826 // The write position may be something else 827 // than the end of the file, so we must fix 828 // it to start writing at the end of the file 829 // to imitate O_APPEND. 830 if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1) 831 return false; 832 833 // Construct the new file status flags. 834 // If O_NONBLOCK was set earlier in this 835 // function, it must be kept here too. 836 int flags = stdout_flags & ~O_APPEND; 837 if (restore_stdout_flags) 838 flags |= O_NONBLOCK; 839 840 // If this fcntl() fails, we continue but won't 841 // try to create sparse output. The original 842 // flags will still be restored if needed (to 843 // unset O_NONBLOCK) when the file is finished. 844 if (fcntl(STDOUT_FILENO, F_SETFL, flags) == -1) 845 return false; 846 847 // Disabling O_APPEND succeeded. Mark 848 // that the flags should be restored 849 // in io_close_dest(). (This may have already 850 // been set when enabling O_NONBLOCK.) 851 restore_stdout_flags = true; 852 853 } else if (lseek(STDOUT_FILENO, 0, SEEK_CUR) 854 != pair->dest_st.st_size) { 855 // Writing won't start exactly at the end 856 // of the file. We cannot use sparse output, 857 // because it would probably corrupt the file. 858 return false; 859 } 860 } 861 862 pair->dest_try_sparse = true; 863 } 864 #endif 865 866 return false; 867 } 868 869 870 extern bool 871 io_open_dest(file_pair *pair) 872 { 873 signals_block(); 874 const bool ret = io_open_dest_real(pair); 875 signals_unblock(); 876 return ret; 877 } 878 879 880 /// \brief Closes destination file of the file_pair structure 881 /// 882 /// \param pair File whose dest_fd should be closed 883 /// \param success If false, the file will be removed from the disk. 884 /// 885 /// \return Zero if closing succeeds. On error, -1 is returned and 886 /// error message printed. 887 static bool 888 io_close_dest(file_pair *pair, bool success) 889 { 890 #ifndef TUKLIB_DOSLIKE 891 // If io_open_dest() has disabled O_APPEND, restore it here. 892 if (restore_stdout_flags) { 893 assert(pair->dest_fd == STDOUT_FILENO); 894 895 restore_stdout_flags = false; 896 897 if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) { 898 message_error(_("Error restoring the O_APPEND flag " 899 "to standard output: %s"), 900 strerror(errno)); 901 return true; 902 } 903 } 904 #endif 905 906 if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO) 907 return false; 908 909 if (close(pair->dest_fd)) { 910 message_error(_("%s: Closing the file failed: %s"), 911 pair->dest_name, strerror(errno)); 912 913 // Closing destination file failed, so we cannot trust its 914 // contents. Get rid of junk: 915 io_unlink(pair->dest_name, &pair->dest_st); 916 free(pair->dest_name); 917 return true; 918 } 919 920 // If the operation using this file wasn't successful, we git rid 921 // of the junk file. 922 if (!success) 923 io_unlink(pair->dest_name, &pair->dest_st); 924 925 free(pair->dest_name); 926 927 return false; 928 } 929 930 931 extern void 932 io_close(file_pair *pair, bool success) 933 { 934 // Take care of sparseness at the end of the output file. 935 if (success && pair->dest_try_sparse 936 && pair->dest_pending_sparse > 0) { 937 // Seek forward one byte less than the size of the pending 938 // hole, then write one zero-byte. This way the file grows 939 // to its correct size. An alternative would be to use 940 // ftruncate() but that isn't portable enough (e.g. it 941 // doesn't work with FAT on Linux; FAT isn't that important 942 // since it doesn't support sparse files anyway, but we don't 943 // want to create corrupt files on it). 944 if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1, 945 SEEK_CUR) == -1) { 946 message_error(_("%s: Seeking failed when trying " 947 "to create a sparse file: %s"), 948 pair->dest_name, strerror(errno)); 949 success = false; 950 } else { 951 const uint8_t zero[1] = { '\0' }; 952 if (io_write_buf(pair, zero, 1)) 953 success = false; 954 } 955 } 956 957 signals_block(); 958 959 // Copy the file attributes. We need to skip this if destination 960 // file isn't open or it is standard output. 961 if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO) 962 io_copy_attrs(pair); 963 964 // Close the destination first. If it fails, we must not remove 965 // the source file! 966 if (io_close_dest(pair, success)) 967 success = false; 968 969 // Close the source file, and unlink it if the operation using this 970 // file pair was successful and we haven't requested to keep the 971 // source file. 972 io_close_src(pair, success); 973 974 signals_unblock(); 975 976 return; 977 } 978 979 980 extern void 981 io_fix_src_pos(file_pair *pair, size_t rewind_size) 982 { 983 assert(rewind_size <= IO_BUFFER_SIZE); 984 985 if (rewind_size > 0) { 986 // This doesn't need to work on unseekable file descriptors, 987 // so just ignore possible errors. 988 (void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR); 989 } 990 991 return; 992 } 993 994 995 extern size_t 996 io_read(file_pair *pair, io_buf *buf_union, size_t size) 997 { 998 // We use small buffers here. 999 assert(size < SSIZE_MAX); 1000 1001 uint8_t *buf = buf_union->u8; 1002 size_t left = size; 1003 1004 while (left > 0) { 1005 const ssize_t amount = read(pair->src_fd, buf, left); 1006 1007 if (amount == 0) { 1008 pair->src_eof = true; 1009 break; 1010 } 1011 1012 if (amount == -1) { 1013 if (errno == EINTR) { 1014 if (user_abort) 1015 return SIZE_MAX; 1016 1017 continue; 1018 } 1019 1020 #ifndef TUKLIB_DOSLIKE 1021 if (errno == EAGAIN || errno == EWOULDBLOCK) { 1022 const io_wait_ret ret = io_wait(pair, 1023 mytime_get_flush_timeout(), 1024 true); 1025 switch (ret) { 1026 case IO_WAIT_MORE: 1027 continue; 1028 1029 case IO_WAIT_ERROR: 1030 return SIZE_MAX; 1031 1032 case IO_WAIT_TIMEOUT: 1033 return size - left; 1034 1035 default: 1036 message_bug(); 1037 } 1038 } 1039 #endif 1040 1041 message_error(_("%s: Read error: %s"), 1042 pair->src_name, strerror(errno)); 1043 1044 return SIZE_MAX; 1045 } 1046 1047 buf += (size_t)(amount); 1048 left -= (size_t)(amount); 1049 } 1050 1051 return size - left; 1052 } 1053 1054 1055 extern bool 1056 io_pread(file_pair *pair, io_buf *buf, size_t size, off_t pos) 1057 { 1058 // Using lseek() and read() is more portable than pread() and 1059 // for us it is as good as real pread(). 1060 if (lseek(pair->src_fd, pos, SEEK_SET) != pos) { 1061 message_error(_("%s: Error seeking the file: %s"), 1062 pair->src_name, strerror(errno)); 1063 return true; 1064 } 1065 1066 const size_t amount = io_read(pair, buf, size); 1067 if (amount == SIZE_MAX) 1068 return true; 1069 1070 if (amount != size) { 1071 message_error(_("%s: Unexpected end of file"), 1072 pair->src_name); 1073 return true; 1074 } 1075 1076 return false; 1077 } 1078 1079 1080 static bool 1081 is_sparse(const io_buf *buf) 1082 { 1083 assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0); 1084 1085 for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i) 1086 if (buf->u64[i] != 0) 1087 return false; 1088 1089 return true; 1090 } 1091 1092 1093 static bool 1094 io_write_buf(file_pair *pair, const uint8_t *buf, size_t size) 1095 { 1096 assert(size < SSIZE_MAX); 1097 1098 while (size > 0) { 1099 const ssize_t amount = write(pair->dest_fd, buf, size); 1100 if (amount == -1) { 1101 if (errno == EINTR) { 1102 if (user_abort) 1103 return true; 1104 1105 continue; 1106 } 1107 1108 #ifndef TUKLIB_DOSLIKE 1109 if (errno == EAGAIN || errno == EWOULDBLOCK) { 1110 if (io_wait(pair, -1, false) == IO_WAIT_MORE) 1111 continue; 1112 1113 return true; 1114 } 1115 #endif 1116 1117 // Handle broken pipe specially. gzip and bzip2 1118 // don't print anything on SIGPIPE. In addition, 1119 // gzip --quiet uses exit status 2 (warning) on 1120 // broken pipe instead of whatever raise(SIGPIPE) 1121 // would make it return. It is there to hide "Broken 1122 // pipe" message on some old shells (probably old 1123 // GNU bash). 1124 // 1125 // We don't do anything special with --quiet, which 1126 // is what bzip2 does too. If we get SIGPIPE, we 1127 // will handle it like other signals by setting 1128 // user_abort, and get EPIPE here. 1129 if (errno != EPIPE) 1130 message_error(_("%s: Write error: %s"), 1131 pair->dest_name, strerror(errno)); 1132 1133 return true; 1134 } 1135 1136 buf += (size_t)(amount); 1137 size -= (size_t)(amount); 1138 } 1139 1140 return false; 1141 } 1142 1143 1144 extern bool 1145 io_write(file_pair *pair, const io_buf *buf, size_t size) 1146 { 1147 assert(size <= IO_BUFFER_SIZE); 1148 1149 if (pair->dest_try_sparse) { 1150 // Check if the block is sparse (contains only zeros). If it 1151 // sparse, we just store the amount and return. We will take 1152 // care of actually skipping over the hole when we hit the 1153 // next data block or close the file. 1154 // 1155 // Since io_close() requires that dest_pending_sparse > 0 1156 // if the file ends with sparse block, we must also return 1157 // if size == 0 to avoid doing the lseek(). 1158 if (size == IO_BUFFER_SIZE) { 1159 if (is_sparse(buf)) { 1160 pair->dest_pending_sparse += size; 1161 return false; 1162 } 1163 } else if (size == 0) { 1164 return false; 1165 } 1166 1167 // This is not a sparse block. If we have a pending hole, 1168 // skip it now. 1169 if (pair->dest_pending_sparse > 0) { 1170 if (lseek(pair->dest_fd, pair->dest_pending_sparse, 1171 SEEK_CUR) == -1) { 1172 message_error(_("%s: Seeking failed when " 1173 "trying to create a sparse " 1174 "file: %s"), pair->dest_name, 1175 strerror(errno)); 1176 return true; 1177 } 1178 1179 pair->dest_pending_sparse = 0; 1180 } 1181 } 1182 1183 return io_write_buf(pair, buf->u8, size); 1184 } 1185