1 /////////////////////////////////////////////////////////////////////////////// 2 // 3 /// \file file_io.c 4 /// \brief File opening, unlinking, and closing 5 // 6 // Author: Lasse Collin 7 // 8 // This file has been put into the public domain. 9 // You can do whatever you want with this file. 10 // 11 /////////////////////////////////////////////////////////////////////////////// 12 13 #include "private.h" 14 15 #include <fcntl.h> 16 17 #ifdef TUKLIB_DOSLIKE 18 # include <io.h> 19 #else 20 # include <poll.h> 21 static bool warn_fchown; 22 #endif 23 24 #if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) 25 # include <sys/time.h> 26 #elif defined(HAVE__FUTIME) 27 # include <sys/utime.h> 28 #elif defined(HAVE_UTIME) 29 # include <utime.h> 30 #endif 31 32 #ifdef HAVE_CAPSICUM 33 # ifdef HAVE_SYS_CAPSICUM_H 34 # include <sys/capsicum.h> 35 # else 36 # include <sys/capability.h> 37 # endif 38 #endif 39 40 #include "tuklib_open_stdxxx.h" 41 42 #ifndef O_BINARY 43 # define O_BINARY 0 44 #endif 45 46 #ifndef O_NOCTTY 47 # define O_NOCTTY 0 48 #endif 49 50 // Using this macro to silence a warning from gcc -Wlogical-op. 51 #if EAGAIN == EWOULDBLOCK 52 # define IS_EAGAIN_OR_EWOULDBLOCK(e) ((e) == EAGAIN) 53 #else 54 # define IS_EAGAIN_OR_EWOULDBLOCK(e) \ 55 ((e) == EAGAIN || (e) == EWOULDBLOCK) 56 #endif 57 58 59 typedef enum { 60 IO_WAIT_MORE, // Reading or writing is possible. 61 IO_WAIT_ERROR, // Error or user_abort 62 IO_WAIT_TIMEOUT, // poll() timed out 63 } io_wait_ret; 64 65 66 /// If true, try to create sparse files when decompressing. 67 static bool try_sparse = true; 68 69 #ifdef ENABLE_SANDBOX 70 /// True if the conditions for sandboxing (described in main()) have been met. 71 static bool sandbox_allowed = false; 72 #endif 73 74 #ifndef TUKLIB_DOSLIKE 75 /// File status flags of standard input. This is used by io_open_src() 76 /// and io_close_src(). 
77 static int stdin_flags; 78 static bool restore_stdin_flags = false; 79 80 /// Original file status flags of standard output. This is used by 81 /// io_open_dest() and io_close_dest() to save and restore the flags. 82 static int stdout_flags; 83 static bool restore_stdout_flags = false; 84 85 /// Self-pipe used together with the user_abort variable to avoid 86 /// race conditions with signal handling. 87 static int user_abort_pipe[2]; 88 #endif 89 90 91 static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size); 92 93 94 extern void 95 io_init(void) 96 { 97 // Make sure that stdin, stdout, and stderr are connected to 98 // a valid file descriptor. Exit immediately with exit code ERROR 99 // if we cannot make the file descriptors valid. Maybe we should 100 // print an error message, but our stderr could be screwed anyway. 101 tuklib_open_stdxxx(E_ERROR); 102 103 #ifndef TUKLIB_DOSLIKE 104 // If fchown() fails setting the owner, we warn about it only if 105 // we are root. 106 warn_fchown = geteuid() == 0; 107 108 // Create a pipe for the self-pipe trick. 109 if (pipe(user_abort_pipe)) 110 message_fatal(_("Error creating a pipe: %s"), 111 strerror(errno)); 112 113 // Make both ends of the pipe non-blocking. 114 for (unsigned i = 0; i < 2; ++i) { 115 int flags = fcntl(user_abort_pipe[i], F_GETFL); 116 if (flags == -1 || fcntl(user_abort_pipe[i], F_SETFL, 117 flags | O_NONBLOCK) == -1) 118 message_fatal(_("Error creating a pipe: %s"), 119 strerror(errno)); 120 } 121 #endif 122 123 #ifdef __DJGPP__ 124 // Avoid doing useless things when statting files. 125 // This isn't important but doesn't hurt. 126 _djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE; 127 #endif 128 129 return; 130 } 131 132 133 #ifndef TUKLIB_DOSLIKE 134 extern void 135 io_write_to_user_abort_pipe(void) 136 { 137 // If the write() fails, it's probably due to the pipe being full. 138 // Failing in that case is fine. 
If the reason is something else, 139 // there's not much we can do since this is called in a signal 140 // handler. So ignore the errors and try to avoid warnings with 141 // GCC and glibc when _FORTIFY_SOURCE=2 is used. 142 uint8_t b = '\0'; 143 const int ret = write(user_abort_pipe[1], &b, 1); 144 (void)ret; 145 return; 146 } 147 #endif 148 149 150 extern void 151 io_no_sparse(void) 152 { 153 try_sparse = false; 154 return; 155 } 156 157 158 #ifdef ENABLE_SANDBOX 159 extern void 160 io_allow_sandbox(void) 161 { 162 sandbox_allowed = true; 163 return; 164 } 165 166 167 /// Enables operating-system-specific sandbox if it is possible. 168 /// src_fd is the file descriptor of the input file. 169 static void 170 io_sandbox_enter(int src_fd) 171 { 172 if (!sandbox_allowed) { 173 // This message is more often annoying than useful so 174 // it's commented out. It can be useful when developing 175 // the sandboxing code. 176 //message(V_DEBUG, _("Sandbox is disabled due " 177 // "to incompatible command line arguments")); 178 return; 179 } 180 181 const char dummy_str[] = "x"; 182 183 // Try to ensure that both libc and xz locale files have been 184 // loaded when NLS is enabled. 185 snprintf(NULL, 0, "%s%s", _(dummy_str), strerror(EINVAL)); 186 187 // Try to ensure that iconv data files needed for handling multibyte 188 // characters have been loaded. This is needed at least with glibc. 189 tuklib_mbstr_width(dummy_str, NULL); 190 191 #ifdef HAVE_CAPSICUM 192 // Capsicum needs FreeBSD 10.0 or later. 
193 cap_rights_t rights; 194 195 if (cap_rights_limit(src_fd, cap_rights_init(&rights, 196 CAP_EVENT, CAP_FCNTL, CAP_LOOKUP, CAP_READ, CAP_SEEK))) 197 goto error; 198 199 if (cap_rights_limit(STDOUT_FILENO, cap_rights_init(&rights, 200 CAP_EVENT, CAP_FCNTL, CAP_FSTAT, CAP_LOOKUP, 201 CAP_WRITE, CAP_SEEK))) 202 goto error; 203 204 if (cap_rights_limit(user_abort_pipe[0], cap_rights_init(&rights, 205 CAP_EVENT))) 206 goto error; 207 208 if (cap_rights_limit(user_abort_pipe[1], cap_rights_init(&rights, 209 CAP_WRITE))) 210 goto error; 211 212 if (cap_enter()) 213 goto error; 214 215 #else 216 # error ENABLE_SANDBOX is defined but no sandboxing method was found. 217 #endif 218 219 // This message is annoying in xz -lvv. 220 //message(V_DEBUG, _("Sandbox was successfully enabled")); 221 return; 222 223 error: 224 message(V_DEBUG, _("Failed to enable the sandbox")); 225 } 226 #endif // ENABLE_SANDBOX 227 228 229 #ifndef TUKLIB_DOSLIKE 230 /// \brief Waits for input or output to become available or for a signal 231 /// 232 /// This uses the self-pipe trick to avoid a race condition that can occur 233 /// if a signal is caught after user_abort has been checked but before e.g. 234 /// read() has been called. In that situation read() could block unless 235 /// non-blocking I/O is used. With non-blocking I/O something like select() 236 /// or poll() is needed to avoid a busy-wait loop, and the same race condition 237 /// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in 238 /// POSIX) but neither is portable enough in 2013. The self-pipe trick is 239 /// old and very portable. 
240 static io_wait_ret 241 io_wait(file_pair *pair, int timeout, bool is_reading) 242 { 243 struct pollfd pfd[2]; 244 245 if (is_reading) { 246 pfd[0].fd = pair->src_fd; 247 pfd[0].events = POLLIN; 248 } else { 249 pfd[0].fd = pair->dest_fd; 250 pfd[0].events = POLLOUT; 251 } 252 253 pfd[1].fd = user_abort_pipe[0]; 254 pfd[1].events = POLLIN; 255 256 while (true) { 257 const int ret = poll(pfd, 2, timeout); 258 259 if (user_abort) 260 return IO_WAIT_ERROR; 261 262 if (ret == -1) { 263 if (errno == EINTR || errno == EAGAIN) 264 continue; 265 266 message_error(_("%s: poll() failed: %s"), 267 is_reading ? pair->src_name 268 : pair->dest_name, 269 strerror(errno)); 270 return IO_WAIT_ERROR; 271 } 272 273 if (ret == 0) 274 return IO_WAIT_TIMEOUT; 275 276 if (pfd[0].revents != 0) 277 return IO_WAIT_MORE; 278 } 279 } 280 #endif 281 282 283 /// \brief Unlink a file 284 /// 285 /// This tries to verify that the file being unlinked really is the file that 286 /// we want to unlink by verifying device and inode numbers. There's still 287 /// a small unavoidable race, but this is much better than nothing (the file 288 /// could have been moved/replaced even hours earlier). 289 static void 290 io_unlink(const char *name, const struct stat *known_st) 291 { 292 #if defined(TUKLIB_DOSLIKE) 293 // On DOS-like systems, st_ino is meaningless, so don't bother 294 // testing it. Just silence a compiler warning. 295 (void)known_st; 296 #else 297 struct stat new_st; 298 299 // If --force was used, use stat() instead of lstat(). This way 300 // (de)compressing symlinks works correctly. However, it also means 301 // that xz cannot detect if a regular file foo is renamed to bar 302 // and then a symlink foo -> bar is created. Because of stat() 303 // instead of lstat(), xz will think that foo hasn't been replaced 304 // with another file. Thus, xz will remove foo even though it no 305 // longer is the same file that xz used when it started compressing. 
306 // Probably it's not too bad though, so this doesn't need a more 307 // complex fix. 308 const int stat_ret = opt_force 309 ? stat(name, &new_st) : lstat(name, &new_st); 310 311 if (stat_ret 312 # ifdef __VMS 313 // st_ino is an array, and we don't want to 314 // compare st_dev at all. 315 || memcmp(&new_st.st_ino, &known_st->st_ino, 316 sizeof(new_st.st_ino)) != 0 317 # else 318 // Typical POSIX-like system 319 || new_st.st_dev != known_st->st_dev 320 || new_st.st_ino != known_st->st_ino 321 # endif 322 ) 323 // TRANSLATORS: When compression or decompression finishes, 324 // and xz is going to remove the source file, xz first checks 325 // if the source file still exists, and if it does, does its 326 // device and inode numbers match what xz saw when it opened 327 // the source file. If these checks fail, this message is 328 // shown, %s being the filename, and the file is not deleted. 329 // The check for device and inode numbers is there, because 330 // it is possible that the user has put a new file in place 331 // of the original file, and in that case it obviously 332 // shouldn't be removed. 333 message_warning(_("%s: File seems to have been moved, " 334 "not removing"), name); 335 else 336 #endif 337 // There's a race condition between lstat() and unlink() 338 // but at least we have tried to avoid removing wrong file. 339 if (unlink(name)) 340 message_warning(_("%s: Cannot remove: %s"), 341 name, strerror(errno)); 342 343 return; 344 } 345 346 347 /// \brief Copies owner/group and permissions 348 /// 349 /// \todo ACL and EA support 350 /// 351 static void 352 io_copy_attrs(const file_pair *pair) 353 { 354 // Skip chown and chmod on Windows. 355 #ifndef TUKLIB_DOSLIKE 356 // This function is more tricky than you may think at first. 357 // Blindly copying permissions may permit users to access the 358 // destination file who didn't have permission to access the 359 // source file. 360 361 // Try changing the owner of the file. 
If we aren't root or the owner 362 // isn't already us, fchown() probably doesn't succeed. We warn 363 // about failing fchown() only if we are root. 364 if (fchown(pair->dest_fd, pair->src_st.st_uid, (gid_t)(-1)) 365 && warn_fchown) 366 message_warning(_("%s: Cannot set the file owner: %s"), 367 pair->dest_name, strerror(errno)); 368 369 mode_t mode; 370 371 // With BSD semantics the new dest file may have a group that 372 // does not belong to the user. If the src file has the same gid 373 // nothing has to be done. Nevertheless OpenBSD fchown(2) fails 374 // in this case which seems to be POSIX compliant. As there is 375 // nothing to do, skip the system call. 376 if (pair->dest_st.st_gid != pair->src_st.st_gid 377 && fchown(pair->dest_fd, (uid_t)(-1), 378 pair->src_st.st_gid)) { 379 message_warning(_("%s: Cannot set the file group: %s"), 380 pair->dest_name, strerror(errno)); 381 // We can still safely copy some additional permissions: 382 // `group' must be at least as strict as `other' and 383 // also vice versa. 384 // 385 // NOTE: After this, the owner of the source file may 386 // get additional permissions. This shouldn't be too bad, 387 // because the owner would have had permission to chmod 388 // the original file anyway. 389 mode = ((pair->src_st.st_mode & 0070) >> 3) 390 & (pair->src_st.st_mode & 0007); 391 mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode; 392 } else { 393 // Drop the setuid, setgid, and sticky bits. 394 mode = pair->src_st.st_mode & 0777; 395 } 396 397 if (fchmod(pair->dest_fd, mode)) 398 message_warning(_("%s: Cannot set the file permissions: %s"), 399 pair->dest_name, strerror(errno)); 400 #endif 401 402 // Copy the timestamps. We have several possible ways to do this, of 403 // which some are better in both security and precision. 404 // 405 // First, get the nanosecond part of the timestamps. As of writing, 406 // it's not standardized by POSIX, and there are several names for 407 // the same thing in struct stat. 
408 long atime_nsec; 409 long mtime_nsec; 410 411 # if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) 412 // GNU and Solaris 413 atime_nsec = pair->src_st.st_atim.tv_nsec; 414 mtime_nsec = pair->src_st.st_mtim.tv_nsec; 415 416 # elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) 417 // BSD 418 atime_nsec = pair->src_st.st_atimespec.tv_nsec; 419 mtime_nsec = pair->src_st.st_mtimespec.tv_nsec; 420 421 # elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC) 422 // GNU and BSD without extensions 423 atime_nsec = pair->src_st.st_atimensec; 424 mtime_nsec = pair->src_st.st_mtimensec; 425 426 # elif defined(HAVE_STRUCT_STAT_ST_UATIME) 427 // Tru64 428 atime_nsec = pair->src_st.st_uatime * 1000; 429 mtime_nsec = pair->src_st.st_umtime * 1000; 430 431 # elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC) 432 // UnixWare 433 atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec; 434 mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec; 435 436 # else 437 // Safe fallback 438 atime_nsec = 0; 439 mtime_nsec = 0; 440 # endif 441 442 // Construct a structure to hold the timestamps and call appropriate 443 // function to set the timestamps. 444 #if defined(HAVE_FUTIMENS) 445 // Use nanosecond precision. 446 struct timespec tv[2]; 447 tv[0].tv_sec = pair->src_st.st_atime; 448 tv[0].tv_nsec = atime_nsec; 449 tv[1].tv_sec = pair->src_st.st_mtime; 450 tv[1].tv_nsec = mtime_nsec; 451 452 (void)futimens(pair->dest_fd, tv); 453 454 #elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) 455 // Use microsecond precision. 456 struct timeval tv[2]; 457 tv[0].tv_sec = pair->src_st.st_atime; 458 tv[0].tv_usec = atime_nsec / 1000; 459 tv[1].tv_sec = pair->src_st.st_mtime; 460 tv[1].tv_usec = mtime_nsec / 1000; 461 462 # if defined(HAVE_FUTIMES) 463 (void)futimes(pair->dest_fd, tv); 464 # elif defined(HAVE_FUTIMESAT) 465 (void)futimesat(pair->dest_fd, NULL, tv); 466 # else 467 // Argh, no function to use a file descriptor to set the timestamp. 
468 (void)utimes(pair->dest_name, tv); 469 # endif 470 471 #elif defined(HAVE__FUTIME) 472 // Use one-second precision with Windows-specific _futime(). 473 // We could use utime() too except that for some reason the 474 // timestamp will get reset at close(). With _futime() it works. 475 // This struct cannot be const as _futime() takes a non-const pointer. 476 struct _utimbuf buf = { 477 .actime = pair->src_st.st_atime, 478 .modtime = pair->src_st.st_mtime, 479 }; 480 481 // Avoid warnings. 482 (void)atime_nsec; 483 (void)mtime_nsec; 484 485 (void)_futime(pair->dest_fd, &buf); 486 487 #elif defined(HAVE_UTIME) 488 // Use one-second precision. utime() doesn't support using file 489 // descriptor either. Some systems have broken utime() prototype 490 // so don't make this const. 491 struct utimbuf buf = { 492 .actime = pair->src_st.st_atime, 493 .modtime = pair->src_st.st_mtime, 494 }; 495 496 // Avoid warnings. 497 (void)atime_nsec; 498 (void)mtime_nsec; 499 500 (void)utime(pair->dest_name, &buf); 501 #endif 502 503 return; 504 } 505 506 507 /// Opens the source file. Returns false on success, true on error. 508 static bool 509 io_open_src_real(file_pair *pair) 510 { 511 // There's nothing to open when reading from stdin. 512 if (pair->src_name == stdin_filename) { 513 pair->src_fd = STDIN_FILENO; 514 #ifdef TUKLIB_DOSLIKE 515 setmode(STDIN_FILENO, O_BINARY); 516 #else 517 // Try to set stdin to non-blocking mode. It won't work 518 // e.g. on OpenBSD if stdout is e.g. /dev/null. In such 519 // case we proceed as if stdin were non-blocking anyway 520 // (in case of /dev/null it will be in practice). The 521 // same applies to stdout in io_open_dest_real(). 
522 stdin_flags = fcntl(STDIN_FILENO, F_GETFL); 523 if (stdin_flags == -1) { 524 message_error(_("Error getting the file status flags " 525 "from standard input: %s"), 526 strerror(errno)); 527 return true; 528 } 529 530 if ((stdin_flags & O_NONBLOCK) == 0 531 && fcntl(STDIN_FILENO, F_SETFL, 532 stdin_flags | O_NONBLOCK) != -1) 533 restore_stdin_flags = true; 534 #endif 535 #ifdef HAVE_POSIX_FADVISE 536 // It will fail if stdin is a pipe and that's fine. 537 (void)posix_fadvise(STDIN_FILENO, 0, 0, 538 opt_mode == MODE_LIST 539 ? POSIX_FADV_RANDOM 540 : POSIX_FADV_SEQUENTIAL); 541 #endif 542 return false; 543 } 544 545 // Symlinks are not followed unless writing to stdout or --force 546 // or --keep was used. 547 const bool follow_symlinks 548 = opt_stdout || opt_force || opt_keep_original; 549 550 // We accept only regular files if we are writing the output 551 // to disk too. bzip2 allows overriding this with --force but 552 // gzip and xz don't. 553 const bool reg_files_only = !opt_stdout; 554 555 // Flags for open() 556 int flags = O_RDONLY | O_BINARY | O_NOCTTY; 557 558 #ifndef TUKLIB_DOSLIKE 559 // Use non-blocking I/O: 560 // - It prevents blocking when opening FIFOs and some other 561 // special files, which is good if we want to accept only 562 // regular files. 563 // - It can help avoiding some race conditions with signal handling. 564 flags |= O_NONBLOCK; 565 #endif 566 567 #if defined(O_NOFOLLOW) 568 if (!follow_symlinks) 569 flags |= O_NOFOLLOW; 570 #elif !defined(TUKLIB_DOSLIKE) 571 // Some POSIX-like systems lack O_NOFOLLOW (it's not required 572 // by POSIX). Check for symlinks with a separate lstat() on 573 // these systems. 
574 if (!follow_symlinks) { 575 struct stat st; 576 if (lstat(pair->src_name, &st)) { 577 message_error("%s: %s", pair->src_name, 578 strerror(errno)); 579 return true; 580 581 } else if (S_ISLNK(st.st_mode)) { 582 message_warning(_("%s: Is a symbolic link, " 583 "skipping"), pair->src_name); 584 return true; 585 } 586 } 587 #else 588 // Avoid warnings. 589 (void)follow_symlinks; 590 #endif 591 592 // Try to open the file. Signals have been blocked so EINTR shouldn't 593 // be possible. 594 pair->src_fd = open(pair->src_name, flags); 595 596 if (pair->src_fd == -1) { 597 // Signals (that have a signal handler) have been blocked. 598 assert(errno != EINTR); 599 600 #ifdef O_NOFOLLOW 601 // Give an understandable error message if the reason 602 // for failing was that the file was a symbolic link. 603 // 604 // Note that at least Linux, OpenBSD, Solaris, and Darwin 605 // use ELOOP to indicate that O_NOFOLLOW was the reason 606 // that open() failed. Because there may be 607 // directories in the pathname, ELOOP may occur also 608 // because of a symlink loop in the directory part. 609 // So ELOOP doesn't tell us what actually went wrong, 610 // and this stupidity went into POSIX-1.2008 too. 611 // 612 // FreeBSD associates EMLINK with O_NOFOLLOW and 613 // Tru64 uses ENOTSUP. We use these directly here 614 // and skip the lstat() call and the associated race. 615 // I want to hear if there are other kernels that 616 // fail with something else than ELOOP with O_NOFOLLOW. 
617 bool was_symlink = false; 618 619 # if defined(__FreeBSD__) || defined(__DragonFly__) 620 if (errno == EMLINK) 621 was_symlink = true; 622 623 # elif defined(__digital__) && defined(__unix__) 624 if (errno == ENOTSUP) 625 was_symlink = true; 626 627 # elif defined(__NetBSD__) 628 if (errno == EFTYPE) 629 was_symlink = true; 630 631 # else 632 if (errno == ELOOP && !follow_symlinks) { 633 const int saved_errno = errno; 634 struct stat st; 635 if (lstat(pair->src_name, &st) == 0 636 && S_ISLNK(st.st_mode)) 637 was_symlink = true; 638 639 errno = saved_errno; 640 } 641 # endif 642 643 if (was_symlink) 644 message_warning(_("%s: Is a symbolic link, " 645 "skipping"), pair->src_name); 646 else 647 #endif 648 // Something else than O_NOFOLLOW failing 649 // (assuming that the race conditions didn't 650 // confuse us). 651 message_error("%s: %s", pair->src_name, 652 strerror(errno)); 653 654 return true; 655 } 656 657 // Stat the source file. We need the result also when we copy 658 // the permissions, and when unlinking. 659 // 660 // NOTE: Use stat() instead of fstat() with DJGPP, because 661 // then we have a better chance to get st_ino value that can 662 // be used in io_open_dest_real() to prevent overwriting the 663 // source file. 664 #ifdef __DJGPP__ 665 if (stat(pair->src_name, &pair->src_st)) 666 goto error_msg; 667 #else 668 if (fstat(pair->src_fd, &pair->src_st)) 669 goto error_msg; 670 #endif 671 672 if (S_ISDIR(pair->src_st.st_mode)) { 673 message_warning(_("%s: Is a directory, skipping"), 674 pair->src_name); 675 goto error; 676 } 677 678 if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) { 679 message_warning(_("%s: Not a regular file, skipping"), 680 pair->src_name); 681 goto error; 682 } 683 684 #ifndef TUKLIB_DOSLIKE 685 if (reg_files_only && !opt_force && !opt_keep_original) { 686 if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) { 687 // gzip rejects setuid and setgid files even 688 // when --force was used. 
bzip2 doesn't check 689 // for them, but calls fchown() after fchmod(), 690 // and many systems automatically drop setuid 691 // and setgid bits there. 692 // 693 // We accept setuid and setgid files if 694 // --force or --keep was used. We drop these bits 695 // explicitly in io_copy_attr(). 696 message_warning(_("%s: File has setuid or " 697 "setgid bit set, skipping"), 698 pair->src_name); 699 goto error; 700 } 701 702 if (pair->src_st.st_mode & S_ISVTX) { 703 message_warning(_("%s: File has sticky bit " 704 "set, skipping"), 705 pair->src_name); 706 goto error; 707 } 708 709 if (pair->src_st.st_nlink > 1) { 710 message_warning(_("%s: Input file has more " 711 "than one hard link, " 712 "skipping"), pair->src_name); 713 goto error; 714 } 715 } 716 717 // If it is something else than a regular file, wait until 718 // there is input available. This way reading from FIFOs 719 // will work when open() is used with O_NONBLOCK. 720 if (!S_ISREG(pair->src_st.st_mode)) { 721 signals_unblock(); 722 const io_wait_ret ret = io_wait(pair, -1, true); 723 signals_block(); 724 725 if (ret != IO_WAIT_MORE) 726 goto error; 727 } 728 #endif 729 730 #ifdef HAVE_POSIX_FADVISE 731 // It will fail with some special files like FIFOs but that is fine. 732 (void)posix_fadvise(pair->src_fd, 0, 0, 733 opt_mode == MODE_LIST 734 ? POSIX_FADV_RANDOM 735 : POSIX_FADV_SEQUENTIAL); 736 #endif 737 738 return false; 739 740 error_msg: 741 message_error("%s: %s", pair->src_name, strerror(errno)); 742 error: 743 (void)close(pair->src_fd); 744 return true; 745 } 746 747 748 extern file_pair * 749 io_open_src(const char *src_name) 750 { 751 if (is_empty_filename(src_name)) 752 return NULL; 753 754 // Since we have only one file open at a time, we can use 755 // a statically allocated structure. 756 static file_pair pair; 757 758 // This implicitly also initializes src_st.st_size to zero 759 // which is expected to be <= 0 by default. 
fstat() isn't 760 // called when reading from standard input but src_st.st_size 761 // is still read. 762 pair = (file_pair){ 763 .src_name = src_name, 764 .dest_name = NULL, 765 .src_fd = -1, 766 .dest_fd = -1, 767 .src_eof = false, 768 .src_has_seen_input = false, 769 .flush_needed = false, 770 .dest_try_sparse = false, 771 .dest_pending_sparse = 0, 772 }; 773 774 // Block the signals, for which we have a custom signal handler, so 775 // that we don't need to worry about EINTR. 776 signals_block(); 777 const bool error = io_open_src_real(&pair); 778 signals_unblock(); 779 780 #ifdef ENABLE_SANDBOX 781 if (!error) 782 io_sandbox_enter(pair.src_fd); 783 #endif 784 785 return error ? NULL : &pair; 786 } 787 788 789 /// \brief Closes source file of the file_pair structure 790 /// 791 /// \param pair File whose src_fd should be closed 792 /// \param success If true, the file will be removed from the disk if 793 /// closing succeeds and --keep hasn't been used. 794 static void 795 io_close_src(file_pair *pair, bool success) 796 { 797 #ifndef TUKLIB_DOSLIKE 798 if (restore_stdin_flags) { 799 assert(pair->src_fd == STDIN_FILENO); 800 801 restore_stdin_flags = false; 802 803 if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1) 804 message_error(_("Error restoring the status flags " 805 "to standard input: %s"), 806 strerror(errno)); 807 } 808 #endif 809 810 if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) { 811 // Close the file before possibly unlinking it. On DOS-like 812 // systems this is always required since unlinking will fail 813 // if the file is open. On POSIX systems it usually works 814 // to unlink open files, but in some cases it doesn't and 815 // one gets EBUSY in errno. 816 // 817 // xz 5.2.2 and older unlinked the file before closing it 818 // (except on DOS-like systems). The old code didn't handle 819 // EBUSY and could fail e.g. on some CIFS shares. 
The 820 // advantage of unlinking before closing is negligible 821 // (avoids a race between close() and stat()/lstat() and 822 // unlink()), so let's keep this simple. 823 (void)close(pair->src_fd); 824 825 if (success && !opt_keep_original) 826 io_unlink(pair->src_name, &pair->src_st); 827 } 828 829 return; 830 } 831 832 833 static bool 834 io_open_dest_real(file_pair *pair) 835 { 836 if (opt_stdout || pair->src_fd == STDIN_FILENO) { 837 // We don't modify or free() this. 838 pair->dest_name = (char *)"(stdout)"; 839 pair->dest_fd = STDOUT_FILENO; 840 #ifdef TUKLIB_DOSLIKE 841 setmode(STDOUT_FILENO, O_BINARY); 842 #else 843 // Try to set O_NONBLOCK if it isn't already set. 844 // If it fails, we assume that stdout is non-blocking 845 // in practice. See the comments in io_open_src_real() 846 // for similar situation with stdin. 847 // 848 // NOTE: O_APPEND may be unset later in this function 849 // and it relies on stdout_flags being set here. 850 stdout_flags = fcntl(STDOUT_FILENO, F_GETFL); 851 if (stdout_flags == -1) { 852 message_error(_("Error getting the file status flags " 853 "from standard output: %s"), 854 strerror(errno)); 855 return true; 856 } 857 858 if ((stdout_flags & O_NONBLOCK) == 0 859 && fcntl(STDOUT_FILENO, F_SETFL, 860 stdout_flags | O_NONBLOCK) != -1) 861 restore_stdout_flags = true; 862 #endif 863 } else { 864 pair->dest_name = suffix_get_dest_name(pair->src_name); 865 if (pair->dest_name == NULL) 866 return true; 867 868 #ifdef __DJGPP__ 869 struct stat st; 870 if (stat(pair->dest_name, &st) == 0) { 871 // Check that it isn't a special file like "prn". 872 if (st.st_dev == -1) { 873 message_error("%s: Refusing to write to " 874 "a DOS special file", 875 pair->dest_name); 876 free(pair->dest_name); 877 return true; 878 } 879 880 // Check that we aren't overwriting the source file. 
881 if (st.st_dev == pair->src_st.st_dev 882 && st.st_ino == pair->src_st.st_ino) { 883 message_error("%s: Output file is the same " 884 "as the input file", 885 pair->dest_name); 886 free(pair->dest_name); 887 return true; 888 } 889 } 890 #endif 891 892 // If --force was used, unlink the target file first. 893 if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { 894 message_error(_("%s: Cannot remove: %s"), 895 pair->dest_name, strerror(errno)); 896 free(pair->dest_name); 897 return true; 898 } 899 900 // Open the file. 901 int flags = O_WRONLY | O_BINARY | O_NOCTTY 902 | O_CREAT | O_EXCL; 903 #ifndef TUKLIB_DOSLIKE 904 flags |= O_NONBLOCK; 905 #endif 906 const mode_t mode = S_IRUSR | S_IWUSR; 907 pair->dest_fd = open(pair->dest_name, flags, mode); 908 909 if (pair->dest_fd == -1) { 910 message_error("%s: %s", pair->dest_name, 911 strerror(errno)); 912 free(pair->dest_name); 913 return true; 914 } 915 } 916 917 #ifndef TUKLIB_DOSLIKE 918 // dest_st isn't used on DOS-like systems except as a dummy 919 // argument to io_unlink(), so don't fstat() on such systems. 920 if (fstat(pair->dest_fd, &pair->dest_st)) { 921 // If fstat() really fails, we have a safe fallback here. 922 # if defined(__VMS) 923 pair->dest_st.st_ino[0] = 0; 924 pair->dest_st.st_ino[1] = 0; 925 pair->dest_st.st_ino[2] = 0; 926 # else 927 pair->dest_st.st_dev = 0; 928 pair->dest_st.st_ino = 0; 929 # endif 930 } else if (try_sparse && opt_mode == MODE_DECOMPRESS) { 931 // When writing to standard output, we need to be extra 932 // careful: 933 // - It may be connected to something else than 934 // a regular file. 935 // - We aren't necessarily writing to a new empty file 936 // or to the end of an existing file. 937 // - O_APPEND may be active. 938 // 939 // TODO: I'm keeping this disabled for DOS-like systems 940 // for now. FAT doesn't support sparse files, but NTFS 941 // does, so maybe this should be enabled on Windows after 942 // some testing. 
943 if (pair->dest_fd == STDOUT_FILENO) { 944 if (!S_ISREG(pair->dest_st.st_mode)) 945 return false; 946 947 if (stdout_flags & O_APPEND) { 948 // Creating a sparse file is not possible 949 // when O_APPEND is active (it's used by 950 // shell's >> redirection). As I understand 951 // it, it is safe to temporarily disable 952 // O_APPEND in xz, because if someone 953 // happened to write to the same file at the 954 // same time, results would be bad anyway 955 // (users shouldn't assume that xz uses any 956 // specific block size when writing data). 957 // 958 // The write position may be something else 959 // than the end of the file, so we must fix 960 // it to start writing at the end of the file 961 // to imitate O_APPEND. 962 if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1) 963 return false; 964 965 // Construct the new file status flags. 966 // If O_NONBLOCK was set earlier in this 967 // function, it must be kept here too. 968 int flags = stdout_flags & ~O_APPEND; 969 if (restore_stdout_flags) 970 flags |= O_NONBLOCK; 971 972 // If this fcntl() fails, we continue but won't 973 // try to create sparse output. The original 974 // flags will still be restored if needed (to 975 // unset O_NONBLOCK) when the file is finished. 976 if (fcntl(STDOUT_FILENO, F_SETFL, flags) == -1) 977 return false; 978 979 // Disabling O_APPEND succeeded. Mark 980 // that the flags should be restored 981 // in io_close_dest(). (This may have already 982 // been set when enabling O_NONBLOCK.) 983 restore_stdout_flags = true; 984 985 } else if (lseek(STDOUT_FILENO, 0, SEEK_CUR) 986 != pair->dest_st.st_size) { 987 // Writing won't start exactly at the end 988 // of the file. We cannot use sparse output, 989 // because it would probably corrupt the file. 
				return false;
			}
		}

		pair->dest_try_sparse = true;
	}
#endif

	return false;
}


/// \brief      Opens the destination file with signals blocked
///
/// Thin wrapper that brackets io_open_dest_real() with signals_block()
/// and signals_unblock().
///
/// \param      pair    File pair passed through to io_open_dest_real()
///
/// \return     Whatever io_open_dest_real() returned.
extern bool
io_open_dest(file_pair *pair)
{
	signals_block();
	const bool ret = io_open_dest_real(pair);
	signals_unblock();
	return ret;
}


/// \brief      Closes destination file of the file_pair structure
///
/// Standard output is never closed here; for it, only the saved file
/// status flags are restored (when restore_stdout_flags is set). For any
/// other open destination, the file descriptor is closed, the file is
/// unlinked if closing failed or if success is false, and
/// pair->dest_name is freed.
///
/// \param      pair    File whose dest_fd should be closed
/// \param      success If false, the file will be removed from the disk.
///
/// \return     False if closing succeeds. On error, true is returned and
///             an error message has been printed.
static bool
io_close_dest(file_pair *pair, bool success)
{
#ifndef TUKLIB_DOSLIKE
	// If io_open_dest() has modified the file status flags of standard
	// output (it disables O_APPEND for sparse output and its comments
	// mention enabling O_NONBLOCK too), restore the original flags here.
	if (restore_stdout_flags) {
		assert(pair->dest_fd == STDOUT_FILENO);

		// Clear the marker first so that the restore is attempted
		// only once even if fcntl() fails below.
		restore_stdout_flags = false;

		if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) {
			message_error(_("Error restoring the O_APPEND flag "
					"to standard output: %s"),
					strerror(errno));
			return true;
		}
	}
#endif

	// Nothing to close if the destination isn't open (-1) or if it is
	// standard output.
	if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO)
		return false;

	if (close(pair->dest_fd)) {
		message_error(_("%s: Closing the file failed: %s"),
				pair->dest_name, strerror(errno));

		// Closing destination file failed, so we cannot trust its
		// contents. Get rid of junk:
		io_unlink(pair->dest_name, &pair->dest_st);
		free(pair->dest_name);
		return true;
	}

	// If the operation using this file wasn't successful, we get rid
	// of the junk file.
	if (!success)
		io_unlink(pair->dest_name, &pair->dest_st);

	free(pair->dest_name);

	return false;
}


/// \brief      Finishes and closes both files of the file pair
///
/// If sparse output is in use and a hole is still pending at the end of
/// the output, the file is first extended to its final size. Then, with
/// signals blocked, the file attributes are copied (only if success is
/// still true and the destination is open and isn't standard output),
/// the destination is closed, and finally the source is closed.
///
/// \param      pair    File pair to finish and close
/// \param      success True if the operation using this file pair was
///                     successful. It is downgraded to false locally if
///                     finishing the sparse file or closing the
///                     destination fails.
extern void
io_close(file_pair *pair, bool success)
{
	// Take care of sparseness at the end of the output file.
	if (success && pair->dest_try_sparse
			&& pair->dest_pending_sparse > 0) {
		// Seek forward one byte less than the size of the pending
		// hole, then write one zero-byte. This way the file grows
		// to its correct size. An alternative would be to use
		// ftruncate() but that isn't portable enough (e.g. it
		// doesn't work with FAT on Linux; FAT isn't that important
		// since it doesn't support sparse files anyway, but we don't
		// want to create corrupt files on it).
		if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1,
				SEEK_CUR) == -1) {
			message_error(_("%s: Seeking failed when trying "
					"to create a sparse file: %s"),
					pair->dest_name, strerror(errno));
			success = false;
		} else {
			// io_write_buf() returns true on failure.
			const uint8_t zero[1] = { '\0' };
			if (io_write_buf(pair, zero, 1))
				success = false;
		}
	}

	signals_block();

	// Copy the file attributes. We need to skip this if destination
	// file isn't open or it is standard output.
	if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO)
		io_copy_attrs(pair);

	// Close the destination first. If it fails, we must not remove
	// the source file!
	if (io_close_dest(pair, success))
		success = false;

	// Close the source file, and unlink it if the operation using this
	// file pair was successful and we haven't requested to keep the
	// source file.
	io_close_src(pair, success);

	signals_unblock();

	return;
}


/// \brief      Moves the source file position backwards
///
/// Failure of lseek() is deliberately ignored.
///
/// \param      pair        File pair whose src_fd is repositioned
/// \param      rewind_size Number of bytes to seek backwards from the
///                         current position. Must not exceed
///                         IO_BUFFER_SIZE (checked with assert()).
///                         Zero makes this a no-op.
extern void
io_fix_src_pos(file_pair *pair, size_t rewind_size)
{
	assert(rewind_size <= IO_BUFFER_SIZE);

	if (rewind_size > 0) {
		// This doesn't need to work on unseekable file descriptors,
		// so just ignore possible errors.
		(void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR);
	}

	return;
}


/// \brief      Reads from the source file into a buffer
///
/// read() is called repeatedly until size bytes have been read, end of
/// file is reached, an error occurs, or (on non-DOS-like systems) waiting
/// for more input times out.
///
/// \param      pair    File pair; data is read from pair->src_fd. This
///                     function may set pair->src_eof,
///                     pair->src_has_seen_input, and pair->flush_needed.
/// \param      buf     Buffer whose u8 member receives the data
/// \param      size    Number of bytes to try to read. Must be less
///                     than SSIZE_MAX (checked with assert()).
///
/// \return     Number of bytes read. This is less than size if end of
///             file was reached (pair->src_eof is set) or if waiting
///             timed out (pair->flush_needed is set). SIZE_MAX is
///             returned on error or user abort; for read() errors
///             an error message is printed here.
extern size_t
io_read(file_pair *pair, io_buf *buf, size_t size)
{
	// We use small buffers here.
	assert(size < SSIZE_MAX);

	// Number of bytes stored in buf so far.
	size_t pos = 0;

	while (pos < size) {
		const ssize_t amount = read(
				pair->src_fd, buf->u8 + pos, size - pos);

		// End of file: return what has been read so far.
		if (amount == 0) {
			pair->src_eof = true;
			break;
		}

		if (amount == -1) {
			// Interrupted by a signal: give up if the user has
			// requested an abort, otherwise retry the read().
			if (errno == EINTR) {
				if (user_abort)
					return SIZE_MAX;

				continue;
			}

#ifndef TUKLIB_DOSLIKE
			// No data is available right now on a non-blocking
			// file descriptor: wait with io_wait().
			if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) {
				// Disable the flush-timeout if no input has
				// been seen since the previous flush and thus
				// there would be nothing to flush after the
				// timeout expires (avoids busy waiting).
				const int timeout = pair->src_has_seen_input
						? mytime_get_flush_timeout()
						: -1;

				switch (io_wait(pair, timeout, true)) {
				case IO_WAIT_MORE:
					continue;

				case IO_WAIT_ERROR:
					return SIZE_MAX;

				case IO_WAIT_TIMEOUT:
					// Return the partial data and ask
					// the caller to flush.
					pair->flush_needed = true;
					return pos;

				default:
					// NOTE(review): presumably
					// message_bug() doesn't return;
					// otherwise control would continue
					// to the error message below.
					// Confirm against its declaration.
					message_bug();
				}
			}
#endif

			message_error(_("%s: Read error: %s"),
					pair->src_name, strerror(errno));

			return SIZE_MAX;
		}

		// The zero and -1 cases were handled above, so amount is
		// positive here.
		pos += (size_t)(amount);

		// On the first input since the flag was last cleared,
		// note it and call mytime_set_flush_time().
		if (!pair->src_has_seen_input) {
			pair->src_has_seen_input = true;
			mytime_set_flush_time();
		}
	}

	return pos;
}


/// \brief      Reads exactly size bytes from the given source offset
///
/// \param      pair    File pair; pair->src_fd is repositioned and read
/// \param      buf     Destination buffer, passed to io_read()
/// \param      size    Number of bytes that must be read
/// \param      pos     Absolute offset in the source file to read from
///
/// \return     False if exactly size bytes were read. True if seeking
///             failed, io_read() returned SIZE_MAX, or fewer than size
///             bytes were read; this function prints an error message
///             in the first and the last case.
extern bool
io_pread(file_pair *pair, io_buf *buf, size_t size, off_t pos)
{
	// Using lseek() and read() is more portable than pread() and
	// for us it is as good as real pread().
	if (lseek(pair->src_fd, pos, SEEK_SET) != pos) {
		message_error(_("%s: Error seeking the file: %s"),
				pair->src_name, strerror(errno));
		return true;
	}

	// SIZE_MAX means that io_read() failed.
	const size_t amount = io_read(pair, buf, size);
	if (amount == SIZE_MAX)
		return true;

	// A short read is reported as a premature end of file.
	if (amount != size) {
		message_error(_("%s: Unexpected end of file"),
				pair->src_name);
		return true;
	}

	return false;
}


/// \brief      Tests if the I/O buffer contains only zeros
///
/// All elements of buf->u64 are examined, so the result is meaningful
/// only when the whole buffer holds valid data. io_write() calls this
/// only when size == IO_BUFFER_SIZE.
///
/// \param      buf     Buffer to examine
///
/// \return     True if every element of buf->u64 is zero,
///             false otherwise.
static bool
is_sparse(const io_buf *buf)
{
	assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0);

	for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i)
		if (buf->u64[i] != 0)
			return false;

	return true;
}


/// \brief      Writes a buffer to the destination file
///
/// write() is called repeatedly until all of the data has been written
/// or an error occurs.
///
/// \param      pair    File pair; data is written to pair->dest_fd
/// \param      buf     Data to write
/// \param      size    Number of bytes to write. Must be less than
///                     SSIZE_MAX (checked with assert()).
///
/// \return     False if everything was written. True on error or user
///             abort. An error message is printed here for write()
///             errors other than EPIPE.
static bool
io_write_buf(file_pair *pair, const uint8_t *buf, size_t size)
{
	assert(size < SSIZE_MAX);

	while (size > 0) {
		const ssize_t amount = write(pair->dest_fd, buf, size);
		if (amount == -1) {
			// Interrupted by a signal: give up if the user has
			// requested an abort, otherwise retry the write().
			if (errno == EINTR) {
				if (user_abort)
					return true;

				continue;
			}

#ifndef TUKLIB_DOSLIKE
			// The non-blocking destination cannot accept data
			// right now: wait without a timeout (-1) and retry
			// unless io_wait() reports something other than
			// IO_WAIT_MORE.
			if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) {
				if (io_wait(pair, -1, false) == IO_WAIT_MORE)
					continue;

				return true;
			}
#endif

			// Handle broken pipe specially. gzip and bzip2
			// don't print anything on SIGPIPE. In addition,
			// gzip --quiet uses exit status 2 (warning) on
			// broken pipe instead of whatever raise(SIGPIPE)
			// would make it return. It is there to hide "Broken
			// pipe" message on some old shells (probably old
			// GNU bash).
			//
			// We don't do anything special with --quiet, which
			// is what bzip2 does too. If we get SIGPIPE, we
			// will handle it like other signals by setting
			// user_abort, and get EPIPE here.
			if (errno != EPIPE)
				message_error(_("%s: Write error: %s"),
					pair->dest_name, strerror(errno));

			return true;
		}

		// Partial write: advance past the bytes that were written
		// and try again with the rest.
		buf += (size_t)(amount);
		size -= (size_t)(amount);
	}

	return false;
}


/// \brief      Writes a buffer to the destination file, creating holes
///             when sparse output is in use
///
/// When pair->dest_try_sparse is set, a full-sized buffer that contains
/// only zeros isn't written. Its size is added to
/// pair->dest_pending_sparse instead, and the hole is skipped with
/// lseek() before the next non-sparse write or finished in io_close().
///
/// \param      pair    File pair; data is written to pair->dest_fd
/// \param      buf     Data to write
/// \param      size    Number of bytes in buf to write. Must not exceed
///                     IO_BUFFER_SIZE (checked with assert()).
///
/// \return     False on success, including when the block was only
///             recorded as a pending hole. True on error, after
///             a seek failure has been reported here or
///             io_write_buf() has failed.
extern bool
io_write(file_pair *pair, const io_buf *buf, size_t size)
{
	assert(size <= IO_BUFFER_SIZE);

	if (pair->dest_try_sparse) {
		// Check if the block is sparse (contains only zeros). If it
		// is sparse, we just store the amount and return. We will
		// take care of actually skipping over the hole when we hit
		// the next data block or close the file.
		//
		// Since io_close() requires that dest_pending_sparse > 0
		// if the file ends with sparse block, we must also return
		// if size == 0 to avoid doing the lseek().
		if (size == IO_BUFFER_SIZE) {
			// Even if the block was sparse, treat it as non-sparse
			// if the pending sparse amount is large compared to
			// the size of off_t. In practice this only matters
			// on 32-bit systems where off_t isn't always 64 bits.
			const off_t pending_max
				= (off_t)(1) << (sizeof(off_t) * CHAR_BIT - 2);
			if (is_sparse(buf) && pair->dest_pending_sparse
					< pending_max) {
				pair->dest_pending_sparse += (off_t)(size);
				return false;
			}
		} else if (size == 0) {
			return false;
		}

		// This is not a sparse block. If we have a pending hole,
		// skip it now.
		if (pair->dest_pending_sparse > 0) {
			if (lseek(pair->dest_fd, pair->dest_pending_sparse,
					SEEK_CUR) == -1) {
				message_error(_("%s: Seeking failed when "
						"trying to create a sparse "
						"file: %s"), pair->dest_name,
						strerror(errno));
				return true;
			}

			pair->dest_pending_sparse = 0;
		}
	}

	return io_write_buf(pair, buf->u8, size);
}