xref: /freebsd/contrib/xz/src/xz/file_io.c (revision bc96366c864c07ef352edb92017357917c75b36c)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       file_io.c
4 /// \brief      File opening, unlinking, and closing
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #include "private.h"
14 
15 #include <fcntl.h>
16 
17 #ifdef TUKLIB_DOSLIKE
18 #	include <io.h>
19 #else
20 static bool warn_fchown;
21 #endif
22 
23 #if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
24 #	include <sys/time.h>
25 #elif defined(HAVE_UTIME)
26 #	include <utime.h>
27 #endif
28 
29 #include "tuklib_open_stdxxx.h"
30 
31 #ifndef O_BINARY
32 #	define O_BINARY 0
33 #endif
34 
35 #ifndef O_NOCTTY
36 #	define O_NOCTTY 0
37 #endif
38 
39 
40 /// If true, try to create sparse files when decompressing.
41 static bool try_sparse = true;
42 
43 #ifndef TUKLIB_DOSLIKE
44 /// Original file status flags of standard output. This is used by
45 /// io_open_dest() and io_close_dest() to save and restore the flags.
46 static int stdout_flags;
47 static bool restore_stdout_flags = false;
48 #endif
49 
50 
51 static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size);
52 
53 
54 extern void
55 io_init(void)
56 {
57 	// Make sure that stdin, stdout, and stderr are connected to
58 	// a valid file descriptor. Exit immediately with exit code ERROR
59 	// if we cannot make the file descriptors valid. Maybe we should
60 	// print an error message, but our stderr could be screwed anyway.
61 	tuklib_open_stdxxx(E_ERROR);
62 
63 #ifndef TUKLIB_DOSLIKE
64 	// If fchown() fails setting the owner, we warn about it only if
65 	// we are root.
66 	warn_fchown = geteuid() == 0;
67 #endif
68 
69 #ifdef __DJGPP__
70 	// Avoid doing useless things when statting files.
71 	// This isn't important but doesn't hurt.
72 	_djstat_flags = _STAT_INODE | _STAT_EXEC_EXT
73 			| _STAT_EXEC_MAGIC | _STAT_DIRSIZE;
74 #endif
75 
76 	return;
77 }
78 
79 
80 extern void
81 io_no_sparse(void)
82 {
83 	try_sparse = false;
84 	return;
85 }
86 
87 
88 /// \brief      Unlink a file
89 ///
90 /// This tries to verify that the file being unlinked really is the file that
91 /// we want to unlink by verifying device and inode numbers. There's still
92 /// a small unavoidable race, but this is much better than nothing (the file
93 /// could have been moved/replaced even hours earlier).
94 static void
95 io_unlink(const char *name, const struct stat *known_st)
96 {
97 #if defined(TUKLIB_DOSLIKE)
98 	// On DOS-like systems, st_ino is meaningless, so don't bother
99 	// testing it. Just silence a compiler warning.
100 	(void)known_st;
101 #else
102 	struct stat new_st;
103 
104 	// If --force was used, use stat() instead of lstat(). This way
105 	// (de)compressing symlinks works correctly. However, it also means
106 	// that xz cannot detect if a regular file foo is renamed to bar
107 	// and then a symlink foo -> bar is created. Because of stat()
108 	// instead of lstat(), xz will think that foo hasn't been replaced
109 	// with another file. Thus, xz will remove foo even though it no
110 	// longer is the same file that xz used when it started compressing.
111 	// Probably it's not too bad though, so this doesn't need a more
112 	// complex fix.
113 	const int stat_ret = opt_force
114 			? stat(name, &new_st) : lstat(name, &new_st);
115 
116 	if (stat_ret
117 #	ifdef __VMS
118 			// st_ino is an array, and we don't want to
119 			// compare st_dev at all.
120 			|| memcmp(&new_st.st_ino, &known_st->st_ino,
121 				sizeof(new_st.st_ino)) != 0
122 #	else
123 			// Typical POSIX-like system
124 			|| new_st.st_dev != known_st->st_dev
125 			|| new_st.st_ino != known_st->st_ino
126 #	endif
127 			)
128 		// TRANSLATORS: When compression or decompression finishes,
129 		// and xz is going to remove the source file, xz first checks
130 		// if the source file still exists, and if it does, does its
131 		// device and inode numbers match what xz saw when it opened
132 		// the source file. If these checks fail, this message is
133 		// shown, %s being the filename, and the file is not deleted.
134 		// The check for device and inode numbers is there, because
135 		// it is possible that the user has put a new file in place
136 		// of the original file, and in that case it obviously
137 		// shouldn't be removed.
138 		message_error(_("%s: File seems to have been moved, "
139 				"not removing"), name);
140 	else
141 #endif
142 		// There's a race condition between lstat() and unlink()
143 		// but at least we have tried to avoid removing wrong file.
144 		if (unlink(name))
145 			message_error(_("%s: Cannot remove: %s"),
146 					name, strerror(errno));
147 
148 	return;
149 }
150 
151 
152 /// \brief      Copies owner/group and permissions
153 ///
154 /// \todo       ACL and EA support
155 ///
156 static void
157 io_copy_attrs(const file_pair *pair)
158 {
159 	// Skip chown and chmod on Windows.
160 #ifndef TUKLIB_DOSLIKE
161 	// This function is more tricky than you may think at first.
162 	// Blindly copying permissions may permit users to access the
163 	// destination file who didn't have permission to access the
164 	// source file.
165 
166 	// Try changing the owner of the file. If we aren't root or the owner
167 	// isn't already us, fchown() probably doesn't succeed. We warn
168 	// about failing fchown() only if we are root.
169 	if (fchown(pair->dest_fd, pair->src_st.st_uid, -1) && warn_fchown)
170 		message_warning(_("%s: Cannot set the file owner: %s"),
171 				pair->dest_name, strerror(errno));
172 
173 	mode_t mode;
174 
175 	if (fchown(pair->dest_fd, -1, pair->src_st.st_gid)) {
176 		message_warning(_("%s: Cannot set the file group: %s"),
177 				pair->dest_name, strerror(errno));
178 		// We can still safely copy some additional permissions:
179 		// `group' must be at least as strict as `other' and
180 		// also vice versa.
181 		//
182 		// NOTE: After this, the owner of the source file may
183 		// get additional permissions. This shouldn't be too bad,
184 		// because the owner would have had permission to chmod
185 		// the original file anyway.
186 		mode = ((pair->src_st.st_mode & 0070) >> 3)
187 				& (pair->src_st.st_mode & 0007);
188 		mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode;
189 	} else {
190 		// Drop the setuid, setgid, and sticky bits.
191 		mode = pair->src_st.st_mode & 0777;
192 	}
193 
194 	if (fchmod(pair->dest_fd, mode))
195 		message_warning(_("%s: Cannot set the file permissions: %s"),
196 				pair->dest_name, strerror(errno));
197 #endif
198 
199 	// Copy the timestamps. We have several possible ways to do this, of
200 	// which some are better in both security and precision.
201 	//
202 	// First, get the nanosecond part of the timestamps. As of writing,
203 	// it's not standardized by POSIX, and there are several names for
204 	// the same thing in struct stat.
205 	long atime_nsec;
206 	long mtime_nsec;
207 
208 #	if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC)
209 	// GNU and Solaris
210 	atime_nsec = pair->src_st.st_atim.tv_nsec;
211 	mtime_nsec = pair->src_st.st_mtim.tv_nsec;
212 
213 #	elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC)
214 	// BSD
215 	atime_nsec = pair->src_st.st_atimespec.tv_nsec;
216 	mtime_nsec = pair->src_st.st_mtimespec.tv_nsec;
217 
218 #	elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC)
219 	// GNU and BSD without extensions
220 	atime_nsec = pair->src_st.st_atimensec;
221 	mtime_nsec = pair->src_st.st_mtimensec;
222 
223 #	elif defined(HAVE_STRUCT_STAT_ST_UATIME)
224 	// Tru64
225 	atime_nsec = pair->src_st.st_uatime * 1000;
226 	mtime_nsec = pair->src_st.st_umtime * 1000;
227 
228 #	elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC)
229 	// UnixWare
230 	atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec;
231 	mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec;
232 
233 #	else
234 	// Safe fallback
235 	atime_nsec = 0;
236 	mtime_nsec = 0;
237 #	endif
238 
239 	// Construct a structure to hold the timestamps and call appropriate
240 	// function to set the timestamps.
241 #if defined(HAVE_FUTIMENS)
242 	// Use nanosecond precision.
243 	struct timespec tv[2];
244 	tv[0].tv_sec = pair->src_st.st_atime;
245 	tv[0].tv_nsec = atime_nsec;
246 	tv[1].tv_sec = pair->src_st.st_mtime;
247 	tv[1].tv_nsec = mtime_nsec;
248 
249 	(void)futimens(pair->dest_fd, tv);
250 
251 #elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
252 	// Use microsecond precision.
253 	struct timeval tv[2];
254 	tv[0].tv_sec = pair->src_st.st_atime;
255 	tv[0].tv_usec = atime_nsec / 1000;
256 	tv[1].tv_sec = pair->src_st.st_mtime;
257 	tv[1].tv_usec = mtime_nsec / 1000;
258 
259 #	if defined(HAVE_FUTIMES)
260 	(void)futimes(pair->dest_fd, tv);
261 #	elif defined(HAVE_FUTIMESAT)
262 	(void)futimesat(pair->dest_fd, NULL, tv);
263 #	else
264 	// Argh, no function to use a file descriptor to set the timestamp.
265 	(void)utimes(pair->dest_name, tv);
266 #	endif
267 
268 #elif defined(HAVE_UTIME)
269 	// Use one-second precision. utime() doesn't support using file
270 	// descriptor either. Some systems have broken utime() prototype
271 	// so don't make this const.
272 	struct utimbuf buf = {
273 		.actime = pair->src_st.st_atime,
274 		.modtime = pair->src_st.st_mtime,
275 	};
276 
277 	// Avoid warnings.
278 	(void)atime_nsec;
279 	(void)mtime_nsec;
280 
281 	(void)utime(pair->dest_name, &buf);
282 #endif
283 
284 	return;
285 }
286 
287 
288 /// Opens the source file. Returns false on success, true on error.
289 static bool
290 io_open_src_real(file_pair *pair)
291 {
292 	// There's nothing to open when reading from stdin.
293 	if (pair->src_name == stdin_filename) {
294 		pair->src_fd = STDIN_FILENO;
295 #ifdef TUKLIB_DOSLIKE
296 		setmode(STDIN_FILENO, O_BINARY);
297 #endif
298 		return false;
299 	}
300 
301 	// Symlinks are not followed unless writing to stdout or --force
302 	// was used.
303 	const bool follow_symlinks = opt_stdout || opt_force;
304 
305 	// We accept only regular files if we are writing the output
306 	// to disk too. bzip2 allows overriding this with --force but
307 	// gzip and xz don't.
308 	const bool reg_files_only = !opt_stdout;
309 
310 	// Flags for open()
311 	int flags = O_RDONLY | O_BINARY | O_NOCTTY;
312 
313 #ifndef TUKLIB_DOSLIKE
314 	// If we accept only regular files, we need to be careful to avoid
315 	// problems with special files like devices and FIFOs. O_NONBLOCK
316 	// prevents blocking when opening such files. When we want to accept
317 	// special files, we must not use O_NONBLOCK, or otherwise we won't
318 	// block waiting e.g. FIFOs to become readable.
319 	if (reg_files_only)
320 		flags |= O_NONBLOCK;
321 #endif
322 
323 #if defined(O_NOFOLLOW)
324 	if (!follow_symlinks)
325 		flags |= O_NOFOLLOW;
326 #elif !defined(TUKLIB_DOSLIKE)
327 	// Some POSIX-like systems lack O_NOFOLLOW (it's not required
328 	// by POSIX). Check for symlinks with a separate lstat() on
329 	// these systems.
330 	if (!follow_symlinks) {
331 		struct stat st;
332 		if (lstat(pair->src_name, &st)) {
333 			message_error("%s: %s", pair->src_name,
334 					strerror(errno));
335 			return true;
336 
337 		} else if (S_ISLNK(st.st_mode)) {
338 			message_warning(_("%s: Is a symbolic link, "
339 					"skipping"), pair->src_name);
340 			return true;
341 		}
342 	}
343 #else
344 	// Avoid warnings.
345 	(void)follow_symlinks;
346 #endif
347 
348 	// Try to open the file. If we are accepting non-regular files,
349 	// unblock the caught signals so that open() can be interrupted
350 	// if it blocks e.g. due to a FIFO file.
351 	if (!reg_files_only)
352 		signals_unblock();
353 
354 	// Maybe this wouldn't need a loop, since all the signal handlers for
355 	// which we don't use SA_RESTART set user_abort to true. But it
356 	// doesn't hurt to have it just in case.
357 	do {
358 		pair->src_fd = open(pair->src_name, flags);
359 	} while (pair->src_fd == -1 && errno == EINTR && !user_abort);
360 
361 	if (!reg_files_only)
362 		signals_block();
363 
364 	if (pair->src_fd == -1) {
365 		// If we were interrupted, don't display any error message.
366 		if (errno == EINTR) {
367 			// All the signals that don't have SA_RESTART
368 			// set user_abort.
369 			assert(user_abort);
370 			return true;
371 		}
372 
373 #ifdef O_NOFOLLOW
374 		// Give an understandable error message if the reason
375 		// for failing was that the file was a symbolic link.
376 		//
377 		// Note that at least Linux, OpenBSD, Solaris, and Darwin
378 		// use ELOOP to indicate that O_NOFOLLOW was the reason
379 		// that open() failed. Because there may be
380 		// directories in the pathname, ELOOP may occur also
381 		// because of a symlink loop in the directory part.
382 		// So ELOOP doesn't tell us what actually went wrong,
383 		// and this stupidity went into POSIX-1.2008 too.
384 		//
385 		// FreeBSD associates EMLINK with O_NOFOLLOW and
386 		// Tru64 uses ENOTSUP. We use these directly here
387 		// and skip the lstat() call and the associated race.
388 		// I want to hear if there are other kernels that
389 		// fail with something else than ELOOP with O_NOFOLLOW.
390 		bool was_symlink = false;
391 
392 #	if defined(__FreeBSD__) || defined(__DragonFly__)
393 		if (errno == EMLINK)
394 			was_symlink = true;
395 
396 #	elif defined(__digital__) && defined(__unix__)
397 		if (errno == ENOTSUP)
398 			was_symlink = true;
399 
400 #	elif defined(__NetBSD__)
401 		if (errno == EFTYPE)
402 			was_symlink = true;
403 
404 #	else
405 		if (errno == ELOOP && !follow_symlinks) {
406 			const int saved_errno = errno;
407 			struct stat st;
408 			if (lstat(pair->src_name, &st) == 0
409 					&& S_ISLNK(st.st_mode))
410 				was_symlink = true;
411 
412 			errno = saved_errno;
413 		}
414 #	endif
415 
416 		if (was_symlink)
417 			message_warning(_("%s: Is a symbolic link, "
418 					"skipping"), pair->src_name);
419 		else
420 #endif
421 			// Something else than O_NOFOLLOW failing
422 			// (assuming that the race conditions didn't
423 			// confuse us).
424 			message_error("%s: %s", pair->src_name,
425 					strerror(errno));
426 
427 		return true;
428 	}
429 
430 #ifndef TUKLIB_DOSLIKE
431 	// Drop O_NONBLOCK, which is used only when we are accepting only
432 	// regular files. After the open() call, we want things to block
433 	// instead of giving EAGAIN.
434 	if (reg_files_only) {
435 		flags = fcntl(pair->src_fd, F_GETFL);
436 		if (flags == -1)
437 			goto error_msg;
438 
439 		flags &= ~O_NONBLOCK;
440 
441 		if (fcntl(pair->src_fd, F_SETFL, flags) == -1)
442 			goto error_msg;
443 	}
444 #endif
445 
446 	// Stat the source file. We need the result also when we copy
447 	// the permissions, and when unlinking.
448 	if (fstat(pair->src_fd, &pair->src_st))
449 		goto error_msg;
450 
451 	if (S_ISDIR(pair->src_st.st_mode)) {
452 		message_warning(_("%s: Is a directory, skipping"),
453 				pair->src_name);
454 		goto error;
455 	}
456 
457 	if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) {
458 		message_warning(_("%s: Not a regular file, skipping"),
459 				pair->src_name);
460 		goto error;
461 	}
462 
463 #ifndef TUKLIB_DOSLIKE
464 	if (reg_files_only && !opt_force) {
465 		if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) {
466 			// gzip rejects setuid and setgid files even
467 			// when --force was used. bzip2 doesn't check
468 			// for them, but calls fchown() after fchmod(),
469 			// and many systems automatically drop setuid
470 			// and setgid bits there.
471 			//
472 			// We accept setuid and setgid files if
473 			// --force was used. We drop these bits
474 			// explicitly in io_copy_attr().
475 			message_warning(_("%s: File has setuid or "
476 					"setgid bit set, skipping"),
477 					pair->src_name);
478 			goto error;
479 		}
480 
481 		if (pair->src_st.st_mode & S_ISVTX) {
482 			message_warning(_("%s: File has sticky bit "
483 					"set, skipping"),
484 					pair->src_name);
485 			goto error;
486 		}
487 
488 		if (pair->src_st.st_nlink > 1) {
489 			message_warning(_("%s: Input file has more "
490 					"than one hard link, "
491 					"skipping"), pair->src_name);
492 			goto error;
493 		}
494 	}
495 #endif
496 
497 	return false;
498 
499 error_msg:
500 	message_error("%s: %s", pair->src_name, strerror(errno));
501 error:
502 	(void)close(pair->src_fd);
503 	return true;
504 }
505 
506 
507 extern file_pair *
508 io_open_src(const char *src_name)
509 {
510 	if (is_empty_filename(src_name))
511 		return NULL;
512 
513 	// Since we have only one file open at a time, we can use
514 	// a statically allocated structure.
515 	static file_pair pair;
516 
517 	pair = (file_pair){
518 		.src_name = src_name,
519 		.dest_name = NULL,
520 		.src_fd = -1,
521 		.dest_fd = -1,
522 		.src_eof = false,
523 		.dest_try_sparse = false,
524 		.dest_pending_sparse = 0,
525 	};
526 
527 	// Block the signals, for which we have a custom signal handler, so
528 	// that we don't need to worry about EINTR.
529 	signals_block();
530 	const bool error = io_open_src_real(&pair);
531 	signals_unblock();
532 
533 	return error ? NULL : &pair;
534 }
535 
536 
537 /// \brief      Closes source file of the file_pair structure
538 ///
539 /// \param      pair    File whose src_fd should be closed
540 /// \param      success If true, the file will be removed from the disk if
541 ///                     closing succeeds and --keep hasn't been used.
542 static void
543 io_close_src(file_pair *pair, bool success)
544 {
545 	if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) {
546 #ifdef TUKLIB_DOSLIKE
547 		(void)close(pair->src_fd);
548 #endif
549 
550 		// If we are going to unlink(), do it before closing the file.
551 		// This way there's no risk that someone replaces the file and
552 		// happens to get same inode number, which would make us
553 		// unlink() wrong file.
554 		//
555 		// NOTE: DOS-like systems are an exception to this, because
556 		// they don't allow unlinking files that are open. *sigh*
557 		if (success && !opt_keep_original)
558 			io_unlink(pair->src_name, &pair->src_st);
559 
560 #ifndef TUKLIB_DOSLIKE
561 		(void)close(pair->src_fd);
562 #endif
563 	}
564 
565 	return;
566 }
567 
568 
569 static bool
570 io_open_dest_real(file_pair *pair)
571 {
572 	if (opt_stdout || pair->src_fd == STDIN_FILENO) {
573 		// We don't modify or free() this.
574 		pair->dest_name = (char *)"(stdout)";
575 		pair->dest_fd = STDOUT_FILENO;
576 #ifdef TUKLIB_DOSLIKE
577 		setmode(STDOUT_FILENO, O_BINARY);
578 #endif
579 	} else {
580 		pair->dest_name = suffix_get_dest_name(pair->src_name);
581 		if (pair->dest_name == NULL)
582 			return true;
583 
584 		// If --force was used, unlink the target file first.
585 		if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
586 			message_error(_("%s: Cannot remove: %s"),
587 					pair->dest_name, strerror(errno));
588 			free(pair->dest_name);
589 			return true;
590 		}
591 
592 		// Open the file.
593 		const int flags = O_WRONLY | O_BINARY | O_NOCTTY
594 				| O_CREAT | O_EXCL;
595 		const mode_t mode = S_IRUSR | S_IWUSR;
596 		pair->dest_fd = open(pair->dest_name, flags, mode);
597 
598 		if (pair->dest_fd == -1) {
599 			message_error("%s: %s", pair->dest_name,
600 					strerror(errno));
601 			free(pair->dest_name);
602 			return true;
603 		}
604 	}
605 
606 	// If this really fails... well, we have a safe fallback.
607 	if (fstat(pair->dest_fd, &pair->dest_st)) {
608 #if defined(__VMS)
609 		pair->dest_st.st_ino[0] = 0;
610 		pair->dest_st.st_ino[1] = 0;
611 		pair->dest_st.st_ino[2] = 0;
612 #elif !defined(TUKLIB_DOSLIKE)
613 		pair->dest_st.st_dev = 0;
614 		pair->dest_st.st_ino = 0;
615 #endif
616 #ifndef TUKLIB_DOSLIKE
617 	} else if (try_sparse && opt_mode == MODE_DECOMPRESS) {
618 		// When writing to standard output, we need to be extra
619 		// careful:
620 		//  - It may be connected to something else than
621 		//    a regular file.
622 		//  - We aren't necessarily writing to a new empty file
623 		//    or to the end of an existing file.
624 		//  - O_APPEND may be active.
625 		//
626 		// TODO: I'm keeping this disabled for DOS-like systems
627 		// for now. FAT doesn't support sparse files, but NTFS
628 		// does, so maybe this should be enabled on Windows after
629 		// some testing.
630 		if (pair->dest_fd == STDOUT_FILENO) {
631 			if (!S_ISREG(pair->dest_st.st_mode))
632 				return false;
633 
634 			stdout_flags = fcntl(STDOUT_FILENO, F_GETFL);
635 			if (stdout_flags == -1)
636 				return false;
637 
638 			if (stdout_flags & O_APPEND) {
639 				// Creating a sparse file is not possible
640 				// when O_APPEND is active (it's used by
641 				// shell's >> redirection). As I understand
642 				// it, it is safe to temporarily disable
643 				// O_APPEND in xz, because if someone
644 				// happened to write to the same file at the
645 				// same time, results would be bad anyway
646 				// (users shouldn't assume that xz uses any
647 				// specific block size when writing data).
648 				//
649 				// The write position may be something else
650 				// than the end of the file, so we must fix
651 				// it to start writing at the end of the file
652 				// to imitate O_APPEND.
653 				if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1)
654 					return false;
655 
656 				if (fcntl(STDOUT_FILENO, F_SETFL,
657 						stdout_flags & ~O_APPEND)
658 						== -1)
659 					return false;
660 
661 				// Disabling O_APPEND succeeded. Mark
662 				// that the flags should be restored
663 				// in io_close_dest().
664 				restore_stdout_flags = true;
665 
666 			} else if (lseek(STDOUT_FILENO, 0, SEEK_CUR)
667 					!= pair->dest_st.st_size) {
668 				// Writing won't start exactly at the end
669 				// of the file. We cannot use sparse output,
670 				// because it would probably corrupt the file.
671 				return false;
672 			}
673 		}
674 
675 		pair->dest_try_sparse = true;
676 #endif
677 	}
678 
679 	return false;
680 }
681 
682 
683 extern bool
684 io_open_dest(file_pair *pair)
685 {
686 	signals_block();
687 	const bool ret = io_open_dest_real(pair);
688 	signals_unblock();
689 	return ret;
690 }
691 
692 
693 /// \brief      Closes destination file of the file_pair structure
694 ///
695 /// \param      pair    File whose dest_fd should be closed
696 /// \param      success If false, the file will be removed from the disk.
697 ///
698 /// \return     Zero if closing succeeds. On error, -1 is returned and
699 ///             error message printed.
700 static bool
701 io_close_dest(file_pair *pair, bool success)
702 {
703 #ifndef TUKLIB_DOSLIKE
704 	// If io_open_dest() has disabled O_APPEND, restore it here.
705 	if (restore_stdout_flags) {
706 		assert(pair->dest_fd == STDOUT_FILENO);
707 
708 		restore_stdout_flags = false;
709 
710 		if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) {
711 			message_error(_("Error restoring the O_APPEND flag "
712 					"to standard output: %s"),
713 					strerror(errno));
714 			return true;
715 		}
716 	}
717 #endif
718 
719 	if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO)
720 		return false;
721 
722 	if (close(pair->dest_fd)) {
723 		message_error(_("%s: Closing the file failed: %s"),
724 				pair->dest_name, strerror(errno));
725 
726 		// Closing destination file failed, so we cannot trust its
727 		// contents. Get rid of junk:
728 		io_unlink(pair->dest_name, &pair->dest_st);
729 		free(pair->dest_name);
730 		return true;
731 	}
732 
733 	// If the operation using this file wasn't successful, we git rid
734 	// of the junk file.
735 	if (!success)
736 		io_unlink(pair->dest_name, &pair->dest_st);
737 
738 	free(pair->dest_name);
739 
740 	return false;
741 }
742 
743 
744 extern void
745 io_close(file_pair *pair, bool success)
746 {
747 	// Take care of sparseness at the end of the output file.
748 	if (success && pair->dest_try_sparse
749 			&& pair->dest_pending_sparse > 0) {
750 		// Seek forward one byte less than the size of the pending
751 		// hole, then write one zero-byte. This way the file grows
752 		// to its correct size. An alternative would be to use
753 		// ftruncate() but that isn't portable enough (e.g. it
754 		// doesn't work with FAT on Linux; FAT isn't that important
755 		// since it doesn't support sparse files anyway, but we don't
756 		// want to create corrupt files on it).
757 		if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1,
758 				SEEK_CUR) == -1) {
759 			message_error(_("%s: Seeking failed when trying "
760 					"to create a sparse file: %s"),
761 					pair->dest_name, strerror(errno));
762 			success = false;
763 		} else {
764 			const uint8_t zero[1] = { '\0' };
765 			if (io_write_buf(pair, zero, 1))
766 				success = false;
767 		}
768 	}
769 
770 	signals_block();
771 
772 	// Copy the file attributes. We need to skip this if destination
773 	// file isn't open or it is standard output.
774 	if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO)
775 		io_copy_attrs(pair);
776 
777 	// Close the destination first. If it fails, we must not remove
778 	// the source file!
779 	if (io_close_dest(pair, success))
780 		success = false;
781 
782 	// Close the source file, and unlink it if the operation using this
783 	// file pair was successful and we haven't requested to keep the
784 	// source file.
785 	io_close_src(pair, success);
786 
787 	signals_unblock();
788 
789 	return;
790 }
791 
792 
793 extern size_t
794 io_read(file_pair *pair, io_buf *buf_union, size_t size)
795 {
796 	// We use small buffers here.
797 	assert(size < SSIZE_MAX);
798 
799 	uint8_t *buf = buf_union->u8;
800 	size_t left = size;
801 
802 	while (left > 0) {
803 		const ssize_t amount = read(pair->src_fd, buf, left);
804 
805 		if (amount == 0) {
806 			pair->src_eof = true;
807 			break;
808 		}
809 
810 		if (amount == -1) {
811 			if (errno == EINTR) {
812 				if (user_abort)
813 					return SIZE_MAX;
814 
815 				continue;
816 			}
817 
818 			message_error(_("%s: Read error: %s"),
819 					pair->src_name, strerror(errno));
820 
821 			// FIXME Is this needed?
822 			pair->src_eof = true;
823 
824 			return SIZE_MAX;
825 		}
826 
827 		buf += (size_t)(amount);
828 		left -= (size_t)(amount);
829 	}
830 
831 	return size - left;
832 }
833 
834 
835 extern bool
836 io_pread(file_pair *pair, io_buf *buf, size_t size, off_t pos)
837 {
838 	// Using lseek() and read() is more portable than pread() and
839 	// for us it is as good as real pread().
840 	if (lseek(pair->src_fd, pos, SEEK_SET) != pos) {
841 		message_error(_("%s: Error seeking the file: %s"),
842 				pair->src_name, strerror(errno));
843 		return true;
844 	}
845 
846 	const size_t amount = io_read(pair, buf, size);
847 	if (amount == SIZE_MAX)
848 		return true;
849 
850 	if (amount != size) {
851 		message_error(_("%s: Unexpected end of file"),
852 				pair->src_name);
853 		return true;
854 	}
855 
856 	return false;
857 }
858 
859 
860 static bool
861 is_sparse(const io_buf *buf)
862 {
863 	assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0);
864 
865 	for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i)
866 		if (buf->u64[i] != 0)
867 			return false;
868 
869 	return true;
870 }
871 
872 
873 static bool
874 io_write_buf(file_pair *pair, const uint8_t *buf, size_t size)
875 {
876 	assert(size < SSIZE_MAX);
877 
878 	while (size > 0) {
879 		const ssize_t amount = write(pair->dest_fd, buf, size);
880 		if (amount == -1) {
881 			if (errno == EINTR) {
882 				if (user_abort)
883 					return true;
884 
885 				continue;
886 			}
887 
888 			// Handle broken pipe specially. gzip and bzip2
889 			// don't print anything on SIGPIPE. In addition,
890 			// gzip --quiet uses exit status 2 (warning) on
891 			// broken pipe instead of whatever raise(SIGPIPE)
892 			// would make it return. It is there to hide "Broken
893 			// pipe" message on some old shells (probably old
894 			// GNU bash).
895 			//
896 			// We don't do anything special with --quiet, which
897 			// is what bzip2 does too. If we get SIGPIPE, we
898 			// will handle it like other signals by setting
899 			// user_abort, and get EPIPE here.
900 			if (errno != EPIPE)
901 				message_error(_("%s: Write error: %s"),
902 					pair->dest_name, strerror(errno));
903 
904 			return true;
905 		}
906 
907 		buf += (size_t)(amount);
908 		size -= (size_t)(amount);
909 	}
910 
911 	return false;
912 }
913 
914 
915 extern bool
916 io_write(file_pair *pair, const io_buf *buf, size_t size)
917 {
918 	assert(size <= IO_BUFFER_SIZE);
919 
920 	if (pair->dest_try_sparse) {
921 		// Check if the block is sparse (contains only zeros). If it
922 		// sparse, we just store the amount and return. We will take
923 		// care of actually skipping over the hole when we hit the
924 		// next data block or close the file.
925 		//
926 		// Since io_close() requires that dest_pending_sparse > 0
927 		// if the file ends with sparse block, we must also return
928 		// if size == 0 to avoid doing the lseek().
929 		if (size == IO_BUFFER_SIZE) {
930 			if (is_sparse(buf)) {
931 				pair->dest_pending_sparse += size;
932 				return false;
933 			}
934 		} else if (size == 0) {
935 			return false;
936 		}
937 
938 		// This is not a sparse block. If we have a pending hole,
939 		// skip it now.
940 		if (pair->dest_pending_sparse > 0) {
941 			if (lseek(pair->dest_fd, pair->dest_pending_sparse,
942 					SEEK_CUR) == -1) {
943 				message_error(_("%s: Seeking failed when "
944 						"trying to create a sparse "
945 						"file: %s"), pair->dest_name,
946 						strerror(errno));
947 				return true;
948 			}
949 
950 			pair->dest_pending_sparse = 0;
951 		}
952 	}
953 
954 	return io_write_buf(pair, buf->u8, size);
955 }
956