xref: /freebsd/contrib/libarchive/libarchive/archive_read_support_format_tar.c (revision bd66c1b43e33540205dbc1187c2f2a15c58b57ba)
1 /*-
2  * Copyright (c) 2003-2023 Tim Kientzle
3  * Copyright (c) 2011-2012 Michihiro NAKAJIMA
4  * Copyright (c) 2016 Martin Matuska
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "archive_platform.h"
29 
30 #ifdef HAVE_ERRNO_H
31 #include <errno.h>
32 #endif
33 #include <stddef.h>
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 #ifdef HAVE_STRING_H
38 #include <string.h>
39 #endif
40 
41 #include "archive.h"
42 #include "archive_acl_private.h" /* For ACL parsing routines. */
43 #include "archive_entry.h"
44 #include "archive_entry_locale.h"
45 #include "archive_private.h"
46 #include "archive_read_private.h"
47 
/*
 * Yield the smaller of two values (the second one when they compare equal).
 * This is a function-like macro: an argument may be evaluated twice, so
 * never pass an expression with side effects.
 */
#define tar_min(a,b) ((b) > (a) ? (a) : (b))
49 
/*
 * Byte layout of a POSIX 'ustar' header record.
 *
 * Every member is a plain character array, so the structure has no
 * padding and can be overlaid directly on the bytes read from the
 * archive.  The member sizes add up to 500 bytes.
 */
struct archive_entry_header_ustar {
	char	name[100];
	/* mode, uid, gid, size and mtime are checked as numeric fields
	 * when bidding on an archive. */
	char	mode[8];
	char	uid[8];
	char	gid[8];
	char	size[12];
	char	mtime[12];
	char	checksum[8];
	/* Single byte selecting how the record is interpreted. */
	char	typeflag[1];
	char	linkname[100];	/* "old format" header ends here */
	char	magic[6];	/* For POSIX: "ustar\0" */
	char	version[2];	/* For POSIX: "00" */
	char	uname[32];
	char	gname[32];
	/* Also checked as numeric fields when bidding. */
	char	rdevmajor[8];
	char	rdevminor[8];
	char	prefix[155];
};
71 
/*
 * Byte layout of a GNU tar header record.
 *
 * As with the ustar layout, every member is a character array, so the
 * structures below contain no padding.
 */

/* One entry of the sparse map embedded in a GNU header (24 bytes). */
struct gnu_sparse {
	char	offset[12];
	char	numbytes[12];
};

/*
 * The GNU header proper.  The first 257 bytes (name .. linkname) have
 * the same sizes as in the ustar layout; the member sizes add up to
 * 495 bytes in total.
 */
struct archive_entry_header_gnutar {
	char	name[100];
	char	mode[8];
	char	uid[8];
	char	gid[8];
	char	size[12];
	char	mtime[12];
	char	checksum[8];
	char	typeflag[1];
	char	linkname[100];
	char	magic[8];  /* "ustar  \0" (note blank/blank/null at end) */
	char	uname[32];
	char	gname[32];
	char	rdevmajor[8];
	char	rdevminor[8];
	char	atime[12];
	char	ctime[12];
	char	offset[12];
	char	longnames[4];
	char	unused[1];
	/* Up to four sparse-map entries stored inline in the header. */
	struct gnu_sparse sparse[4];
	char	isextended[1];
	char	realsize[12];
	/*
	 * Old GNU format doesn't use POSIX 'prefix' field; they use
	 * the 'L' (longname) entry instead.
	 */
};
108 
/*
 * Data specific to this format.
 */

/*
 * One node of the singly linked list describing the body of the
 * current entry.
 */
struct sparse_block {
	/* Following node, or NULL at the end of the list. */
	struct sparse_block	*next;
	/* Offset reported to the caller for the next bytes of this block;
	 * advanced as data is handed out. */
	int64_t	offset;
	/* Bytes of this block not yet handed out. */
	int64_t	remaining;
	/* Non-zero marks a hole: the data reader does not return it to
	 * the caller and it is not registered on the entry. */
	int hole;
};
118 
119 struct tar {
120 	struct archive_string	 entry_pathname;
121 	/* For "GNU.sparse.name" and other similar path extensions. */
122 	struct archive_string	 entry_pathname_override;
123 	struct archive_string	 entry_uname;
124 	struct archive_string	 entry_gname;
125 	struct archive_string	 entry_linkpath;
126 	struct archive_string	 line;
127 	int			 pax_hdrcharset_utf8;
128 	int64_t			 entry_bytes_remaining;
129 	int64_t			 entry_offset;
130 	int64_t			 entry_padding;
131 	int64_t 		 entry_bytes_unconsumed;
132 	int64_t			 realsize;
133 	struct sparse_block	*sparse_list;
134 	struct sparse_block	*sparse_last;
135 	int64_t			 sparse_offset;
136 	int64_t			 sparse_numbytes;
137 	int			 sparse_gnu_major;
138 	int			 sparse_gnu_minor;
139 	char			 sparse_gnu_attributes_seen;
140 	char			 filetype;
141 
142 	struct archive_string	 localname;
143 	struct archive_string_conv *opt_sconv;
144 	struct archive_string_conv *sconv;
145 	struct archive_string_conv *sconv_acl;
146 	struct archive_string_conv *sconv_default;
147 	int			 init_default_conversion;
148 	int			 compat_2x;
149 	int			 process_mac_extensions;
150 	int			 read_concatenated_archives;
151 	int			 realsize_override;
152 };
153 
/*
 * Forward declarations of the file-local functions, grouped by name
 * family.  Signatures are unchanged.
 */

/* Callbacks handed to __archive_read_register_format(). */
static int	archive_read_format_tar_bid(struct archive_read *, int);
static int	archive_read_format_tar_options(struct archive_read *,
		    const char *, const char *);
static int	archive_read_format_tar_read_header(struct archive_read *,
		    struct archive_entry *);
static int	archive_read_format_tar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, int64_t *offset);
static int	archive_read_format_tar_skip(struct archive_read *a);
static int	archive_read_format_tar_cleanup(struct archive_read *);

/* Header records: tar_read_header() and the header_*() family. */
static int	tar_read_header(struct archive_read *, struct tar *,
		    struct archive_entry *, size_t *);
static int	header_common(struct archive_read *,  struct tar *,
		    struct archive_entry *, const void *);
static int	header_old_tar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_ustar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_gnutar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_gnu_longlink(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_gnu_longname(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_volume(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_pax_global(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, size_t *);
static int	header_pax_extension(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *, size_t *);
static int	header_Solaris_ACL(struct archive_read *,  struct tar *,
		    struct archive_entry *, const void *, size_t *);
static int	archive_block_is_null(const char *p);
static int	checksum(struct archive_read *, const void *);

/* The gnu_*() family. */
static int	gnu_add_sparse_entry(struct archive_read *, struct tar *,
		    int64_t offset, int64_t remaining);
static void	gnu_clear_sparse_list(struct tar *);
static int	gnu_sparse_old_read(struct archive_read *, struct tar *,
		    const struct archive_entry_header_gnutar *header, size_t *);
static int	gnu_sparse_old_parse(struct archive_read *, struct tar *,
		    const struct gnu_sparse *sparse, int length);
static int	gnu_sparse_01_parse(struct archive_read *, struct tar *,
		    const char *, size_t);
static ssize_t	gnu_sparse_10_read(struct archive_read *, struct tar *,
		    size_t *);

/* The pax_*() family. */
static int 	pax_attribute(struct archive_read *, struct tar *,
		    struct archive_entry *, const char *key, size_t key_length,
		    size_t value_length, size_t *unconsumed);
static int	pax_attribute_LIBARCHIVE_xattr(struct archive_entry *,
		    const char *, size_t, const char *, size_t);
static int	pax_attribute_SCHILY_acl(struct archive_read *, struct tar *,
		    struct archive_entry *, size_t, int);
static int	pax_attribute_SUN_holesdata(struct archive_read *, struct tar *,
		    struct archive_entry *, const char *, size_t);
static void	pax_time(const char *, size_t, int64_t *sec, long *nanos);

/* Mac metadata entries. */
static int	is_mac_metadata_entry(struct archive_entry *entry);
static int	read_mac_metadata_blob(struct archive_read *,
		    struct archive_entry *, size_t *);

/* Remaining helpers. */
static ssize_t	readline(struct archive_read *, struct tar *, const char **,
		    ssize_t limit, size_t *);
static int	read_body_to_string(struct archive_read *, struct tar *,
		    struct archive_string *, const void *h, size_t *);
static int	read_bytes_to_string(struct archive_read *,
		    struct archive_string *, size_t, size_t *);
static void	tar_flush_unconsumed(struct archive_read *, size_t *);
static int64_t	tar_atol(const char *, size_t);
static int64_t	tar_atol10(const char *, size_t);
static int64_t	tar_atol256(const char *, size_t);
static int64_t	tar_atol8(const char *, size_t);
static int	tohex(int c);
static char	*base64_decode(const char *, size_t, size_t *);
static char	*url_decode(const char *, size_t);
226 
/*
 * Sanity limits.
 *
 * Each value must be small enough that a maliciously crafted archive
 * cannot make us allocate extreme amounts of memory, yet large enough
 * for every legitimate value.  Expect these to be tuned as more
 * experience is gathered.
 */
/* Longest uname or gname: 64 KiB. */
static const size_t guname_limit = 64 * 1024;
/* Longest path name: 1 MiB. */
static const size_t pathname_limit = 1024 * 1024;
/* Longest sparse map: 8 MiB. */
static const size_t sparse_map_limit = 8 * 1024 * 1024;
/* Longest xattr: 16 MiB. */
static const size_t xattr_limit = 16 * 1024 * 1024;
/* Longest fflags. */
static const size_t fflags_limit = 512;
/* Longest textual ACL: 128 KiB. */
static const size_t acl_limit = 128 * 1024;
/* Entry size bound: 2^60 - 1 bytes, i.e. just under 1 EiB. */
static const int64_t entry_limit = 0xfffffffffffffffLL;
239 
240 int
archive_read_support_format_gnutar(struct archive * a)241 archive_read_support_format_gnutar(struct archive *a)
242 {
243 	archive_check_magic(a, ARCHIVE_READ_MAGIC,
244 	    ARCHIVE_STATE_NEW, "archive_read_support_format_gnutar");
245 	return (archive_read_support_format_tar(a));
246 }
247 
248 
249 int
archive_read_support_format_tar(struct archive * _a)250 archive_read_support_format_tar(struct archive *_a)
251 {
252 	struct archive_read *a = (struct archive_read *)_a;
253 	struct tar *tar;
254 	int r;
255 
256 	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
257 	    ARCHIVE_STATE_NEW, "archive_read_support_format_tar");
258 
259 	tar = calloc(1, sizeof(*tar));
260 	if (tar == NULL) {
261 		archive_set_error(&a->archive, ENOMEM,
262 		    "Can't allocate tar data");
263 		return (ARCHIVE_FATAL);
264 	}
265 #ifdef HAVE_COPYFILE_H
266 	/* Set this by default on Mac OS. */
267 	tar->process_mac_extensions = 1;
268 #endif
269 
270 	r = __archive_read_register_format(a, tar, "tar",
271 	    archive_read_format_tar_bid,
272 	    archive_read_format_tar_options,
273 	    archive_read_format_tar_read_header,
274 	    archive_read_format_tar_read_data,
275 	    archive_read_format_tar_skip,
276 	    NULL,
277 	    archive_read_format_tar_cleanup,
278 	    NULL,
279 	    NULL);
280 
281 	if (r != ARCHIVE_OK)
282 		free(tar);
283 	return (ARCHIVE_OK);
284 }
285 
286 static int
archive_read_format_tar_cleanup(struct archive_read * a)287 archive_read_format_tar_cleanup(struct archive_read *a)
288 {
289 	struct tar *tar;
290 
291 	tar = (struct tar *)(a->format->data);
292 	gnu_clear_sparse_list(tar);
293 	archive_string_free(&tar->entry_pathname);
294 	archive_string_free(&tar->entry_pathname_override);
295 	archive_string_free(&tar->entry_uname);
296 	archive_string_free(&tar->entry_gname);
297 	archive_string_free(&tar->entry_linkpath);
298 	archive_string_free(&tar->line);
299 	archive_string_free(&tar->localname);
300 	free(tar);
301 	(a->format->data) = NULL;
302 	return (ARCHIVE_OK);
303 }
304 
/*
 * Validate number field
 *
 * This has to be pretty lenient in order to accommodate the enormous
 * variety of tar writers in the world:
 *  = POSIX (IEEE Std 1003.1-1988) ustar requires octal values with leading
 *    zeros and allows fields to be terminated with space or null characters
 *  = Many writers use different termination (in particular, libarchive
 *    omits terminator bytes to squeeze one or two more digits)
 *  = Many writers pad with space and omit leading zeros
 *  = GNU tar and star write base-256 values if numbers are too
 *    big to be represented in octal
 *
 *  Examples of specific tar headers that we should support:
 *  = Perl Archive::Tar terminates uid, gid, devminor and devmajor with two
 *    null bytes, pads size with spaces and other numeric fields with zeroes
 *  = plexus-archiver prior to 2.6.3 (before switching to commons-compress)
 *    may have uid and gid fields filled with spaces without any octal digits
 *    at all and pads all numeric fields with spaces
 *
 * This should tolerate all variants in use.  It will reject a field
 * in which anything other than space/NUL padding follows the digits.
 *
 * Parameters:
 *   p_field  the raw field bytes (not necessarily NUL-terminated)
 *   i_size   number of bytes in the field
 *
 * Returns 1 when the field is acceptable, 0 otherwise.  Accepted are:
 *   - a zero-length field (nothing to inspect);
 *   - a field whose first byte is 0x80, 0xff or NUL (no further checks);
 *   - optional leading spaces, then optional octal digits, then only
 *     spaces and NULs up to the end of the field.
 */
static int
validate_number_field(const char *p_field, size_t i_size)
{
	size_t pos = 0;
	unsigned char marker;

	/*
	 * Never look at p_field[0] of an empty field.  The result matches
	 * what the scan below yields for zero bytes.
	 */
	if (i_size == 0)
		return 1;

	marker = (unsigned char)p_field[0];
	if (marker == 128 || marker == 255 || marker == 0) {
		/* Base-256 marker, there's nothing we can check. */
		return 1;
	}

	/* Must be octal: skip any leading spaces ... */
	while (pos < i_size && p_field[pos] == ' ')
		++pos;
	/* ... then the octal digits themselves. */
	while (pos < i_size && p_field[pos] >= '0' && p_field[pos] <= '7')
		++pos;
	/* Any remaining characters must be space or NUL padding. */
	for (; pos < i_size; ++pos) {
		if (p_field[pos] != ' ' && p_field[pos] != 0)
			return 0;
	}
	return 1;
}
356 
357 static int
archive_read_format_tar_bid(struct archive_read * a,int best_bid)358 archive_read_format_tar_bid(struct archive_read *a, int best_bid)
359 {
360 	int bid;
361 	const char *h;
362 	const struct archive_entry_header_ustar *header;
363 
364 	(void)best_bid; /* UNUSED */
365 
366 	bid = 0;
367 
368 	/* Now let's look at the actual header and see if it matches. */
369 	h = __archive_read_ahead(a, 512, NULL);
370 	if (h == NULL)
371 		return (-1);
372 
373 	/* If it's an end-of-archive mark, we can handle it. */
374 	if (h[0] == 0 && archive_block_is_null(h)) {
375 		/*
376 		 * Usually, I bid the number of bits verified, but
377 		 * in this case, 4096 seems excessive so I picked 10 as
378 		 * an arbitrary but reasonable-seeming value.
379 		 */
380 		return (10);
381 	}
382 
383 	/* If it's not an end-of-archive mark, it must have a valid checksum.*/
384 	if (!checksum(a, h))
385 		return (0);
386 	bid += 48;  /* Checksum is usually 6 octal digits. */
387 
388 	header = (const struct archive_entry_header_ustar *)h;
389 
390 	/* Recognize POSIX formats. */
391 	if ((memcmp(header->magic, "ustar\0", 6) == 0)
392 	    && (memcmp(header->version, "00", 2) == 0))
393 		bid += 56;
394 
395 	/* Recognize GNU tar format. */
396 	if ((memcmp(header->magic, "ustar ", 6) == 0)
397 	    && (memcmp(header->version, " \0", 2) == 0))
398 		bid += 56;
399 
400 	/* Type flag must be null, digit or A-Z, a-z. */
401 	if (header->typeflag[0] != 0 &&
402 	    !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9') &&
403 	    !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z') &&
404 	    !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') )
405 		return (0);
406 	bid += 2;  /* 6 bits of variation in an 8-bit field leaves 2 bits. */
407 
408 	/*
409 	 * Check format of mode/uid/gid/mtime/size/rdevmajor/rdevminor fields.
410 	 */
411 	if (validate_number_field(header->mode, sizeof(header->mode)) == 0
412 	    || validate_number_field(header->uid, sizeof(header->uid)) == 0
413 	    || validate_number_field(header->gid, sizeof(header->gid)) == 0
414 	    || validate_number_field(header->mtime, sizeof(header->mtime)) == 0
415 	    || validate_number_field(header->size, sizeof(header->size)) == 0
416 	    || validate_number_field(header->rdevmajor, sizeof(header->rdevmajor)) == 0
417 	    || validate_number_field(header->rdevminor, sizeof(header->rdevminor)) == 0) {
418 		bid = 0;
419 	}
420 
421 	return (bid);
422 }
423 
424 static int
archive_read_format_tar_options(struct archive_read * a,const char * key,const char * val)425 archive_read_format_tar_options(struct archive_read *a,
426     const char *key, const char *val)
427 {
428 	struct tar *tar;
429 	int ret = ARCHIVE_FAILED;
430 
431 	tar = (struct tar *)(a->format->data);
432 	if (strcmp(key, "compat-2x")  == 0) {
433 		/* Handle UTF-8 filenames as libarchive 2.x */
434 		tar->compat_2x = (val != NULL && val[0] != 0);
435 		tar->init_default_conversion = tar->compat_2x;
436 		return (ARCHIVE_OK);
437 	} else if (strcmp(key, "hdrcharset")  == 0) {
438 		if (val == NULL || val[0] == 0)
439 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
440 			    "tar: hdrcharset option needs a character-set name");
441 		else {
442 			tar->opt_sconv =
443 			    archive_string_conversion_from_charset(
444 				&a->archive, val, 0);
445 			if (tar->opt_sconv != NULL)
446 				ret = ARCHIVE_OK;
447 			else
448 				ret = ARCHIVE_FATAL;
449 		}
450 		return (ret);
451 	} else if (strcmp(key, "mac-ext") == 0) {
452 		tar->process_mac_extensions = (val != NULL && val[0] != 0);
453 		return (ARCHIVE_OK);
454 	} else if (strcmp(key, "read_concatenated_archives") == 0) {
455 		tar->read_concatenated_archives = (val != NULL && val[0] != 0);
456 		return (ARCHIVE_OK);
457 	}
458 
459 	/* Note: The "warn" return is just to inform the options
460 	 * supervisor that we didn't handle it.  It will generate
461 	 * a suitable error if no one used this option. */
462 	return (ARCHIVE_WARN);
463 }
464 
/*
 * Utility function that centralizes the bookkeeping of input we have
 * looked at but not yet consumed.  Anything outstanding is consumed
 * here, because the callers are about to issue further read-aheads.
 *
 * '*unconsumed' is the number of pending bytes; it is zero on return.
 */
static void
tar_flush_unconsumed(struct archive_read *a, size_t *unconsumed)
{
	if (*unconsumed == 0)
		return;

	/*
	 * Debugging aid (not enabled): fetching the pending bytes with
	 * __archive_read_ahead() and overwriting them with 0xff before
	 * consuming would poison the claimed space and make any code
	 * still using it fail visibly.  Currently that WILL break
	 * things, so only do it while chasing such an issue.
	 */
	__archive_read_consume(a, *unconsumed);
	*unconsumed = 0;
}
486 
487 /*
488  * The function invoked by archive_read_next_header().  This
489  * just sets up a few things and then calls the internal
490  * tar_read_header() function below.
491  */
492 static int
archive_read_format_tar_read_header(struct archive_read * a,struct archive_entry * entry)493 archive_read_format_tar_read_header(struct archive_read *a,
494     struct archive_entry *entry)
495 {
496 	/*
497 	 * When converting tar archives to cpio archives, it is
498 	 * essential that each distinct file have a distinct inode
499 	 * number.  To simplify this, we keep a static count here to
500 	 * assign fake dev/inode numbers to each tar entry.  Note that
501 	 * pax format archives may overwrite this with something more
502 	 * useful.
503 	 *
504 	 * Ideally, we would track every file read from the archive so
505 	 * that we could assign the same dev/ino pair to hardlinks,
506 	 * but the memory required to store a complete lookup table is
507 	 * probably not worthwhile just to support the relatively
508 	 * obscure tar->cpio conversion case.
509 	 */
510 	/* TODO: Move this into `struct tar` to avoid conflicts
511 	 * when reading multiple archives */
512 	static int default_inode;
513 	static int default_dev;
514 	struct tar *tar;
515 	const char *p;
516 	const wchar_t *wp;
517 	int r;
518 	size_t l, unconsumed = 0;
519 
520 	/* Assign default device/inode values. */
521 	archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. */
522 	archive_entry_set_ino(entry, ++default_inode); /* Don't use zero. */
523 	/* Limit generated st_ino number to 16 bits. */
524 	if (default_inode >= 0xffff) {
525 		++default_dev;
526 		default_inode = 0;
527 	}
528 
529 	tar = (struct tar *)(a->format->data);
530 	tar->entry_offset = 0;
531 	gnu_clear_sparse_list(tar);
532 	tar->realsize = -1; /* Mark this as "unset" */
533 	tar->realsize_override = 0;
534 
535 	/* Setup default string conversion. */
536 	tar->sconv = tar->opt_sconv;
537 	if (tar->sconv == NULL) {
538 		if (!tar->init_default_conversion) {
539 			tar->sconv_default =
540 			    archive_string_default_conversion_for_read(&(a->archive));
541 			tar->init_default_conversion = 1;
542 		}
543 		tar->sconv = tar->sconv_default;
544 	}
545 
546 	r = tar_read_header(a, tar, entry, &unconsumed);
547 
548 	tar_flush_unconsumed(a, &unconsumed);
549 
550 	/*
551 	 * "non-sparse" files are really just sparse files with
552 	 * a single block.
553 	 */
554 	if (tar->sparse_list == NULL) {
555 		if (gnu_add_sparse_entry(a, tar, 0, tar->entry_bytes_remaining)
556 		    != ARCHIVE_OK)
557 			return (ARCHIVE_FATAL);
558 	} else {
559 		struct sparse_block *sb;
560 
561 		for (sb = tar->sparse_list; sb != NULL; sb = sb->next) {
562 			if (!sb->hole)
563 				archive_entry_sparse_add_entry(entry,
564 				    sb->offset, sb->remaining);
565 		}
566 	}
567 
568 	if (r == ARCHIVE_OK && archive_entry_filetype(entry) == AE_IFREG) {
569 		/*
570 		 * "Regular" entry with trailing '/' is really
571 		 * directory: This is needed for certain old tar
572 		 * variants and even for some broken newer ones.
573 		 */
574 		if ((wp = archive_entry_pathname_w(entry)) != NULL) {
575 			l = wcslen(wp);
576 			if (l > 0 && wp[l - 1] == L'/') {
577 				archive_entry_set_filetype(entry, AE_IFDIR);
578 				tar->entry_bytes_remaining = 0;
579 				tar->entry_padding = 0;
580 			}
581 		} else if ((p = archive_entry_pathname(entry)) != NULL) {
582 			l = strlen(p);
583 			if (l > 0 && p[l - 1] == '/') {
584 				archive_entry_set_filetype(entry, AE_IFDIR);
585 				tar->entry_bytes_remaining = 0;
586 				tar->entry_padding = 0;
587 			}
588 		}
589 	}
590 	return (r);
591 }
592 
593 static int
archive_read_format_tar_read_data(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)594 archive_read_format_tar_read_data(struct archive_read *a,
595     const void **buff, size_t *size, int64_t *offset)
596 {
597 	ssize_t bytes_read;
598 	struct tar *tar;
599 	struct sparse_block *p;
600 
601 	tar = (struct tar *)(a->format->data);
602 
603 	for (;;) {
604 		/* Remove exhausted entries from sparse list. */
605 		while (tar->sparse_list != NULL &&
606 		    tar->sparse_list->remaining == 0) {
607 			p = tar->sparse_list;
608 			tar->sparse_list = p->next;
609 			free(p);
610 		}
611 
612 		if (tar->entry_bytes_unconsumed) {
613 			__archive_read_consume(a, tar->entry_bytes_unconsumed);
614 			tar->entry_bytes_unconsumed = 0;
615 		}
616 
617 		/* If we're at end of file, return EOF. */
618 		if (tar->sparse_list == NULL ||
619 		    tar->entry_bytes_remaining == 0) {
620 			if (__archive_read_consume(a, tar->entry_padding) < 0)
621 				return (ARCHIVE_FATAL);
622 			tar->entry_padding = 0;
623 			*buff = NULL;
624 			*size = 0;
625 			*offset = tar->realsize;
626 			return (ARCHIVE_EOF);
627 		}
628 
629 		*buff = __archive_read_ahead(a, 1, &bytes_read);
630 		if (*buff == NULL) {
631 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
632 			    "Truncated tar archive"
633 			    " detected while reading data");
634 			return (ARCHIVE_FATAL);
635 		}
636 		if (bytes_read > tar->entry_bytes_remaining)
637 			bytes_read = (ssize_t)tar->entry_bytes_remaining;
638 		/* Don't read more than is available in the
639 		 * current sparse block. */
640 		if (tar->sparse_list->remaining < bytes_read)
641 			bytes_read = (ssize_t)tar->sparse_list->remaining;
642 		*size = bytes_read;
643 		*offset = tar->sparse_list->offset;
644 		tar->sparse_list->remaining -= bytes_read;
645 		tar->sparse_list->offset += bytes_read;
646 		tar->entry_bytes_remaining -= bytes_read;
647 		tar->entry_bytes_unconsumed = bytes_read;
648 
649 		if (!tar->sparse_list->hole)
650 			return (ARCHIVE_OK);
651 		/* Current is hole data and skip this. */
652 	}
653 }
654 
655 static int
archive_read_format_tar_skip(struct archive_read * a)656 archive_read_format_tar_skip(struct archive_read *a)
657 {
658 	int64_t bytes_skipped;
659 	int64_t request;
660 	struct sparse_block *p;
661 	struct tar* tar;
662 
663 	tar = (struct tar *)(a->format->data);
664 
665 	/* Do not consume the hole of a sparse file. */
666 	request = 0;
667 	for (p = tar->sparse_list; p != NULL; p = p->next) {
668 		if (!p->hole) {
669 			if (p->remaining >= INT64_MAX - request) {
670 				return ARCHIVE_FATAL;
671 			}
672 			request += p->remaining;
673 		}
674 	}
675 	if (request > tar->entry_bytes_remaining)
676 		request = tar->entry_bytes_remaining;
677 	request += tar->entry_padding + tar->entry_bytes_unconsumed;
678 
679 	bytes_skipped = __archive_read_consume(a, request);
680 	if (bytes_skipped < 0)
681 		return (ARCHIVE_FATAL);
682 
683 	tar->entry_bytes_remaining = 0;
684 	tar->entry_bytes_unconsumed = 0;
685 	tar->entry_padding = 0;
686 
687 	/* Free the sparse list. */
688 	gnu_clear_sparse_list(tar);
689 
690 	return (ARCHIVE_OK);
691 }
692 
693 /*
694  * This function reads and interprets all of the headers associated
695  * with a single entry.
696  */
697 static int
tar_read_header(struct archive_read * a,struct tar * tar,struct archive_entry * entry,size_t * unconsumed)698 tar_read_header(struct archive_read *a, struct tar *tar,
699     struct archive_entry *entry, size_t *unconsumed)
700 {
701 	ssize_t bytes;
702 	int err = ARCHIVE_OK, err2;
703 	int eof_fatal = 0; /* EOF is okay at some points... */
704 	const char *h;
705 	const struct archive_entry_header_ustar *header;
706 	const struct archive_entry_header_gnutar *gnuheader;
707 
708 	/* Bitmask of what header types we've seen. */
709 	int32_t seen_headers = 0;
710 	static const int32_t seen_A_header = 1;
711 	static const int32_t seen_g_header = 2;
712 	static const int32_t seen_K_header = 4;
713 	static const int32_t seen_L_header = 8;
714 	static const int32_t seen_V_header = 16;
715 	static const int32_t seen_x_header = 32; /* Also X */
716 	static const int32_t seen_mac_metadata = 512;
717 
718 	tar->pax_hdrcharset_utf8 = 1;
719 	tar->sparse_gnu_attributes_seen = 0;
720 	archive_string_empty(&(tar->entry_gname));
721 	archive_string_empty(&(tar->entry_pathname));
722 	archive_string_empty(&(tar->entry_pathname_override));
723 	archive_string_empty(&(tar->entry_uname));
724 	archive_string_empty(&tar->entry_linkpath);
725 
726 	/* Ensure format is set. */
727 	if (a->archive.archive_format_name == NULL) {
728 		a->archive.archive_format = ARCHIVE_FORMAT_TAR;
729 		a->archive.archive_format_name = "tar";
730 	}
731 
732 	/*
733 	 * TODO: Write global/default pax options into
734 	 * 'entry' struct here before overwriting with
735 	 * file-specific options.
736 	 */
737 
738 	/* Loop over all the headers needed for the next entry */
739 	for (;;) {
740 
741 		/* Find the next valid header record. */
742 		while (1) {
743 			tar_flush_unconsumed(a, unconsumed);
744 
745 			/* Read 512-byte header record */
746 			h = __archive_read_ahead(a, 512, &bytes);
747 			if (bytes == 0) { /* EOF at a block boundary. */
748 				if (eof_fatal) {
749 					/* We've read a special header already;
750 					 * if there's no regular header, then this is
751 					 * a premature EOF. */
752 					archive_set_error(&a->archive, EINVAL,
753 							  "Damaged tar archive");
754 					return (ARCHIVE_FATAL);
755 				} else {
756 					return (ARCHIVE_EOF);
757 				}
758 			}
759 			if (h == NULL) {  /* Short block at EOF; this is bad. */
760 				archive_set_error(&a->archive,
761 				    ARCHIVE_ERRNO_FILE_FORMAT,
762 				    "Truncated tar archive"
763 				    " detected while reading next heaader");
764 				return (ARCHIVE_FATAL);
765 			}
766 			*unconsumed += 512;
767 
768 			if (h[0] == 0 && archive_block_is_null(h)) {
769 				/* We found a NULL block which indicates end-of-archive */
770 
771 				if (tar->read_concatenated_archives) {
772 					/* We're ignoring NULL blocks, so keep going. */
773 					continue;
774 				}
775 
776 				/* Try to consume a second all-null record, as well. */
777 				/* If we can't, that's okay. */
778 				tar_flush_unconsumed(a, unconsumed);
779 				h = __archive_read_ahead(a, 512, NULL);
780 				if (h != NULL && h[0] == 0 && archive_block_is_null(h))
781 						__archive_read_consume(a, 512);
782 
783 				archive_clear_error(&a->archive);
784 				return (ARCHIVE_EOF);
785 			}
786 
787 			/* This is NOT a null block, so it must be a valid header. */
788 			if (!checksum(a, h)) {
789 				tar_flush_unconsumed(a, unconsumed);
790 				archive_set_error(&a->archive, EINVAL, "Damaged tar archive");
791 				/* If we've read some critical information (pax headers, etc)
792 				 * and _then_ see a bad header, we can't really recover. */
793 				if (eof_fatal) {
794 					return (ARCHIVE_FATAL);
795 				} else {
796 					return (ARCHIVE_RETRY);
797 				}
798 			}
799 			break;
800 		}
801 
802 		/* Determine the format variant. */
803 		header = (const struct archive_entry_header_ustar *)h;
804 		switch(header->typeflag[0]) {
805 		case 'A': /* Solaris tar ACL */
806 			if (seen_headers & seen_A_header) {
807 				return (ARCHIVE_FATAL);
808 			}
809 			seen_headers |= seen_A_header;
810 			a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
811 			a->archive.archive_format_name = "Solaris tar";
812 			err2 = header_Solaris_ACL(a, tar, entry, h, unconsumed);
813 			break;
814 		case 'g': /* POSIX-standard 'g' header. */
815 			if (seen_headers & seen_g_header) {
816 				return (ARCHIVE_FATAL);
817 			}
818 			seen_headers |= seen_g_header;
819 			a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
820 			a->archive.archive_format_name = "POSIX pax interchange format";
821 			err2 = header_pax_global(a, tar, entry, h, unconsumed);
822 			break;
823 		case 'K': /* Long link name (GNU tar, others) */
824 			if (seen_headers & seen_K_header) {
825 				return (ARCHIVE_FATAL);
826 			}
827 			seen_headers |= seen_K_header;
828 			err2 = header_gnu_longlink(a, tar, entry, h, unconsumed);
829 			break;
830 		case 'L': /* Long filename (GNU tar, others) */
831 			if (seen_headers & seen_L_header) {
832 				return (ARCHIVE_FATAL);
833 			}
834 			seen_headers |= seen_L_header;
835 			err2 = header_gnu_longname(a, tar, entry, h, unconsumed);
836 			break;
837 		case 'V': /* GNU volume header */
838 			if (seen_headers & seen_V_header) {
839 				return (ARCHIVE_FATAL);
840 			}
841 			seen_headers |= seen_V_header;
842 			err2 = header_volume(a, tar, entry, h, unconsumed);
843 			break;
844 		case 'X': /* Used by SUN tar; same as 'x'. */
845 			if (seen_headers & seen_x_header) {
846 				return (ARCHIVE_FATAL);
847 			}
848 			seen_headers |= seen_x_header;
849 			a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
850 			a->archive.archive_format_name =
851 				"POSIX pax interchange format (Sun variant)";
852 			err2 = header_pax_extension(a, tar, entry, h, unconsumed);
853 			break;
854 		case 'x': /* POSIX-standard 'x' header. */
855 			if (seen_headers & seen_x_header) {
856 				return (ARCHIVE_FATAL);
857 			}
858 			seen_headers |= seen_x_header;
859 			a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
860 			a->archive.archive_format_name = "POSIX pax interchange format";
861 			err2 = header_pax_extension(a, tar, entry, h, unconsumed);
862 			break;
863 		default: /* Regular header: Legacy tar, GNU tar, or ustar */
864 			gnuheader = (const struct archive_entry_header_gnutar *)h;
865 			if (memcmp(gnuheader->magic, "ustar  \0", 8) == 0) {
866 				a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR;
867 				a->archive.archive_format_name = "GNU tar format";
868 				err2 = header_gnutar(a, tar, entry, h, unconsumed);
869 			} else if (memcmp(header->magic, "ustar", 5) == 0) {
870 				if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
871 					a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR;
872 					a->archive.archive_format_name = "POSIX ustar format";
873 				}
874 				err2 = header_ustar(a, tar, entry, h);
875 			} else {
876 				a->archive.archive_format = ARCHIVE_FORMAT_TAR;
877 				a->archive.archive_format_name = "tar (non-POSIX)";
878 				err2 = header_old_tar(a, tar, entry, h);
879 			}
880 			err = err_combine(err, err2);
881 			/* We return warnings or success as-is.  Anything else is fatal. */
882 			if (err < ARCHIVE_WARN) {
883 				return (ARCHIVE_FATAL);
884 			}
885 			/* Filename of the form `._filename` is an AppleDouble
886 			 * extension entry.  The body is the macOS metadata blob;
887 			 * this is followed by another entry with the actual
888 			 * regular file data.
889 			 * This design has two drawbacks:
890 			 * = it's brittle; you might just have a file with such a name
891 			 * = it duplicates any long pathname extensions
892 			 *
893 			 * TODO: This probably shouldn't be here at all.  Consider
894 			 * just returning the contents as a regular entry here and
895 			 * then dealing with it when we write data to disk.
896 			 */
897 			if (tar->process_mac_extensions
898 			    && ((seen_headers & seen_mac_metadata) == 0)
899 			    && is_mac_metadata_entry(entry)) {
900 				err2 = read_mac_metadata_blob(a, entry, unconsumed);
901 				if (err2 < ARCHIVE_WARN) {
902 					return (ARCHIVE_FATAL);
903 				}
904 				err = err_combine(err, err2);
905 				/* Note: Other headers can appear again. */
906 				seen_headers = seen_mac_metadata;
907 				break;
908 			}
909 
910 			/* Reconcile GNU sparse attributes */
911 			if (tar->sparse_gnu_attributes_seen) {
912 				/* Only 'S' (GNU sparse) and ustar '0' regular files can be sparse */
913 				if (tar->filetype != 'S' && tar->filetype != '0') {
914 					archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
915 							  "Non-regular file cannot be sparse");
916 					return (ARCHIVE_WARN);
917 				} else if (tar->sparse_gnu_major == 0 &&
918 				    tar->sparse_gnu_minor == 0) {
919 					/* Sparse map already parsed from 'x' header */
920 				} else if (tar->sparse_gnu_major == 0 &&
921 				    tar->sparse_gnu_minor == 1) {
922 					/* Sparse map already parsed from 'x' header */
923 				} else if (tar->sparse_gnu_major == 1 &&
924 				    tar->sparse_gnu_minor == 0) {
925 					/* Sparse map is prepended to file contents */
926 					ssize_t bytes_read;
927 					bytes_read = gnu_sparse_10_read(a, tar, unconsumed);
928 					if (bytes_read < 0)
929 						return ((int)bytes_read);
930 					tar->entry_bytes_remaining -= bytes_read;
931 				} else {
932 					archive_set_error(&a->archive,
933 							  ARCHIVE_ERRNO_MISC,
934 							  "Unrecognized GNU sparse file format");
935 					return (ARCHIVE_WARN);
936 				}
937 			}
938 			return (err);
939 		}
940 
941 		/* We're between headers ... */
942 		err = err_combine(err, err2);
943 		if (err == ARCHIVE_FATAL)
944 			return (err);
945 
946 		/* The GNU volume header and the pax `g` global header
947 		 * are both allowed to be the only header in an
948 		 * archive.  If we've seen any other header, a
949 		 * following EOF is fatal. */
950 		if ((seen_headers & ~seen_V_header & ~seen_g_header) != 0) {
951 			eof_fatal = 1;
952 		}
953 	}
954 }
955 
956 /*
957  * Return true if block checksum is correct.
958  */
959 static int
checksum(struct archive_read * a,const void * h)960 checksum(struct archive_read *a, const void *h)
961 {
962 	const unsigned char *bytes;
963 	const struct archive_entry_header_ustar	*header;
964 	int check, sum;
965 	size_t i;
966 
967 	(void)a; /* UNUSED */
968 	bytes = (const unsigned char *)h;
969 	header = (const struct archive_entry_header_ustar *)h;
970 
971 	/* Checksum field must hold an octal number */
972 	for (i = 0; i < sizeof(header->checksum); ++i) {
973 		char c = header->checksum[i];
974 		if (c != ' ' && c != '\0' && (c < '0' || c > '7'))
975 			return 0;
976 	}
977 
978 	/*
979 	 * Test the checksum.  Note that POSIX specifies _unsigned_
980 	 * bytes for this calculation.
981 	 */
982 	sum = (int)tar_atol(header->checksum, sizeof(header->checksum));
983 	check = 0;
984 	for (i = 0; i < 148; i++)
985 		check += (unsigned char)bytes[i];
986 	for (; i < 156; i++)
987 		check += 32;
988 	for (; i < 512; i++)
989 		check += (unsigned char)bytes[i];
990 	if (sum == check)
991 		return (1);
992 
993 	/*
994 	 * Repeat test with _signed_ bytes, just in case this archive
995 	 * was created by an old BSD, Solaris, or HP-UX tar with a
996 	 * broken checksum calculation.
997 	 */
998 	check = 0;
999 	for (i = 0; i < 148; i++)
1000 		check += (signed char)bytes[i];
1001 	for (; i < 156; i++)
1002 		check += 32;
1003 	for (; i < 512; i++)
1004 		check += (signed char)bytes[i];
1005 	if (sum == check)
1006 		return (1);
1007 
1008 #if DONT_FAIL_ON_CRC_ERROR
1009 	/* Speed up fuzzing by pretending the checksum is always right. */
1010 	return (1);
1011 #else
1012 	return (0);
1013 #endif
1014 }
1015 
/*
 * Report whether the 512 bytes starting at `p` are all zero.
 * Returns 1 if every byte is NUL, 0 as soon as a non-zero byte is found.
 */
static int
archive_block_is_null(const char *p)
{
	const char *end = p + 512;

	while (p < end) {
		if (*p != '\0')
			return (0);
		p++;
	}
	return (1);
}
1029 
1030 /*
1031  * Interpret 'A' Solaris ACL header
1032  */
1033 static int
header_Solaris_ACL(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const void * h,size_t * unconsumed)1034 header_Solaris_ACL(struct archive_read *a, struct tar *tar,
1035     struct archive_entry *entry, const void *h, size_t *unconsumed)
1036 {
1037 	const struct archive_entry_header_ustar *header;
1038 	struct archive_string	 acl_text;
1039 	size_t size;
1040 	int err, acl_type;
1041 	int64_t type;
1042 	char *acl, *p;
1043 
1044 	header = (const struct archive_entry_header_ustar *)h;
1045 	size = (size_t)tar_atol(header->size, sizeof(header->size));
1046 	archive_string_init(&acl_text);
1047 	err = read_body_to_string(a, tar, &acl_text, h, unconsumed);
1048 	if (err != ARCHIVE_OK) {
1049 		archive_string_free(&acl_text);
1050 		return (err);
1051 	}
1052 
1053 	/* TODO: Examine the first characters to see if this
1054 	 * is an AIX ACL descriptor.  We'll likely never support
1055 	 * them, but it would be polite to recognize and warn when
1056 	 * we do see them. */
1057 
1058 	/* Leading octal number indicates ACL type and number of entries. */
1059 	p = acl = acl_text.s;
1060 	type = 0;
1061 	while (*p != '\0' && p < acl + size) {
1062 		if (*p < '0' || *p > '7') {
1063 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1064 			    "Malformed Solaris ACL attribute (invalid digit)");
1065 			archive_string_free(&acl_text);
1066 			return(ARCHIVE_WARN);
1067 		}
1068 		type <<= 3;
1069 		type += *p - '0';
1070 		if (type > 077777777) {
1071 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1072 			    "Malformed Solaris ACL attribute (count too large)");
1073 			archive_string_free(&acl_text);
1074 			return (ARCHIVE_WARN);
1075 		}
1076 		p++;
1077 	}
1078 	switch ((int)type & ~0777777) {
1079 	case 01000000:
1080 		/* POSIX.1e ACL */
1081 		acl_type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS;
1082 		break;
1083 	case 03000000:
1084 		/* NFSv4 ACL */
1085 		acl_type = ARCHIVE_ENTRY_ACL_TYPE_NFS4;
1086 		break;
1087 	default:
1088 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1089 		    "Malformed Solaris ACL attribute (unsupported type %o)",
1090 		    (int)type);
1091 		archive_string_free(&acl_text);
1092 		return (ARCHIVE_WARN);
1093 	}
1094 	p++;
1095 
1096 	if (p >= acl + size) {
1097 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1098 		    "Malformed Solaris ACL attribute (body overflow)");
1099 		archive_string_free(&acl_text);
1100 		return(ARCHIVE_WARN);
1101 	}
1102 
1103 	/* ACL text is null-terminated; find the end. */
1104 	size -= (p - acl);
1105 	acl = p;
1106 
1107 	while (*p != '\0' && p < acl + size)
1108 		p++;
1109 
1110 	if (tar->sconv_acl == NULL) {
1111 		tar->sconv_acl = archive_string_conversion_from_charset(
1112 		    &(a->archive), "UTF-8", 1);
1113 		if (tar->sconv_acl == NULL) {
1114 			archive_string_free(&acl_text);
1115 			return (ARCHIVE_FATAL);
1116 		}
1117 	}
1118 	archive_strncpy(&(tar->localname), acl, p - acl);
1119 	err = archive_acl_from_text_l(archive_entry_acl(entry),
1120 	    tar->localname.s, acl_type, tar->sconv_acl);
1121 	/* Workaround: Force perm_is_set() to be correct */
1122 	/* If this bit were stored in the ACL, this wouldn't be needed */
1123 	archive_entry_set_perm(entry, archive_entry_perm(entry));
1124 	if (err != ARCHIVE_OK) {
1125 		if (errno == ENOMEM) {
1126 			archive_set_error(&a->archive, ENOMEM,
1127 			    "Can't allocate memory for ACL");
1128 		} else
1129 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1130 			    "Malformed Solaris ACL attribute (unparsable)");
1131 	}
1132 	archive_string_free(&acl_text);
1133 	return (err);
1134 }
1135 
1136 /*
1137  * Interpret 'K' long linkname header.
1138  */
1139 static int
header_gnu_longlink(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const void * h,size_t * unconsumed)1140 header_gnu_longlink(struct archive_read *a, struct tar *tar,
1141     struct archive_entry *entry, const void *h, size_t *unconsumed)
1142 {
1143 	int err;
1144 
1145 	struct archive_string linkpath;
1146 	archive_string_init(&linkpath);
1147 	err = read_body_to_string(a, tar, &linkpath, h, unconsumed);
1148 	archive_entry_set_link(entry, linkpath.s);
1149 	archive_string_free(&linkpath);
1150 	return (err);
1151 }
1152 
1153 static int
set_conversion_failed_error(struct archive_read * a,struct archive_string_conv * sconv,const char * name)1154 set_conversion_failed_error(struct archive_read *a,
1155     struct archive_string_conv *sconv, const char *name)
1156 {
1157 	if (errno == ENOMEM) {
1158 		archive_set_error(&a->archive, ENOMEM,
1159 		    "Can't allocate memory for %s", name);
1160 		return (ARCHIVE_FATAL);
1161 	}
1162 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1163 	    "%s can't be converted from %s to current locale.",
1164 	    name, archive_string_conversion_charset_name(sconv));
1165 	return (ARCHIVE_WARN);
1166 }
1167 
1168 /*
1169  * Interpret 'L' long filename header.
1170  */
1171 static int
header_gnu_longname(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const void * h,size_t * unconsumed)1172 header_gnu_longname(struct archive_read *a, struct tar *tar,
1173     struct archive_entry *entry, const void *h, size_t *unconsumed)
1174 {
1175 	int err;
1176 	struct archive_string longname;
1177 
1178 	archive_string_init(&longname);
1179 	err = read_body_to_string(a, tar, &longname, h, unconsumed);
1180 	if (err == ARCHIVE_OK) {
1181 		if (archive_entry_copy_pathname_l(entry, longname.s,
1182 		    archive_strlen(&longname), tar->sconv) != 0)
1183 			err = set_conversion_failed_error(a, tar->sconv, "Pathname");
1184 	}
1185 	archive_string_free(&longname);
1186 	return (err);
1187 }
1188 
1189 /*
1190  * Interpret 'V' GNU tar volume header.
1191  */
1192 static int
header_volume(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const void * h,size_t * unconsumed)1193 header_volume(struct archive_read *a, struct tar *tar,
1194     struct archive_entry *entry, const void *h, size_t *unconsumed)
1195 {
1196 	const struct archive_entry_header_ustar *header;
1197 	int64_t size, to_consume;
1198 
1199 	(void)a; /* UNUSED */
1200 	(void)tar; /* UNUSED */
1201 	(void)entry; /* UNUSED */
1202 
1203 	header = (const struct archive_entry_header_ustar *)h;
1204 	size = tar_atol(header->size, sizeof(header->size));
1205 	if (size > (int64_t)pathname_limit) {
1206 		return (ARCHIVE_FATAL);
1207 	}
1208 	to_consume = ((size + 511) & ~511);
1209 	*unconsumed += to_consume;
1210 	return (ARCHIVE_OK);
1211 }
1212 
1213 /*
1214  * Read the next `size` bytes into the provided string.
1215  * Null-terminate the string.
1216  */
1217 static int
read_bytes_to_string(struct archive_read * a,struct archive_string * as,size_t size,size_t * unconsumed)1218 read_bytes_to_string(struct archive_read *a,
1219 		     struct archive_string *as, size_t size,
1220 		     size_t *unconsumed) {
1221 	const void *src;
1222 
1223 	/* Fail if we can't make our buffer big enough. */
1224 	if (archive_string_ensure(as, (size_t)size+1) == NULL) {
1225 		archive_set_error(&a->archive, ENOMEM,
1226 		    "No memory");
1227 		return (ARCHIVE_FATAL);
1228 	}
1229 
1230 	tar_flush_unconsumed(a, unconsumed);
1231 
1232 	/* Read the body into the string. */
1233 	src = __archive_read_ahead(a, size, NULL);
1234 	if (src == NULL) {
1235 		archive_set_error(&a->archive, EINVAL,
1236 		    "Truncated archive"
1237 		    " detected while reading metadata");
1238 		*unconsumed = 0;
1239 		return (ARCHIVE_FATAL);
1240 	}
1241 	memcpy(as->s, src, (size_t)size);
1242 	as->s[size] = '\0';
1243 	as->length = (size_t)size;
1244 	*unconsumed += size;
1245 	return (ARCHIVE_OK);
1246 }
1247 
1248 /*
1249  * Read body of an archive entry into an archive_string object.
1250  */
1251 static int
read_body_to_string(struct archive_read * a,struct tar * tar,struct archive_string * as,const void * h,size_t * unconsumed)1252 read_body_to_string(struct archive_read *a, struct tar *tar,
1253     struct archive_string *as, const void *h, size_t *unconsumed)
1254 {
1255 	int64_t size;
1256 	const struct archive_entry_header_ustar *header;
1257 	int r;
1258 
1259 	(void)tar; /* UNUSED */
1260 	header = (const struct archive_entry_header_ustar *)h;
1261 	size  = tar_atol(header->size, sizeof(header->size));
1262 	if (size > entry_limit) {
1263 		return (ARCHIVE_FATAL);
1264 	}
1265 	if ((size > (int64_t)pathname_limit) || (size < 0)) {
1266 		archive_string_empty(as);
1267 		int64_t to_consume = ((size + 511) & ~511);
1268 		if (to_consume != __archive_read_consume(a, to_consume)) {
1269 			return (ARCHIVE_FATAL);
1270 		}
1271 		archive_set_error(&a->archive, EINVAL,
1272 		    "Special header too large: %d > 1MiB",
1273 		    (int)size);
1274 		return (ARCHIVE_WARN);
1275 	}
1276 	r = read_bytes_to_string(a, as, size, unconsumed);
1277 	*unconsumed += 0x1ff & (-size);
1278 	return(r);
1279 }
1280 
1281 /*
1282  * Parse out common header elements.
1283  *
1284  * This would be the same as header_old_tar, except that the
1285  * filename is handled slightly differently for old and POSIX
1286  * entries  (POSIX entries support a 'prefix').  This factoring
1287  * allows header_old_tar and header_ustar
1288  * to handle filenames differently, while still putting most of the
1289  * common parsing into one place.
1290  */
1291 static int
header_common(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const void * h)1292 header_common(struct archive_read *a, struct tar *tar,
1293     struct archive_entry *entry, const void *h)
1294 {
1295 	const struct archive_entry_header_ustar	*header;
1296 	const char *existing_linkpath;
1297 	const wchar_t *existing_wcs_linkpath;
1298 	int     err = ARCHIVE_OK;
1299 
1300 	header = (const struct archive_entry_header_ustar *)h;
1301 
1302 	/* Parse out the numeric fields (all are octal) */
1303 
1304 	/* Split mode handling: Set filetype always, perm only if not already set */
1305 	archive_entry_set_filetype(entry,
1306 	    (mode_t)tar_atol(header->mode, sizeof(header->mode)));
1307 	if (!archive_entry_perm_is_set(entry)) {
1308 		archive_entry_set_perm(entry,
1309 			(mode_t)tar_atol(header->mode, sizeof(header->mode)));
1310 	}
1311 	if (!archive_entry_uid_is_set(entry)) {
1312 		archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid)));
1313 	}
1314 	if (!archive_entry_gid_is_set(entry)) {
1315 		archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid)));
1316 	}
1317 
1318 	tar->entry_bytes_remaining = tar_atol(header->size, sizeof(header->size));
1319 	if (tar->entry_bytes_remaining < 0) {
1320 		tar->entry_bytes_remaining = 0;
1321 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1322 		    "Tar entry has negative size");
1323 		return (ARCHIVE_FATAL);
1324 	}
1325 	if (tar->entry_bytes_remaining > entry_limit) {
1326 		tar->entry_bytes_remaining = 0;
1327 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1328 		    "Tar entry size overflow");
1329 		return (ARCHIVE_FATAL);
1330 	}
1331 	if (!tar->realsize_override) {
1332 		tar->realsize = tar->entry_bytes_remaining;
1333 	}
1334 	archive_entry_set_size(entry, tar->realsize);
1335 
1336 	if (!archive_entry_mtime_is_set(entry)) {
1337 		archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0);
1338 	}
1339 
1340 	/* Handle the tar type flag appropriately. */
1341 	tar->filetype = header->typeflag[0];
1342 
1343 	/*
1344 	 * TODO: If the linkpath came from Pax extension header, then
1345 	 * we should obey the hdrcharset_utf8 flag when converting these.
1346 	 */
1347 	switch (tar->filetype) {
1348 	case '1': /* Hard link */
1349 		archive_entry_set_link_to_hardlink(entry);
1350 		existing_wcs_linkpath = archive_entry_hardlink_w(entry);
1351 		existing_linkpath = archive_entry_hardlink(entry);
1352 		if ((existing_linkpath == NULL || existing_linkpath[0] == '\0')
1353 		    && (existing_wcs_linkpath == NULL || existing_wcs_linkpath[0] == '\0')) {
1354 			struct archive_string linkpath;
1355 			archive_string_init(&linkpath);
1356 			archive_strncpy(&linkpath,
1357 					header->linkname, sizeof(header->linkname));
1358 			if (archive_entry_copy_hardlink_l(entry, linkpath.s,
1359 							  archive_strlen(&linkpath), tar->sconv) != 0) {
1360 				err = set_conversion_failed_error(a, tar->sconv,
1361 								  "Linkname");
1362 				if (err == ARCHIVE_FATAL) {
1363 					archive_string_free(&linkpath);
1364 					return (err);
1365 				}
1366 			}
1367 			archive_string_free(&linkpath);
1368 		}
1369 		/*
1370 		 * The following may seem odd, but: Technically, tar
1371 		 * does not store the file type for a "hard link"
1372 		 * entry, only the fact that it is a hard link.  So, I
1373 		 * leave the type zero normally.  But, pax interchange
1374 		 * format allows hard links to have data, which
1375 		 * implies that the underlying entry is a regular
1376 		 * file.
1377 		 */
1378 		if (archive_entry_size(entry) > 0)
1379 			archive_entry_set_filetype(entry, AE_IFREG);
1380 
1381 		/*
1382 		 * A tricky point: Traditionally, tar readers have
1383 		 * ignored the size field when reading hardlink
1384 		 * entries, and some writers put non-zero sizes even
1385 		 * though the body is empty.  POSIX blessed this
1386 		 * convention in the 1988 standard, but broke with
1387 		 * this tradition in 2001 by permitting hardlink
1388 		 * entries to store valid bodies in pax interchange
1389 		 * format, but not in ustar format.  Since there is no
1390 		 * hard and fast way to distinguish pax interchange
1391 		 * from earlier archives (the 'x' and 'g' entries are
1392 		 * optional, after all), we need a heuristic.
1393 		 */
1394 		if (archive_entry_size(entry) == 0) {
1395 			/* If the size is already zero, we're done. */
1396 		}  else if (a->archive.archive_format
1397 		    == ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
1398 			/* Definitely pax extended; must obey hardlink size. */
1399 		} else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR
1400 		    || a->archive.archive_format == ARCHIVE_FORMAT_TAR_GNUTAR)
1401 		{
1402 			/* Old-style or GNU tar: we must ignore the size. */
1403 			archive_entry_set_size(entry, 0);
1404 			tar->entry_bytes_remaining = 0;
1405 		} else if (archive_read_format_tar_bid(a, 50) > 50) {
1406 			/*
1407 			 * We don't know if it's pax: If the bid
1408 			 * function sees a valid ustar header
1409 			 * immediately following, then let's ignore
1410 			 * the hardlink size.
1411 			 */
1412 			archive_entry_set_size(entry, 0);
1413 			tar->entry_bytes_remaining = 0;
1414 		}
1415 		/*
1416 		 * TODO: There are still two cases I'd like to handle:
1417 		 *   = a ustar non-pax archive with a hardlink entry at
1418 		 *     end-of-archive.  (Look for block of nulls following?)
1419 		 *   = a pax archive that has not seen any pax headers
1420 		 *     and has an entry which is a hardlink entry storing
1421 		 *     a body containing an uncompressed tar archive.
1422 		 * The first is worth addressing; I don't see any reliable
1423 		 * way to deal with the second possibility.
1424 		 */
1425 		break;
1426 	case '2': /* Symlink */
1427 		archive_entry_set_link_to_symlink(entry);
1428 		existing_wcs_linkpath = archive_entry_symlink_w(entry);
1429 		existing_linkpath = archive_entry_symlink(entry);
1430 		if ((existing_linkpath == NULL || existing_linkpath[0] == '\0')
1431 		    && (existing_wcs_linkpath == NULL || existing_wcs_linkpath[0] == '\0')) {
1432 			struct archive_string linkpath;
1433 			archive_string_init(&linkpath);
1434 			archive_strncpy(&linkpath,
1435 					header->linkname, sizeof(header->linkname));
1436 			if (archive_entry_copy_symlink_l(entry, linkpath.s,
1437 			    archive_strlen(&linkpath), tar->sconv) != 0) {
1438 				err = set_conversion_failed_error(a, tar->sconv,
1439 				    "Linkname");
1440 				if (err == ARCHIVE_FATAL) {
1441 					archive_string_free(&linkpath);
1442 					return (err);
1443 				}
1444 			}
1445 			archive_string_free(&linkpath);
1446 		}
1447 		archive_entry_set_filetype(entry, AE_IFLNK);
1448 		archive_entry_set_size(entry, 0);
1449 		tar->entry_bytes_remaining = 0;
1450 		break;
1451 	case '3': /* Character device */
1452 		archive_entry_set_filetype(entry, AE_IFCHR);
1453 		archive_entry_set_size(entry, 0);
1454 		tar->entry_bytes_remaining = 0;
1455 		break;
1456 	case '4': /* Block device */
1457 		archive_entry_set_filetype(entry, AE_IFBLK);
1458 		archive_entry_set_size(entry, 0);
1459 		tar->entry_bytes_remaining = 0;
1460 		break;
1461 	case '5': /* Dir */
1462 		archive_entry_set_filetype(entry, AE_IFDIR);
1463 		archive_entry_set_size(entry, 0);
1464 		tar->entry_bytes_remaining = 0;
1465 		break;
1466 	case '6': /* FIFO device */
1467 		archive_entry_set_filetype(entry, AE_IFIFO);
1468 		archive_entry_set_size(entry, 0);
1469 		tar->entry_bytes_remaining = 0;
1470 		break;
1471 	case 'D': /* GNU incremental directory type */
1472 		/*
1473 		 * No special handling is actually required here.
1474 		 * It might be nice someday to preprocess the file list and
1475 		 * provide it to the client, though.
1476 		 */
1477 		archive_entry_set_filetype(entry, AE_IFDIR);
1478 		break;
1479 	case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/
1480 		/*
1481 		 * As far as I can tell, this is just like a regular file
1482 		 * entry, except that the contents should be _appended_ to
1483 		 * the indicated file at the indicated offset.  This may
1484 		 * require some API work to fully support.
1485 		 */
1486 		break;
1487 	case 'N': /* Old GNU "long filename" entry. */
1488 		/* The body of this entry is a script for renaming
1489 		 * previously-extracted entries.  Ugh.  It will never
1490 		 * be supported by libarchive. */
1491 		archive_entry_set_filetype(entry, AE_IFREG);
1492 		break;
1493 	case 'S': /* GNU sparse files */
1494 		/*
1495 		 * Sparse files are really just regular files with
1496 		 * sparse information in the extended area.
1497 		 */
1498 		/* FALLTHROUGH */
1499 	case '0': /* ustar "regular" file */
1500 		/* FALLTHROUGH */
1501 	default: /* Non-standard file types */
1502 		/*
1503 		 * Per POSIX: non-recognized types should always be
1504 		 * treated as regular files.
1505 		 */
1506 		archive_entry_set_filetype(entry, AE_IFREG);
1507 		break;
1508 	}
1509 	return (err);
1510 }
1511 
1512 /*
1513  * Parse out header elements for "old-style" tar archives.
1514  */
1515 static int
header_old_tar(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const void * h)1516 header_old_tar(struct archive_read *a, struct tar *tar,
1517     struct archive_entry *entry, const void *h)
1518 {
1519 	const struct archive_entry_header_ustar	*header;
1520 	int err = ARCHIVE_OK, err2;
1521 
1522 	/*
1523 	 * Copy filename over (to ensure null termination).
1524 	 * Skip if pathname was already set e.g. by header_gnu_longname()
1525 	 */
1526 	header = (const struct archive_entry_header_ustar *)h;
1527 
1528 	const char *existing_pathname = archive_entry_pathname(entry);
1529 	const wchar_t *existing_wcs_pathname = archive_entry_pathname_w(entry);
1530 	if ((existing_pathname == NULL || existing_pathname[0] == '\0')
1531 	    && (existing_wcs_pathname == NULL || existing_wcs_pathname[0] == '\0') &&
1532 	    archive_entry_copy_pathname_l(entry,
1533 	    header->name, sizeof(header->name), tar->sconv) != 0) {
1534 		err = set_conversion_failed_error(a, tar->sconv, "Pathname");
1535 		if (err == ARCHIVE_FATAL)
1536 			return (err);
1537 	}
1538 
1539 	/* Grab rest of common fields */
1540 	err2 = header_common(a, tar, entry, h);
1541 	if (err > err2)
1542 		err = err2;
1543 
1544 	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1545 	return (err);
1546 }
1547 
/*
 * Is this likely an AppleDouble extension?
 *
 * Returns 1 if the last element of the entry's pathname starts with
 * "._" followed by at least one more character, 0 otherwise.  The
 * wide-character pathname is preferred; the multibyte pathname is
 * used only when no wide form is available.  A slash that ends the
 * string does not start a new path element.
 */
static int
is_mac_metadata_entry(struct archive_entry *entry) {
	const char *p, *name;
	const wchar_t *wp, *wname;

	wname = wp = archive_entry_pathname_w(entry);
	if (wp != NULL) {
		/* Find the last path element. */
		for (; *wp != L'\0'; ++wp) {
			if (wp[0] == L'/' && wp[1] != L'\0')
				wname = wp + 1;
		}
		/*
		 * If last path element starts with "._", then
		 * this is a Mac extension.
		 */
		if (wname[0] == L'.' && wname[1] == L'_' && wname[2] != L'\0')
			return (1);
		return (0);
	}

	name = p = archive_entry_pathname(entry);
	/*
	 * This is a yes/no predicate, so an entry without any pathname
	 * must answer "no".  Returning an error code here would be
	 * non-zero and therefore read as "yes" by the caller.
	 */
	if (p == NULL)
		return (0);
	/* Find the last path element. */
	for (; *p != '\0'; ++p) {
		if (p[0] == '/' && p[1] != '\0')
			name = p + 1;
	}
	/*
	 * If last path element starts with "._", then
	 * this is a Mac extension.
	 */
	if (name[0] == '.' && name[1] == '_' && name[2] != '\0')
		return (1);

	/* Not a mac extension */
	return (0);
}
1588 
1589 /*
1590  * Read a Mac AppleDouble-encoded blob of file metadata,
1591  * if there is one.
1592  *
1593  * TODO: In Libarchive 4, we should consider ripping this
1594  * out -- instead, return a file starting with `._` as
1595  * a regular file and let the client (or archive_write logic)
1596  * handle it.
1597  */
1598 static int
read_mac_metadata_blob(struct archive_read * a,struct archive_entry * entry,size_t * unconsumed)1599 read_mac_metadata_blob(struct archive_read *a,
1600     struct archive_entry *entry, size_t *unconsumed)
1601 {
1602 	int64_t size;
1603 	size_t msize;
1604 	const void *data;
1605 
1606  	/* Read the body as a Mac OS metadata blob. */
1607 	size = archive_entry_size(entry);
1608 	msize = (size_t)size;
1609 	if (size < 0 || (uintmax_t)msize != (uintmax_t)size) {
1610 		*unconsumed = 0;
1611 		return (ARCHIVE_FATAL);
1612 	}
1613 
1614 	/* TODO: Should this merely skip the overlarge entry and
1615 	 * WARN?  Or is xattr_limit sufficiently large that we can
1616 	 * safely assume anything larger is malicious? */
1617 	if (size > (int64_t)xattr_limit) {
1618 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1619 		    "Oversized AppleDouble extension has size %llu > %llu",
1620 		    (unsigned long long)size,
1621 		    (unsigned long long)xattr_limit);
1622 		return (ARCHIVE_FATAL);
1623 	}
1624 
1625 	/*
1626 	 * TODO: Look beyond the body here to peek at the next header.
1627 	 * If it's a regular header (not an extension header)
1628 	 * that has the wrong name, just return the current
1629 	 * entry as-is, without consuming the body here.
1630 	 * That would reduce the risk of us mis-identifying
1631 	 * an ordinary file that just happened to have
1632 	 * a name starting with "._".
1633 	 *
1634 	 * Q: Is the above idea really possible?  Even
1635 	 * when there are GNU or pax extension entries?
1636 	 */
1637 	tar_flush_unconsumed(a, unconsumed);
1638 	data = __archive_read_ahead(a, msize, NULL);
1639 	if (data == NULL) {
1640 		archive_set_error(&a->archive, EINVAL,
1641 		    "Truncated archive"
1642 		    " detected while reading macOS metadata");
1643 		*unconsumed = 0;
1644 		return (ARCHIVE_FATAL);
1645 	}
1646 	archive_entry_clear(entry);
1647 	archive_entry_copy_mac_metadata(entry, data, msize);
1648 	*unconsumed = (msize + 511) & ~ 511;
1649 	return (ARCHIVE_OK);
1650 }
1651 
1652 /*
1653  * Parse a file header for a pax extended archive entry.
1654  */
1655 static int
header_pax_global(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const void * h,size_t * unconsumed)1656 header_pax_global(struct archive_read *a, struct tar *tar,
1657     struct archive_entry *entry, const void *h, size_t *unconsumed)
1658 {
1659 	const struct archive_entry_header_ustar *header;
1660 	int64_t size, to_consume;
1661 
1662 	(void)a; /* UNUSED */
1663 	(void)tar; /* UNUSED */
1664 	(void)entry; /* UNUSED */
1665 
1666 	header = (const struct archive_entry_header_ustar *)h;
1667 	size = tar_atol(header->size, sizeof(header->size));
1668 	if (size > entry_limit) {
1669 		return (ARCHIVE_FATAL);
1670 	}
1671 	to_consume = ((size + 511) & ~511);
1672 	*unconsumed += to_consume;
1673 	return (ARCHIVE_OK);
1674 }
1675 
1676 /*
1677  * Parse a file header for a Posix "ustar" archive entry.  This also
1678  * handles "pax" or "extended ustar" entries.
1679  *
1680  * In order to correctly handle pax attributes (which precede this),
1681  * we have to skip parsing any field for which the entry already has
1682  * contents.
1683  */
1684 static int
header_ustar(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const void * h)1685 header_ustar(struct archive_read *a, struct tar *tar,
1686     struct archive_entry *entry, const void *h)
1687 {
1688 	const struct archive_entry_header_ustar	*header;
1689 	struct archive_string as;
1690 	int err = ARCHIVE_OK, r;
1691 
1692 	header = (const struct archive_entry_header_ustar *)h;
1693 
1694 	/* Copy name into an internal buffer to ensure null-termination. */
1695 	const char *existing_pathname = archive_entry_pathname(entry);
1696 	const wchar_t *existing_wcs_pathname = archive_entry_pathname_w(entry);
1697 	if ((existing_pathname == NULL || existing_pathname[0] == '\0')
1698 	    && (existing_wcs_pathname == NULL || existing_wcs_pathname[0] == '\0')) {
1699 		archive_string_init(&as);
1700 		if (header->prefix[0]) {
1701 			archive_strncpy(&as, header->prefix, sizeof(header->prefix));
1702 			if (as.s[archive_strlen(&as) - 1] != '/')
1703 				archive_strappend_char(&as, '/');
1704 			archive_strncat(&as, header->name, sizeof(header->name));
1705 		} else {
1706 			archive_strncpy(&as, header->name, sizeof(header->name));
1707 		}
1708 		if (archive_entry_copy_pathname_l(entry, as.s, archive_strlen(&as),
1709 		    tar->sconv) != 0) {
1710 			err = set_conversion_failed_error(a, tar->sconv, "Pathname");
1711 			if (err == ARCHIVE_FATAL)
1712 				return (err);
1713 		}
1714 		archive_string_free(&as);
1715 	}
1716 
1717 	/* Handle rest of common fields. */
1718 	r = header_common(a, tar, entry, h);
1719 	if (r == ARCHIVE_FATAL)
1720 		return (r);
1721 	if (r < err)
1722 		err = r;
1723 
1724 	/* Handle POSIX ustar fields. */
1725 	const char *existing_uname = archive_entry_uname(entry);
1726 	if (existing_uname == NULL || existing_uname[0] == '\0') {
1727 		if (archive_entry_copy_uname_l(entry,
1728 		    header->uname, sizeof(header->uname), tar->sconv) != 0) {
1729 			err = set_conversion_failed_error(a, tar->sconv, "Uname");
1730 			if (err == ARCHIVE_FATAL)
1731 				return (err);
1732 		}
1733 	}
1734 
1735 	const char *existing_gname = archive_entry_gname(entry);
1736 	if (existing_gname == NULL || existing_gname[0] == '\0') {
1737 		if (archive_entry_copy_gname_l(entry,
1738 		    header->gname, sizeof(header->gname), tar->sconv) != 0) {
1739 			err = set_conversion_failed_error(a, tar->sconv, "Gname");
1740 			if (err == ARCHIVE_FATAL)
1741 				return (err);
1742 		}
1743 	}
1744 
1745 	/* Parse out device numbers only for char and block specials. */
1746 	if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
1747 		if (!archive_entry_rdev_is_set(entry)) {
1748 			archive_entry_set_rdevmajor(entry, (dev_t)
1749 			    tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
1750 			archive_entry_set_rdevminor(entry, (dev_t)
1751 			    tar_atol(header->rdevminor, sizeof(header->rdevminor)));
1752 		}
1753 	} else {
1754 		archive_entry_set_rdev(entry, 0);
1755 	}
1756 
1757 	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1758 
1759 	return (err);
1760 }
1761 
1762 static int
header_pax_extension(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const void * h,size_t * unconsumed)1763 header_pax_extension(struct archive_read *a, struct tar *tar,
1764     struct archive_entry *entry, const void *h, size_t *unconsumed)
1765 {
1766 	/* Sanity checks: The largest `x` body I've ever heard of was
1767 	 * a little over 4MB.  So I doubt there has ever been a
1768 	 * well-formed archive with an `x` body over 1GiB.  Similarly,
1769 	 * it seems plausible that no single attribute has ever been
1770 	 * larger than 100MB.  So if we see a larger value here, it's
1771 	 * almost certainly a sign of a corrupted/malicious archive. */
1772 
1773 	/* Maximum sane size for extension body: 1 GiB */
1774 	/* This cannot be raised to larger than 8GiB without
1775 	 * exceeding the maximum size for a standard ustar
1776 	 * entry. */
1777 	const int64_t ext_size_limit = 1024 * 1024 * (int64_t)1024;
1778 	/* Maximum size for a single line/attr: 100 million characters */
1779 	/* This cannot be raised to more than 2GiB without exceeding
1780 	 * a `size_t` on 32-bit platforms. */
1781 	const size_t max_parsed_line_length = 99999999ULL;
1782 	/* Largest attribute prolog:  size + name. */
1783 	const size_t max_size_name = 512;
1784 
1785 	/* Size and padding of the full extension body */
1786 	int64_t ext_size, ext_padding;
1787 	size_t line_length, value_length, name_length;
1788 	ssize_t to_read, did_read;
1789 	const struct archive_entry_header_ustar *header;
1790 	const char *p, *attr_start, *name_start;
1791 	struct archive_string_conv *sconv;
1792 	struct archive_string *pas = NULL;
1793 	struct archive_string attr_name;
1794 	int err = ARCHIVE_OK, r;
1795 
1796 	header = (const struct archive_entry_header_ustar *)h;
1797 	ext_size  = tar_atol(header->size, sizeof(header->size));
1798 	if (ext_size > entry_limit) {
1799 		return (ARCHIVE_FATAL);
1800 	}
1801 	if (ext_size < 0) {
1802 	  archive_set_error(&a->archive, EINVAL,
1803 			    "pax extension header has invalid size: %lld",
1804 			    (long long)ext_size);
1805 	  return (ARCHIVE_FATAL);
1806 	}
1807 
1808 	ext_padding = 0x1ff & (-ext_size);
1809 	if (ext_size > ext_size_limit) {
1810 		/* Consume the pax extension body and return an error */
1811 		if (ext_size + ext_padding != __archive_read_consume(a, ext_size + ext_padding)) {
1812 			return (ARCHIVE_FATAL);
1813 		}
1814 		archive_set_error(&a->archive, EINVAL,
1815 		    "Ignoring oversized pax extensions: %d > %d",
1816 		    (int)ext_size, (int)ext_size_limit);
1817 		return (ARCHIVE_WARN);
1818 	}
1819 	tar_flush_unconsumed(a, unconsumed);
1820 
1821 	/* Parse the size/name of each pax attribute in the body */
1822 	archive_string_init(&attr_name);
1823 	while (ext_size > 0) {
1824 		/* Read enough bytes to parse the size/name of the next attribute */
1825 		to_read = max_size_name;
1826 		if (to_read > ext_size) {
1827 			to_read = ext_size;
1828 		}
1829 		p = __archive_read_ahead(a, to_read, &did_read);
1830 		if (p == NULL) { /* EOF */
1831 			archive_set_error(&a->archive, EINVAL,
1832 					  "Truncated tar archive"
1833 					  " detected while reading pax attribute name");
1834 			return (ARCHIVE_FATAL);
1835 		}
1836 		if (did_read > ext_size) {
1837 			did_read = ext_size;
1838 		}
1839 
1840 		/* Parse size of attribute */
1841 		line_length = 0;
1842 		attr_start = p;
1843 		while (1) {
1844 			if (p >= attr_start + did_read) {
1845 				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1846 						  "Ignoring malformed pax attributes: overlarge attribute size field");
1847 				*unconsumed += ext_size + ext_padding;
1848 				return (ARCHIVE_WARN);
1849 			}
1850 			if (*p == ' ') {
1851 				p++;
1852 				break;
1853 			}
1854 			if (*p < '0' || *p > '9') {
1855 				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1856 						  "Ignoring malformed pax attributes: malformed attribute size field");
1857 				*unconsumed += ext_size + ext_padding;
1858 				return (ARCHIVE_WARN);
1859 			}
1860 			line_length *= 10;
1861 			line_length += *p - '0';
1862 			if (line_length > max_parsed_line_length) {
1863 				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1864 						  "Ignoring malformed pax attribute: size > %lld",
1865 						  (long long)max_parsed_line_length);
1866 				*unconsumed += ext_size + ext_padding;
1867 				return (ARCHIVE_WARN);
1868 			}
1869 			p++;
1870 		}
1871 
1872 		if ((int64_t)line_length > ext_size) {
1873 				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1874 						  "Ignoring malformed pax attribute:  %lld > %lld",
1875 						  (long long)line_length, (long long)ext_size);
1876 				*unconsumed += ext_size + ext_padding;
1877 				return (ARCHIVE_WARN);
1878 		}
1879 
1880 		/* Parse name of attribute */
1881 		if (p >= attr_start + did_read
1882 		    || p >= attr_start + line_length
1883 		    || *p == '=') {
1884 			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1885 					  "Ignoring malformed pax attributes: empty name found");
1886 			*unconsumed += ext_size + ext_padding;
1887 			return (ARCHIVE_WARN);
1888 		}
1889 		name_start = p;
1890 		while (1) {
1891 			if (p >= attr_start + did_read || p >= attr_start + line_length) {
1892 				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1893 						  "Ignoring malformed pax attributes: overlarge attribute name");
1894 				*unconsumed += ext_size + ext_padding;
1895 				return (ARCHIVE_WARN);
1896 			}
1897 			if (*p == '=') {
1898 				break;
1899 			}
1900 			p++;
1901 		}
1902 		name_length = p - name_start;
1903 		p++; // Skip '='
1904 
1905 		// Save the name before we consume it
1906 		archive_strncpy(&attr_name, name_start, name_length);
1907 
1908 		ext_size -= p - attr_start;
1909 		value_length = line_length - (p - attr_start);
1910 
1911 		/* Consume size, name, and `=` */
1912 		*unconsumed += p - attr_start;
1913 		tar_flush_unconsumed(a, unconsumed);
1914 
1915 		/* pax_attribute will consume value_length - 1 */
1916 		r = pax_attribute(a, tar, entry, attr_name.s, archive_strlen(&attr_name), value_length - 1, unconsumed);
1917 		ext_size -= value_length - 1;
1918 
1919 		// Release the allocated attr_name (either here or before every return in this function)
1920 		archive_string_free(&attr_name);
1921 
1922 		if (r < ARCHIVE_WARN) {
1923 			*unconsumed += ext_size + ext_padding;
1924 			return (r);
1925 		}
1926 		err = err_combine(err, r);
1927 
1928 		/* Consume the `\n` that follows the pax attribute value. */
1929 		tar_flush_unconsumed(a, unconsumed);
1930 		p = __archive_read_ahead(a, 1, &did_read);
1931 		if (p == NULL) {
1932 			archive_set_error(&a->archive, EINVAL,
1933 					  "Truncated tar archive"
1934 					  " detected while completing pax attribute");
1935 			return (ARCHIVE_FATAL);
1936 		}
1937 		if (p[0] != '\n') {
1938 			archive_set_error(&a->archive, EINVAL,
1939 					  "Malformed pax attributes");
1940 			*unconsumed += ext_size + ext_padding;
1941 			return (ARCHIVE_WARN);
1942 		}
1943 		ext_size -= 1;
1944 		*unconsumed += 1;
1945 		tar_flush_unconsumed(a, unconsumed);
1946 	}
1947 	*unconsumed += ext_size + ext_padding;
1948 
1949 	/*
1950 	 * Some PAX values -- pathname, linkpath, uname, gname --
1951 	 * can't be copied into the entry until we know the character
1952 	 * set to use:
1953 	 */
1954 	if (!tar->pax_hdrcharset_utf8)
1955 		/* PAX specified "BINARY", so use the default charset */
1956 		sconv = tar->opt_sconv;
1957 	else {
1958 		/* PAX default UTF-8 */
1959 		sconv = archive_string_conversion_from_charset(
1960 		    &(a->archive), "UTF-8", 1);
1961 		if (sconv == NULL)
1962 			return (ARCHIVE_FATAL);
1963 		if (tar->compat_2x)
1964 			archive_string_conversion_set_opt(sconv,
1965 			    SCONV_SET_OPT_UTF8_LIBARCHIVE2X);
1966 	}
1967 
1968 	/* Pathname */
1969 	pas = NULL;
1970 	if (archive_strlen(&(tar->entry_pathname_override)) > 0) {
1971 		/* Prefer GNU.sparse.name attribute if present */
1972 		/* GNU sparse files store a fake name under the standard
1973 		 * "pathname" key. */
1974 		pas = &(tar->entry_pathname_override);
1975 	} else if (archive_strlen(&(tar->entry_pathname)) > 0) {
1976 		/* Use standard "pathname" PAX extension */
1977 		pas = &(tar->entry_pathname);
1978 	}
1979 	if (pas != NULL) {
1980 		if (archive_entry_copy_pathname_l(entry, pas->s,
1981 		    archive_strlen(pas), sconv) != 0) {
1982 			err = set_conversion_failed_error(a, sconv, "Pathname");
1983 			if (err == ARCHIVE_FATAL)
1984 				return (err);
1985 			/* Use raw name without conversion */
1986 			archive_entry_copy_pathname(entry, pas->s);
1987 		}
1988 	}
1989 	/* Uname */
1990 	if (archive_strlen(&(tar->entry_uname)) > 0) {
1991 		if (archive_entry_copy_uname_l(entry, tar->entry_uname.s,
1992 		    archive_strlen(&(tar->entry_uname)), sconv) != 0) {
1993 			err = set_conversion_failed_error(a, sconv, "Uname");
1994 			if (err == ARCHIVE_FATAL)
1995 				return (err);
1996 			/* Use raw name without conversion */
1997 			archive_entry_copy_uname(entry, tar->entry_uname.s);
1998 		}
1999 	}
2000 	/* Gname */
2001 	if (archive_strlen(&(tar->entry_gname)) > 0) {
2002 		if (archive_entry_copy_gname_l(entry, tar->entry_gname.s,
2003 		    archive_strlen(&(tar->entry_gname)), sconv) != 0) {
2004 			err = set_conversion_failed_error(a, sconv, "Gname");
2005 			if (err == ARCHIVE_FATAL)
2006 				return (err);
2007 			/* Use raw name without conversion */
2008 			archive_entry_copy_gname(entry, tar->entry_gname.s);
2009 		}
2010 	}
2011 	/* Linkpath */
2012 	if (archive_strlen(&(tar->entry_linkpath)) > 0) {
2013 		if (archive_entry_copy_link_l(entry, tar->entry_linkpath.s,
2014 		    archive_strlen(&(tar->entry_linkpath)), sconv) != 0) {
2015 			err = set_conversion_failed_error(a, sconv, "Linkpath");
2016 			if (err == ARCHIVE_FATAL)
2017 				return (err);
2018 			/* Use raw name without conversion */
2019 			archive_entry_copy_link(entry, tar->entry_linkpath.s);
2020 		}
2021 	}
2022 
2023 	/* Extension may have given us a corrected `entry_bytes_remaining` for
2024 	 * the main entry; update the padding appropriately. */
2025 	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
2026 	return (err);
2027 }
2028 
/*
 * Store one "LIBARCHIVE.xattr.<name>" attribute in the entry.
 *
 * The name is passed through url_decode() and the value through
 * base64_decode() before being added as an extended attribute.
 *
 * Returns 0 on success, 3 if the name is empty, 2 if the name could
 * not be decoded, and 1 if the value could not be decoded.
 */
static int
pax_attribute_LIBARCHIVE_xattr(struct archive_entry *entry,
	const char *name, size_t name_length, const char *value, size_t value_length)
{
	char *xattr_name = NULL;
	void *xattr_value = NULL;
	size_t xattr_value_size;
	int status;

	if (name_length < 1) {
		status = 3;
	} else if ((xattr_name = url_decode(name, name_length)) == NULL) {
		status = 2;
	} else if ((xattr_value = base64_decode(value, value_length,
	    &xattr_value_size)) == NULL) {
		status = 1;
	} else {
		archive_entry_xattr_add_entry(entry, xattr_name,
		    xattr_value, xattr_value_size);
		status = 0;
	}

	/* free(NULL) is a no-op, so one cleanup path serves every branch. */
	free(xattr_value);
	free(xattr_name);
	return status;
}
2059 
/*
 * Store one "SCHILY.xattr.<name>" attribute in the entry.
 *
 * `name` is not null-terminated, so it is copied into a temporary
 * null-terminated buffer before being handed to the entry.  The value
 * is passed through unchanged.
 *
 * Returns 0 on success, or 1 if the name length is outside 1..128 or
 * the temporary buffer cannot be allocated.
 */
static int
pax_attribute_SCHILY_xattr(struct archive_entry *entry,
	const char *name, size_t name_length, const char *value, size_t value_length)
{
	char *null_terminated_name;

	if (name_length < 1 || name_length > 128) {
		return 1;
	}

	null_terminated_name = malloc(name_length + 1);
	if (null_terminated_name == NULL) {
		/* Report the failure instead of silently dropping the xattr. */
		return 1;
	}

	memcpy(null_terminated_name, name, name_length);
	null_terminated_name[name_length] = '\0';
	archive_entry_xattr_add_entry(entry, null_terminated_name, value, value_length);
	free(null_terminated_name);

	return 0;
}
2078 
/*
 * Record the value of an "RHT.security.selinux" attribute on the entry
 * as the extended attribute "security.selinux".
 *
 * Always returns 0.
 */
static int
pax_attribute_RHT_security_selinux(struct archive_entry *entry,
	const char *value, size_t value_length)
{
	static const char selinux_xattr_name[] = "security.selinux";

	archive_entry_xattr_add_entry(entry, selinux_xattr_name,
	    value, value_length);
	return 0;
}
2088 
2089 static int
pax_attribute_SCHILY_acl(struct archive_read * a,struct tar * tar,struct archive_entry * entry,size_t value_length,int type)2090 pax_attribute_SCHILY_acl(struct archive_read *a, struct tar *tar,
2091 	struct archive_entry *entry, size_t value_length, int type)
2092 {
2093 	int r;
2094 	const char *p;
2095 	const char* errstr;
2096 
2097 	switch (type) {
2098 	case ARCHIVE_ENTRY_ACL_TYPE_ACCESS:
2099 		errstr = "SCHILY.acl.access";
2100 		break;
2101 	case ARCHIVE_ENTRY_ACL_TYPE_DEFAULT:
2102 		errstr = "SCHILY.acl.default";
2103 		break;
2104 	case ARCHIVE_ENTRY_ACL_TYPE_NFS4:
2105 		errstr = "SCHILY.acl.ace";
2106 		break;
2107 	default:
2108 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2109 		    "Unknown ACL type: %d", type);
2110 		return(ARCHIVE_FATAL);
2111 	}
2112 
2113 	if (tar->sconv_acl == NULL) {
2114 		tar->sconv_acl =
2115 		    archive_string_conversion_from_charset(
2116 			&(a->archive), "UTF-8", 1);
2117 		if (tar->sconv_acl == NULL)
2118 			return (ARCHIVE_FATAL);
2119 	}
2120 
2121 	if (value_length > acl_limit) {
2122 		__archive_read_consume(a, value_length);
2123 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2124 				  "Unreasonably large ACL: %d > %d",
2125 				  (int)value_length, (int)acl_limit);
2126 		return (ARCHIVE_WARN);
2127 	}
2128 
2129 	p = __archive_read_ahead(a, value_length, NULL);
2130 	if (p == NULL) {
2131 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2132 				  "Truncated tar archive "
2133 				  "detected while reading ACL data");
2134 		return (ARCHIVE_FATAL);
2135 	}
2136 
2137 	r = archive_acl_from_text_nl(archive_entry_acl(entry), p, value_length,
2138 	    type, tar->sconv_acl);
2139 	__archive_read_consume(a, value_length);
2140 	/* Workaround: Force perm_is_set() to be correct */
2141 	/* If this bit were stored in the ACL, this wouldn't be needed */
2142 	archive_entry_set_perm(entry, archive_entry_perm(entry));
2143 	if (r != ARCHIVE_OK) {
2144 		if (r == ARCHIVE_FATAL) {
2145 			archive_set_error(&a->archive, ENOMEM,
2146 			    "%s %s", "Can't allocate memory for ",
2147 			    errstr);
2148 			return (r);
2149 		}
2150 		archive_set_error(&a->archive,
2151 		    ARCHIVE_ERRNO_MISC, "%s %s", "Parse error: ", errstr);
2152 	}
2153 	return (r);
2154 }
2155 
2156 static int
pax_attribute_read_time(struct archive_read * a,size_t value_length,int64_t * ps,long * pn,size_t * unconsumed)2157 pax_attribute_read_time(struct archive_read *a, size_t value_length, int64_t *ps, long *pn, size_t *unconsumed) {
2158 	struct archive_string as;
2159 	int r;
2160 
2161 	if (value_length > 128) {
2162 		__archive_read_consume(a, value_length);
2163 		*ps = 0;
2164 		*pn = 0;
2165 		return (ARCHIVE_FATAL);
2166 	}
2167 
2168 	archive_string_init(&as);
2169 	r = read_bytes_to_string(a, &as, value_length, unconsumed);
2170 	if (r < ARCHIVE_OK) {
2171 		archive_string_free(&as);
2172 		return (r);
2173 	}
2174 
2175 	pax_time(as.s, archive_strlen(&as), ps, pn);
2176 	archive_string_free(&as);
2177 	if (*ps < 0 || *ps == INT64_MAX) {
2178 		return (ARCHIVE_WARN);
2179 	}
2180 	return (ARCHIVE_OK);
2181 }
2182 
2183 static int
pax_attribute_read_number(struct archive_read * a,size_t value_length,int64_t * result)2184 pax_attribute_read_number(struct archive_read *a, size_t value_length, int64_t *result) {
2185 	struct archive_string as;
2186 	size_t unconsumed = 0;
2187 	int r;
2188 
2189 	if (value_length > 64) {
2190 		__archive_read_consume(a, value_length);
2191 		*result = 0;
2192 		return (ARCHIVE_FATAL);
2193 	}
2194 
2195 	archive_string_init(&as);
2196 	r = read_bytes_to_string(a, &as, value_length, &unconsumed);
2197 	tar_flush_unconsumed(a, &unconsumed);
2198 	if (r < ARCHIVE_OK) {
2199 		archive_string_free(&as);
2200 		return (r);
2201 	}
2202 
2203 	*result = tar_atol10(as.s, archive_strlen(&as));
2204 	archive_string_free(&as);
2205 	if (*result < 0 || *result == INT64_MAX) {
2206 		*result = INT64_MAX;
2207 		return (ARCHIVE_WARN);
2208 	}
2209 	return (ARCHIVE_OK);
2210 }
2211 
2212 /*
2213  * Parse a single key=value attribute.
2214  *
2215  * POSIX reserves all-lowercase keywords.  Vendor-specific extensions
2216  * should always have keywords of the form "VENDOR.attribute" In
2217  * particular, it's quite feasible to support many different vendor
2218  * extensions here.  I'm using "LIBARCHIVE" for extensions unique to
2219  * this library.
2220  *
2221  * TODO: Investigate other vendor-specific extensions and see if
2222  * any of them look useful.
2223  */
2224 static int
pax_attribute(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const char * key,size_t key_length,size_t value_length,size_t * unconsumed)2225 pax_attribute(struct archive_read *a, struct tar *tar, struct archive_entry *entry,
2226 	      const char *key, size_t key_length, size_t value_length, size_t *unconsumed)
2227 {
2228 	int64_t t;
2229 	long n;
2230 	const char *p;
2231 	ssize_t bytes_read;
2232 	int err = ARCHIVE_OK;
2233 
2234 	switch (key[0]) {
2235 	case 'G':
2236 		/* GNU.* extensions */
2237 		if (key_length > 4 && memcmp(key, "GNU.", 4) == 0) {
2238 			key += 4;
2239 			key_length -= 4;
2240 
2241 			/* GNU.sparse marks the existence of GNU sparse information */
2242 			if (key_length == 6 && memcmp(key, "sparse", 6) == 0) {
2243 				tar->sparse_gnu_attributes_seen = 1;
2244 			}
2245 
2246 			/* GNU.sparse.* extensions */
2247 			else if (key_length > 7 && memcmp(key, "sparse.", 7) == 0) {
2248 				tar->sparse_gnu_attributes_seen = 1;
2249 				key += 7;
2250 				key_length -= 7;
2251 
2252 				/* GNU "0.0" sparse pax format. */
2253 				if (key_length == 9 && memcmp(key, "numblocks", 9) == 0) {
2254 					/* GNU.sparse.numblocks */
2255 					tar->sparse_offset = -1;
2256 					tar->sparse_numbytes = -1;
2257 					tar->sparse_gnu_major = 0;
2258 					tar->sparse_gnu_minor = 0;
2259 				}
2260 				else if (key_length == 6 && memcmp(key, "offset", 6) == 0) {
2261 					/* GNU.sparse.offset */
2262 					if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2263 						tar->sparse_offset = t;
2264 						if (tar->sparse_numbytes != -1) {
2265 							if (gnu_add_sparse_entry(a, tar,
2266 									 tar->sparse_offset, tar->sparse_numbytes)
2267 							    != ARCHIVE_OK)
2268 								return (ARCHIVE_FATAL);
2269 							tar->sparse_offset = -1;
2270 							tar->sparse_numbytes = -1;
2271 						}
2272 					}
2273 					return (err);
2274 				}
2275 				else if (key_length == 8 && memcmp(key, "numbytes", 8) == 0) {
2276 					/* GNU.sparse.numbytes */
2277 					if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2278 						tar->sparse_numbytes = t;
2279 						if (tar->sparse_offset != -1) {
2280 							if (gnu_add_sparse_entry(a, tar,
2281 									 tar->sparse_offset, tar->sparse_numbytes)
2282 							    != ARCHIVE_OK)
2283 								return (ARCHIVE_FATAL);
2284 							tar->sparse_offset = -1;
2285 							tar->sparse_numbytes = -1;
2286 						}
2287 					}
2288 					return (err);
2289 				}
2290 				else if (key_length == 4 && memcmp(key, "size", 4) == 0) {
2291 					/* GNU.sparse.size */
2292 					if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2293 						tar->realsize = t;
2294 						archive_entry_set_size(entry, tar->realsize);
2295 						tar->realsize_override = 1;
2296 					}
2297 					return (err);
2298 				}
2299 
2300 				/* GNU "0.1" sparse pax format. */
2301 				else if (key_length == 3 && memcmp(key, "map", 3) == 0) {
2302 					/* GNU.sparse.map */
2303 					tar->sparse_gnu_major = 0;
2304 					tar->sparse_gnu_minor = 1;
2305 					if (value_length > sparse_map_limit) {
2306 						archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2307 								  "Unreasonably large sparse map: %d > %d",
2308 								  (int)value_length, (int)sparse_map_limit);
2309 						err = ARCHIVE_FAILED;
2310 					} else {
2311 						p = __archive_read_ahead(a, value_length, &bytes_read);
2312 						if (p == NULL) {
2313 							archive_set_error(&a->archive, EINVAL,
2314 									  "Truncated archive"
2315 									  " detected while reading GNU sparse data");
2316 							return (ARCHIVE_FATAL);
2317 						}
2318 						if (gnu_sparse_01_parse(a, tar, p, value_length) != ARCHIVE_OK) {
2319 							err = ARCHIVE_WARN;
2320 						}
2321 					}
2322 					__archive_read_consume(a, value_length);
2323 					return (err);
2324 				}
2325 
2326 				/* GNU "1.0" sparse pax format */
2327 				else if (key_length == 5 && memcmp(key, "major", 5) == 0) {
2328 					/* GNU.sparse.major */
2329 					if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK
2330 					    && t >= 0
2331 					    && t <= 10) {
2332 						tar->sparse_gnu_major = (int)t;
2333 					}
2334 					return (err);
2335 				}
2336 				else if (key_length == 5 && memcmp(key, "minor", 5) == 0) {
2337 					/* GNU.sparse.minor */
2338 					if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK
2339 					    && t >= 0
2340 					    && t <= 10) {
2341 						tar->sparse_gnu_minor = (int)t;
2342 					}
2343 					return (err);
2344 				}
2345 				else if (key_length == 4 && memcmp(key, "name", 4) == 0) {
2346 					/* GNU.sparse.name */
2347 					/*
2348 					 * The real filename; when storing sparse
2349 					 * files, GNU tar puts a synthesized name into
2350 					 * the regular 'path' attribute in an attempt
2351 					 * to limit confusion. ;-)
2352 					 */
2353 					if (value_length > pathname_limit) {
2354 						*unconsumed += value_length;
2355 						err = ARCHIVE_WARN;
2356 					} else {
2357 						err = read_bytes_to_string(a, &(tar->entry_pathname_override),
2358 									   value_length, unconsumed);
2359 					}
2360 					return (err);
2361 				}
2362 				else if (key_length == 8 && memcmp(key, "realsize", 8) == 0) {
2363 					/* GNU.sparse.realsize */
2364 					if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2365 						tar->realsize = t;
2366 						archive_entry_set_size(entry, tar->realsize);
2367 						tar->realsize_override = 1;
2368 					}
2369 					return (err);
2370 				}
2371 			}
2372 		}
2373 		break;
2374 	case 'L':
2375 		/* LIBARCHIVE extensions */
2376 		if (key_length > 11 && memcmp(key, "LIBARCHIVE.", 11) == 0) {
2377 			key_length -= 11;
2378 			key += 11;
2379 
2380 			/* TODO: Handle arbitrary extended attributes... */
2381 			/*
2382 			  if (strcmp(key, "LIBARCHIVE.xxxxxxx") == 0)
2383 				  archive_entry_set_xxxxxx(entry, value);
2384 			*/
2385 			if (key_length == 12 && memcmp(key, "creationtime", 12) == 0) {
2386 				/* LIBARCHIVE.creationtime */
2387 				if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) {
2388 					archive_entry_set_birthtime(entry, t, n);
2389 				}
2390 				return (err);
2391 			}
2392 			else if (key_length == 11 && memcmp(key, "symlinktype", 11) == 0) {
2393 				/* LIBARCHIVE.symlinktype */
2394 				if (value_length < 16) {
2395 					p = __archive_read_ahead(a, value_length, &bytes_read);
2396 					if (p == NULL) {
2397 						archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2398 								  "Truncated tar archive "
2399 								  "detected while reading `symlinktype` attribute");
2400 						return (ARCHIVE_FATAL);
2401 					}
2402 					if (value_length == 4 && memcmp(p, "file", 4) == 0) {
2403 						archive_entry_set_symlink_type(entry,
2404 									       AE_SYMLINK_TYPE_FILE);
2405 					} else if (value_length == 3 && memcmp(p, "dir", 3) == 0) {
2406 							archive_entry_set_symlink_type(entry,
2407 										       AE_SYMLINK_TYPE_DIRECTORY);
2408 					} else {
2409 						archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2410 								  "Unrecognized symlink type");
2411 						err = ARCHIVE_WARN;
2412 					}
2413 				} else {
2414 					archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2415 							  "symlink type is very long"
2416 							  "(longest recognized value is 4 bytes, this is %d)",
2417 							  (int)value_length);
2418 					err = ARCHIVE_WARN;
2419 				}
2420 				__archive_read_consume(a, value_length);
2421 				return (err);
2422 			}
2423 			else if (key_length > 6 && memcmp(key, "xattr.", 6) == 0) {
2424 				key_length -= 6;
2425 				key += 6;
2426 				if (value_length > xattr_limit) {
2427 					err = ARCHIVE_WARN;
2428 				} else {
2429 					p = __archive_read_ahead(a, value_length, &bytes_read);
2430 					if (p == NULL) {
2431 						archive_set_error(&a->archive, EINVAL,
2432 								  "Truncated archive"
2433 								  " detected while reading xattr information");
2434 						return (ARCHIVE_FATAL);
2435 					}
2436 					if (pax_attribute_LIBARCHIVE_xattr(entry, key, key_length, p, value_length)) {
2437 						/* TODO: Unable to parse xattr */
2438 						err = ARCHIVE_WARN;
2439 					}
2440 				}
2441 				__archive_read_consume(a, value_length);
2442 				return (err);
2443 			}
2444 		}
2445 		break;
2446 	case 'R':
2447 		/* GNU tar uses RHT.security header to store SELinux xattrs
2448 		 * SCHILY.xattr.security.selinux == RHT.security.selinux */
2449 		if (key_length == 20 && memcmp(key, "RHT.security.selinux", 20) == 0) {
2450 			if (value_length > xattr_limit) {
2451 				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2452 						  "Ignoring unreasonably large security.selinux attribute:"
2453 						  " %d > %d",
2454 						  (int)value_length, (int)xattr_limit);
2455 				/* TODO: Should this be FAILED instead? */
2456 				err = ARCHIVE_WARN;
2457 			} else {
2458 				p = __archive_read_ahead(a, value_length, &bytes_read);
2459 				if (p == NULL) {
2460 					archive_set_error(&a->archive, EINVAL,
2461 							  "Truncated archive"
2462 							  " detected while reading selinux data");
2463 					return (ARCHIVE_FATAL);
2464 				}
2465 				if (pax_attribute_RHT_security_selinux(entry, p, value_length)) {
2466 					/* TODO: Unable to parse xattr */
2467 					err = ARCHIVE_WARN;
2468 				}
2469 			}
2470 			__archive_read_consume(a, value_length);
2471 			return (err);
2472 		}
2473 		break;
2474 	case 'S':
2475 		/* SCHILY.* extensions used by "star" archiver */
2476 		if (key_length > 7 && memcmp(key, "SCHILY.", 7) == 0) {
2477 			key_length -= 7;
2478 			key += 7;
2479 
2480 			if (key_length == 10 && memcmp(key, "acl.access", 10) == 0) {
2481 				err = pax_attribute_SCHILY_acl(a, tar, entry, value_length,
2482 						      ARCHIVE_ENTRY_ACL_TYPE_ACCESS);
2483 				// TODO: Mark mode as set
2484 				return (err);
2485 			}
2486 			else if (key_length == 11 && memcmp(key, "acl.default", 11) == 0) {
2487 				err = pax_attribute_SCHILY_acl(a, tar, entry, value_length,
2488 						      ARCHIVE_ENTRY_ACL_TYPE_DEFAULT);
2489 				return (err);
2490 			}
2491 			else if (key_length == 7 && memcmp(key, "acl.ace", 7) == 0) {
2492 				err = pax_attribute_SCHILY_acl(a, tar, entry, value_length,
2493 						      ARCHIVE_ENTRY_ACL_TYPE_NFS4);
2494 				// TODO: Mark mode as set
2495 				return (err);
2496 			}
2497 			else if (key_length == 8 && memcmp(key, "devmajor", 8) == 0) {
2498 				if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2499 					archive_entry_set_rdevmajor(entry, (dev_t)t);
2500 				}
2501 				return (err);
2502 			}
2503 			else if (key_length == 8 && memcmp(key, "devminor", 8) == 0) {
2504 				if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2505 					archive_entry_set_rdevminor(entry, (dev_t)t);
2506 				}
2507 				return (err);
2508 			}
2509 			else if (key_length == 6 && memcmp(key, "fflags", 6) == 0) {
2510 				if (value_length < fflags_limit) {
2511 					p = __archive_read_ahead(a, value_length, &bytes_read);
2512 					if (p == NULL) {
2513 						/* Truncated archive */
2514 						archive_set_error(&a->archive, EINVAL,
2515 								  "Truncated archive"
2516 								  " detected while reading SCHILY.fflags");
2517 						return (ARCHIVE_FATAL);
2518 					}
2519 					archive_entry_copy_fflags_text_len(entry, p, value_length);
2520 					err = ARCHIVE_OK;
2521 				} else {
2522 					/* Overlong fflags field */
2523 					err = ARCHIVE_WARN;
2524 				}
2525 				__archive_read_consume(a, value_length);
2526 				return (err);
2527 			}
2528 			else if (key_length == 3 && memcmp(key, "dev", 3) == 0) {
2529 				if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2530 					archive_entry_set_dev(entry, (dev_t)t);
2531 				}
2532 				return (err);
2533 			}
2534 			else if (key_length == 3 && memcmp(key, "ino", 3) == 0) {
2535 				if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2536 					archive_entry_set_ino(entry, t);
2537 				}
2538 				return (err);
2539 			}
2540 			else if (key_length == 5 && memcmp(key, "nlink", 5) == 0) {
2541 				if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2542 					archive_entry_set_nlink(entry, (unsigned int)t);
2543 				}
2544 				return (err);
2545 			}
2546 			else if (key_length == 8 && memcmp(key, "realsize", 8) == 0) {
2547 				if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2548 					tar->realsize = t;
2549 					tar->realsize_override = 1;
2550 					archive_entry_set_size(entry, tar->realsize);
2551 				}
2552 				return (err);
2553 			}
2554 			else if (key_length > 6 && memcmp(key, "xattr.", 6) == 0) {
2555 				key_length -= 6;
2556 				key += 6;
2557 				if (value_length < xattr_limit) {
2558 					p = __archive_read_ahead(a, value_length, &bytes_read);
2559 					if (p == NULL) {
2560 						archive_set_error(&a->archive, EINVAL,
2561 								  "Truncated archive"
2562 								  " detected while reading SCHILY.xattr");
2563 						return (ARCHIVE_FATAL);
2564 					}
2565 					if (pax_attribute_SCHILY_xattr(entry, key, key_length, p, value_length)) {
2566 						/* TODO: Unable to parse xattr */
2567 						err = ARCHIVE_WARN;
2568 					}
2569 				} else {
2570 					archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2571 							  "Unreasonably large xattr: %d > %d",
2572 							  (int)value_length, (int)xattr_limit);
2573 					err = ARCHIVE_WARN;
2574 				}
2575 				__archive_read_consume(a, value_length);
2576 				return (err);
2577 			}
2578 		}
2579 		/* SUN.* extensions from Solaris tar */
2580 		if (key_length > 4 && memcmp(key, "SUN.", 4) == 0) {
2581 			key_length -= 4;
2582 			key += 4;
2583 
2584 			if (key_length == 9 && memcmp(key, "holesdata", 9) == 0) {
2585 				/* SUN.holesdata */
2586 				if (value_length < sparse_map_limit) {
2587 					p = __archive_read_ahead(a, value_length, &bytes_read);
2588 					if (p == NULL) {
2589 						archive_set_error(&a->archive, EINVAL,
2590 								  "Truncated archive"
2591 								  " detected while reading SUN.holesdata");
2592 						return (ARCHIVE_FATAL);
2593 					}
2594 					err = pax_attribute_SUN_holesdata(a, tar, entry, p, value_length);
2595 					if (err < ARCHIVE_OK) {
2596 						archive_set_error(&a->archive,
2597 								  ARCHIVE_ERRNO_MISC,
2598 								  "Parse error: SUN.holesdata");
2599 					}
2600 				} else {
2601 					archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2602 							  "Unreasonably large sparse map: %d > %d",
2603 							  (int)value_length, (int)sparse_map_limit);
2604 					err = ARCHIVE_FAILED;
2605 				}
2606 				__archive_read_consume(a, value_length);
2607 				return (err);
2608 			}
2609 		}
2610 		break;
2611 	case 'a':
2612 		if (key_length == 5 && memcmp(key, "atime", 5) == 0) {
2613 			if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) {
2614 				archive_entry_set_atime(entry, t, n);
2615 			}
2616 			return (err);
2617 		}
2618 		break;
2619 	case 'c':
2620 		if (key_length == 5 && memcmp(key, "ctime", 5) == 0) {
2621 			if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) {
2622 				archive_entry_set_ctime(entry, t, n);
2623 			}
2624 			return (err);
2625 		} else if (key_length == 7 && memcmp(key, "charset", 7) == 0) {
2626 			/* TODO: Publish charset information in entry. */
2627 		} else if (key_length == 7 && memcmp(key, "comment", 7) == 0) {
2628 			/* TODO: Publish comment in entry. */
2629 		}
2630 		break;
2631 	case 'g':
2632 		if (key_length == 3 && memcmp(key, "gid", 3) == 0) {
2633 			if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2634 				archive_entry_set_gid(entry, t);
2635 			}
2636 			return (err);
2637 		} else if (key_length == 5 && memcmp(key, "gname", 5) == 0) {
2638 			if (value_length > guname_limit) {
2639 				*unconsumed += value_length;
2640 				err = ARCHIVE_WARN;
2641 			} else {
2642 				err = read_bytes_to_string(a, &(tar->entry_gname), value_length, unconsumed);
2643 			}
2644 			return (err);
2645 		}
2646 		break;
2647 	case 'h':
2648 		if (key_length == 10 && memcmp(key, "hdrcharset", 10) == 0) {
2649 			if (value_length < 64) {
2650 				p = __archive_read_ahead(a, value_length, &bytes_read);
2651 				if (p == NULL) {
2652 					archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2653 							  "Truncated tar archive "
2654 							  "detected while reading hdrcharset attribute");
2655 					return (ARCHIVE_FATAL);
2656 				}
2657 				if (value_length == 6
2658 				    && memcmp(p, "BINARY", 6) == 0) {
2659 					/* Binary  mode. */
2660 					tar->pax_hdrcharset_utf8 = 0;
2661 					err = ARCHIVE_OK;
2662 				} else if (value_length == 23
2663 					   && memcmp(p, "ISO-IR 10646 2000 UTF-8", 23) == 0) {
2664 					tar->pax_hdrcharset_utf8 = 1;
2665 					err = ARCHIVE_OK;
2666 				} else {
2667 					/* TODO: Unrecognized character set */
2668 					err  = ARCHIVE_WARN;
2669 				}
2670 			} else {
2671 				archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2672 						  "hdrcharset attribute is unreasonably large (%d bytes)",
2673 						  (int)value_length);
2674 				err = ARCHIVE_WARN;
2675 			}
2676 			__archive_read_consume(a, value_length);
2677 			return (err);
2678 		}
2679 		break;
2680 	case 'l':
2681 		/* pax interchange doesn't distinguish hardlink vs. symlink. */
2682 		if (key_length == 8 && memcmp(key, "linkpath", 8) == 0) {
2683 			if (value_length > pathname_limit) {
2684 				*unconsumed += value_length;
2685 				err = ARCHIVE_WARN;
2686 			} else {
2687 				err = read_bytes_to_string(a, &tar->entry_linkpath, value_length, unconsumed);
2688 			}
2689 			return (err);
2690 		}
2691 		break;
2692 	case 'm':
2693 		if (key_length == 5 && memcmp(key, "mtime", 5) == 0) {
2694 			if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) {
2695 				archive_entry_set_mtime(entry, t, n);
2696 			}
2697 			return (err);
2698 		}
2699 		break;
2700 	case 'p':
2701 		if (key_length == 4 && memcmp(key, "path", 4) == 0) {
2702 			if (value_length > pathname_limit) {
2703 				*unconsumed += value_length;
2704 				err = ARCHIVE_WARN;
2705 			} else {
2706 				err = read_bytes_to_string(a, &(tar->entry_pathname), value_length, unconsumed);
2707 			}
2708 			return (err);
2709 		}
2710 		break;
2711 	case 'r':
2712 		/* POSIX has reserved 'realtime.*' */
2713 		break;
2714 	case 's':
2715 		/* POSIX has reserved 'security.*' */
2716 		/* Someday: if (strcmp(key, "security.acl") == 0) { ... } */
2717 		if (key_length == 4 && memcmp(key, "size", 4) == 0) {
2718 			/* "size" is the size of the data in the entry. */
2719 			if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2720 				tar->entry_bytes_remaining = t;
2721 				/*
2722 				 * The "size" pax header keyword always overrides the
2723 				 * "size" field in the tar header.
2724 				 * GNU.sparse.realsize, GNU.sparse.size and
2725 				 * SCHILY.realsize override this value.
2726 				 */
2727 				if (!tar->realsize_override) {
2728 					archive_entry_set_size(entry,
2729 							       tar->entry_bytes_remaining);
2730 					tar->realsize
2731 						= tar->entry_bytes_remaining;
2732 				}
2733 			}
2734 			else if (t == INT64_MAX) {
2735 				/* Note: pax_attr_read_number returns INT64_MAX on overflow or < 0 */
2736 				tar->entry_bytes_remaining = 0;
2737 				archive_set_error(&a->archive,
2738 				    ARCHIVE_ERRNO_MISC,
2739 				    "Tar size attribute overflow");
2740 				return (ARCHIVE_FATAL);
2741 			}
2742 			return (err);
2743 		}
2744 		break;
2745 	case 'u':
2746 		if (key_length == 3 && memcmp(key, "uid", 3) == 0) {
2747 			if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2748 				archive_entry_set_uid(entry, t);
2749 			}
2750 			return (err);
2751 		} else if (key_length == 5 && memcmp(key, "uname", 5) == 0) {
2752 			if (value_length > guname_limit) {
2753 				*unconsumed += value_length;
2754 				err = ARCHIVE_WARN;
2755 			} else {
2756 				err = read_bytes_to_string(a, &(tar->entry_uname), value_length, unconsumed);
2757 			}
2758 			return (err);
2759 		}
2760 		break;
2761 	}
2762 
2763 	/* Unrecognized key, just skip the entire value. */
2764 	__archive_read_consume(a, value_length);
2765 	return (err);
2766 }
2767 
2768 
2769 
/*
 * Parse a decimal time value of the form [-]seconds[.fraction].
 *
 * p, length: the text to parse.  It does not have to be
 *     NUL-terminated; no byte at or beyond p[length] is examined.
 * ps: receives the whole seconds.  The magnitude saturates at
 *     INT64_MAX, so an over-long number yields INT64_MAX (or
 *     -INT64_MAX when a leading '-' was present).
 * pn: receives the fractional part in nanoseconds.  Only the first
 *     nine fractional digits are used.
 *
 * Both outputs are always written; empty input yields 0 seconds and
 * 0 nanoseconds.  Parsing stops silently at the first character that
 * does not fit the syntax above.
 */
static void
pax_time(const char *p, size_t length, int64_t *ps, long *pn)
{
	const int64_t limit = INT64_MAX / 10;
	const int64_t last_digit_limit = INT64_MAX % 10;
	int64_t s = 0;
	unsigned long l;
	int sign = 1;
	char digit;

	/*
	 * Define both results before anything else so that no exit
	 * path -- in particular the empty-input one -- hands an
	 * indeterminate nanosecond count back to the caller.
	 */
	*ps = 0;
	*pn = 0;

	if (length == 0)
		return;

	if (*p == '-') {
		sign = -1;
		p++;
		length--;
	}

	/* Whole seconds, saturating instead of overflowing. */
	while (length > 0 && *p >= '0' && *p <= '9') {
		digit = *p - '0';
		if (s > limit ||
		    (s == limit && digit > last_digit_limit)) {
			s = INT64_MAX;
			break;
		}
		s = (s * 10) + digit;
		++p;
		--length;
	}

	/* s is at most INT64_MAX, so negating it cannot overflow. */
	*ps = s * sign;

	/* Without a '.' there is no fractional part to convert. */
	if (length == 0 || *p != '.')
		return;

	/*
	 * l is the weight of the next fractional digit, starting at
	 * tenths of a second expressed in nanoseconds.  The loop ends
	 * when the weight reaches zero (after nine digits) or at the
	 * first non-digit.
	 */
	l = 100000000UL;
	do {
		++p;
		--length;
		if (length > 0 && *p >= '0' && *p <= '9')
			*pn += (*p - '0') * l;
		else
			break;
	} while (l /= 10);
}
2826 
2827 /*
2828  * Parse GNU tar header
2829  */
2830 static int
header_gnutar(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const void * h,size_t * unconsumed)2831 header_gnutar(struct archive_read *a, struct tar *tar,
2832     struct archive_entry *entry, const void *h, size_t *unconsumed)
2833 {
2834 	const struct archive_entry_header_gnutar *header;
2835 	int64_t t;
2836 	int err = ARCHIVE_OK;
2837 
2838 	/*
2839 	 * GNU header is like POSIX ustar, except 'prefix' is
2840 	 * replaced with some other fields. This also means the
2841 	 * filename is stored as in old-style archives.
2842 	 */
2843 
2844 	/* Grab fields common to all tar variants. */
2845 	err = header_common(a, tar, entry, h);
2846 	if (err == ARCHIVE_FATAL)
2847 		return (err);
2848 
2849 	/* Copy filename over (to ensure null termination). */
2850 	header = (const struct archive_entry_header_gnutar *)h;
2851 	const char *existing_pathname = archive_entry_pathname(entry);
2852 	if (existing_pathname == NULL || existing_pathname[0] == '\0') {
2853 		if (archive_entry_copy_pathname_l(entry,
2854 		    header->name, sizeof(header->name), tar->sconv) != 0) {
2855 			err = set_conversion_failed_error(a, tar->sconv, "Pathname");
2856 			if (err == ARCHIVE_FATAL)
2857 				return (err);
2858 		}
2859 	}
2860 
2861 	/* Fields common to ustar and GNU */
2862 	/* XXX Can the following be factored out since it's common
2863 	 * to ustar and gnu tar?  Is it okay to move it down into
2864 	 * header_common, perhaps?  */
2865 	const char *existing_uname = archive_entry_uname(entry);
2866 	if (existing_uname == NULL || existing_uname[0] == '\0') {
2867 		if (archive_entry_copy_uname_l(entry,
2868 		    header->uname, sizeof(header->uname), tar->sconv) != 0) {
2869 			err = set_conversion_failed_error(a, tar->sconv, "Uname");
2870 			if (err == ARCHIVE_FATAL)
2871 				return (err);
2872 		}
2873 	}
2874 
2875 	const char *existing_gname = archive_entry_gname(entry);
2876 	if (existing_gname == NULL || existing_gname[0] == '\0') {
2877 		if (archive_entry_copy_gname_l(entry,
2878 		    header->gname, sizeof(header->gname), tar->sconv) != 0) {
2879 			err = set_conversion_failed_error(a, tar->sconv, "Gname");
2880 			if (err == ARCHIVE_FATAL)
2881 				return (err);
2882 		}
2883 	}
2884 
2885 	/* Parse out device numbers only for char and block specials */
2886 	if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
2887 		if (!archive_entry_rdev_is_set(entry)) {
2888 			archive_entry_set_rdevmajor(entry, (dev_t)
2889 			    tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
2890 			archive_entry_set_rdevminor(entry, (dev_t)
2891 			    tar_atol(header->rdevminor, sizeof(header->rdevminor)));
2892 		}
2893 	} else {
2894 		archive_entry_set_rdev(entry, 0);
2895 	}
2896 
2897 	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
2898 
2899 	/* Grab GNU-specific fields. */
2900 	if (!archive_entry_atime_is_set(entry)) {
2901 		t = tar_atol(header->atime, sizeof(header->atime));
2902 		if (t > 0)
2903 			archive_entry_set_atime(entry, t, 0);
2904 	}
2905 	if (!archive_entry_ctime_is_set(entry)) {
2906 		t = tar_atol(header->ctime, sizeof(header->ctime));
2907 		if (t > 0)
2908 			archive_entry_set_ctime(entry, t, 0);
2909 	}
2910 
2911 	if (header->realsize[0] != 0) {
2912 		tar->realsize
2913 		    = tar_atol(header->realsize, sizeof(header->realsize));
2914 		archive_entry_set_size(entry, tar->realsize);
2915 		tar->realsize_override = 1;
2916 	}
2917 
2918 	if (header->sparse[0].offset[0] != 0) {
2919 		if (gnu_sparse_old_read(a, tar, header, unconsumed)
2920 		    != ARCHIVE_OK)
2921 			return (ARCHIVE_FATAL);
2922 	} else {
2923 		if (header->isextended[0] != 0) {
2924 			/* XXX WTF? XXX */
2925 		}
2926 	}
2927 
2928 	return (err);
2929 }
2930 
2931 static int
gnu_add_sparse_entry(struct archive_read * a,struct tar * tar,int64_t offset,int64_t remaining)2932 gnu_add_sparse_entry(struct archive_read *a, struct tar *tar,
2933     int64_t offset, int64_t remaining)
2934 {
2935 	struct sparse_block *p;
2936 
2937 	p = calloc(1, sizeof(*p));
2938 	if (p == NULL) {
2939 		archive_set_error(&a->archive, ENOMEM, "Out of memory");
2940 		return (ARCHIVE_FATAL);
2941 	}
2942 	if (tar->sparse_last != NULL)
2943 		tar->sparse_last->next = p;
2944 	else
2945 		tar->sparse_list = p;
2946 	tar->sparse_last = p;
2947 	if (remaining < 0 || offset < 0 || offset > INT64_MAX - remaining) {
2948 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed sparse map data");
2949 		return (ARCHIVE_FATAL);
2950 	}
2951 	p->offset = offset;
2952 	p->remaining = remaining;
2953 	return (ARCHIVE_OK);
2954 }
2955 
2956 static void
gnu_clear_sparse_list(struct tar * tar)2957 gnu_clear_sparse_list(struct tar *tar)
2958 {
2959 	struct sparse_block *p;
2960 
2961 	while (tar->sparse_list != NULL) {
2962 		p = tar->sparse_list;
2963 		tar->sparse_list = p->next;
2964 		free(p);
2965 	}
2966 	tar->sparse_last = NULL;
2967 }
2968 
2969 /*
2970  * GNU tar old-format sparse data.
2971  *
2972  * GNU old-format sparse data is stored in a fixed-field
2973  * format.  Offset/size values are 11-byte octal fields (same
2974  * format as 'size' field in ustart header).  These are
2975  * stored in the header, allocating subsequent header blocks
2976  * as needed.  Extending the header in this way is a pretty
2977  * severe POSIX violation; this design has earned GNU tar a
2978  * lot of criticism.
2979  */
2980 
2981 static int
gnu_sparse_old_read(struct archive_read * a,struct tar * tar,const struct archive_entry_header_gnutar * header,size_t * unconsumed)2982 gnu_sparse_old_read(struct archive_read *a, struct tar *tar,
2983     const struct archive_entry_header_gnutar *header, size_t *unconsumed)
2984 {
2985 	ssize_t bytes_read;
2986 	const void *data;
2987 	struct extended {
2988 		struct gnu_sparse sparse[21];
2989 		char	isextended[1];
2990 		char	padding[7];
2991 	};
2992 	const struct extended *ext;
2993 
2994 	if (gnu_sparse_old_parse(a, tar, header->sparse, 4) != ARCHIVE_OK)
2995 		return (ARCHIVE_FATAL);
2996 	if (header->isextended[0] == 0)
2997 		return (ARCHIVE_OK);
2998 
2999 	do {
3000 		tar_flush_unconsumed(a, unconsumed);
3001 		data = __archive_read_ahead(a, 512, &bytes_read);
3002 		if (data == NULL) {
3003 			archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
3004 			    "Truncated tar archive "
3005 			    "detected while reading sparse file data");
3006 			return (ARCHIVE_FATAL);
3007 		}
3008 		*unconsumed = 512;
3009 		ext = (const struct extended *)data;
3010 		if (gnu_sparse_old_parse(a, tar, ext->sparse, 21) != ARCHIVE_OK)
3011 			return (ARCHIVE_FATAL);
3012 	} while (ext->isextended[0] != 0);
3013 	if (tar->sparse_list != NULL)
3014 		tar->entry_offset = tar->sparse_list->offset;
3015 	return (ARCHIVE_OK);
3016 }
3017 
3018 static int
gnu_sparse_old_parse(struct archive_read * a,struct tar * tar,const struct gnu_sparse * sparse,int length)3019 gnu_sparse_old_parse(struct archive_read *a, struct tar *tar,
3020     const struct gnu_sparse *sparse, int length)
3021 {
3022 	while (length > 0 && sparse->offset[0] != 0) {
3023 		if (gnu_add_sparse_entry(a, tar,
3024 		    tar_atol(sparse->offset, sizeof(sparse->offset)),
3025 		    tar_atol(sparse->numbytes, sizeof(sparse->numbytes)))
3026 		    != ARCHIVE_OK)
3027 			return (ARCHIVE_FATAL);
3028 		sparse++;
3029 		length--;
3030 	}
3031 	return (ARCHIVE_OK);
3032 }
3033 
3034 /*
3035  * GNU tar sparse format 0.0
3036  *
3037  * Beginning with GNU tar 1.15, sparse files are stored using
3038  * information in the pax extended header.  The GNU tar maintainers
3039  * have gone through a number of variations in the process of working
3040  * out this scheme; fortunately, they're all numbered.
3041  *
3042  * Sparse format 0.0 uses attribute GNU.sparse.numblocks to store the
3043  * number of blocks, and GNU.sparse.offset/GNU.sparse.numbytes to
3044  * store offset/size for each block.  The repeated instances of these
3045  * latter fields violate the pax specification (which frowns on
3046  * duplicate keys), so this format was quickly replaced.
3047  */
3048 
3049 /*
3050  * GNU tar sparse format 0.1
3051  *
3052  * This version replaced the offset/numbytes attributes with
3053  * a single "map" attribute that stored a list of integers.  This
3054  * format had two problems: First, the "map" attribute could be very
3055  * long, which caused problems for some implementations.  More
3056  * importantly, the sparse data was lost when extracted by archivers
3057  * that didn't recognize this extension.
3058  */
3059 static int
gnu_sparse_01_parse(struct archive_read * a,struct tar * tar,const char * p,size_t length)3060 gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p, size_t length)
3061 {
3062 	const char *e;
3063 	int64_t offset = -1, size = -1;
3064 
3065 	for (;;) {
3066 		e = p;
3067 		while (length > 0 && *e != ',') {
3068 			if (*e < '0' || *e > '9')
3069 				return (ARCHIVE_WARN);
3070 			e++;
3071 			length--;
3072 		}
3073 		if (offset < 0) {
3074 			offset = tar_atol10(p, e - p);
3075 			if (offset < 0)
3076 				return (ARCHIVE_WARN);
3077 		} else {
3078 			size = tar_atol10(p, e - p);
3079 			if (size < 0)
3080 				return (ARCHIVE_WARN);
3081 			if (gnu_add_sparse_entry(a, tar, offset, size)
3082 			    != ARCHIVE_OK)
3083 				return (ARCHIVE_FATAL);
3084 			offset = -1;
3085 		}
3086 		if (length == 0)
3087 			return (ARCHIVE_OK);
3088 		p = e + 1;
3089 		length--;
3090 	}
3091 }
3092 
3093 /*
3094  * GNU tar sparse format 1.0
3095  *
3096  * The idea: The offset/size data is stored as a series of base-10
3097  * ASCII numbers prepended to the file data, so that dearchivers that
3098  * don't support this format will extract the block map along with the
3099  * data and a separate post-process can restore the sparseness.
3100  *
3101  * Unfortunately, GNU tar 1.16 had a bug that added unnecessary
3102  * padding to the body of the file when using this format.  GNU tar
3103  * 1.17 corrected this bug without bumping the version number, so
3104  * it's not possible to support both variants.  This code supports
3105  * the later variant at the expense of not supporting the former.
3106  *
3107  * This variant also replaced GNU.sparse.size with GNU.sparse.realsize
3108  * and introduced the GNU.sparse.major/GNU.sparse.minor attributes.
3109  */
3110 
3111 /*
3112  * Read the next line from the input, and parse it as a decimal
3113  * integer followed by '\n'.  Returns positive integer value or
3114  * negative on error.
3115  */
3116 static int64_t
gnu_sparse_10_atol(struct archive_read * a,struct tar * tar,int64_t * remaining,size_t * unconsumed)3117 gnu_sparse_10_atol(struct archive_read *a, struct tar *tar,
3118     int64_t *remaining, size_t *unconsumed)
3119 {
3120 	int64_t l, limit, last_digit_limit;
3121 	const char *p;
3122 	ssize_t bytes_read;
3123 	int base, digit;
3124 
3125 	base = 10;
3126 	limit = INT64_MAX / base;
3127 	last_digit_limit = INT64_MAX % base;
3128 
3129 	/*
3130 	 * Skip any lines starting with '#'; GNU tar specs
3131 	 * don't require this, but they should.
3132 	 */
3133 	do {
3134 		bytes_read = readline(a, tar, &p,
3135 			(ssize_t)tar_min(*remaining, 100), unconsumed);
3136 		if (bytes_read <= 0)
3137 			return (ARCHIVE_FATAL);
3138 		*remaining -= bytes_read;
3139 	} while (p[0] == '#');
3140 
3141 	l = 0;
3142 	while (bytes_read > 0) {
3143 		if (*p == '\n')
3144 			return (l);
3145 		if (*p < '0' || *p >= '0' + base)
3146 			return (ARCHIVE_WARN);
3147 		digit = *p - '0';
3148 		if (l > limit || (l == limit && digit > last_digit_limit))
3149 			l = INT64_MAX; /* Truncate on overflow. */
3150 		else
3151 			l = (l * base) + digit;
3152 		p++;
3153 		bytes_read--;
3154 	}
3155 	/* TODO: Error message. */
3156 	return (ARCHIVE_WARN);
3157 }
3158 
3159 /*
3160  * Returns length (in bytes) of the sparse data description
3161  * that was read.
3162  */
3163 static ssize_t
gnu_sparse_10_read(struct archive_read * a,struct tar * tar,size_t * unconsumed)3164 gnu_sparse_10_read(struct archive_read *a, struct tar *tar, size_t *unconsumed)
3165 {
3166 	ssize_t bytes_read;
3167 	int entries;
3168 	int64_t offset, size, to_skip, remaining;
3169 
3170 	/* Clear out the existing sparse list. */
3171 	gnu_clear_sparse_list(tar);
3172 
3173 	remaining = tar->entry_bytes_remaining;
3174 
3175 	/* Parse entries. */
3176 	entries = (int)gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
3177 	if (entries < 0)
3178 		return (ARCHIVE_FATAL);
3179 	/* Parse the individual entries. */
3180 	while (entries-- > 0) {
3181 		/* Parse offset/size */
3182 		offset = gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
3183 		if (offset < 0)
3184 			return (ARCHIVE_FATAL);
3185 		size = gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
3186 		if (size < 0)
3187 			return (ARCHIVE_FATAL);
3188 		/* Add a new sparse entry. */
3189 		if (gnu_add_sparse_entry(a, tar, offset, size) != ARCHIVE_OK)
3190 			return (ARCHIVE_FATAL);
3191 	}
3192 	/* Skip rest of block... */
3193 	tar_flush_unconsumed(a, unconsumed);
3194 	bytes_read = (ssize_t)(tar->entry_bytes_remaining - remaining);
3195 	to_skip = 0x1ff & -bytes_read;
3196 	/* Fail if tar->entry_bytes_remaing would get negative */
3197 	if (to_skip > remaining)
3198 		return (ARCHIVE_FATAL);
3199 	if (to_skip != __archive_read_consume(a, to_skip))
3200 		return (ARCHIVE_FATAL);
3201 	return ((ssize_t)(bytes_read + to_skip));
3202 }
3203 
3204 /*
3205  * Solaris pax extension for a sparse file. This is recorded with the
3206  * data and hole pairs. The way recording sparse information by Solaris'
3207  * pax simply indicates where data and sparse are, so the stored contents
3208  * consist of both data and hole.
3209  */
3210 static int
pax_attribute_SUN_holesdata(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const char * p,size_t length)3211 pax_attribute_SUN_holesdata(struct archive_read *a, struct tar *tar,
3212 	struct archive_entry *entry, const char *p, size_t length)
3213 {
3214 	const char *e;
3215 	int64_t start, end;
3216 	int hole = 1;
3217 
3218 	(void)entry; /* UNUSED */
3219 
3220 	end = 0;
3221 	if (length <= 0)
3222 		return (ARCHIVE_WARN);
3223 	if (*p == ' ') {
3224 		p++;
3225 		length--;
3226 	} else {
3227 		return (ARCHIVE_WARN);
3228 	}
3229 	for (;;) {
3230 		e = p;
3231 		while (length > 0 && *e != ' ') {
3232 			if (*e < '0' || *e > '9')
3233 				return (ARCHIVE_WARN);
3234 			e++;
3235 			length--;
3236 		}
3237 		start = end;
3238 		end = tar_atol10(p, e - p);
3239 		if (end < 0)
3240 			return (ARCHIVE_WARN);
3241 		if (start < end) {
3242 			if (gnu_add_sparse_entry(a, tar, start,
3243 			    end - start) != ARCHIVE_OK)
3244 				return (ARCHIVE_FATAL);
3245 			tar->sparse_last->hole = hole;
3246 		}
3247 		if (length == 0 || *e == '\n') {
3248 			if (length == 0 && *e == '\n') {
3249 				return (ARCHIVE_OK);
3250 			} else {
3251 				return (ARCHIVE_WARN);
3252 			}
3253 		}
3254 		p = e + 1;
3255 		length--;
3256 		hole = hole == 0;
3257 	}
3258 }
3259 
/*-
 * Convert text->integer.
 *
 * Traditional tar formats (including POSIX) specify base-8 for
 * all of the standard numeric fields.  This is a significant limitation
 * in practice:
 *   = file size is limited to 8GB
 *   = rdevmajor and rdevminor are limited to 21 bits
 *   = uid/gid are limited to 21 bits
 *
 * There are two workarounds for this:
 *   = pax extended headers, which use variable-length string fields
 *   = GNU tar and STAR both allow either base-8 or base-256 in
 *      most fields.  The high bit is set to indicate base-256.
 *
 * On read, this implementation supports both extensions: a field
 * whose first byte has the high bit set is decoded as base-256,
 * anything else as base-8.  The first byte is inspected
 * unconditionally.
 */
static int64_t
tar_atol(const char *p, size_t char_cnt)
{
	/*
	 * Technically, GNU tar considers a field to be in base-256
	 * only if the first byte is 0xff or 0x80.
	 */
	const int is_base256 = (*p & 0x80) != 0;

	return (is_base256 ? tar_atol256(p, char_cnt)
	    : tar_atol8(p, char_cnt));
}
3288 
/*
 * Parse a signed integer in the given base from a counted (not
 * necessarily NUL-terminated) field.
 *
 * Leading spaces and tabs are skipped and an optional '-' is
 * accepted.  Digits are then consumed until char_cnt runs out or a
 * character that is not a digit of 'base' is met; a field without
 * digits yields 0.  Values that do not fit saturate to INT64_MAX, or
 * to INT64_MIN for negative input.  At most char_cnt bytes starting
 * at p are read.
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
tar_atol_base_n(const char *p, size_t char_cnt, int base)
{
	int64_t	l, maxval, limit, last_digit_limit;
	int digit, sign;

	maxval = INT64_MAX;
	limit = INT64_MAX / base;
	last_digit_limit = INT64_MAX % base;

	/* The pointer is only dereferenced while char_cnt is non-zero. */
	while (char_cnt != 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	sign = 1;
	if (char_cnt != 0 && *p == '-') {
		sign = -1;
		p++;
		char_cnt--;

		/* Negative numbers have one more value of headroom. */
		maxval = INT64_MIN;
		limit = -(INT64_MIN / base);
		last_digit_limit = -(INT64_MIN % base);
	}

	/*
	 * Check the remaining count before every fetch, so the byte
	 * following the field is never read.
	 */
	l = 0;
	while (char_cnt != 0) {
		digit = *p - '0';
		if (digit < 0 || digit >= base)
			break;
		/*
		 * Using '>=' for the last digit is fine: when the digit
		 * equals the limit, the exact result is maxval itself.
		 */
		if (l > limit || (l == limit && digit >= last_digit_limit))
			return (maxval); /* Truncate on overflow. */
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return ((sign < 0) ? -l : l);
}
3337 
/* Parse a counted field as an octal (base-8) integer. */
static int64_t
tar_atol8(const char *p, size_t char_cnt)
{
	const int octal = 8;

	return (tar_atol_base_n(p, char_cnt, octal));
}
3343 
/* Parse a counted field as a decimal (base-10) integer. */
static int64_t
tar_atol10(const char *p, size_t char_cnt)
{
	const int decimal = 10;

	return (tar_atol_base_n(p, char_cnt, decimal));
}
3349 
/*
 * Parse a base-256 integer.  This is just a variable-length
 * twos-complement signed binary value in big-endian order, except
 * that the high-order bit is ignored.  The values here can be up to
 * 12 bytes, so we need to be careful about overflowing 64-bit
 * (8-byte) integers.
 *
 * Bit 0x40 of the first byte selects the sign.  A value that does
 * not fit in 64 bits yields INT64_MAX, or INT64_MIN if negative.
 * char_cnt must be at least 1.
 *
 * This code unashamedly assumes that the local machine uses 8-bit
 * bytes and twos-complement arithmetic.
 */
static int64_t
tar_atol256(const char *_p, size_t char_cnt)
{
	const unsigned char *bytes = (const unsigned char *)_p;
	const int negative = (bytes[0] & 0x40) != 0;
	/* Value every discarded high-order byte must have. */
	const unsigned char fill = negative ? 0xff : 0x00;
	const int64_t saturated = negative ? INT64_MIN : INT64_MAX;
	uint64_t acc = negative ? ~(uint64_t)0 : 0;
	unsigned char cur;

	/* Extend 7-bit 2s-comp to 8-bit 2s-comp. */
	if (negative)
		cur = bytes[0] | 0x80;
	else
		cur = bytes[0] & 0x7f;

	/*
	 * If more than 8 bytes, check that we can ignore
	 * high-order bytes without overflow.
	 */
	for (; char_cnt > sizeof(int64_t); --char_cnt) {
		if (cur != fill)
			return (saturated);
		cur = *++bytes;
	}

	/* cur is the first byte that fits; its top bit must match the sign. */
	if ((cur ^ fill) & 0x80)
		return (saturated);

	/* Accumulate the remaining bytes, most significant first. */
	while (--char_cnt > 0) {
		acc = (acc << 8) | cur;
		cur = *++bytes;
	}
	acc = (acc << 8) | cur;

	/* Return signed twos-complement value. */
	return ((int64_t)acc);
}
3402 
3403 /*
3404  * Returns length of line (including trailing newline)
3405  * or negative on error.  'start' argument is updated to
3406  * point to first character of line.  This avoids copying
3407  * when possible.
3408  */
3409 static ssize_t
readline(struct archive_read * a,struct tar * tar,const char ** start,ssize_t limit,size_t * unconsumed)3410 readline(struct archive_read *a, struct tar *tar, const char **start,
3411     ssize_t limit, size_t *unconsumed)
3412 {
3413 	ssize_t bytes_read;
3414 	ssize_t total_size = 0;
3415 	const void *t;
3416 	const char *s;
3417 	void *p;
3418 
3419 	tar_flush_unconsumed(a, unconsumed);
3420 
3421 	t = __archive_read_ahead(a, 1, &bytes_read);
3422 	if (bytes_read <= 0 || t == NULL)
3423 		return (ARCHIVE_FATAL);
3424 	s = t;  /* Start of line? */
3425 	p = memchr(t, '\n', bytes_read);
3426 	/* If we found '\n' in the read buffer, return pointer to that. */
3427 	if (p != NULL) {
3428 		bytes_read = 1 + ((const char *)p) - s;
3429 		if (bytes_read > limit) {
3430 			archive_set_error(&a->archive,
3431 			    ARCHIVE_ERRNO_FILE_FORMAT,
3432 			    "Line too long");
3433 			return (ARCHIVE_FATAL);
3434 		}
3435 		*unconsumed = bytes_read;
3436 		*start = s;
3437 		return (bytes_read);
3438 	}
3439 	*unconsumed = bytes_read;
3440 	/* Otherwise, we need to accumulate in a line buffer. */
3441 	for (;;) {
3442 		if (total_size + bytes_read > limit) {
3443 			archive_set_error(&a->archive,
3444 			    ARCHIVE_ERRNO_FILE_FORMAT,
3445 			    "Line too long");
3446 			return (ARCHIVE_FATAL);
3447 		}
3448 		if (archive_string_ensure(&tar->line, total_size + bytes_read) == NULL) {
3449 			archive_set_error(&a->archive, ENOMEM,
3450 			    "Can't allocate working buffer");
3451 			return (ARCHIVE_FATAL);
3452 		}
3453 		memcpy(tar->line.s + total_size, t, bytes_read);
3454 		tar_flush_unconsumed(a, unconsumed);
3455 		total_size += bytes_read;
3456 		/* If we found '\n', clean up and return. */
3457 		if (p != NULL) {
3458 			*start = tar->line.s;
3459 			return (total_size);
3460 		}
3461 		/* Read some more. */
3462 		t = __archive_read_ahead(a, 1, &bytes_read);
3463 		if (bytes_read <= 0 || t == NULL)
3464 			return (ARCHIVE_FATAL);
3465 		s = t;  /* Start of line? */
3466 		p = memchr(t, '\n', bytes_read);
3467 		/* If we found '\n', trim the read. */
3468 		if (p != NULL) {
3469 			bytes_read = 1 + ((const char *)p) - s;
3470 		}
3471 		*unconsumed = bytes_read;
3472 	}
3473 }
3474 
/*
 * base64_decode - Base64 decode
 *
 * This accepts most variations of base-64 encoding, including:
 *    * with or without line breaks
 *    * with or without the final group padded with '=' or '_' characters
 * (The most economical Base-64 variant does not pad the last group and
 * omits line breaks; RFC1341 used for MIME requires both.)
 *
 * s, len: the encoded text; it need not be NUL-terminated.
 * out_len: receives the number of decoded bytes.
 *
 * Returns a malloc()ed buffer holding the decoded bytes, which the
 * caller must free, or NULL (with *out_len set to 0) if the
 * allocation fails.  The result is not NUL-terminated.  Characters
 * outside the Base64 alphabet are skipped, and decoding stops at the
 * first '=' or '_'.
 */
static char *
base64_decode(const char *s, size_t len, size_t *out_len)
{
	static const unsigned char digits[64] = {
		'A','B','C','D','E','F','G','H','I','J','K','L','M','N',
		'O','P','Q','R','S','T','U','V','W','X','Y','Z','a','b',
		'c','d','e','f','g','h','i','j','k','l','m','n','o','p',
		'q','r','s','t','u','v','w','x','y','z','0','1','2','3',
		'4','5','6','7','8','9','+','/' };
	unsigned char decode_table[128];
	char *out, *d;
	const unsigned char *src = (const unsigned char *)s;
	unsigned i;

	/*
	 * Build the reverse lookup table on every call.  It is tiny,
	 * and keeping it on the stack avoids mutable state shared
	 * between calls: with a lazily filled static table, a second
	 * thread could see the table as "ready" while it was still
	 * half-initialized and silently drop valid characters.
	 */
	memset(decode_table, 0xff, sizeof(decode_table));
	for (i = 0; i < sizeof(digits); i++)
		decode_table[digits[i]] = (unsigned char)i;

	/* Allocate enough space to hold the entire output. */
	/* Note that we may not use all of this... */
	out = malloc(len - len / 4 + 1);
	if (out == NULL) {
		*out_len = 0;
		return (NULL);
	}
	d = out;

	while (len > 0) {
		/* Collect the next group of (up to) four characters. */
		int v = 0;
		int group_size = 0;
		while (group_size < 4 && len > 0) {
			/* '=' or '_' padding indicates final group. */
			if (*src == '=' || *src == '_') {
				len = 0;
				break;
			}
			/* Skip illegal characters (including line breaks) */
			if (*src > 127 || *src < 32
			    || decode_table[*src] == 0xff) {
				len--;
				src++;
				continue;
			}
			v <<= 6;
			v |= decode_table[*src++];
			len --;
			group_size++;
		}
		/* Align a short group properly. */
		v <<= 6 * (4 - group_size);
		/* Unpack the group we just collected. */
		switch (group_size) {
		case 4: d[2] = v & 0xff;
			/* FALLTHROUGH */
		case 3: d[1] = (v >> 8) & 0xff;
			/* FALLTHROUGH */
		case 2: d[0] = (v >> 16) & 0xff;
			break;
		case 1: /* this is invalid! */
			break;
		}
		/* 4, 3, 2 characters yield 3, 2, 1 bytes; fewer yield none. */
		d += group_size * 3 / 4;
	}

	*out_len = d - out;
	return (out);
}
3555 
/*
 * Decode %XX escapes in the first 'length' bytes of 'in', stopping
 * early at a NUL byte.  A '%' that is not followed by two hex digits
 * inside the input is copied through unchanged.
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must
 * free, or NULL if the allocation fails.
 */
static char *
url_decode(const char *in, size_t length)
{
	const char *src = in;
	char *decoded, *dst;
	int hi, lo;

	decoded = malloc(length + 1);
	if (decoded == NULL)
		return (NULL);
	dst = decoded;

	while (length > 0 && *src != '\0') {
		/* An escape needs the '%' plus two more characters. */
		if (*src == '%' && length > 2) {
			hi = tohex(src[1]);
			lo = tohex(src[2]);
			if (hi >= 0 && lo >= 0) {
				/* Valid escape: emit one byte, skip three. */
				*dst++ = ((hi << 4) | lo);
				src += 3;
				length -= 3;
				continue;
			}
			/* Otherwise treat the '%' as a normal character. */
		}
		*dst++ = *src++;
		--length;
	}
	*dst = '\0';
	return (decoded);
}
3586 
/*
 * Return the value (0..15) of the hexadecimal digit c, accepting
 * both upper and lower case, or -1 if c is not a hex digit.
 */
static int
tohex(int c)
{
	if ('0' <= c && c <= '9')
		return (c - '0');
	if ('A' <= c && c <= 'F')
		return (10 + (c - 'A'));
	if ('a' <= c && c <= 'f')
		return (10 + (c - 'a'));
	return (-1);
}
3599