1 /*-
2 * Copyright (c) 2003-2023 Tim Kientzle
3 * Copyright (c) 2011-2012 Michihiro NAKAJIMA
4 * Copyright (c) 2016 Martin Matuska
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "archive_platform.h"
29
30 #ifdef HAVE_ERRNO_H
31 #include <errno.h>
32 #endif
33 #include <stddef.h>
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 #ifdef HAVE_STRING_H
38 #include <string.h>
39 #endif
40
41 #include "archive.h"
42 #include "archive_acl_private.h" /* For ACL parsing routines. */
43 #include "archive_entry.h"
44 #include "archive_entry_locale.h"
45 #include "archive_private.h"
46 #include "archive_read_private.h"
47
48 #define tar_min(a,b) ((a) < (b) ? (a) : (b))
49
/*
 * Layout of POSIX 'ustar' tar header.
 *
 * Every member is a fixed-width character array, so the struct can be
 * overlaid directly on a header record read from the archive (the
 * reader fetches headers 512 bytes at a time).  The members declared
 * here add up to 500 bytes.
 *
 * The format bidder checks mode, uid, gid, mtime, size, rdevmajor and
 * rdevminor with validate_number_field(), so those are the members
 * treated as numeric fields.
 */
struct archive_entry_header_ustar {
	char	name[100];
	char	mode[8];
	char	uid[8];
	char	gid[8];
	char	size[12];
	char	mtime[12];
	char	checksum[8];
	char	typeflag[1];	/* Selects how the header is interpreted */
	char	linkname[100];	/* "old format" header ends here */
	char	magic[6];	/* For POSIX: "ustar\0" */
	char	version[2];	/* For POSIX: "00" */
	char	uname[32];
	char	gname[32];
	char	rdevmajor[8];
	char	rdevminor[8];
	char	prefix[155];
};
71
72 /*
73 * Structure of GNU tar header
74 */
struct gnu_sparse {
	/*
	 * One offset/numbytes pair as stored in a GNU tar header;
	 * struct archive_entry_header_gnutar embeds four of these.
	 * Both members are fixed-width 12-byte character fields.
	 */
	char	offset[12];
	char	numbytes[12];
};
79
struct archive_entry_header_gnutar {
	/*
	 * The members up to and including 'linkname' have the same
	 * names and sizes as in struct archive_entry_header_ustar.
	 * Here 'magic' is a single 8-byte field covering the bytes
	 * that the ustar layout splits into magic[6] + version[2].
	 */
	char	name[100];
	char	mode[8];
	char	uid[8];
	char	gid[8];
	char	size[12];
	char	mtime[12];
	char	checksum[8];
	char	typeflag[1];
	char	linkname[100];
	char	magic[8];  /* "ustar  \0" (note blank/blank/null at end) */
	char	uname[32];
	char	gname[32];
	char	rdevmajor[8];
	char	rdevminor[8];
	/* GNU-specific members start here. */
	char	atime[12];
	char	ctime[12];
	char	offset[12];
	char	longnames[4];
	char	unused[1];
	struct gnu_sparse sparse[4];	/* Inline sparse map: four pairs */
	char	isextended[1];
	char	realsize[12];
	/*
	 * Old GNU format doesn't use POSIX 'prefix' field; they use
	 * the 'L' (longname) entry instead.
	 */
};
108
109 /*
110 * Data specific to this format.
111 */
/*
 * One element of the per-entry sparse map, kept as a singly linked
 * list headed by tar->sparse_list.
 */
struct sparse_block {
	struct sparse_block	*next;	/* Next element, or NULL */
	/*
	 * Offset reported to the caller for the next bytes of this
	 * block; the data reader advances it as bytes are handed out.
	 */
	int64_t	offset;
	/*
	 * Bytes of this block not yet handed out.  The data reader
	 * frees a block from the head of the list once this is zero.
	 */
	int64_t	remaining;
	/*
	 * Non-zero marks a hole: the data reader skips over it rather
	 * than returning it, and the header reader does not report it
	 * through archive_entry_sparse_add_entry().
	 */
	int hole;
};
118
/*
 * Per-reader state for the tar format.  One instance is allocated
 * (zero-filled) when the format is registered and released by the
 * format's cleanup callback.
 */
struct tar {
	/*
	 * Per-entry strings.  entry_pathname, entry_pathname_override,
	 * entry_uname, entry_gname and entry_linkpath are emptied by
	 * tar_reset_header_state() before the headers of an entry are
	 * parsed.
	 */
	struct archive_string	 entry_pathname;
	/* For "GNU.sparse.name" and other similar path extensions. */
	struct archive_string	 entry_pathname_override;
	struct archive_string	 entry_uname;
	struct archive_string	 entry_gname;
	struct archive_string	 entry_linkpath;
	struct archive_string	 line;
	/* Reset to 1 by tar_reset_header_state() for every entry. */
	int			 pax_hdrcharset_utf8;
	/* Body bytes of the current entry not yet handed to the caller. */
	int64_t			 entry_bytes_remaining;
	/* Reset to 0 at the start of every header read. */
	int64_t			 entry_offset;
	/* Padding consumed together with the end of the entry body. */
	int64_t			 entry_padding;
	/*
	 * Bytes returned by the previous data read that still have to
	 * be consumed from the input before reading further.
	 */
	int64_t 		 entry_bytes_unconsumed;
	/* Reported as the offset when the data reader returns EOF. */
	int64_t			 disk_size;
	int64_t			 GNU_sparse_realsize;
	int64_t			 GNU_sparse_size;
	int64_t			 SCHILY_sparse_realsize;
	int64_t			 pax_size;
	/* Head of the sparse map for the current entry (may be NULL). */
	struct sparse_block	*sparse_list;
	/* NOTE(review): presumably the tail of sparse_list -- confirm. */
	struct sparse_block	*sparse_last;
	int64_t			 sparse_offset;
	int64_t			 sparse_numbytes;
	int			 sparse_gnu_major;
	int			 sparse_gnu_minor;
	/* Cleared per entry; checked when reconciling sparse attributes. */
	char			 sparse_gnu_attributes_seen;
	/* Compared against 'S' and '0' when sparse attributes were seen. */
	char			 filetype;
	/* Cleared at the start of every header read. */
	char			 size_fields; /* Bits defined below */

	struct archive_string	 localname;
	/* Conversion requested through the "hdrcharset" option, if any. */
	struct archive_string_conv *opt_sconv;
	/* Conversion in effect for the current entry. */
	struct archive_string_conv *sconv;
	struct archive_string_conv *sconv_acl;
	/* Default conversion, looked up lazily on first use. */
	struct archive_string_conv *sconv_default;
	/*
	 * Non-zero once the default-conversion lookup must not be
	 * attempted (again).  The "compat-2x" option also sets this, in
	 * which case the lookup is skipped altogether.
	 */
	int			 init_default_conversion;
	/* "compat-2x" option. */
	int			 compat_2x;
	/* "mac-ext" option; defaults to 1 when HAVE_COPYFILE_H is set. */
	int			 process_mac_extensions;
	/*
	 * "read_concatenated_archives" option: when set, all-null
	 * blocks are skipped instead of ending the archive.
	 */
	int			 read_concatenated_archives;
};
157
/*
 * Track which size fields were present in the headers.
 *
 * These are single-bit values meant to be OR-ed together in
 * tar->size_fields, which is cleared at the start of every entry.
 */
#define TAR_SIZE_PAX_SIZE 1
#define TAR_SIZE_GNU_SPARSE_REALSIZE 2
#define TAR_SIZE_GNU_SPARSE_SIZE 4
#define TAR_SIZE_SCHILY_SPARSE_REALSIZE 8
163
164
/*
 * Forward declarations for the file-local functions.
 *
 * Helpers that are defined ahead of their first use (for example
 * validate_number_field() and tar_reset_header_state()) are not
 * listed here.
 */
static int	archive_block_is_null(const char *p);
static char	*base64_decode(const char *, size_t, size_t *);
static int	gnu_add_sparse_entry(struct archive_read *, struct tar *,
		    int64_t offset, int64_t remaining);

static void	gnu_clear_sparse_list(struct tar *);
static int	gnu_sparse_old_read(struct archive_read *, struct tar *,
		    const struct archive_entry_header_gnutar *header, int64_t *);
static int	gnu_sparse_old_parse(struct archive_read *, struct tar *,
		    const struct gnu_sparse *sparse, int length);
static int	gnu_sparse_01_parse(struct archive_read *, struct tar *,
		    const char *, size_t);
static int64_t	gnu_sparse_10_read(struct archive_read *, struct tar *,
		    int64_t *);
static int	header_Solaris_ACL(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *, int64_t *);
static int	header_common(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_old_tar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_pax_extension(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *, int64_t *);
static int	header_pax_global(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, int64_t *);
static int	header_gnu_longlink(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, int64_t *);
static int	header_gnu_longname(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, int64_t *);
static int	is_mac_metadata_entry(struct archive_entry *entry);
static int	read_mac_metadata_blob(struct archive_read *,
		    struct archive_entry *, int64_t *);
static int	header_volume(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, int64_t *);
static int	header_ustar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_gnutar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h, int64_t *);
/* Callbacks handed to the format registry. */
static int	archive_read_format_tar_bid(struct archive_read *, int);
static int	archive_read_format_tar_options(struct archive_read *,
		    const char *, const char *);
static int	archive_read_format_tar_cleanup(struct archive_read *);
static int	archive_read_format_tar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, int64_t *offset);
static int	archive_read_format_tar_skip(struct archive_read *a);
static int	archive_read_format_tar_read_header(struct archive_read *,
		    struct archive_entry *);
static int	checksum(struct archive_read *, const void *);
static int	pax_attribute(struct archive_read *, struct tar *,
		    struct archive_entry *, const char *key, size_t key_length,
		    size_t value_length, int64_t *unconsumed);
static int	pax_attribute_LIBARCHIVE_xattr(struct archive_entry *,
		    const char *, size_t, const char *, size_t);
static int	pax_attribute_SCHILY_acl(struct archive_read *, struct tar *,
		    struct archive_entry *, size_t, int);
static int	pax_attribute_SUN_holesdata(struct archive_read *, struct tar *,
		    struct archive_entry *, const char *, size_t);
static void	pax_time(const char *, size_t, int64_t *sec, long *nanos);
static ssize_t	readline(struct archive_read *, struct tar *, const char **,
		    ssize_t limit, int64_t *);
static int	read_body_to_string(struct archive_read *, struct tar *,
		    struct archive_string *, const void *h, int64_t *);
static int	read_bytes_to_string(struct archive_read *,
		    struct archive_string *, size_t, int64_t *);
static int64_t	tar_atol(const char *, size_t);
static int64_t	tar_atol10(const char *, size_t);
static int64_t	tar_atol256(const char *, size_t);
static int64_t	tar_atol8(const char *, size_t);
static int	tar_read_header(struct archive_read *, struct tar *,
		    struct archive_entry *, int64_t *);
static int	tohex(int c);
static char	*url_decode(const char *, size_t);
static void	tar_flush_unconsumed(struct archive_read *, int64_t *);
237
/*
 * Sanity limits: These numbers should be low enough to
 * prevent a maliciously-crafted archive from forcing us to
 * allocate extreme amounts of memory.  But of course, they
 * need to be high enough for any correct value.  These
 * will likely need some adjustment as we get more experience.
 *
 * All limits are byte counts.
 */
static const size_t guname_limit = 65536; /* Longest uname or gname: 64kiB */
static const size_t pathname_limit = 1048576; /* Longest path name: 1MiB */
static const size_t sparse_map_limit = 8 * 1048576; /* Longest sparse map: 8MiB */
static const size_t xattr_limit = 16 * 1048576; /* Longest xattr: 16MiB */
static const size_t fflags_limit = 512; /* Longest fflags */
static const size_t acl_limit = 131072; /* Longest textual ACL: 128kiB */
static const int64_t entry_limit = 0xfffffffffffffffLL; /* 2^60 bytes = 1 ExbiByte */
250
251 int
archive_read_support_format_gnutar(struct archive * a)252 archive_read_support_format_gnutar(struct archive *a)
253 {
254 archive_check_magic(a, ARCHIVE_READ_MAGIC,
255 ARCHIVE_STATE_NEW, "archive_read_support_format_gnutar");
256 return (archive_read_support_format_tar(a));
257 }
258
259
260 int
archive_read_support_format_tar(struct archive * _a)261 archive_read_support_format_tar(struct archive *_a)
262 {
263 struct archive_read *a = (struct archive_read *)_a;
264 struct tar *tar;
265 int r;
266
267 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
268 ARCHIVE_STATE_NEW, "archive_read_support_format_tar");
269
270 tar = calloc(1, sizeof(*tar));
271 if (tar == NULL) {
272 archive_set_error(&a->archive, ENOMEM,
273 "Can't allocate tar data");
274 return (ARCHIVE_FATAL);
275 }
276 #ifdef HAVE_COPYFILE_H
277 /* Set this by default on Mac OS. */
278 tar->process_mac_extensions = 1;
279 #endif
280
281 r = __archive_read_register_format(a, tar, "tar",
282 archive_read_format_tar_bid,
283 archive_read_format_tar_options,
284 archive_read_format_tar_read_header,
285 archive_read_format_tar_read_data,
286 archive_read_format_tar_skip,
287 NULL,
288 archive_read_format_tar_cleanup,
289 NULL,
290 NULL);
291
292 if (r != ARCHIVE_OK)
293 free(tar);
294 return (ARCHIVE_OK);
295 }
296
297 static int
archive_read_format_tar_cleanup(struct archive_read * a)298 archive_read_format_tar_cleanup(struct archive_read *a)
299 {
300 struct tar *tar;
301
302 tar = (struct tar *)(a->format->data);
303 gnu_clear_sparse_list(tar);
304 archive_string_free(&tar->entry_pathname);
305 archive_string_free(&tar->entry_pathname_override);
306 archive_string_free(&tar->entry_uname);
307 archive_string_free(&tar->entry_gname);
308 archive_string_free(&tar->entry_linkpath);
309 archive_string_free(&tar->line);
310 archive_string_free(&tar->localname);
311 free(tar);
312 (a->format->data) = NULL;
313 return (ARCHIVE_OK);
314 }
315
316 /*
317 * Validate number field
318 *
319 * This has to be pretty lenient in order to accommodate the enormous
320 * variety of tar writers in the world:
321 * = POSIX (IEEE Std 1003.1-1988) ustar requires octal values with leading
322 * zeros and allows fields to be terminated with space or null characters
323 * = Many writers use different termination (in particular, libarchive
324 * omits terminator bytes to squeeze one or two more digits)
325 * = Many writers pad with space and omit leading zeros
326 * = GNU tar and star write base-256 values if numbers are too
327 * big to be represented in octal
328 *
329 * Examples of specific tar headers that we should support:
330 * = Perl Archive::Tar terminates uid, gid, devminor and devmajor with two
331 * null bytes, pads size with spaces and other numeric fields with zeroes
332 * = plexus-archiver prior to 2.6.3 (before switching to commons-compress)
333 * may have uid and gid fields filled with spaces without any octal digits
334 * at all and pads all numeric fields with spaces
335 *
336 * This should tolerate all variants in use. It will reject a field
337 * where the writer just left garbage after a trailing NUL.
338 */
static int
validate_number_field(const char* p_field, size_t i_size)
{
	const unsigned char lead = (unsigned char)p_field[0];
	size_t pos = 0;

	/*
	 * A first byte of 0x80 or 0xff marks a base-256 value and a
	 * leading NUL is accepted as well; in all three cases there is
	 * nothing further we can check.
	 */
	if (lead == 0x80 || lead == 0xff || lead == 0x00)
		return 1;

	/* Otherwise the field must be octal: optional leading blanks... */
	for (; pos < i_size && p_field[pos] == ' '; ++pos)
		;
	/* ...then a (possibly empty) run of octal digits... */
	for (; pos < i_size &&
	    p_field[pos] >= '0' && p_field[pos] <= '7'; ++pos)
		;
	/* ...and nothing but blank or NUL padding up to the end. */
	for (; pos < i_size; ++pos) {
		if (p_field[pos] != ' ' && p_field[pos] != '\0')
			return 0;
	}
	return 1;
}
367
368 static int
archive_read_format_tar_bid(struct archive_read * a,int best_bid)369 archive_read_format_tar_bid(struct archive_read *a, int best_bid)
370 {
371 int bid;
372 const char *h;
373 const struct archive_entry_header_ustar *header;
374
375 (void)best_bid; /* UNUSED */
376
377 bid = 0;
378
379 /* Now let's look at the actual header and see if it matches. */
380 h = __archive_read_ahead(a, 512, NULL);
381 if (h == NULL)
382 return (-1);
383
384 /* If it's an end-of-archive mark, we can handle it. */
385 if (h[0] == 0 && archive_block_is_null(h)) {
386 /*
387 * Usually, I bid the number of bits verified, but
388 * in this case, 4096 seems excessive so I picked 10 as
389 * an arbitrary but reasonable-seeming value.
390 */
391 return (10);
392 }
393
394 /* If it's not an end-of-archive mark, it must have a valid checksum.*/
395 if (!checksum(a, h))
396 return (0);
397 bid += 48; /* Checksum is usually 6 octal digits. */
398
399 header = (const struct archive_entry_header_ustar *)h;
400
401 /* Recognize POSIX formats. */
402 if ((memcmp(header->magic, "ustar\0", 6) == 0)
403 && (memcmp(header->version, "00", 2) == 0))
404 bid += 56;
405
406 /* Recognize GNU tar format. */
407 if ((memcmp(header->magic, "ustar ", 6) == 0)
408 && (memcmp(header->version, " \0", 2) == 0))
409 bid += 56;
410
411 /* Type flag must be null, digit or A-Z, a-z. */
412 if (header->typeflag[0] != 0 &&
413 !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9') &&
414 !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z') &&
415 !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') )
416 return (0);
417 bid += 2; /* 6 bits of variation in an 8-bit field leaves 2 bits. */
418
419 /*
420 * Check format of mode/uid/gid/mtime/size/rdevmajor/rdevminor fields.
421 */
422 if (validate_number_field(header->mode, sizeof(header->mode)) == 0
423 || validate_number_field(header->uid, sizeof(header->uid)) == 0
424 || validate_number_field(header->gid, sizeof(header->gid)) == 0
425 || validate_number_field(header->mtime, sizeof(header->mtime)) == 0
426 || validate_number_field(header->size, sizeof(header->size)) == 0
427 || validate_number_field(header->rdevmajor, sizeof(header->rdevmajor)) == 0
428 || validate_number_field(header->rdevminor, sizeof(header->rdevminor)) == 0) {
429 bid = 0;
430 }
431
432 return (bid);
433 }
434
435 static int
archive_read_format_tar_options(struct archive_read * a,const char * key,const char * val)436 archive_read_format_tar_options(struct archive_read *a,
437 const char *key, const char *val)
438 {
439 struct tar *tar;
440 int ret = ARCHIVE_FAILED;
441
442 tar = (struct tar *)(a->format->data);
443 if (strcmp(key, "compat-2x") == 0) {
444 /* Handle UTF-8 filenames as libarchive 2.x */
445 tar->compat_2x = (val != NULL && val[0] != 0);
446 tar->init_default_conversion = tar->compat_2x;
447 return (ARCHIVE_OK);
448 } else if (strcmp(key, "hdrcharset") == 0) {
449 if (val == NULL || val[0] == 0)
450 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
451 "tar: hdrcharset option needs a character-set name");
452 else {
453 tar->opt_sconv =
454 archive_string_conversion_from_charset(
455 &a->archive, val, 0);
456 if (tar->opt_sconv != NULL)
457 ret = ARCHIVE_OK;
458 else
459 ret = ARCHIVE_FATAL;
460 }
461 return (ret);
462 } else if (strcmp(key, "mac-ext") == 0) {
463 tar->process_mac_extensions = (val != NULL && val[0] != 0);
464 return (ARCHIVE_OK);
465 } else if (strcmp(key, "read_concatenated_archives") == 0) {
466 tar->read_concatenated_archives = (val != NULL && val[0] != 0);
467 return (ARCHIVE_OK);
468 }
469
470 /* Note: The "warn" return is just to inform the options
471 * supervisor that we didn't handle it. It will generate
472 * a suitable error if no one used this option. */
473 return (ARCHIVE_WARN);
474 }
475
/*
 * Consume any input that has been looked at but not yet consumed.
 *
 * Header parsing tracks such bytes in a single counter; this helper is
 * the one place where that counter is turned into an actual consume,
 * and it must run before the next read-ahead.  The counter is reset to
 * zero afterwards.  Nothing happens when the counter is already zero.
 *
 * Debugging aid: to catch code that still uses the claimed-unconsumed
 * region, one can fetch that region with __archive_read_ahead() and
 * fill it with 0xff right before consuming it.  That currently DOES
 * break things, so it is only for chasing that specific problem.
 */
static void
tar_flush_unconsumed(struct archive_read *a, int64_t *unconsumed)
{
	const int64_t pending = *unconsumed;

	if (pending == 0)
		return;

	__archive_read_consume(a, pending);
	*unconsumed = 0;
}
497
498 /*
499 * The function invoked by archive_read_next_header(). This
500 * just sets up a few things and then calls the internal
501 * tar_read_header() function below.
502 */
503 static int
archive_read_format_tar_read_header(struct archive_read * a,struct archive_entry * entry)504 archive_read_format_tar_read_header(struct archive_read *a,
505 struct archive_entry *entry)
506 {
507 /*
508 * When converting tar archives to cpio archives, it is
509 * essential that each distinct file have a distinct inode
510 * number. To simplify this, we keep a static count here to
511 * assign fake dev/inode numbers to each tar entry. Note that
512 * pax format archives may overwrite this with something more
513 * useful.
514 *
515 * Ideally, we would track every file read from the archive so
516 * that we could assign the same dev/ino pair to hardlinks,
517 * but the memory required to store a complete lookup table is
518 * probably not worthwhile just to support the relatively
519 * obscure tar->cpio conversion case.
520 */
521 /* TODO: Move this into `struct tar` to avoid conflicts
522 * when reading multiple archives */
523 static int default_inode;
524 static int default_dev;
525 struct tar *tar;
526 const char *p;
527 const wchar_t *wp;
528 int r;
529 size_t l;
530 int64_t unconsumed = 0;
531
532 /* Assign default device/inode values. */
533 archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. */
534 archive_entry_set_ino(entry, ++default_inode); /* Don't use zero. */
535 /* Limit generated st_ino number to 16 bits. */
536 if (default_inode >= 0xffff) {
537 ++default_dev;
538 default_inode = 0;
539 }
540
541 tar = (struct tar *)(a->format->data);
542 tar->entry_offset = 0;
543 gnu_clear_sparse_list(tar);
544 tar->size_fields = 0; /* We don't have any size info yet */
545
546 /* Setup default string conversion. */
547 tar->sconv = tar->opt_sconv;
548 if (tar->sconv == NULL) {
549 if (!tar->init_default_conversion) {
550 tar->sconv_default =
551 archive_string_default_conversion_for_read(&(a->archive));
552 tar->init_default_conversion = 1;
553 }
554 tar->sconv = tar->sconv_default;
555 }
556
557 r = tar_read_header(a, tar, entry, &unconsumed);
558
559 tar_flush_unconsumed(a, &unconsumed);
560
561 /*
562 * "non-sparse" files are really just sparse files with
563 * a single block.
564 */
565 if (tar->sparse_list == NULL) {
566 if (gnu_add_sparse_entry(a, tar, 0, tar->entry_bytes_remaining)
567 != ARCHIVE_OK)
568 return (ARCHIVE_FATAL);
569 } else {
570 struct sparse_block *sb;
571
572 for (sb = tar->sparse_list; sb != NULL; sb = sb->next) {
573 if (!sb->hole)
574 archive_entry_sparse_add_entry(entry,
575 sb->offset, sb->remaining);
576 }
577 }
578
579 if (r == ARCHIVE_OK && archive_entry_filetype(entry) == AE_IFREG) {
580 /*
581 * "Regular" entry with trailing '/' is really
582 * directory: This is needed for certain old tar
583 * variants and even for some broken newer ones.
584 */
585 if ((wp = archive_entry_pathname_w(entry)) != NULL) {
586 l = wcslen(wp);
587 if (l > 0 && wp[l - 1] == L'/') {
588 archive_entry_set_filetype(entry, AE_IFDIR);
589 tar->entry_bytes_remaining = 0;
590 tar->entry_padding = 0;
591 }
592 } else if ((p = archive_entry_pathname(entry)) != NULL) {
593 l = strlen(p);
594 if (l > 0 && p[l - 1] == '/') {
595 archive_entry_set_filetype(entry, AE_IFDIR);
596 tar->entry_bytes_remaining = 0;
597 tar->entry_padding = 0;
598 }
599 }
600 }
601 return (r);
602 }
603
604 static int
archive_read_format_tar_read_data(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)605 archive_read_format_tar_read_data(struct archive_read *a,
606 const void **buff, size_t *size, int64_t *offset)
607 {
608 ssize_t bytes_read;
609 struct tar *tar;
610 struct sparse_block *p;
611
612 tar = (struct tar *)(a->format->data);
613
614 for (;;) {
615 /* Remove exhausted entries from sparse list. */
616 while (tar->sparse_list != NULL &&
617 tar->sparse_list->remaining == 0) {
618 p = tar->sparse_list;
619 tar->sparse_list = p->next;
620 free(p);
621 }
622
623 if (tar->entry_bytes_unconsumed) {
624 __archive_read_consume(a, tar->entry_bytes_unconsumed);
625 tar->entry_bytes_unconsumed = 0;
626 }
627
628 /* If we're at end of file, return EOF. */
629 if (tar->sparse_list == NULL ||
630 tar->entry_bytes_remaining == 0) {
631 int64_t request = tar->entry_bytes_remaining +
632 tar->entry_padding;
633
634 if (__archive_read_consume(a, request) != request)
635 return (ARCHIVE_FATAL);
636 tar->entry_padding = 0;
637 *buff = NULL;
638 *size = 0;
639 *offset = tar->disk_size;
640 return (ARCHIVE_EOF);
641 }
642
643 *buff = __archive_read_ahead(a, 1, &bytes_read);
644 if (*buff == NULL) {
645 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
646 "Truncated tar archive"
647 " detected while reading data");
648 return (ARCHIVE_FATAL);
649 }
650 if (bytes_read > tar->entry_bytes_remaining)
651 bytes_read = (ssize_t)tar->entry_bytes_remaining;
652 /* Don't read more than is available in the
653 * current sparse block. */
654 if (tar->sparse_list->remaining < bytes_read)
655 bytes_read = (ssize_t)tar->sparse_list->remaining;
656 *size = bytes_read;
657 *offset = tar->sparse_list->offset;
658 tar->sparse_list->remaining -= bytes_read;
659 tar->sparse_list->offset += bytes_read;
660 tar->entry_bytes_remaining -= bytes_read;
661 tar->entry_bytes_unconsumed = bytes_read;
662
663 if (!tar->sparse_list->hole)
664 return (ARCHIVE_OK);
665 /* Current is hole data and skip this. */
666 }
667 }
668
669 static int
archive_read_format_tar_skip(struct archive_read * a)670 archive_read_format_tar_skip(struct archive_read *a)
671 {
672 int64_t request;
673 struct tar* tar;
674
675 tar = (struct tar *)(a->format->data);
676
677 request = tar->entry_bytes_remaining + tar->entry_padding +
678 tar->entry_bytes_unconsumed;
679
680 if (__archive_read_consume(a, request) != request)
681 return (ARCHIVE_FATAL);
682
683 tar->entry_bytes_remaining = 0;
684 tar->entry_bytes_unconsumed = 0;
685 tar->entry_padding = 0;
686
687 /* Free the sparse list. */
688 gnu_clear_sparse_list(tar);
689
690 return (ARCHIVE_OK);
691 }
692
693 /*
694 * This function resets the accumulated state while reading
695 * a header.
696 */
697 static void
tar_reset_header_state(struct tar * tar)698 tar_reset_header_state(struct tar *tar)
699 {
700 tar->pax_hdrcharset_utf8 = 1;
701 tar->sparse_gnu_attributes_seen = 0;
702 archive_string_empty(&(tar->entry_gname));
703 archive_string_empty(&(tar->entry_pathname));
704 archive_string_empty(&(tar->entry_pathname_override));
705 archive_string_empty(&(tar->entry_uname));
706 archive_string_empty(&tar->entry_linkpath);
707 }
708
709 /*
710 * This function reads and interprets all of the headers associated
711 * with a single entry.
712 */
713 static int
tar_read_header(struct archive_read * a,struct tar * tar,struct archive_entry * entry,int64_t * unconsumed)714 tar_read_header(struct archive_read *a, struct tar *tar,
715 struct archive_entry *entry, int64_t *unconsumed)
716 {
717 ssize_t bytes;
718 int err = ARCHIVE_OK, err2;
719 int eof_fatal = 0; /* EOF is okay at some points... */
720 const char *h;
721 const struct archive_entry_header_ustar *header;
722 const struct archive_entry_header_gnutar *gnuheader;
723
724 /* Bitmask of what header types we've seen. */
725 int32_t seen_headers = 0;
726 static const int32_t seen_A_header = 1;
727 static const int32_t seen_g_header = 2;
728 static const int32_t seen_K_header = 4;
729 static const int32_t seen_L_header = 8;
730 static const int32_t seen_V_header = 16;
731 static const int32_t seen_x_header = 32; /* Also X */
732 static const int32_t seen_mac_metadata = 512;
733
734 tar_reset_header_state(tar);
735
736 /* Ensure format is set. */
737 if (a->archive.archive_format_name == NULL) {
738 a->archive.archive_format = ARCHIVE_FORMAT_TAR;
739 a->archive.archive_format_name = "tar";
740 }
741
742 /*
743 * TODO: Write global/default pax options into
744 * 'entry' struct here before overwriting with
745 * file-specific options.
746 */
747
748 /* Loop over all the headers needed for the next entry */
749 for (;;) {
750
751 /* Find the next valid header record. */
752 while (1) {
753 tar_flush_unconsumed(a, unconsumed);
754
755 /* Read 512-byte header record */
756 h = __archive_read_ahead(a, 512, &bytes);
757 if (bytes == 0) { /* EOF at a block boundary. */
758 if (eof_fatal) {
759 /* We've read a special header already;
760 * if there's no regular header, then this is
761 * a premature EOF. */
762 archive_set_error(&a->archive, EINVAL,
763 "Damaged tar archive (end-of-archive within a sequence of headers)");
764 return (ARCHIVE_FATAL);
765 } else {
766 return (ARCHIVE_EOF);
767 }
768 }
769 if (h == NULL) { /* Short block at EOF; this is bad. */
770 archive_set_error(&a->archive,
771 ARCHIVE_ERRNO_FILE_FORMAT,
772 "Truncated tar archive"
773 " detected while reading next header");
774 return (ARCHIVE_FATAL);
775 }
776 *unconsumed += 512;
777
778 if (h[0] == 0 && archive_block_is_null(h)) {
779 /* We found a NULL block which indicates end-of-archive */
780
781 if (tar->read_concatenated_archives) {
782 /* We're ignoring NULL blocks, so keep going. */
783 continue;
784 }
785
786 /* Try to consume a second all-null record, as well. */
787 /* If we can't, that's okay. */
788 tar_flush_unconsumed(a, unconsumed);
789 h = __archive_read_ahead(a, 512, NULL);
790 if (h != NULL && h[0] == 0 && archive_block_is_null(h))
791 __archive_read_consume(a, 512);
792
793 archive_clear_error(&a->archive);
794 return (ARCHIVE_EOF);
795 }
796
797 /* This is NOT a null block, so it must be a valid header. */
798 if (!checksum(a, h)) {
799 tar_flush_unconsumed(a, unconsumed);
800 archive_set_error(&a->archive, EINVAL,
801 "Damaged tar archive (bad header checksum)");
802 /* If we've read some critical information (pax headers, etc)
803 * and _then_ see a bad header, we can't really recover. */
804 if (eof_fatal) {
805 return (ARCHIVE_FATAL);
806 } else {
807 return (ARCHIVE_RETRY);
808 }
809 }
810 break;
811 }
812
813 /* Determine the format variant. */
814 header = (const struct archive_entry_header_ustar *)h;
815 switch(header->typeflag[0]) {
816 case 'A': /* Solaris tar ACL */
817 if (seen_headers & seen_A_header) {
818 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
819 "Redundant 'A' header");
820 return (ARCHIVE_FATAL);
821 }
822 seen_headers |= seen_A_header;
823 a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
824 a->archive.archive_format_name = "Solaris tar";
825 err2 = header_Solaris_ACL(a, tar, entry, h, unconsumed);
826 break;
827 case 'g': /* POSIX-standard 'g' header. */
828 if (seen_headers & seen_g_header) {
829 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
830 "Redundant 'g' header");
831 return (ARCHIVE_FATAL);
832 }
833 seen_headers |= seen_g_header;
834 a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
835 a->archive.archive_format_name = "POSIX pax interchange format";
836 err2 = header_pax_global(a, tar, entry, h, unconsumed);
837 break;
838 case 'K': /* Long link name (GNU tar, others) */
839 if (seen_headers & seen_K_header) {
840 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
841 "Damaged archive: Redundant 'K' headers may cause linknames to be incorrect");
842 err = err_combine(err, ARCHIVE_WARN);
843 }
844 seen_headers |= seen_K_header;
845 a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR;
846 a->archive.archive_format_name = "GNU tar format";
847 err2 = header_gnu_longlink(a, tar, entry, h, unconsumed);
848 break;
849 case 'L': /* Long filename (GNU tar, others) */
850 if (seen_headers & seen_L_header) {
851 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
852 "Damaged archive: Redundant 'L' headers may cause filenames to be incorrect");
853 err = err_combine(err, ARCHIVE_WARN);
854 }
855 seen_headers |= seen_L_header;
856 a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR;
857 a->archive.archive_format_name = "GNU tar format";
858 err2 = header_gnu_longname(a, tar, entry, h, unconsumed);
859 break;
860 case 'V': /* GNU volume header */
861 if (seen_headers & seen_V_header) {
862 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
863 "Redundant 'V' header");
864 err = err_combine(err, ARCHIVE_WARN);
865 }
866 seen_headers |= seen_V_header;
867 a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR;
868 a->archive.archive_format_name = "GNU tar format";
869 err2 = header_volume(a, tar, entry, h, unconsumed);
870 break;
871 case 'X': /* Used by SUN tar; same as 'x'. */
872 if (seen_headers & seen_x_header) {
873 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
874 "Redundant 'X'/'x' header");
875 return (ARCHIVE_FATAL);
876 }
877 seen_headers |= seen_x_header;
878 a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
879 a->archive.archive_format_name =
880 "POSIX pax interchange format (Sun variant)";
881 err2 = header_pax_extension(a, tar, entry, h, unconsumed);
882 break;
883 case 'x': /* POSIX-standard 'x' header. */
884 if (seen_headers & seen_x_header) {
885 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
886 "Redundant 'x' header");
887 return (ARCHIVE_FATAL);
888 }
889 seen_headers |= seen_x_header;
890 a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
891 a->archive.archive_format_name = "POSIX pax interchange format";
892 err2 = header_pax_extension(a, tar, entry, h, unconsumed);
893 break;
894 default: /* Regular header: Legacy tar, GNU tar, or ustar */
895 gnuheader = (const struct archive_entry_header_gnutar *)h;
896 if (memcmp(gnuheader->magic, "ustar \0", 8) == 0) {
897 a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR;
898 a->archive.archive_format_name = "GNU tar format";
899 err2 = header_gnutar(a, tar, entry, h, unconsumed);
900 } else if (memcmp(header->magic, "ustar", 5) == 0) {
901 if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
902 a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR;
903 a->archive.archive_format_name = "POSIX ustar format";
904 }
905 err2 = header_ustar(a, tar, entry, h);
906 } else {
907 a->archive.archive_format = ARCHIVE_FORMAT_TAR;
908 a->archive.archive_format_name = "tar (non-POSIX)";
909 err2 = header_old_tar(a, tar, entry, h);
910 }
911 err = err_combine(err, err2);
912 /* We return warnings or success as-is. Anything else is fatal. */
913 if (err < ARCHIVE_WARN) {
914 return (ARCHIVE_FATAL);
915 }
916 /* Filename of the form `._filename` is an AppleDouble
917 * extension entry. The body is the macOS metadata blob;
918 * this is followed by another entry with the actual
919 * regular file data.
920 * This design has two drawbacks:
921 * = it's brittle; you might just have a file with such a name
922 * = it duplicates any long pathname extensions
923 *
924 * TODO: This probably shouldn't be here at all. Consider
925 * just returning the contents as a regular entry here and
926 * then dealing with it when we write data to disk.
927 */
928 if (tar->process_mac_extensions
929 && ((seen_headers & seen_mac_metadata) == 0)
930 && is_mac_metadata_entry(entry)) {
931 err2 = read_mac_metadata_blob(a, entry, unconsumed);
932 if (err2 < ARCHIVE_WARN) {
933 return (ARCHIVE_FATAL);
934 }
935 err = err_combine(err, err2);
936 /* Note: Other headers can appear again. */
937 seen_headers = seen_mac_metadata;
938 tar_reset_header_state(tar);
939 break;
940 }
941
942 /* Reconcile GNU sparse attributes */
943 if (tar->sparse_gnu_attributes_seen) {
944 /* Only 'S' (GNU sparse) and ustar '0' regular files can be sparse */
945 if (tar->filetype != 'S' && tar->filetype != '0') {
946 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
947 "Non-regular file cannot be sparse");
948 return (ARCHIVE_WARN);
949 } else if (tar->sparse_gnu_major == 0 &&
950 tar->sparse_gnu_minor == 0) {
951 /* Sparse map already parsed from 'x' header */
952 } else if (tar->sparse_gnu_major == 0 &&
953 tar->sparse_gnu_minor == 1) {
954 /* Sparse map already parsed from 'x' header */
955 } else if (tar->sparse_gnu_major == 1 &&
956 tar->sparse_gnu_minor == 0) {
957 /* Sparse map is prepended to file contents */
958 ssize_t bytes_read;
959 bytes_read = gnu_sparse_10_read(a, tar, unconsumed);
960 if (bytes_read < 0)
961 return ((int)bytes_read);
962 tar->entry_bytes_remaining -= bytes_read;
963 } else {
964 archive_set_error(&a->archive,
965 ARCHIVE_ERRNO_MISC,
966 "Unrecognized GNU sparse file format");
967 return (ARCHIVE_WARN);
968 }
969 }
970 return (err);
971 }
972
973 /* We're between headers ... */
974 err = err_combine(err, err2);
975 if (err == ARCHIVE_FATAL)
976 return (err);
977
978 /* The GNU volume header and the pax `g` global header
979 * are both allowed to be the only header in an
980 * archive. If we've seen any other header, a
981 * following EOF is fatal. */
982 if ((seen_headers & ~seen_V_header & ~seen_g_header) != 0) {
983 eof_fatal = 1;
984 }
985 }
986 }
987
988 /*
989 * Return true if block checksum is correct.
990 */
991 static int
checksum(struct archive_read * a,const void * h)992 checksum(struct archive_read *a, const void *h)
993 {
994 const unsigned char *bytes;
995 const struct archive_entry_header_ustar *header;
996 int check, sum;
997 size_t i;
998
999 (void)a; /* UNUSED */
1000 bytes = (const unsigned char *)h;
1001 header = (const struct archive_entry_header_ustar *)h;
1002
1003 /* Checksum field must hold an octal number */
1004 for (i = 0; i < sizeof(header->checksum); ++i) {
1005 char c = header->checksum[i];
1006 if (c != ' ' && c != '\0' && (c < '0' || c > '7'))
1007 return 0;
1008 }
1009
1010 /*
1011 * Test the checksum. Note that POSIX specifies _unsigned_
1012 * bytes for this calculation.
1013 */
1014 sum = (int)tar_atol(header->checksum, sizeof(header->checksum));
1015 check = 0;
1016 for (i = 0; i < 148; i++)
1017 check += (unsigned char)bytes[i];
1018 for (; i < 156; i++)
1019 check += 32;
1020 for (; i < 512; i++)
1021 check += (unsigned char)bytes[i];
1022 if (sum == check)
1023 return (1);
1024
1025 /*
1026 * Repeat test with _signed_ bytes, just in case this archive
1027 * was created by an old BSD, Solaris, or HP-UX tar with a
1028 * broken checksum calculation.
1029 */
1030 check = 0;
1031 for (i = 0; i < 148; i++)
1032 check += (signed char)bytes[i];
1033 for (; i < 156; i++)
1034 check += 32;
1035 for (; i < 512; i++)
1036 check += (signed char)bytes[i];
1037 if (sum == check)
1038 return (1);
1039
1040 #if DONT_FAIL_ON_CRC_ERROR
1041 /* Speed up fuzzing by pretending the checksum is always right. */
1042 return (1);
1043 #else
1044 return (0);
1045 #endif
1046 }
1047
/*
 * Report whether the 512 bytes starting at `p` are all zero.
 * Returns 1 for an all-zero block, 0 as soon as a non-zero byte is found.
 */
static int
archive_block_is_null(const char *p)
{
	const char *end = p + 512;

	while (p < end) {
		if (*p != '\0')
			return (0);
		p++;
	}
	return (1);
}
1061
1062 /*
1063 * Interpret 'A' Solaris ACL header
1064 */
1065 static int
header_Solaris_ACL(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const void * h,int64_t * unconsumed)1066 header_Solaris_ACL(struct archive_read *a, struct tar *tar,
1067 struct archive_entry *entry, const void *h, int64_t *unconsumed)
1068 {
1069 const struct archive_entry_header_ustar *header;
1070 struct archive_string acl_text;
1071 size_t size;
1072 int err, acl_type;
1073 uint64_t type;
1074 char *acl, *p;
1075
1076 header = (const struct archive_entry_header_ustar *)h;
1077 size = (size_t)tar_atol(header->size, sizeof(header->size));
1078 archive_string_init(&acl_text);
1079 err = read_body_to_string(a, tar, &acl_text, h, unconsumed);
1080 if (err != ARCHIVE_OK) {
1081 archive_string_free(&acl_text);
1082 return (err);
1083 }
1084
1085 /* TODO: Examine the first characters to see if this
1086 * is an AIX ACL descriptor. We'll likely never support
1087 * them, but it would be polite to recognize and warn when
1088 * we do see them. */
1089
1090 /* Leading octal number indicates ACL type and number of entries. */
1091 p = acl = acl_text.s;
1092 type = 0;
1093 while (*p != '\0' && p < acl + size) {
1094 if (*p < '0' || *p > '7') {
1095 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1096 "Malformed Solaris ACL attribute (invalid digit)");
1097 archive_string_free(&acl_text);
1098 return(ARCHIVE_WARN);
1099 }
1100 type <<= 3;
1101 type += *p - '0';
1102 if (type > 077777777) {
1103 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1104 "Malformed Solaris ACL attribute (count too large)");
1105 archive_string_free(&acl_text);
1106 return (ARCHIVE_WARN);
1107 }
1108 p++;
1109 }
1110 switch (type & ~0777777) {
1111 case 01000000:
1112 /* POSIX.1e ACL */
1113 acl_type = ARCHIVE_ENTRY_ACL_TYPE_ACCESS;
1114 break;
1115 case 03000000:
1116 /* NFSv4 ACL */
1117 acl_type = ARCHIVE_ENTRY_ACL_TYPE_NFS4;
1118 break;
1119 default:
1120 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1121 "Malformed Solaris ACL attribute (unsupported type %llu)",
1122 (unsigned long long)type);
1123 archive_string_free(&acl_text);
1124 return (ARCHIVE_WARN);
1125 }
1126 p++;
1127
1128 if (p >= acl + size) {
1129 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1130 "Malformed Solaris ACL attribute (body overflow)");
1131 archive_string_free(&acl_text);
1132 return(ARCHIVE_WARN);
1133 }
1134
1135 /* ACL text is null-terminated; find the end. */
1136 size -= (p - acl);
1137 acl = p;
1138
1139 while (*p != '\0' && p < acl + size)
1140 p++;
1141
1142 if (tar->sconv_acl == NULL) {
1143 tar->sconv_acl = archive_string_conversion_from_charset(
1144 &(a->archive), "UTF-8", 1);
1145 if (tar->sconv_acl == NULL) {
1146 archive_string_free(&acl_text);
1147 return (ARCHIVE_FATAL);
1148 }
1149 }
1150 archive_strncpy(&(tar->localname), acl, p - acl);
1151 err = archive_acl_from_text_l(archive_entry_acl(entry),
1152 tar->localname.s, acl_type, tar->sconv_acl);
1153 /* Workaround: Force perm_is_set() to be correct */
1154 /* If this bit were stored in the ACL, this wouldn't be needed */
1155 archive_entry_set_perm(entry, archive_entry_perm(entry));
1156 if (err != ARCHIVE_OK) {
1157 if (errno == ENOMEM) {
1158 archive_set_error(&a->archive, ENOMEM,
1159 "Can't allocate memory for ACL");
1160 } else
1161 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1162 "Malformed Solaris ACL attribute (unparsable)");
1163 }
1164 archive_string_free(&acl_text);
1165 return (err);
1166 }
1167
1168 /*
1169 * Interpret 'K' long linkname header.
1170 */
1171 static int
header_gnu_longlink(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const void * h,int64_t * unconsumed)1172 header_gnu_longlink(struct archive_read *a, struct tar *tar,
1173 struct archive_entry *entry, const void *h, int64_t *unconsumed)
1174 {
1175 int err;
1176
1177 struct archive_string linkpath;
1178 archive_string_init(&linkpath);
1179 err = read_body_to_string(a, tar, &linkpath, h, unconsumed);
1180 if (err == ARCHIVE_OK) {
1181 archive_entry_set_link(entry, linkpath.s);
1182 }
1183 archive_string_free(&linkpath);
1184 return (err);
1185 }
1186
1187 static int
set_conversion_failed_error(struct archive_read * a,struct archive_string_conv * sconv,const char * name)1188 set_conversion_failed_error(struct archive_read *a,
1189 struct archive_string_conv *sconv, const char *name)
1190 {
1191 if (errno == ENOMEM) {
1192 archive_set_error(&a->archive, ENOMEM,
1193 "Can't allocate memory for %s", name);
1194 return (ARCHIVE_FATAL);
1195 }
1196 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1197 "%s can't be converted from %s to current locale.",
1198 name, archive_string_conversion_charset_name(sconv));
1199 return (ARCHIVE_WARN);
1200 }
1201
1202 /*
1203 * Interpret 'L' long filename header.
1204 */
1205 static int
header_gnu_longname(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const void * h,int64_t * unconsumed)1206 header_gnu_longname(struct archive_read *a, struct tar *tar,
1207 struct archive_entry *entry, const void *h, int64_t *unconsumed)
1208 {
1209 int err;
1210 struct archive_string longname;
1211
1212 archive_string_init(&longname);
1213 err = read_body_to_string(a, tar, &longname, h, unconsumed);
1214 if (err == ARCHIVE_OK) {
1215 if (archive_entry_copy_pathname_l(entry, longname.s,
1216 archive_strlen(&longname), tar->sconv) != 0)
1217 err = set_conversion_failed_error(a, tar->sconv, "Pathname");
1218 }
1219 archive_string_free(&longname);
1220 return (err);
1221 }
1222
1223 /*
1224 * Interpret 'V' GNU tar volume header.
1225 */
1226 static int
header_volume(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const void * h,int64_t * unconsumed)1227 header_volume(struct archive_read *a, struct tar *tar,
1228 struct archive_entry *entry, const void *h, int64_t *unconsumed)
1229 {
1230 const struct archive_entry_header_ustar *header;
1231 int64_t size, to_consume;
1232
1233 (void)a; /* UNUSED */
1234 (void)tar; /* UNUSED */
1235 (void)entry; /* UNUSED */
1236
1237 header = (const struct archive_entry_header_ustar *)h;
1238 size = tar_atol(header->size, sizeof(header->size));
1239 if (size > (int64_t)pathname_limit) {
1240 return (ARCHIVE_FATAL);
1241 }
1242 to_consume = ((size + 511) & ~511);
1243 *unconsumed += to_consume;
1244 return (ARCHIVE_OK);
1245 }
1246
1247 /*
1248 * Read the next `size` bytes into the provided string.
1249 * Null-terminate the string.
1250 */
1251 static int
read_bytes_to_string(struct archive_read * a,struct archive_string * as,size_t size,int64_t * unconsumed)1252 read_bytes_to_string(struct archive_read *a,
1253 struct archive_string *as, size_t size,
1254 int64_t *unconsumed) {
1255 const void *src;
1256
1257 /* Fail if we can't make our buffer big enough. */
1258 if (archive_string_ensure(as, (size_t)size+1) == NULL) {
1259 archive_set_error(&a->archive, ENOMEM,
1260 "No memory");
1261 return (ARCHIVE_FATAL);
1262 }
1263
1264 tar_flush_unconsumed(a, unconsumed);
1265
1266 /* Read the body into the string. */
1267 src = __archive_read_ahead(a, size, NULL);
1268 if (src == NULL) {
1269 archive_set_error(&a->archive, EINVAL,
1270 "Truncated archive"
1271 " detected while reading metadata");
1272 *unconsumed = 0;
1273 return (ARCHIVE_FATAL);
1274 }
1275 memcpy(as->s, src, (size_t)size);
1276 as->s[size] = '\0';
1277 as->length = (size_t)size;
1278 *unconsumed += size;
1279 return (ARCHIVE_OK);
1280 }
1281
1282 /*
1283 * Read body of an archive entry into an archive_string object.
1284 */
1285 static int
read_body_to_string(struct archive_read * a,struct tar * tar,struct archive_string * as,const void * h,int64_t * unconsumed)1286 read_body_to_string(struct archive_read *a, struct tar *tar,
1287 struct archive_string *as, const void *h, int64_t *unconsumed)
1288 {
1289 int64_t size;
1290 const struct archive_entry_header_ustar *header;
1291 int r;
1292
1293 (void)tar; /* UNUSED */
1294 header = (const struct archive_entry_header_ustar *)h;
1295 size = tar_atol(header->size, sizeof(header->size));
1296 if (size < 0 || size > entry_limit) {
1297 archive_set_error(&a->archive, EINVAL,
1298 "Special header has invalid size: %lld",
1299 (long long)size);
1300 return (ARCHIVE_FATAL);
1301 }
1302 if (size > (int64_t)pathname_limit) {
1303 archive_string_empty(as);
1304 int64_t to_consume = ((size + 511) & ~511);
1305 if (to_consume != __archive_read_consume(a, to_consume)) {
1306 return (ARCHIVE_FATAL);
1307 }
1308 archive_set_error(&a->archive, EINVAL,
1309 "Special header too large: %lld > 1MiB",
1310 (long long)size);
1311 return (ARCHIVE_WARN);
1312 }
1313 r = read_bytes_to_string(a, as, size, unconsumed);
1314 *unconsumed += 0x1ff & (-size);
1315 return(r);
1316 }
1317
/*
 * Parse out common header elements.
 *
 * This would be the same as header_old_tar, except that the
 * filename is handled slightly differently for old and POSIX
 * entries (POSIX entries support a 'prefix').  This factoring
 * allows header_old_tar and header_ustar
 * to handle filenames differently, while still putting most of the
 * common parsing into one place.
 *
 * This is called _after_ ustar, GNU tar, Schily, etc, special
 * fields have already been parsed into the `tar` structure.
 * So we can make final decisions here about how to reconcile
 * size, mode, etc, information.
 *
 * Fields that are already set on `entry` (perm, uid, gid, mtime,
 * link target) are left alone; only missing ones are filled in from
 * the header block `h`.
 *
 * Returns ARCHIVE_OK on success, ARCHIVE_FATAL if a size is negative
 * or exceeds entry_limit (or a link name conversion runs out of
 * memory), and ARCHIVE_WARN if a link name cannot be converted.
 */
static int
header_common(struct archive_read *a, struct tar *tar,
    struct archive_entry *entry, const void *h)
{
	const struct archive_entry_header_ustar *header;
	const char *existing_linkpath;
	const wchar_t *existing_wcs_linkpath;
	int err = ARCHIVE_OK;

	header = (const struct archive_entry_header_ustar *)h;

	/* Parse out the numeric fields (all are octal) */

	/* Split mode handling: Set filetype always, perm only if not already set */
	archive_entry_set_filetype(entry,
	    (mode_t)tar_atol(header->mode, sizeof(header->mode)));
	if (!archive_entry_perm_is_set(entry)) {
		archive_entry_set_perm(entry,
		    (mode_t)tar_atol(header->mode, sizeof(header->mode)));
	}

	/* Set uid, gid, mtime if not already set */
	if (!archive_entry_uid_is_set(entry)) {
		archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid)));
	}
	if (!archive_entry_gid_is_set(entry)) {
		archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid)));
	}
	if (!archive_entry_mtime_is_set(entry)) {
		archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0);
	}

	/* Reconcile the size info. */
	/* First, how big is the file on disk? */
	/* The branches below are tried in priority order; the first
	 * size source flagged in tar->size_fields wins. */
	if ((tar->size_fields & TAR_SIZE_GNU_SPARSE_REALSIZE) != 0) {
		/* GNU sparse format 1.0 uses `GNU.sparse.realsize`
		 * to hold the size of the file on disk. */
		tar->disk_size = tar->GNU_sparse_realsize;
	} else if ((tar->size_fields & TAR_SIZE_GNU_SPARSE_SIZE) != 0
	    && (tar->sparse_gnu_major == 0)) {
		/* GNU sparse format 0.0 and 0.1 use `GNU.sparse.size`
		 * to hold the size of the file on disk. */
		tar->disk_size = tar->GNU_sparse_size;
	} else if ((tar->size_fields & TAR_SIZE_SCHILY_SPARSE_REALSIZE) != 0) {
		tar->disk_size = tar->SCHILY_sparse_realsize;
	} else if ((tar->size_fields & TAR_SIZE_PAX_SIZE) != 0) {
		tar->disk_size = tar->pax_size;
	} else {
		/* There wasn't a suitable pax header, so use the ustar info */
		tar->disk_size = tar_atol(header->size, sizeof(header->size));
	}

	/* Reject on-disk sizes outside [0, entry_limit]. */
	if (tar->disk_size < 0) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
		    "Tar entry has negative file size");
		return (ARCHIVE_FATAL);
	} else if (tar->disk_size > entry_limit) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
		    "Tar entry size overflow");
		return (ARCHIVE_FATAL);
	} else {
		archive_entry_set_size(entry, tar->disk_size);
	}

	/* Second, how big is the data in the archive? */
	if ((tar->size_fields & TAR_SIZE_GNU_SPARSE_SIZE) != 0
	    && (tar->sparse_gnu_major == 1)) {
		/* GNU sparse format 1.0 uses `GNU.sparse.size`
		 * to hold the size of the data in the archive. */
		tar->entry_bytes_remaining = tar->GNU_sparse_size;
	} else if ((tar->size_fields & TAR_SIZE_PAX_SIZE) != 0) {
		tar->entry_bytes_remaining = tar->pax_size;
	} else {
		tar->entry_bytes_remaining
		    = tar_atol(header->size, sizeof(header->size));
	}
	/* On an invalid in-archive size, zero the remaining-byte count
	 * before failing. */
	if (tar->entry_bytes_remaining < 0) {
		tar->entry_bytes_remaining = 0;
		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
		    "Tar entry has negative size");
		return (ARCHIVE_FATAL);
	} else if (tar->entry_bytes_remaining > entry_limit) {
		tar->entry_bytes_remaining = 0;
		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
		    "Tar entry size overflow");
		return (ARCHIVE_FATAL);
	}

	/* Handle the tar type flag appropriately. */
	tar->filetype = header->typeflag[0];

	/*
	 * TODO: If the linkpath came from Pax extension header, then
	 * we should obey the hdrcharset_utf8 flag when converting these.
	 */
	switch (tar->filetype) {
	case '1': /* Hard link */
		archive_entry_set_link_to_hardlink(entry);
		existing_wcs_linkpath = archive_entry_hardlink_w(entry);
		existing_linkpath = archive_entry_hardlink(entry);
		/* Only take the header's linkname if no link target
		 * (narrow or wide) is already set on the entry. */
		if ((existing_linkpath == NULL || existing_linkpath[0] == '\0')
		    && (existing_wcs_linkpath == NULL || existing_wcs_linkpath[0] == '\0')) {
			struct archive_string linkpath;
			archive_string_init(&linkpath);
			archive_strncpy(&linkpath,
			    header->linkname, sizeof(header->linkname));
			if (archive_entry_copy_hardlink_l(entry, linkpath.s,
			    archive_strlen(&linkpath), tar->sconv) != 0) {
				err = set_conversion_failed_error(a, tar->sconv,
				    "Linkname");
				if (err == ARCHIVE_FATAL) {
					archive_string_free(&linkpath);
					return (err);
				}
			}
			archive_string_free(&linkpath);
		}
		/*
		 * The following may seem odd, but: Technically, tar
		 * does not store the file type for a "hard link"
		 * entry, only the fact that it is a hard link.  So, I
		 * leave the type zero normally.  But, pax interchange
		 * format allows hard links to have data, which
		 * implies that the underlying entry is a regular
		 * file.
		 */
		if (archive_entry_size(entry) > 0)
			archive_entry_set_filetype(entry, AE_IFREG);

		/*
		 * A tricky point: Traditionally, tar readers have
		 * ignored the size field when reading hardlink
		 * entries, and some writers put non-zero sizes even
		 * though the body is empty.  POSIX blessed this
		 * convention in the 1988 standard, but broke with
		 * this tradition in 2001 by permitting hardlink
		 * entries to store valid bodies in pax interchange
		 * format, but not in ustar format.  Since there is no
		 * hard and fast way to distinguish pax interchange
		 * from earlier archives (the 'x' and 'g' entries are
		 * optional, after all), we need a heuristic.
		 */
		if (archive_entry_size(entry) == 0) {
			/* If the size is already zero, we're done. */
		} else if (a->archive.archive_format
		    == ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
			/* Definitely pax extended; must obey hardlink size. */
		} else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR
		    || a->archive.archive_format == ARCHIVE_FORMAT_TAR_GNUTAR)
		{
			/* Old-style or GNU tar: we must ignore the size. */
			archive_entry_set_size(entry, 0);
			tar->entry_bytes_remaining = 0;
		} else if (archive_read_format_tar_bid(a, 50) > 50) {
			/*
			 * We don't know if it's pax: If the bid
			 * function sees a valid ustar header
			 * immediately following, then let's ignore
			 * the hardlink size.
			 */
			archive_entry_set_size(entry, 0);
			tar->entry_bytes_remaining = 0;
		}
		/*
		 * TODO: There are still two cases I'd like to handle:
		 *   = a ustar non-pax archive with a hardlink entry at
		 *     end-of-archive.  (Look for block of nulls following?)
		 *   = a pax archive that has not seen any pax headers
		 *     and has an entry which is a hardlink entry storing
		 *     a body containing an uncompressed tar archive.
		 * The first is worth addressing; I don't see any reliable
		 * way to deal with the second possibility.
		 */
		break;
	case '2': /* Symlink */
		archive_entry_set_link_to_symlink(entry);
		existing_wcs_linkpath = archive_entry_symlink_w(entry);
		existing_linkpath = archive_entry_symlink(entry);
		/* Same rule as for hard links: an already-set target
		 * takes precedence over the header's linkname field. */
		if ((existing_linkpath == NULL || existing_linkpath[0] == '\0')
		    && (existing_wcs_linkpath == NULL || existing_wcs_linkpath[0] == '\0')) {
			struct archive_string linkpath;
			archive_string_init(&linkpath);
			archive_strncpy(&linkpath,
			    header->linkname, sizeof(header->linkname));
			if (archive_entry_copy_symlink_l(entry, linkpath.s,
			    archive_strlen(&linkpath), tar->sconv) != 0) {
				err = set_conversion_failed_error(a, tar->sconv,
				    "Linkname");
				if (err == ARCHIVE_FATAL) {
					archive_string_free(&linkpath);
					return (err);
				}
			}
			archive_string_free(&linkpath);
		}
		/* Symlinks and the device/dir/FIFO types below are
		 * forced to size zero with no body to read. */
		archive_entry_set_filetype(entry, AE_IFLNK);
		archive_entry_set_size(entry, 0);
		tar->entry_bytes_remaining = 0;
		break;
	case '3': /* Character device */
		archive_entry_set_filetype(entry, AE_IFCHR);
		archive_entry_set_size(entry, 0);
		tar->entry_bytes_remaining = 0;
		break;
	case '4': /* Block device */
		archive_entry_set_filetype(entry, AE_IFBLK);
		archive_entry_set_size(entry, 0);
		tar->entry_bytes_remaining = 0;
		break;
	case '5': /* Dir */
		archive_entry_set_filetype(entry, AE_IFDIR);
		archive_entry_set_size(entry, 0);
		tar->entry_bytes_remaining = 0;
		break;
	case '6': /* FIFO device */
		archive_entry_set_filetype(entry, AE_IFIFO);
		archive_entry_set_size(entry, 0);
		tar->entry_bytes_remaining = 0;
		break;
	case 'D': /* GNU incremental directory type */
		/*
		 * No special handling is actually required here.
		 * It might be nice someday to preprocess the file list and
		 * provide it to the client, though.
		 */
		archive_entry_set_filetype(entry, AE_IFDIR);
		break;
	case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/
		/*
		 * As far as I can tell, this is just like a regular file
		 * entry, except that the contents should be _appended_ to
		 * the indicated file at the indicated offset.  This may
		 * require some API work to fully support.
		 */
		break;
	case 'N': /* Old GNU "long filename" entry. */
		/* The body of this entry is a script for renaming
		 * previously-extracted entries.  Ugh.  It will never
		 * be supported by libarchive. */
		archive_entry_set_filetype(entry, AE_IFREG);
		break;
	case 'S': /* GNU sparse files */
		/*
		 * Sparse files are really just regular files with
		 * sparse information in the extended area.
		 */
		/* FALLTHROUGH */
	case '0': /* ustar "regular" file */
		/* FALLTHROUGH */
	default: /* Non-standard file types */
		/*
		 * Per POSIX: non-recognized types should always be
		 * treated as regular files.
		 */
		archive_entry_set_filetype(entry, AE_IFREG);
		break;
	}
	/* Either ARCHIVE_OK or the non-fatal status from a failed
	 * link name conversion. */
	return (err);
}
1592
1593 /*
1594 * Parse out header elements for "old-style" tar archives.
1595 */
1596 static int
header_old_tar(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const void * h)1597 header_old_tar(struct archive_read *a, struct tar *tar,
1598 struct archive_entry *entry, const void *h)
1599 {
1600 const struct archive_entry_header_ustar *header;
1601 int err = ARCHIVE_OK, err2;
1602
1603 /*
1604 * Copy filename over (to ensure null termination).
1605 * Skip if pathname was already set e.g. by header_gnu_longname()
1606 */
1607 header = (const struct archive_entry_header_ustar *)h;
1608
1609 const char *existing_pathname = archive_entry_pathname(entry);
1610 const wchar_t *existing_wcs_pathname = archive_entry_pathname_w(entry);
1611 if ((existing_pathname == NULL || existing_pathname[0] == '\0')
1612 && (existing_wcs_pathname == NULL || existing_wcs_pathname[0] == '\0') &&
1613 archive_entry_copy_pathname_l(entry,
1614 header->name, sizeof(header->name), tar->sconv) != 0) {
1615 err = set_conversion_failed_error(a, tar->sconv, "Pathname");
1616 if (err == ARCHIVE_FATAL)
1617 return (err);
1618 }
1619
1620 /* Grab rest of common fields */
1621 err2 = header_common(a, tar, entry, h);
1622 if (err > err2)
1623 err = err2;
1624
1625 tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1626 return (err);
1627 }
1628
/*
 * Is this likely an AppleDouble extension?
 *
 * Returns 1 if the last element of the entry's pathname starts with
 * "._" followed by at least one more character, and 0 otherwise.
 * The wide-character pathname is consulted first; the narrow pathname
 * is used only when no wide pathname is available.
 *
 * An entry with no pathname at all is not an extension, so 0 is
 * returned.  This is a boolean predicate used directly in a
 * condition; returning an ARCHIVE_* error code here would be
 * non-zero and therefore read as "true".
 */
static int
is_mac_metadata_entry(struct archive_entry *entry) {
	const char *p, *name;
	const wchar_t *wp, *wname;

	wname = wp = archive_entry_pathname_w(entry);
	if (wp != NULL) {
		/* Find the last path element. */
		for (; *wp != L'\0'; ++wp) {
			/* A trailing '/' does not start a new element. */
			if (wp[0] == L'/' && wp[1] != L'\0')
				wname = wp + 1;
		}
		/*
		 * If last path element starts with "._", then
		 * this is a Mac extension.
		 */
		if (wname[0] == L'.' && wname[1] == L'_' && wname[2] != L'\0')
			return 1;
	} else {
		/* Find the last path element. */
		name = p = archive_entry_pathname(entry);
		if (p == NULL)
			return 0;
		for (; *p != '\0'; ++p) {
			/* A trailing '/' does not start a new element. */
			if (p[0] == '/' && p[1] != '\0')
				name = p + 1;
		}
		/*
		 * If last path element starts with "._", then
		 * this is a Mac extension.
		 */
		if (name[0] == '.' && name[1] == '_' && name[2] != '\0')
			return 1;
	}
	/* Not a mac extension */
	return 0;
}
1669
1670 /*
1671 * Read a Mac AppleDouble-encoded blob of file metadata,
1672 * if there is one.
1673 *
1674 * TODO: In Libarchive 4, we should consider ripping this
1675 * out -- instead, return a file starting with `._` as
1676 * a regular file and let the client (or archive_write logic)
1677 * handle it.
1678 */
1679 static int
read_mac_metadata_blob(struct archive_read * a,struct archive_entry * entry,int64_t * unconsumed)1680 read_mac_metadata_blob(struct archive_read *a,
1681 struct archive_entry *entry, int64_t *unconsumed)
1682 {
1683 int64_t size;
1684 size_t msize;
1685 const void *data;
1686
1687 /* Read the body as a Mac OS metadata blob. */
1688 size = archive_entry_size(entry);
1689 msize = (size_t)size;
1690 if (size < 0 || (uintmax_t)msize != (uintmax_t)size) {
1691 *unconsumed = 0;
1692 return (ARCHIVE_FATAL);
1693 }
1694
1695 /* TODO: Should this merely skip the overlarge entry and
1696 * WARN? Or is xattr_limit sufficiently large that we can
1697 * safely assume anything larger is malicious? */
1698 if (size > (int64_t)xattr_limit) {
1699 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1700 "Oversized AppleDouble extension has size %llu > %llu",
1701 (unsigned long long)size,
1702 (unsigned long long)xattr_limit);
1703 return (ARCHIVE_FATAL);
1704 }
1705
1706 /*
1707 * TODO: Look beyond the body here to peek at the next header.
1708 * If it's a regular header (not an extension header)
1709 * that has the wrong name, just return the current
1710 * entry as-is, without consuming the body here.
1711 * That would reduce the risk of us mis-identifying
1712 * an ordinary file that just happened to have
1713 * a name starting with "._".
1714 *
1715 * Q: Is the above idea really possible? Even
1716 * when there are GNU or pax extension entries?
1717 */
1718 tar_flush_unconsumed(a, unconsumed);
1719 data = __archive_read_ahead(a, msize, NULL);
1720 if (data == NULL) {
1721 archive_set_error(&a->archive, EINVAL,
1722 "Truncated archive"
1723 " detected while reading macOS metadata");
1724 *unconsumed = 0;
1725 return (ARCHIVE_FATAL);
1726 }
1727 archive_entry_clear(entry);
1728 archive_entry_copy_mac_metadata(entry, data, msize);
1729 *unconsumed = (msize + 511) & ~ 511;
1730 return (ARCHIVE_OK);
1731 }
1732
1733 /*
1734 * Parse a file header for a pax extended archive entry.
1735 */
1736 static int
header_pax_global(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const void * h,int64_t * unconsumed)1737 header_pax_global(struct archive_read *a, struct tar *tar,
1738 struct archive_entry *entry, const void *h, int64_t *unconsumed)
1739 {
1740 const struct archive_entry_header_ustar *header;
1741 int64_t size, to_consume;
1742
1743 (void)a; /* UNUSED */
1744 (void)tar; /* UNUSED */
1745 (void)entry; /* UNUSED */
1746
1747 header = (const struct archive_entry_header_ustar *)h;
1748 size = tar_atol(header->size, sizeof(header->size));
1749 if (size < 0 || size > entry_limit) {
1750 archive_set_error(&a->archive, EINVAL,
1751 "Special header has invalid size: %lld",
1752 (long long)size);
1753 return (ARCHIVE_FATAL);
1754 }
1755 to_consume = ((size + 511) & ~511);
1756 *unconsumed += to_consume;
1757 return (ARCHIVE_OK);
1758 }
1759
1760 /*
1761 * Parse a file header for a Posix "ustar" archive entry. This also
1762 * handles "pax" or "extended ustar" entries.
1763 *
1764 * In order to correctly handle pax attributes (which precede this),
1765 * we have to skip parsing any field for which the entry already has
1766 * contents.
1767 */
1768 static int
header_ustar(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const void * h)1769 header_ustar(struct archive_read *a, struct tar *tar,
1770 struct archive_entry *entry, const void *h)
1771 {
1772 const struct archive_entry_header_ustar *header;
1773 struct archive_string as;
1774 int err = ARCHIVE_OK, r;
1775
1776 header = (const struct archive_entry_header_ustar *)h;
1777
1778 /* Copy name into an internal buffer to ensure null-termination. */
1779 const char *existing_pathname = archive_entry_pathname(entry);
1780 const wchar_t *existing_wcs_pathname = archive_entry_pathname_w(entry);
1781 if ((existing_pathname == NULL || existing_pathname[0] == '\0')
1782 && (existing_wcs_pathname == NULL || existing_wcs_pathname[0] == '\0')) {
1783 archive_string_init(&as);
1784 if (header->prefix[0]) {
1785 archive_strncpy(&as, header->prefix, sizeof(header->prefix));
1786 if (as.s[archive_strlen(&as) - 1] != '/')
1787 archive_strappend_char(&as, '/');
1788 archive_strncat(&as, header->name, sizeof(header->name));
1789 } else {
1790 archive_strncpy(&as, header->name, sizeof(header->name));
1791 }
1792 if (archive_entry_copy_pathname_l(entry, as.s, archive_strlen(&as),
1793 tar->sconv) != 0) {
1794 err = set_conversion_failed_error(a, tar->sconv, "Pathname");
1795 if (err == ARCHIVE_FATAL)
1796 return (err);
1797 }
1798 archive_string_free(&as);
1799 }
1800
1801 /* Handle rest of common fields. */
1802 r = header_common(a, tar, entry, h);
1803 if (r == ARCHIVE_FATAL)
1804 return (r);
1805 if (r < err)
1806 err = r;
1807
1808 /* Handle POSIX ustar fields. */
1809 const char *existing_uname = archive_entry_uname(entry);
1810 if (existing_uname == NULL || existing_uname[0] == '\0') {
1811 if (archive_entry_copy_uname_l(entry,
1812 header->uname, sizeof(header->uname), tar->sconv) != 0) {
1813 err = set_conversion_failed_error(a, tar->sconv, "Uname");
1814 if (err == ARCHIVE_FATAL)
1815 return (err);
1816 }
1817 }
1818
1819 const char *existing_gname = archive_entry_gname(entry);
1820 if (existing_gname == NULL || existing_gname[0] == '\0') {
1821 if (archive_entry_copy_gname_l(entry,
1822 header->gname, sizeof(header->gname), tar->sconv) != 0) {
1823 err = set_conversion_failed_error(a, tar->sconv, "Gname");
1824 if (err == ARCHIVE_FATAL)
1825 return (err);
1826 }
1827 }
1828
1829 /* Parse out device numbers only for char and block specials. */
1830 if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
1831 if (!archive_entry_rdev_is_set(entry)) {
1832 archive_entry_set_rdevmajor(entry, (dev_t)
1833 tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
1834 archive_entry_set_rdevminor(entry, (dev_t)
1835 tar_atol(header->rdevminor, sizeof(header->rdevminor)));
1836 }
1837 } else {
1838 archive_entry_set_rdev(entry, 0);
1839 }
1840
1841 tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1842
1843 return (err);
1844 }
1845
1846 static int
header_pax_extension(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const void * h,int64_t * unconsumed)1847 header_pax_extension(struct archive_read *a, struct tar *tar,
1848 struct archive_entry *entry, const void *h, int64_t *unconsumed)
1849 {
1850 /* Sanity checks: The largest `x` body I've ever heard of was
1851 * a little over 4MB. So I doubt there has ever been a
1852 * well-formed archive with an `x` body over 1GiB. Similarly,
1853 * it seems plausible that no single attribute has ever been
1854 * larger than 100MB. So if we see a larger value here, it's
1855 * almost certainly a sign of a corrupted/malicious archive. */
1856
1857 /* Maximum sane size for extension body: 1 GiB */
1858 /* This cannot be raised to larger than 8GiB without
1859 * exceeding the maximum size for a standard ustar
1860 * entry. */
1861 const int64_t ext_size_limit = 1024 * 1024 * (int64_t)1024;
1862 /* Maximum size for a single line/attr: 100 million characters */
1863 /* This cannot be raised to more than 2GiB without exceeding
1864 * a `size_t` on 32-bit platforms. */
1865 const size_t max_parsed_line_length = 99999999ULL;
1866 /* Largest attribute prolog: size + name. */
1867 const size_t max_size_name = 512;
1868
1869 /* Size and padding of the full extension body */
1870 int64_t ext_size, ext_padding;
1871 size_t line_length, value_length, name_length;
1872 ssize_t to_read, did_read;
1873 const struct archive_entry_header_ustar *header;
1874 const char *p, *attr_start, *name_start;
1875 struct archive_string_conv *sconv;
1876 struct archive_string *pas = NULL;
1877 struct archive_string attr_name;
1878 int err = ARCHIVE_OK, r;
1879
1880 header = (const struct archive_entry_header_ustar *)h;
1881 ext_size = tar_atol(header->size, sizeof(header->size));
1882 if (ext_size > entry_limit) {
1883 return (ARCHIVE_FATAL);
1884 }
1885 if (ext_size < 0) {
1886 archive_set_error(&a->archive, EINVAL,
1887 "pax extension header has invalid size: %lld",
1888 (long long)ext_size);
1889 return (ARCHIVE_FATAL);
1890 }
1891
1892 ext_padding = 0x1ff & (-ext_size);
1893 if (ext_size > ext_size_limit) {
1894 /* Consume the pax extension body and return an error */
1895 if (ext_size + ext_padding != __archive_read_consume(a, ext_size + ext_padding)) {
1896 return (ARCHIVE_FATAL);
1897 }
1898 archive_set_error(&a->archive, EINVAL,
1899 "Ignoring oversized pax extensions: %lld > %lld",
1900 (long long)ext_size, (long long)ext_size_limit);
1901 return (ARCHIVE_WARN);
1902 }
1903 tar_flush_unconsumed(a, unconsumed);
1904
1905 /* Parse the size/name of each pax attribute in the body */
1906 archive_string_init(&attr_name);
1907 while (ext_size > 0) {
1908 /* Read enough bytes to parse the size/name of the next attribute */
1909 to_read = max_size_name;
1910 if (to_read > ext_size) {
1911 to_read = ext_size;
1912 }
1913 p = __archive_read_ahead(a, to_read, &did_read);
1914 if (p == NULL) { /* EOF */
1915 archive_set_error(&a->archive, EINVAL,
1916 "Truncated tar archive"
1917 " detected while reading pax attribute name");
1918 return (ARCHIVE_FATAL);
1919 }
1920 if (did_read > ext_size) {
1921 did_read = ext_size;
1922 }
1923
1924 /* Parse size of attribute */
1925 line_length = 0;
1926 attr_start = p;
1927 while (1) {
1928 if (p >= attr_start + did_read) {
1929 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1930 "Ignoring malformed pax attributes: overlarge attribute size field");
1931 *unconsumed += ext_size + ext_padding;
1932 return (ARCHIVE_WARN);
1933 }
1934 if (*p == ' ') {
1935 p++;
1936 break;
1937 }
1938 if (*p < '0' || *p > '9') {
1939 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1940 "Ignoring malformed pax attributes: malformed attribute size field");
1941 *unconsumed += ext_size + ext_padding;
1942 return (ARCHIVE_WARN);
1943 }
1944 line_length *= 10;
1945 line_length += *p - '0';
1946 if (line_length > max_parsed_line_length) {
1947 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1948 "Ignoring malformed pax attribute: size > %lld",
1949 (long long)max_parsed_line_length);
1950 *unconsumed += ext_size + ext_padding;
1951 return (ARCHIVE_WARN);
1952 }
1953 p++;
1954 }
1955
1956 if ((int64_t)line_length > ext_size) {
1957 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1958 "Ignoring malformed pax attribute: %lld > %lld",
1959 (long long)line_length, (long long)ext_size);
1960 *unconsumed += ext_size + ext_padding;
1961 return (ARCHIVE_WARN);
1962 }
1963
1964 /* Parse name of attribute */
1965 if (p >= attr_start + did_read
1966 || p >= attr_start + line_length
1967 || *p == '=') {
1968 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1969 "Ignoring malformed pax attributes: empty name found");
1970 *unconsumed += ext_size + ext_padding;
1971 return (ARCHIVE_WARN);
1972 }
1973 name_start = p;
1974 while (1) {
1975 if (p >= attr_start + did_read || p >= attr_start + line_length) {
1976 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1977 "Ignoring malformed pax attributes: overlarge attribute name");
1978 *unconsumed += ext_size + ext_padding;
1979 return (ARCHIVE_WARN);
1980 }
1981 if (*p == '=') {
1982 break;
1983 }
1984 p++;
1985 }
1986 name_length = p - name_start;
1987 p++; // Skip '='
1988
1989 // Save the name before we consume it
1990 archive_strncpy(&attr_name, name_start, name_length);
1991
1992 ext_size -= p - attr_start;
1993 value_length = line_length - (p - attr_start);
1994
1995 /* Consume size, name, and `=` */
1996 *unconsumed += p - attr_start;
1997 tar_flush_unconsumed(a, unconsumed);
1998
1999 if (value_length == 0) {
2000 archive_set_error(&a->archive, EINVAL,
2001 "Malformed pax attributes");
2002 *unconsumed += ext_size + ext_padding;
2003 return (ARCHIVE_WARN);
2004 }
2005
2006 /* pax_attribute will consume value_length - 1 */
2007 r = pax_attribute(a, tar, entry, attr_name.s, archive_strlen(&attr_name), value_length - 1, unconsumed);
2008 ext_size -= value_length - 1;
2009
2010 // Release the allocated attr_name (either here or before every return in this function)
2011 archive_string_free(&attr_name);
2012
2013 if (r < ARCHIVE_WARN) {
2014 *unconsumed += ext_size + ext_padding;
2015 return (r);
2016 }
2017 err = err_combine(err, r);
2018
2019 /* Consume the `\n` that follows the pax attribute value. */
2020 tar_flush_unconsumed(a, unconsumed);
2021 p = __archive_read_ahead(a, 1, &did_read);
2022 if (p == NULL) {
2023 archive_set_error(&a->archive, EINVAL,
2024 "Truncated tar archive"
2025 " detected while completing pax attribute");
2026 return (ARCHIVE_FATAL);
2027 }
2028 if (p[0] != '\n') {
2029 archive_set_error(&a->archive, EINVAL,
2030 "Malformed pax attributes");
2031 *unconsumed += ext_size + ext_padding;
2032 return (ARCHIVE_WARN);
2033 }
2034 ext_size -= 1;
2035 *unconsumed += 1;
2036 tar_flush_unconsumed(a, unconsumed);
2037 }
2038 *unconsumed += ext_size + ext_padding;
2039
2040 /*
2041 * Some PAX values -- pathname, linkpath, uname, gname --
2042 * can't be copied into the entry until we know the character
2043 * set to use:
2044 */
2045 if (!tar->pax_hdrcharset_utf8)
2046 /* PAX specified "BINARY", so use the default charset */
2047 sconv = tar->opt_sconv;
2048 else {
2049 /* PAX default UTF-8 */
2050 sconv = archive_string_conversion_from_charset(
2051 &(a->archive), "UTF-8", 1);
2052 if (sconv == NULL)
2053 return (ARCHIVE_FATAL);
2054 if (tar->compat_2x)
2055 archive_string_conversion_set_opt(sconv,
2056 SCONV_SET_OPT_UTF8_LIBARCHIVE2X);
2057 }
2058
2059 /* Pathname */
2060 pas = NULL;
2061 if (archive_strlen(&(tar->entry_pathname_override)) > 0) {
2062 /* Prefer GNU.sparse.name attribute if present */
2063 /* GNU sparse files store a fake name under the standard
2064 * "pathname" key. */
2065 pas = &(tar->entry_pathname_override);
2066 } else if (archive_strlen(&(tar->entry_pathname)) > 0) {
2067 /* Use standard "pathname" PAX extension */
2068 pas = &(tar->entry_pathname);
2069 }
2070 if (pas != NULL) {
2071 if (archive_entry_copy_pathname_l(entry, pas->s,
2072 archive_strlen(pas), sconv) != 0) {
2073 err = set_conversion_failed_error(a, sconv, "Pathname");
2074 if (err == ARCHIVE_FATAL)
2075 return (err);
2076 /* Use raw name without conversion */
2077 archive_entry_copy_pathname(entry, pas->s);
2078 }
2079 }
2080 /* Uname */
2081 if (archive_strlen(&(tar->entry_uname)) > 0) {
2082 if (archive_entry_copy_uname_l(entry, tar->entry_uname.s,
2083 archive_strlen(&(tar->entry_uname)), sconv) != 0) {
2084 err = set_conversion_failed_error(a, sconv, "Uname");
2085 if (err == ARCHIVE_FATAL)
2086 return (err);
2087 /* Use raw name without conversion */
2088 archive_entry_copy_uname(entry, tar->entry_uname.s);
2089 }
2090 }
2091 /* Gname */
2092 if (archive_strlen(&(tar->entry_gname)) > 0) {
2093 if (archive_entry_copy_gname_l(entry, tar->entry_gname.s,
2094 archive_strlen(&(tar->entry_gname)), sconv) != 0) {
2095 err = set_conversion_failed_error(a, sconv, "Gname");
2096 if (err == ARCHIVE_FATAL)
2097 return (err);
2098 /* Use raw name without conversion */
2099 archive_entry_copy_gname(entry, tar->entry_gname.s);
2100 }
2101 }
2102 /* Linkpath */
2103 if (archive_strlen(&(tar->entry_linkpath)) > 0) {
2104 if (archive_entry_copy_link_l(entry, tar->entry_linkpath.s,
2105 archive_strlen(&(tar->entry_linkpath)), sconv) != 0) {
2106 err = set_conversion_failed_error(a, sconv, "Linkpath");
2107 if (err == ARCHIVE_FATAL)
2108 return (err);
2109 /* Use raw name without conversion */
2110 archive_entry_copy_link(entry, tar->entry_linkpath.s);
2111 }
2112 }
2113
2114 /* Extension may have given us a corrected `entry_bytes_remaining` for
2115 * the main entry; update the padding appropriately. */
2116 tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
2117 return (err);
2118 }
2119
/*
 * Add a LIBARCHIVE.xattr.* attribute to the entry.
 *
 * `name` (name_length bytes) is URL-decoded and `value` (value_length
 * bytes) is base64-decoded before being stored on the entry.
 *
 * Returns 0 on success, 3 if the name is empty, 2 if the name could
 * not be decoded, 1 if the value could not be decoded.
 */
static int
pax_attribute_LIBARCHIVE_xattr(struct archive_entry *entry,
    const char *name, size_t name_length, const char *value, size_t value_length)
{
	char *xattr_name;
	void *xattr_value;
	size_t xattr_value_size;
	int status;

	if (name_length < 1)
		return 3;

	/* URL-decode name */
	xattr_name = url_decode(name, name_length);
	if (xattr_name == NULL)
		return 2;

	/* Base-64 decode value */
	xattr_value = base64_decode(value, value_length, &xattr_value_size);
	if (xattr_value != NULL) {
		archive_entry_xattr_add_entry(entry, xattr_name,
		    xattr_value, xattr_value_size);
		status = 0;
	} else {
		status = 1;
	}

	/* Both decoded buffers are released here; free(NULL) is a no-op. */
	free(xattr_value);
	free(xattr_name);
	return status;
}
2150
/*
 * Add a SCHILY.xattr.* attribute to the entry.
 *
 * `name` is given by pointer and length and need not be
 * NUL-terminated; a terminated copy is made for the entry API.
 * `value` (value_length bytes) is stored without any decoding.
 *
 * Returns 0 on success, 1 if the name length is outside 1..128, and
 * 2 if the temporary name buffer could not be allocated.  Any
 * non-zero result means the attribute was not recorded.
 */
static int
pax_attribute_SCHILY_xattr(struct archive_entry *entry,
    const char *name, size_t name_length, const char *value, size_t value_length)
{
	char *null_terminated_name;

	if (name_length < 1 || name_length > 128) {
		return 1;
	}

	null_terminated_name = malloc(name_length + 1);
	if (null_terminated_name == NULL) {
		/* Report the failure instead of silently dropping the
		 * attribute while claiming success. */
		return 2;
	}

	memcpy(null_terminated_name, name, name_length);
	null_terminated_name[name_length] = '\0';
	archive_entry_xattr_add_entry(entry, null_terminated_name, value, value_length);
	free(null_terminated_name);

	return 0;
}
2169
/*
 * Store an RHT.security.selinux value on the entry as the
 * "security.selinux" extended attribute.  The value is stored as
 * given, with no decoding.  Always returns 0.
 */
static int
pax_attribute_RHT_security_selinux(struct archive_entry *entry,
    const char *value, size_t value_length)
{
	static const char selinux_xattr_name[] = "security.selinux";

	archive_entry_xattr_add_entry(entry, selinux_xattr_name,
	    value, value_length);
	return 0;
}
2179
2180 static int
pax_attribute_SCHILY_acl(struct archive_read * a,struct tar * tar,struct archive_entry * entry,size_t value_length,int type)2181 pax_attribute_SCHILY_acl(struct archive_read *a, struct tar *tar,
2182 struct archive_entry *entry, size_t value_length, int type)
2183 {
2184 int r;
2185 const char *p;
2186 const char* errstr;
2187
2188 switch (type) {
2189 case ARCHIVE_ENTRY_ACL_TYPE_ACCESS:
2190 errstr = "SCHILY.acl.access";
2191 break;
2192 case ARCHIVE_ENTRY_ACL_TYPE_DEFAULT:
2193 errstr = "SCHILY.acl.default";
2194 break;
2195 case ARCHIVE_ENTRY_ACL_TYPE_NFS4:
2196 errstr = "SCHILY.acl.ace";
2197 break;
2198 default:
2199 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2200 "Unknown ACL type: %d", type);
2201 return(ARCHIVE_FATAL);
2202 }
2203
2204 if (tar->sconv_acl == NULL) {
2205 tar->sconv_acl =
2206 archive_string_conversion_from_charset(
2207 &(a->archive), "UTF-8", 1);
2208 if (tar->sconv_acl == NULL)
2209 return (ARCHIVE_FATAL);
2210 }
2211
2212 if (value_length > acl_limit) {
2213 __archive_read_consume(a, value_length);
2214 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2215 "Unreasonably large ACL: %llu > %llu",
2216 (unsigned long long)value_length,
2217 (unsigned long long)acl_limit);
2218 return (ARCHIVE_WARN);
2219 }
2220
2221 p = __archive_read_ahead(a, value_length, NULL);
2222 if (p == NULL) {
2223 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2224 "Truncated tar archive "
2225 "detected while reading ACL data");
2226 return (ARCHIVE_FATAL);
2227 }
2228
2229 r = archive_acl_from_text_nl(archive_entry_acl(entry), p, value_length,
2230 type, tar->sconv_acl);
2231 __archive_read_consume(a, value_length);
2232 /* Workaround: Force perm_is_set() to be correct */
2233 /* If this bit were stored in the ACL, this wouldn't be needed */
2234 archive_entry_set_perm(entry, archive_entry_perm(entry));
2235 if (r != ARCHIVE_OK) {
2236 if (r == ARCHIVE_FATAL) {
2237 archive_set_error(&a->archive, ENOMEM,
2238 "%s %s", "Can't allocate memory for ",
2239 errstr);
2240 return (r);
2241 }
2242 archive_set_error(&a->archive,
2243 ARCHIVE_ERRNO_MISC, "%s %s", "Parse error: ", errstr);
2244 }
2245 return (r);
2246 }
2247
2248 static int
pax_attribute_read_time(struct archive_read * a,size_t value_length,int64_t * ps,long * pn,int64_t * unconsumed)2249 pax_attribute_read_time(struct archive_read *a, size_t value_length, int64_t *ps, long *pn, int64_t *unconsumed) {
2250 struct archive_string as;
2251 int r;
2252
2253 if (value_length > 128) {
2254 __archive_read_consume(a, value_length);
2255 *ps = 0;
2256 *pn = 0;
2257 return (ARCHIVE_FATAL);
2258 }
2259
2260 archive_string_init(&as);
2261 r = read_bytes_to_string(a, &as, value_length, unconsumed);
2262 if (r < ARCHIVE_OK) {
2263 archive_string_free(&as);
2264 *ps = 0;
2265 *pn = 0;
2266 return (r);
2267 }
2268
2269 pax_time(as.s, archive_strlen(&as), ps, pn);
2270 archive_string_free(&as);
2271 if (*ps == INT64_MIN) {
2272 *ps = 0;
2273 *pn = 0;
2274 return (ARCHIVE_WARN);
2275 }
2276 return (ARCHIVE_OK);
2277 }
2278
2279 static int
pax_attribute_read_number(struct archive_read * a,size_t value_length,int64_t * result)2280 pax_attribute_read_number(struct archive_read *a, size_t value_length, int64_t *result) {
2281 struct archive_string as;
2282 int64_t unconsumed = 0;
2283 int r;
2284
2285 if (value_length > 64) {
2286 __archive_read_consume(a, value_length);
2287 *result = 0;
2288 return (ARCHIVE_FATAL);
2289 }
2290
2291 archive_string_init(&as);
2292 r = read_bytes_to_string(a, &as, value_length, &unconsumed);
2293 tar_flush_unconsumed(a, &unconsumed);
2294 if (r < ARCHIVE_OK) {
2295 archive_string_free(&as);
2296 *result = 0;
2297 return (r);
2298 }
2299
2300 *result = tar_atol10(as.s, archive_strlen(&as));
2301 archive_string_free(&as);
2302 if (*result < 0 || *result == INT64_MAX) {
2303 *result = INT64_MAX;
2304 return (ARCHIVE_WARN);
2305 }
2306 return (ARCHIVE_OK);
2307 }
2308
2309 /*
2310 * Parse a single key=value attribute.
2311 *
2312 * POSIX reserves all-lowercase keywords. Vendor-specific extensions
2313 * should always have keywords of the form "VENDOR.attribute" In
2314 * particular, it's quite feasible to support many different vendor
2315 * extensions here. I'm using "LIBARCHIVE" for extensions unique to
2316 * this library.
2317 *
2318 * TODO: Investigate other vendor-specific extensions and see if
2319 * any of them look useful.
2320 */
2321 static int
pax_attribute(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const char * key,size_t key_length,size_t value_length,int64_t * unconsumed)2322 pax_attribute(struct archive_read *a, struct tar *tar, struct archive_entry *entry,
2323 const char *key, size_t key_length, size_t value_length, int64_t *unconsumed)
2324 {
2325 int64_t t;
2326 long n;
2327 const char *p;
2328 ssize_t bytes_read;
2329 int err = ARCHIVE_OK;
2330
2331 switch (key[0]) {
2332 case 'G':
2333 /* GNU.* extensions */
2334 if (key_length > 4 && memcmp(key, "GNU.", 4) == 0) {
2335 key += 4;
2336 key_length -= 4;
2337
2338 /* GNU.sparse marks the existence of GNU sparse information */
2339 if (key_length == 6 && memcmp(key, "sparse", 6) == 0) {
2340 tar->sparse_gnu_attributes_seen = 1;
2341 }
2342
2343 /* GNU.sparse.* extensions */
2344 else if (key_length > 7 && memcmp(key, "sparse.", 7) == 0) {
2345 tar->sparse_gnu_attributes_seen = 1;
2346 key += 7;
2347 key_length -= 7;
2348
2349 /* GNU "0.0" sparse pax format. */
2350 if (key_length == 9 && memcmp(key, "numblocks", 9) == 0) {
2351 /* GNU.sparse.numblocks */
2352 tar->sparse_offset = -1;
2353 tar->sparse_numbytes = -1;
2354 tar->sparse_gnu_major = 0;
2355 tar->sparse_gnu_minor = 0;
2356 }
2357 else if (key_length == 6 && memcmp(key, "offset", 6) == 0) {
2358 /* GNU.sparse.offset */
2359 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2360 tar->sparse_offset = t;
2361 if (tar->sparse_numbytes != -1) {
2362 if (gnu_add_sparse_entry(a, tar,
2363 tar->sparse_offset, tar->sparse_numbytes)
2364 != ARCHIVE_OK)
2365 return (ARCHIVE_FATAL);
2366 tar->sparse_offset = -1;
2367 tar->sparse_numbytes = -1;
2368 }
2369 }
2370 return (err);
2371 }
2372 else if (key_length == 8 && memcmp(key, "numbytes", 8) == 0) {
2373 /* GNU.sparse.numbytes */
2374 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2375 tar->sparse_numbytes = t;
2376 if (tar->sparse_offset != -1) {
2377 if (gnu_add_sparse_entry(a, tar,
2378 tar->sparse_offset, tar->sparse_numbytes)
2379 != ARCHIVE_OK)
2380 return (ARCHIVE_FATAL);
2381 tar->sparse_offset = -1;
2382 tar->sparse_numbytes = -1;
2383 }
2384 }
2385 return (err);
2386 }
2387 else if (key_length == 4 && memcmp(key, "size", 4) == 0) {
2388 /* GNU.sparse.size */
2389 /* This is either the size of stored entry OR the size of data on disk,
2390 * depending on which GNU sparse format version is in use.
2391 * Since pax attributes can be in any order, we may not actually
2392 * know at this point how to interpret this. */
2393 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2394 tar->GNU_sparse_size = t;
2395 tar->size_fields |= TAR_SIZE_GNU_SPARSE_SIZE;
2396 }
2397 return (err);
2398 }
2399
2400 /* GNU "0.1" sparse pax format. */
2401 else if (key_length == 3 && memcmp(key, "map", 3) == 0) {
2402 /* GNU.sparse.map */
2403 tar->sparse_gnu_major = 0;
2404 tar->sparse_gnu_minor = 1;
2405 if (value_length > sparse_map_limit) {
2406 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2407 "Unreasonably large sparse map: %llu > %llu",
2408 (unsigned long long)value_length,
2409 (unsigned long long)sparse_map_limit);
2410 err = ARCHIVE_FAILED;
2411 } else {
2412 p = __archive_read_ahead(a, value_length, &bytes_read);
2413 if (p == NULL) {
2414 archive_set_error(&a->archive, EINVAL,
2415 "Truncated archive"
2416 " detected while reading GNU sparse data");
2417 return (ARCHIVE_FATAL);
2418 }
2419 if (gnu_sparse_01_parse(a, tar, p, value_length) != ARCHIVE_OK) {
2420 err = ARCHIVE_WARN;
2421 }
2422 }
2423 __archive_read_consume(a, value_length);
2424 return (err);
2425 }
2426
2427 /* GNU "1.0" sparse pax format */
2428 else if (key_length == 5 && memcmp(key, "major", 5) == 0) {
2429 /* GNU.sparse.major */
2430 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK
2431 && t >= 0
2432 && t <= 10) {
2433 tar->sparse_gnu_major = (int)t;
2434 }
2435 return (err);
2436 }
2437 else if (key_length == 5 && memcmp(key, "minor", 5) == 0) {
2438 /* GNU.sparse.minor */
2439 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK
2440 && t >= 0
2441 && t <= 10) {
2442 tar->sparse_gnu_minor = (int)t;
2443 }
2444 return (err);
2445 }
2446 else if (key_length == 4 && memcmp(key, "name", 4) == 0) {
2447 /* GNU.sparse.name */
2448 /*
2449 * The real filename; when storing sparse
2450 * files, GNU tar puts a synthesized name into
2451 * the regular 'path' attribute in an attempt
2452 * to limit confusion. ;-)
2453 */
2454 if (value_length > pathname_limit) {
2455 *unconsumed += value_length;
2456 err = ARCHIVE_WARN;
2457 } else {
2458 err = read_bytes_to_string(a, &(tar->entry_pathname_override),
2459 value_length, unconsumed);
2460 }
2461 return (err);
2462 }
2463 else if (key_length == 8 && memcmp(key, "realsize", 8) == 0) {
2464 /* GNU.sparse.realsize = size of file on disk */
2465 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2466 tar->GNU_sparse_realsize = t;
2467 tar->size_fields |= TAR_SIZE_GNU_SPARSE_REALSIZE;
2468 }
2469 return (err);
2470 }
2471 }
2472 }
2473 break;
2474 case 'L':
2475 /* LIBARCHIVE extensions */
2476 if (key_length > 11 && memcmp(key, "LIBARCHIVE.", 11) == 0) {
2477 key_length -= 11;
2478 key += 11;
2479
2480 /* TODO: Handle arbitrary extended attributes... */
2481 /*
2482 if (strcmp(key, "LIBARCHIVE.xxxxxxx") == 0)
2483 archive_entry_set_xxxxxx(entry, value);
2484 */
2485 if (key_length == 12 && memcmp(key, "creationtime", 12) == 0) {
2486 /* LIBARCHIVE.creationtime */
2487 if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) {
2488 archive_entry_set_birthtime(entry, t, n);
2489 }
2490 return (err);
2491 }
2492 else if (key_length == 11 && memcmp(key, "symlinktype", 11) == 0) {
2493 /* LIBARCHIVE.symlinktype */
2494 if (value_length < 16) {
2495 p = __archive_read_ahead(a, value_length, &bytes_read);
2496 if (p == NULL) {
2497 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2498 "Truncated tar archive "
2499 "detected while reading `symlinktype` attribute");
2500 return (ARCHIVE_FATAL);
2501 }
2502 if (value_length == 4 && memcmp(p, "file", 4) == 0) {
2503 archive_entry_set_symlink_type(entry,
2504 AE_SYMLINK_TYPE_FILE);
2505 } else if (value_length == 3 && memcmp(p, "dir", 3) == 0) {
2506 archive_entry_set_symlink_type(entry,
2507 AE_SYMLINK_TYPE_DIRECTORY);
2508 } else {
2509 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2510 "Unrecognized symlink type");
2511 err = ARCHIVE_WARN;
2512 }
2513 } else {
2514 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2515 "symlink type is very long"
2516 "(longest recognized value is 4 bytes, this is %llu)",
2517 (unsigned long long)value_length);
2518 err = ARCHIVE_WARN;
2519 }
2520 __archive_read_consume(a, value_length);
2521 return (err);
2522 }
2523 else if (key_length > 6 && memcmp(key, "xattr.", 6) == 0) {
2524 key_length -= 6;
2525 key += 6;
2526 if (value_length > xattr_limit) {
2527 err = ARCHIVE_WARN;
2528 } else {
2529 p = __archive_read_ahead(a, value_length, &bytes_read);
2530 if (p == NULL) {
2531 archive_set_error(&a->archive, EINVAL,
2532 "Truncated archive"
2533 " detected while reading xattr information");
2534 return (ARCHIVE_FATAL);
2535 }
2536 if (pax_attribute_LIBARCHIVE_xattr(entry, key, key_length, p, value_length)) {
2537 /* TODO: Unable to parse xattr */
2538 err = ARCHIVE_WARN;
2539 }
2540 }
2541 __archive_read_consume(a, value_length);
2542 return (err);
2543 }
2544 }
2545 break;
2546 case 'R':
2547 /* GNU tar uses RHT.security header to store SELinux xattrs
2548 * SCHILY.xattr.security.selinux == RHT.security.selinux */
2549 if (key_length == 20 && memcmp(key, "RHT.security.selinux", 20) == 0) {
2550 if (value_length > xattr_limit) {
2551 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2552 "Ignoring unreasonably large security.selinux attribute:"
2553 " %llu > %llu",
2554 (unsigned long long)value_length,
2555 (unsigned long long)xattr_limit);
2556 /* TODO: Should this be FAILED instead? */
2557 err = ARCHIVE_WARN;
2558 } else {
2559 p = __archive_read_ahead(a, value_length, &bytes_read);
2560 if (p == NULL) {
2561 archive_set_error(&a->archive, EINVAL,
2562 "Truncated archive"
2563 " detected while reading selinux data");
2564 return (ARCHIVE_FATAL);
2565 }
2566 if (pax_attribute_RHT_security_selinux(entry, p, value_length)) {
2567 /* TODO: Unable to parse xattr */
2568 err = ARCHIVE_WARN;
2569 }
2570 }
2571 __archive_read_consume(a, value_length);
2572 return (err);
2573 }
2574 break;
2575 case 'S':
2576 /* SCHILY.* extensions used by "star" archiver */
2577 if (key_length > 7 && memcmp(key, "SCHILY.", 7) == 0) {
2578 key_length -= 7;
2579 key += 7;
2580
2581 if (key_length == 10 && memcmp(key, "acl.access", 10) == 0) {
2582 err = pax_attribute_SCHILY_acl(a, tar, entry, value_length,
2583 ARCHIVE_ENTRY_ACL_TYPE_ACCESS);
2584 // TODO: Mark mode as set
2585 return (err);
2586 }
2587 else if (key_length == 11 && memcmp(key, "acl.default", 11) == 0) {
2588 err = pax_attribute_SCHILY_acl(a, tar, entry, value_length,
2589 ARCHIVE_ENTRY_ACL_TYPE_DEFAULT);
2590 return (err);
2591 }
2592 else if (key_length == 7 && memcmp(key, "acl.ace", 7) == 0) {
2593 err = pax_attribute_SCHILY_acl(a, tar, entry, value_length,
2594 ARCHIVE_ENTRY_ACL_TYPE_NFS4);
2595 // TODO: Mark mode as set
2596 return (err);
2597 }
2598 else if (key_length == 8 && memcmp(key, "devmajor", 8) == 0) {
2599 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2600 archive_entry_set_rdevmajor(entry, (dev_t)t);
2601 }
2602 return (err);
2603 }
2604 else if (key_length == 8 && memcmp(key, "devminor", 8) == 0) {
2605 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2606 archive_entry_set_rdevminor(entry, (dev_t)t);
2607 }
2608 return (err);
2609 }
2610 else if (key_length == 6 && memcmp(key, "fflags", 6) == 0) {
2611 if (value_length < fflags_limit) {
2612 p = __archive_read_ahead(a, value_length, &bytes_read);
2613 if (p == NULL) {
2614 /* Truncated archive */
2615 archive_set_error(&a->archive, EINVAL,
2616 "Truncated archive"
2617 " detected while reading SCHILY.fflags");
2618 return (ARCHIVE_FATAL);
2619 }
2620 archive_entry_copy_fflags_text_len(entry, p, value_length);
2621 err = ARCHIVE_OK;
2622 } else {
2623 /* Overlong fflags field */
2624 err = ARCHIVE_WARN;
2625 }
2626 __archive_read_consume(a, value_length);
2627 return (err);
2628 }
2629 else if (key_length == 3 && memcmp(key, "dev", 3) == 0) {
2630 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2631 archive_entry_set_dev(entry, (dev_t)t);
2632 }
2633 return (err);
2634 }
2635 else if (key_length == 3 && memcmp(key, "ino", 3) == 0) {
2636 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2637 archive_entry_set_ino(entry, t);
2638 }
2639 return (err);
2640 }
2641 else if (key_length == 5 && memcmp(key, "nlink", 5) == 0) {
2642 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2643 archive_entry_set_nlink(entry, (unsigned int)t);
2644 }
2645 return (err);
2646 }
2647 else if (key_length == 8 && memcmp(key, "realsize", 8) == 0) {
2648 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2649 tar->SCHILY_sparse_realsize = t;
2650 tar->size_fields |= TAR_SIZE_SCHILY_SPARSE_REALSIZE;
2651 }
2652 return (err);
2653 }
2654 /* TODO: Is there a SCHILY.sparse.size similar to GNU.sparse.size ? */
2655 else if (key_length > 6 && memcmp(key, "xattr.", 6) == 0) {
2656 key_length -= 6;
2657 key += 6;
2658 if (value_length < xattr_limit) {
2659 p = __archive_read_ahead(a, value_length, &bytes_read);
2660 if (p == NULL) {
2661 archive_set_error(&a->archive, EINVAL,
2662 "Truncated archive"
2663 " detected while reading SCHILY.xattr");
2664 return (ARCHIVE_FATAL);
2665 }
2666 if (pax_attribute_SCHILY_xattr(entry, key, key_length, p, value_length)) {
2667 /* TODO: Unable to parse xattr */
2668 err = ARCHIVE_WARN;
2669 }
2670 } else {
2671 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2672 "Unreasonably large xattr: %llu > %llu",
2673 (unsigned long long)value_length,
2674 (unsigned long long)xattr_limit);
2675 err = ARCHIVE_WARN;
2676 }
2677 __archive_read_consume(a, value_length);
2678 return (err);
2679 }
2680 }
2681 /* SUN.* extensions from Solaris tar */
2682 if (key_length > 4 && memcmp(key, "SUN.", 4) == 0) {
2683 key_length -= 4;
2684 key += 4;
2685
2686 if (key_length == 9 && memcmp(key, "holesdata", 9) == 0) {
2687 /* SUN.holesdata */
2688 if (value_length < sparse_map_limit) {
2689 p = __archive_read_ahead(a, value_length, &bytes_read);
2690 if (p == NULL) {
2691 archive_set_error(&a->archive, EINVAL,
2692 "Truncated archive"
2693 " detected while reading SUN.holesdata");
2694 return (ARCHIVE_FATAL);
2695 }
2696 err = pax_attribute_SUN_holesdata(a, tar, entry, p, value_length);
2697 if (err < ARCHIVE_OK) {
2698 archive_set_error(&a->archive,
2699 ARCHIVE_ERRNO_MISC,
2700 "Parse error: SUN.holesdata");
2701 }
2702 } else {
2703 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
2704 "Unreasonably large sparse map: %llu > %llu",
2705 (unsigned long long)value_length,
2706 (unsigned long long)sparse_map_limit);
2707 err = ARCHIVE_FAILED;
2708 }
2709 __archive_read_consume(a, value_length);
2710 return (err);
2711 }
2712 }
2713 break;
2714 case 'a':
2715 if (key_length == 5 && memcmp(key, "atime", 5) == 0) {
2716 if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) {
2717 archive_entry_set_atime(entry, t, n);
2718 }
2719 return (err);
2720 }
2721 break;
2722 case 'c':
2723 if (key_length == 5 && memcmp(key, "ctime", 5) == 0) {
2724 if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) {
2725 archive_entry_set_ctime(entry, t, n);
2726 }
2727 return (err);
2728 } else if (key_length == 7 && memcmp(key, "charset", 7) == 0) {
2729 /* TODO: Publish charset information in entry. */
2730 } else if (key_length == 7 && memcmp(key, "comment", 7) == 0) {
2731 /* TODO: Publish comment in entry. */
2732 }
2733 break;
2734 case 'g':
2735 if (key_length == 3 && memcmp(key, "gid", 3) == 0) {
2736 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2737 archive_entry_set_gid(entry, t);
2738 }
2739 return (err);
2740 } else if (key_length == 5 && memcmp(key, "gname", 5) == 0) {
2741 if (value_length > guname_limit) {
2742 *unconsumed += value_length;
2743 err = ARCHIVE_WARN;
2744 } else {
2745 err = read_bytes_to_string(a, &(tar->entry_gname), value_length, unconsumed);
2746 }
2747 return (err);
2748 }
2749 break;
2750 case 'h':
2751 if (key_length == 10 && memcmp(key, "hdrcharset", 10) == 0) {
2752 if (value_length < 64) {
2753 p = __archive_read_ahead(a, value_length, &bytes_read);
2754 if (p == NULL) {
2755 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2756 "Truncated tar archive "
2757 "detected while reading hdrcharset attribute");
2758 return (ARCHIVE_FATAL);
2759 }
2760 if (value_length == 6
2761 && memcmp(p, "BINARY", 6) == 0) {
2762 /* Binary mode. */
2763 tar->pax_hdrcharset_utf8 = 0;
2764 err = ARCHIVE_OK;
2765 } else if (value_length == 23
2766 && memcmp(p, "ISO-IR 10646 2000 UTF-8", 23) == 0) {
2767 tar->pax_hdrcharset_utf8 = 1;
2768 err = ARCHIVE_OK;
2769 } else {
2770 /* TODO: Unrecognized character set */
2771 err = ARCHIVE_WARN;
2772 }
2773 } else {
2774 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2775 "hdrcharset attribute is unreasonably large (%llu bytes)",
2776 (unsigned long long)value_length);
2777 err = ARCHIVE_WARN;
2778 }
2779 __archive_read_consume(a, value_length);
2780 return (err);
2781 }
2782 break;
2783 case 'l':
2784 /* pax interchange doesn't distinguish hardlink vs. symlink. */
2785 if (key_length == 8 && memcmp(key, "linkpath", 8) == 0) {
2786 if (value_length > pathname_limit) {
2787 *unconsumed += value_length;
2788 err = ARCHIVE_WARN;
2789 } else {
2790 err = read_bytes_to_string(a, &tar->entry_linkpath, value_length, unconsumed);
2791 }
2792 return (err);
2793 }
2794 break;
2795 case 'm':
2796 if (key_length == 5 && memcmp(key, "mtime", 5) == 0) {
2797 if ((err = pax_attribute_read_time(a, value_length, &t, &n, unconsumed)) == ARCHIVE_OK) {
2798 archive_entry_set_mtime(entry, t, n);
2799 }
2800 return (err);
2801 }
2802 break;
2803 case 'p':
2804 if (key_length == 4 && memcmp(key, "path", 4) == 0) {
2805 if (value_length > pathname_limit) {
2806 *unconsumed += value_length;
2807 err = ARCHIVE_WARN;
2808 } else {
2809 err = read_bytes_to_string(a, &(tar->entry_pathname), value_length, unconsumed);
2810 }
2811 return (err);
2812 }
2813 break;
2814 case 'r':
2815 /* POSIX has reserved 'realtime.*' */
2816 break;
2817 case 's':
2818 /* POSIX has reserved 'security.*' */
2819 /* Someday: if (strcmp(key, "security.acl") == 0) { ... } */
2820 if (key_length == 4 && memcmp(key, "size", 4) == 0) {
2821 /* "size" is the size of the data in the entry. */
2822 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2823 tar->pax_size = t;
2824 tar->size_fields |= TAR_SIZE_PAX_SIZE;
2825 }
2826 else if (t == INT64_MAX) {
2827 /* Note: pax_attr_read_number returns INT64_MAX on overflow or < 0 */
2828 tar->entry_bytes_remaining = 0;
2829 archive_set_error(&a->archive,
2830 ARCHIVE_ERRNO_MISC,
2831 "Tar size attribute overflow");
2832 return (ARCHIVE_FATAL);
2833 }
2834 return (err);
2835 }
2836 break;
2837 case 'u':
2838 if (key_length == 3 && memcmp(key, "uid", 3) == 0) {
2839 if ((err = pax_attribute_read_number(a, value_length, &t)) == ARCHIVE_OK) {
2840 archive_entry_set_uid(entry, t);
2841 }
2842 return (err);
2843 } else if (key_length == 5 && memcmp(key, "uname", 5) == 0) {
2844 if (value_length > guname_limit) {
2845 *unconsumed += value_length;
2846 err = ARCHIVE_WARN;
2847 } else {
2848 err = read_bytes_to_string(a, &(tar->entry_uname), value_length, unconsumed);
2849 }
2850 return (err);
2851 }
2852 break;
2853 }
2854
2855 /* Unrecognized key, just skip the entire value. */
2856 __archive_read_consume(a, value_length);
2857 return (err);
2858 }
2859
2860
2861
/*
 * Parse a decimal time value of the form [-]digits[.digits].
 *
 * At most 'length' bytes are read from 'p'; the text does not have to
 * be NUL-terminated.  Whole seconds are stored in *ps.  Up to nine
 * fractional digits are stored in *pn as nanoseconds; any further
 * fractional digits are ignored.  *pn is never negative, even when the
 * seconds are.  An empty value yields 0 seconds and 0 nanoseconds.
 *
 * Sets ps to INT64_MIN on error (the digits do not fit in an int64_t).
 */
static void
pax_time(const char *p, size_t length, int64_t *ps, long *pn)
{
	const int64_t max_before_digit = INT64_MAX / 10;
	const int64_t max_final_digit = INT64_MAX % 10;
	const char *cur = p;
	size_t left = length;
	unsigned long scale;
	int64_t secs = 0;
	int negative = 0;

	/* Every exit path reports zero nanoseconds unless a fraction is parsed. */
	*pn = 0;

	if (left == 0) {
		*ps = 0;
		return;
	}

	if (*cur == '-') {
		negative = 1;
		cur++;
		left--;
	}

	/* Accumulate the integer part, refusing to overflow int64_t. */
	for (; left > 0 && *cur >= '0' && *cur <= '9'; cur++, left--) {
		const char d = *cur - '0';

		if (secs > max_before_digit ||
		    (secs == max_before_digit && d > max_final_digit)) {
			*ps = INT64_MIN;
			return;
		}
		secs = secs * 10 + d;
	}
	*ps = negative ? -secs : secs;

	/* Without a '.' there is no fractional part to convert. */
	if (left == 0 || *cur != '.')
		return;

	/* Each successive digit is worth a tenth of the previous one. */
	for (scale = 100000000UL; scale != 0; scale /= 10) {
		++cur;
		--left;
		if (left == 0 || *cur < '0' || *cur > '9')
			break;
		*pn += (*cur - '0') * scale;
	}
}
2922
2923 /*
2924 * Parse GNU tar header
2925 */
2926 static int
header_gnutar(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const void * h,int64_t * unconsumed)2927 header_gnutar(struct archive_read *a, struct tar *tar,
2928 struct archive_entry *entry, const void *h, int64_t *unconsumed)
2929 {
2930 const struct archive_entry_header_gnutar *header;
2931 int64_t t;
2932 int err = ARCHIVE_OK;
2933
2934 /*
2935 * GNU header is like POSIX ustar, except 'prefix' is
2936 * replaced with some other fields. This also means the
2937 * filename is stored as in old-style archives.
2938 */
2939
2940 /* Copy filename over (to ensure null termination). */
2941 header = (const struct archive_entry_header_gnutar *)h;
2942 const char *existing_pathname = archive_entry_pathname(entry);
2943 if (existing_pathname == NULL || existing_pathname[0] == '\0') {
2944 if (archive_entry_copy_pathname_l(entry,
2945 header->name, sizeof(header->name), tar->sconv) != 0) {
2946 err = set_conversion_failed_error(a, tar->sconv, "Pathname");
2947 if (err == ARCHIVE_FATAL)
2948 return (err);
2949 }
2950 }
2951
2952 /* Fields common to ustar and GNU */
2953 /* XXX Can the following be factored out since it's common
2954 * to ustar and gnu tar? Is it okay to move it down into
2955 * header_common, perhaps? */
2956 const char *existing_uname = archive_entry_uname(entry);
2957 if (existing_uname == NULL || existing_uname[0] == '\0') {
2958 if (archive_entry_copy_uname_l(entry,
2959 header->uname, sizeof(header->uname), tar->sconv) != 0) {
2960 err = set_conversion_failed_error(a, tar->sconv, "Uname");
2961 if (err == ARCHIVE_FATAL)
2962 return (err);
2963 }
2964 }
2965
2966 const char *existing_gname = archive_entry_gname(entry);
2967 if (existing_gname == NULL || existing_gname[0] == '\0') {
2968 if (archive_entry_copy_gname_l(entry,
2969 header->gname, sizeof(header->gname), tar->sconv) != 0) {
2970 err = set_conversion_failed_error(a, tar->sconv, "Gname");
2971 if (err == ARCHIVE_FATAL)
2972 return (err);
2973 }
2974 }
2975
2976 /* Parse out device numbers only for char and block specials */
2977 if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
2978 if (!archive_entry_rdev_is_set(entry)) {
2979 archive_entry_set_rdevmajor(entry, (dev_t)
2980 tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
2981 archive_entry_set_rdevminor(entry, (dev_t)
2982 tar_atol(header->rdevminor, sizeof(header->rdevminor)));
2983 }
2984 } else {
2985 archive_entry_set_rdev(entry, 0);
2986 }
2987
2988 /* Grab GNU-specific fields. */
2989 if (!archive_entry_atime_is_set(entry)) {
2990 t = tar_atol(header->atime, sizeof(header->atime));
2991 if (t > 0)
2992 archive_entry_set_atime(entry, t, 0);
2993 }
2994 if (!archive_entry_ctime_is_set(entry)) {
2995 t = tar_atol(header->ctime, sizeof(header->ctime));
2996 if (t > 0)
2997 archive_entry_set_ctime(entry, t, 0);
2998 }
2999
3000 if (header->realsize[0] != 0) {
3001 /* Treat as a synonym for the pax GNU.sparse.realsize attr */
3002 tar->GNU_sparse_realsize
3003 = tar_atol(header->realsize, sizeof(header->realsize));
3004 tar->size_fields |= TAR_SIZE_GNU_SPARSE_REALSIZE;
3005 }
3006
3007 if (header->sparse[0].offset[0] != 0) {
3008 if (gnu_sparse_old_read(a, tar, header, unconsumed)
3009 != ARCHIVE_OK)
3010 return (ARCHIVE_FATAL);
3011 } else {
3012 if (header->isextended[0] != 0) {
3013 /* XXX WTF? XXX */
3014 }
3015 }
3016
3017 /* Grab fields common to all tar variants. */
3018 err = header_common(a, tar, entry, h);
3019 if (err == ARCHIVE_FATAL)
3020 return (err);
3021
3022 tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
3023
3024 return (err);
3025 }
3026
3027 static int
gnu_add_sparse_entry(struct archive_read * a,struct tar * tar,int64_t offset,int64_t remaining)3028 gnu_add_sparse_entry(struct archive_read *a, struct tar *tar,
3029 int64_t offset, int64_t remaining)
3030 {
3031 struct sparse_block *p;
3032
3033 p = calloc(1, sizeof(*p));
3034 if (p == NULL) {
3035 archive_set_error(&a->archive, ENOMEM, "Out of memory");
3036 return (ARCHIVE_FATAL);
3037 }
3038 if (tar->sparse_last != NULL)
3039 tar->sparse_last->next = p;
3040 else
3041 tar->sparse_list = p;
3042 tar->sparse_last = p;
3043 if (remaining < 0 || offset < 0 || offset > INT64_MAX - remaining) {
3044 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Malformed sparse map data");
3045 return (ARCHIVE_FATAL);
3046 }
3047 p->offset = offset;
3048 p->remaining = remaining;
3049 return (ARCHIVE_OK);
3050 }
3051
3052 static void
gnu_clear_sparse_list(struct tar * tar)3053 gnu_clear_sparse_list(struct tar *tar)
3054 {
3055 struct sparse_block *p;
3056
3057 while (tar->sparse_list != NULL) {
3058 p = tar->sparse_list;
3059 tar->sparse_list = p->next;
3060 free(p);
3061 }
3062 tar->sparse_last = NULL;
3063 }
3064
3065 /*
3066 * GNU tar old-format sparse data.
3067 *
3068 * GNU old-format sparse data is stored in a fixed-field
3069 * format. Offset/size values are 11-byte octal fields (same
 * format as 'size' field in ustar header).  These are
3071 * stored in the header, allocating subsequent header blocks
3072 * as needed. Extending the header in this way is a pretty
3073 * severe POSIX violation; this design has earned GNU tar a
3074 * lot of criticism.
3075 */
3076
3077 static int
gnu_sparse_old_read(struct archive_read * a,struct tar * tar,const struct archive_entry_header_gnutar * header,int64_t * unconsumed)3078 gnu_sparse_old_read(struct archive_read *a, struct tar *tar,
3079 const struct archive_entry_header_gnutar *header, int64_t *unconsumed)
3080 {
3081 ssize_t bytes_read;
3082 const void *data;
3083 struct extended {
3084 struct gnu_sparse sparse[21];
3085 char isextended[1];
3086 char padding[7];
3087 };
3088 const struct extended *ext;
3089
3090 if (gnu_sparse_old_parse(a, tar, header->sparse, 4) != ARCHIVE_OK)
3091 return (ARCHIVE_FATAL);
3092 if (header->isextended[0] == 0)
3093 return (ARCHIVE_OK);
3094
3095 do {
3096 tar_flush_unconsumed(a, unconsumed);
3097 data = __archive_read_ahead(a, 512, &bytes_read);
3098 if (data == NULL) {
3099 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
3100 "Truncated tar archive "
3101 "detected while reading sparse file data");
3102 return (ARCHIVE_FATAL);
3103 }
3104 *unconsumed = 512;
3105 ext = (const struct extended *)data;
3106 if (gnu_sparse_old_parse(a, tar, ext->sparse, 21) != ARCHIVE_OK)
3107 return (ARCHIVE_FATAL);
3108 } while (ext->isextended[0] != 0);
3109 if (tar->sparse_list != NULL)
3110 tar->entry_offset = tar->sparse_list->offset;
3111 return (ARCHIVE_OK);
3112 }
3113
3114 static int
gnu_sparse_old_parse(struct archive_read * a,struct tar * tar,const struct gnu_sparse * sparse,int length)3115 gnu_sparse_old_parse(struct archive_read *a, struct tar *tar,
3116 const struct gnu_sparse *sparse, int length)
3117 {
3118 while (length > 0 && sparse->offset[0] != 0) {
3119 if (gnu_add_sparse_entry(a, tar,
3120 tar_atol(sparse->offset, sizeof(sparse->offset)),
3121 tar_atol(sparse->numbytes, sizeof(sparse->numbytes)))
3122 != ARCHIVE_OK)
3123 return (ARCHIVE_FATAL);
3124 sparse++;
3125 length--;
3126 }
3127 return (ARCHIVE_OK);
3128 }
3129
3130 /*
3131 * GNU tar sparse format 0.0
3132 *
3133 * Beginning with GNU tar 1.15, sparse files are stored using
3134 * information in the pax extended header. The GNU tar maintainers
3135 * have gone through a number of variations in the process of working
3136 * out this scheme; fortunately, they're all numbered.
3137 *
3138 * Sparse format 0.0 uses attribute GNU.sparse.numblocks to store the
3139 * number of blocks, and GNU.sparse.offset/GNU.sparse.numbytes to
3140 * store offset/size for each block. The repeated instances of these
3141 * latter fields violate the pax specification (which frowns on
3142 * duplicate keys), so this format was quickly replaced.
3143 */
3144
3145 /*
3146 * GNU tar sparse format 0.1
3147 *
3148 * This version replaced the offset/numbytes attributes with
3149 * a single "map" attribute that stored a list of integers. This
3150 * format had two problems: First, the "map" attribute could be very
3151 * long, which caused problems for some implementations. More
3152 * importantly, the sparse data was lost when extracted by archivers
3153 * that didn't recognize this extension.
3154 */
3155 static int
gnu_sparse_01_parse(struct archive_read * a,struct tar * tar,const char * p,size_t length)3156 gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p, size_t length)
3157 {
3158 const char *e;
3159 int64_t offset = -1, size = -1;
3160
3161 for (;;) {
3162 e = p;
3163 while (length > 0 && *e != ',') {
3164 if (*e < '0' || *e > '9')
3165 return (ARCHIVE_WARN);
3166 e++;
3167 length--;
3168 }
3169 if (offset < 0) {
3170 offset = tar_atol10(p, e - p);
3171 if (offset < 0)
3172 return (ARCHIVE_WARN);
3173 } else {
3174 size = tar_atol10(p, e - p);
3175 if (size < 0)
3176 return (ARCHIVE_WARN);
3177 if (gnu_add_sparse_entry(a, tar, offset, size)
3178 != ARCHIVE_OK)
3179 return (ARCHIVE_FATAL);
3180 offset = -1;
3181 }
3182 if (length == 0)
3183 return (ARCHIVE_OK);
3184 p = e + 1;
3185 length--;
3186 }
3187 }
3188
3189 /*
3190 * GNU tar sparse format 1.0
3191 *
3192 * The idea: The offset/size data is stored as a series of base-10
3193 * ASCII numbers prepended to the file data, so that dearchivers that
3194 * don't support this format will extract the block map along with the
3195 * data and a separate post-process can restore the sparseness.
3196 *
3197 * Unfortunately, GNU tar 1.16 had a bug that added unnecessary
3198 * padding to the body of the file when using this format. GNU tar
3199 * 1.17 corrected this bug without bumping the version number, so
3200 * it's not possible to support both variants. This code supports
3201 * the later variant at the expense of not supporting the former.
3202 *
3203 * This variant also introduced the GNU.sparse.major/GNU.sparse.minor attributes.
3204 */
3205
3206 /*
3207 * Read the next line from the input, and parse it as a decimal
3208 * integer followed by '\n'. Returns positive integer value or
3209 * negative on error.
3210 */
3211 static int64_t
gnu_sparse_10_atol(struct archive_read * a,struct tar * tar,int64_t * remaining,int64_t * unconsumed)3212 gnu_sparse_10_atol(struct archive_read *a, struct tar *tar,
3213 int64_t *remaining, int64_t *unconsumed)
3214 {
3215 int64_t l, limit, last_digit_limit;
3216 const char *p;
3217 ssize_t bytes_read;
3218 int base, digit;
3219
3220 base = 10;
3221 limit = INT64_MAX / base;
3222 last_digit_limit = INT64_MAX % base;
3223
3224 /*
3225 * Skip any lines starting with '#'; GNU tar specs
3226 * don't require this, but they should.
3227 */
3228 do {
3229 bytes_read = readline(a, tar, &p,
3230 (ssize_t)tar_min(*remaining, 100), unconsumed);
3231 if (bytes_read <= 0)
3232 return (ARCHIVE_FATAL);
3233 *remaining -= bytes_read;
3234 } while (p[0] == '#');
3235
3236 l = 0;
3237 while (bytes_read > 0) {
3238 if (*p == '\n')
3239 return (l);
3240 if (*p < '0' || *p >= '0' + base)
3241 return (ARCHIVE_WARN);
3242 digit = *p - '0';
3243 if (l > limit || (l == limit && digit > last_digit_limit))
3244 l = INT64_MAX; /* Truncate on overflow. */
3245 else
3246 l = (l * base) + digit;
3247 p++;
3248 bytes_read--;
3249 }
3250 /* TODO: Error message. */
3251 return (ARCHIVE_WARN);
3252 }
3253
3254 /*
3255 * Returns length (in bytes) of the sparse data description
3256 * that was read.
3257 */
3258 static int64_t
gnu_sparse_10_read(struct archive_read * a,struct tar * tar,int64_t * unconsumed)3259 gnu_sparse_10_read(struct archive_read *a, struct tar *tar, int64_t *unconsumed)
3260 {
3261 int64_t bytes_read, entries, offset, size, to_skip, remaining;
3262
3263 /* Clear out the existing sparse list. */
3264 gnu_clear_sparse_list(tar);
3265
3266 remaining = tar->entry_bytes_remaining;
3267
3268 /* Parse entries. */
3269 entries = gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
3270 if (entries < 0)
3271 return (ARCHIVE_FATAL);
3272 /* Parse the individual entries. */
3273 while (entries-- > 0) {
3274 /* Parse offset/size */
3275 offset = gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
3276 if (offset < 0)
3277 return (ARCHIVE_FATAL);
3278 size = gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
3279 if (size < 0)
3280 return (ARCHIVE_FATAL);
3281 /* Add a new sparse entry. */
3282 if (gnu_add_sparse_entry(a, tar, offset, size) != ARCHIVE_OK)
3283 return (ARCHIVE_FATAL);
3284 }
3285 /* Skip rest of block... */
3286 tar_flush_unconsumed(a, unconsumed);
3287 bytes_read = tar->entry_bytes_remaining - remaining;
3288 to_skip = 0x1ff & -bytes_read;
3289 /* Fail if tar->entry_bytes_remaing would get negative */
3290 if (to_skip > remaining)
3291 return (ARCHIVE_FATAL);
3292 if (to_skip != __archive_read_consume(a, to_skip))
3293 return (ARCHIVE_FATAL);
3294 return (bytes_read + to_skip);
3295 }
3296
3297 /*
3298 * Solaris pax extension for a sparse file. This is recorded with the
3299 * data and hole pairs. The way recording sparse information by Solaris'
3300 * pax simply indicates where data and sparse are, so the stored contents
3301 * consist of both data and hole.
3302 */
3303 static int
pax_attribute_SUN_holesdata(struct archive_read * a,struct tar * tar,struct archive_entry * entry,const char * p,size_t length)3304 pax_attribute_SUN_holesdata(struct archive_read *a, struct tar *tar,
3305 struct archive_entry *entry, const char *p, size_t length)
3306 {
3307 const char *e;
3308 int64_t start, end;
3309 int hole = 1;
3310
3311 (void)entry; /* UNUSED */
3312
3313 end = 0;
3314 if (length <= 0)
3315 return (ARCHIVE_WARN);
3316 if (*p == ' ') {
3317 p++;
3318 length--;
3319 } else {
3320 return (ARCHIVE_WARN);
3321 }
3322 for (;;) {
3323 e = p;
3324 while (length > 0 && *e != ' ') {
3325 if (*e < '0' || *e > '9')
3326 return (ARCHIVE_WARN);
3327 e++;
3328 length--;
3329 }
3330 start = end;
3331 end = tar_atol10(p, e - p);
3332 if (end < 0)
3333 return (ARCHIVE_WARN);
3334 if (start < end) {
3335 if (gnu_add_sparse_entry(a, tar, start,
3336 end - start) != ARCHIVE_OK)
3337 return (ARCHIVE_FATAL);
3338 tar->sparse_last->hole = hole;
3339 }
3340 if (length == 0 || *e == '\n') {
3341 if (length == 0 && *e == '\n') {
3342 return (ARCHIVE_OK);
3343 } else {
3344 return (ARCHIVE_WARN);
3345 }
3346 }
3347 p = e + 1;
3348 length--;
3349 hole = hole == 0;
3350 }
3351 }
3352
3353 /*-
3354 * Convert text->integer.
3355 *
3356 * Traditional tar formats (including POSIX) specify base-8 for
3357 * all of the standard numeric fields. This is a significant limitation
3358 * in practice:
3359 * = file size is limited to 8GB
3360 * = rdevmajor and rdevminor are limited to 21 bits
3361 * = uid/gid are limited to 21 bits
3362 *
3363 * There are two workarounds for this:
3364 * = pax extended headers, which use variable-length string fields
3365 * = GNU tar and STAR both allow either base-8 or base-256 in
3366 * most fields. The high bit is set to indicate base-256.
3367 *
3368 * On read, this implementation supports both extensions.
3369 */
/*
 * Decode a numeric header field of 'char_cnt' bytes, choosing between
 * base-256 and base-8 by looking at the high bit of the first byte.
 */
static int64_t
tar_atol(const char *p, size_t char_cnt)
{
	/*
	 * Technically, GNU tar considers a field to be in base-256
	 * only if the first byte is 0xff or 0x80.
	 */
	const int is_base256 = (*p & 0x80) != 0;

	if (is_base256)
		return (tar_atol256(p, char_cnt));
	return (tar_atol8(p, char_cnt));
}
3381
/*
 * Convert at most 'char_cnt' bytes of text at 'p' to an integer in the
 * given base (digits '0' .. '0'+base-1).
 *
 * Leading spaces and tabs are skipped and a single leading '-' is
 * honored.  Conversion stops at the first byte that is not a digit in
 * the base or when 'char_cnt' bytes have been examined, whichever
 * comes first; the input does not need to be NUL-terminated and no
 * byte beyond 'char_cnt' is ever read.  Values that do not fit are
 * clamped to INT64_MAX (or INT64_MIN for negative input).
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
tar_atol_base_n(const char *p, size_t char_cnt, int base)
{
	int64_t	l, maxval, limit, last_digit_limit;
	int digit, sign;

	maxval = INT64_MAX;
	limit = INT64_MAX / base;
	last_digit_limit = INT64_MAX % base;

	/* the pointer will not be dereferenced if char_cnt is zero
	 * due to the way the && operator is evaluated.
	 */
	while (char_cnt != 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	sign = 1;
	if (char_cnt != 0 && *p == '-') {
		sign = -1;
		p++;
		char_cnt--;

		maxval = INT64_MIN;
		limit = -(INT64_MIN / base);
		last_digit_limit = -(INT64_MIN % base);
	}

	l = 0;
	/*
	 * Check the remaining count before every read so that the byte
	 * following the field is never touched.
	 */
	while (char_cnt != 0) {
		digit = *p - '0';
		if (digit < 0 || digit >= base)
			break;
		/*
		 * '>=' on the last digit is deliberate: a value that lands
		 * exactly on the limit equals maxval anyway, and returning
		 * early avoids negating INT64_MIN below.
		 */
		if (l > limit || (l == limit && digit >= last_digit_limit))
			return maxval; /* Truncate on overflow. */
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (sign < 0) ? -l : l;
}
3430
/* Parse up to 'char_cnt' bytes at 'p' as an octal number. */
static int64_t
tar_atol8(const char *p, size_t char_cnt)
{
	const int octal = 8;

	return (tar_atol_base_n(p, char_cnt, octal));
}
3436
/* Parse up to 'char_cnt' bytes at 'p' as a decimal number. */
static int64_t
tar_atol10(const char *p, size_t char_cnt)
{
	const int decimal = 10;

	return (tar_atol_base_n(p, char_cnt, decimal));
}
3442
/*
 * Parse a base-256 integer.  This is just a variable-length
 * twos-complement signed binary value in big-endian order, except
 * that the high-order bit is ignored.  The values here can be up to
 * 12 bytes, so we need to be careful about overflowing 64-bit
 * (8-byte) integers.
 *
 * Exactly 'char_cnt' bytes are read from '_p'.  A zero-length field
 * yields 0.  Values that do not fit in an int64_t are clamped to
 * INT64_MAX or INT64_MIN according to their sign.
 *
 * This code unashamedly assumes that the local machine uses 8-bit
 * bytes and twos-complement arithmetic.
 */
static int64_t
tar_atol256(const char *_p, size_t char_cnt)
{
	uint64_t l;
	const unsigned char *p = (const unsigned char *)_p;
	unsigned char c, neg;

	/*
	 * Nothing to decode.  Without this guard the first byte would be
	 * read anyway and the '--char_cnt' below would wrap around.
	 */
	if (char_cnt == 0)
		return (0);

	/* Extend 7-bit 2s-comp to 8-bit 2s-comp, decide sign. */
	c = *p;
	if (c & 0x40) {
		neg = 0xff;
		c |= 0x80;
		l = ~(uint64_t)0;
	} else {
		neg = 0;
		c &= 0x7f;
		l = 0;
	}

	/* If more than 8 bytes, check that we can ignore
	 * high-order bits without overflow. */
	while (char_cnt > sizeof(int64_t)) {
		--char_cnt;
		if (c != neg)
			return neg ? INT64_MIN : INT64_MAX;
		c = *++p;
	}

	/* c is first byte that fits; if sign mismatch, return overflow */
	if ((c ^ neg) & 0x80) {
		return neg ? INT64_MIN : INT64_MAX;
	}

	/* Accumulate remaining bytes. */
	while (--char_cnt > 0) {
		l = (l << 8) | c;
		c = *++p;
	}
	l = (l << 8) | c;
	/* Return signed twos-complement value. */
	return (int64_t)(l);
}
3495
3496 /*
3497 * Returns length of line (including trailing newline)
3498 * or negative on error. 'start' argument is updated to
3499 * point to first character of line. This avoids copying
3500 * when possible.
3501 */
3502 static ssize_t
readline(struct archive_read * a,struct tar * tar,const char ** start,ssize_t limit,int64_t * unconsumed)3503 readline(struct archive_read *a, struct tar *tar, const char **start,
3504 ssize_t limit, int64_t *unconsumed)
3505 {
3506 ssize_t bytes_read;
3507 ssize_t total_size = 0;
3508 const void *t;
3509 const char *s;
3510 void *p;
3511
3512 tar_flush_unconsumed(a, unconsumed);
3513
3514 t = __archive_read_ahead(a, 1, &bytes_read);
3515 if (bytes_read <= 0 || t == NULL)
3516 return (ARCHIVE_FATAL);
3517 s = t; /* Start of line? */
3518 p = memchr(t, '\n', bytes_read);
3519 /* If we found '\n' in the read buffer, return pointer to that. */
3520 if (p != NULL) {
3521 bytes_read = 1 + ((const char *)p) - s;
3522 if (bytes_read > limit) {
3523 archive_set_error(&a->archive,
3524 ARCHIVE_ERRNO_FILE_FORMAT,
3525 "Line too long");
3526 return (ARCHIVE_FATAL);
3527 }
3528 *unconsumed = bytes_read;
3529 *start = s;
3530 return (bytes_read);
3531 }
3532 *unconsumed = bytes_read;
3533 /* Otherwise, we need to accumulate in a line buffer. */
3534 for (;;) {
3535 if (total_size + bytes_read > limit) {
3536 archive_set_error(&a->archive,
3537 ARCHIVE_ERRNO_FILE_FORMAT,
3538 "Line too long");
3539 return (ARCHIVE_FATAL);
3540 }
3541 if (archive_string_ensure(&tar->line, total_size + bytes_read) == NULL) {
3542 archive_set_error(&a->archive, ENOMEM,
3543 "Can't allocate working buffer");
3544 return (ARCHIVE_FATAL);
3545 }
3546 memcpy(tar->line.s + total_size, t, bytes_read);
3547 tar_flush_unconsumed(a, unconsumed);
3548 total_size += bytes_read;
3549 /* If we found '\n', clean up and return. */
3550 if (p != NULL) {
3551 *start = tar->line.s;
3552 return (total_size);
3553 }
3554 /* Read some more. */
3555 t = __archive_read_ahead(a, 1, &bytes_read);
3556 if (bytes_read <= 0 || t == NULL)
3557 return (ARCHIVE_FATAL);
3558 s = t; /* Start of line? */
3559 p = memchr(t, '\n', bytes_read);
3560 /* If we found '\n', trim the read. */
3561 if (p != NULL) {
3562 bytes_read = 1 + ((const char *)p) - s;
3563 }
3564 *unconsumed = bytes_read;
3565 }
3566 }
3567
/*
 * Map one character of the base-64 alphabet (A-Z, a-z, 0-9, '+', '/')
 * to its 6-bit value.  Returns -1 for any other byte.
 */
static int
base64_digit_value(unsigned char c)
{
	if (c >= 'A' && c <= 'Z')
		return (c - 'A');
	if (c >= 'a' && c <= 'z')
		return (c - 'a' + 26);
	if (c >= '0' && c <= '9')
		return (c - '0' + 52);
	if (c == '+')
		return (62);
	if (c == '/')
		return (63);
	return (-1);
}

/*
 * base64_decode - Base64 decode
 *
 * This accepts most variations of base-64 encoding, including:
 *    * with or without line breaks
 *    * with or without the final group padded with '=' or '_' characters
 * (The most economical Base-64 variant does not pad the last group and
 * omits line breaks; RFC1341 used for MIME requires both.)
 *
 * Decodes 'len' bytes at 's'.  Bytes outside the base-64 alphabet are
 * skipped, and decoding stops at the first '=' or '_'.
 *
 * Returns a malloc()ed buffer that the caller must free(), with the
 * number of decoded bytes stored in *out_len.  The buffer is not
 * NUL-terminated.  On allocation failure returns NULL and sets
 * *out_len to 0.
 *
 * The alphabet lookup is a pure function rather than a lazily built
 * static table, so concurrent callers share no mutable state.
 */
static char *
base64_decode(const char *s, size_t len, size_t *out_len)
{
	char *out, *d;
	const unsigned char *src = (const unsigned char *)s;

	/* Allocate enough space to hold the entire output. */
	/* Note that we may not use all of this... */
	out = malloc(len - len / 4 + 1);
	if (out == NULL) {
		*out_len = 0;
		return (NULL);
	}
	d = out;

	while (len > 0) {
		/* Collect the next group of (up to) four characters. */
		int v = 0;
		int group_size = 0;
		while (group_size < 4 && len > 0) {
			int digit;

			/* '=' or '_' padding indicates final group. */
			if (*src == '=' || *src == '_') {
				len = 0;
				break;
			}
			digit = base64_digit_value(*src);
			src++;
			len--;
			/* Skip illegal characters (including line breaks) */
			if (digit < 0)
				continue;
			v = (v << 6) | digit;
			group_size++;
		}
		/* Align a short group properly. */
		v <<= 6 * (4 - group_size);
		/* Unpack the group we just collected. */
		switch (group_size) {
		case 4: d[2] = v & 0xff;
			/* FALLTHROUGH */
		case 3: d[1] = (v >> 8) & 0xff;
			/* FALLTHROUGH */
		case 2: d[0] = (v >> 16) & 0xff;
			break;
		case 1: /* this is invalid! */
			break;
		}
		/* 4 chars -> 3 bytes, 3 -> 2, 2 -> 1, 1 or 0 -> nothing. */
		d += group_size * 3 / 4;
	}

	*out_len = d - out;
	return (out);
}
3648
/*
 * Decode %XX escapes in the first 'length' bytes of 'in', stopping
 * early at a NUL byte.  A '%' that is not followed by two hex digits
 * within the input is copied through unchanged.
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must
 * free(), or NULL if the allocation fails.
 */
static char *
url_decode(const char *in, size_t length)
{
	char *decoded = malloc(length + 1);
	char *dst = decoded;
	const char *src = in;

	if (decoded == NULL)
		return (NULL);

	while (length > 0 && *src != '\0') {
		/* An escape needs the '%' plus two more bytes of input. */
		if (*src == '%' && length > 2) {
			const int hi = tohex(src[1]);
			const int lo = tohex(src[2]);

			if (hi >= 0 && lo >= 0) {
				/* Valid escape: emit one byte, consume three. */
				*dst++ = ((hi << 4) | lo);
				src += 3;
				length -= 3;
				continue;
			}
			/* Not a valid escape: treat '%' as a normal char. */
		}
		*dst++ = *src++;
		--length;
	}
	*dst = '\0';
	return (decoded);
}
3679
/*
 * Return the value (0-15) of the hexadecimal digit 'c', accepting both
 * upper and lower case, or -1 if 'c' is not a hex digit.
 */
static int
tohex(int c)
{
	int value = -1;

	if (c >= '0' && c <= '9')
		value = c - '0';
	else if (c >= 'a' && c <= 'f')
		value = 10 + (c - 'a');
	else if (c >= 'A' && c <= 'F')
		value = 10 + (c - 'A');
	return (value);
}
3692