1 /*-
2 * Copyright (c) 2007 Kai Wang
3 * Copyright (c) 2007 Tim Kientzle
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer
11 * in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "archive_platform.h"
29
30 #ifdef HAVE_SYS_STAT_H
31 #include <sys/stat.h>
32 #endif
33 #ifdef HAVE_ERRNO_H
34 #include <errno.h>
35 #endif
36 #ifdef HAVE_STDLIB_H
37 #include <stdlib.h>
38 #endif
39 #ifdef HAVE_STRING_H
40 #include <string.h>
41 #endif
42 #ifdef HAVE_LIMITS_H
43 #include <limits.h>
44 #endif
45
46 #include "archive.h"
47 #include "archive_entry.h"
48 #include "archive_private.h"
49 #include "archive_read_private.h"
50
/*
 * Private state kept by the "ar" reader for one archive handle.
 */
struct ar {
	/* Bytes of the current entry body not yet returned to the client. */
	int64_t	 entry_bytes_remaining;
	/*
	 * Bytes obtained through read-ahead and handed to the client but
	 * not yet marked as consumed.  Any change here must be paired
	 * with the matching change to entry_bytes_remaining.
	 */
	size_t	 entry_bytes_unconsumed;
	/* Offset, within the entry, of the next data block to return. */
	int64_t	 entry_offset;
	/* Pad bytes that follow the entry body (entry size modulo 2). */
	int64_t	 entry_padding;
	/* In-memory copy of the GNU/SVR4 "//" filename table, or NULL. */
	char	*strtab;
	/* Number of bytes in strtab. */
	size_t	 strtab_size;
	/* Non-zero once the 8-byte global archive header was consumed. */
	char	 read_global_header;
};
64
/*
 * Layout of the fixed-size "ar" entry header.  Each field is described
 * by its byte offset from the start of the header and its width in
 * bytes; together the fields cover 60 bytes.
 */
/* Entry name. */
#define	AR_name_offset	0
#define	AR_name_size	16
/* Modification time (parsed as decimal). */
#define	AR_date_offset	16
#define	AR_date_size	12
/* Owner user id (parsed as decimal). */
#define	AR_uid_offset	28
#define	AR_uid_size	6
/* Owner group id (parsed as decimal). */
#define	AR_gid_offset	34
#define	AR_gid_size	6
/* File mode (parsed as octal). */
#define	AR_mode_offset	40
#define	AR_mode_size	8
/* Entry size in bytes (parsed as decimal). */
#define	AR_size_offset	48
#define	AR_size_size	10
/* Header terminator, expected to be "`\n". */
#define	AR_fmag_offset	58
#define	AR_fmag_size	2
82
/* Format callbacks registered with the read core. */
static int	archive_read_format_ar_bid(struct archive_read *a, int);
static int	archive_read_format_ar_read_header(struct archive_read *a,
		    struct archive_entry *e);
static int	archive_read_format_ar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, int64_t *offset);
static int	archive_read_format_ar_skip(struct archive_read *a);
static int	archive_read_format_ar_cleanup(struct archive_read *a);

/* Header parsing helpers. */
static int	ar_parse_common_header(struct ar *ar, struct archive_entry *,
		    const char *h);
static int	ar_parse_gnu_filename_table(struct archive_read *a);

/* Bounded octal / decimal field parsers. */
static uint64_t	ar_atol8(const char *p, unsigned char_cnt);
static uint64_t	ar_atol10(const char *p, unsigned char_cnt);
95
96 int
archive_read_support_format_ar(struct archive * _a)97 archive_read_support_format_ar(struct archive *_a)
98 {
99 struct archive_read *a = (struct archive_read *)_a;
100 struct ar *ar;
101 int r;
102
103 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
104 ARCHIVE_STATE_NEW, "archive_read_support_format_ar");
105
106 ar = calloc(1, sizeof(*ar));
107 if (ar == NULL) {
108 archive_set_error(&a->archive, ENOMEM,
109 "Can't allocate ar data");
110 return (ARCHIVE_FATAL);
111 }
112 ar->strtab = NULL;
113
114 r = __archive_read_register_format(a,
115 ar,
116 "ar",
117 archive_read_format_ar_bid,
118 NULL,
119 archive_read_format_ar_read_header,
120 archive_read_format_ar_read_data,
121 archive_read_format_ar_skip,
122 NULL,
123 archive_read_format_ar_cleanup,
124 NULL,
125 NULL);
126
127 if (r != ARCHIVE_OK) {
128 free(ar);
129 return (r);
130 }
131 return (ARCHIVE_OK);
132 }
133
134 static int
archive_read_format_ar_cleanup(struct archive_read * a)135 archive_read_format_ar_cleanup(struct archive_read *a)
136 {
137 struct ar *ar;
138
139 ar = (struct ar *)(a->format->data);
140 free(ar->strtab);
141 free(ar);
142 (a->format->data) = NULL;
143 return (ARCHIVE_OK);
144 }
145
/*
 * Bid on the input: returns 64 when the next 8 bytes are the ar global
 * signature "!<arch>\n", and -1 when they are not or cannot be read.
 */
static int
archive_read_format_ar_bid(struct archive_read *a, int best_bid)
{
	const void *magic;

	(void)best_bid; /* UNUSED */

	/*
	 * Verify the 8-byte file signature.
	 * TODO: Do we need to check more than this?
	 */
	magic = __archive_read_ahead(a, 8, NULL);
	if (magic == NULL || memcmp(magic, "!<arch>\n", 8) != 0)
		return (-1);
	return (64);
}
164
165 static int
_ar_read_header(struct archive_read * a,struct archive_entry * entry,struct ar * ar,const char * h,size_t * unconsumed)166 _ar_read_header(struct archive_read *a, struct archive_entry *entry,
167 struct ar *ar, const char *h, size_t *unconsumed)
168 {
169 char filename[AR_name_size + 1];
170 uint64_t number; /* Used to hold parsed numbers before validation. */
171 size_t bsd_name_length, entry_size;
172 char *p, *st;
173 const void *b;
174 int r;
175
176 /* Verify the magic signature on the file header. */
177 if (strncmp(h + AR_fmag_offset, "`\n", 2) != 0) {
178 archive_set_error(&a->archive, EINVAL,
179 "Incorrect file header signature");
180 return (ARCHIVE_FATAL);
181 }
182
183 /* Copy filename into work buffer. */
184 strncpy(filename, h + AR_name_offset, AR_name_size);
185 filename[AR_name_size] = '\0';
186
187 /*
188 * Guess the format variant based on the filename.
189 */
190 if (a->archive.archive_format == ARCHIVE_FORMAT_AR) {
191 /* We don't already know the variant, so let's guess. */
192 /*
193 * Biggest clue is presence of '/': GNU starts special
194 * filenames with '/', appends '/' as terminator to
195 * non-special names, so anything with '/' should be
196 * GNU except for BSD long filenames.
197 */
198 if (strncmp(filename, "#1/", 3) == 0)
199 a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
200 else if (strchr(filename, '/') != NULL)
201 a->archive.archive_format = ARCHIVE_FORMAT_AR_GNU;
202 else if (strncmp(filename, "__.SYMDEF", 9) == 0)
203 a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
204 /*
205 * XXX Do GNU/SVR4 'ar' programs ever omit trailing '/'
206 * if name exactly fills 16-byte field? If so, we
207 * can't assume entries without '/' are BSD. XXX
208 */
209 }
210
211 /* Update format name from the code. */
212 if (a->archive.archive_format == ARCHIVE_FORMAT_AR_GNU)
213 a->archive.archive_format_name = "ar (GNU/SVR4)";
214 else if (a->archive.archive_format == ARCHIVE_FORMAT_AR_BSD)
215 a->archive.archive_format_name = "ar (BSD)";
216 else
217 a->archive.archive_format_name = "ar";
218
219 /*
220 * Remove trailing spaces from the filename. GNU and BSD
221 * variants both pad filename area out with spaces.
222 * This will only be wrong if GNU/SVR4 'ar' implementations
223 * omit trailing '/' for 16-char filenames and we have
224 * a 16-char filename that ends in ' '.
225 */
226 p = filename + AR_name_size - 1;
227 while (p >= filename && *p == ' ') {
228 *p = '\0';
229 p--;
230 }
231
232 /*
233 * Remove trailing slash unless first character is '/'.
234 * (BSD entries never end in '/', so this will only trim
235 * GNU-format entries. GNU special entries start with '/'
236 * and are not terminated in '/', so we don't trim anything
237 * that starts with '/'.)
238 */
239 if (filename[0] != '/' && p > filename && *p == '/') {
240 *p = '\0';
241 }
242
243 if (p < filename) {
244 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
245 "Found entry with empty filename");
246 return (ARCHIVE_FATAL);
247 }
248
249 /*
250 * '//' is the GNU filename table.
251 * Later entries can refer to names in this table.
252 */
253 if (strcmp(filename, "//") == 0) {
254 /* This must come before any call to _read_ahead. */
255 ar_parse_common_header(ar, entry, h);
256 archive_entry_copy_pathname(entry, filename);
257 archive_entry_set_filetype(entry, AE_IFREG);
258 /* Get the size of the filename table. */
259 number = ar_atol10(h + AR_size_offset, AR_size_size);
260 if (number > SIZE_MAX || number > 1024 * 1024 * 1024) {
261 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
262 "Filename table too large");
263 return (ARCHIVE_FATAL);
264 }
265 entry_size = (size_t)number;
266 if (entry_size == 0) {
267 archive_set_error(&a->archive, EINVAL,
268 "Invalid string table");
269 return (ARCHIVE_FATAL);
270 }
271 if (ar->strtab != NULL) {
272 archive_set_error(&a->archive, EINVAL,
273 "More than one string table exists");
274 return (ARCHIVE_FATAL);
275 }
276
277 /* Read the filename table into memory. */
278 st = malloc(entry_size);
279 if (st == NULL) {
280 archive_set_error(&a->archive, ENOMEM,
281 "Can't allocate filename table buffer");
282 return (ARCHIVE_FATAL);
283 }
284 ar->strtab = st;
285 ar->strtab_size = entry_size;
286
287 if (*unconsumed) {
288 __archive_read_consume(a, *unconsumed);
289 *unconsumed = 0;
290 }
291
292 if ((b = __archive_read_ahead(a, entry_size, NULL)) == NULL)
293 return (ARCHIVE_FATAL);
294 memcpy(st, b, entry_size);
295 __archive_read_consume(a, entry_size);
296 /* All contents are consumed. */
297 ar->entry_bytes_remaining = 0;
298 archive_entry_set_size(entry, ar->entry_bytes_remaining);
299
300 /* Parse the filename table. */
301 return (ar_parse_gnu_filename_table(a));
302 }
303
304 /*
305 * GNU variant handles long filenames by storing /<number>
306 * to indicate a name stored in the filename table.
307 * XXX TODO: Verify that it's all digits... Don't be fooled
308 * by "/9xyz" XXX
309 */
310 if (filename[0] == '/' && filename[1] >= '0' && filename[1] <= '9') {
311 number = ar_atol10(h + AR_name_offset + 1, AR_name_size - 1);
312 /*
313 * If we can't look up the real name, warn and return
314 * the entry with the wrong name.
315 */
316 if (ar->strtab == NULL || number >= ar->strtab_size) {
317 archive_set_error(&a->archive, EINVAL,
318 "Can't find long filename for GNU/SVR4 archive entry");
319 archive_entry_copy_pathname(entry, filename);
320 /* Parse the time, owner, mode, size fields. */
321 ar_parse_common_header(ar, entry, h);
322 return (ARCHIVE_FATAL);
323 }
324
325 archive_entry_copy_pathname(entry, &ar->strtab[(size_t)number]);
326 /* Parse the time, owner, mode, size fields. */
327 return (ar_parse_common_header(ar, entry, h));
328 }
329
330 /*
331 * BSD handles long filenames by storing "#1/" followed by the
332 * length of filename as a decimal number, then prepends the
333 * the filename to the file contents.
334 */
335 if (strncmp(filename, "#1/", 3) == 0) {
336 /* Parse the time, owner, mode, size fields. */
337 /* This must occur before _read_ahead is called again. */
338 ar_parse_common_header(ar, entry, h);
339
340 /* Parse the size of the name, adjust the file size. */
341 number = ar_atol10(h + AR_name_offset + 3, AR_name_size - 3);
342 /* Sanity check the filename length:
343 * = Must be <= SIZE_MAX - 1
344 * = Must be <= 1MB
345 * = Cannot be bigger than the entire entry
346 */
347 if (number > SIZE_MAX - 1
348 || number > 1024 * 1024
349 || (int64_t)number > ar->entry_bytes_remaining) {
350 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
351 "Bad input file size");
352 return (ARCHIVE_FATAL);
353 }
354 bsd_name_length = (size_t)number;
355 ar->entry_bytes_remaining -= bsd_name_length;
356 /* Adjust file size reported to client. */
357 archive_entry_set_size(entry, ar->entry_bytes_remaining);
358
359 if (*unconsumed) {
360 __archive_read_consume(a, *unconsumed);
361 *unconsumed = 0;
362 }
363
364 /* Read the long name into memory. */
365 if ((b = __archive_read_ahead(a, bsd_name_length, NULL)) == NULL) {
366 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
367 "Truncated input file");
368 return (ARCHIVE_FATAL);
369 }
370 /* Store it in the entry. */
371 p = malloc(bsd_name_length + 1);
372 if (p == NULL) {
373 archive_set_error(&a->archive, ENOMEM,
374 "Can't allocate fname buffer");
375 return (ARCHIVE_FATAL);
376 }
377 strncpy(p, b, bsd_name_length);
378 p[bsd_name_length] = '\0';
379
380 __archive_read_consume(a, bsd_name_length);
381
382 archive_entry_copy_pathname(entry, p);
383 free(p);
384 return (ARCHIVE_OK);
385 }
386
387 /*
388 * "/" is the SVR4/GNU archive symbol table.
389 * "/SYM64/" is the SVR4/GNU 64-bit variant archive symbol table.
390 */
391 if (strcmp(filename, "/") == 0 || strcmp(filename, "/SYM64/") == 0) {
392 archive_entry_copy_pathname(entry, filename);
393 /* Parse the time, owner, mode, size fields. */
394 r = ar_parse_common_header(ar, entry, h);
395 /* Force the file type to a regular file. */
396 archive_entry_set_filetype(entry, AE_IFREG);
397 return (r);
398 }
399
400 /*
401 * "__.SYMDEF" is a BSD archive symbol table.
402 */
403 if (strcmp(filename, "__.SYMDEF") == 0) {
404 archive_entry_copy_pathname(entry, filename);
405 /* Parse the time, owner, mode, size fields. */
406 return (ar_parse_common_header(ar, entry, h));
407 }
408
409 /*
410 * Otherwise, this is a standard entry. The filename
411 * has already been trimmed as much as possible, based
412 * on our current knowledge of the format.
413 */
414 archive_entry_copy_pathname(entry, filename);
415 return (ar_parse_common_header(ar, entry, h));
416 }
417
418 static int
archive_read_format_ar_read_header(struct archive_read * a,struct archive_entry * entry)419 archive_read_format_ar_read_header(struct archive_read *a,
420 struct archive_entry *entry)
421 {
422 struct ar *ar = (struct ar*)(a->format->data);
423 size_t unconsumed;
424 const void *header_data;
425 int ret;
426
427 if (!ar->read_global_header) {
428 /*
429 * We are now at the beginning of the archive,
430 * so we need first consume the ar global header.
431 */
432 __archive_read_consume(a, 8);
433 ar->read_global_header = 1;
434 /* Set a default format code for now. */
435 a->archive.archive_format = ARCHIVE_FORMAT_AR;
436 }
437
438 /* Read the header for the next file entry. */
439 if ((header_data = __archive_read_ahead(a, 60, NULL)) == NULL)
440 /* Broken header. */
441 return (ARCHIVE_EOF);
442
443 unconsumed = 60;
444
445 ret = _ar_read_header(a, entry, ar, (const char *)header_data, &unconsumed);
446
447 if (unconsumed)
448 __archive_read_consume(a, unconsumed);
449
450 return ret;
451 }
452
453
454 static int
ar_parse_common_header(struct ar * ar,struct archive_entry * entry,const char * h)455 ar_parse_common_header(struct ar *ar, struct archive_entry *entry,
456 const char *h)
457 {
458 uint64_t n;
459
460 /* Copy remaining header */
461 archive_entry_set_mtime(entry,
462 (time_t)ar_atol10(h + AR_date_offset, AR_date_size), 0L);
463 archive_entry_set_uid(entry,
464 (uid_t)ar_atol10(h + AR_uid_offset, AR_uid_size));
465 archive_entry_set_gid(entry,
466 (gid_t)ar_atol10(h + AR_gid_offset, AR_gid_size));
467 archive_entry_set_mode(entry,
468 (mode_t)ar_atol8(h + AR_mode_offset, AR_mode_size));
469 archive_entry_set_filetype(entry, AE_IFREG);
470 n = ar_atol10(h + AR_size_offset, AR_size_size);
471
472 ar->entry_offset = 0;
473 ar->entry_padding = n % 2;
474 archive_entry_set_size(entry, n);
475 ar->entry_bytes_remaining = n;
476 return (ARCHIVE_OK);
477 }
478
479 static int
archive_read_format_ar_read_data(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)480 archive_read_format_ar_read_data(struct archive_read *a,
481 const void **buff, size_t *size, int64_t *offset)
482 {
483 ssize_t bytes_read;
484 struct ar *ar;
485
486 ar = (struct ar *)(a->format->data);
487
488 if (ar->entry_bytes_unconsumed) {
489 __archive_read_consume(a, ar->entry_bytes_unconsumed);
490 ar->entry_bytes_unconsumed = 0;
491 }
492
493 if (ar->entry_bytes_remaining > 0) {
494 *buff = __archive_read_ahead(a, 1, &bytes_read);
495 if (bytes_read == 0) {
496 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
497 "Truncated ar archive");
498 return (ARCHIVE_FATAL);
499 }
500 if (bytes_read < 0)
501 return (ARCHIVE_FATAL);
502 if (bytes_read > ar->entry_bytes_remaining)
503 bytes_read = (ssize_t)ar->entry_bytes_remaining;
504 *size = bytes_read;
505 ar->entry_bytes_unconsumed = bytes_read;
506 *offset = ar->entry_offset;
507 ar->entry_offset += bytes_read;
508 ar->entry_bytes_remaining -= bytes_read;
509 return (ARCHIVE_OK);
510 } else {
511 int64_t skipped = __archive_read_consume(a, ar->entry_padding);
512 if (skipped >= 0) {
513 ar->entry_padding -= skipped;
514 }
515 if (ar->entry_padding) {
516 if (skipped >= 0) {
517 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
518 "Truncated ar archive - failed consuming padding");
519 }
520 return (ARCHIVE_FATAL);
521 }
522 *buff = NULL;
523 *size = 0;
524 *offset = ar->entry_offset;
525 return (ARCHIVE_EOF);
526 }
527 }
528
529 static int
archive_read_format_ar_skip(struct archive_read * a)530 archive_read_format_ar_skip(struct archive_read *a)
531 {
532 int64_t bytes_skipped;
533 struct ar* ar;
534
535 ar = (struct ar *)(a->format->data);
536
537 bytes_skipped = __archive_read_consume(a,
538 ar->entry_bytes_remaining + ar->entry_padding
539 + ar->entry_bytes_unconsumed);
540 if (bytes_skipped < 0)
541 return (ARCHIVE_FATAL);
542
543 ar->entry_bytes_remaining = 0;
544 ar->entry_bytes_unconsumed = 0;
545 ar->entry_padding = 0;
546
547 return (ARCHIVE_OK);
548 }
549
550 static int
ar_parse_gnu_filename_table(struct archive_read * a)551 ar_parse_gnu_filename_table(struct archive_read *a)
552 {
553 struct ar *ar;
554 char *p;
555 size_t size;
556
557 ar = (struct ar*)(a->format->data);
558 size = ar->strtab_size;
559
560 for (p = ar->strtab; p < ar->strtab + size - 1; ++p) {
561 if (*p == '/') {
562 *p++ = '\0';
563 if (*p != '\n')
564 goto bad_string_table;
565 *p = '\0';
566 }
567 }
568 /*
569 * GNU ar always pads the table to an even size.
570 * The pad character is either '\n' or '`'.
571 */
572 if (p != ar->strtab + size && *p != '\n' && *p != '`')
573 goto bad_string_table;
574
575 /* Enforce zero termination. */
576 ar->strtab[size - 1] = '\0';
577
578 return (ARCHIVE_OK);
579
580 bad_string_table:
581 archive_set_error(&a->archive, EINVAL,
582 "Invalid string table");
583 free(ar->strtab);
584 ar->strtab = NULL;
585 return (ARCHIVE_FATAL);
586 }
587
/*
 * Parse an unsigned octal number from a fixed-width, not necessarily
 * NUL-terminated field.
 *
 *   p         start of the field
 *   char_cnt  width of the field in bytes
 *
 * Leading spaces and tabs are skipped; parsing stops at the first
 * non-octal character or at the end of the field.  Returns the parsed
 * value, 0 if no digit was found, or UINT64_MAX if the value does not
 * fit in 64 bits.
 *
 * The remaining width is always checked before a byte is examined, so
 * no byte at or beyond p + char_cnt is ever read.
 */
static uint64_t
ar_atol8(const char *p, unsigned char_cnt)
{
	uint64_t l, limit, last_digit_limit;
	unsigned int digit, base;

	base = 8;
	limit = UINT64_MAX / base;
	last_digit_limit = UINT64_MAX % base;

	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	l = 0;
	while (char_cnt > 0 && *p >= '0') {
		digit = (unsigned int)(*p - '0');
		if (digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit)) {
			l = UINT64_MAX; /* Truncate on overflow. */
			break;
		}
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (l);
}
613
/*
 * Parse an unsigned decimal number from a fixed-width, not necessarily
 * NUL-terminated field.
 *
 *   p         start of the field
 *   char_cnt  width of the field in bytes
 *
 * Leading spaces and tabs are skipped; parsing stops at the first
 * non-digit character or at the end of the field.  Returns the parsed
 * value, 0 if no digit was found, or UINT64_MAX if the value does not
 * fit in 64 bits.
 *
 * The remaining width is always checked before a byte is examined, so
 * no byte at or beyond p + char_cnt is ever read.
 */
static uint64_t
ar_atol10(const char *p, unsigned char_cnt)
{
	uint64_t l, limit, last_digit_limit;
	unsigned int base, digit;

	base = 10;
	limit = UINT64_MAX / base;
	last_digit_limit = UINT64_MAX % base;

	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	l = 0;
	while (char_cnt > 0 && *p >= '0') {
		digit = (unsigned int)(*p - '0');
		if (digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit)) {
			l = UINT64_MAX; /* Truncate on overflow. */
			break;
		}
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (l);
}
638