xref: /freebsd/contrib/libarchive/libarchive/archive_read_support_format_mtree.c (revision eb5165bb491138f60d9004bc4c781490016d9288)
1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * Copyright (c) 2008 Joerg Sonnenberger
4  * Copyright (c) 2011-2012 Michihiro NAKAJIMA
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "archive_platform.h"
29 
30 #ifdef HAVE_SYS_STAT_H
31 #include <sys/stat.h>
32 #endif
33 #ifdef HAVE_ERRNO_H
34 #include <errno.h>
35 #endif
36 #ifdef HAVE_FCNTL_H
37 #include <fcntl.h>
38 #endif
39 #include <stddef.h>
40 /* #include <stdint.h> */ /* See archive_platform.h */
41 #ifdef HAVE_STDLIB_H
42 #include <stdlib.h>
43 #endif
44 #ifdef HAVE_STRING_H
45 #include <string.h>
46 #endif
47 #ifdef HAVE_CTYPE_H
48 #include <ctype.h>
49 #endif
50 
51 #include "archive.h"
52 #include "archive_entry.h"
53 #include "archive_entry_private.h"
54 #include "archive_platform_stat.h"
55 #include "archive_private.h"
56 #include "archive_rb.h"
57 #include "archive_read_private.h"
58 #include "archive_string.h"
59 #include "archive_pack_dev.h"
60 
61 #ifndef O_BINARY
62 #define	O_BINARY 0
63 #endif
64 #ifndef O_CLOEXEC
65 #define O_CLOEXEC	0
66 #endif
67 
/*
 * Single-bit flags, one per mtree keyword.
 * NOTE(review): presumably OR-ed together by the keyword parser to record
 * which attributes an entry supplied -- the users live outside this part
 * of the file; confirm there.
 */
#define	MTREE_HAS_DEVICE	0x0001
#define	MTREE_HAS_FFLAGS	0x0002
#define	MTREE_HAS_GID		0x0004
#define	MTREE_HAS_GNAME		0x0008
#define	MTREE_HAS_MTIME		0x0010
#define	MTREE_HAS_NLINK		0x0020
#define	MTREE_HAS_PERM		0x0040
#define	MTREE_HAS_SIZE		0x0080
#define	MTREE_HAS_TYPE		0x0100
#define	MTREE_HAS_UID		0x0200
#define	MTREE_HAS_UNAME		0x0400

#define	MTREE_HAS_OPTIONAL	0x0800
#define	MTREE_HAS_NOCHANGE	0x1000 /* FreeBSD specific */

/* Line length, in bytes, at which next_line() stops looking for a
 * terminator and reports failure. */
#define	MAX_LINE_LEN		(1024 * 1024)
84 
/*
 * One option ("keyword" or "keyword=value") held in a singly linked list.
 * Nodes are created by add_option() and released by free_options() or
 * remove_option().
 */
struct mtree_option {
	struct mtree_option *next;	/* Next option, or NULL at the tail. */
	char *value;			/* malloc'ed, NUL-terminated option text. */
};
89 
/*
 * One entry parsed from the specification.
 */
struct mtree_entry {
	/* Must stay first: the rb-tree comparators cast a node pointer
	 * straight to a struct mtree_entry pointer. */
	struct archive_rb_node rbnode;
	/* Next entry that has the same name as this one (tree duplicates). */
	struct mtree_entry *next_dup;
	/* Next entry in the order the lines were read. */
	struct mtree_entry *next;
	/* Options in effect for this entry: a copy of the globals plus the
	 * entry's own keywords. */
	struct mtree_option *options;
	/* malloc'ed name, after parse_escapes() has been applied. */
	char *name;
	/* Non-zero makes process_add_entry() index the entry by name.
	 * NOTE(review): presumably set by parse_escapes() -- confirm. */
	char full;
	/* Initialised to 0 when the entry is created. */
	char used;
};
99 
/*
 * Per-reader state of the mtree format handler.  Allocated zeroed in
 * archive_read_support_format_mtree() and destroyed by cleanup().
 */
struct mtree {
	struct archive_string	 line;		/* Released in cleanup(). */
	size_t			 buffsize;	/* NOTE(review): presumably the size of buff -- confirm. */
	char			*buff;		/* Heap buffer, freed in cleanup(). */
	int64_t			 offset;
	int			 fd;		/* -1 when closed; closed in cleanup() otherwise. */
	int			 archive_format;	/* Set to ARCHIVE_FORMAT_MTREE by read_mtree(). */
	const char		*archive_format_name;	/* Set to "mtree" by read_mtree(). */
	struct mtree_entry	*entries;	/* Head of the list of all parsed entries. */
	struct mtree_entry	*this_entry;	/* Set to `entries' once the whole file is parsed. */
	struct archive_rb_tree	 entry_rbtree;	/* NOTE(review): no use visible nearby; the name index is `rbtree'. */
	struct archive_string	 current_dir;	/* Released in cleanup(). */
	struct archive_string	 contents_name;	/* Released in cleanup(). */

	struct archive_entry_linkresolver *resolver;	/* Released in cleanup(). */
	struct archive_rb_tree rbtree;		/* Entries with `full' set, keyed by name. */

	int64_t			 cur_size;
	char checkfs;				/* Non-zero when the "checkfs" option is enabled. */
};
120 
/* Forward declarations of file-local helpers. */
static int	bid_keycmp(const char *, const char *, ssize_t);
static int	cleanup(struct archive_read *);
static int	detect_form(struct archive_read *, int *);
static int	mtree_bid(struct archive_read *, int);
static int	parse_file(struct archive_read *, struct archive_entry *,
		    struct mtree *, struct mtree_entry *, int *);
static void	parse_escapes(char *, struct mtree_entry *);
static int	parse_line(struct archive_read *, struct archive_entry *,
		    struct mtree *, struct mtree_entry *, int *);
static int	parse_keyword(struct archive_read *, struct mtree *,
		    struct archive_entry *, struct mtree_option *, int *);
static int	read_data(struct archive_read *a,
		    const void **buff, size_t *size, int64_t *offset);
static ssize_t	readline(struct archive_read *, struct mtree *, char **, ssize_t);
static int	skip(struct archive_read *a);
static int	read_header(struct archive_read *,
		    struct archive_entry *);
static int64_t	mtree_atol(char **, int base);
/* Only needed when the platform lacks strnlen(); otherwise
 * mtree_strnlen is a macro. */
#ifndef HAVE_STRNLEN
static size_t	mtree_strnlen(const char *, size_t);
#endif
142 
/*
 * There's no standard for TIME_T_MAX/TIME_T_MIN.  So we compute them
 * here.  TODO: Move this to configure time, but be careful
 * about cross-compile environments.
 */

/* Upper bound of time_t as computed for this platform. */
static int64_t
get_time_t_max(void)
{
#if defined(TIME_T_MAX)
	return TIME_T_MAX;
#else
	/*
	 * ISO C permits a floating-point time_t while POSIX demands an
	 * integer type; this probe relies on the POSIX convention.
	 */
	const int time_t_is_unsigned = (((time_t)0) < ((time_t)-1));

	if (time_t_is_unsigned)
		return (~(time_t)0);
	/* Signed: assumed to be as wide as int64_t or int32_t. */
	if (sizeof(time_t) == sizeof(int64_t))
		return (time_t)INT64_MAX;
	return (time_t)INT32_MAX;
#endif
}
172 
/* Lower bound of time_t as computed for this platform. */
static int64_t
get_time_t_min(void)
{
#if defined(TIME_T_MIN)
	return TIME_T_MIN;
#else
	const int time_t_is_unsigned = (((time_t)0) < ((time_t)-1));

	/* An unsigned time_t bottoms out at zero. */
	if (time_t_is_unsigned)
		return (time_t)0;
	/* Signed: assumed to be as wide as int64_t or int32_t. */
	if (sizeof(time_t) == sizeof(int64_t))
		return (time_t)INT64_MIN;
	return (time_t)INT32_MIN;
#endif
}
192 
#ifdef HAVE_STRNLEN
#define mtree_strnlen(a,b) strnlen(a,b)
#else
/*
 * Fallback for platforms without strnlen().
 *
 * Examines p[0] .. p[maxlen] (inclusive) and returns the index of the
 * first NUL found there.  If none of those bytes is NUL the result is
 * (size_t)-1, which marks the string as invalid.
 */
static size_t
mtree_strnlen(const char *p, size_t maxlen)
{
	size_t pos = 0;

	while (pos <= maxlen) {
		if (p[pos] == '\0')
			return (pos);
		pos++;
	}
	return (-1);/* invalid */
}
#endif
210 
211 static int
archive_read_format_mtree_options(struct archive_read * a,const char * key,const char * val)212 archive_read_format_mtree_options(struct archive_read *a,
213     const char *key, const char *val)
214 {
215 	struct mtree *mtree;
216 
217 	mtree = (struct mtree *)(a->format->data);
218 	if (strcmp(key, "checkfs")  == 0) {
219 		/* Allows to read information missing from the mtree from the file system */
220 		if (val == NULL || val[0] == 0) {
221 			mtree->checkfs = 0;
222 		} else {
223 			mtree->checkfs = 1;
224 		}
225 		return (ARCHIVE_OK);
226 	}
227 
228 	/* Note: The "warn" return is just to inform the options
229 	 * supervisor that we didn't handle it.  It will generate
230 	 * a suitable error if no one used this option. */
231 	return (ARCHIVE_WARN);
232 }
233 
234 static void
free_options(struct mtree_option * head)235 free_options(struct mtree_option *head)
236 {
237 	struct mtree_option *next;
238 
239 	for (; head != NULL; head = next) {
240 		next = head->next;
241 		free(head->value);
242 		free(head);
243 	}
244 }
245 
246 static int
mtree_cmp_node(const struct archive_rb_node * n1,const struct archive_rb_node * n2)247 mtree_cmp_node(const struct archive_rb_node *n1,
248     const struct archive_rb_node *n2)
249 {
250 	const struct mtree_entry *e1 = (const struct mtree_entry *)n1;
251 	const struct mtree_entry *e2 = (const struct mtree_entry *)n2;
252 
253 	return (strcmp(e1->name, e2->name));
254 }
255 
256 static int
mtree_cmp_key(const struct archive_rb_node * n,const void * key)257 mtree_cmp_key(const struct archive_rb_node *n, const void *key)
258 {
259 	const struct mtree_entry *e = (const struct mtree_entry *)n;
260 
261 	return (strcmp(e->name, key));
262 }
263 
264 int
archive_read_support_format_mtree(struct archive * _a)265 archive_read_support_format_mtree(struct archive *_a)
266 {
267 	static const struct archive_rb_tree_ops rb_ops = {
268 		mtree_cmp_node, mtree_cmp_key,
269 	};
270 	struct archive_read *a = (struct archive_read *)_a;
271 	struct mtree *mtree;
272 	int r;
273 
274 	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
275 	    ARCHIVE_STATE_NEW, "archive_read_support_format_mtree");
276 
277 	mtree = calloc(1, sizeof(*mtree));
278 	if (mtree == NULL) {
279 		archive_set_error(&a->archive, ENOMEM,
280 		    "Can't allocate mtree data");
281 		return (ARCHIVE_FATAL);
282 	}
283 	mtree->checkfs = 0;
284 	mtree->fd = -1;
285 
286 	__archive_rb_tree_init(&mtree->rbtree, &rb_ops);
287 
288 	r = __archive_read_register_format(a, mtree, "mtree",
289            mtree_bid, archive_read_format_mtree_options, read_header, read_data, skip, NULL, cleanup, NULL, NULL);
290 
291 	if (r != ARCHIVE_OK)
292 		free(mtree);
293 	return (ARCHIVE_OK);
294 }
295 
296 static int
cleanup(struct archive_read * a)297 cleanup(struct archive_read *a)
298 {
299 	struct mtree *mtree;
300 	struct mtree_entry *p, *q;
301 
302 	mtree = (struct mtree *)(a->format->data);
303 
304 	/* Close any dangling file descriptor before freeing */
305     if (mtree->fd >= 0) {
306         close(mtree->fd);
307         mtree->fd = -1;
308     }
309 	p = mtree->entries;
310 	while (p != NULL) {
311 		q = p->next;
312 		free(p->name);
313 		free_options(p->options);
314 		free(p);
315 		p = q;
316 	}
317 	archive_string_free(&mtree->line);
318 	archive_string_free(&mtree->current_dir);
319 	archive_string_free(&mtree->contents_name);
320 	archive_entry_linkresolver_free(mtree->resolver);
321 
322 	free(mtree->buff);
323 	free(mtree);
324 	(a->format->data) = NULL;
325 	return (ARCHIVE_OK);
326 }
327 
/*
 * Measure the first line found in b[0 .. avail-1].
 *
 * Returns the line length including its terminator ("\n", "\r\n" or a
 * lone "\r"), `avail' when no terminator is present, or -1 when a NUL
 * byte is met first.  When nlsize is not NULL it receives the size of
 * the terminator: 2, 1, or 0 if none was found.
 */
static ssize_t
get_line_size(const char *b, ssize_t avail, ssize_t *nlsize)
{
	ssize_t pos;
	ssize_t eol = 0;
	ssize_t result = avail;

	for (pos = 0; pos < avail; pos++) {
		const char c = b[pos];

		if (c == '\0') {
			/* Non-ascii character or control character. */
			result = -1;
			break;
		}
		if (c == '\r' || c == '\n') {
			/* "\r\n" counts as one two-byte terminator, but
			 * only if the '\n' is inside the buffer. */
			if (c == '\r' && avail - pos > 1 && b[pos + 1] == '\n')
				eol = 2;
			else
				eol = 1;
			result = pos + eol;
			break;
		}
	}
	if (nlsize != NULL)
		*nlsize = eol;
	return (result);
}
361 
/*
 * Find the end of the line starting at *b, reading ahead for more input
 * until a line terminator shows up.
 *
 *  <---------------- ravail --------------------->
 *  <-- diff ------> <---  avail ----------------->
 *                   <---- len ----------->
 * | Previous lines | line being parsed  nl extra |
 *                  ^
 *                  b
 *
 * *b, *avail and *ravail are updated whenever more data is requested;
 * *nl receives the terminator size reported by get_line_size().
 *
 * Returns the line length including the terminator.  Returns 0 when the
 * read-ahead request fails without offering more bytes than were already
 * seen, and -1 when get_line_size() rejects the data or the line has
 * grown to MAX_LINE_LEN without a terminator.
 */
static ssize_t
next_line(struct archive_read *a,
    const char **b, ssize_t *avail, ssize_t *ravail, ssize_t *nl)
{
	ssize_t len;
	int quit;

	quit = 0;
	if (*avail == 0) {
		*nl = 0;
		len = 0;
	} else
		len = get_line_size(*b, *avail, nl);
	/*
	 * Read bytes more while it does not reach the end of line.
	 */
	while (*nl == 0 && len == *avail && !quit) {
		/* Bytes of earlier lines that sit in front of *b. */
		ssize_t diff = *ravail - *avail;
		/* Current window size rounded up to a multiple of 1024. */
		size_t nbytes_req = (*ravail+1023) & ~1023U;
		ssize_t tested;

		/*
		 * Place an arbitrary limit on the line length.
		 * mtree is almost free-form input and without line length limits,
		 * it can consume a lot of memory.
		 */
		if (len >= MAX_LINE_LEN)
			return (-1);

		/* Increase reading bytes if it is not enough for at least
		 * two new lines. */
		if (nbytes_req < (size_t)*ravail + 160)
			nbytes_req <<= 1;

		*b = __archive_read_ahead(a, nbytes_req, avail);
		if (*b == NULL) {
			if (*ravail >= *avail)
				return (0);
			/* Reading bytes reaches the end of file. */
			*b = __archive_read_ahead(a, *avail, avail);
			/* Take whatever is left and stop after this pass. */
			quit = 1;
		}
		/* The window restarts at its beginning: step over the
		 * earlier lines again. */
		*ravail = *avail;
		*b += diff;
		*avail -= diff;
		tested = len;/* Skip some bytes we already determined. */
		len = get_line_size(*b + len, *avail - len, nl);
		if (len >= 0)
			len += tested;
	}
	return (len);
}
423 
/*
 * Compare the text at 'p' (at most 'len' bytes) with an mtree keyword.
 * Returns the keyword length when the whole keyword matches and is
 * followed by '=', a blank, a line terminator or a backslash-newline.
 * Returns 0 if not matched.
 */
static int
bid_keycmp(const char *p, const char *key, ssize_t len)
{
	int matched;

	for (matched = 0; len > 0 && *p && *key; matched++) {
		if (*p != *key)
			return (0);/* Not match */
		--len;
		++p;
		++key;
	}
	/* The keyword has to be consumed completely. */
	if (*key != '\0')
		return (0);/* Not match */

	/* A following character should be specified characters */
	switch (p[0]) {
	case '=':
	case ' ':
	case '\t':
	case '\n':
	case '\r':
		return (matched);
	case '\\':
		/* A backslash only counts as a line continuation. */
		if (p[1] == '\n' || p[1] == '\r')
			return (matched);
		return (0);/* Not match */
	default:
		return (0);/* Not match */
	}
}
454 
/*
 * Test whether the text at 'p' starts with a known mtree keyword.
 * Returns the length of the detected keyword.
 * Returns 0 if no keyword was found.
 */
static int
bid_keyword(const char *p,  ssize_t len)
{
	static const char * const keys_c[] = {
		"content", "contents", "cksum", NULL
	};
	static const char * const keys_df[] = {
		"device", "flags", NULL
	};
	static const char * const keys_g[] = {
		"gid", "gname", NULL
	};
	static const char * const keys_il[] = {
		"ignore", "inode", "link", NULL
	};
	static const char * const keys_m[] = {
		"md5", "md5digest", "mode", NULL
	};
	static const char * const keys_no[] = {
		"nlink", "nochange", "optional", NULL
	};
	static const char * const keys_r[] = {
		"resdevice", "rmd160", "rmd160digest", NULL
	};
	static const char * const keys_s[] = {
		"sha1", "sha1digest",
		"sha256", "sha256digest",
		"sha384", "sha384digest",
		"sha512", "sha512digest",
		"size", NULL
	};
	static const char * const keys_t[] = {
		"tags", "time", "type", NULL
	};
	static const char * const keys_u[] = {
		"uid", "uname",	NULL
	};
	/* Maps the first character of the input to the keyword list that
	 * has to be searched. */
	static const struct {
		const char *initials;
		const char * const *keys;
	} groups[] = {
		{ "c",  keys_c },
		{ "df", keys_df },
		{ "g",  keys_g },
		{ "il", keys_il },
		{ "m",  keys_m },
		{ "no", keys_no },
		{ "r",  keys_r },
		{ "s",  keys_s },
		{ "t",  keys_t },
		{ "u",  keys_u },
	};
	const char * const *candidate = NULL;
	size_t g;

	/* strchr() would match the terminator of `initials', so an empty
	 * input is rejected up front. */
	if (*p == '\0')
		return (0);/* Unknown key */

	for (g = 0; g < sizeof(groups) / sizeof(groups[0]); g++) {
		if (strchr(groups[g].initials, *p) != NULL) {
			candidate = groups[g].keys;
			break;
		}
	}
	if (candidate == NULL)
		return (0);/* Unknown key */

	for (; *candidate != NULL; candidate++) {
		int matched = bid_keycmp(p, *candidate, len);

		if (matched > 0)
			return (matched);
	}
	return (0);/* Unknown key */
}
521 
/*
 * Test whether 'p' holds a sequence of mtree keywords.
 * Returns the number of keywords found, or -1 for a malformed sequence.
 * This function expects a set of "<space characters>keyword=value".
 * When "unset" is specified, it expects "<space characters>keyword" and
 * returns 1 as soon as the keyword "all" is seen.
 * When "last_is_path" is set the leading blank is optional and running
 * out of input in front of a keyword ends the scan.
 */
static int
bid_keyword_list(const char *p,  ssize_t len, int unset, int last_is_path)
{
	int found = 0;

	while (len > 0 && *p) {
		int saw_blank = 0;
		int has_value = 0;
		int key_len;

		/* Test whether there are blank characters in the line. */
		for (; len > 0 && (*p == ' ' || *p == '\t'); ++p, --len)
			saw_blank = 1;

		/* End of the line, or a line continuation. */
		if (*p == '\n' || *p == '\r')
			break;
		if (p[0] == '\\' && (p[1] == '\n' || p[1] == '\r'))
			break;
		if (!saw_blank && !last_is_path) /* No blank character. */
			return (-1);
		if (last_is_path && len == 0)
			return (found);

		if (unset && bid_keycmp(p, "all", len) > 0)
			return (1);

		/* Test whether there is a correct key in the line. */
		key_len = bid_keyword(p, len);
		if (key_len == 0)
			return (-1);/* Unknown keyword was found. */
		p += key_len;
		len -= key_len;
		found++;

		if (*p != '=')
			continue;

		/* Skip value */
		++p;
		--len;
		for (; len > 0 && *p != ' ' && *p != '\t'; ++p, --len)
			has_value = 1;
		/* A keyword should have a value unless this is
		 * an "/unset" operation. */
		if (!unset && !has_value)
			return (-1);
	}
	return (found);
}
584 
/*
 * Examine one entry line while bidding.
 *
 * The line either starts with a path name followed by keywords or, in
 * the layout written by NetBSD's "mtree -D" (form D), ends with the path
 * name.  *last_is_path is set to 1 for form D and 0 otherwise.
 *
 * Returns what bid_keyword_list() reports for the keyword part, or -1
 * when the line fits neither layout.
 */
static int
bid_entry(const char *p, ssize_t len, ssize_t nl, int *last_is_path)
{
	static const unsigned char safe_char[256] = {
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00 - 0F */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10 - 1F */
		/* !"$%&'()*+,-./  EXCLUSION:( )(#) */
		0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 20 - 2F */
		/* 0123456789:;<>?  EXCLUSION:(=) */
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, /* 30 - 3F */
		/* @ABCDEFGHIJKLMNO */
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 40 - 4F */
		/* PQRSTUVWXYZ[\]^_  */
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 50 - 5F */
		/* `abcdefghijklmno */
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 60 - 6F */
		/* pqrstuvwxyz{|}~ */
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* 70 - 7F */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80 - 8F */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90 - 9F */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A0 - AF */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0 - BF */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C0 - CF */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D0 - DF */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E0 - EF */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* F0 - FF */
	};
	const char * const line_end = p + len;
	const char *scan = p;
	ssize_t rest;
	int leading_name = 0;

	*last_is_path = 0;

	/*
	 * Skip the path-name which is quoted.
	 */
	while (scan < line_end && safe_char[(unsigned char)*scan]) {
		leading_name = 1;
		++scan;
	}
	/* A leading word only counts as a path name when white space (or
	 * the end of the data) follows it. */
	if (scan < line_end && *scan != ' ' && *scan != '\t' &&
	    *scan != '\r' && *scan != '\n')
		leading_name = 0;
	rest = line_end - scan;

	/* If a path-name was not found at the first, try to check
	 * a mtree format(a.k.a form D) ``NetBSD's mtree -D'' creates,
	 * which places the path-name at the last. */
	if (!leading_name) {
		const char *tail = p + len - nl;
		int name_len = 0;
		int has_slash = 0;

		/* The form D accepts only a single line for an entry. */
		if (tail - 2 >= p &&
		    tail[-1] == '\\' && (tail[-2] == ' ' || tail[-2] == '\t'))
			return (-1);
		if (tail - 1 >= p && tail[-1] == '\\')
			return (-1);

		/* Walk backwards over the last word of the line. */
		for (--tail; p <= tail && *tail != ' ' && *tail != '\t';
		    --tail) {
			if (!safe_char[(unsigned char)*tail])
				return (-1);
			name_len++;
			/* The pathname should have a slash in this
			 * format. */
			if (*tail == '/')
				has_slash = 1;
		}
		if (name_len == 0 || !has_slash)
			return (-1);
		/* If '/' is placed at the first in this field, this is not
		 * a valid filename. */
		if (tail[1] == '/')
			return (-1);
		rest = len - nl - name_len;
		scan = p;
		*last_is_path = 1;
	}

	return (bid_keyword_list(scan, rest, 0, *last_is_path));
}
670 
/* Number of well-formed entries after which detect_form() stops scanning
 * and accepts the input as mtree. */
#define MAX_BID_ENTRY	3
672 
/*
 * Bid callback.  An input starting with "#mtree" gets 8 points per
 * signature byte; anything else is scored by detect_form().  Returns -1
 * when not even the signature length can be read.
 */
static int
mtree_bid(struct archive_read *a, int best_bid)
{
	static const char signature[] = "#mtree";
	const size_t siglen = sizeof(signature) - 1;
	const char *head;

	(void)best_bid; /* UNUSED */

	/* Now let's look at the actual header and see if it matches. */
	head = __archive_read_ahead(a, siglen, NULL);
	if (head == NULL)
		return (-1);

	if (memcmp(head, signature, siglen) != 0) {
		/*
		 * There is not a mtree signature. Let's try to detect
		 * mtree format.
		 */
		return (detect_form(a, NULL));
	}
	return (8 * (int)siglen);
}
694 
/*
 * Scan the beginning of the input line by line and decide whether it
 * looks like an mtree specification.
 *
 * Returns 32 when MAX_BID_ENTRY entries were recognised, or when at
 * least one entry was recognised and next_line() then returned 0.
 * Returns 0 when the input does not qualify and -1 when nothing can be
 * read at all.  If is_form_d is not NULL it is set to 1 when the
 * accepted input uses the path-name-last layout ("form D"), else to 0.
 */
static int
detect_form(struct archive_read *a, int *is_form_d)
{
	const char *p;
	ssize_t avail, ravail;
	ssize_t len, nl;
	/* multiline: 0 = not inside a continued line, 1 = continuing an
	 * entry, 2 = continuing a /set or /unset line. */
	int entry_cnt = 0, multiline = 0;
	int form_D = 0;/* The archive is generated by `NetBSD mtree -D'
			* (In this source we call it `form D') .
			* 0 = undecided, 1 = form D, -1 = not form D. */

	if (is_form_d != NULL)
		*is_form_d = 0;
	p = __archive_read_ahead(a, 1, &avail);
	if (p == NULL)
		return (-1);
	ravail = avail;
	for (;;) {
		len = next_line(a, &p, &avail, &ravail, &nl);
		/* The terminal character of the line should be
		 * a new line character, '\r\n' or '\n'. */
		if (len <= 0 || nl == 0)
			break;
		if (!multiline) {
			/* Leading whitespace is never significant,
			 * ignore it. */
			while (len > 0 && (*p == ' ' || *p == '\t')) {
				++p;
				--avail;
				--len;
			}
			/* Skip comment or empty line. */
			if (p[0] == '#' || p[0] == '\n' || p[0] == '\r') {
				p += len;
				avail -= len;
				continue;
			}
		} else {
			/* A continuance line; the terminal
			 * character of previous line was '\' character. */
			if (bid_keyword_list(p, len, 0, 0) <= 0)
				break;
			if (p[len-nl-1] != '\\') {
				/* The continued line ends here; it only
				 * counts as an entry if it started as one. */
				if (multiline == 1 &&
				    ++entry_cnt >= MAX_BID_ENTRY)
					break;
				multiline = 0;
			}
			p += len;
			avail -= len;
			continue;
		}
		if (p[0] != '/') {
			int last_is_path, keywords;

			keywords = bid_entry(p, len, nl, &last_is_path);
			if (keywords >= 0) {
				if (form_D == 0) {
					if (last_is_path)
						form_D = 1;
					else if (keywords > 0)
						/* This line is not `form D'. */
						form_D = -1;
				} else if (form_D == 1) {
					if (!last_is_path && keywords > 0)
						/* This line is not `form D'
						 * and we cannot accept mixed
						 * format. */
						break;
				}
				if (!last_is_path && p[len-nl-1] == '\\')
					/* This line continues. */
					multiline = 1;
				else {
					/* We've got plenty of correct lines
					 * to assume that this file is an mtree
					 * format. */
					if (++entry_cnt >= MAX_BID_ENTRY)
						break;
				}
			} else
				break;
		} else if (len > 4 && strncmp(p, "/set", 4) == 0) {
			if (bid_keyword_list(p+4, len-4, 0, 0) <= 0)
				break;
			/* This line continues. */
			if (p[len-nl-1] == '\\')
				multiline = 2;
		} else if (len > 6 && strncmp(p, "/unset", 6) == 0) {
			if (bid_keyword_list(p+6, len-6, 1, 0) <= 0)
				break;
			/* This line continues. */
			if (p[len-nl-1] == '\\')
				multiline = 2;
		} else
			break;

		/* Test next line. */
		p += len;
		avail -= len;
	}
	/* Accept with enough entries, or with at least one entry when
	 * next_line() returned 0. */
	if (entry_cnt >= MAX_BID_ENTRY || (entry_cnt > 0 && len == 0)) {
		if (is_form_d != NULL) {
			if (form_D == 1)
				*is_form_d = 1;
		}
		return (32);
	}

	return (0);
}
805 
806 /*
807  * The extended mtree format permits multiple lines specifying
808  * attributes for each file.  For those entries, only the last line
809  * is actually used.  Practically speaking, that means we have
810  * to read the entire mtree file into memory up front.
811  *
812  * The parsing is done in two steps.  First, it is decided if a line
813  * changes the global defaults and if it does, it is processed accordingly.
814  * Otherwise, the options of the line are merged with the current
815  * global options.
816  */
817 static int
add_option(struct archive_read * a,struct mtree_option ** global,const char * value,size_t len)818 add_option(struct archive_read *a, struct mtree_option **global,
819     const char *value, size_t len)
820 {
821 	struct mtree_option *opt;
822 
823 	if ((opt = malloc(sizeof(*opt))) == NULL) {
824 		archive_set_error(&a->archive, errno, "Can't allocate memory");
825 		return (ARCHIVE_FATAL);
826 	}
827 	if ((opt->value = malloc(len + 1)) == NULL) {
828 		free(opt);
829 		archive_set_error(&a->archive, errno, "Can't allocate memory");
830 		return (ARCHIVE_FATAL);
831 	}
832 	memcpy(opt->value, value, len);
833 	opt->value[len] = '\0';
834 	opt->next = *global;
835 	*global = opt;
836 	return (ARCHIVE_OK);
837 }
838 
839 static void
remove_option(struct mtree_option ** global,const char * value,size_t len)840 remove_option(struct mtree_option **global, const char *value, size_t len)
841 {
842 	struct mtree_option *iter, *last;
843 
844 	last = NULL;
845 	for (iter = *global; iter != NULL; last = iter, iter = iter->next) {
846 		if (strncmp(iter->value, value, len) == 0 &&
847 		    (iter->value[len] == '\0' ||
848 		     iter->value[len] == '='))
849 			break;
850 	}
851 	if (iter == NULL)
852 		return;
853 	if (last == NULL)
854 		*global = iter->next;
855 	else
856 		last->next = iter->next;
857 
858 	free(iter->value);
859 	free(iter);
860 }
861 
862 static int
process_global_set(struct archive_read * a,struct mtree_option ** global,const char * line)863 process_global_set(struct archive_read *a,
864     struct mtree_option **global, const char *line)
865 {
866 	const char *next, *eq;
867 	size_t len;
868 	int r;
869 
870 	line += 4;
871 	for (;;) {
872 		next = line + strspn(line, " \t\r\n");
873 		if (*next == '\0')
874 			return (ARCHIVE_OK);
875 		line = next;
876 		next = line + strcspn(line, " \t\r\n");
877 		eq = strchr(line, '=');
878 		if (eq > next)
879 			len = next - line;
880 		else
881 			len = eq - line;
882 
883 		remove_option(global, line, len);
884 		r = add_option(a, global, line, next - line);
885 		if (r != ARCHIVE_OK)
886 			return (r);
887 		line = next;
888 	}
889 }
890 
891 static int
process_global_unset(struct archive_read * a,struct mtree_option ** global,const char * line)892 process_global_unset(struct archive_read *a,
893     struct mtree_option **global, const char *line)
894 {
895 	const char *next;
896 	size_t len;
897 
898 	line += 6;
899 	if (strchr(line, '=') != NULL) {
900 		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
901 		    "/unset shall not contain `='");
902 		return ARCHIVE_FATAL;
903 	}
904 
905 	for (;;) {
906 		next = line + strspn(line, " \t\r\n");
907 		if (*next == '\0')
908 			return (ARCHIVE_OK);
909 		line = next;
910 		len = strcspn(line, " \t\r\n");
911 
912 		if (len == 3 && strncmp(line, "all", 3) == 0) {
913 			free_options(*global);
914 			*global = NULL;
915 		} else {
916 			remove_option(global, line, len);
917 		}
918 
919 		line += len;
920 	}
921 }
922 
923 static int
process_add_entry(struct archive_read * a,struct mtree * mtree,struct mtree_option ** global,const char * line,ssize_t line_len,struct mtree_entry ** last_entry,int is_form_d)924 process_add_entry(struct archive_read *a, struct mtree *mtree,
925     struct mtree_option **global, const char *line, ssize_t line_len,
926     struct mtree_entry **last_entry, int is_form_d)
927 {
928 	struct mtree_entry *entry;
929 	struct mtree_option *iter;
930 	const char *next, *eq, *name, *end;
931 	size_t name_len, len;
932 	int r, i;
933 
934 	if ((entry = malloc(sizeof(*entry))) == NULL) {
935 		archive_set_error(&a->archive, errno, "Can't allocate memory");
936 		return (ARCHIVE_FATAL);
937 	}
938 	entry->next = NULL;
939 	entry->options = NULL;
940 	entry->name = NULL;
941 	entry->used = 0;
942 	entry->full = 0;
943 
944 	/* Add this entry to list. */
945 	if (*last_entry == NULL)
946 		mtree->entries = entry;
947 	else
948 		(*last_entry)->next = entry;
949 	*last_entry = entry;
950 
951 	if (is_form_d) {
952 		/* Filename is last item on line. */
953 		/* Adjust line_len to trim trailing whitespace */
954 		while (line_len > 0) {
955 			char last_character = line[line_len - 1];
956 			if (last_character == '\r'
957 			    || last_character == '\n'
958 			    || last_character == '\t'
959 			    || last_character == ' ') {
960 				line_len--;
961 			} else {
962 				break;
963 			}
964 		}
965 		/* Name starts after the last whitespace separator */
966 		name = line;
967 		for (i = 0; i < line_len; i++) {
968 			if (line[i] == '\r'
969 			    || line[i] == '\n'
970 			    || line[i] == '\t'
971 			    || line[i] == ' ') {
972 				name = line + i + 1;
973 			}
974 		}
975 		name_len = line + line_len - name;
976 		end = name;
977 	} else {
978 		/* Filename is first item on line */
979 		name_len = strcspn(line, " \t\r\n");
980 		name = line;
981 		line += name_len;
982 		end = line + line_len;
983 	}
984 	/* name/name_len is the name within the line. */
985 	/* line..end brackets the entire line except the name */
986 
987 	if ((entry->name = malloc(name_len + 1)) == NULL) {
988 		archive_set_error(&a->archive, errno, "Can't allocate memory");
989 		return (ARCHIVE_FATAL);
990 	}
991 
992 	memcpy(entry->name, name, name_len);
993 	entry->name[name_len] = '\0';
994 	parse_escapes(entry->name, entry);
995 
996 	entry->next_dup = NULL;
997 	if (entry->full) {
998 		if (!__archive_rb_tree_insert_node(&mtree->rbtree, &entry->rbnode)) {
999 			struct mtree_entry *alt;
1000 			alt = (struct mtree_entry *)__archive_rb_tree_find_node(
1001 			    &mtree->rbtree, entry->name);
1002 			if (alt != NULL) {
1003 				while (alt->next_dup)
1004 					alt = alt->next_dup;
1005 				alt->next_dup = entry;
1006 			}
1007 		}
1008 	}
1009 
1010 	for (iter = *global; iter != NULL; iter = iter->next) {
1011 		r = add_option(a, &entry->options, iter->value,
1012 		    strlen(iter->value));
1013 		if (r != ARCHIVE_OK)
1014 			return (r);
1015 	}
1016 
1017 	for (;;) {
1018 		next = line + strspn(line, " \t\r\n");
1019 		if (*next == '\0')
1020 			return (ARCHIVE_OK);
1021 		if (next >= end)
1022 			return (ARCHIVE_OK);
1023 		line = next;
1024 		next = line + strcspn(line, " \t\r\n");
1025 		eq = strchr(line, '=');
1026 		if (eq == NULL || eq > next)
1027 			len = next - line;
1028 		else
1029 			len = eq - line;
1030 
1031 		remove_option(&entry->options, line, len);
1032 		r = add_option(a, &entry->options, line, next - line);
1033 		if (r != ARCHIVE_OK)
1034 			return (r);
1035 		line = next;
1036 	}
1037 }
1038 
1039 static int
read_mtree(struct archive_read * a,struct mtree * mtree)1040 read_mtree(struct archive_read *a, struct mtree *mtree)
1041 {
1042 	ssize_t len;
1043 	uintmax_t counter;
1044 	char *p, *s;
1045 	struct mtree_option *global;
1046 	struct mtree_entry *last_entry;
1047 	int r, is_form_d;
1048 
1049 	mtree->archive_format = ARCHIVE_FORMAT_MTREE;
1050 	mtree->archive_format_name = "mtree";
1051 
1052 	global = NULL;
1053 	last_entry = NULL;
1054 
1055 	(void)detect_form(a, &is_form_d);
1056 
1057 	for (counter = 1; ; ++counter) {
1058 		r = ARCHIVE_OK;
1059 		len = readline(a, mtree, &p, 65536);
1060 		if (len == 0) {
1061 			mtree->this_entry = mtree->entries;
1062 			free_options(global);
1063 			return (ARCHIVE_OK);
1064 		}
1065 		if (len < 0) {
1066 			free_options(global);
1067 			return ((int)len);
1068 		}
1069 		/* Leading whitespace is never significant, ignore it. */
1070 		while (*p == ' ' || *p == '\t') {
1071 			++p;
1072 			--len;
1073 		}
1074 		/* Skip content lines and blank lines. */
1075 		if (*p == '#')
1076 			continue;
1077 		if (*p == '\r' || *p == '\n' || *p == '\0')
1078 			continue;
1079 		/* Non-printable characters are not allowed */
1080 		for (s = p;s < p + len - 1; s++) {
1081 			if (!isprint((unsigned char)*s) && *s != '\t') {
1082 				archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1083 					"Non-printable character 0x%02X", (unsigned char)(*s));
1084 				r = ARCHIVE_FATAL;
1085 				break;
1086 			}
1087 		}
1088 		if (r != ARCHIVE_OK)
1089 			break;
1090 		if (*p != '/') {
1091 			r = process_add_entry(a, mtree, &global, p, len,
1092 			    &last_entry, is_form_d);
1093 		} else if (len > 4 && strncmp(p, "/set", 4) == 0) {
1094 			if (p[4] != ' ' && p[4] != '\t')
1095 				break;
1096 			r = process_global_set(a, &global, p);
1097 		} else if (len > 6 && strncmp(p, "/unset", 6) == 0) {
1098 			if (p[6] != ' ' && p[6] != '\t')
1099 				break;
1100 			r = process_global_unset(a, &global, p);
1101 		} else
1102 			break;
1103 
1104 		if (r != ARCHIVE_OK) {
1105 			free_options(global);
1106 			return r;
1107 		}
1108 	}
1109 
1110 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1111 	    "Can't parse line %ju", counter);
1112 	free_options(global);
1113 	return (ARCHIVE_FATAL);
1114 }
1115 
1116 /*
1117  * Read in the entire mtree file into memory on the first request.
1118  * Then use the next unused file to satisfy each header request.
1119  */
1120 static int
read_header(struct archive_read * a,struct archive_entry * entry)1121 read_header(struct archive_read *a, struct archive_entry *entry)
1122 {
1123 	struct mtree *mtree;
1124 	char *p;
1125 	int r, use_next;
1126 
1127 	mtree = (struct mtree *)(a->format->data);
1128 
1129 	if (mtree->fd >= 0) {
1130 		close(mtree->fd);
1131 		mtree->fd = -1;
1132 	}
1133 
1134 	if (mtree->entries == NULL) {
1135 		mtree->resolver = archive_entry_linkresolver_new();
1136 		if (mtree->resolver == NULL)
1137 			return ARCHIVE_FATAL;
1138 		archive_entry_linkresolver_set_strategy(mtree->resolver,
1139 		    ARCHIVE_FORMAT_MTREE);
1140 		r = read_mtree(a, mtree);
1141 		if (r != ARCHIVE_OK)
1142 			return (r);
1143 	}
1144 
1145 	a->archive.archive_format = mtree->archive_format;
1146 	a->archive.archive_format_name = mtree->archive_format_name;
1147 
1148 	for (;;) {
1149 		if (mtree->this_entry == NULL)
1150 			return (ARCHIVE_EOF);
1151 		if (strcmp(mtree->this_entry->name, "..") == 0) {
1152 			mtree->this_entry->used = 1;
1153 			if (archive_strlen(&mtree->current_dir) > 0) {
1154 				/* Roll back current path. */
1155 				p = mtree->current_dir.s
1156 				    + mtree->current_dir.length - 1;
1157 				while (p >= mtree->current_dir.s && *p != '/')
1158 					--p;
1159 				if (p >= mtree->current_dir.s)
1160 					--p;
1161 				mtree->current_dir.length
1162 				    = p - mtree->current_dir.s + 1;
1163 			}
1164 		}
1165 		if (!mtree->this_entry->used) {
1166 			use_next = 0;
1167 			r = parse_file(a, entry, mtree, mtree->this_entry,
1168 				&use_next);
1169 			if (use_next == 0)
1170 				return (r);
1171 		}
1172 		mtree->this_entry = mtree->this_entry->next;
1173 	}
1174 }
1175 
1176 /*
1177  * A single file can have multiple lines contribute specifications.
1178  * Parse as many lines as necessary, then pull additional information
1179  * from a backing file on disk as necessary.
1180  */
1181 static int
parse_file(struct archive_read * a,struct archive_entry * entry,struct mtree * mtree,struct mtree_entry * mentry,int * use_next)1182 parse_file(struct archive_read *a, struct archive_entry *entry,
1183     struct mtree *mtree, struct mtree_entry *mentry, int *use_next)
1184 {
1185 	const char *path;
1186 	la_seek_stat_t st_storage, *st;
1187 	struct mtree_entry *mp;
1188 	struct archive_entry *sparse_entry;
1189 	int r = ARCHIVE_OK, r1, parsed_kws;
1190 
1191 	mentry->used = 1;
1192 
1193 	/* Initialize reasonable defaults. */
1194 	archive_entry_set_filetype(entry, AE_IFREG);
1195 	archive_entry_set_size(entry, 0);
1196 	archive_string_empty(&mtree->contents_name);
1197 
1198 	/* Parse options from this line. */
1199 	parsed_kws = 0;
1200 	r = parse_line(a, entry, mtree, mentry, &parsed_kws);
1201 
1202 	if (mentry->full) {
1203 		archive_entry_copy_pathname(entry, mentry->name);
1204 		/*
1205 		 * "Full" entries are allowed to have multiple lines
1206 		 * and those lines aren't required to be adjacent.  We
1207 		 * don't support multiple lines for "relative" entries
1208 		 * nor do we make any attempt to merge data from
1209 		 * separate "relative" and "full" entries.  (Merging
1210 		 * "relative" and "full" entries would require dealing
1211 		 * with pathname canonicalization, which is a very
1212 		 * tricky subject.)
1213 		 */
1214 		mp = (struct mtree_entry *)__archive_rb_tree_find_node(
1215 		    &mtree->rbtree, mentry->name);
1216 		for (; mp; mp = mp->next_dup) {
1217 			if (mp->full && !mp->used) {
1218 				/* Later lines override earlier ones. */
1219 				mp->used = 1;
1220 				r1 = parse_line(a, entry, mtree, mp, &parsed_kws);
1221 				if (r1 < r)
1222 					r = r1;
1223 			}
1224 		}
1225 	} else {
1226 		/*
1227 		 * Relative entries require us to construct
1228 		 * the full path and possibly update the
1229 		 * current directory.
1230 		 */
1231 		size_t n = archive_strlen(&mtree->current_dir);
1232 		if (n > 0)
1233 			archive_strcat(&mtree->current_dir, "/");
1234 		archive_strcat(&mtree->current_dir, mentry->name);
1235 		archive_entry_copy_pathname(entry, mtree->current_dir.s);
1236 		if (archive_entry_filetype(entry) != AE_IFDIR)
1237 			mtree->current_dir.length = n;
1238 	}
1239 
1240 	if (mtree->checkfs) {
1241 		/*
1242 		 * Try to open and stat the file to get the real size
1243 		 * and other file info.  It would be nice to avoid
1244 		 * this here so that getting a listing of an mtree
1245 		 * wouldn't require opening every referenced contents
1246 		 * file.  But then we wouldn't know the actual
1247 		 * contents size, so I don't see a really viable way
1248 		 * around this.  (Also, we may want to someday pull
1249 		 * other unspecified info from the contents file on
1250 		 * disk.)
1251 		 */
1252 		mtree->fd = -1;
1253 		if (archive_strlen(&mtree->contents_name) > 0)
1254 			path = mtree->contents_name.s;
1255 		else
1256 			path = archive_entry_pathname(entry);
1257 
1258 		if (archive_entry_filetype(entry) == AE_IFREG ||
1259 				archive_entry_filetype(entry) == AE_IFDIR) {
1260 			mtree->fd = open(path, O_RDONLY | O_BINARY | O_CLOEXEC);
1261 			__archive_ensure_cloexec_flag(mtree->fd);
1262 			if (mtree->fd == -1 && (
1263 #if defined(_WIN32) && !defined(__CYGWIN__)
1264         /*
1265          * On Windows, attempting to open a file with an
1266          * invalid name result in EINVAL (Error 22)
1267          */
1268 				(errno != ENOENT && errno != EINVAL)
1269 #else
1270 				errno != ENOENT
1271 #endif
1272         || archive_strlen(&mtree->contents_name) > 0)) {
1273 				archive_set_error(&a->archive, errno,
1274 						"Can't open %s", path);
1275 				r = ARCHIVE_WARN;
1276 			}
1277 		}
1278 
1279 		st = &st_storage;
1280 		if (mtree->fd >= 0) {
1281 			if (la_seek_fstat(mtree->fd, st) == -1) {
1282 				archive_set_error(&a->archive, errno,
1283 						"Could not fstat %s", path);
1284 				r = ARCHIVE_WARN;
1285 				/* If we can't stat it, don't keep it open. */
1286 				close(mtree->fd);
1287 				mtree->fd = -1;
1288 				st = NULL;
1289 			}
1290 		}
1291 #ifdef HAVE_LSTAT
1292 		else if (lstat(path, st) == -1)
1293 #else
1294 		else if (la_seek_stat(path, st) == -1)
1295 #endif
1296 		{
1297 			st = NULL;
1298 		}
1299 
1300 		/*
1301 		 * Check for a mismatch between the type in the specification
1302 		 * and the type of the contents object on disk.
1303 		 */
1304 		if (st != NULL) {
1305 			if (((st->st_mode & S_IFMT) == S_IFREG &&
1306 			      archive_entry_filetype(entry) == AE_IFREG)
1307 #ifdef S_IFLNK
1308 			  ||((st->st_mode & S_IFMT) == S_IFLNK &&
1309 			      archive_entry_filetype(entry) == AE_IFLNK)
1310 #endif
1311 #ifdef S_IFSOCK
1312 			  ||((st->st_mode & S_IFSOCK) == S_IFSOCK &&
1313 			      archive_entry_filetype(entry) == AE_IFSOCK)
1314 #endif
1315 #ifdef S_IFCHR
1316 			  ||((st->st_mode & S_IFMT) == S_IFCHR &&
1317 			      archive_entry_filetype(entry) == AE_IFCHR)
1318 #endif
1319 #ifdef S_IFBLK
1320 			  ||((st->st_mode & S_IFMT) == S_IFBLK &&
1321 			      archive_entry_filetype(entry) == AE_IFBLK)
1322 #endif
1323 			  ||((st->st_mode & S_IFMT) == S_IFDIR &&
1324 			      archive_entry_filetype(entry) == AE_IFDIR)
1325 #ifdef S_IFIFO
1326 			  ||((st->st_mode & S_IFMT) == S_IFIFO &&
1327 			      archive_entry_filetype(entry) == AE_IFIFO)
1328 #endif
1329 			) {
1330 				/* Types match. */
1331 			} else {
1332 				/* Types don't match; bail out gracefully. */
1333 				if (mtree->fd >= 0)
1334 					close(mtree->fd);
1335 				mtree->fd = -1;
1336 				if (parsed_kws & MTREE_HAS_OPTIONAL) {
1337 					/* It's not an error for an optional
1338 					 * entry to not match disk. */
1339 					*use_next = 1;
1340 				} else if (r == ARCHIVE_OK) {
1341 					archive_set_error(&a->archive,
1342 					    ARCHIVE_ERRNO_MISC,
1343 					    "mtree specification has different"
1344 					    " type for %s",
1345 					    archive_entry_pathname(entry));
1346 					r = ARCHIVE_WARN;
1347 				}
1348 				return (r);
1349 			}
1350 		}
1351 
1352 		/*
1353 		 * If there is a contents file on disk, pick some of the
1354 		 * metadata from that file.  For most of these, we only
1355 		 * set it from the contents if it wasn't already parsed
1356 		 * from the specification.
1357 		 */
1358 		if (st != NULL) {
1359 			if (((parsed_kws & MTREE_HAS_DEVICE) == 0 ||
1360 				(parsed_kws & MTREE_HAS_NOCHANGE) != 0) &&
1361 				(archive_entry_filetype(entry) == AE_IFCHR ||
1362 				 archive_entry_filetype(entry) == AE_IFBLK))
1363 				archive_entry_set_rdev(entry, st->st_rdev);
1364 			if ((parsed_kws & (MTREE_HAS_GID | MTREE_HAS_GNAME))
1365 				== 0 ||
1366 			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1367 				archive_entry_set_gid(entry, st->st_gid);
1368 			if ((parsed_kws & (MTREE_HAS_UID | MTREE_HAS_UNAME))
1369 				== 0 ||
1370 			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1371 				archive_entry_set_uid(entry, st->st_uid);
1372 			if ((parsed_kws & MTREE_HAS_MTIME) == 0 ||
1373 			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0) {
1374 #if HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC
1375 				archive_entry_set_mtime(entry, st->st_mtime,
1376 						st->st_mtimespec.tv_nsec);
1377 #elif HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
1378 				archive_entry_set_mtime(entry, st->st_mtime,
1379 						st->st_mtim.tv_nsec);
1380 #elif HAVE_STRUCT_STAT_ST_MTIME_N
1381 				archive_entry_set_mtime(entry, st->st_mtime,
1382 						st->st_mtime_n);
1383 #elif HAVE_STRUCT_STAT_ST_UMTIME
1384 				archive_entry_set_mtime(entry, st->st_mtime,
1385 						st->st_umtime*1000);
1386 #elif HAVE_STRUCT_STAT_ST_MTIME_USEC
1387 				archive_entry_set_mtime(entry, st->st_mtime,
1388 						st->st_mtime_usec*1000);
1389 #else
1390 				archive_entry_set_mtime(entry, st->st_mtime, 0);
1391 #endif
1392 			}
1393 			if ((parsed_kws & MTREE_HAS_NLINK) == 0 ||
1394 			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1395 				archive_entry_set_nlink(entry, st->st_nlink);
1396 			if ((parsed_kws & MTREE_HAS_PERM) == 0 ||
1397 			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1398 				archive_entry_set_perm(entry, st->st_mode);
1399 			if ((parsed_kws & MTREE_HAS_SIZE) == 0 ||
1400 			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1401 				archive_entry_set_size(entry, st->st_size);
1402 			archive_entry_set_ino(entry, st->st_ino);
1403 			archive_entry_set_dev(entry, st->st_dev);
1404 
1405 			archive_entry_linkify(mtree->resolver, &entry,
1406 				&sparse_entry);
1407 		} else if (parsed_kws & MTREE_HAS_OPTIONAL) {
1408 			/*
1409 			 * Couldn't open the entry, stat it or the on-disk type
1410 			 * didn't match.  If this entry is optional, just
1411 			 * ignore it and read the next header entry.
1412 			 */
1413 			*use_next = 1;
1414 			return ARCHIVE_OK;
1415 		}
1416 	}
1417 
1418 	mtree->cur_size = archive_entry_size(entry);
1419 	mtree->offset = 0;
1420 
1421 	return r;
1422 }
1423 
1424 /*
1425  * Each line contains a sequence of keywords.
1426  */
1427 static int
parse_line(struct archive_read * a,struct archive_entry * entry,struct mtree * mtree,struct mtree_entry * mp,int * parsed_kws)1428 parse_line(struct archive_read *a, struct archive_entry *entry,
1429     struct mtree *mtree, struct mtree_entry *mp, int *parsed_kws)
1430 {
1431 	struct mtree_option *iter;
1432 	int r = ARCHIVE_OK, r1;
1433 
1434 	for (iter = mp->options; iter != NULL; iter = iter->next) {
1435 		r1 = parse_keyword(a, mtree, entry, iter, parsed_kws);
1436 		if (r1 < r)
1437 			r = r1;
1438 	}
1439 	if (r == ARCHIVE_OK && (*parsed_kws & MTREE_HAS_TYPE) == 0) {
1440 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1441 		    "Missing type keyword in mtree specification");
1442 		return (ARCHIVE_WARN);
1443 	}
1444 	return (r);
1445 }
1446 
1447 /*
1448  * Device entries have one of the following forms:
1449  *  - raw dev_t
1450  *  - format,major,minor[,subdevice]
1451  * When parsing succeeded, `pdev' will contain the appropriate dev_t value.
1452  */
1453 
/*
 * Local stand-in for strsep(), which is not part of C90; built on
 * strcspn(), which is.
 * Adapted from http://unixpapa.com/incnote/string.html
 *
 * Returns the token starting at *sp, cut at the first character found
 * in `sep' (that character is overwritten with NUL), and advances *sp
 * past it.  Returns NULL when sp or *sp is NULL or *sp is an empty
 * string.
 */
static char *
la_strsep(char **sp, const char *sep)
{
	char *token, *stop;

	if (sp == NULL)
		return(NULL);
	token = *sp;
	if (token == NULL || *token == '\0')
		return(NULL);

	stop = token + strcspn(token, sep);
	if (*stop == '\0') {
		/* Last token: leave *sp on the terminator. */
		*sp = stop;
	} else {
		*stop = '\0';
		*sp = stop + 1;
	}
	return(token);
}
1469 
1470 static int
parse_device(dev_t * pdev,struct archive * a,char * val)1471 parse_device(dev_t *pdev, struct archive *a, char *val)
1472 {
1473 #define MAX_PACK_ARGS 3
1474 	unsigned long numbers[MAX_PACK_ARGS];
1475 	char *p, *dev;
1476 	int argc;
1477 	pack_t *pack;
1478 	dev_t result;
1479 	const char *error = NULL;
1480 
1481 	memset(pdev, 0, sizeof(*pdev));
1482 	if ((dev = strchr(val, ',')) != NULL) {
1483 		/*
1484 		 * Device's major/minor are given in a specified format.
1485 		 * Decode and pack it accordingly.
1486 		 */
1487 		*dev++ = '\0';
1488 		if ((pack = pack_find(val)) == NULL) {
1489 			archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1490 			    "Unknown format `%s'", val);
1491 			return ARCHIVE_WARN;
1492 		}
1493 		argc = 0;
1494 		while ((p = la_strsep(&dev, ",")) != NULL) {
1495 			if (*p == '\0') {
1496 				archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1497 				    "Missing number");
1498 				return ARCHIVE_WARN;
1499 			}
1500 			if (argc >= MAX_PACK_ARGS) {
1501 				archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1502 				    "Too many arguments");
1503 				return ARCHIVE_WARN;
1504 			}
1505 			numbers[argc++] = (unsigned long)mtree_atol(&p, 0);
1506 		}
1507 		if (argc < 2) {
1508 			archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1509 			    "Not enough arguments");
1510 			return ARCHIVE_WARN;
1511 		}
1512 		result = (*pack)(argc, numbers, &error);
1513 		if (error != NULL) {
1514 			archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1515 			    "%s", error);
1516 			return ARCHIVE_WARN;
1517 		}
1518 	} else {
1519 		/* file system raw value. */
1520 		result = (dev_t)mtree_atol(&val, 0);
1521 	}
1522 	*pdev = result;
1523 	return ARCHIVE_OK;
1524 #undef MAX_PACK_ARGS
1525 }
1526 
/*
 * Decode one lowercase hexadecimal digit.
 * Returns its value (0..15), or -1 for any other character.
 * Uppercase 'A'..'F' is deliberately not accepted here.
 */
static int
parse_hex_nibble(char c)
{
	int value = -1;

	if (c >= 'a' && c <= 'f')
		value = c - 'a' + 10;
	else if (c >= '0' && c <= '9')
		value = c - '0';

	return value;
}
1542 
1543 static int
parse_digest(struct archive_read * a,struct archive_entry * entry,const char * digest,int type)1544 parse_digest(struct archive_read *a, struct archive_entry *entry,
1545     const char *digest, int type)
1546 {
1547 	unsigned char digest_buf[64];
1548 	int high, low;
1549 	size_t i, j, len;
1550 
1551 	switch (type) {
1552 	case ARCHIVE_ENTRY_DIGEST_MD5:
1553 		len = sizeof(entry->digest.md5);
1554 		break;
1555 	case ARCHIVE_ENTRY_DIGEST_RMD160:
1556 		len = sizeof(entry->digest.rmd160);
1557 		break;
1558 	case ARCHIVE_ENTRY_DIGEST_SHA1:
1559 		len = sizeof(entry->digest.sha1);
1560 		break;
1561 	case ARCHIVE_ENTRY_DIGEST_SHA256:
1562 		len = sizeof(entry->digest.sha256);
1563 		break;
1564 	case ARCHIVE_ENTRY_DIGEST_SHA384:
1565 		len = sizeof(entry->digest.sha384);
1566 		break;
1567 	case ARCHIVE_ENTRY_DIGEST_SHA512:
1568 		len = sizeof(entry->digest.sha512);
1569 		break;
1570 	default:
1571 		archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
1572 			"Internal error: Unknown digest type");
1573 		return ARCHIVE_FATAL;
1574 	}
1575 
1576 	if (len > sizeof(digest_buf)) {
1577 		archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
1578 			"Internal error: Digest storage too large");
1579 		return ARCHIVE_FATAL;
1580 	}
1581 
1582 	len *= 2;
1583 
1584 	if (mtree_strnlen(digest, len+1) != len) {
1585 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1586 				  "incorrect digest length, ignoring");
1587 		return ARCHIVE_WARN;
1588 	}
1589 
1590 	for (i = 0, j = 0; i < len; i += 2, j++) {
1591 		high = parse_hex_nibble(digest[i]);
1592 		low = parse_hex_nibble(digest[i+1]);
1593 		if (high == -1 || low == -1) {
1594 			archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1595 					  "invalid digest data, ignoring");
1596 			return ARCHIVE_WARN;
1597 		}
1598 
1599 		digest_buf[j] = high << 4 | low;
1600 	}
1601 
1602 	return archive_entry_set_digest(entry, type, digest_buf);
1603 }
1604 
1605 /*
1606  * Parse a single keyword and its value.
1607  */
1608 static int
parse_keyword(struct archive_read * a,struct mtree * mtree,struct archive_entry * entry,struct mtree_option * opt,int * parsed_kws)1609 parse_keyword(struct archive_read *a, struct mtree *mtree,
1610     struct archive_entry *entry, struct mtree_option *opt, int *parsed_kws)
1611 {
1612 	char *val, *key;
1613 
1614 	key = opt->value;
1615 
1616 	if (*key == '\0')
1617 		return (ARCHIVE_OK);
1618 
1619 	if (strcmp(key, "nochange") == 0) {
1620 		*parsed_kws |= MTREE_HAS_NOCHANGE;
1621 		return (ARCHIVE_OK);
1622 	}
1623 	if (strcmp(key, "optional") == 0) {
1624 		*parsed_kws |= MTREE_HAS_OPTIONAL;
1625 		return (ARCHIVE_OK);
1626 	}
1627 	if (strcmp(key, "ignore") == 0) {
1628 		/*
1629 		 * The mtree processing is not recursive, so
1630 		 * recursion will only happen for explicitly listed
1631 		 * entries.
1632 		 */
1633 		return (ARCHIVE_OK);
1634 	}
1635 
1636 	val = strchr(key, '=');
1637 	if (val == NULL) {
1638 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1639 		    "Malformed attribute \"%s\" (%d)", key, key[0]);
1640 		return (ARCHIVE_WARN);
1641 	}
1642 
1643 	*val = '\0';
1644 	++val;
1645 
1646 	switch (key[0]) {
1647 	case 'c':
1648 		if (strcmp(key, "content") == 0
1649 		    || strcmp(key, "contents") == 0) {
1650 			parse_escapes(val, NULL);
1651 			archive_strcpy(&mtree->contents_name, val);
1652 			return (ARCHIVE_OK);
1653 		}
1654 		if (strcmp(key, "cksum") == 0)
1655 			return (ARCHIVE_OK);
1656 		break;
1657 	case 'd':
1658 		if (strcmp(key, "device") == 0) {
1659 			/* stat(2) st_rdev field, e.g. the major/minor IDs
1660 			 * of a char/block special file */
1661 			int r;
1662 			dev_t dev;
1663 
1664 			*parsed_kws |= MTREE_HAS_DEVICE;
1665 			r = parse_device(&dev, &a->archive, val);
1666 			if (r == ARCHIVE_OK)
1667 				archive_entry_set_rdev(entry, dev);
1668 			return r;
1669 		}
1670 		break;
1671 	case 'f':
1672 		if (strcmp(key, "flags") == 0) {
1673 			*parsed_kws |= MTREE_HAS_FFLAGS;
1674 			archive_entry_copy_fflags_text(entry, val);
1675 			return (ARCHIVE_OK);
1676 		}
1677 		break;
1678 	case 'g':
1679 		if (strcmp(key, "gid") == 0) {
1680 			*parsed_kws |= MTREE_HAS_GID;
1681 			archive_entry_set_gid(entry, mtree_atol(&val, 10));
1682 			return (ARCHIVE_OK);
1683 		}
1684 		if (strcmp(key, "gname") == 0) {
1685 			*parsed_kws |= MTREE_HAS_GNAME;
1686 			archive_entry_copy_gname(entry, val);
1687 			return (ARCHIVE_OK);
1688 		}
1689 		break;
1690 	case 'i':
1691 		if (strcmp(key, "inode") == 0) {
1692 			archive_entry_set_ino(entry, mtree_atol(&val, 10));
1693 			return (ARCHIVE_OK);
1694 		}
1695 		break;
1696 	case 'l':
1697 		if (strcmp(key, "link") == 0) {
1698 			parse_escapes(val, NULL);
1699 			archive_entry_copy_symlink(entry, val);
1700 			return (ARCHIVE_OK);
1701 		}
1702 		break;
1703 	case 'm':
1704 		if (strcmp(key, "md5") == 0 || strcmp(key, "md5digest") == 0) {
1705 			return parse_digest(a, entry, val,
1706 			    ARCHIVE_ENTRY_DIGEST_MD5);
1707 		}
1708 		if (strcmp(key, "mode") == 0) {
1709 			if (val[0] < '0' || val[0] > '7') {
1710 				archive_set_error(&a->archive,
1711 				    ARCHIVE_ERRNO_FILE_FORMAT,
1712 				    "Symbolic or non-octal mode \"%s\" unsupported", val);
1713 				return (ARCHIVE_WARN);
1714 			}
1715 			*parsed_kws |= MTREE_HAS_PERM;
1716 			archive_entry_set_perm(entry, (mode_t)mtree_atol(&val, 8));
1717 			return (ARCHIVE_OK);
1718 		}
1719 		break;
1720 	case 'n':
1721 		if (strcmp(key, "nlink") == 0) {
1722 			*parsed_kws |= MTREE_HAS_NLINK;
1723 			archive_entry_set_nlink(entry,
1724 				(unsigned int)mtree_atol(&val, 10));
1725 			return (ARCHIVE_OK);
1726 		}
1727 		break;
1728 	case 'r':
1729 		if (strcmp(key, "resdevice") == 0) {
1730 			/* stat(2) st_dev field, e.g. the device ID where the
1731 			 * inode resides */
1732 			int r;
1733 			dev_t dev;
1734 
1735 			r = parse_device(&dev, &a->archive, val);
1736 			if (r == ARCHIVE_OK)
1737 				archive_entry_set_dev(entry, dev);
1738 			return r;
1739 		}
1740 		if (strcmp(key, "rmd160") == 0 ||
1741 		    strcmp(key, "rmd160digest") == 0) {
1742 			return parse_digest(a, entry, val,
1743 			    ARCHIVE_ENTRY_DIGEST_RMD160);
1744 		}
1745 		break;
1746 	case 's':
1747 		if (strcmp(key, "sha1") == 0 ||
1748 		    strcmp(key, "sha1digest") == 0) {
1749 			return parse_digest(a, entry, val,
1750 			    ARCHIVE_ENTRY_DIGEST_SHA1);
1751 		}
1752 		if (strcmp(key, "sha256") == 0 ||
1753 		    strcmp(key, "sha256digest") == 0) {
1754 			return parse_digest(a, entry, val,
1755 			    ARCHIVE_ENTRY_DIGEST_SHA256);
1756 		}
1757 		if (strcmp(key, "sha384") == 0 ||
1758 		    strcmp(key, "sha384digest") == 0) {
1759 			return parse_digest(a, entry, val,
1760 			    ARCHIVE_ENTRY_DIGEST_SHA384);
1761 		}
1762 		if (strcmp(key, "sha512") == 0 ||
1763 		    strcmp(key, "sha512digest") == 0) {
1764 			return parse_digest(a, entry, val,
1765 			    ARCHIVE_ENTRY_DIGEST_SHA512);
1766 		}
1767 		if (strcmp(key, "size") == 0) {
1768 			archive_entry_set_size(entry, mtree_atol(&val, 10));
1769 			return (ARCHIVE_OK);
1770 		}
1771 		break;
1772 	case 't':
1773 		if (strcmp(key, "tags") == 0) {
1774 			/*
1775 			 * Comma delimited list of tags.
1776 			 * Ignore the tags for now, but the interface
1777 			 * should be extended to allow inclusion/exclusion.
1778 			 */
1779 			return (ARCHIVE_OK);
1780 		}
1781 		if (strcmp(key, "time") == 0) {
1782 			int64_t m;
1783 			int64_t my_time_t_max = get_time_t_max();
1784 			int64_t my_time_t_min = get_time_t_min();
1785 			long ns = 0;
1786 
1787 			*parsed_kws |= MTREE_HAS_MTIME;
1788 			m = mtree_atol(&val, 10);
1789 			/* Replicate an old mtree bug:
1790 			 * 123456789.1 represents 123456789
1791 			 * seconds and 1 nanosecond. */
1792 			if (*val == '.') {
1793 				++val;
1794 				ns = (long)mtree_atol(&val, 10);
1795 				if (ns < 0)
1796 					ns = 0;
1797 				else if (ns > 999999999)
1798 					ns = 999999999;
1799 			}
1800 			if (m > my_time_t_max)
1801 				m = my_time_t_max;
1802 			else if (m < my_time_t_min)
1803 				m = my_time_t_min;
1804 			archive_entry_set_mtime(entry, (time_t)m, ns);
1805 			return (ARCHIVE_OK);
1806 		}
1807 		if (strcmp(key, "type") == 0) {
1808 			switch (val[0]) {
1809 			case 'b':
1810 				if (strcmp(val, "block") == 0) {
1811 					*parsed_kws |= MTREE_HAS_TYPE;
1812 					archive_entry_set_filetype(entry,
1813 						AE_IFBLK);
1814 					return (ARCHIVE_OK);
1815 				}
1816 				break;
1817 			case 'c':
1818 				if (strcmp(val, "char") == 0) {
1819 					*parsed_kws |= MTREE_HAS_TYPE;
1820 					archive_entry_set_filetype(entry,
1821 						AE_IFCHR);
1822 					return (ARCHIVE_OK);
1823 				}
1824 				break;
1825 			case 'd':
1826 				if (strcmp(val, "dir") == 0) {
1827 					*parsed_kws |= MTREE_HAS_TYPE;
1828 					archive_entry_set_filetype(entry,
1829 						AE_IFDIR);
1830 					return (ARCHIVE_OK);
1831 				}
1832 				break;
1833 			case 'f':
1834 				if (strcmp(val, "fifo") == 0) {
1835 					*parsed_kws |= MTREE_HAS_TYPE;
1836 					archive_entry_set_filetype(entry,
1837 						AE_IFIFO);
1838 					return (ARCHIVE_OK);
1839 				}
1840 				if (strcmp(val, "file") == 0) {
1841 					*parsed_kws |= MTREE_HAS_TYPE;
1842 					archive_entry_set_filetype(entry,
1843 						AE_IFREG);
1844 					return (ARCHIVE_OK);
1845 				}
1846 				break;
1847 			case 'l':
1848 				if (strcmp(val, "link") == 0) {
1849 					*parsed_kws |= MTREE_HAS_TYPE;
1850 					archive_entry_set_filetype(entry,
1851 						AE_IFLNK);
1852 					return (ARCHIVE_OK);
1853 				}
1854 				break;
1855 			default:
1856 				break;
1857 			}
1858 			archive_set_error(&a->archive,
1859 			    ARCHIVE_ERRNO_FILE_FORMAT,
1860 			    "Unrecognized file type \"%s\"; "
1861 			    "assuming \"file\"", val);
1862 			archive_entry_set_filetype(entry, AE_IFREG);
1863 			return (ARCHIVE_WARN);
1864 		}
1865 		break;
1866 	case 'u':
1867 		if (strcmp(key, "uid") == 0) {
1868 			*parsed_kws |= MTREE_HAS_UID;
1869 			archive_entry_set_uid(entry, mtree_atol(&val, 10));
1870 			return (ARCHIVE_OK);
1871 		}
1872 		if (strcmp(key, "uname") == 0) {
1873 			*parsed_kws |= MTREE_HAS_UNAME;
1874 			archive_entry_copy_uname(entry, val);
1875 			return (ARCHIVE_OK);
1876 		}
1877 		break;
1878 	default:
1879 		break;
1880 	}
1881 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1882 	    "Unrecognized key %s=%s", key, val);
1883 	return (ARCHIVE_WARN);
1884 }
1885 
1886 static int
read_data(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)1887 read_data(struct archive_read *a, const void **buff, size_t *size,
1888     int64_t *offset)
1889 {
1890 	size_t bytes_to_read;
1891 	ssize_t bytes_read;
1892 	struct mtree *mtree;
1893 
1894 	mtree = (struct mtree *)(a->format->data);
1895 	if (mtree->fd < 0) {
1896 		*buff = NULL;
1897 		*offset = 0;
1898 		*size = 0;
1899 		return (ARCHIVE_EOF);
1900 	}
1901 	if (mtree->buff == NULL) {
1902 		mtree->buffsize = 64 * 1024;
1903 		mtree->buff = malloc(mtree->buffsize);
1904 		if (mtree->buff == NULL) {
1905 			archive_set_error(&a->archive, ENOMEM,
1906 			    "Can't allocate memory");
1907 			return (ARCHIVE_FATAL);
1908 		}
1909 	}
1910 
1911 	*buff = mtree->buff;
1912 	*offset = mtree->offset;
1913 	if ((int64_t)mtree->buffsize > mtree->cur_size - mtree->offset)
1914 		bytes_to_read = (size_t)(mtree->cur_size - mtree->offset);
1915 	else
1916 		bytes_to_read = mtree->buffsize;
1917 	bytes_read = read(mtree->fd, mtree->buff, bytes_to_read);
1918 	if (bytes_read < 0) {
1919 		archive_set_error(&a->archive, errno, "Can't read");
1920 		return (ARCHIVE_WARN);
1921 	}
1922 	if (bytes_read == 0) {
1923 		*size = 0;
1924 		return (ARCHIVE_EOF);
1925 	}
1926 	mtree->offset += bytes_read;
1927 	*size = bytes_read;
1928 	return (ARCHIVE_OK);
1929 }
1930 
1931 /* Skip does nothing except possibly close the contents file. */
1932 static int
skip(struct archive_read * a)1933 skip(struct archive_read *a)
1934 {
1935 	struct mtree *mtree;
1936 
1937 	mtree = (struct mtree *)(a->format->data);
1938 	if (mtree->fd >= 0) {
1939 		close(mtree->fd);
1940 		mtree->fd = -1;
1941 	}
1942 	return (ARCHIVE_OK);
1943 }
1944 
1945 /*
1946  * Since parsing backslash sequences always makes strings shorter,
1947  * we can always do this conversion in-place.
1948  */
1949 static void
parse_escapes(char * src,struct mtree_entry * mentry)1950 parse_escapes(char *src, struct mtree_entry *mentry)
1951 {
1952 	char *dest = src;
1953 	char c;
1954 
1955 	if (mentry != NULL && strcmp(src, ".") == 0)
1956 		mentry->full = 1;
1957 
1958 	while (*src != '\0') {
1959 		c = *src++;
1960 		if (c == '/' && mentry != NULL)
1961 			mentry->full = 1;
1962 		if (c == '\\') {
1963 			switch (src[0]) {
1964 			case '0':
1965 				if (src[1] < '0' || src[1] > '7') {
1966 					c = 0;
1967 					++src;
1968 					break;
1969 				}
1970 				/* FALLTHROUGH */
1971 			case '1':
1972 			case '2':
1973 			case '3':
1974 				if (src[1] >= '0' && src[1] <= '7' &&
1975 				    src[2] >= '0' && src[2] <= '7') {
1976 					c = (src[0] - '0') << 6;
1977 					c |= (src[1] - '0') << 3;
1978 					c |= (src[2] - '0');
1979 					src += 3;
1980 				}
1981 				break;
1982 			case 'a':
1983 				c = '\a';
1984 				++src;
1985 				break;
1986 			case 'b':
1987 				c = '\b';
1988 				++src;
1989 				break;
1990 			case 'f':
1991 				c = '\f';
1992 				++src;
1993 				break;
1994 			case 'n':
1995 				c = '\n';
1996 				++src;
1997 				break;
1998 			case 'r':
1999 				c = '\r';
2000 				++src;
2001 				break;
2002 			case 's':
2003 				c = ' ';
2004 				++src;
2005 				break;
2006 			case 't':
2007 				c = '\t';
2008 				++src;
2009 				break;
2010 			case 'v':
2011 				c = '\v';
2012 				++src;
2013 				break;
2014 			case '\\':
2015 				c = '\\';
2016 				++src;
2017 				break;
2018 			}
2019 		}
2020 		*dest++ = c;
2021 	}
2022 	*dest = '\0';
2023 }
2024 
/*
 * Parse a single digit in any base up to 16.
 *
 * Returns 0..9 for '0'..'9' and 10..15 for 'a'..'f' or 'A'..'F';
 * returns -1 for every other character.  Callers compare the result
 * against their base, so letters must map to 10 and above: otherwise
 * they would be accepted as digits in base 8 or 10 and hexadecimal
 * values would be computed incorrectly.
 */
static int
parsedigit(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	else if (c >= 'a' && c <= 'f')
		return 10 + c - 'a';
	else if (c >= 'A' && c <= 'F')
		return 10 + c - 'A';
	else
		return -1;
}
2038 
/*
 * Locale-independent string to int64_t conversion.
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 *
 * With base 0 the base is taken from the text: a leading "0x"/"0X"
 * (which is skipped) selects 16, any other leading '0' selects 8,
 * and anything else selects 10.  An optional '-' may follow.  *p is
 * advanced past the digits consumed.  On overflow the result
 * saturates to INT64_MAX or INT64_MIN and parsing stops at the
 * offending digit.
 */
static int64_t
mtree_atol(char **p, int base)
{
	int64_t acc = 0, bound;
	int d, last, negative;

	if (base == 0) {
		if (**p != '0')
			base = 10;
		else if ((*p)[1] == 'x' || (*p)[1] == 'X') {
			*p += 2;
			base = 16;
		} else {
			base = 8;
		}
	}

	/*
	 * `bound' is the largest magnitude that can still take one more
	 * digit; `last' is the limit for that final digit.
	 */
	negative = (**p == '-');
	if (negative) {
		bound = INT64_MIN / base;
		last = -(INT64_MIN % base);
		++(*p);
	} else {
		bound = INT64_MAX / base;
		last = INT64_MAX % base;
	}

	for (d = parsedigit(**p); d >= 0 && d < base;
	    d = parsedigit(*++(*p))) {
		if (negative) {
			if (acc < bound || (acc == bound && d >= last))
				return INT64_MIN;
			acc = (acc * base) - d;
		} else {
			if (acc > bound || (acc == bound && d > last))
				return INT64_MAX;
			acc = (acc * base) + d;
		}
	}
	return acc;
}
2090 
2091 /*
2092  * Returns length of line (including trailing newline)
2093  * or negative on error.  'start' argument is updated to
2094  * point to first character of line.
2095  */
2096 static ssize_t
readline(struct archive_read * a,struct mtree * mtree,char ** start,ssize_t limit)2097 readline(struct archive_read *a, struct mtree *mtree, char **start,
2098     ssize_t limit)
2099 {
2100 	ssize_t bytes_read;
2101 	ssize_t total_size = 0;
2102 	ssize_t find_off = 0;
2103 	const void *t;
2104 	void *nl;
2105 	char *u;
2106 
2107 	/* Accumulate line in a line buffer. */
2108 	for (;;) {
2109 		/* Read some more. */
2110 		t = __archive_read_ahead(a, 1, &bytes_read);
2111 		if (t == NULL)
2112 			return (0);
2113 		if (bytes_read < 0)
2114 			return (ARCHIVE_FATAL);
2115 		nl = memchr(t, '\n', bytes_read);
2116 		/* If we found '\n', trim the read to end exactly there. */
2117 		if (nl != NULL) {
2118 			bytes_read = ((const char *)nl) - ((const char *)t) + 1;
2119 		}
2120 		if (total_size + bytes_read + 1 > limit) {
2121 			archive_set_error(&a->archive,
2122 			    ARCHIVE_ERRNO_FILE_FORMAT,
2123 			    "Line too long");
2124 			return (ARCHIVE_FATAL);
2125 		}
2126 		if (archive_string_ensure(&mtree->line,
2127 			total_size + bytes_read + 1) == NULL) {
2128 			archive_set_error(&a->archive, ENOMEM,
2129 			    "Can't allocate working buffer");
2130 			return (ARCHIVE_FATAL);
2131 		}
2132 		/* Append new bytes to string. */
2133 		memcpy(mtree->line.s + total_size, t, bytes_read);
2134 		__archive_read_consume(a, bytes_read);
2135 		total_size += bytes_read;
2136 		mtree->line.s[total_size] = '\0';
2137 
2138 		for (u = mtree->line.s + find_off; *u; ++u) {
2139 			if (u[0] == '\n') {
2140 				/* Ends with unescaped newline. */
2141 				/* Check if preceded by '\r' for CRLF handling */
2142 				if (u > mtree->line.s && u[-1] == '\r') {
2143 					/* CRLF ending - remove the '\r' */
2144 					u[-1] = '\n';
2145 					u[0] = '\0';
2146 					total_size--;
2147 				}
2148 				*start = mtree->line.s;
2149 				return total_size;
2150 			} else if (u[0] == '#') {
2151 				/* Ends with comment sequence #...\n */
2152 				if (nl == NULL) {
2153 					/* But we've not found the \n yet */
2154 					break;
2155 				}
2156 			} else if (u[0] == '\\') {
2157 				if (u[1] == '\n') {
2158 					/* Trim escaped newline. */
2159 					total_size -= 2;
2160 					mtree->line.s[total_size] = '\0';
2161 					break;
2162 				} else if (u[1] == '\r' && u[2] == '\n') {
2163 					/* Trim escaped CRLF. */
2164 					total_size -= 3;
2165 					mtree->line.s[total_size] = '\0';
2166 					break;
2167 				} else if (u[1] != '\0') {
2168 					/* Skip the two-char escape sequence */
2169 					++u;
2170 				}
2171 			}
2172 		}
2173 		find_off = u - mtree->line.s;
2174 	}
2175 }
2176