xref: /freebsd/sys/fs/tarfs/tarfs_vfsops.c (revision ff8da9b2bab43920a19c16855ac3d30b5ccb1df2)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013 Juniper Networks, Inc.
5  * Copyright (c) 2022-2023 Klara, Inc.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include "opt_tarfs.h"
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/buf.h>
34 #include <sys/conf.h>
35 #include <sys/fcntl.h>
36 #include <sys/libkern.h>
37 #include <sys/limits.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/mount.h>
41 #include <sys/mutex.h>
42 #include <sys/namei.h>
43 #include <sys/priv.h>
44 #include <sys/proc.h>
45 #include <sys/queue.h>
46 #include <sys/sbuf.h>
47 #include <sys/stat.h>
48 #include <sys/uio.h>
49 #include <sys/vnode.h>
50 
51 #include <vm/vm_param.h>
52 
53 #include <geom/geom.h>
54 #include <geom/geom_vfs.h>
55 
56 #include <fs/tarfs/tarfs.h>
57 #include <fs/tarfs/tarfs_dbg.h>
58 
/*
 * tarfs_alloc_one() detects end-of-archive marker blocks by comparing a
 * TARFS_BLOCKSIZE-byte header block against zero_region, so the zero
 * region is required to be larger than one tar block.
 */
CTASSERT(ZERO_REGION_SIZE > TARFS_BLOCKSIZE);
60 
/*
 * Layout of a tar header as it appears at the start of a header block.
 * Every field is a fixed-width character array (500 bytes in total);
 * tarfs_alloc_one() overlays this structure on a TARFS_BLOCKSIZE-byte
 * buffer.  The numeric fields (mode, uid, gid, size, mtime, checksum,
 * major, minor) are decoded with tarfs_str2int64().
 */
struct ustar_header {
	char	name[100];		/* File name */
	char	mode[8];		/* Mode flags */
	char	uid[8];			/* User id */
	char	gid[8];			/* Group id */
	char	size[12];		/* Size */
	char	mtime[12];		/* Modified time */
	char	checksum[8];		/* Checksum */
	char	typeflag[1];		/* Type */
	char	linkname[100];		/* "old format" stops here */
	char	magic[6];		/* POSIX UStar "ustar\0" indicator */
	char	version[2];		/* POSIX UStar version "00" */
	char	uname[32];		/* User name */
	char	gname[32];		/* Group name */
	char	major[8];		/* Device major number */
	char	minor[8];		/* Device minor number */
	char	prefix[155];		/* Path prefix */
};
79 
/*
 * Sentinel block number: tarfs_alloc_one() stores this in *blknump once
 * the end-of-archive marker has been seen.
 */
#define	TAR_EOF			((off_t)-1)

/* Values of the typeflag header field. */
#define	TAR_TYPE_FILE		'0'
#define	TAR_TYPE_HARDLINK	'1'
#define	TAR_TYPE_SYMLINK	'2'
#define	TAR_TYPE_CHAR		'3'
#define	TAR_TYPE_BLOCK		'4'
#define	TAR_TYPE_DIRECTORY	'5'
#define	TAR_TYPE_FIFO		'6'
#define	TAR_TYPE_CONTIG		'7'
#define	TAR_TYPE_GLOBAL_EXTHDR	'g'
#define	TAR_TYPE_EXTHDR		'x'
#define	TAR_TYPE_GNU_SPARSE	'S'

/*
 * Magic and version signatures.  These are array compound literals so
 * that sizeof() yields the number of bytes to pass to memcmp().
 */
#define	USTAR_MAGIC		(uint8_t []){ 'u', 's', 't', 'a', 'r', 0 }
#define	USTAR_VERSION		(uint8_t []){ '0', '0' }
#define	GNUTAR_MAGIC		(uint8_t []){ 'u', 's', 't', 'a', 'r', ' ' }
#define	GNUTAR_VERSION		(uint8_t []){ ' ', '\x0' }

/*
 * Permission bits given to directories that tarfs_lookup_path() creates
 * implicitly for missing ancestors: read and search for everyone.
 */
#define	DEFDIRMODE	(S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)

MALLOC_DEFINE(M_TARFSMNT, "tarfs mount", "tarfs mount structures");
MALLOC_DEFINE(M_TARFSNODE, "tarfs node", "tarfs node structures");

/* Forward declarations of the VFS operation handlers. */
static vfs_mount_t	tarfs_mount;
static vfs_unmount_t	tarfs_unmount;
static vfs_root_t	tarfs_root;
static vfs_statfs_t	tarfs_statfs;
static vfs_fhtovp_t	tarfs_fhtovp;

/*
 * Mount options accepted by tarfs_mount(); the list is handed to
 * vfs_filteropt(), and the mount fails with EINVAL if that rejects the
 * supplied options.
 */
static const char *tarfs_opts[] = {
	"from", "gid", "mode", "uid", "verify",
	NULL
};
114 
/*
 * Reads a signed octal number from the len-byte field strp and returns
 * its value.
 *
 * Leading spaces and tabs are skipped, an optional '-' sign is accepted,
 * and parsing stops at the first character that is not an octal digit or
 * at the end of the field.  A field that is empty or consists only of
 * blanks yields 0.  If the magnitude does not fit in an int64_t it
 * saturates at INT64_MAX (so the result is INT64_MAX or -INT64_MAX).
 *
 * XXX Does not report errors.
 */
static int64_t
tarfs_str2octal(const char *strp, size_t len)
{
	int64_t val;
	size_t idx;
	int digit, sign;

	/*
	 * Skip leading spaces or tabs.
	 * XXX why?  POSIX requires numeric fields to be 0-padded.
	 */
	for (idx = 0; idx < len; idx++)
		if (strp[idx] != ' ' && strp[idx] != '\t')
			break;

	if (idx == len)
		return (0);

	if (strp[idx] == '-') {
		sign = -1;
		idx++;
	} else
		sign = 1;

	val = 0;
	for (; idx < len; idx++) {
		if (strp[idx] < '0' || strp[idx] > '7')
			break;
		digit = strp[idx] - '0';

		/*
		 * Saturate on overflow.  The test must come before the
		 * multiplication: checking afterwards would both clamp
		 * representable values above INT64_MAX / 8 and risk
		 * signed overflow.
		 */
		if (val > (INT64_MAX - digit) / 8) {
			val = INT64_MAX;
			break;
		}
		val = val * 8 + digit;
	}

	return (sign > 0) ? val : -val;
}
159 
/*
 * Reads a len-byte extended numeric value from strp.  The first byte has
 * bit 7 set to indicate the format; the remaining 7 bits + the (len - 1)
 * bytes that follow form a big-endian signed two's complement binary
 * number.  Returns the value, saturated to INT64_MAX or INT64_MIN if it
 * does not fit in an int64_t.
 *
 * XXX Does not report errors.
 */
static int64_t
tarfs_str2base256(const char *strp, size_t len)
{
	int64_t val;
	size_t idx;
	int byte;

	KASSERT(strp[0] & 0x80, ("not an extended numeric value"));

	/*
	 * The low 7 bits of the first byte are the most significant bits
	 * of the number; bit 6 is its sign bit.  Sign-extend arithmetically
	 * rather than by shifting a negative value.
	 */
	val = strp[0] & 0x3f;
	if ((strp[0] & 0x40) != 0)
		val -= 0x40;

	/* Read subsequent bytes */
	for (idx = 1; idx < len; idx++) {
		byte = (unsigned char)strp[idx];

		/*
		 * Saturate on overflow and underflow.  Both tests are made
		 * before the value is scaled so that the multiplication
		 * below can never overflow.
		 */
		if (val > (INT64_MAX - byte) / 256) {
			val = INT64_MAX;
			break;
		} else if (val < INT64_MIN / 256) {
			val = INT64_MIN;
			break;
		}
		val = val * 256 + byte;
	}

	return (val);
}
199 
/*
 * Decodes the len-byte numeric header field at strp.  If bit 7 of the
 * first byte is set, the field is treated as an extended (base-256,
 * signed two's complement) value; otherwise it is treated as a signed
 * octal string.  A zero-length field decodes to 0.
 *
 * XXX practically no error checking or handling
 */
static int64_t
tarfs_str2int64(const char *strp, size_t len)
{
	int64_t result = 0;

	if (len >= 1) {
		if ((strp[0] & 0x80) == 0)
			result = tarfs_str2octal(strp, len);
		else
			result = tarfs_str2base256(strp, len);
	}
	return (result);
}
218 
219 /*
220  * Verifies the checksum of a header.  Returns true if the checksum is
221  * valid, false otherwise.
222  */
223 static boolean_t
224 tarfs_checksum(struct ustar_header *hdrp)
225 {
226 	const unsigned char *ptr;
227 	int64_t checksum, hdrsum;
228 	size_t idx;
229 
230 	hdrsum = tarfs_str2int64(hdrp->checksum, sizeof(hdrp->checksum));
231 	TARFS_DPF(CHECKSUM, "%s: header checksum %lx\n", __func__, hdrsum);
232 
233 	checksum = 0;
234 	for (ptr = (const unsigned char *)hdrp;
235 	     ptr < (const unsigned char *)hdrp->checksum; ptr++)
236 		checksum += *ptr;
237 	for (idx = 0; idx < sizeof(hdrp->checksum); idx++)
238 		checksum += 0x20;
239 	for (ptr = (const unsigned char *)hdrp->typeflag;
240 	     ptr < (const unsigned char *)(hdrp + 1); ptr++)
241 		checksum += *ptr;
242 	TARFS_DPF(CHECKSUM, "%s: calc unsigned checksum %lx\n", __func__,
243 	    checksum);
244 	if (hdrsum == checksum)
245 		return (true);
246 
247 	/*
248 	 * Repeat test with signed bytes, some older formats use a broken
249 	 * form of the calculation
250 	 */
251 	checksum = 0;
252 	for (ptr = (const unsigned char *)hdrp;
253 	     ptr < (const unsigned char *)&hdrp->checksum; ptr++)
254 		checksum += *((const signed char *)ptr);
255 	for (idx = 0; idx < sizeof(hdrp->checksum); idx++)
256 		checksum += 0x20;
257 	for (ptr = (const unsigned char *)&hdrp->typeflag;
258 	     ptr < (const unsigned char *)(hdrp + 1); ptr++)
259 		checksum += *((const signed char *)ptr);
260 	TARFS_DPF(CHECKSUM, "%s: calc signed checksum %lx\n", __func__,
261 	    checksum);
262 	if (hdrsum == checksum)
263 		return (true);
264 
265 	return (false);
266 }
267 
268 
269 /*
270  * Looks up a path in the tarfs node tree.
271  *
272  * - If the path exists, stores a pointer to the corresponding tarfs_node
273  *   in retnode and a pointer to its parent in retparent.
274  *
275  * - If the path does not exist, but create_dirs is true, creates ancestor
276  *   directories and returns NULL in retnode and the parent in retparent.
277  *
278  * - If the path does not exist and create_dirs is false, stops at the
279  *   first missing path name component.
280  *
281  * - In all cases, on return, endp and sepp point to the beginning and
282  *   end, respectively, of the last-processed path name component.
283  *
284  * - Returns 0 if the node was found, ENOENT if it was not, and some other
285  *   positive errno value on failure.
286  */
287 static int
288 tarfs_lookup_path(struct tarfs_mount *tmp, char *name, size_t namelen,
289     char **endp, char **sepp, struct tarfs_node **retparent,
290     struct tarfs_node **retnode, boolean_t create_dirs)
291 {
292 	struct componentname cn = { };
293 	struct tarfs_node *parent, *tnp;
294 	char *sep;
295 	size_t len;
296 	int error;
297 	boolean_t do_lookup;
298 
299 	MPASS(name != NULL && namelen != 0);
300 
301 	do_lookup = true;
302 	error = 0;
303 	parent = tnp = tmp->root;
304 	if (tnp == NULL)
305 		panic("%s: root node not yet created", __func__);
306 
307 	TARFS_DPF(LOOKUP, "%s: Full path: %.*s\n", __func__, (int)namelen,
308 	    name);
309 
310 	sep = NULL;
311 	for (;;) {
312 		/* skip leading slash(es) */
313 		while (name[0] == '/' && namelen > 0)
314 			name++, namelen--;
315 
316 		/* did we reach the end? */
317 		if (namelen == 0 || name[0] == '\0') {
318 			name = do_lookup ? NULL : cn.cn_nameptr;
319 			namelen = do_lookup ? 0 : cn.cn_namelen;
320 			break;
321 		}
322 
323 		/* we're not at the end, so parent must be a directory */
324 		if (parent->type != VDIR) {
325 			error = ENOTDIR;
326 			break;
327 		}
328 
329 		/* locate the next separator */
330 		for (sep = name, len = 0;
331 		     *sep != '\0' && *sep != '/' && len < namelen;
332 		     sep++, len++)
333 			/* nothing */ ;
334 
335 		/* check for . and .. */
336 		if (name[0] == '.' && len == 1) {
337 			name += len;
338 			namelen -= len;
339 			continue;
340 		}
341 		if (name[0] == '.' && name[1] == '.' && len == 2) {
342 			if (tnp == tmp->root) {
343 				error = EINVAL;
344 				break;
345 			}
346 			tnp = parent;
347 			parent = tnp->parent;
348 			name += len;
349 			namelen -= len;
350 			continue;
351 		}
352 
353 		/* create parent if necessary */
354 		if (!do_lookup) {
355 			TARFS_DPF(ALLOC, "%s: creating %.*s\n", __func__,
356 			    (int)cn.cn_namelen, cn.cn_nameptr);
357 			error = tarfs_alloc_node(tmp, cn.cn_nameptr,
358 			    cn.cn_namelen, VDIR, -1, 0, tmp->mtime, 0, 0,
359 			    DEFDIRMODE, 0, NULL, NODEV, parent, &tnp);
360 			if (error != 0)
361 				break;
362 		}
363 
364 		parent = tnp;
365 		tnp = NULL;
366 		cn.cn_nameptr = name;
367 		cn.cn_namelen = len;
368 		TARFS_DPF(LOOKUP, "%s: Search: %.*s\n", __func__,
369 		    (int)cn.cn_namelen, cn.cn_nameptr);
370 		if (do_lookup) {
371 			tnp = tarfs_lookup_node(parent, NULL, &cn);
372 			if (tnp == NULL) {
373 				do_lookup = false;
374 				if (!create_dirs)
375 					break;
376 			}
377 		}
378 		name += cn.cn_namelen;
379 		namelen -= cn.cn_namelen;
380 	}
381 
382 	TARFS_DPF(LOOKUP, "%s: Parent %p, node %p\n", __func__, parent, tnp);
383 
384 	if (retparent)
385 		*retparent = parent;
386 	if (retnode)
387 		*retnode = tnp;
388 	if (endp) {
389 		if (namelen > 0)
390 			*endp = name;
391 		else
392 			*endp = NULL;
393 	}
394 	if (sepp)
395 		*sepp = sep;
396 	return (error);
397 }
398 
399 /*
400  * Frees a tarfs_mount structure and everything it references.
401  */
402 static void
403 tarfs_free_mount(struct tarfs_mount *tmp)
404 {
405 	struct mount *mp;
406 	struct tarfs_node *tnp;
407 
408 	MPASS(tmp != NULL);
409 
410 	TARFS_DPF(ALLOC, "%s: Freeing mount structure %p\n", __func__, tmp);
411 
412 	TARFS_DPF(ALLOC, "%s: freeing tarfs_node structures\n", __func__);
413 	while (!TAILQ_EMPTY(&tmp->allnodes)) {
414 		tnp = TAILQ_FIRST(&tmp->allnodes);
415 		TAILQ_REMOVE(&tmp->allnodes, tnp, entries);
416 		tarfs_free_node(tnp);
417 	}
418 
419 	(void)tarfs_io_fini(tmp);
420 
421 	TARFS_DPF(ALLOC, "%s: deleting unr header\n", __func__);
422 	delete_unrhdr(tmp->ino_unr);
423 	mp = tmp->vfs;
424 	mp->mnt_data = NULL;
425 
426 	TARFS_DPF(ALLOC, "%s: freeing structure\n", __func__);
427 	free(tmp, M_TARFSMNT);
428 }
429 
430 /*
431  * Processes the tar file header at block offset blknump and allocates and
432  * populates a tarfs_node structure for the file it describes.  Updated
433  * blknump to point to the next unread tar file block, or TAR_EOF if EOF
434  * is reached.  Returns 0 on success or EOF and a positive errno value on
435  * failure.
436  */
437 static int
438 tarfs_alloc_one(struct tarfs_mount *tmp, off_t *blknump)
439 {
440 	char block[TARFS_BLOCKSIZE];
441 	struct ustar_header *hdrp = (struct ustar_header *)block;
442 	struct sbuf *namebuf = NULL;
443 	char *exthdr = NULL, *name = NULL, *link = NULL;
444 	off_t blknum = *blknump;
445 	int64_t num;
446 	int endmarker = 0;
447 	char *namep, *sep;
448 	struct tarfs_node *parent, *tnp;
449 	size_t namelen = 0, linklen = 0, realsize = 0, sz;
450 	ssize_t res;
451 	dev_t rdev;
452 	gid_t gid;
453 	mode_t mode;
454 	time_t mtime;
455 	uid_t uid;
456 	long major = -1, minor = -1;
457 	unsigned int flags = 0;
458 	int error;
459 	boolean_t sparse = false;
460 
461 again:
462 	/* read next header */
463 	res = tarfs_io_read_buf(tmp, false, block,
464 	    TARFS_BLOCKSIZE * blknum, TARFS_BLOCKSIZE);
465 	if (res < 0) {
466 		error = -res;
467 		goto bad;
468 	} else if (res < TARFS_BLOCKSIZE) {
469 		goto eof;
470 	}
471 	blknum++;
472 
473 	/* check for end marker */
474 	if (memcmp(block, zero_region, TARFS_BLOCKSIZE) == 0) {
475 		if (endmarker++) {
476 			if (exthdr != NULL) {
477 				TARFS_DPF(IO, "%s: orphaned extended header at %zu\n",
478 				    __func__, TARFS_BLOCKSIZE * (blknum - 1));
479 				free(exthdr, M_TEMP);
480 			}
481 			TARFS_DPF(IO, "%s: end of archive at %zu\n", __func__,
482 			    TARFS_BLOCKSIZE * blknum);
483 			tmp->nblocks = blknum;
484 			*blknump = TAR_EOF;
485 			return (0);
486 		}
487 		goto again;
488 	}
489 
490 	/* verify magic */
491 	if (memcmp(hdrp->magic, USTAR_MAGIC, sizeof(USTAR_MAGIC)) == 0 &&
492 	    memcmp(hdrp->version, USTAR_VERSION, sizeof(USTAR_VERSION)) == 0) {
493 		/* POSIX */
494 	} else if (memcmp(hdrp->magic, GNUTAR_MAGIC, sizeof(GNUTAR_MAGIC)) == 0 &&
495 	    memcmp(hdrp->magic, GNUTAR_MAGIC, sizeof(GNUTAR_MAGIC)) == 0) {
496 		TARFS_DPF(ALLOC, "%s: GNU tar format at %zu\n", __func__,
497 		    TARFS_BLOCKSIZE * (blknum - 1));
498 		error = EFTYPE;
499 		goto bad;
500 	} else {
501 		TARFS_DPF(ALLOC, "%s: unsupported TAR format at %zu\n",
502 		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
503 		error = EINVAL;
504 		goto bad;
505 	}
506 
507 	/* verify checksum */
508 	if (!tarfs_checksum(hdrp)) {
509 		TARFS_DPF(ALLOC, "%s: header checksum failed at %zu\n",
510 		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
511 		error = EINVAL;
512 		goto bad;
513 	}
514 
515 	/* get standard attributes */
516 	num = tarfs_str2int64(hdrp->mode, sizeof(hdrp->mode));
517 	if (num < 0 || num > ALLPERMS) {
518 		TARFS_DPF(ALLOC, "%s: invalid file mode at %zu\n",
519 		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
520 		mode = S_IRUSR;
521 	} else {
522 		mode = num;
523 	}
524 	num = tarfs_str2int64(hdrp->uid, sizeof(hdrp->uid));
525 	if (num < 0 || num > UID_MAX) {
526 		TARFS_DPF(ALLOC, "%s: UID out of range at %zu\n",
527 		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
528 		uid = tmp->root->uid;
529 		mode &= ~S_ISUID;
530 	} else {
531 		uid = num;
532 	}
533 	num = tarfs_str2int64(hdrp->gid, sizeof(hdrp->gid));
534 	if (num < 0 || num > GID_MAX) {
535 		TARFS_DPF(ALLOC, "%s: GID out of range at %zu\n",
536 		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
537 		gid = tmp->root->gid;
538 		mode &= ~S_ISGID;
539 	} else {
540 		gid = num;
541 	}
542 	num = tarfs_str2int64(hdrp->size, sizeof(hdrp->size));
543 	if (num < 0) {
544 		TARFS_DPF(ALLOC, "%s: negative size at %zu\n",
545 		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
546 		error = EINVAL;
547 		goto bad;
548 	} else {
549 		sz = num;
550 	}
551 	mtime = tarfs_str2int64(hdrp->mtime, sizeof(hdrp->mtime));
552 	rdev = NODEV;
553 	TARFS_DPF(ALLOC, "%s: [%c] %zu @%jd %o %d:%d\n", __func__,
554 	    hdrp->typeflag[0], sz, (intmax_t)mtime, mode, uid, gid);
555 
556 	/* extended header? */
557 	if (hdrp->typeflag[0] == TAR_TYPE_GLOBAL_EXTHDR) {
558 		printf("%s: unsupported global extended header at %zu\n",
559 		    __func__, (size_t)(TARFS_BLOCKSIZE * (blknum - 1)));
560 		error = EFTYPE;
561 		goto bad;
562 	}
563 	if (hdrp->typeflag[0] == TAR_TYPE_EXTHDR) {
564 		if (exthdr != NULL) {
565 			TARFS_DPF(IO, "%s: multiple extended headers at %zu\n",
566 			    __func__, TARFS_BLOCKSIZE * (blknum - 1));
567 			error = EFTYPE;
568 			goto bad;
569 		}
570 		/* read the contents of the exthdr */
571 		TARFS_DPF(ALLOC, "%s: %zu-byte extended header at %zd\n",
572 		    __func__, sz, TARFS_BLOCKSIZE * (blknum - 1));
573 		exthdr = malloc(sz, M_TEMP, M_WAITOK);
574 		res = tarfs_io_read_buf(tmp, false, exthdr,
575 		    TARFS_BLOCKSIZE * blknum, sz);
576 		if (res < 0) {
577 			error = -res;
578 			goto bad;
579 		}
580 		if (res < sz) {
581 			goto eof;
582 		}
583 		blknum += TARFS_SZ2BLKS(res);
584 		/* XXX TODO: refactor this parser */
585 		char *line = exthdr;
586 		while (line < exthdr + sz) {
587 			char *eol, *key, *value, *sep;
588 			size_t len = strtoul(line, &sep, 10);
589 			if (len == 0 || sep == line || *sep != ' ') {
590 				TARFS_DPF(ALLOC, "%s: exthdr syntax error\n",
591 				    __func__);
592 				error = EINVAL;
593 				goto bad;
594 			}
595 			if (line + len > exthdr + sz) {
596 				TARFS_DPF(ALLOC, "%s: exthdr overflow\n",
597 				    __func__);
598 				error = EINVAL;
599 				goto bad;
600 			}
601 			eol = line + len - 1;
602 			*eol = '\0';
603 			line += len;
604 			key = sep + 1;
605 			sep = strchr(key, '=');
606 			if (sep == NULL) {
607 				TARFS_DPF(ALLOC, "%s: exthdr syntax error\n",
608 				    __func__);
609 				error = EINVAL;
610 				goto bad;
611 			}
612 			*sep = '\0';
613 			value = sep + 1;
614 			TARFS_DPF(ALLOC, "%s: exthdr %s=%s\n", __func__,
615 			    key, value);
616 			if (strcmp(key, "linkpath") == 0) {
617 				link = value;
618 				linklen = eol - value;
619 			} else if (strcmp(key, "GNU.sparse.major") == 0) {
620 				sparse = true;
621 				major = strtol(value, &sep, 10);
622 				if (sep != eol) {
623 					printf("exthdr syntax error\n");
624 					error = EINVAL;
625 					goto bad;
626 				}
627 			} else if (strcmp(key, "GNU.sparse.minor") == 0) {
628 				sparse = true;
629 				minor = strtol(value, &sep, 10);
630 				if (sep != eol) {
631 					printf("exthdr syntax error\n");
632 					error = EINVAL;
633 					goto bad;
634 				}
635 			} else if (strcmp(key, "GNU.sparse.name") == 0) {
636 				sparse = true;
637 				name = value;
638 				namelen = eol - value;
639 				if (namelen == 0) {
640 					printf("exthdr syntax error\n");
641 					error = EINVAL;
642 					goto bad;
643 				}
644 			} else if (strcmp(key, "GNU.sparse.realsize") == 0) {
645 				sparse = true;
646 				realsize = strtoul(value, &sep, 10);
647 				if (sep != eol) {
648 					printf("exthdr syntax error\n");
649 					error = EINVAL;
650 					goto bad;
651 				}
652 			} else if (strcmp(key, "SCHILY.fflags") == 0) {
653 				flags |= tarfs_strtofflags(value, &sep);
654 				if (sep != eol) {
655 					printf("exthdr syntax error\n");
656 					error = EINVAL;
657 					goto bad;
658 				}
659 			}
660 		}
661 		goto again;
662 	}
663 
664 	/* sparse file consistency checks */
665 	if (sparse) {
666 		TARFS_DPF(ALLOC, "%s: %s: sparse %ld.%ld (%zu bytes)\n", __func__,
667 		    name, major, minor, realsize);
668 		if (major != 1 || minor != 0 || name == NULL || realsize == 0 ||
669 		    hdrp->typeflag[0] != TAR_TYPE_FILE) {
670 			TARFS_DPF(ALLOC, "%s: invalid sparse format\n", __func__);
671 			error = EINVAL;
672 			goto bad;
673 		}
674 	}
675 
676 	/* file name */
677 	if (name == NULL) {
678 		if (hdrp->prefix[0] != '\0') {
679 			namebuf = sbuf_new_auto();
680 			sbuf_printf(namebuf, "%.*s/%.*s",
681 			    (int)sizeof(hdrp->prefix), hdrp->prefix,
682 			    (int)sizeof(hdrp->name), hdrp->name);
683 			sbuf_finish(namebuf);
684 			name = sbuf_data(namebuf);
685 			namelen = sbuf_len(namebuf);
686 		} else {
687 			name = hdrp->name;
688 			namelen = strnlen(hdrp->name, sizeof(hdrp->name));
689 		}
690 	}
691 
692 	error = tarfs_lookup_path(tmp, name, namelen, &namep,
693 	    &sep, &parent, &tnp, true);
694 	if (error != 0) {
695 		TARFS_DPF(ALLOC, "%s: failed to look up %.*s\n", __func__,
696 		    (int)namelen, name);
697 		error = EINVAL;
698 		goto bad;
699 	}
700 	if (tnp != NULL) {
701 		if (hdrp->typeflag[0] == TAR_TYPE_DIRECTORY) {
702 			/* XXX set attributes? */
703 			goto skip;
704 		}
705 		TARFS_DPF(ALLOC, "%s: duplicate file %.*s\n", __func__,
706 		    (int)namelen, name);
707 		error = EINVAL;
708 		goto bad;
709 	}
710 	switch (hdrp->typeflag[0]) {
711 	case TAR_TYPE_DIRECTORY:
712 		error = tarfs_alloc_node(tmp, namep, sep - namep, VDIR,
713 		    0, 0, mtime, uid, gid, mode, flags, NULL, 0,
714 		    parent, &tnp);
715 		break;
716 	case TAR_TYPE_FILE:
717 		error = tarfs_alloc_node(tmp, namep, sep - namep, VREG,
718 		    blknum * TARFS_BLOCKSIZE, sz, mtime, uid, gid, mode,
719 		    flags, NULL, 0, parent, &tnp);
720 		if (error == 0 && sparse) {
721 			error = tarfs_load_blockmap(tnp, realsize);
722 		}
723 		break;
724 	case TAR_TYPE_HARDLINK:
725 		if (link == NULL) {
726 			link = hdrp->linkname;
727 			linklen = strnlen(link, sizeof(hdrp->linkname));
728 		}
729 		error = tarfs_alloc_node(tmp, namep, sep - namep, VREG,
730 		    0, 0, 0, 0, 0, 0, 0, NULL, 0, parent, &tnp);
731 		if (error != 0) {
732 			goto bad;
733 		}
734 		error = tarfs_lookup_path(tmp, link, linklen, NULL,
735 		    NULL, NULL, &tnp->other, false);
736 		if (tnp->other == NULL ||
737 		    tnp->other->type != VREG ||
738 		    tnp->other->other != NULL) {
739 			TARFS_DPF(ALLOC, "%s: %.*s: dead hard link to %.*s\n",
740 			    __func__, (int)namelen, name, (int)linklen, link);
741 			error = EINVAL;
742 			goto bad;
743 		}
744 		break;
745 	case TAR_TYPE_SYMLINK:
746 		if (link == NULL) {
747 			link = hdrp->linkname;
748 			linklen = strnlen(link, sizeof(hdrp->linkname));
749 		}
750 		error = tarfs_alloc_node(tmp, namep, sep - namep, VLNK,
751 		    0, linklen, mtime, uid, gid, mode, flags, link, 0,
752 		    parent, &tnp);
753 		break;
754 	case TAR_TYPE_BLOCK:
755 		major = tarfs_str2int64(hdrp->major, sizeof(hdrp->major));
756 		minor = tarfs_str2int64(hdrp->minor, sizeof(hdrp->minor));
757 		rdev = makedev(major, minor);
758 		error = tarfs_alloc_node(tmp, namep, sep - namep, VBLK,
759 		    0, 0, mtime, uid, gid, mode, flags, NULL, rdev,
760 		    parent, &tnp);
761 		break;
762 	case TAR_TYPE_CHAR:
763 		major = tarfs_str2int64(hdrp->major, sizeof(hdrp->major));
764 		minor = tarfs_str2int64(hdrp->minor, sizeof(hdrp->minor));
765 		rdev = makedev(major, minor);
766 		error = tarfs_alloc_node(tmp, namep, sep - namep, VCHR,
767 		    0, 0, mtime, uid, gid, mode, flags, NULL, rdev,
768 		    parent, &tnp);
769 		break;
770 	default:
771 		TARFS_DPF(ALLOC, "%s: unsupported type %c for %.*s\n",
772 		    __func__, hdrp->typeflag[0], (int)namelen, name);
773 		error = EINVAL;
774 		break;
775 	}
776 	if (error != 0)
777 		goto bad;
778 
779 skip:
780 	blknum += TARFS_SZ2BLKS(sz);
781 	tmp->nblocks = blknum;
782 	*blknump = blknum;
783 	if (exthdr != NULL) {
784 		free(exthdr, M_TEMP);
785 	}
786 	if (namebuf != NULL) {
787 		sbuf_delete(namebuf);
788 	}
789 	return (0);
790 eof:
791 	TARFS_DPF(IO, "%s: premature end of file\n", __func__);
792 	error = EIO;
793 	goto bad;
794 bad:
795 	if (exthdr != NULL) {
796 		free(exthdr, M_TEMP);
797 	}
798 	if (namebuf != NULL) {
799 		sbuf_delete(namebuf);
800 	}
801 	return (error);
802 }
803 
804 /*
805  * Allocates and populates the metadata structures for the tar file
806  * referenced by vp.  On success, a pointer to the tarfs_mount structure
807  * is stored in tmpp.  Returns 0 on success or a positive errno value on
808  * failure.
809  */
810 static int
811 tarfs_alloc_mount(struct mount *mp, struct vnode *vp,
812     uid_t root_uid, gid_t root_gid, mode_t root_mode,
813     struct tarfs_mount **tmpp)
814 {
815 	struct vattr va;
816 	struct thread *td = curthread;
817 	struct tarfs_mount *tmp;
818 	struct tarfs_node *root;
819 	off_t blknum;
820 	time_t mtime;
821 	int error;
822 
823 	KASSERT(tmpp != NULL, ("tarfs mount return is NULL"));
824 	ASSERT_VOP_LOCKED(vp, __func__);
825 
826 	tmp = NULL;
827 
828 	TARFS_DPF(ALLOC, "%s: Allocating tarfs mount structure for vp %p\n",
829 	    __func__, vp);
830 
831 	/* Get source metadata */
832 	error = VOP_GETATTR(vp, &va, td->td_ucred);
833 	if (error != 0) {
834 		return (error);
835 	}
836 	VOP_UNLOCK(vp);
837 	mtime = va.va_mtime.tv_sec;
838 
839 	/* Allocate and initialize tarfs mount structure */
840 	tmp = malloc(sizeof(*tmp), M_TARFSMNT, M_WAITOK | M_ZERO);
841 	TARFS_DPF(ALLOC, "%s: Allocated mount structure\n", __func__);
842 	mp->mnt_data = tmp;
843 
844 	mtx_init(&tmp->allnode_lock, "tarfs allnode lock", NULL,
845 	    MTX_DEF);
846 	TAILQ_INIT(&tmp->allnodes);
847 	tmp->ino_unr = new_unrhdr(TARFS_MININO, INT_MAX, &tmp->allnode_lock);
848 	tmp->vp = vp;
849 	tmp->vfs = mp;
850 	tmp->mtime = mtime;
851 
852 	/*
853 	 * XXX The decompression layer passes everything through the
854 	 * buffer cache, and the buffer cache wants to know our blocksize,
855 	 * but mnt_stat normally isn't populated until after we return, so
856 	 * we have to cheat a bit.
857 	 */
858 	tmp->iosize = 1U << tarfs_ioshift;
859 	mp->mnt_stat.f_iosize = tmp->iosize;
860 
861 	/* Initialize decompression layer */
862 	error = tarfs_io_init(tmp);
863 	if (error != 0)
864 		goto bad;
865 
866 	error = tarfs_alloc_node(tmp, NULL, 0, VDIR, 0, 0, mtime, root_uid,
867 	    root_gid, root_mode & ALLPERMS, 0, NULL, NODEV, NULL, &root);
868 	if (error != 0 || root == NULL)
869 		goto bad;
870 	tmp->root = root;
871 
872 	blknum = 0;
873 	do {
874 		if ((error = tarfs_alloc_one(tmp, &blknum)) != 0) {
875 			goto bad;
876 		}
877 	} while (blknum != TAR_EOF);
878 
879 	*tmpp = tmp;
880 
881 	TARFS_DPF(ALLOC, "%s: pfsmnt_root %p\n", __func__, tmp->root);
882 	return (0);
883 
884 bad:
885 	tarfs_free_mount(tmp);
886 	return (error);
887 }
888 
889 /*
890  * VFS Operations.
891  */
892 
893 static int
894 tarfs_mount(struct mount *mp)
895 {
896 	struct nameidata nd;
897 	struct vattr va;
898 	struct tarfs_mount *tmp = NULL;
899 	struct thread *td = curthread;
900 	struct vnode *vp;
901 	char *from;
902 	uid_t root_uid;
903 	gid_t root_gid;
904 	mode_t root_mode;
905 	int error, flags, len;
906 
907 	if (mp->mnt_flag & MNT_UPDATE)
908 		return (EOPNOTSUPP);
909 
910 	if (vfs_filteropt(mp->mnt_optnew, tarfs_opts))
911 		return (EINVAL);
912 
913 	vn_lock(mp->mnt_vnodecovered, LK_SHARED | LK_RETRY);
914 	error = VOP_GETATTR(mp->mnt_vnodecovered, &va, mp->mnt_cred);
915 	VOP_UNLOCK(mp->mnt_vnodecovered);
916 	if (error)
917 		return (error);
918 
919 	if (mp->mnt_cred->cr_ruid != 0 ||
920 	    vfs_scanopt(mp->mnt_optnew, "gid", "%d", &root_gid) != 1)
921 		root_gid = va.va_gid;
922 	if (mp->mnt_cred->cr_ruid != 0 ||
923 	    vfs_scanopt(mp->mnt_optnew, "uid", "%d", &root_uid) != 1)
924 		root_uid = va.va_uid;
925 	if (mp->mnt_cred->cr_ruid != 0 ||
926 	    vfs_scanopt(mp->mnt_optnew, "mode", "%ho", &root_mode) != 1)
927 		root_mode = va.va_mode;
928 
929 	error = vfs_getopt(mp->mnt_optnew, "from", (void **)&from, &len);
930 	if (error != 0 || from[len - 1] != '\0')
931 		return (EINVAL);
932 
933 	/* Find the source tarball */
934 	TARFS_DPF(FS, "%s(%s, uid=%u, gid=%u, mode=%o)\n", __func__,
935 	    from, root_uid, root_gid, root_mode);
936 	flags = FREAD;
937 	if (vfs_flagopt(mp->mnt_optnew, "verify", NULL, 0)) {
938 	    flags |= O_VERIFY;
939 	}
940 	NDINIT(&nd, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF, UIO_SYSSPACE, from);
941 	error = namei(&nd);
942 	if (error != 0)
943 		return (error);
944 	NDFREE_PNBUF(&nd);
945 	vp = nd.ni_vp;
946 	TARFS_DPF(FS, "%s: N: hold %u use %u lock 0x%x\n", __func__,
947 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
948 	/* vp is now held and locked */
949 
950 	/* Open the source tarball */
951 	error = vn_open_vnode(vp, flags, td->td_ucred, td, NULL);
952 	if (error != 0) {
953 		TARFS_DPF(FS, "%s: failed to open %s: %d\n", __func__,
954 		    from, error);
955 		vput(vp);
956 		goto bad;
957 	}
958 	TARFS_DPF(FS, "%s: O: hold %u use %u lock 0x%x\n", __func__,
959 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
960 	if (vp->v_type != VREG) {
961 		TARFS_DPF(FS, "%s: not a regular file\n", __func__);
962 		error = EOPNOTSUPP;
963 		goto bad_open_locked;
964 	}
965 	error = priv_check(td, PRIV_VFS_MOUNT_PERM);
966 	if (error != 0) {
967 		TARFS_DPF(FS, "%s: not permitted to mount\n", __func__);
968 		goto bad_open_locked;
969 	}
970 	if (flags & O_VERIFY) {
971 		mp->mnt_flag |= MNT_VERIFIED;
972 	}
973 
974 	/* Allocate the tarfs mount */
975 	error = tarfs_alloc_mount(mp, vp, root_uid, root_gid, root_mode, &tmp);
976 	/* vp is now held but unlocked */
977 	if (error != 0) {
978 		TARFS_DPF(FS, "%s: failed to mount %s: %d\n", __func__,
979 		    from, error);
980 		goto bad_open_unlocked;
981 	}
982 	TARFS_DPF(FS, "%s: M: hold %u use %u lock 0x%x\n", __func__,
983 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
984 
985 	/* Unconditionally mount as read-only */
986 	MNT_ILOCK(mp);
987 	mp->mnt_flag |= (MNT_LOCAL | MNT_RDONLY);
988 	MNT_IUNLOCK(mp);
989 
990 	vfs_getnewfsid(mp);
991 	vfs_mountedfrom(mp, "tarfs");
992 	TARFS_DPF(FS, "%s: success\n", __func__);
993 
994 	return (0);
995 
996 bad_open_locked:
997 	/* vp must be held and locked */
998 	TARFS_DPF(FS, "%s: L: hold %u use %u lock 0x%x\n", __func__,
999 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1000 	VOP_UNLOCK(vp);
1001 bad_open_unlocked:
1002 	/* vp must be held and unlocked */
1003 	TARFS_DPF(FS, "%s: E: hold %u use %u lock 0x%x\n", __func__,
1004 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1005 	(void)vn_close(vp, flags, td->td_ucred, td);
1006 bad:
1007 	/* vp must be released and unlocked */
1008 	TARFS_DPF(FS, "%s: X: hold %u use %u lock 0x%x\n", __func__,
1009 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1010 	return (error);
1011 }
1012 
1013 /*
1014  * Unmounts a tarfs filesystem.
1015  */
1016 static int
1017 tarfs_unmount(struct mount *mp, int mntflags)
1018 {
1019 	struct thread *td = curthread;
1020 	struct tarfs_mount *tmp;
1021 	struct vnode *vp;
1022 	int error;
1023 	int flags = 0;
1024 
1025 	TARFS_DPF(FS, "%s: Unmounting %p\n", __func__, mp);
1026 
1027 	/* Handle forced unmounts */
1028 	if (mntflags & MNT_FORCE)
1029 		flags |= FORCECLOSE;
1030 
1031 	/* Finalize all pending I/O */
1032 	error = vflush(mp, 0, flags, curthread);
1033 	if (error != 0)
1034 		return (error);
1035 	tmp = MP_TO_TARFS_MOUNT(mp);
1036 	vp = tmp->vp;
1037 
1038 	MPASS(vp != NULL);
1039 	TARFS_DPF(FS, "%s: U: hold %u use %u lock 0x%x\n", __func__,
1040 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1041 	vn_close(vp, FREAD, td->td_ucred, td);
1042 	TARFS_DPF(FS, "%s: C: hold %u use %u lock 0x%x\n", __func__,
1043 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1044 	tarfs_free_mount(tmp);
1045 
1046 	return (0);
1047 }
1048 
1049 /*
1050  * Gets the root of a tarfs filesystem.  Returns 0 on success or a
1051  * positive errno value on failure.
1052  */
1053 static int
1054 tarfs_root(struct mount *mp, int flags, struct vnode **vpp)
1055 {
1056 	struct vnode *nvp;
1057 	int error;
1058 
1059 	TARFS_DPF(FS, "%s: Getting root vnode\n", __func__);
1060 
1061 	error = VFS_VGET(mp, TARFS_ROOTINO, LK_EXCLUSIVE, &nvp);
1062 	if (error != 0)
1063 		return (error);
1064 
1065 	nvp->v_vflag |= VV_ROOT;
1066 	*vpp = nvp;
1067 	return (0);
1068 }
1069 
1070 /*
1071  * Gets statistics for a tarfs filesystem.  Returns 0.
1072  */
1073 static int
1074 tarfs_statfs(struct mount *mp, struct statfs *sbp)
1075 {
1076 	struct tarfs_mount *tmp;
1077 
1078 	tmp = MP_TO_TARFS_MOUNT(mp);
1079 
1080 	sbp->f_bsize = TARFS_BLOCKSIZE;
1081 	sbp->f_iosize = tmp->iosize;
1082 	sbp->f_blocks = tmp->nblocks;
1083 	sbp->f_bfree = 0;
1084 	sbp->f_bavail = 0;
1085 	sbp->f_files = tmp->nfiles;
1086 	sbp->f_ffree = 0;
1087 
1088 	return (0);
1089 }
1090 
1091 /*
1092  * Gets a vnode for the given inode.  On success, a pointer to the vnode
1093  * is stored in vpp.  Returns 0 on success or a positive errno value on
1094  * failure.
1095  */
1096 static int
1097 tarfs_vget(struct mount *mp, ino_t ino, int lkflags, struct vnode **vpp)
1098 {
1099 	struct tarfs_mount *tmp;
1100 	struct tarfs_node *tnp;
1101 	struct thread *td;
1102 	struct vnode *vp;
1103 	int error;
1104 
1105 	TARFS_DPF(FS, "%s: mp %p, ino %lu, lkflags %d\n", __func__, mp, ino,
1106 	    lkflags);
1107 
1108 	td = curthread;
1109 	error = vfs_hash_get(mp, ino, lkflags, td, vpp, NULL, NULL);
1110 	if (error != 0)
1111 		return (error);
1112 
1113 	if (*vpp != NULL) {
1114 		TARFS_DPF(FS, "%s: found hashed vnode %p\n", __func__, *vpp);
1115 		return (error);
1116 	}
1117 
1118 	TARFS_DPF(FS, "%s: no hashed vnode for inode %lu\n", __func__, ino);
1119 
1120 	tmp = MP_TO_TARFS_MOUNT(mp);
1121 
1122 	if (ino == TARFS_ZIOINO) {
1123 		error = vget(tmp->znode, lkflags);
1124 		if (error != 0)
1125 			return (error);
1126 		*vpp = tmp->znode;
1127 		return (0);
1128 	}
1129 
1130 	/* XXX Should use hash instead? */
1131 	TAILQ_FOREACH(tnp, &tmp->allnodes, entries) {
1132 		if (tnp->ino == ino)
1133 			break;
1134 	}
1135 	TARFS_DPF(FS, "%s: search of all nodes found %p\n", __func__, tnp);
1136 	if (tnp == NULL)
1137 		return (ENOENT);
1138 
1139 	(void)getnewvnode("tarfs", mp, &tarfs_vnodeops, &vp);
1140 	TARFS_DPF(FS, "%s: allocated vnode\n", __func__);
1141 	vp->v_data = tnp;
1142 	vp->v_type = tnp->type;
1143 	tnp->vnode = vp;
1144 
1145 	lockmgr(vp->v_vnlock, lkflags, NULL);
1146 	error = insmntque(vp, mp);
1147 	if (error != 0)
1148 		goto bad;
1149 	TARFS_DPF(FS, "%s: inserting entry into VFS hash\n", __func__);
1150 	error = vfs_hash_insert(vp, ino, lkflags, td, vpp, NULL, NULL);
1151 	if (error != 0 || *vpp != NULL)
1152 		return (error);
1153 
1154 	vn_set_state(vp, VSTATE_CONSTRUCTED);
1155 	*vpp = vp;
1156 	return (0);
1157 
1158 bad:
1159 	*vpp = NULLVP;
1160 	return (error);
1161 }
1162 
1163 static int
1164 tarfs_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
1165 {
1166 	struct tarfs_node *tnp;
1167 	struct tarfs_fid *tfp;
1168 	struct vnode *nvp;
1169 	int error;
1170 
1171 	tfp = (struct tarfs_fid *)fhp;
1172 	MP_TO_TARFS_MOUNT(mp);
1173 	if (tfp->ino < TARFS_ROOTINO || tfp->ino > INT_MAX)
1174 		return (ESTALE);
1175 
1176 	error = VFS_VGET(mp, tfp->ino, LK_EXCLUSIVE, &nvp);
1177 	if (error != 0) {
1178 		*vpp = NULLVP;
1179 		return (error);
1180 	}
1181 	tnp = VP_TO_TARFS_NODE(nvp);
1182 	if (tnp->mode == 0 ||
1183 	    tnp->gen != tfp->gen ||
1184 	    tnp->nlink <= 0) {
1185 		vput(nvp);
1186 		*vpp = NULLVP;
1187 		return (ESTALE);
1188 	}
1189 	*vpp = nvp;
1190 	return (0);
1191 }
1192 
1193 static struct vfsops tarfs_vfsops = {
1194 	.vfs_fhtovp =	tarfs_fhtovp,
1195 	.vfs_mount =	tarfs_mount,
1196 	.vfs_root =	tarfs_root,
1197 	.vfs_statfs =	tarfs_statfs,
1198 	.vfs_unmount =	tarfs_unmount,
1199 	.vfs_vget =	tarfs_vget,
1200 };
1201 VFS_SET(tarfs_vfsops, tarfs, VFCF_READONLY);
1202 MODULE_VERSION(tarfs, 1);
1203 MODULE_DEPEND(tarfs, xz, 1, 1, 1);
1204