xref: /freebsd/sys/fs/tarfs/tarfs_vfsops.c (revision 3f5d875a27318a909f23a2b7463c4b2d963085df)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013 Juniper Networks, Inc.
5  * Copyright (c) 2022-2023 Klara, Inc.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include "opt_tarfs.h"
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/buf.h>
34 #include <sys/conf.h>
35 #include <sys/fcntl.h>
36 #include <sys/libkern.h>
37 #include <sys/limits.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/mount.h>
41 #include <sys/mutex.h>
42 #include <sys/namei.h>
43 #include <sys/priv.h>
44 #include <sys/proc.h>
45 #include <sys/queue.h>
46 #include <sys/sbuf.h>
47 #include <sys/stat.h>
48 #include <sys/uio.h>
49 #include <sys/vnode.h>
50 
51 #include <vm/vm_param.h>
52 
53 #include <geom/geom.h>
54 #include <geom/geom_vfs.h>
55 
56 #include <fs/tarfs/tarfs.h>
57 #include <fs/tarfs/tarfs_dbg.h>
58 
/*
 * tarfs_alloc_one() recognizes end-of-archive marker blocks by comparing a
 * TARFS_BLOCKSIZE-byte header block against zero_region, so zero_region
 * has to be larger than one tar block.
 */
CTASSERT(ZERO_REGION_SIZE > TARFS_BLOCKSIZE);
60 
/*
 * On-disk layout of a tar header.  tarfs_alloc_one() overlays this
 * structure on a raw block read from the archive, so the field sizes and
 * order must not change.  All numeric fields are text (or base-256) and
 * are decoded with tarfs_str2int64(); string fields are not guaranteed to
 * be NUL-terminated when they use their full width.
 */
struct ustar_header {
	char	name[100];		/* File name */
	char	mode[8];		/* Mode flags */
	char	uid[8];			/* User id */
	char	gid[8];			/* Group id */
	char	size[12];		/* Size */
	char	mtime[12];		/* Modified time */
	char	checksum[8];		/* Checksum */
	char	typeflag[1];		/* Type */
	char	linkname[100];		/* "old format" stops here */
	char	magic[6];		/* POSIX UStar "ustar\0" indicator */
	char	version[2];		/* POSIX UStar version "00" */
	char	uname[32];		/* User name */
	char	gname[32];		/* Group name */
	char	major[8];		/* Device major number */
	char	minor[8];		/* Device minor number */
	char	prefix[155];		/* Path prefix */
};

/* The fields above add up to 500 bytes; catch accidental layout changes. */
_Static_assert(sizeof(struct ustar_header) == 500,
    "unexpected ustar header size");
79 
/*
 * Sentinel stored through blknump by tarfs_alloc_one() once the end of
 * the archive has been reached.
 */
#define	TAR_EOF			((off_t)-1)

/* Values of the typeflag field of a tar header. */
#define	TAR_TYPE_FILE		'0'
#define	TAR_TYPE_HARDLINK	'1'
#define	TAR_TYPE_SYMLINK	'2'
#define	TAR_TYPE_CHAR		'3'
#define	TAR_TYPE_BLOCK		'4'
#define	TAR_TYPE_DIRECTORY	'5'
#define	TAR_TYPE_FIFO		'6'
#define	TAR_TYPE_CONTIG		'7'
#define	TAR_TYPE_GLOBAL_EXTHDR	'g'
#define	TAR_TYPE_EXTHDR		'x'
#define	TAR_TYPE_GNU_SPARSE	'S'

/*
 * Expected contents of the magic and version header fields for the POSIX
 * ustar and GNU tar formats.  These are compound literals so that sizeof
 * yields the number of bytes to compare.
 */
#define	USTAR_MAGIC		(uint8_t []){ 'u', 's', 't', 'a', 'r', 0 }
#define	USTAR_VERSION		(uint8_t []){ '0', '0' }
#define	GNUTAR_MAGIC		(uint8_t []){ 'u', 's', 't', 'a', 'r', ' ' }
#define	GNUTAR_VERSION		(uint8_t []){ ' ', '\x0' }

/*
 * Mode (r-xr-xr-x) given to directories that tarfs_lookup_path() creates
 * on the fly because the archive has no entry of their own for them.
 */
#define	DEFDIRMODE	(S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
100 
/*
 * Allocation types.  M_TARFSMNT is used for struct tarfs_mount; M_TARFSNODE
 * is presumably used by the node allocator, which is not in this file.
 */
MALLOC_DEFINE(M_TARFSMNT, "tarfs mount", "tarfs mount structures");
MALLOC_DEFINE(M_TARFSNODE, "tarfs node", "tarfs node structures");

/* Forward declarations of the VFS operation handlers. */
static vfs_mount_t	tarfs_mount;
static vfs_unmount_t	tarfs_unmount;
static vfs_root_t	tarfs_root;
static vfs_statfs_t	tarfs_statfs;
static vfs_fhtovp_t	tarfs_fhtovp;

/*
 * Mount options accepted by tarfs_mount(); the list is passed to
 * vfs_filteropt() and the mount fails with EINVAL if that check fails.
 */
static const char *tarfs_opts[] = {
	"from", "gid", "mode", "uid", "verify",
	NULL
};
114 
/*
 * Reads a signed octal number of at most len characters from strp and
 * returns its value.
 *
 * Leading spaces and tabs are skipped, an optional '-' sign is accepted,
 * and parsing stops at the first character that is not an octal digit or
 * after len characters, whichever comes first.  A field without digits
 * yields 0.  A magnitude that does not fit in an int64_t saturates at
 * INT64_MAX (and therefore at -INT64_MAX for negative input).
 *
 * XXX Does not report errors.
 */
static int64_t
tarfs_str2octal(const char *strp, size_t len)
{
	int64_t val;
	size_t idx;
	int digit, sign;

	/*
	 * Skip leading spaces or tabs.
	 * XXX why?  POSIX requires numeric fields to be 0-padded.
	 */
	for (idx = 0; idx < len; idx++)
		if (strp[idx] != ' ' && strp[idx] != '\t')
			break;

	if (idx == len)
		return (0);

	if (strp[idx] == '-') {
		sign = -1;
		idx++;
	} else
		sign = 1;

	val = 0;
	for (; idx < len; idx++) {
		if (strp[idx] < '0' || strp[idx] > '7')
			break;
		digit = strp[idx] - '0';

		/*
		 * Saturate on overflow.  The check is made before the digit
		 * is accumulated so that every value up to and including
		 * INT64_MAX is returned exactly and the arithmetic below
		 * can never overflow.
		 */
		if (val > (INT64_MAX - digit) / 8) {
			val = INT64_MAX;
			break;
		}
		val = val * 8 + digit;
	}

	return (sign > 0) ? val : -val;
}
159 
/*
 * Reads a len-byte extended numeric value from strp.  The first byte has
 * bit 7 set to indicate the format; the remaining 7 bits + the (len - 1)
 * bytes that follow form a big-endian signed two's complement binary
 * number.  Returns the value, saturated at INT64_MAX or INT64_MIN if it
 * does not fit in an int64_t.  XXX Does not report errors.
 */
static int64_t
tarfs_str2base256(const char *strp, size_t len)
{
	int64_t val;
	size_t idx;

	KASSERT(strp[0] & 0x80, ("not an extended numeric value"));

	/*
	 * Bit 6 of the first byte is the sign bit and bits 5..0 are the
	 * most significant value bits: sign-extend them without shifting
	 * a negative number.
	 */
	val = strp[0] & 0x3f;
	if ((strp[0] & 0x40) != 0)
		val -= 0x40;

	/* Read subsequent bytes */
	for (idx = 1; idx < len; idx++) {
		/*
		 * Saturate on overflow and underflow.  Checking before the
		 * next byte is folded in keeps the multiplication in range
		 * and lets values that fit exactly pass through unchanged.
		 */
		if (val > INT64_MAX / 256) {
			val = INT64_MAX;
			break;
		} else if (val < INT64_MIN / 256) {
			val = INT64_MIN;
			break;
		}
		val = val * 256 + (unsigned char)strp[idx];
	}

	return (val);
}
199 
/*
 * Decodes a len-byte numeric header field.  A field whose first byte has
 * bit 7 set is an extended (base-256, signed two's complement) value and
 * is handed to tarfs_str2base256(); anything else is treated as signed
 * octal text and handed to tarfs_str2octal().  An empty field decodes
 * to 0.
 *
 * XXX practically no error checking or handling
 */
static int64_t
tarfs_str2int64(const char *strp, size_t len)
{

	if (len == 0)
		return (0);

	return ((strp[0] & 0x80) == 0 ?
	    tarfs_str2octal(strp, len) : tarfs_str2base256(strp, len));
}
218 
219 /*
220  * Verifies the checksum of a header.  Returns true if the checksum is
221  * valid, false otherwise.
222  */
223 static boolean_t
224 tarfs_checksum(struct ustar_header *hdrp)
225 {
226 	const unsigned char *ptr;
227 	int64_t checksum, hdrsum;
228 	size_t idx;
229 
230 	hdrsum = tarfs_str2int64(hdrp->checksum, sizeof(hdrp->checksum));
231 	TARFS_DPF(CHECKSUM, "%s: header checksum %lx\n", __func__, hdrsum);
232 
233 	checksum = 0;
234 	for (ptr = (const unsigned char *)hdrp;
235 	     ptr < (const unsigned char *)hdrp->checksum; ptr++)
236 		checksum += *ptr;
237 	for (idx = 0; idx < sizeof(hdrp->checksum); idx++)
238 		checksum += 0x20;
239 	for (ptr = (const unsigned char *)hdrp->typeflag;
240 	     ptr < (const unsigned char *)(hdrp + 1); ptr++)
241 		checksum += *ptr;
242 	TARFS_DPF(CHECKSUM, "%s: calc unsigned checksum %lx\n", __func__,
243 	    checksum);
244 	if (hdrsum == checksum)
245 		return (true);
246 
247 	/*
248 	 * Repeat test with signed bytes, some older formats use a broken
249 	 * form of the calculation
250 	 */
251 	checksum = 0;
252 	for (ptr = (const unsigned char *)hdrp;
253 	     ptr < (const unsigned char *)&hdrp->checksum; ptr++)
254 		checksum += *((const signed char *)ptr);
255 	for (idx = 0; idx < sizeof(hdrp->checksum); idx++)
256 		checksum += 0x20;
257 	for (ptr = (const unsigned char *)&hdrp->typeflag;
258 	     ptr < (const unsigned char *)(hdrp + 1); ptr++)
259 		checksum += *((const signed char *)ptr);
260 	TARFS_DPF(CHECKSUM, "%s: calc signed checksum %lx\n", __func__,
261 	    checksum);
262 	if (hdrsum == checksum)
263 		return (true);
264 
265 	return (false);
266 }
267 
268 
269 /*
270  * Looks up a path in the tarfs node tree.
271  *
272  * - If the path exists, stores a pointer to the corresponding tarfs_node
273  *   in retnode and a pointer to its parent in retparent.
274  *
275  * - If the path does not exist, but create_dirs is true, creates ancestor
276  *   directories and returns NULL in retnode and the parent in retparent.
277  *
278  * - If the path does not exist and create_dirs is false, stops at the
279  *   first missing path name component.
280  *
281  * - In all cases, on return, endp and sepp point to the beginning and
282  *   end, respectively, of the last-processed path name component.
283  *
284  * - Returns 0 if the node was found, ENOENT if it was not, and some other
285  *   positive errno value on failure.
286  */
287 static int
288 tarfs_lookup_path(struct tarfs_mount *tmp, char *name, size_t namelen,
289     char **endp, char **sepp, struct tarfs_node **retparent,
290     struct tarfs_node **retnode, boolean_t create_dirs)
291 {
292 	struct componentname cn = { };
293 	struct tarfs_node *parent, *tnp;
294 	char *sep;
295 	size_t len;
296 	int error;
297 	boolean_t do_lookup;
298 
299 	MPASS(name != NULL && namelen != 0);
300 
301 	do_lookup = true;
302 	error = 0;
303 	parent = tnp = tmp->root;
304 	if (tnp == NULL)
305 		panic("%s: root node not yet created", __func__);
306 
307 	TARFS_DPF(LOOKUP, "%s: full path: %.*s\n", __func__,
308 	    (int)namelen, name);
309 
310 	sep = NULL;
311 	for (;;) {
312 		/* skip leading slash(es) */
313 		while (name[0] == '/' && namelen > 0)
314 			name++, namelen--;
315 
316 		/* did we reach the end? */
317 		if (namelen == 0 || name[0] == '\0') {
318 			name = do_lookup ? NULL : cn.cn_nameptr;
319 			namelen = do_lookup ? 0 : cn.cn_namelen;
320 			break;
321 		}
322 
323 		/* we're not at the end, so we must be in a directory */
324 		if (tnp != NULL && tnp->type != VDIR) {
325 			TARFS_DPF(LOOKUP, "%s: %.*s is not a directory\n", __func__,
326 			    (int)tnp->namelen, tnp->name);
327 			error = ENOTDIR;
328 			break;
329 		}
330 
331 		/* locate the next separator */
332 		for (sep = name, len = 0;
333 		     *sep != '\0' && *sep != '/' && len < namelen;
334 		     sep++, len++)
335 			/* nothing */ ;
336 
337 		/* check for . and .. */
338 		if (name[0] == '.' && len == 1) {
339 			name += len;
340 			namelen -= len;
341 			continue;
342 		}
343 		if (name[0] == '.' && name[1] == '.' && len == 2) {
344 			if (tnp == tmp->root) {
345 				error = EINVAL;
346 				break;
347 			}
348 			tnp = parent;
349 			parent = tnp->parent;
350 			name += len;
351 			namelen -= len;
352 			continue;
353 		}
354 
355 		/* create parent if necessary */
356 		if (!do_lookup) {
357 			TARFS_DPF(ALLOC, "%s: creating %.*s\n", __func__,
358 			    (int)cn.cn_namelen, cn.cn_nameptr);
359 			error = tarfs_alloc_node(tmp, cn.cn_nameptr,
360 			    cn.cn_namelen, VDIR, -1, 0, tmp->mtime, 0, 0,
361 			    DEFDIRMODE, 0, NULL, NODEV, parent, &tnp);
362 			if (error != 0)
363 				break;
364 		}
365 
366 		parent = tnp;
367 		tnp = NULL;
368 		cn.cn_nameptr = name;
369 		cn.cn_namelen = len;
370 		TARFS_DPF(LOOKUP, "%s: looking up %.*s in %.*s/\n", __func__,
371 		    (int)cn.cn_namelen, cn.cn_nameptr,
372 		    (int)parent->namelen, parent->name);
373 		if (do_lookup) {
374 			tnp = tarfs_lookup_node(parent, NULL, &cn);
375 			if (tnp == NULL) {
376 				do_lookup = false;
377 				if (!create_dirs)
378 					break;
379 			}
380 		}
381 		name += cn.cn_namelen;
382 		namelen -= cn.cn_namelen;
383 	}
384 
385 	TARFS_DPF(LOOKUP, "%s: parent %p node %p\n", __func__, parent, tnp);
386 
387 	if (retparent)
388 		*retparent = parent;
389 	if (retnode)
390 		*retnode = tnp;
391 	if (endp) {
392 		if (namelen > 0)
393 			*endp = name;
394 		else
395 			*endp = NULL;
396 	}
397 	if (sepp)
398 		*sepp = sep;
399 	return (error);
400 }
401 
402 /*
403  * Frees a tarfs_mount structure and everything it references.
404  */
405 static void
406 tarfs_free_mount(struct tarfs_mount *tmp)
407 {
408 	struct mount *mp;
409 	struct tarfs_node *tnp;
410 
411 	MPASS(tmp != NULL);
412 
413 	TARFS_DPF(ALLOC, "%s: Freeing mount structure %p\n", __func__, tmp);
414 
415 	TARFS_DPF(ALLOC, "%s: freeing tarfs_node structures\n", __func__);
416 	while (!TAILQ_EMPTY(&tmp->allnodes)) {
417 		tnp = TAILQ_FIRST(&tmp->allnodes);
418 		TAILQ_REMOVE(&tmp->allnodes, tnp, entries);
419 		tarfs_free_node(tnp);
420 	}
421 
422 	(void)tarfs_io_fini(tmp);
423 
424 	TARFS_DPF(ALLOC, "%s: deleting unr header\n", __func__);
425 	delete_unrhdr(tmp->ino_unr);
426 	mp = tmp->vfs;
427 	mp->mnt_data = NULL;
428 
429 	TARFS_DPF(ALLOC, "%s: freeing structure\n", __func__);
430 	free(tmp, M_TARFSMNT);
431 }
432 
433 /*
434  * Processes the tar file header at block offset blknump and allocates and
435  * populates a tarfs_node structure for the file it describes.  Updated
436  * blknump to point to the next unread tar file block, or TAR_EOF if EOF
437  * is reached.  Returns 0 on success or EOF and a positive errno value on
438  * failure.
439  */
440 static int
441 tarfs_alloc_one(struct tarfs_mount *tmp, off_t *blknump)
442 {
443 	char block[TARFS_BLOCKSIZE];
444 	struct ustar_header *hdrp = (struct ustar_header *)block;
445 	struct sbuf *namebuf = NULL;
446 	char *exthdr = NULL, *name = NULL, *link = NULL;
447 	off_t blknum = *blknump;
448 	int64_t num;
449 	int endmarker = 0;
450 	char *namep, *sep;
451 	struct tarfs_node *parent, *tnp;
452 	size_t namelen = 0, linklen = 0, realsize = 0, sz;
453 	ssize_t res;
454 	dev_t rdev;
455 	gid_t gid;
456 	mode_t mode;
457 	time_t mtime;
458 	uid_t uid;
459 	long major = -1, minor = -1;
460 	unsigned int flags = 0;
461 	int error;
462 	boolean_t sparse = false;
463 
464 again:
465 	/* read next header */
466 	res = tarfs_io_read_buf(tmp, false, block,
467 	    TARFS_BLOCKSIZE * blknum, TARFS_BLOCKSIZE);
468 	if (res < 0) {
469 		error = -res;
470 		goto bad;
471 	} else if (res < TARFS_BLOCKSIZE) {
472 		goto eof;
473 	}
474 	blknum++;
475 
476 	/* check for end marker */
477 	if (memcmp(block, zero_region, TARFS_BLOCKSIZE) == 0) {
478 		if (endmarker++) {
479 			if (exthdr != NULL) {
480 				TARFS_DPF(IO, "%s: orphaned extended header at %zu\n",
481 				    __func__, TARFS_BLOCKSIZE * (blknum - 1));
482 				free(exthdr, M_TEMP);
483 			}
484 			TARFS_DPF(IO, "%s: end of archive at %zu\n", __func__,
485 			    TARFS_BLOCKSIZE * blknum);
486 			tmp->nblocks = blknum;
487 			*blknump = TAR_EOF;
488 			return (0);
489 		}
490 		goto again;
491 	}
492 
493 	/* verify magic */
494 	if (memcmp(hdrp->magic, USTAR_MAGIC, sizeof(USTAR_MAGIC)) == 0 &&
495 	    memcmp(hdrp->version, USTAR_VERSION, sizeof(USTAR_VERSION)) == 0) {
496 		/* POSIX */
497 	} else if (memcmp(hdrp->magic, GNUTAR_MAGIC, sizeof(GNUTAR_MAGIC)) == 0 &&
498 	    memcmp(hdrp->magic, GNUTAR_MAGIC, sizeof(GNUTAR_MAGIC)) == 0) {
499 		TARFS_DPF(ALLOC, "%s: GNU tar format at %zu\n", __func__,
500 		    TARFS_BLOCKSIZE * (blknum - 1));
501 		error = EFTYPE;
502 		goto bad;
503 	} else {
504 		TARFS_DPF(ALLOC, "%s: unsupported TAR format at %zu\n",
505 		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
506 		error = EINVAL;
507 		goto bad;
508 	}
509 
510 	/* verify checksum */
511 	if (!tarfs_checksum(hdrp)) {
512 		TARFS_DPF(ALLOC, "%s: header checksum failed at %zu\n",
513 		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
514 		error = EINVAL;
515 		goto bad;
516 	}
517 
518 	/* get standard attributes */
519 	num = tarfs_str2int64(hdrp->mode, sizeof(hdrp->mode));
520 	if (num < 0 || num > ALLPERMS) {
521 		TARFS_DPF(ALLOC, "%s: invalid file mode at %zu\n",
522 		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
523 		mode = S_IRUSR;
524 	} else {
525 		mode = num;
526 	}
527 	num = tarfs_str2int64(hdrp->uid, sizeof(hdrp->uid));
528 	if (num < 0 || num > UID_MAX) {
529 		TARFS_DPF(ALLOC, "%s: UID out of range at %zu\n",
530 		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
531 		uid = tmp->root->uid;
532 		mode &= ~S_ISUID;
533 	} else {
534 		uid = num;
535 	}
536 	num = tarfs_str2int64(hdrp->gid, sizeof(hdrp->gid));
537 	if (num < 0 || num > GID_MAX) {
538 		TARFS_DPF(ALLOC, "%s: GID out of range at %zu\n",
539 		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
540 		gid = tmp->root->gid;
541 		mode &= ~S_ISGID;
542 	} else {
543 		gid = num;
544 	}
545 	num = tarfs_str2int64(hdrp->size, sizeof(hdrp->size));
546 	if (num < 0) {
547 		TARFS_DPF(ALLOC, "%s: negative size at %zu\n",
548 		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
549 		error = EINVAL;
550 		goto bad;
551 	} else {
552 		sz = num;
553 	}
554 	mtime = tarfs_str2int64(hdrp->mtime, sizeof(hdrp->mtime));
555 	rdev = NODEV;
556 	TARFS_DPF(ALLOC, "%s: [%c] %zu @%jd %o %d:%d\n", __func__,
557 	    hdrp->typeflag[0], sz, (intmax_t)mtime, mode, uid, gid);
558 
559 	/* extended header? */
560 	if (hdrp->typeflag[0] == TAR_TYPE_GLOBAL_EXTHDR) {
561 		printf("%s: unsupported global extended header at %zu\n",
562 		    __func__, (size_t)(TARFS_BLOCKSIZE * (blknum - 1)));
563 		error = EFTYPE;
564 		goto bad;
565 	}
566 	if (hdrp->typeflag[0] == TAR_TYPE_EXTHDR) {
567 		if (exthdr != NULL) {
568 			TARFS_DPF(IO, "%s: multiple extended headers at %zu\n",
569 			    __func__, TARFS_BLOCKSIZE * (blknum - 1));
570 			error = EFTYPE;
571 			goto bad;
572 		}
573 		/* read the contents of the exthdr */
574 		TARFS_DPF(ALLOC, "%s: %zu-byte extended header at %zd\n",
575 		    __func__, sz, TARFS_BLOCKSIZE * (blknum - 1));
576 		exthdr = malloc(sz, M_TEMP, M_WAITOK);
577 		res = tarfs_io_read_buf(tmp, false, exthdr,
578 		    TARFS_BLOCKSIZE * blknum, sz);
579 		if (res < 0) {
580 			error = -res;
581 			goto bad;
582 		}
583 		if (res < sz) {
584 			goto eof;
585 		}
586 		blknum += TARFS_SZ2BLKS(res);
587 		/* XXX TODO: refactor this parser */
588 		char *line = exthdr;
589 		while (line < exthdr + sz) {
590 			char *eol, *key, *value, *sep;
591 			size_t len = strtoul(line, &sep, 10);
592 			if (len == 0 || sep == line || *sep != ' ') {
593 				TARFS_DPF(ALLOC, "%s: exthdr syntax error\n",
594 				    __func__);
595 				error = EINVAL;
596 				goto bad;
597 			}
598 			if (line + len > exthdr + sz) {
599 				TARFS_DPF(ALLOC, "%s: exthdr overflow\n",
600 				    __func__);
601 				error = EINVAL;
602 				goto bad;
603 			}
604 			eol = line + len - 1;
605 			*eol = '\0';
606 			line += len;
607 			key = sep + 1;
608 			sep = strchr(key, '=');
609 			if (sep == NULL) {
610 				TARFS_DPF(ALLOC, "%s: exthdr syntax error\n",
611 				    __func__);
612 				error = EINVAL;
613 				goto bad;
614 			}
615 			*sep = '\0';
616 			value = sep + 1;
617 			TARFS_DPF(ALLOC, "%s: exthdr %s=%s\n", __func__,
618 			    key, value);
619 			if (strcmp(key, "linkpath") == 0) {
620 				link = value;
621 				linklen = eol - value;
622 			} else if (strcmp(key, "GNU.sparse.major") == 0) {
623 				sparse = true;
624 				major = strtol(value, &sep, 10);
625 				if (sep != eol) {
626 					printf("exthdr syntax error\n");
627 					error = EINVAL;
628 					goto bad;
629 				}
630 			} else if (strcmp(key, "GNU.sparse.minor") == 0) {
631 				sparse = true;
632 				minor = strtol(value, &sep, 10);
633 				if (sep != eol) {
634 					printf("exthdr syntax error\n");
635 					error = EINVAL;
636 					goto bad;
637 				}
638 			} else if (strcmp(key, "GNU.sparse.name") == 0) {
639 				sparse = true;
640 				name = value;
641 				namelen = eol - value;
642 				if (namelen == 0) {
643 					printf("exthdr syntax error\n");
644 					error = EINVAL;
645 					goto bad;
646 				}
647 			} else if (strcmp(key, "GNU.sparse.realsize") == 0) {
648 				sparse = true;
649 				realsize = strtoul(value, &sep, 10);
650 				if (sep != eol) {
651 					printf("exthdr syntax error\n");
652 					error = EINVAL;
653 					goto bad;
654 				}
655 			} else if (strcmp(key, "SCHILY.fflags") == 0) {
656 				flags |= tarfs_strtofflags(value, &sep);
657 				if (sep != eol) {
658 					printf("exthdr syntax error\n");
659 					error = EINVAL;
660 					goto bad;
661 				}
662 			}
663 		}
664 		goto again;
665 	}
666 
667 	/* sparse file consistency checks */
668 	if (sparse) {
669 		TARFS_DPF(ALLOC, "%s: %s: sparse %ld.%ld (%zu bytes)\n", __func__,
670 		    name, major, minor, realsize);
671 		if (major != 1 || minor != 0 || name == NULL || realsize == 0 ||
672 		    hdrp->typeflag[0] != TAR_TYPE_FILE) {
673 			TARFS_DPF(ALLOC, "%s: invalid sparse format\n", __func__);
674 			error = EINVAL;
675 			goto bad;
676 		}
677 	}
678 
679 	/* file name */
680 	if (name == NULL) {
681 		if (hdrp->prefix[0] != '\0') {
682 			namebuf = sbuf_new_auto();
683 			sbuf_printf(namebuf, "%.*s/%.*s",
684 			    (int)sizeof(hdrp->prefix), hdrp->prefix,
685 			    (int)sizeof(hdrp->name), hdrp->name);
686 			sbuf_finish(namebuf);
687 			name = sbuf_data(namebuf);
688 			namelen = sbuf_len(namebuf);
689 		} else {
690 			name = hdrp->name;
691 			namelen = strnlen(hdrp->name, sizeof(hdrp->name));
692 		}
693 	}
694 
695 	error = tarfs_lookup_path(tmp, name, namelen, &namep,
696 	    &sep, &parent, &tnp, true);
697 	if (error != 0) {
698 		TARFS_DPF(ALLOC, "%s: failed to look up %.*s\n", __func__,
699 		    (int)namelen, name);
700 		error = EINVAL;
701 		goto bad;
702 	}
703 	if (tnp != NULL) {
704 		if (hdrp->typeflag[0] == TAR_TYPE_DIRECTORY) {
705 			/* XXX set attributes? */
706 			goto skip;
707 		}
708 		TARFS_DPF(ALLOC, "%s: duplicate file %.*s\n", __func__,
709 		    (int)namelen, name);
710 		error = EINVAL;
711 		goto bad;
712 	}
713 	switch (hdrp->typeflag[0]) {
714 	case TAR_TYPE_DIRECTORY:
715 		error = tarfs_alloc_node(tmp, namep, sep - namep, VDIR,
716 		    0, 0, mtime, uid, gid, mode, flags, NULL, 0,
717 		    parent, &tnp);
718 		break;
719 	case TAR_TYPE_FILE:
720 		error = tarfs_alloc_node(tmp, namep, sep - namep, VREG,
721 		    blknum * TARFS_BLOCKSIZE, sz, mtime, uid, gid, mode,
722 		    flags, NULL, 0, parent, &tnp);
723 		if (error == 0 && sparse) {
724 			error = tarfs_load_blockmap(tnp, realsize);
725 		}
726 		break;
727 	case TAR_TYPE_HARDLINK:
728 		if (link == NULL) {
729 			link = hdrp->linkname;
730 			linklen = strnlen(link, sizeof(hdrp->linkname));
731 		}
732 		error = tarfs_alloc_node(tmp, namep, sep - namep, VREG,
733 		    0, 0, 0, 0, 0, 0, 0, NULL, 0, parent, &tnp);
734 		if (error != 0) {
735 			goto bad;
736 		}
737 		error = tarfs_lookup_path(tmp, link, linklen, NULL,
738 		    NULL, NULL, &tnp->other, false);
739 		if (tnp->other == NULL ||
740 		    tnp->other->type != VREG ||
741 		    tnp->other->other != NULL) {
742 			TARFS_DPF(ALLOC, "%s: %.*s: dead hard link to %.*s\n",
743 			    __func__, (int)namelen, name, (int)linklen, link);
744 			error = EINVAL;
745 			goto bad;
746 		}
747 		break;
748 	case TAR_TYPE_SYMLINK:
749 		if (link == NULL) {
750 			link = hdrp->linkname;
751 			linklen = strnlen(link, sizeof(hdrp->linkname));
752 		}
753 		error = tarfs_alloc_node(tmp, namep, sep - namep, VLNK,
754 		    0, linklen, mtime, uid, gid, mode, flags, link, 0,
755 		    parent, &tnp);
756 		break;
757 	case TAR_TYPE_BLOCK:
758 		major = tarfs_str2int64(hdrp->major, sizeof(hdrp->major));
759 		minor = tarfs_str2int64(hdrp->minor, sizeof(hdrp->minor));
760 		rdev = makedev(major, minor);
761 		error = tarfs_alloc_node(tmp, namep, sep - namep, VBLK,
762 		    0, 0, mtime, uid, gid, mode, flags, NULL, rdev,
763 		    parent, &tnp);
764 		break;
765 	case TAR_TYPE_CHAR:
766 		major = tarfs_str2int64(hdrp->major, sizeof(hdrp->major));
767 		minor = tarfs_str2int64(hdrp->minor, sizeof(hdrp->minor));
768 		rdev = makedev(major, minor);
769 		error = tarfs_alloc_node(tmp, namep, sep - namep, VCHR,
770 		    0, 0, mtime, uid, gid, mode, flags, NULL, rdev,
771 		    parent, &tnp);
772 		break;
773 	default:
774 		TARFS_DPF(ALLOC, "%s: unsupported type %c for %.*s\n",
775 		    __func__, hdrp->typeflag[0], (int)namelen, name);
776 		error = EINVAL;
777 		break;
778 	}
779 	if (error != 0)
780 		goto bad;
781 
782 skip:
783 	blknum += TARFS_SZ2BLKS(sz);
784 	tmp->nblocks = blknum;
785 	*blknump = blknum;
786 	if (exthdr != NULL) {
787 		free(exthdr, M_TEMP);
788 	}
789 	if (namebuf != NULL) {
790 		sbuf_delete(namebuf);
791 	}
792 	return (0);
793 eof:
794 	TARFS_DPF(IO, "%s: premature end of file\n", __func__);
795 	error = EIO;
796 	goto bad;
797 bad:
798 	if (exthdr != NULL) {
799 		free(exthdr, M_TEMP);
800 	}
801 	if (namebuf != NULL) {
802 		sbuf_delete(namebuf);
803 	}
804 	return (error);
805 }
806 
807 /*
808  * Allocates and populates the metadata structures for the tar file
809  * referenced by vp.  On success, a pointer to the tarfs_mount structure
810  * is stored in tmpp.  Returns 0 on success or a positive errno value on
811  * failure.
812  */
813 static int
814 tarfs_alloc_mount(struct mount *mp, struct vnode *vp,
815     uid_t root_uid, gid_t root_gid, mode_t root_mode,
816     struct tarfs_mount **tmpp)
817 {
818 	struct vattr va;
819 	struct thread *td = curthread;
820 	struct tarfs_mount *tmp;
821 	struct tarfs_node *root;
822 	off_t blknum;
823 	time_t mtime;
824 	int error;
825 
826 	KASSERT(tmpp != NULL, ("tarfs mount return is NULL"));
827 	ASSERT_VOP_LOCKED(vp, __func__);
828 
829 	tmp = NULL;
830 
831 	TARFS_DPF(ALLOC, "%s: Allocating tarfs mount structure for vp %p\n",
832 	    __func__, vp);
833 
834 	/* Get source metadata */
835 	error = VOP_GETATTR(vp, &va, td->td_ucred);
836 	if (error != 0) {
837 		return (error);
838 	}
839 	VOP_UNLOCK(vp);
840 	mtime = va.va_mtime.tv_sec;
841 
842 	/* Allocate and initialize tarfs mount structure */
843 	tmp = malloc(sizeof(*tmp), M_TARFSMNT, M_WAITOK | M_ZERO);
844 	TARFS_DPF(ALLOC, "%s: Allocated mount structure\n", __func__);
845 	mp->mnt_data = tmp;
846 
847 	mtx_init(&tmp->allnode_lock, "tarfs allnode lock", NULL,
848 	    MTX_DEF);
849 	TAILQ_INIT(&tmp->allnodes);
850 	tmp->ino_unr = new_unrhdr(TARFS_MININO, INT_MAX, &tmp->allnode_lock);
851 	tmp->vp = vp;
852 	tmp->vfs = mp;
853 	tmp->mtime = mtime;
854 
855 	/*
856 	 * XXX The decompression layer passes everything through the
857 	 * buffer cache, and the buffer cache wants to know our blocksize,
858 	 * but mnt_stat normally isn't populated until after we return, so
859 	 * we have to cheat a bit.
860 	 */
861 	tmp->iosize = 1U << tarfs_ioshift;
862 	mp->mnt_stat.f_iosize = tmp->iosize;
863 
864 	/* Initialize decompression layer */
865 	error = tarfs_io_init(tmp);
866 	if (error != 0)
867 		goto bad;
868 
869 	error = tarfs_alloc_node(tmp, NULL, 0, VDIR, 0, 0, mtime, root_uid,
870 	    root_gid, root_mode & ALLPERMS, 0, NULL, NODEV, NULL, &root);
871 	if (error != 0 || root == NULL)
872 		goto bad;
873 	tmp->root = root;
874 
875 	blknum = 0;
876 	do {
877 		if ((error = tarfs_alloc_one(tmp, &blknum)) != 0) {
878 			goto bad;
879 		}
880 	} while (blknum != TAR_EOF);
881 
882 	*tmpp = tmp;
883 
884 	TARFS_DPF(ALLOC, "%s: pfsmnt_root %p\n", __func__, tmp->root);
885 	return (0);
886 
887 bad:
888 	tarfs_free_mount(tmp);
889 	return (error);
890 }
891 
892 /*
893  * VFS Operations.
894  */
895 
896 static int
897 tarfs_mount(struct mount *mp)
898 {
899 	struct nameidata nd;
900 	struct vattr va;
901 	struct tarfs_mount *tmp = NULL;
902 	struct thread *td = curthread;
903 	struct vnode *vp;
904 	char *from;
905 	uid_t root_uid;
906 	gid_t root_gid;
907 	mode_t root_mode;
908 	int error, flags, len;
909 
910 	if (mp->mnt_flag & MNT_UPDATE)
911 		return (EOPNOTSUPP);
912 
913 	if (vfs_filteropt(mp->mnt_optnew, tarfs_opts))
914 		return (EINVAL);
915 
916 	vn_lock(mp->mnt_vnodecovered, LK_SHARED | LK_RETRY);
917 	error = VOP_GETATTR(mp->mnt_vnodecovered, &va, mp->mnt_cred);
918 	VOP_UNLOCK(mp->mnt_vnodecovered);
919 	if (error)
920 		return (error);
921 
922 	if (mp->mnt_cred->cr_ruid != 0 ||
923 	    vfs_scanopt(mp->mnt_optnew, "gid", "%d", &root_gid) != 1)
924 		root_gid = va.va_gid;
925 	if (mp->mnt_cred->cr_ruid != 0 ||
926 	    vfs_scanopt(mp->mnt_optnew, "uid", "%d", &root_uid) != 1)
927 		root_uid = va.va_uid;
928 	if (mp->mnt_cred->cr_ruid != 0 ||
929 	    vfs_scanopt(mp->mnt_optnew, "mode", "%ho", &root_mode) != 1)
930 		root_mode = va.va_mode;
931 
932 	error = vfs_getopt(mp->mnt_optnew, "from", (void **)&from, &len);
933 	if (error != 0 || from[len - 1] != '\0')
934 		return (EINVAL);
935 
936 	/* Find the source tarball */
937 	TARFS_DPF(FS, "%s(%s, uid=%u, gid=%u, mode=%o)\n", __func__,
938 	    from, root_uid, root_gid, root_mode);
939 	flags = FREAD;
940 	if (vfs_flagopt(mp->mnt_optnew, "verify", NULL, 0)) {
941 	    flags |= O_VERIFY;
942 	}
943 	NDINIT(&nd, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF, UIO_SYSSPACE, from);
944 	error = namei(&nd);
945 	if (error != 0)
946 		return (error);
947 	NDFREE_PNBUF(&nd);
948 	vp = nd.ni_vp;
949 	TARFS_DPF(FS, "%s: N: hold %u use %u lock 0x%x\n", __func__,
950 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
951 	/* vp is now held and locked */
952 
953 	/* Open the source tarball */
954 	error = vn_open_vnode(vp, flags, td->td_ucred, td, NULL);
955 	if (error != 0) {
956 		TARFS_DPF(FS, "%s: failed to open %s: %d\n", __func__,
957 		    from, error);
958 		vput(vp);
959 		goto bad;
960 	}
961 	TARFS_DPF(FS, "%s: O: hold %u use %u lock 0x%x\n", __func__,
962 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
963 	if (vp->v_type != VREG) {
964 		TARFS_DPF(FS, "%s: not a regular file\n", __func__);
965 		error = EOPNOTSUPP;
966 		goto bad_open_locked;
967 	}
968 	error = priv_check(td, PRIV_VFS_MOUNT_PERM);
969 	if (error != 0) {
970 		TARFS_DPF(FS, "%s: not permitted to mount\n", __func__);
971 		goto bad_open_locked;
972 	}
973 	if (flags & O_VERIFY) {
974 		mp->mnt_flag |= MNT_VERIFIED;
975 	}
976 
977 	/* Allocate the tarfs mount */
978 	error = tarfs_alloc_mount(mp, vp, root_uid, root_gid, root_mode, &tmp);
979 	/* vp is now held but unlocked */
980 	if (error != 0) {
981 		TARFS_DPF(FS, "%s: failed to mount %s: %d\n", __func__,
982 		    from, error);
983 		goto bad_open_unlocked;
984 	}
985 	TARFS_DPF(FS, "%s: M: hold %u use %u lock 0x%x\n", __func__,
986 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
987 
988 	/* Unconditionally mount as read-only */
989 	MNT_ILOCK(mp);
990 	mp->mnt_flag |= (MNT_LOCAL | MNT_RDONLY);
991 	MNT_IUNLOCK(mp);
992 
993 	vfs_getnewfsid(mp);
994 	vfs_mountedfrom(mp, "tarfs");
995 	TARFS_DPF(FS, "%s: success\n", __func__);
996 
997 	return (0);
998 
999 bad_open_locked:
1000 	/* vp must be held and locked */
1001 	TARFS_DPF(FS, "%s: L: hold %u use %u lock 0x%x\n", __func__,
1002 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1003 	VOP_UNLOCK(vp);
1004 bad_open_unlocked:
1005 	/* vp must be held and unlocked */
1006 	TARFS_DPF(FS, "%s: E: hold %u use %u lock 0x%x\n", __func__,
1007 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1008 	(void)vn_close(vp, flags, td->td_ucred, td);
1009 bad:
1010 	/* vp must be released and unlocked */
1011 	TARFS_DPF(FS, "%s: X: hold %u use %u lock 0x%x\n", __func__,
1012 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1013 	return (error);
1014 }
1015 
1016 /*
1017  * Unmounts a tarfs filesystem.
1018  */
1019 static int
1020 tarfs_unmount(struct mount *mp, int mntflags)
1021 {
1022 	struct thread *td = curthread;
1023 	struct tarfs_mount *tmp;
1024 	struct vnode *vp;
1025 	int error;
1026 	int flags = 0;
1027 
1028 	TARFS_DPF(FS, "%s: Unmounting %p\n", __func__, mp);
1029 
1030 	/* Handle forced unmounts */
1031 	if (mntflags & MNT_FORCE)
1032 		flags |= FORCECLOSE;
1033 
1034 	/* Finalize all pending I/O */
1035 	error = vflush(mp, 0, flags, curthread);
1036 	if (error != 0)
1037 		return (error);
1038 	tmp = MP_TO_TARFS_MOUNT(mp);
1039 	vp = tmp->vp;
1040 
1041 	MPASS(vp != NULL);
1042 	TARFS_DPF(FS, "%s: U: hold %u use %u lock 0x%x\n", __func__,
1043 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1044 	vn_close(vp, FREAD, td->td_ucred, td);
1045 	TARFS_DPF(FS, "%s: C: hold %u use %u lock 0x%x\n", __func__,
1046 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1047 	tarfs_free_mount(tmp);
1048 
1049 	return (0);
1050 }
1051 
1052 /*
1053  * Gets the root of a tarfs filesystem.  Returns 0 on success or a
1054  * positive errno value on failure.
1055  */
1056 static int
1057 tarfs_root(struct mount *mp, int flags, struct vnode **vpp)
1058 {
1059 	struct vnode *nvp;
1060 	int error;
1061 
1062 	TARFS_DPF(FS, "%s: Getting root vnode\n", __func__);
1063 
1064 	error = VFS_VGET(mp, TARFS_ROOTINO, LK_EXCLUSIVE, &nvp);
1065 	if (error != 0)
1066 		return (error);
1067 
1068 	nvp->v_vflag |= VV_ROOT;
1069 	*vpp = nvp;
1070 	return (0);
1071 }
1072 
1073 /*
1074  * Gets statistics for a tarfs filesystem.  Returns 0.
1075  */
1076 static int
1077 tarfs_statfs(struct mount *mp, struct statfs *sbp)
1078 {
1079 	struct tarfs_mount *tmp;
1080 
1081 	tmp = MP_TO_TARFS_MOUNT(mp);
1082 
1083 	sbp->f_bsize = TARFS_BLOCKSIZE;
1084 	sbp->f_iosize = tmp->iosize;
1085 	sbp->f_blocks = tmp->nblocks;
1086 	sbp->f_bfree = 0;
1087 	sbp->f_bavail = 0;
1088 	sbp->f_files = tmp->nfiles;
1089 	sbp->f_ffree = 0;
1090 
1091 	return (0);
1092 }
1093 
1094 /*
1095  * Gets a vnode for the given inode.  On success, a pointer to the vnode
1096  * is stored in vpp.  Returns 0 on success or a positive errno value on
1097  * failure.
1098  */
1099 static int
1100 tarfs_vget(struct mount *mp, ino_t ino, int lkflags, struct vnode **vpp)
1101 {
1102 	struct tarfs_mount *tmp;
1103 	struct tarfs_node *tnp;
1104 	struct thread *td;
1105 	struct vnode *vp;
1106 	int error;
1107 
1108 	TARFS_DPF(FS, "%s: mp %p, ino %lu, lkflags %d\n", __func__, mp, ino,
1109 	    lkflags);
1110 
1111 	td = curthread;
1112 	error = vfs_hash_get(mp, ino, lkflags, td, vpp, NULL, NULL);
1113 	if (error != 0)
1114 		return (error);
1115 
1116 	if (*vpp != NULL) {
1117 		TARFS_DPF(FS, "%s: found hashed vnode %p\n", __func__, *vpp);
1118 		return (error);
1119 	}
1120 
1121 	TARFS_DPF(FS, "%s: no hashed vnode for inode %lu\n", __func__, ino);
1122 
1123 	tmp = MP_TO_TARFS_MOUNT(mp);
1124 
1125 	if (ino == TARFS_ZIOINO) {
1126 		error = vget(tmp->znode, lkflags);
1127 		if (error != 0)
1128 			return (error);
1129 		*vpp = tmp->znode;
1130 		return (0);
1131 	}
1132 
1133 	/* XXX Should use hash instead? */
1134 	TAILQ_FOREACH(tnp, &tmp->allnodes, entries) {
1135 		if (tnp->ino == ino)
1136 			break;
1137 	}
1138 	TARFS_DPF(FS, "%s: search of all nodes found %p\n", __func__, tnp);
1139 	if (tnp == NULL)
1140 		return (ENOENT);
1141 
1142 	(void)getnewvnode("tarfs", mp, &tarfs_vnodeops, &vp);
1143 	TARFS_DPF(FS, "%s: allocated vnode\n", __func__);
1144 	vp->v_data = tnp;
1145 	vp->v_type = tnp->type;
1146 	tnp->vnode = vp;
1147 
1148 	lockmgr(vp->v_vnlock, lkflags, NULL);
1149 	error = insmntque(vp, mp);
1150 	if (error != 0)
1151 		goto bad;
1152 	TARFS_DPF(FS, "%s: inserting entry into VFS hash\n", __func__);
1153 	error = vfs_hash_insert(vp, ino, lkflags, td, vpp, NULL, NULL);
1154 	if (error != 0 || *vpp != NULL)
1155 		return (error);
1156 
1157 	vn_set_state(vp, VSTATE_CONSTRUCTED);
1158 	*vpp = vp;
1159 	return (0);
1160 
1161 bad:
1162 	*vpp = NULLVP;
1163 	return (error);
1164 }
1165 
1166 static int
1167 tarfs_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
1168 {
1169 	struct tarfs_node *tnp;
1170 	struct tarfs_fid *tfp;
1171 	struct vnode *nvp;
1172 	int error;
1173 
1174 	tfp = (struct tarfs_fid *)fhp;
1175 	MP_TO_TARFS_MOUNT(mp);
1176 	if (tfp->ino < TARFS_ROOTINO || tfp->ino > INT_MAX)
1177 		return (ESTALE);
1178 
1179 	error = VFS_VGET(mp, tfp->ino, LK_EXCLUSIVE, &nvp);
1180 	if (error != 0) {
1181 		*vpp = NULLVP;
1182 		return (error);
1183 	}
1184 	tnp = VP_TO_TARFS_NODE(nvp);
1185 	if (tnp->mode == 0 ||
1186 	    tnp->gen != tfp->gen ||
1187 	    tnp->nlink <= 0) {
1188 		vput(nvp);
1189 		*vpp = NULLVP;
1190 		return (ESTALE);
1191 	}
1192 	*vpp = nvp;
1193 	return (0);
1194 }
1195 
1196 static struct vfsops tarfs_vfsops = {
1197 	.vfs_fhtovp =	tarfs_fhtovp,
1198 	.vfs_mount =	tarfs_mount,
1199 	.vfs_root =	tarfs_root,
1200 	.vfs_statfs =	tarfs_statfs,
1201 	.vfs_unmount =	tarfs_unmount,
1202 	.vfs_vget =	tarfs_vget,
1203 };
1204 VFS_SET(tarfs_vfsops, tarfs, VFCF_READONLY);
1205 MODULE_VERSION(tarfs, 1);
1206 MODULE_DEPEND(tarfs, xz, 1, 1, 1);
1207