xref: /freebsd/sys/fs/tarfs/tarfs_vfsops.c (revision 81ad626541db97eb356e2c1d4a20eb2a26a766ab)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013 Juniper Networks, Inc.
5  * Copyright (c) 2022-2023 Klara, Inc.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include "opt_tarfs.h"
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/buf.h>
34 #include <sys/conf.h>
35 #include <sys/fcntl.h>
36 #include <sys/libkern.h>
37 #include <sys/limits.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/mount.h>
41 #include <sys/mutex.h>
42 #include <sys/namei.h>
43 #include <sys/priv.h>
44 #include <sys/proc.h>
45 #include <sys/queue.h>
46 #include <sys/sbuf.h>
47 #include <sys/stat.h>
48 #include <sys/uio.h>
49 #include <sys/vnode.h>
50 
51 #include <vm/vm_param.h>
52 
53 #include <geom/geom.h>
54 #include <geom/geom_vfs.h>
55 
56 #include <fs/tarfs/tarfs.h>
57 #include <fs/tarfs/tarfs_dbg.h>
58 
59 CTASSERT(ZERO_REGION_SIZE > TARFS_BLOCKSIZE);
60 
/*
 * Layout of a ustar header as it appears at the start of a tar block.
 * Every member is a fixed-width character array, so the members are
 * laid out back to back; their order and sizes must not be changed.
 */
struct ustar_header {
	char	name[100];		/* path name of the entry */
	char	mode[8];		/* permission / mode bits */
	char	uid[8];			/* numeric owner id */
	char	gid[8];			/* numeric group id */
	char	size[12];		/* size of the entry's data */
	char	mtime[12];		/* modification time */
	char	checksum[8];		/* header checksum */
	char	typeflag[1];		/* entry type, see TAR_TYPE_* */
	char	linkname[100];		/* link target; last "old format" field */
	char	magic[6];		/* POSIX UStar "ustar\0" indicator */
	char	version[2];		/* POSIX UStar version "00" */
	char	uname[32];		/* owner name */
	char	gname[32];		/* group name */
	char	major[8];		/* device major number */
	char	minor[8];		/* device minor number */
	char	prefix[155];		/* path prefix, prepended to name */
};
79 
80 #define	TAR_EOF			((off_t)-1)
81 
82 #define	TAR_TYPE_FILE		'0'
83 #define	TAR_TYPE_HARDLINK	'1'
84 #define	TAR_TYPE_SYMLINK	'2'
85 #define	TAR_TYPE_CHAR		'3'
86 #define	TAR_TYPE_BLOCK		'4'
87 #define	TAR_TYPE_DIRECTORY	'5'
88 #define	TAR_TYPE_FIFO		'6'
89 #define	TAR_TYPE_CONTIG		'7'
90 #define	TAR_TYPE_GLOBAL_EXTHDR	'g'
91 #define	TAR_TYPE_EXTHDR		'x'
92 #define	TAR_TYPE_GNU_SPARSE	'S'
93 
94 #define	USTAR_MAGIC		(uint8_t []){ 'u', 's', 't', 'a', 'r', 0 }
95 #define	USTAR_VERSION		(uint8_t []){ '0', '0' }
96 #define	GNUTAR_MAGIC		(uint8_t []){ 'u', 's', 't', 'a', 'r', ' ' }
97 #define	GNUTAR_VERSION		(uint8_t []){ ' ', '\x0' }
98 
99 #define	DEFDIRMODE	(S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
100 
101 MALLOC_DEFINE(M_TARFSMNT, "tarfs mount", "tarfs mount structures");
102 MALLOC_DEFINE(M_TARFSNODE, "tarfs node", "tarfs node structures");
103 
104 static vfs_mount_t	tarfs_mount;
105 static vfs_unmount_t	tarfs_unmount;
106 static vfs_root_t	tarfs_root;
107 static vfs_statfs_t	tarfs_statfs;
108 static vfs_fhtovp_t	tarfs_fhtovp;
109 
110 static const char *tarfs_opts[] = {
111 	"from", "gid", "mode", "uid", "verify",
112 	NULL
113 };
114 
/*
 * Parses a signed octal number from the len-byte field at strp and
 * returns its value.
 *
 * Leading spaces and tabs are skipped and an optional '-' sign is
 * accepted.  Parsing stops at the first byte that is not an octal digit
 * or at the end of the field, whichever comes first.  A field that holds
 * only blanks yields 0.  A magnitude that does not fit in an int64_t is
 * clamped to INT64_MAX (so the most negative result is -INT64_MAX).
 *
 * XXX Does not report errors.
 */
static int64_t
tarfs_str2octal(const char *strp, size_t len)
{
	int64_t val;
	size_t idx;
	int digit, sign;

	/*
	 * Skip leading spaces or tabs.
	 * XXX why?  POSIX requires numeric fields to be 0-padded.
	 */
	for (idx = 0; idx < len; idx++)
		if (strp[idx] != ' ' && strp[idx] != '\t')
			break;

	if (idx == len)
		return (0);

	sign = 1;
	if (strp[idx] == '-') {
		sign = -1;
		idx++;
	}

	val = 0;
	for (; idx < len; idx++) {
		if (strp[idx] < '0' || strp[idx] > '7')
			break;
		digit = strp[idx] - '0';

		/*
		 * Clamp before accumulating: the multiplication below can
		 * then never overflow, and a value that does fit is not
		 * mistaken for an overflow just because it is large.
		 */
		if (val > (INT64_MAX - digit) / 8) {
			val = INT64_MAX;
			break;
		}
		val = val * 8 + digit;
	}

	return (sign > 0 ? val : -val);
}
159 
/*
 * Reads a len-byte extended numeric value from strp.  The first byte has
 * bit 7 set to indicate the format; the remaining 7 bits + the (len - 1)
 * bytes that follow form a big-endian signed two's complement binary
 * number.  Returns the value, clamped to INT64_MAX or INT64_MIN if it
 * does not fit in an int64_t.
 *
 * XXX Does not report errors.
 */
static int64_t
tarfs_str2base256(const char *strp, size_t len)
{
	int64_t val;
	size_t idx;

	KASSERT(strp[0] & 0x80, ("not an extended numeric value"));

	/*
	 * Bit 6 of the first byte is the sign bit; the low six bits are
	 * the most significant payload bits.  Build the sign-extended
	 * start value arithmetically rather than by shifting a negative
	 * number.
	 */
	val = strp[0] & 0x3f;
	if ((strp[0] & 0x40) != 0)
		val -= 0x40;

	/* Read subsequent bytes */
	for (idx = 1; idx < len; idx++) {
		/*
		 * Clamp on overflow and underflow.  The test is made
		 * before scaling so that the multiplication is always
		 * representable.
		 */
		if (val > INT64_MAX / 256)
			return (INT64_MAX);
		if (val < INT64_MIN / 256)
			return (INT64_MIN);
		val = val * 256 + (unsigned char)strp[idx];
	}

	return (val);
}
199 
/*
 * Decodes a len-byte numeric header field at strp.  If bit 7 of the
 * first byte is set, the field is treated as an extended numeric value
 * (signed two's complement); otherwise it is treated as a signed octal
 * value.  An empty field decodes to 0.
 *
 * XXX practically no error checking or handling
 */
static int64_t
tarfs_str2int64(const char *strp, size_t len)
{

	if (len == 0)
		return (0);

	return ((strp[0] & 0x80) == 0 ?
	    tarfs_str2octal(strp, len) : tarfs_str2base256(strp, len));
}
218 
219 /*
220  * Verifies the checksum of a header.  Returns true if the checksum is
221  * valid, false otherwise.
222  */
223 static boolean_t
224 tarfs_checksum(struct ustar_header *hdrp)
225 {
226 	const unsigned char *ptr;
227 	int64_t checksum, hdrsum;
228 	size_t idx;
229 
230 	hdrsum = tarfs_str2int64(hdrp->checksum, sizeof(hdrp->checksum));
231 	TARFS_DPF(CHECKSUM, "%s: header checksum %lx\n", __func__, hdrsum);
232 
233 	checksum = 0;
234 	for (ptr = (const unsigned char *)hdrp;
235 	     ptr < (const unsigned char *)hdrp->checksum; ptr++)
236 		checksum += *ptr;
237 	for (idx = 0; idx < sizeof(hdrp->checksum); idx++)
238 		checksum += 0x20;
239 	for (ptr = (const unsigned char *)hdrp->typeflag;
240 	     ptr < (const unsigned char *)(hdrp + 1); ptr++)
241 		checksum += *ptr;
242 	TARFS_DPF(CHECKSUM, "%s: calc unsigned checksum %lx\n", __func__,
243 	    checksum);
244 	if (hdrsum == checksum)
245 		return (true);
246 
247 	/*
248 	 * Repeat test with signed bytes, some older formats use a broken
249 	 * form of the calculation
250 	 */
251 	checksum = 0;
252 	for (ptr = (const unsigned char *)hdrp;
253 	     ptr < (const unsigned char *)&hdrp->checksum; ptr++)
254 		checksum += *((const signed char *)ptr);
255 	for (idx = 0; idx < sizeof(hdrp->checksum); idx++)
256 		checksum += 0x20;
257 	for (ptr = (const unsigned char *)&hdrp->typeflag;
258 	     ptr < (const unsigned char *)(hdrp + 1); ptr++)
259 		checksum += *((const signed char *)ptr);
260 	TARFS_DPF(CHECKSUM, "%s: calc signed checksum %lx\n", __func__,
261 	    checksum);
262 	if (hdrsum == checksum)
263 		return (true);
264 
265 	return (false);
266 }
267 
268 
269 /*
270  * Looks up a path in the tarfs node tree.
271  *
272  * - If the path exists, stores a pointer to the corresponding tarfs_node
273  *   in retnode and a pointer to its parent in retparent.
274  *
275  * - If the path does not exist, but create_dirs is true, creates ancestor
276  *   directories and returns NULL in retnode and the parent in retparent.
277  *
278  * - If the path does not exist and create_dirs is false, stops at the
279  *   first missing path name component.
280  *
281  * - In all cases, on return, endp and sepp point to the beginning and
282  *   end, respectively, of the last-processed path name component.
283  *
284  * - Returns 0 if the node was found, ENOENT if it was not, and some other
285  *   positive errno value on failure.
286  */
287 static int
288 tarfs_lookup_path(struct tarfs_mount *tmp, char *name, size_t namelen,
289     char **endp, char **sepp, struct tarfs_node **retparent,
290     struct tarfs_node **retnode, boolean_t create_dirs)
291 {
292 	struct componentname cn;
293 	struct tarfs_node *parent, *tnp;
294 	char *sep;
295 	size_t len;
296 	int error;
297 	boolean_t do_lookup;
298 
299 	MPASS(name != NULL && namelen != 0);
300 
301 	do_lookup = true;
302 	error = 0;
303 	parent = tnp = tmp->root;
304 	if (tnp == NULL)
305 		panic("%s: root node not yet created", __func__);
306 
307 	bzero(&cn, sizeof(cn));
308 
309 	TARFS_DPF(LOOKUP, "%s: Full path: %.*s\n", __func__, (int)namelen,
310 	    name);
311 
312 	sep = NULL;
313 	for (;;) {
314 		/* skip leading slash(es) */
315 		while (name[0] == '/' && namelen > 0)
316 			name++, namelen--;
317 
318 		/* did we reach the end? */
319 		if (namelen == 0 || name[0] == '\0') {
320 			name = do_lookup ? NULL : cn.cn_nameptr;
321 			namelen = do_lookup ? 0 : cn.cn_namelen;
322 			break;
323 		}
324 
325 		/* locate the next separator */
326 		for (sep = name, len = 0;
327 		     *sep != '\0' && *sep != '/' && len < namelen;
328 		     sep++, len++)
329 			/* nothing */ ;
330 
331 		/* check for . and .. */
332 		if (name[0] == '.' && len <= 2) {
333 			if (len == 1) {
334 				/* . */
335 				name += len;
336 				namelen -= len;
337 				continue;
338 			} else if (name[1] == '.') {
339 				/* .. */
340 				if (tnp == tmp->root) {
341 					error = EINVAL;
342 					break;
343 				}
344 				tnp = tnp->parent;
345 				parent = tnp->parent;
346 				name += len;
347 				namelen -= len;
348 				continue;
349 			}
350 		}
351 
352 		/* create parent if necessary */
353 		if (!do_lookup) {
354 			TARFS_DPF(ALLOC, "%s: creating %.*s\n", __func__,
355 			    (int)cn.cn_namelen, cn.cn_nameptr);
356 			error = tarfs_alloc_node(tmp, cn.cn_nameptr,
357 			    cn.cn_namelen, VDIR, -1, 0, tmp->mtime, 0, 0,
358 			    DEFDIRMODE, 0, NULL, NODEV, parent, &tnp);
359 			if (error != 0)
360 				break;
361 		}
362 
363 		parent = tnp;
364 		tnp = NULL;
365 		cn.cn_nameptr = name;
366 		cn.cn_namelen = len;
367 		TARFS_DPF(LOOKUP, "%s: Search: %.*s\n", __func__,
368 		    (int)cn.cn_namelen, cn.cn_nameptr);
369 		if (do_lookup) {
370 			tnp = tarfs_lookup_node(parent, NULL, &cn);
371 			if (tnp == NULL) {
372 				do_lookup = false;
373 				if (!create_dirs)
374 					break;
375 			}
376 		}
377 		name += cn.cn_namelen;
378 		namelen -= cn.cn_namelen;
379 	}
380 
381 	TARFS_DPF(LOOKUP, "%s: Parent %p, node %p\n", __func__, parent, tnp);
382 
383 	if (retparent)
384 		*retparent = parent;
385 	if (retnode)
386 		*retnode = tnp;
387 	if (endp) {
388 		if (namelen > 0)
389 			*endp = name;
390 		else
391 			*endp = NULL;
392 	}
393 	if (sepp)
394 		*sepp = sep;
395 	return (error);
396 }
397 
398 /*
399  * Frees a tarfs_mount structure and everything it references.
400  */
401 static void
402 tarfs_free_mount(struct tarfs_mount *tmp)
403 {
404 	struct mount *mp;
405 	struct tarfs_node *tnp;
406 
407 	MPASS(tmp != NULL);
408 
409 	TARFS_DPF(ALLOC, "%s: Freeing mount structure %p\n", __func__, tmp);
410 
411 	TARFS_DPF(ALLOC, "%s: freeing tarfs_node structures\n", __func__);
412 	while (!TAILQ_EMPTY(&tmp->allnodes)) {
413 		tnp = TAILQ_FIRST(&tmp->allnodes);
414 		TAILQ_REMOVE(&tmp->allnodes, tnp, entries);
415 		tarfs_free_node(tnp);
416 	}
417 
418 	(void)tarfs_io_fini(tmp);
419 
420 	TARFS_DPF(ALLOC, "%s: deleting unr header\n", __func__);
421 	delete_unrhdr(tmp->ino_unr);
422 	mp = tmp->vfs;
423 	mp->mnt_data = NULL;
424 
425 	TARFS_DPF(ALLOC, "%s: freeing structure\n", __func__);
426 	free(tmp, M_TARFSMNT);
427 }
428 
429 /*
430  * Processes the tar file header at block offset blknump and allocates and
431  * populates a tarfs_node structure for the file it describes.  Updated
432  * blknump to point to the next unread tar file block, or TAR_EOF if EOF
433  * is reached.  Returns 0 on success or EOF and a positive errno value on
434  * failure.
435  */
436 static int
437 tarfs_alloc_one(struct tarfs_mount *tmp, off_t *blknump)
438 {
439 	char block[TARFS_BLOCKSIZE];
440 	struct ustar_header *hdrp = (struct ustar_header *)block;
441 	struct sbuf *namebuf = NULL;
442 	char *exthdr = NULL, *name = NULL, *link = NULL;
443 	off_t blknum = *blknump;
444 	int endmarker = 0;
445 	char *namep, *sep;
446 	struct tarfs_node *parent, *tnp;
447 	size_t namelen = 0, linklen = 0, realsize = 0, sz;
448 	ssize_t res;
449 	dev_t rdev;
450 	gid_t gid;
451 	mode_t mode;
452 	time_t mtime;
453 	uid_t uid;
454 	long major = -1, minor = -1;
455 	unsigned int flags = 0;
456 	int error;
457 	boolean_t sparse = false;
458 
459 again:
460 	/* read next header */
461 	res = tarfs_io_read_buf(tmp, false, block,
462 	    TARFS_BLOCKSIZE * blknum, TARFS_BLOCKSIZE);
463 	if (res < 0) {
464 		error = -res;
465 		goto bad;
466 	} else if (res < TARFS_BLOCKSIZE) {
467 		goto eof;
468 	}
469 	blknum++;
470 
471 	/* check for end marker */
472 	if (memcmp(block, zero_region, TARFS_BLOCKSIZE) == 0) {
473 		if (endmarker++) {
474 			if (exthdr != NULL) {
475 				TARFS_DPF(IO, "%s: orphaned extended header at %zu\n",
476 				    __func__, TARFS_BLOCKSIZE * (blknum - 1));
477 				free(exthdr, M_TEMP);
478 			}
479 			TARFS_DPF(IO, "%s: end of archive at %zu\n", __func__,
480 			    TARFS_BLOCKSIZE * blknum);
481 			tmp->nblocks = blknum;
482 			*blknump = TAR_EOF;
483 			return (0);
484 		}
485 		goto again;
486 	}
487 
488 	/* verify magic */
489 	if (memcmp(hdrp->magic, USTAR_MAGIC, sizeof(USTAR_MAGIC)) == 0 &&
490 	    memcmp(hdrp->version, USTAR_VERSION, sizeof(USTAR_VERSION)) == 0) {
491 		/* POSIX */
492 	} else if (memcmp(hdrp->magic, GNUTAR_MAGIC, sizeof(GNUTAR_MAGIC)) == 0 &&
493 	    memcmp(hdrp->magic, GNUTAR_MAGIC, sizeof(GNUTAR_MAGIC)) == 0) {
494 		TARFS_DPF(ALLOC, "%s: GNU tar format at %zu\n", __func__,
495 		    TARFS_BLOCKSIZE * (blknum - 1));
496 		error = EFTYPE;
497 		goto bad;
498 	} else {
499 		TARFS_DPF(ALLOC, "%s: unsupported TAR format at %zu\n",
500 		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
501 		error = EINVAL;
502 		goto bad;
503 	}
504 
505 	/* verify checksum */
506 	if (!tarfs_checksum(hdrp)) {
507 		TARFS_DPF(ALLOC, "%s: header checksum failed at %zu\n",
508 		    __func__, TARFS_BLOCKSIZE * (blknum - 1));
509 		error = EINVAL;
510 		goto bad;
511 	}
512 
513 	/* get standard attributes */
514 	mode = tarfs_str2int64(hdrp->mode, sizeof(hdrp->mode));
515 	uid = tarfs_str2int64(hdrp->uid, sizeof(hdrp->uid));
516 	gid = tarfs_str2int64(hdrp->gid, sizeof(hdrp->gid));
517 	sz = tarfs_str2int64(hdrp->size, sizeof(hdrp->size));
518 	mtime = tarfs_str2int64(hdrp->mtime, sizeof(hdrp->mtime));
519 	rdev = NODEV;
520 	TARFS_DPF(ALLOC, "%s: [%c] %zu @%jd %o %d:%d\n", __func__,
521 	    hdrp->typeflag[0], sz, (intmax_t)mtime, mode, uid, gid);
522 
523 	/* extended header? */
524 	if (hdrp->typeflag[0] == TAR_TYPE_GLOBAL_EXTHDR) {
525 		printf("%s: unsupported global extended header at %zu\n",
526 		    __func__, (size_t)(TARFS_BLOCKSIZE * (blknum - 1)));
527 		error = EFTYPE;
528 		goto bad;
529 	}
530 	if (hdrp->typeflag[0] == TAR_TYPE_EXTHDR) {
531 		if (exthdr != NULL) {
532 			TARFS_DPF(IO, "%s: multiple extended headers at %zu\n",
533 			    __func__, TARFS_BLOCKSIZE * (blknum - 1));
534 			error = EFTYPE;
535 			goto bad;
536 		}
537 		/* read the contents of the exthdr */
538 		TARFS_DPF(ALLOC, "%s: %zu-byte extended header at %zd\n",
539 		    __func__, sz, TARFS_BLOCKSIZE * (blknum - 1));
540 		exthdr = malloc(sz, M_TEMP, M_WAITOK);
541 		res = tarfs_io_read_buf(tmp, false, exthdr,
542 		    TARFS_BLOCKSIZE * blknum, sz);
543 		if (res < 0) {
544 			error = -res;
545 			goto bad;
546 		}
547 		if (res < sz) {
548 			goto eof;
549 		}
550 		blknum += TARFS_SZ2BLKS(res);
551 		/* XXX TODO: refactor this parser */
552 		char *line = exthdr;
553 		while (line < exthdr + sz) {
554 			char *eol, *key, *value, *sep;
555 			size_t len = strtoul(line, &sep, 10);
556 			if (len == 0 || sep == line || *sep != ' ') {
557 				TARFS_DPF(ALLOC, "%s: exthdr syntax error\n",
558 				    __func__);
559 				error = EINVAL;
560 				goto bad;
561 			}
562 			if (line + len > exthdr + sz) {
563 				TARFS_DPF(ALLOC, "%s: exthdr overflow\n",
564 				    __func__);
565 				error = EINVAL;
566 				goto bad;
567 			}
568 			eol = line + len - 1;
569 			*eol = '\0';
570 			line += len;
571 			key = sep + 1;
572 			sep = strchr(key, '=');
573 			if (sep == NULL) {
574 				TARFS_DPF(ALLOC, "%s: exthdr syntax error\n",
575 				    __func__);
576 				error = EINVAL;
577 				goto bad;
578 			}
579 			*sep = '\0';
580 			value = sep + 1;
581 			TARFS_DPF(ALLOC, "%s: exthdr %s=%s\n", __func__,
582 			    key, value);
583 			if (strcmp(key, "linkpath") == 0) {
584 				link = value;
585 				linklen = eol - value;
586 			} else if (strcmp(key, "GNU.sparse.major") == 0) {
587 				sparse = true;
588 				major = strtol(value, &sep, 10);
589 				if (sep != eol) {
590 					printf("exthdr syntax error\n");
591 					error = EINVAL;
592 					goto bad;
593 				}
594 			} else if (strcmp(key, "GNU.sparse.minor") == 0) {
595 				sparse = true;
596 				minor = strtol(value, &sep, 10);
597 				if (sep != eol) {
598 					printf("exthdr syntax error\n");
599 					error = EINVAL;
600 					goto bad;
601 				}
602 			} else if (strcmp(key, "GNU.sparse.name") == 0) {
603 				sparse = true;
604 				name = value;
605 				namelen = eol - value;
606 				if (namelen == 0) {
607 					printf("exthdr syntax error\n");
608 					error = EINVAL;
609 					goto bad;
610 				}
611 			} else if (strcmp(key, "GNU.sparse.realsize") == 0) {
612 				sparse = true;
613 				realsize = strtoul(value, &sep, 10);
614 				if (sep != eol) {
615 					printf("exthdr syntax error\n");
616 					error = EINVAL;
617 					goto bad;
618 				}
619 			} else if (strcmp(key, "SCHILY.fflags") == 0) {
620 				flags |= tarfs_strtofflags(value, &sep);
621 				if (sep != eol) {
622 					printf("exthdr syntax error\n");
623 					error = EINVAL;
624 					goto bad;
625 				}
626 			}
627 		}
628 		goto again;
629 	}
630 
631 	/* sparse file consistency checks */
632 	if (sparse) {
633 		TARFS_DPF(ALLOC, "%s: %s: sparse %ld.%ld (%zu bytes)\n", __func__,
634 		    name, major, minor, realsize);
635 		if (major != 1 || minor != 0 || name == NULL || realsize == 0 ||
636 		    hdrp->typeflag[0] != TAR_TYPE_FILE) {
637 			TARFS_DPF(ALLOC, "%s: invalid sparse format\n", __func__);
638 			error = EINVAL;
639 			goto bad;
640 		}
641 	}
642 
643 	/* file name */
644 	if (name == NULL) {
645 		if (hdrp->prefix[0] != '\0') {
646 			namebuf = sbuf_new_auto();
647 			sbuf_printf(namebuf, "%.*s/%.*s",
648 			    (int)sizeof(hdrp->prefix), hdrp->prefix,
649 			    (int)sizeof(hdrp->name), hdrp->name);
650 			sbuf_finish(namebuf);
651 			name = sbuf_data(namebuf);
652 			namelen = sbuf_len(namebuf);
653 		} else {
654 			name = hdrp->name;
655 			namelen = strnlen(hdrp->name, sizeof(hdrp->name));
656 		}
657 	}
658 
659 	error = tarfs_lookup_path(tmp, name, namelen, &namep,
660 	    &sep, &parent, &tnp, true);
661 	if (error != 0)
662 		goto bad;
663 	if (tnp != NULL) {
664 		if (hdrp->typeflag[0] == TAR_TYPE_DIRECTORY) {
665 			/* XXX set attributes? */
666 			goto skip;
667 		}
668 		TARFS_DPF(ALLOC, "%s: duplicate file %.*s\n", __func__,
669 		    (int)namelen, name);
670 		error = EINVAL;
671 		goto bad;
672 	}
673 	switch (hdrp->typeflag[0]) {
674 	case TAR_TYPE_DIRECTORY:
675 		error = tarfs_alloc_node(tmp, namep, sep - namep, VDIR,
676 		    0, 0, mtime, uid, gid, mode, flags, NULL, 0,
677 		    parent, &tnp);
678 		break;
679 	case TAR_TYPE_FILE:
680 		error = tarfs_alloc_node(tmp, namep, sep - namep, VREG,
681 		    blknum * TARFS_BLOCKSIZE, sz, mtime, uid, gid, mode,
682 		    flags, NULL, 0, parent, &tnp);
683 		if (error == 0 && sparse) {
684 			error = tarfs_load_blockmap(tnp, realsize);
685 		}
686 		break;
687 	case TAR_TYPE_HARDLINK:
688 		if (link == NULL) {
689 			link = hdrp->linkname;
690 			linklen = strnlen(link, sizeof(hdrp->linkname));
691 		}
692 		error = tarfs_alloc_node(tmp, namep, sep - namep, VREG,
693 		    0, 0, 0, 0, 0, 0, 0, NULL, 0, parent, &tnp);
694 		if (error != 0) {
695 			goto bad;
696 		}
697 		error = tarfs_lookup_path(tmp, link, linklen, NULL,
698 		    NULL, NULL, &tnp->other, false);
699 		if (tnp->other == NULL ||
700 		    tnp->other->type != VREG ||
701 		    tnp->other->other != NULL) {
702 			TARFS_DPF(ALLOC, "%s: %.*s: dead hard link to %.*s\n",
703 			    __func__, (int)namelen, name, (int)linklen, link);
704 			error = EINVAL;
705 			goto bad;
706 		}
707 		break;
708 	case TAR_TYPE_SYMLINK:
709 		if (link == NULL) {
710 			link = hdrp->linkname;
711 			linklen = strnlen(link, sizeof(hdrp->linkname));
712 		}
713 		error = tarfs_alloc_node(tmp, namep, sep - namep, VLNK,
714 		    0, linklen, mtime, uid, gid, mode, flags, link, 0,
715 		    parent, &tnp);
716 		break;
717 	case TAR_TYPE_BLOCK:
718 		major = tarfs_str2int64(hdrp->major, sizeof(hdrp->major));
719 		minor = tarfs_str2int64(hdrp->minor, sizeof(hdrp->minor));
720 		rdev = makedev(major, minor);
721 		error = tarfs_alloc_node(tmp, namep, sep - namep, VBLK,
722 		    0, 0, mtime, uid, gid, mode, flags, NULL, rdev,
723 		    parent, &tnp);
724 		break;
725 	case TAR_TYPE_CHAR:
726 		major = tarfs_str2int64(hdrp->major, sizeof(hdrp->major));
727 		minor = tarfs_str2int64(hdrp->minor, sizeof(hdrp->minor));
728 		rdev = makedev(major, minor);
729 		error = tarfs_alloc_node(tmp, namep, sep - namep, VCHR,
730 		    0, 0, mtime, uid, gid, mode, flags, NULL, rdev,
731 		    parent, &tnp);
732 		break;
733 	default:
734 		TARFS_DPF(ALLOC, "%s: unsupported type %c for %.*s\n",
735 		    __func__, hdrp->typeflag[0], (int)namelen, name);
736 		error = EINVAL;
737 		break;
738 	}
739 	if (error != 0)
740 		goto bad;
741 
742 skip:
743 	blknum += TARFS_SZ2BLKS(sz);
744 	tmp->nblocks = blknum;
745 	*blknump = blknum;
746 	if (exthdr != NULL) {
747 		free(exthdr, M_TEMP);
748 	}
749 	if (namebuf != NULL) {
750 		sbuf_delete(namebuf);
751 	}
752 	return (0);
753 eof:
754 	TARFS_DPF(IO, "%s: premature end of file\n", __func__);
755 	error = EIO;
756 	goto bad;
757 bad:
758 	if (exthdr != NULL) {
759 		free(exthdr, M_TEMP);
760 	}
761 	if (namebuf != NULL) {
762 		sbuf_delete(namebuf);
763 	}
764 	return (error);
765 }
766 
767 /*
768  * Allocates and populates the metadata structures for the tar file
769  * referenced by vp.  On success, a pointer to the tarfs_mount structure
770  * is stored in tmpp.  Returns 0 on success or a positive errno value on
771  * failure.
772  */
773 static int
774 tarfs_alloc_mount(struct mount *mp, struct vnode *vp,
775     uid_t root_uid, gid_t root_gid, mode_t root_mode,
776     struct tarfs_mount **tmpp)
777 {
778 	struct vattr va;
779 	struct thread *td = curthread;
780 	char *fullpath;
781 	struct tarfs_mount *tmp;
782 	struct tarfs_node *root;
783 	off_t blknum;
784 	time_t mtime;
785 	int error;
786 
787 	KASSERT(tmpp != NULL, ("tarfs mount return is NULL"));
788 	ASSERT_VOP_LOCKED(vp, __func__);
789 
790 	tmp = NULL;
791 	fullpath = NULL;
792 
793 	TARFS_DPF(ALLOC, "%s: Allocating tarfs mount structure for vp %p\n",
794 	    __func__, vp);
795 
796 	/* Get source metadata */
797 	error = VOP_GETATTR(vp, &va, td->td_ucred);
798 	if (error != 0) {
799 		return (error);
800 	}
801 	VOP_UNLOCK(vp);
802 	mtime = va.va_mtime.tv_sec;
803 
804 	/* Allocate and initialize tarfs mount structure */
805 	tmp = (struct tarfs_mount *)malloc(sizeof(struct tarfs_mount),
806 	    M_TARFSMNT, M_WAITOK | M_ZERO);
807 	TARFS_DPF(ALLOC, "%s: Allocated mount structure\n", __func__);
808 	mp->mnt_data = tmp;
809 
810 	mtx_init(&tmp->allnode_lock, "tarfs allnode lock", NULL,
811 	    MTX_DEF);
812 	TAILQ_INIT(&tmp->allnodes);
813 	tmp->ino_unr = new_unrhdr(TARFS_MININO, INT_MAX, &tmp->allnode_lock);
814 	tmp->vp = vp;
815 	tmp->vfs = mp;
816 	tmp->mtime = mtime;
817 
818 	/*
819 	 * XXX The decompression layer passes everything through the
820 	 * buffer cache, and the buffer cache wants to know our blocksize,
821 	 * but mnt_stat normally isn't populated until after we return, so
822 	 * we have to cheat a bit.
823 	 */
824 	tmp->iosize = 1U << tarfs_ioshift;
825 	mp->mnt_stat.f_iosize = tmp->iosize;
826 
827 	/* Initialize decompression layer */
828 	error = tarfs_io_init(tmp);
829 	if (error != 0)
830 		goto bad;
831 
832 	error = tarfs_alloc_node(tmp, NULL, 0, VDIR, 0, 0, mtime, root_uid,
833 	    root_gid, root_mode & ALLPERMS, 0, NULL, NODEV, NULL, &root);
834 	if (error != 0 || root == NULL)
835 		goto bad;
836 	tmp->root = root;
837 
838 	blknum = 0;
839 	do {
840 		if ((error = tarfs_alloc_one(tmp, &blknum)) != 0) {
841 			goto bad;
842 		}
843 	} while (blknum != TAR_EOF);
844 
845 	*tmpp = tmp;
846 
847 	TARFS_DPF(ALLOC, "%s: pfsmnt_root %p\n", __func__, tmp->root);
848 	return (0);
849 
850 bad:
851 	if (tmp != NULL)
852 		tarfs_free_mount(tmp);
853 	free(fullpath, M_TEMP);
854 	return (error);
855 }
856 
857 /*
858  * VFS Operations.
859  */
860 
861 static int
862 tarfs_mount(struct mount *mp)
863 {
864 	struct nameidata nd;
865 	struct vattr va;
866 	struct tarfs_mount *tmp = NULL;
867 	struct thread *td = curthread;
868 	struct vnode *vp;
869 	char *from;
870 	uid_t root_uid;
871 	gid_t root_gid;
872 	mode_t root_mode;
873 	int error, flags, len;
874 
875 	if (mp->mnt_flag & MNT_UPDATE)
876 		return (EOPNOTSUPP);
877 
878 	if (vfs_filteropt(mp->mnt_optnew, tarfs_opts))
879 		return (EINVAL);
880 
881 	vn_lock(mp->mnt_vnodecovered, LK_SHARED | LK_RETRY);
882 	error = VOP_GETATTR(mp->mnt_vnodecovered, &va, mp->mnt_cred);
883 	VOP_UNLOCK(mp->mnt_vnodecovered);
884 	if (error)
885 		return (error);
886 
887 	if (mp->mnt_cred->cr_ruid != 0 ||
888 	    vfs_scanopt(mp->mnt_optnew, "gid", "%d", &root_gid) != 1)
889 		root_gid = va.va_gid;
890 	if (mp->mnt_cred->cr_ruid != 0 ||
891 	    vfs_scanopt(mp->mnt_optnew, "uid", "%d", &root_uid) != 1)
892 		root_uid = va.va_uid;
893 	if (mp->mnt_cred->cr_ruid != 0 ||
894 	    vfs_scanopt(mp->mnt_optnew, "mode", "%ho", &root_mode) != 1)
895 		root_mode = va.va_mode;
896 
897 	error = vfs_getopt(mp->mnt_optnew, "from", (void **)&from, &len);
898 	if (error != 0 || from[len - 1] != '\0')
899 		return (EINVAL);
900 
901 	/* Find the source tarball */
902 	TARFS_DPF(FS, "%s(%s, uid=%u, gid=%u, mode=%o)\n", __func__,
903 	    from, root_uid, root_gid, root_mode);
904 	flags = FREAD;
905 	if (vfs_flagopt(mp->mnt_optnew, "verify", NULL, 0)) {
906 	    flags |= O_VERIFY;
907 	}
908 	NDINIT(&nd, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF, UIO_SYSSPACE, from);
909 	error = namei(&nd);
910 	if (error != 0)
911 		return (error);
912 	NDFREE_PNBUF(&nd);
913 	vp = nd.ni_vp;
914 	TARFS_DPF(FS, "%s: N: hold %u use %u lock 0x%x\n", __func__,
915 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
916 	/* vp is now held and locked */
917 
918 	/* Open the source tarball */
919 	error = vn_open_vnode(vp, flags, td->td_ucred, td, NULL);
920 	if (error != 0) {
921 		TARFS_DPF(FS, "%s: failed to open %s: %d\n", __func__,
922 		    from, error);
923 		vput(vp);
924 		goto bad;
925 	}
926 	TARFS_DPF(FS, "%s: O: hold %u use %u lock 0x%x\n", __func__,
927 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
928 	if (vp->v_type != VREG) {
929 		TARFS_DPF(FS, "%s: not a regular file\n", __func__);
930 		error = EOPNOTSUPP;
931 		goto bad_open_locked;
932 	}
933 	error = priv_check(td, PRIV_VFS_MOUNT_PERM);
934 	if (error != 0) {
935 		TARFS_DPF(FS, "%s: not permitted to mount\n", __func__);
936 		goto bad_open_locked;
937 	}
938 	if (flags & O_VERIFY) {
939 		mp->mnt_flag |= MNT_VERIFIED;
940 	}
941 
942 	/* Allocate the tarfs mount */
943 	error = tarfs_alloc_mount(mp, vp, root_uid, root_gid, root_mode, &tmp);
944 	/* vp is now held but unlocked */
945 	if (error != 0) {
946 		TARFS_DPF(FS, "%s: failed to mount %s: %d\n", __func__,
947 		    from, error);
948 		goto bad_open_unlocked;
949 	}
950 	TARFS_DPF(FS, "%s: M: hold %u use %u lock 0x%x\n", __func__,
951 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
952 
953 	/* Unconditionally mount as read-only */
954 	MNT_ILOCK(mp);
955 	mp->mnt_flag |= (MNT_LOCAL | MNT_RDONLY);
956 	MNT_IUNLOCK(mp);
957 
958 	vfs_getnewfsid(mp);
959 	vfs_mountedfrom(mp, "tarfs");
960 	TARFS_DPF(FS, "%s: success\n", __func__);
961 
962 	return (0);
963 
964 bad_open_locked:
965 	/* vp must be held and locked */
966 	TARFS_DPF(FS, "%s: L: hold %u use %u lock 0x%x\n", __func__,
967 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
968 	VOP_UNLOCK(vp);
969 bad_open_unlocked:
970 	/* vp must be held and unlocked */
971 	TARFS_DPF(FS, "%s: E: hold %u use %u lock 0x%x\n", __func__,
972 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
973 	(void)vn_close(vp, flags, td->td_ucred, td);
974 bad:
975 	/* vp must be released and unlocked */
976 	TARFS_DPF(FS, "%s: X: hold %u use %u lock 0x%x\n", __func__,
977 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
978 	return (error);
979 }
980 
981 /*
982  * Unmounts a tarfs filesystem.
983  */
984 static int
985 tarfs_unmount(struct mount *mp, int mntflags)
986 {
987 	struct thread *td = curthread;
988 	struct tarfs_mount *tmp;
989 	struct vnode *vp;
990 	int error;
991 	int flags = 0;
992 
993 	TARFS_DPF(FS, "%s: Unmounting %p\n", __func__, mp);
994 
995 	/* Handle forced unmounts */
996 	if (mntflags & MNT_FORCE)
997 		flags |= FORCECLOSE;
998 
999 	/* Finalize all pending I/O */
1000 	error = vflush(mp, 0, flags, curthread);
1001 	if (error != 0)
1002 		return (error);
1003 	tmp = MP_TO_TARFS_MOUNT(mp);
1004 	vp = tmp->vp;
1005 
1006 	MPASS(vp != NULL);
1007 	TARFS_DPF(FS, "%s: U: hold %u use %u lock 0x%x\n", __func__,
1008 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1009 	vn_close(vp, FREAD, td->td_ucred, td);
1010 	TARFS_DPF(FS, "%s: C: hold %u use %u lock 0x%x\n", __func__,
1011 	    vp->v_holdcnt, vp->v_usecount, VOP_ISLOCKED(vp));
1012 	tarfs_free_mount(tmp);
1013 
1014 	return (0);
1015 }
1016 
1017 /*
1018  * Gets the root of a tarfs filesystem.  Returns 0 on success or a
1019  * positive errno value on failure.
1020  */
1021 static int
1022 tarfs_root(struct mount *mp, int flags, struct vnode **vpp)
1023 {
1024 	struct vnode *nvp;
1025 	int error;
1026 
1027 	TARFS_DPF(FS, "%s: Getting root vnode\n", __func__);
1028 
1029 	error = VFS_VGET(mp, TARFS_ROOTINO, LK_EXCLUSIVE, &nvp);
1030 	if (error != 0)
1031 		return (error);
1032 
1033 	nvp->v_vflag |= VV_ROOT;
1034 	*vpp = nvp;
1035 	return (0);
1036 }
1037 
1038 /*
1039  * Gets statistics for a tarfs filesystem.  Returns 0.
1040  */
1041 static int
1042 tarfs_statfs(struct mount *mp, struct statfs *sbp)
1043 {
1044 	struct tarfs_mount *tmp;
1045 
1046 	tmp = MP_TO_TARFS_MOUNT(mp);
1047 
1048 	sbp->f_bsize = TARFS_BLOCKSIZE;
1049 	sbp->f_iosize = tmp->iosize;
1050 	sbp->f_blocks = tmp->nblocks;
1051 	sbp->f_bfree = 0;
1052 	sbp->f_bavail = 0;
1053 	sbp->f_files = tmp->nfiles;
1054 	sbp->f_ffree = 0;
1055 
1056 	return (0);
1057 }
1058 
1059 /*
1060  * Gets a vnode for the given inode.  On success, a pointer to the vnode
1061  * is stored in vpp.  Returns 0 on success or a positive errno value on
1062  * failure.
1063  */
1064 static int
1065 tarfs_vget(struct mount *mp, ino_t ino, int lkflags, struct vnode **vpp)
1066 {
1067 	struct tarfs_mount *tmp;
1068 	struct tarfs_node *tnp;
1069 	struct thread *td;
1070 	struct vnode *vp;
1071 	int error;
1072 
1073 	TARFS_DPF(FS, "%s: mp %p, ino %lu, lkflags %d\n", __func__, mp, ino,
1074 	    lkflags);
1075 
1076 	td = curthread;
1077 	error = vfs_hash_get(mp, ino, lkflags, td, vpp, NULL, NULL);
1078 	if (error != 0)
1079 		return (error);
1080 
1081 	if (*vpp != NULL) {
1082 		TARFS_DPF(FS, "%s: found hashed vnode %p\n", __func__, *vpp);
1083 		return (error);
1084 	}
1085 
1086 	TARFS_DPF(FS, "%s: no hashed vnode for inode %lu\n", __func__, ino);
1087 
1088 	tmp = MP_TO_TARFS_MOUNT(mp);
1089 
1090 	if (ino == TARFS_ZIOINO) {
1091 		error = vget(tmp->znode, lkflags);
1092 		if (error != 0)
1093 			return (error);
1094 		*vpp = tmp->znode;
1095 		return (0);
1096 	}
1097 
1098 	/* XXX Should use hash instead? */
1099 	TAILQ_FOREACH(tnp, &tmp->allnodes, entries) {
1100 		if (tnp->ino == ino)
1101 			break;
1102 	}
1103 	TARFS_DPF(FS, "%s: search of all nodes found %p\n", __func__, tnp);
1104 	if (tnp == NULL)
1105 		return (ENOENT);
1106 
1107 	error = getnewvnode("tarfs", mp, &tarfs_vnodeops, &vp);
1108 	if (error != 0)
1109 		goto bad;
1110 	TARFS_DPF(FS, "%s: allocated vnode\n", __func__);
1111 	vp->v_data = tnp;
1112 	vp->v_type = tnp->type;
1113 	tnp->vnode = vp;
1114 
1115 	lockmgr(vp->v_vnlock, lkflags, NULL);
1116 	error = insmntque(vp, mp);
1117 	if (error != 0)
1118 		goto bad;
1119 	TARFS_DPF(FS, "%s: inserting entry into VFS hash\n", __func__);
1120 	error = vfs_hash_insert(vp, ino, lkflags, td, vpp, NULL, NULL);
1121 	if (error != 0 || *vpp != NULL)
1122 		return (error);
1123 
1124 	vn_set_state(vp, VSTATE_CONSTRUCTED);
1125 	*vpp = vp;
1126 	return (0);
1127 
1128 bad:
1129 	*vpp = NULLVP;
1130 	return (error);
1131 }
1132 
1133 static int
1134 tarfs_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
1135 {
1136 	struct tarfs_node *tnp;
1137 	struct tarfs_fid *tfp;
1138 	struct vnode *nvp;
1139 	int error;
1140 
1141 	tfp = (struct tarfs_fid *)fhp;
1142 	MP_TO_TARFS_MOUNT(mp);
1143 	if (tfp->ino < TARFS_ROOTINO || tfp->ino > INT_MAX)
1144 		return (ESTALE);
1145 
1146 	error = VFS_VGET(mp, tfp->ino, LK_EXCLUSIVE, &nvp);
1147 	if (error != 0) {
1148 		*vpp = NULLVP;
1149 		return (error);
1150 	}
1151 	tnp = VP_TO_TARFS_NODE(nvp);
1152 	if (tnp->mode == 0 ||
1153 	    tnp->gen != tfp->gen ||
1154 	    tnp->nlink <= 0) {
1155 		vput(nvp);
1156 		*vpp = NULLVP;
1157 		return (ESTALE);
1158 	}
1159 	*vpp = nvp;
1160 	return (0);
1161 }
1162 
1163 static struct vfsops tarfs_vfsops = {
1164 	.vfs_fhtovp =	tarfs_fhtovp,
1165 	.vfs_mount =	tarfs_mount,
1166 	.vfs_root =	tarfs_root,
1167 	.vfs_statfs =	tarfs_statfs,
1168 	.vfs_unmount =	tarfs_unmount,
1169 	.vfs_vget =	tarfs_vget,
1170 };
1171 VFS_SET(tarfs_vfsops, tarfs, VFCF_READONLY);
1172 MODULE_VERSION(tarfs, 1);
1173 MODULE_DEPEND(tarfs, xz, 1, 1, 1);
1174