xref: /freebsd/sys/fs/tarfs/tarfs_io.c (revision 51015e6d0f570239b0c2088dc6cf2b018928375d)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013 Juniper Networks, Inc.
5  * Copyright (c) 2022-2023 Klara, Inc.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include "opt_tarfs.h"
30 #include "opt_zstdio.h"
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/counter.h>
35 #include <sys/bio.h>
36 #include <sys/buf.h>
37 #include <sys/malloc.h>
38 #include <sys/mount.h>
39 #include <sys/sysctl.h>
40 #include <sys/uio.h>
41 #include <sys/vnode.h>
42 
43 #if defined(ZSTDIO)
44 #define TARFS_ZIO 1
45 #else
46 #undef TARFS_ZIO
47 #endif
48 
49 #ifdef ZSTDIO
50 #define ZSTD_STATIC_LINKING_ONLY
51 #include <contrib/zstd/lib/zstd.h>
52 #endif
53 
54 #include <fs/tarfs/tarfs.h>
55 #include <fs/tarfs/tarfs_dbg.h>
56 
57 #ifdef TARFS_DEBUG
/*
 * Debug-only statistics for the decompression layer, published below the
 * zio node declared here.
 */
SYSCTL_NODE(_vfs_tarfs, OID_AUTO, zio, CTLFLAG_RD, 0,
    "Tar filesystem decompression layer");
/* Bumped by the number of bytes each decompression call produces. */
COUNTER_U64_DEFINE_EARLY(tarfs_zio_inflated);
SYSCTL_COUNTER_U64(_vfs_tarfs_zio, OID_AUTO, inflated, CTLFLAG_RD,
    &tarfs_zio_inflated, "Amount of compressed data inflated.");
/* Bumped at the end of each zstd read by the amount the uio advanced. */
COUNTER_U64_DEFINE_EARLY(tarfs_zio_consumed);
SYSCTL_COUNTER_U64(_vfs_tarfs_zio, OID_AUTO, consumed, CTLFLAG_RD,
    &tarfs_zio_consumed, "Amount of compressed data consumed.");
/* Bumped by the number of bytes copied out through a bounce buffer. */
COUNTER_U64_DEFINE_EARLY(tarfs_zio_bounced);
SYSCTL_COUNTER_U64(_vfs_tarfs_zio, OID_AUTO, bounced, CTLFLAG_RD,
    &tarfs_zio_bounced, "Amount of decompressed data bounced.");
69 
70 static int
71 tarfs_sysctl_handle_zio_reset(SYSCTL_HANDLER_ARGS)
72 {
73 	unsigned int tmp;
74 	int error;
75 
76 	tmp = 0;
77 	if ((error = SYSCTL_OUT(req, &tmp, sizeof(tmp))) != 0)
78 		return (error);
79 	if (req->newptr != NULL) {
80 		if ((error = SYSCTL_IN(req, &tmp, sizeof(tmp))) != 0)
81 			return (error);
82 		counter_u64_zero(tarfs_zio_inflated);
83 		counter_u64_zero(tarfs_zio_consumed);
84 		counter_u64_zero(tarfs_zio_bounced);
85 	}
86 	return (0);
87 }
88 
89 SYSCTL_PROC(_vfs_tarfs_zio, OID_AUTO, reset,
90     CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW,
91     NULL, 0, tarfs_sysctl_handle_zio_reset, "IU",
92     "Reset compression counters.");
93 #endif
94 
/*
 * Malloc types used by the decompression layer.  M_TARFSZSTATE is used in
 * this file for the zio structure, its frame index, and the zstd state.
 */
MALLOC_DEFINE(M_TARFSZSTATE, "tarfs zstate", "tarfs decompression state");
MALLOC_DEFINE(M_TARFSZBUF, "tarfs zbuf", "tarfs decompression buffers");
97 
/*
 * Leading bytes used to recognize a compressed archive.  Each macro is a
 * compound literal of array type, so sizeof() on it yields the number of
 * signature bytes.  Note that the ZLIB_MAGIC bytes are the start of a
 * gzip header (0x1f 0x8b, method 8 = deflate).
 */
#define XZ_MAGIC		(uint8_t[]){ 0xfd, 0x37, 0x7a, 0x58, 0x5a }
#define ZLIB_MAGIC		(uint8_t[]){ 0x1f, 0x8b, 0x08 }
#define ZSTD_MAGIC		(uint8_t[]){ 0x28, 0xb5, 0x2f, 0xfd }
101 
102 #ifdef ZSTDIO
/* Per-mount zstd decompression state, hung off the zio structure. */
struct tarfs_zstd {
	ZSTD_DStream *zds;	/* streaming decompression context */
};
106 #endif
107 
108 /* XXX review use of curthread / uio_td / td_cred */
109 
110 /*
111  * Reads from the tar file according to the provided uio.  If the archive
112  * is compressed and raw is false, reads the decompressed stream;
113  * otherwise, reads directly from the original file.  Returns 0 on success
114  * and a positive errno value on failure.
115  */
116 int
117 tarfs_io_read(struct tarfs_mount *tmp, bool raw, struct uio *uiop)
118 {
119 	void *rl = NULL;
120 	off_t off = uiop->uio_offset;
121 	size_t len = uiop->uio_resid;
122 	int error;
123 
124 	if (raw || tmp->znode == NULL) {
125 		rl = vn_rangelock_rlock(tmp->vp, off, off + len);
126 		error = vn_lock(tmp->vp, LK_SHARED);
127 		if (error == 0) {
128 			error = VOP_READ(tmp->vp, uiop,
129 			    IO_DIRECT|IO_NODELOCKED,
130 			    uiop->uio_td->td_ucred);
131 			VOP_UNLOCK(tmp->vp);
132 		}
133 		vn_rangelock_unlock(tmp->vp, rl);
134 	} else {
135 		error = vn_lock(tmp->znode, LK_EXCLUSIVE);
136 		if (error == 0) {
137 			error = VOP_READ(tmp->znode, uiop,
138 			    IO_DIRECT | IO_NODELOCKED,
139 			    uiop->uio_td->td_ucred);
140 			VOP_UNLOCK(tmp->znode);
141 		}
142 	}
143 	TARFS_DPF(IO, "%s(%zu, %zu) = %d (resid %zd)\n", __func__,
144 	    (size_t)off, len, error, uiop->uio_resid);
145 	return (error);
146 }
147 
148 /*
149  * Reads from the tar file into the provided buffer.  If the archive is
150  * compressed and raw is false, reads the decompressed stream; otherwise,
151  * reads directly from the original file.  Returns the number of bytes
152  * read on success, 0 on EOF, and a negative errno value on failure.
153  */
154 ssize_t
155 tarfs_io_read_buf(struct tarfs_mount *tmp, bool raw,
156     void *buf, off_t off, size_t len)
157 {
158 	struct uio auio;
159 	struct iovec aiov;
160 	ssize_t res;
161 	int error;
162 
163 	if (len == 0) {
164 		TARFS_DPF(IO, "%s(%zu, %zu) null\n", __func__,
165 		    (size_t)off, len);
166 		return (0);
167 	}
168 	aiov.iov_base = buf;
169 	aiov.iov_len = len;
170 	auio.uio_iov = &aiov;
171 	auio.uio_iovcnt = 1;
172 	auio.uio_offset = off;
173 	auio.uio_segflg = UIO_SYSSPACE;
174 	auio.uio_rw = UIO_READ;
175 	auio.uio_resid = len;
176 	auio.uio_td = curthread;
177 	error = tarfs_io_read(tmp, raw, &auio);
178 	if (error != 0) {
179 		TARFS_DPF(IO, "%s(%zu, %zu) error %d\n", __func__,
180 		    (size_t)off, len, error);
181 		return (-error);
182 	}
183 	res = len - auio.uio_resid;
184 	if (res == 0 && len != 0) {
185 		TARFS_DPF(IO, "%s(%zu, %zu) eof\n", __func__,
186 		    (size_t)off, len);
187 	} else {
188 		TARFS_DPF(IO, "%s(%zu, %zu) read %zd | %*D\n", __func__,
189 		    (size_t)off, len, res,
190 		    (int)(res > 8 ? 8 : res), (uint8_t *)buf, " ");
191 	}
192 	return (res);
193 }
194 
195 #ifdef ZSTDIO
196 static void *
197 tarfs_zstate_alloc(void *opaque, size_t size)
198 {
199 
200 	(void)opaque;
201 	return (malloc(size, M_TARFSZSTATE, M_WAITOK));
202 }
203 #endif
204 
205 #ifdef ZSTDIO
206 static void
207 tarfs_zstate_free(void *opaque, void *address)
208 {
209 
210 	(void)opaque;
211 	free(address, M_TARFSZSTATE);
212 }
213 #endif
214 
215 #ifdef ZSTDIO
216 static ZSTD_customMem tarfs_zstd_mem = {
217 	tarfs_zstate_alloc,
218 	tarfs_zstate_free,
219 	NULL,
220 };
221 #endif
222 
223 #ifdef TARFS_ZIO
224 /*
225  * Updates the decompression frame index, recording the current input and
226  * output offsets in a new index entry, and growing the index if
227  * necessary.
228  */
229 static void
230 tarfs_zio_update_index(struct tarfs_zio *zio, off_t i, off_t o)
231 {
232 
233 	if (++zio->curidx >= zio->nidx) {
234 		if (++zio->nidx > zio->szidx) {
235 			zio->szidx *= 2;
236 			zio->idx = realloc(zio->idx,
237 			    zio->szidx * sizeof(*zio->idx),
238 			    M_TARFSZSTATE, M_ZERO | M_WAITOK);
239 			TARFS_DPF(ALLOC, "%s: resized zio index\n", __func__);
240 		}
241 		zio->idx[zio->curidx].i = i;
242 		zio->idx[zio->curidx].o = o;
243 		TARFS_DPF(ZIDX, "%s: index %u = i %zu o %zu\n", __func__,
244 		    zio->curidx, (size_t)zio->idx[zio->curidx].i,
245 		    (size_t)zio->idx[zio->curidx].o);
246 	}
247 	MPASS(zio->idx[zio->curidx].i == i);
248 	MPASS(zio->idx[zio->curidx].o == o);
249 }
250 #endif
251 
252 /*
253  * VOP_ACCESS for zio node.
254  */
255 static int
256 tarfs_zaccess(struct vop_access_args *ap)
257 {
258 	struct vnode *vp = ap->a_vp;
259 	struct tarfs_zio *zio = vp->v_data;
260 	struct tarfs_mount *tmp = zio->tmp;
261 	accmode_t accmode = ap->a_accmode;
262 	int error = EPERM;
263 
264 	if (accmode == VREAD) {
265 		error = vn_lock(tmp->vp, LK_SHARED);
266 		if (error == 0) {
267 			error = VOP_ACCESS(tmp->vp, accmode, ap->a_cred, ap->a_td);
268 			VOP_UNLOCK(tmp->vp);
269 		}
270 	}
271 	TARFS_DPF(ZIO, "%s(%d) = %d\n", __func__, accmode, error);
272 	return (error);
273 }
274 
275 /*
276  * VOP_GETATTR for zio node.
277  */
278 static int
279 tarfs_zgetattr(struct vop_getattr_args *ap)
280 {
281 	struct vattr va;
282 	struct vnode *vp = ap->a_vp;
283 	struct tarfs_zio *zio = vp->v_data;
284 	struct tarfs_mount *tmp = zio->tmp;
285 	struct vattr *vap = ap->a_vap;
286 	int error = 0;
287 
288 	VATTR_NULL(vap);
289 	error = vn_lock(tmp->vp, LK_SHARED);
290 	if (error == 0) {
291 		error = VOP_GETATTR(tmp->vp, &va, ap->a_cred);
292 		VOP_UNLOCK(tmp->vp);
293 		if (error == 0) {
294 			vap->va_type = VREG;
295 			vap->va_mode = va.va_mode;
296 			vap->va_nlink = 1;
297 			vap->va_gid = va.va_gid;
298 			vap->va_uid = va.va_uid;
299 			vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
300 			vap->va_fileid = TARFS_ZIOINO;
301 			vap->va_size = zio->idx[zio->nidx - 1].o;
302 			vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
303 			vap->va_atime = va.va_atime;
304 			vap->va_ctime = va.va_ctime;
305 			vap->va_mtime = va.va_mtime;
306 			vap->va_birthtime = tmp->root->birthtime;
307 			vap->va_bytes = va.va_bytes;
308 		}
309 	}
310 	TARFS_DPF(ZIO, "%s() = %d\n", __func__, error);
311 	return (error);
312 }
313 
314 #ifdef ZSTDIO
315 /*
316  * VOP_READ for zio node, zstd edition.
317  */
318 static int
319 tarfs_zread_zstd(struct tarfs_zio *zio, struct uio *uiop)
320 {
321 	void *ibuf = NULL, *obuf = NULL, *rl = NULL;
322 	struct uio auio;
323 	struct iovec aiov;
324 	struct tarfs_mount *tmp = zio->tmp;
325 	struct tarfs_zstd *zstd = zio->zstd;
326 	struct thread *td = curthread;
327 	ZSTD_inBuffer zib;
328 	ZSTD_outBuffer zob;
329 	off_t zsize;
330 	off_t ipos, opos;
331 	size_t ilen, olen;
332 	size_t zerror;
333 	off_t off = uiop->uio_offset;
334 	size_t len = uiop->uio_resid;
335 	size_t resid = uiop->uio_resid;
336 	size_t bsize;
337 	int error;
338 	bool reset = false;
339 
340 	/* do we have to rewind? */
341 	if (off < zio->opos) {
342 		while (zio->curidx > 0 && off < zio->idx[zio->curidx].o)
343 			zio->curidx--;
344 		reset = true;
345 	}
346 	/* advance to the nearest index entry */
347 	if (off > zio->opos) {
348 		// XXX maybe do a binary search instead
349 		while (zio->curidx < zio->nidx - 1 &&
350 		    off >= zio->idx[zio->curidx + 1].o) {
351 			zio->curidx++;
352 			reset = true;
353 		}
354 	}
355 	/* reset the decompression stream if needed */
356 	if (reset) {
357 		zio->ipos = zio->idx[zio->curidx].i;
358 		zio->opos = zio->idx[zio->curidx].o;
359 		ZSTD_resetDStream(zstd->zds);
360 		TARFS_DPF(ZIDX, "%s: skipping to index %u = i %zu o %zu\n", __func__,
361 		    zio->curidx, (size_t)zio->ipos, (size_t)zio->opos);
362 	} else {
363 		TARFS_DPF(ZIDX, "%s: continuing at i %zu o %zu\n", __func__,
364 		    (size_t)zio->ipos, (size_t)zio->opos);
365 	}
366 
367 	/*
368 	 * Set up a temporary buffer for compressed data.  Use the size
369 	 * recommended by the zstd library; this is usually 128 kB, but
370 	 * just in case, make sure it's a multiple of the page size and no
371 	 * larger than MAXBSIZE.
372 	 */
373 	bsize = roundup(ZSTD_CStreamOutSize(), PAGE_SIZE);
374 	if (bsize > MAXBSIZE)
375 		bsize = MAXBSIZE;
376 	ibuf = malloc(bsize, M_TEMP, M_WAITOK);
377 	zib.src = NULL;
378 	zib.size = 0;
379 	zib.pos = 0;
380 
381 	/*
382 	 * Set up the decompression buffer.  If the target is not in
383 	 * kernel space, we will have to set up a bounce buffer.
384 	 *
385 	 * TODO: to avoid using a bounce buffer, map destination pages
386 	 * using vm_fault_quick_hold_pages().
387 	 */
388 	MPASS(zio->opos <= off);
389 	MPASS(uiop->uio_iovcnt == 1);
390 	MPASS(uiop->uio_iov->iov_len >= len);
391 	if (uiop->uio_segflg == UIO_SYSSPACE) {
392 		zob.dst = uiop->uio_iov->iov_base;
393 	} else {
394 		TARFS_DPF(BOUNCE, "%s: allocating %zu-byte bounce buffer\n",
395 		    __func__, len);
396 		zob.dst = obuf = malloc(len, M_TEMP, M_WAITOK);
397 	}
398 	zob.size = len;
399 	zob.pos = 0;
400 
401 	/* lock tarball */
402 	rl = vn_rangelock_rlock(tmp->vp, zio->ipos, OFF_MAX);
403 	error = vn_lock(tmp->vp, LK_SHARED);
404 	if (error != 0) {
405 		goto fail_unlocked;
406 	}
407 	/* check size */
408 	error = vn_getsize_locked(tmp->vp, &zsize, td->td_ucred);
409 	if (error != 0) {
410 		goto fail;
411 	}
412 	if (zio->ipos >= zsize) {
413 		/* beyond EOF */
414 		goto fail;
415 	}
416 
417 	while (resid > 0) {
418 		if (zib.pos == zib.size) {
419 			/* request data from the underlying file */
420 			aiov.iov_base = ibuf;
421 			aiov.iov_len = bsize;
422 			auio.uio_iov = &aiov;
423 			auio.uio_iovcnt = 1;
424 			auio.uio_offset = zio->ipos;
425 			auio.uio_segflg = UIO_SYSSPACE;
426 			auio.uio_rw = UIO_READ;
427 			auio.uio_resid = aiov.iov_len;
428 			auio.uio_td = td;
429 			error = VOP_READ(tmp->vp, &auio,
430 			    IO_DIRECT | IO_NODELOCKED,
431 			    td->td_ucred);
432 			if (error != 0)
433 				goto fail;
434 			TARFS_DPF(ZIO, "%s: req %zu+%zu got %zu+%zu\n", __func__,
435 			    (size_t)zio->ipos, bsize,
436 			    (size_t)zio->ipos, bsize - auio.uio_resid);
437 			zib.src = ibuf;
438 			zib.size = bsize - auio.uio_resid;
439 			zib.pos = 0;
440 		}
441 		MPASS(zib.pos <= zib.size);
442 		if (zib.pos == zib.size) {
443 			TARFS_DPF(ZIO, "%s: end of file after i %zu o %zu\n", __func__,
444 			    (size_t)zio->ipos, (size_t)zio->opos);
445 			goto fail;
446 		}
447 		if (zio->opos < off) {
448 			/* to be discarded */
449 			zob.size = min(off - zio->opos, len);
450 			zob.pos = 0;
451 		} else {
452 			zob.size = len;
453 			zob.pos = zio->opos - off;
454 		}
455 		ipos = zib.pos;
456 		opos = zob.pos;
457 		/* decompress as much as possible */
458 		zerror = ZSTD_decompressStream(zstd->zds, &zob, &zib);
459 		zio->ipos += ilen = zib.pos - ipos;
460 		zio->opos += olen = zob.pos - opos;
461 		if (zio->opos > off)
462 			resid -= olen;
463 		if (ZSTD_isError(zerror)) {
464 			TARFS_DPF(ZIO, "%s: inflate failed after i %zu o %zu: %s\n", __func__,
465 			    (size_t)zio->ipos, (size_t)zio->opos, ZSTD_getErrorName(zerror));
466 			error = EIO;
467 			goto fail;
468 		}
469 		if (zerror == 0 && olen == 0) {
470 			TARFS_DPF(ZIO, "%s: end of stream after i %zu o %zu\n", __func__,
471 			    (size_t)zio->ipos, (size_t)zio->opos);
472 			break;
473 		}
474 		if (zerror == 0) {
475 			TARFS_DPF(ZIO, "%s: end of frame after i %zu o %zu\n", __func__,
476 			    (size_t)zio->ipos, (size_t)zio->opos);
477 			tarfs_zio_update_index(zio, zio->ipos, zio->opos);
478 		}
479 		TARFS_DPF(ZIO, "%s: inflated %zu\n", __func__, olen);
480 #ifdef TARFS_DEBUG
481 		counter_u64_add(tarfs_zio_inflated, olen);
482 #endif
483 	}
484 fail:
485 	VOP_UNLOCK(tmp->vp);
486 fail_unlocked:
487 	if (error == 0) {
488 		if (uiop->uio_segflg == UIO_SYSSPACE) {
489 			uiop->uio_resid = resid;
490 		} else if (len > resid) {
491 			TARFS_DPF(BOUNCE, "%s: bounced %zu bytes\n", __func__,
492 			    len - resid);
493 			error = uiomove(obuf, len - resid, uiop);
494 #ifdef TARFS_DEBUG
495 			counter_u64_add(tarfs_zio_bounced, len - resid);
496 #endif
497 		}
498 	}
499 	if (obuf != NULL) {
500 		TARFS_DPF(BOUNCE, "%s: freeing bounce buffer\n", __func__);
501 		free(obuf, M_TEMP);
502 	}
503 	if (rl != NULL)
504 		vn_rangelock_unlock(tmp->vp, rl);
505 	if (ibuf != NULL)
506 		free(ibuf, M_TEMP);
507 	TARFS_DPF(ZIO, "%s(%zu, %zu) = %d (resid %zd)\n", __func__,
508 	    (size_t)off, len, error, uiop->uio_resid);
509 #ifdef TARFS_DEBUG
510 	counter_u64_add(tarfs_zio_consumed, len - uiop->uio_resid);
511 #endif
512 	if (error != 0) {
513 		zio->curidx = 0;
514 		zio->ipos = zio->idx[0].i;
515 		zio->opos = zio->idx[0].o;
516 		ZSTD_resetDStream(zstd->zds);
517 	}
518 	return (error);
519 }
520 #endif
521 
522 /*
523  * VOP_READ for zio node.
524  */
525 static int
526 tarfs_zread(struct vop_read_args *ap)
527 {
528 #if defined(TARFS_DEBUG) || defined(ZSTDIO)
529 	struct vnode *vp = ap->a_vp;
530 	struct tarfs_zio *zio = vp->v_data;
531 	struct uio *uiop = ap->a_uio;
532 #endif
533 #ifdef TARFS_DEBUG
534 	off_t off = uiop->uio_offset;
535 	size_t len = uiop->uio_resid;
536 #endif
537 	int error;
538 
539 	TARFS_DPF(ZIO, "%s(%zu, %zu)\n", __func__,
540 	    (size_t)off, len);
541 #ifdef ZSTDIO
542 	if (zio->zstd != NULL) {
543 		error = tarfs_zread_zstd(zio, uiop);
544 	} else
545 #endif
546 		error = EFTYPE;
547 	TARFS_DPF(ZIO, "%s(%zu, %zu) = %d (resid %zd)\n", __func__,
548 	    (size_t)off, len, error, uiop->uio_resid);
549 	return (error);
550 }
551 
552 /*
553  * VOP_RECLAIM for zio node.
554  */
555 static int
556 tarfs_zreclaim(struct vop_reclaim_args *ap)
557 {
558 	struct vnode *vp = ap->a_vp;
559 
560 	TARFS_DPF(ZIO, "%s(%p)\n", __func__, vp);
561 	vp->v_data = NULL;
562 	return (0);
563 }
564 
565 /*
566  * VOP_STRATEGY for zio node.
567  */
568 static int
569 tarfs_zstrategy(struct vop_strategy_args *ap)
570 {
571 	struct uio auio;
572 	struct iovec iov;
573 	struct vnode *vp = ap->a_vp;
574 	struct buf *bp = ap->a_bp;
575 	off_t off;
576 	size_t len;
577 	int error;
578 
579 	iov.iov_base = bp->b_data;
580 	iov.iov_len = bp->b_bcount;
581 	off = bp->b_iooffset;
582 	len = bp->b_bcount;
583 	bp->b_resid = len;
584 	auio.uio_iov = &iov;
585 	auio.uio_iovcnt = 1;
586 	auio.uio_offset = off;
587 	auio.uio_resid = len;
588 	auio.uio_segflg = UIO_SYSSPACE;
589 	auio.uio_rw = UIO_READ;
590 	auio.uio_td = curthread;
591 	error = VOP_READ(vp, &auio, IO_DIRECT | IO_NODELOCKED, bp->b_rcred);
592 	bp->b_flags |= B_DONE;
593 	if (error != 0) {
594 		bp->b_ioflags |= BIO_ERROR;
595 		bp->b_error = error;
596 	}
597 	return (0);
598 }
599 
/*
 * Vnode operations for the zio node.  Only the operations listed here
 * are implemented in this file; everything else falls through to
 * default_vnodeops.
 */
static struct vop_vector tarfs_znodeops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		tarfs_zaccess,
	.vop_getattr =		tarfs_zgetattr,
	.vop_read =		tarfs_zread,
	.vop_reclaim =		tarfs_zreclaim,
	.vop_strategy =		tarfs_zstrategy,
};
VFS_VOP_VECTOR_REGISTER(tarfs_znodeops);
610 
611 #ifdef TARFS_ZIO
612 /*
613  * Initializes the decompression layer.
614  */
615 static struct tarfs_zio *
616 tarfs_zio_init(struct tarfs_mount *tmp, off_t i, off_t o)
617 {
618 	struct tarfs_zio *zio;
619 	struct vnode *zvp;
620 
621 	zio = malloc(sizeof(*zio), M_TARFSZSTATE, M_ZERO | M_WAITOK);
622 	TARFS_DPF(ALLOC, "%s: allocated zio\n", __func__);
623 	zio->tmp = tmp;
624 	zio->szidx = 128;
625 	zio->idx = malloc(zio->szidx * sizeof(*zio->idx), M_TARFSZSTATE,
626 	    M_ZERO | M_WAITOK);
627 	zio->curidx = 0;
628 	zio->nidx = 1;
629 	zio->idx[zio->curidx].i = zio->ipos = i;
630 	zio->idx[zio->curidx].o = zio->opos = o;
631 	tmp->zio = zio;
632 	TARFS_DPF(ALLOC, "%s: allocated zio index\n", __func__);
633 	(void)getnewvnode("tarfsz", tmp->vfs, &tarfs_znodeops, &zvp);
634 	zvp->v_data = zio;
635 	zvp->v_type = VREG;
636 	zvp->v_mount = tmp->vfs;
637 	vn_set_state(zvp, VSTATE_CONSTRUCTED);
638 	tmp->znode = zvp;
639 	TARFS_DPF(ZIO, "%s: created zio node\n", __func__);
640 	return (zio);
641 }
642 #endif
643 
644 /*
645  * Initializes the I/O layer, including decompression if the signature of
646  * a supported compression format is detected.  Returns 0 on success and a
647  * positive errno value on failure.
648  */
649 int
650 tarfs_io_init(struct tarfs_mount *tmp)
651 {
652 	uint8_t *block;
653 #ifdef TARFS_ZIO
654 	struct tarfs_zio *zio = NULL;
655 #endif
656 	ssize_t res;
657 	int error = 0;
658 
659 	block = malloc(tmp->iosize, M_TEMP, M_ZERO | M_WAITOK);
660 	res = tarfs_io_read_buf(tmp, true, block, 0, tmp->iosize);
661 	if (res < 0) {
662 		return (-res);
663 	}
664 	if (memcmp(block, XZ_MAGIC, sizeof(XZ_MAGIC)) == 0) {
665 		printf("xz compression not supported\n");
666 		error = EOPNOTSUPP;
667 		goto bad;
668 	} else if (memcmp(block, ZLIB_MAGIC, sizeof(ZLIB_MAGIC)) == 0) {
669 		printf("zlib compression not supported\n");
670 		error = EOPNOTSUPP;
671 		goto bad;
672 	} else if (memcmp(block, ZSTD_MAGIC, sizeof(ZSTD_MAGIC)) == 0) {
673 #ifdef ZSTDIO
674 		zio = tarfs_zio_init(tmp, 0, 0);
675 		zio->zstd = malloc(sizeof(*zio->zstd), M_TARFSZSTATE, M_WAITOK);
676 		zio->zstd->zds = ZSTD_createDStream_advanced(tarfs_zstd_mem);
677 		(void)ZSTD_initDStream(zio->zstd->zds);
678 #else
679 		printf("zstd compression not supported\n");
680 		error = EOPNOTSUPP;
681 		goto bad;
682 #endif
683 	}
684 bad:
685 	free(block, M_TEMP);
686 	return (error);
687 }
688 
689 #ifdef TARFS_ZIO
690 /*
691  * Tears down the decompression layer.
692  */
693 static int
694 tarfs_zio_fini(struct tarfs_mount *tmp)
695 {
696 	struct tarfs_zio *zio = tmp->zio;
697 	int error = 0;
698 
699 	if (tmp->znode != NULL) {
700 		error = vn_lock(tmp->znode, LK_EXCLUSIVE);
701 		if (error != 0) {
702 			TARFS_DPF(ALLOC, "%s: failed to lock znode", __func__);
703 			return (error);
704 		}
705 		tmp->znode->v_mount = NULL;
706 		vgone(tmp->znode);
707 		vput(tmp->znode);
708 		tmp->znode = NULL;
709 	}
710 #ifdef ZSTDIO
711 	if (zio->zstd != NULL) {
712 		TARFS_DPF(ALLOC, "%s: freeing zstd state\n", __func__);
713 		ZSTD_freeDStream(zio->zstd->zds);
714 		free(zio->zstd, M_TARFSZSTATE);
715 	}
716 #endif
717 	if (zio->idx != NULL) {
718 		TARFS_DPF(ALLOC, "%s: freeing index\n", __func__);
719 		free(zio->idx, M_TARFSZSTATE);
720 	}
721 	TARFS_DPF(ALLOC, "%s: freeing zio\n", __func__);
722 	free(zio, M_TARFSZSTATE);
723 	tmp->zio = NULL;
724 	return (error);
725 }
726 #endif
727 
/*
 * Tears down the I/O layer.  When decompression support is compiled in
 * and the mount has decompression state, that state is torn down and
 * the result of doing so is returned; otherwise there is nothing to do
 * and 0 is returned.
 */
int
tarfs_io_fini(struct tarfs_mount *tmp)
{

#ifdef TARFS_ZIO
	if (tmp->zio != NULL)
		return (tarfs_zio_fini(tmp));
#endif
	return (0);
}
744