xref: /freebsd/sys/fs/tarfs/tarfs_io.c (revision f374ba41f55c1a127303d92d830dd58eef2f5243)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013 Juniper Networks, Inc.
5  * Copyright (c) 2022-2023 Klara, Inc.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include "opt_tarfs.h"
30 #include "opt_zstdio.h"
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/counter.h>
35 #include <sys/bio.h>
36 #include <sys/buf.h>
37 #include <sys/malloc.h>
38 #include <sys/mount.h>
39 #include <sys/sysctl.h>
40 #include <sys/uio.h>
41 #include <sys/vnode.h>
42 
43 #ifdef ZSTDIO
44 #define ZSTD_STATIC_LINKING_ONLY
45 #include <contrib/zstd/lib/zstd.h>
46 #endif
47 
48 #include <fs/tarfs/tarfs.h>
49 #include <fs/tarfs/tarfs_dbg.h>
50 
51 #ifdef TARFS_DEBUG
52 SYSCTL_NODE(_vfs_tarfs, OID_AUTO, zio, CTLFLAG_RD, 0,
53     "Tar filesystem decompression layer");
54 COUNTER_U64_DEFINE_EARLY(tarfs_zio_inflated);
55 SYSCTL_COUNTER_U64(_vfs_tarfs_zio, OID_AUTO, inflated, CTLFLAG_RD,
56     &tarfs_zio_inflated, "Amount of compressed data inflated.");
57 COUNTER_U64_DEFINE_EARLY(tarfs_zio_consumed);
58 SYSCTL_COUNTER_U64(_vfs_tarfs_zio, OID_AUTO, consumed, CTLFLAG_RD,
59     &tarfs_zio_consumed, "Amount of compressed data consumed.");
60 COUNTER_U64_DEFINE_EARLY(tarfs_zio_bounced);
61 SYSCTL_COUNTER_U64(_vfs_tarfs_zio, OID_AUTO, bounced, CTLFLAG_RD,
62     &tarfs_zio_bounced, "Amount of decompressed data bounced.");
63 
64 static int
65 tarfs_sysctl_handle_zio_reset(SYSCTL_HANDLER_ARGS)
66 {
67 	unsigned int tmp;
68 	int error;
69 
70 	tmp = 0;
71 	if ((error = SYSCTL_OUT(req, &tmp, sizeof(tmp))) != 0)
72 		return (error);
73 	if (req->newptr != NULL) {
74 		if ((error = SYSCTL_IN(req, &tmp, sizeof(tmp))) != 0)
75 			return (error);
76 		counter_u64_zero(tarfs_zio_inflated);
77 		counter_u64_zero(tarfs_zio_consumed);
78 		counter_u64_zero(tarfs_zio_bounced);
79 	}
80 	return (0);
81 }
82 
83 SYSCTL_PROC(_vfs_tarfs_zio, OID_AUTO, reset,
84     CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW,
85     NULL, 0, tarfs_sysctl_handle_zio_reset, "IU",
86     "Reset compression counters.");
87 #endif
88 
89 MALLOC_DEFINE(M_TARFSZSTATE, "tarfs zstate", "tarfs decompression state");
90 MALLOC_DEFINE(M_TARFSZBUF, "tarfs zbuf", "tarfs decompression buffers");
91 
92 #define XZ_MAGIC		(uint8_t[]){ 0xfd, 0x37, 0x7a, 0x58, 0x5a }
93 #define ZLIB_MAGIC		(uint8_t[]){ 0x1f, 0x8b, 0x08 }
94 #define ZSTD_MAGIC		(uint8_t[]){ 0x28, 0xb5, 0x2f, 0xfd }
95 
96 #ifdef ZSTDIO
97 struct tarfs_zstd {
98 	ZSTD_DStream *zds;
99 };
100 #endif
101 
102 /* XXX review use of curthread / uio_td / td_cred */
103 
104 /*
105  * Reads from the tar file according to the provided uio.  If the archive
106  * is compressed and raw is false, reads the decompressed stream;
107  * otherwise, reads directly from the original file.  Returns 0 on success
108  * and a positive errno value on failure.
109  */
int
tarfs_io_read(struct tarfs_mount *tmp, bool raw, struct uio *uiop)
{
	void *rl = NULL;
	off_t off = uiop->uio_offset;
	size_t len = uiop->uio_resid;
	int error;

	if (raw || tmp->znode == NULL) {
		/*
		 * Raw read requested, or the archive is uncompressed:
		 * read straight from the backing vnode.  Take a shared
		 * range lock over the region first, then the vnode lock.
		 */
		rl = vn_rangelock_rlock(tmp->vp, off, off + len);
		error = vn_lock(tmp->vp, LK_SHARED);
		if (error == 0) {
			error = VOP_READ(tmp->vp, uiop,
			    IO_DIRECT|IO_NODELOCKED,
			    uiop->uio_td->td_ucred);
			VOP_UNLOCK(tmp->vp);
		}
		vn_rangelock_unlock(tmp->vp, rl);
	} else {
		/*
		 * Compressed archive: go through the zio node, which
		 * decompresses on the fly.  Its decompression state is
		 * shared mutable state, hence the exclusive lock.
		 */
		error = vn_lock(tmp->znode, LK_EXCLUSIVE);
		if (error == 0) {
			error = VOP_READ(tmp->znode, uiop,
			    IO_DIRECT | IO_NODELOCKED,
			    uiop->uio_td->td_ucred);
			VOP_UNLOCK(tmp->znode);
		}
	}
	TARFS_DPF(IO, "%s(%zu, %zu) = %d (resid %zd)\n", __func__,
	    (size_t)off, len, error, uiop->uio_resid);
	return (error);
}
141 
142 /*
143  * Reads from the tar file into the provided buffer.  If the archive is
144  * compressed and raw is false, reads the decompressed stream; otherwise,
145  * reads directly from the original file.  Returns the number of bytes
146  * read on success, 0 on EOF, and a negative errno value on failure.
147  */
148 ssize_t
149 tarfs_io_read_buf(struct tarfs_mount *tmp, bool raw,
150     void *buf, off_t off, size_t len)
151 {
152 	struct uio auio;
153 	struct iovec aiov;
154 	ssize_t res;
155 	int error;
156 
157 	if (len == 0) {
158 		TARFS_DPF(IO, "%s(%zu, %zu) null\n", __func__,
159 		    (size_t)off, len);
160 		return (0);
161 	}
162 	aiov.iov_base = buf;
163 	aiov.iov_len = len;
164 	auio.uio_iov = &aiov;
165 	auio.uio_iovcnt = 1;
166 	auio.uio_offset = off;
167 	auio.uio_segflg = UIO_SYSSPACE;
168 	auio.uio_rw = UIO_READ;
169 	auio.uio_resid = len;
170 	auio.uio_td = curthread;
171 	error = tarfs_io_read(tmp, raw, &auio);
172 	if (error != 0) {
173 		TARFS_DPF(IO, "%s(%zu, %zu) error %d\n", __func__,
174 		    (size_t)off, len, error);
175 		return (-error);
176 	}
177 	res = len - auio.uio_resid;
178 	if (res == 0 && len != 0) {
179 		TARFS_DPF(IO, "%s(%zu, %zu) eof\n", __func__,
180 		    (size_t)off, len);
181 	} else {
182 		TARFS_DPF(IO, "%s(%zu, %zu) read %zd | %*D\n", __func__,
183 		    (size_t)off, len, res,
184 		    (int)(res > 8 ? 8 : res), (uint8_t *)buf, " ");
185 	}
186 	return (res);
187 }
188 
#ifdef ZSTDIO
/*
 * Allocation callback handed to the zstd library; allocates from the
 * tarfs decompression-state malloc type.  The opaque context pointer is
 * unused.  M_WAITOK means this can sleep and never returns NULL.
 */
static void *
tarfs_zstate_alloc(void *opaque, size_t size)
{

	(void)opaque;
	return (malloc(size, M_TARFSZSTATE, M_WAITOK));
}
#endif
198 
#ifdef ZSTDIO
/*
 * Deallocation callback handed to the zstd library; releases memory
 * obtained through tarfs_zstate_alloc().  The opaque context pointer is
 * unused.
 */
static void
tarfs_zstate_free(void *opaque, void *address)
{

	(void)opaque;
	free(address, M_TARFSZSTATE);
}
#endif
208 
#ifdef ZSTDIO
/*
 * Custom allocator descriptor passed to ZSTD_createDStream_advanced()
 * so the zstd library allocates through the callbacks above instead of
 * its defaults.
 */
static ZSTD_customMem tarfs_zstd_mem = {
	tarfs_zstate_alloc,
	tarfs_zstate_free,
	NULL,
};
#endif
216 
217 /*
218  * Updates the decompression frame index, recording the current input and
219  * output offsets in a new index entry, and growing the index if
220  * necessary.
221  */
static void
tarfs_zio_update_index(struct tarfs_zio *zio, off_t i, off_t o)
{

	/*
	 * Move to the next index slot.  If we have advanced past the
	 * populated entries, append a new entry recording the current
	 * input (i) and output (o) offsets, doubling the index
	 * allocation when it fills up.
	 */
	if (++zio->curidx >= zio->nidx) {
		if (++zio->nidx > zio->szidx) {
			zio->szidx *= 2;
			zio->idx = realloc(zio->idx,
			    zio->szidx * sizeof(*zio->idx),
			    M_TARFSZSTATE, M_ZERO | M_WAITOK);
			TARFS_DPF(ALLOC, "%s: resized zio index\n", __func__);
		}
		zio->idx[zio->curidx].i = i;
		zio->idx[zio->curidx].o = o;
		TARFS_DPF(ZIDX, "%s: index %u = i %zu o %zu\n", __func__,
		    zio->curidx, (size_t)zio->idx[zio->curidx].i,
		    (size_t)zio->idx[zio->curidx].o);
	}
	/*
	 * If the slot was already populated, we are revisiting a frame
	 * boundary seen earlier; the recorded offsets must match.
	 */
	MPASS(zio->idx[zio->curidx].i == i);
	MPASS(zio->idx[zio->curidx].o == o);
}
243 
244 /*
245  * VOP_ACCESS for zio node.
246  */
247 static int
248 tarfs_zaccess(struct vop_access_args *ap)
249 {
250 	struct vnode *vp = ap->a_vp;
251 	struct tarfs_zio *zio = vp->v_data;
252 	struct tarfs_mount *tmp = zio->tmp;
253 	accmode_t accmode = ap->a_accmode;
254 	int error = EPERM;
255 
256 	if (accmode == VREAD) {
257 		error = vn_lock(tmp->vp, LK_SHARED);
258 		if (error == 0) {
259 			error = VOP_ACCESS(tmp->vp, accmode, ap->a_cred, ap->a_td);
260 			VOP_UNLOCK(tmp->vp);
261 		}
262 	}
263 	TARFS_DPF(ZIO, "%s(%d) = %d\n", __func__, accmode, error);
264 	return (error);
265 }
266 
267 /*
268  * VOP_GETATTR for zio node.
269  */
static int
tarfs_zgetattr(struct vop_getattr_args *ap)
{
	struct vattr va;
	struct vnode *vp = ap->a_vp;
	struct tarfs_zio *zio = vp->v_data;
	struct tarfs_mount *tmp = zio->tmp;
	struct vattr *vap = ap->a_vap;
	int error = 0;

	VATTR_NULL(vap);
	/*
	 * Fetch the attributes of the underlying tar file and mirror
	 * most of them, substituting zio-specific values where they
	 * differ from the compressed file's.
	 */
	error = vn_lock(tmp->vp, LK_SHARED);
	if (error == 0) {
		error = VOP_GETATTR(tmp->vp, &va, ap->a_cred);
		VOP_UNLOCK(tmp->vp);
		if (error == 0) {
			vap->va_type = VREG;
			vap->va_mode = va.va_mode;
			vap->va_nlink = 1;
			vap->va_gid = va.va_gid;
			vap->va_uid = va.va_uid;
			vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
			vap->va_fileid = TARFS_ZIOINO;
			/*
			 * The last index entry's output offset is the
			 * highest decompressed offset recorded so far,
			 * reported as the node's size.
			 */
			vap->va_size = zio->idx[zio->nidx - 1].o;
			vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
			vap->va_atime = va.va_atime;
			vap->va_ctime = va.va_ctime;
			vap->va_mtime = va.va_mtime;
			vap->va_birthtime = tmp->root->birthtime;
			/* On-disk usage is that of the compressed file. */
			vap->va_bytes = va.va_bytes;
		}
	}
	TARFS_DPF(ZIO, "%s() = %d\n", __func__, error);
	return (error);
}
305 
306 #ifdef ZSTDIO
307 /*
308  * VOP_READ for zio node, zstd edition.
309  */
static int
tarfs_zread_zstd(struct tarfs_zio *zio, struct uio *uiop)
{
	void *ibuf = NULL, *obuf = NULL, *rl = NULL;
	struct uio auio;
	struct iovec aiov;
	struct tarfs_mount *tmp = zio->tmp;
	struct tarfs_zstd *zstd = zio->zstd;
	struct thread *td = curthread;
	ZSTD_inBuffer zib;
	ZSTD_outBuffer zob;
	off_t zsize;
	off_t ipos, opos;
	size_t ilen, olen;
	size_t zerror;
	off_t off = uiop->uio_offset;
	size_t len = uiop->uio_resid;
	size_t resid = uiop->uio_resid;
	size_t bsize;
	int error;
	bool reset = false;

	/* do we have to rewind? */
	if (off < zio->opos) {
		/*
		 * The requested offset is behind the current stream
		 * position; back up to the last frame boundary at or
		 * before it.
		 */
		while (zio->curidx > 0 && off < zio->idx[zio->curidx].o)
			zio->curidx--;
		reset = true;
	}
	/* advance to the nearest index entry */
	if (off > zio->opos) {
		// XXX maybe do a binary search instead
		while (zio->curidx < zio->nidx - 1 &&
		    off >= zio->idx[zio->curidx + 1].o) {
			zio->curidx++;
			reset = true;
		}
	}
	/* reset the decompression stream if needed */
	if (reset) {
		/*
		 * We jumped to a different frame boundary; restart the
		 * decompressor at the recorded input / output offsets.
		 */
		zio->ipos = zio->idx[zio->curidx].i;
		zio->opos = zio->idx[zio->curidx].o;
		ZSTD_resetDStream(zstd->zds);
		TARFS_DPF(ZIDX, "%s: skipping to index %u = i %zu o %zu\n", __func__,
		    zio->curidx, (size_t)zio->ipos, (size_t)zio->opos);
	} else {
		TARFS_DPF(ZIDX, "%s: continuing at i %zu o %zu\n", __func__,
		    (size_t)zio->ipos, (size_t)zio->opos);
	}

	/*
	 * Set up a temporary buffer for compressed data.  Use the size
	 * recommended by the zstd library; this is usually 128 kB, but
	 * just in case, make sure it's a multiple of the page size and no
	 * larger than MAXBSIZE.
	 */
	bsize = roundup(ZSTD_CStreamOutSize(), PAGE_SIZE);
	if (bsize > MAXBSIZE)
		bsize = MAXBSIZE;
	ibuf = malloc(bsize, M_TEMP, M_WAITOK);
	/* Empty input buffer: the loop below fills it on first pass. */
	zib.src = NULL;
	zib.size = 0;
	zib.pos = 0;

	/*
	 * Set up the decompression buffer.  If the target is not in
	 * kernel space, we will have to set up a bounce buffer.
	 *
	 * TODO: to avoid using a bounce buffer, map destination pages
	 * using vm_fault_quick_hold_pages().
	 */
	MPASS(zio->opos <= off);
	MPASS(uiop->uio_iovcnt == 1);
	MPASS(uiop->uio_iov->iov_len >= len);
	if (uiop->uio_segflg == UIO_SYSSPACE) {
		zob.dst = uiop->uio_iov->iov_base;
	} else {
		TARFS_DPF(ALLOC, "%s: allocating %zu-byte bounce buffer\n",
		    __func__, len);
		zob.dst = obuf = malloc(len, M_TEMP, M_WAITOK);
	}
	zob.size = len;
	zob.pos = 0;

	/* lock tarball */
	rl = vn_rangelock_rlock(tmp->vp, zio->ipos, OFF_MAX);
	error = vn_lock(tmp->vp, LK_SHARED);
	if (error != 0) {
		goto fail_unlocked;
	}
	/* check size */
	error = vn_getsize_locked(tmp->vp, &zsize, td->td_ucred);
	if (error != 0) {
		goto fail;
	}
	if (zio->ipos >= zsize) {
		/* beyond EOF; error is still 0, so this returns empty */
		goto fail;
	}

	while (resid > 0) {
		if (zib.pos == zib.size) {
			/* request data from the underlying file */
			aiov.iov_base = ibuf;
			aiov.iov_len = bsize;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = zio->ipos;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_resid = aiov.iov_len;
			auio.uio_td = td;
			error = VOP_READ(tmp->vp, &auio,
			    IO_DIRECT | IO_NODELOCKED,
			    td->td_ucred);
			if (error != 0)
				goto fail;
			TARFS_DPF(ZIO, "%s: req %zu+%zu got %zu+%zu\n", __func__,
			    (size_t)zio->ipos, bsize,
			    (size_t)zio->ipos, bsize - auio.uio_resid);
			zib.src = ibuf;
			zib.size = bsize - auio.uio_resid;
			zib.pos = 0;
		}
		MPASS(zib.pos <= zib.size);
		if (zib.pos == zib.size) {
			/* the read above produced nothing: hard EOF */
			TARFS_DPF(ZIO, "%s: end of file after i %zu o %zu\n", __func__,
			    (size_t)zio->ipos, (size_t)zio->opos);
			goto fail;
		}
		if (zio->opos < off) {
			/*
			 * We are still before the requested offset:
			 * decompress into the head of the output buffer
			 * and discard it, but never more than len bytes
			 * at a time.
			 */
			zob.size = min(off - zio->opos, len);
			zob.pos = 0;
		} else {
			/* decompress directly into the output window */
			zob.size = len;
			zob.pos = zio->opos - off;
		}
		ipos = zib.pos;
		opos = zob.pos;
		/* decompress as much as possible */
		zerror = ZSTD_decompressStream(zstd->zds, &zob, &zib);
		zio->ipos += ilen = zib.pos - ipos;
		zio->opos += olen = zob.pos - opos;
		/* only count output past the requested offset as progress */
		if (zio->opos > off)
			resid -= olen;
		if (ZSTD_isError(zerror)) {
			TARFS_DPF(ZIO, "%s: inflate failed after i %zu o %zu: %s\n", __func__,
			    (size_t)zio->ipos, (size_t)zio->opos, ZSTD_getErrorName(zerror));
			error = EIO;
			goto fail;
		}
		if (zerror == 0 && olen == 0) {
			/* frame ended and nothing was produced: done */
			TARFS_DPF(ZIO, "%s: end of stream after i %zu o %zu\n", __func__,
			    (size_t)zio->ipos, (size_t)zio->opos);
			break;
		}
		if (zerror == 0) {
			/* frame boundary: record it in the seek index */
			TARFS_DPF(ZIO, "%s: end of frame after i %zu o %zu\n", __func__,
			    (size_t)zio->ipos, (size_t)zio->opos);
			tarfs_zio_update_index(zio, zio->ipos, zio->opos);
		}
		TARFS_DPF(ZIO, "%s: inflated %zu\n", __func__, olen);
#ifdef TARFS_DEBUG
		counter_u64_add(tarfs_zio_inflated, olen);
#endif
	}
fail:
	VOP_UNLOCK(tmp->vp);
fail_unlocked:
	if (error == 0) {
		if (uiop->uio_segflg == UIO_SYSSPACE) {
			/* data went straight to the caller's buffer */
			uiop->uio_resid = resid;
		} else if (len > resid) {
			/* copy the bounce buffer out to user space */
			TARFS_DPF(ALLOC, "%s: bounced %zu bytes\n", __func__,
			    len - resid);
			error = uiomove(obuf, len - resid, uiop);
#ifdef TARFS_DEBUG
			counter_u64_add(tarfs_zio_bounced, len - resid);
#endif
		}
	}
	if (obuf != NULL) {
		TARFS_DPF(ALLOC, "%s: freeing bounce buffer\n", __func__);
		free(obuf, M_TEMP);
	}
	if (rl != NULL)
		vn_rangelock_unlock(tmp->vp, rl);
	if (ibuf != NULL)
		free(ibuf, M_TEMP);
	TARFS_DPF(ZIO, "%s(%zu, %zu) = %d (resid %zd)\n", __func__,
	    (size_t)off, len, error, uiop->uio_resid);
#ifdef TARFS_DEBUG
	counter_u64_add(tarfs_zio_consumed, len - uiop->uio_resid);
#endif
	if (error != 0) {
		/*
		 * The stream state may be inconsistent after a failure;
		 * rewind to the very beginning so the next read starts
		 * from a known-good state.
		 */
		zio->curidx = 0;
		zio->ipos = zio->idx[0].i;
		zio->opos = zio->idx[0].o;
		ZSTD_resetDStream(zstd->zds);
	}
	return (error);
}
512 #endif
513 
514 /*
515  * VOP_READ for zio node.
516  */
static int
tarfs_zread(struct vop_read_args *ap)
{
#if defined(TARFS_DEBUG) || defined(ZSTDIO)
	struct vnode *vp = ap->a_vp;
	struct tarfs_zio *zio = vp->v_data;
	struct uio *uiop = ap->a_uio;
#endif
#ifdef TARFS_DEBUG
	off_t off = uiop->uio_offset;
	size_t len = uiop->uio_resid;
#endif
	int error;

	TARFS_DPF(ZIO, "%s(%zu, %zu)\n", __func__,
	    (size_t)off, len);
	/* Dispatch to the decompressor matching the detected format. */
#ifdef ZSTDIO
	if (zio->zstd != NULL) {
		error = tarfs_zread_zstd(zio, uiop);
	} else
#endif
		/* No decompressor was set up for this archive. */
		error = EFTYPE;
	TARFS_DPF(ZIO, "%s(%zu, %zu) = %d (resid %zd)\n", __func__,
	    (size_t)off, len, error, uiop->uio_resid);
	return (error);
}
543 
544 /*
545  * VOP_RECLAIM for zio node.
546  */
static int
tarfs_zreclaim(struct vop_reclaim_args *ap)
{
	struct vnode *vp = ap->a_vp;

	TARFS_DPF(ZIO, "%s(%p)\n", __func__, vp);
	/*
	 * Detach the zio state from the vnode; the state itself is
	 * owned by the mount and freed in tarfs_zio_fini().
	 */
	vp->v_data = NULL;
	return (0);
}
556 
557 /*
558  * VOP_STRATEGY for zio node.
559  */
560 static int
561 tarfs_zstrategy(struct vop_strategy_args *ap)
562 {
563 	struct uio auio;
564 	struct iovec iov;
565 	struct vnode *vp = ap->a_vp;
566 	struct buf *bp = ap->a_bp;
567 	off_t off;
568 	size_t len;
569 	int error;
570 
571 	iov.iov_base = bp->b_data;
572 	iov.iov_len = bp->b_bcount;
573 	off = bp->b_iooffset;
574 	len = bp->b_bcount;
575 	bp->b_resid = len;
576 	auio.uio_iov = &iov;
577 	auio.uio_iovcnt = 1;
578 	auio.uio_offset = off;
579 	auio.uio_resid = len;
580 	auio.uio_segflg = UIO_SYSSPACE;
581 	auio.uio_rw = UIO_READ;
582 	auio.uio_td = curthread;
583 	error = VOP_READ(vp, &auio, IO_DIRECT | IO_NODELOCKED, bp->b_rcred);
584 	bp->b_flags |= B_DONE;
585 	if (error != 0) {
586 		bp->b_ioflags |= BIO_ERROR;
587 		bp->b_error = error;
588 	}
589 	return (0);
590 }
591 
/* Vnode operations vector for the zio (decompression) node. */
static struct vop_vector tarfs_znodeops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		tarfs_zaccess,
	.vop_getattr =		tarfs_zgetattr,
	.vop_read =		tarfs_zread,
	.vop_reclaim =		tarfs_zreclaim,
	.vop_strategy =		tarfs_zstrategy,
};
VFS_VOP_VECTOR_REGISTER(tarfs_znodeops);
602 
603 /*
604  * Initializes the decompression layer.
605  */
static struct tarfs_zio *
tarfs_zio_init(struct tarfs_mount *tmp, off_t i, off_t o)
{
	struct tarfs_zio *zio;
	struct vnode *zvp;

	zio = malloc(sizeof(*zio), M_TARFSZSTATE, M_ZERO | M_WAITOK);
	TARFS_DPF(ALLOC, "%s: allocated zio\n", __func__);
	zio->tmp = tmp;
	/*
	 * Seed the frame index with an initial capacity of 128 entries
	 * and one populated entry recording the starting input (i) and
	 * output (o) offsets.
	 */
	zio->szidx = 128;
	zio->idx = malloc(zio->szidx * sizeof(*zio->idx), M_TARFSZSTATE,
	    M_ZERO | M_WAITOK);
	zio->curidx = 0;
	zio->nidx = 1;
	zio->idx[zio->curidx].i = zio->ipos = i;
	zio->idx[zio->curidx].o = zio->opos = o;
	tmp->zio = zio;
	TARFS_DPF(ALLOC, "%s: allocated zio index\n", __func__);
	/*
	 * Create the vnode through which decompressed data is read.
	 * NOTE(review): getnewvnode()'s return value is ignored here —
	 * confirm it cannot fail in this configuration.
	 */
	getnewvnode("tarfsz", tmp->vfs, &tarfs_znodeops, &zvp);
	zvp->v_data = zio;
	zvp->v_type = VREG;
	zvp->v_mount = tmp->vfs;
	vn_set_state(zvp, VSTATE_CONSTRUCTED);
	tmp->znode = zvp;
	TARFS_DPF(ZIO, "%s: created zio node\n", __func__);
	return (zio);
}
633 
634 /*
635  * Initializes the I/O layer, including decompression if the signature of
636  * a supported compression format is detected.  Returns 0 on success and a
637  * positive errno value on failure.
638  */
639 int
640 tarfs_io_init(struct tarfs_mount *tmp)
641 {
642 	uint8_t *block;
643 #ifdef ZSTDIO
644 	struct tarfs_zio *zio = NULL;
645 #endif
646 	ssize_t res;
647 	int error = 0;
648 
649 	block = malloc(tmp->iosize, M_TEMP, M_ZERO | M_WAITOK);
650 	res = tarfs_io_read_buf(tmp, true, block, 0, tmp->iosize);
651 	if (res < 0) {
652 		return (-res);
653 	}
654 	if (memcmp(block, XZ_MAGIC, sizeof(XZ_MAGIC)) == 0) {
655 		printf("xz compression not supported\n");
656 		error = EOPNOTSUPP;
657 		goto bad;
658 	} else if (memcmp(block, ZLIB_MAGIC, sizeof(ZLIB_MAGIC)) == 0) {
659 		printf("zlib compression not supported\n");
660 		error = EOPNOTSUPP;
661 		goto bad;
662 	} else if (memcmp(block, ZSTD_MAGIC, sizeof(ZSTD_MAGIC)) == 0) {
663 #ifdef ZSTDIO
664 		zio = tarfs_zio_init(tmp, 0, 0);
665 		zio->zstd = malloc(sizeof(*zio->zstd), M_TARFSZSTATE, M_WAITOK);
666 		zio->zstd->zds = ZSTD_createDStream_advanced(tarfs_zstd_mem);
667 		(void)ZSTD_initDStream(zio->zstd->zds);
668 #else
669 		printf("zstd compression not supported\n");
670 		error = EOPNOTSUPP;
671 		goto bad;
672 #endif
673 	}
674 bad:
675 	free(block, M_TEMP);
676 	return (error);
677 }
678 
679 /*
680  * Tears down the decompression layer.
681  */
static int
tarfs_zio_fini(struct tarfs_mount *tmp)
{
	struct tarfs_zio *zio = tmp->zio;
	int error = 0;

	/* Destroy the zio vnode first, so nothing can read through it. */
	if (tmp->znode != NULL) {
		error = vn_lock(tmp->znode, LK_EXCLUSIVE);
		if (error != 0) {
			/* Bail out without freeing; caller sees the error. */
			TARFS_DPF(ALLOC, "%s: failed to lock znode", __func__);
			return (error);
		}
		tmp->znode->v_mount = NULL;
		vgone(tmp->znode);
		vput(tmp->znode);
		tmp->znode = NULL;
	}
#ifdef ZSTDIO
	if (zio->zstd != NULL) {
		TARFS_DPF(ALLOC, "%s: freeing zstd state\n", __func__);
		ZSTD_freeDStream(zio->zstd->zds);
		free(zio->zstd, M_TARFSZSTATE);
	}
#endif
	if (zio->idx != NULL) {
		TARFS_DPF(ALLOC, "%s: freeing index\n", __func__);
		free(zio->idx, M_TARFSZSTATE);
	}
	TARFS_DPF(ALLOC, "%s: freeing zio\n", __func__);
	free(zio, M_TARFSZSTATE);
	tmp->zio = NULL;
	return (error);
}
715 
716 /*
717  * Tears down the I/O layer, including the decompression layer if
718  * applicable.
719  */
720 int
721 tarfs_io_fini(struct tarfs_mount *tmp)
722 {
723 	int error = 0;
724 
725 	if (tmp->zio != NULL) {
726 		error = tarfs_zio_fini(tmp);
727 	}
728 	return (error);
729 }
730