xref: /freebsd/sys/contrib/openzfs/cmd/zstream/zstream_decompress.c (revision b197d4b893974c9eb4d7b38704c6d5c486235d6f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2022 Axcient.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <search.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/zfs_ioctl.h>
#include <sys/zio_checksum.h>
#include <sys/zstd/zstd.h>
#include "zfs_fletcher.h"
#include "zstream.h"
37 
38 static int
39 dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
40     zio_cksum_t *zc, int outfd)
41 {
42 	assert(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum)
43 	    == sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
44 	fletcher_4_incremental_native(drr,
45 	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
46 	if (drr->drr_type != DRR_BEGIN) {
47 		assert(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
48 		    drr_checksum.drr_checksum));
49 		drr->drr_u.drr_checksum.drr_checksum = *zc;
50 	}
51 	fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
52 	    sizeof (zio_cksum_t), zc);
53 	if (write(outfd, drr, sizeof (*drr)) == -1)
54 		return (errno);
55 	if (payload_len != 0) {
56 		fletcher_4_incremental_native(payload, payload_len, zc);
57 		if (write(outfd, payload, payload_len) == -1)
58 			return (errno);
59 	}
60 	return (0);
61 }
62 
63 int
64 zstream_do_decompress(int argc, char *argv[])
65 {
66 	const int KEYSIZE = 64;
67 	int bufsz = SPA_MAXBLOCKSIZE;
68 	char *buf = safe_malloc(bufsz);
69 	dmu_replay_record_t thedrr;
70 	dmu_replay_record_t *drr = &thedrr;
71 	zio_cksum_t stream_cksum;
72 	int c;
73 	boolean_t verbose = B_FALSE;
74 
75 	while ((c = getopt(argc, argv, "v")) != -1) {
76 		switch (c) {
77 		case 'v':
78 			verbose = B_TRUE;
79 			break;
80 		case '?':
81 			(void) fprintf(stderr, "invalid option '%c'\n",
82 			    optopt);
83 			zstream_usage();
84 			break;
85 		}
86 	}
87 
88 	argc -= optind;
89 	argv += optind;
90 
91 	if (argc < 0)
92 		zstream_usage();
93 
94 	if (hcreate(argc) == 0)
95 		errx(1, "hcreate");
96 	for (int i = 0; i < argc; i++) {
97 		uint64_t object, offset;
98 		char *obj_str;
99 		char *offset_str;
100 		char *key;
101 		char *end;
102 		enum zio_compress type = ZIO_COMPRESS_LZ4;
103 
104 		obj_str = strsep(&argv[i], ",");
105 		if (argv[i] == NULL) {
106 			zstream_usage();
107 			exit(2);
108 		}
109 		errno = 0;
110 		object = strtoull(obj_str, &end, 0);
111 		if (errno || *end != '\0')
112 			errx(1, "invalid value for object");
113 		offset_str = strsep(&argv[i], ",");
114 		offset = strtoull(offset_str, &end, 0);
115 		if (errno || *end != '\0')
116 			errx(1, "invalid value for offset");
117 		if (argv[i]) {
118 			if (0 == strcmp("lz4", argv[i]))
119 				type = ZIO_COMPRESS_LZ4;
120 			else if (0 == strcmp("lzjb", argv[i]))
121 				type = ZIO_COMPRESS_LZJB;
122 			else if (0 == strcmp("gzip", argv[i]))
123 				type = ZIO_COMPRESS_GZIP_1;
124 			else if (0 == strcmp("zle", argv[i]))
125 				type = ZIO_COMPRESS_ZLE;
126 			else if (0 == strcmp("zstd", argv[i]))
127 				type = ZIO_COMPRESS_ZSTD;
128 			else {
129 				fprintf(stderr, "Invalid compression type %s.\n"
130 				    "Supported types are lz4, lzjb, gzip, zle, "
131 				    "and zstd\n",
132 				    argv[i]);
133 				exit(2);
134 			}
135 		}
136 
137 		if (asprintf(&key, "%llu,%llu", (u_longlong_t)object,
138 		    (u_longlong_t)offset) < 0) {
139 			err(1, "asprintf");
140 		}
141 		ENTRY e = {.key = key};
142 		ENTRY *p;
143 
144 		p = hsearch(e, ENTER);
145 		if (p == NULL)
146 			errx(1, "hsearch");
147 		p->data = (void*)type;
148 	}
149 
150 	if (isatty(STDIN_FILENO)) {
151 		(void) fprintf(stderr,
152 		    "Error: The send stream is a binary format "
153 		    "and can not be read from a\n"
154 		    "terminal.  Standard input must be redirected.\n");
155 		exit(1);
156 	}
157 
158 	fletcher_4_init();
159 	while (sfread(drr, sizeof (*drr), stdin) != 0) {
160 		struct drr_write *drrw;
161 		uint64_t payload_size = 0;
162 
163 		/*
164 		 * We need to regenerate the checksum.
165 		 */
166 		if (drr->drr_type != DRR_BEGIN) {
167 			memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
168 			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
169 		}
170 
171 		switch (drr->drr_type) {
172 		case DRR_BEGIN:
173 		{
174 			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
175 
176 			int sz = drr->drr_payloadlen;
177 			if (sz != 0) {
178 				if (sz > bufsz) {
179 					buf = realloc(buf, sz);
180 					if (buf == NULL)
181 						err(1, "realloc");
182 					bufsz = sz;
183 				}
184 				(void) sfread(buf, sz, stdin);
185 			}
186 			payload_size = sz;
187 			break;
188 		}
189 		case DRR_END:
190 		{
191 			struct drr_end *drre = &drr->drr_u.drr_end;
192 			/*
193 			 * Use the recalculated checksum, unless this is
194 			 * the END record of a stream package, which has
195 			 * no checksum.
196 			 */
197 			if (!ZIO_CHECKSUM_IS_ZERO(&drre->drr_checksum))
198 				drre->drr_checksum = stream_cksum;
199 			break;
200 		}
201 
202 		case DRR_OBJECT:
203 		{
204 			struct drr_object *drro = &drr->drr_u.drr_object;
205 
206 			if (drro->drr_bonuslen > 0) {
207 				payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
208 				(void) sfread(buf, payload_size, stdin);
209 			}
210 			break;
211 		}
212 
213 		case DRR_SPILL:
214 		{
215 			struct drr_spill *drrs = &drr->drr_u.drr_spill;
216 			payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs);
217 			(void) sfread(buf, payload_size, stdin);
218 			break;
219 		}
220 
221 		case DRR_WRITE_BYREF:
222 			fprintf(stderr,
223 			    "Deduplicated streams are not supported\n");
224 			exit(1);
225 			break;
226 
227 		case DRR_WRITE:
228 		{
229 			drrw = &thedrr.drr_u.drr_write;
230 			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
231 			ENTRY *p;
232 			char key[KEYSIZE];
233 
234 			snprintf(key, KEYSIZE, "%llu,%llu",
235 			    (u_longlong_t)drrw->drr_object,
236 			    (u_longlong_t)drrw->drr_offset);
237 			ENTRY e = {.key = key};
238 
239 			p = hsearch(e, FIND);
240 			if (p != NULL) {
241 				zio_decompress_func_t *xfunc = NULL;
242 				switch ((enum zio_compress)(intptr_t)p->data) {
243 				case ZIO_COMPRESS_LZJB:
244 					xfunc = lzjb_decompress;
245 					break;
246 				case ZIO_COMPRESS_GZIP_1:
247 					xfunc = gzip_decompress;
248 					break;
249 				case ZIO_COMPRESS_ZLE:
250 					xfunc = zle_decompress;
251 					break;
252 				case ZIO_COMPRESS_LZ4:
253 					xfunc = lz4_decompress_zfs;
254 					break;
255 				case ZIO_COMPRESS_ZSTD:
256 					xfunc = zfs_zstd_decompress;
257 					break;
258 				default:
259 					assert(B_FALSE);
260 				}
261 				assert(xfunc != NULL);
262 
263 
264 				/*
265 				 * Read and decompress the block
266 				 */
267 				char *lzbuf = safe_calloc(payload_size);
268 				(void) sfread(lzbuf, payload_size, stdin);
269 				if (0 != xfunc(lzbuf, buf,
270 				    payload_size, payload_size, 0)) {
271 					/*
272 					 * The block must not be compressed,
273 					 * possibly because it gets written
274 					 * multiple times in this stream.
275 					 */
276 					warnx("decompression failed for "
277 					    "ino %llu offset %llu",
278 					    (u_longlong_t)drrw->drr_object,
279 					    (u_longlong_t)drrw->drr_offset);
280 					memcpy(buf, lzbuf, payload_size);
281 				} else if (verbose) {
282 					fprintf(stderr, "successfully "
283 					    "decompressed ino %llu "
284 					    "offset %llu\n",
285 					    (u_longlong_t)drrw->drr_object,
286 					    (u_longlong_t)drrw->drr_offset);
287 				}
288 				free(lzbuf);
289 			} else {
290 				/*
291 				 * Read the contents of the block unaltered
292 				 */
293 				(void) sfread(buf, payload_size, stdin);
294 			}
295 			break;
296 		}
297 
298 		case DRR_WRITE_EMBEDDED:
299 		{
300 			struct drr_write_embedded *drrwe =
301 			    &drr->drr_u.drr_write_embedded;
302 			payload_size =
303 			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8);
304 			(void) sfread(buf, payload_size, stdin);
305 			break;
306 		}
307 
308 		case DRR_FREEOBJECTS:
309 		case DRR_FREE:
310 		case DRR_OBJECT_RANGE:
311 			break;
312 
313 		default:
314 			(void) fprintf(stderr, "INVALID record type 0x%x\n",
315 			    drr->drr_type);
316 			/* should never happen, so assert */
317 			assert(B_FALSE);
318 		}
319 
320 		if (feof(stdout)) {
321 			fprintf(stderr, "Error: unexpected end-of-file\n");
322 			exit(1);
323 		}
324 		if (ferror(stdout)) {
325 			fprintf(stderr, "Error while reading file: %s\n",
326 			    strerror(errno));
327 			exit(1);
328 		}
329 
330 		/*
331 		 * We need to recalculate the checksum, and it needs to be
332 		 * initially zero to do that.  BEGIN records don't have
333 		 * a checksum.
334 		 */
335 		if (drr->drr_type != DRR_BEGIN) {
336 			memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
337 			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
338 		}
339 		if (dump_record(drr, buf, payload_size,
340 		    &stream_cksum, STDOUT_FILENO) != 0)
341 			break;
342 		if (drr->drr_type == DRR_END) {
343 			/*
344 			 * Typically the END record is either the last
345 			 * thing in the stream, or it is followed
346 			 * by a BEGIN record (which also zeros the checksum).
347 			 * However, a stream package ends with two END
348 			 * records.  The last END record's checksum starts
349 			 * from zero.
350 			 */
351 			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
352 		}
353 	}
354 	free(buf);
355 	fletcher_4_fini();
356 	hdestroy();
357 
358 	return (0);
359 }
360