xref: /freebsd/sys/contrib/openzfs/cmd/zstream/zstream_decompress.c (revision 59144db3fca192c4637637dfe6b5a5d98632cd47)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2022 Axcient.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <err.h>
28 #include <search.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32 #include <sys/zfs_ioctl.h>
33 #include <sys/zio_checksum.h>
34 #include <sys/zstd/zstd.h>
35 #include "zfs_fletcher.h"
36 #include "zstream.h"
37 
38 static int
39 dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
40     zio_cksum_t *zc, int outfd)
41 {
42 	assert(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum)
43 	    == sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
44 	fletcher_4_incremental_native(drr,
45 	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
46 	if (drr->drr_type != DRR_BEGIN) {
47 		assert(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
48 		    drr_checksum.drr_checksum));
49 		drr->drr_u.drr_checksum.drr_checksum = *zc;
50 	}
51 	fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
52 	    sizeof (zio_cksum_t), zc);
53 	if (write(outfd, drr, sizeof (*drr)) == -1)
54 		return (errno);
55 	if (payload_len != 0) {
56 		fletcher_4_incremental_native(payload, payload_len, zc);
57 		if (write(outfd, payload, payload_len) == -1)
58 			return (errno);
59 	}
60 	return (0);
61 }
62 
63 int
64 zstream_do_decompress(int argc, char *argv[])
65 {
66 	const int KEYSIZE = 64;
67 	int bufsz = SPA_MAXBLOCKSIZE;
68 	char *buf = safe_malloc(bufsz);
69 	dmu_replay_record_t thedrr;
70 	dmu_replay_record_t *drr = &thedrr;
71 	zio_cksum_t stream_cksum;
72 	int c;
73 	boolean_t verbose = B_FALSE;
74 
75 	while ((c = getopt(argc, argv, "v")) != -1) {
76 		switch (c) {
77 		case 'v':
78 			verbose = B_TRUE;
79 			break;
80 		case '?':
81 			(void) fprintf(stderr, "invalid option '%c'\n",
82 			    optopt);
83 			zstream_usage();
84 			break;
85 		}
86 	}
87 
88 	argc -= optind;
89 	argv += optind;
90 
91 	if (argc < 0)
92 		zstream_usage();
93 
94 	if (hcreate(argc) == 0)
95 		errx(1, "hcreate");
96 	for (int i = 0; i < argc; i++) {
97 		uint64_t object, offset;
98 		char *obj_str;
99 		char *offset_str;
100 		char *key;
101 		char *end;
102 		enum zio_compress type = ZIO_COMPRESS_LZ4;
103 
104 		obj_str = strsep(&argv[i], ",");
105 		if (argv[i] == NULL) {
106 			zstream_usage();
107 			exit(2);
108 		}
109 		errno = 0;
110 		object = strtoull(obj_str, &end, 0);
111 		if (errno || *end != '\0')
112 			errx(1, "invalid value for object");
113 		offset_str = strsep(&argv[i], ",");
114 		offset = strtoull(offset_str, &end, 0);
115 		if (errno || *end != '\0')
116 			errx(1, "invalid value for offset");
117 		if (argv[i]) {
118 			if (0 == strcmp("off", argv[i]))
119 				type = ZIO_COMPRESS_OFF;
120 			else if (0 == strcmp("lz4", argv[i]))
121 				type = ZIO_COMPRESS_LZ4;
122 			else if (0 == strcmp("lzjb", argv[i]))
123 				type = ZIO_COMPRESS_LZJB;
124 			else if (0 == strcmp("gzip", argv[i]))
125 				type = ZIO_COMPRESS_GZIP_1;
126 			else if (0 == strcmp("zle", argv[i]))
127 				type = ZIO_COMPRESS_ZLE;
128 			else if (0 == strcmp("zstd", argv[i]))
129 				type = ZIO_COMPRESS_ZSTD;
130 			else {
131 				fprintf(stderr, "Invalid compression type %s.\n"
132 				    "Supported types are off, lz4, lzjb, gzip, "
133 				    "zle, and zstd\n",
134 				    argv[i]);
135 				exit(2);
136 			}
137 		}
138 
139 		if (asprintf(&key, "%llu,%llu", (u_longlong_t)object,
140 		    (u_longlong_t)offset) < 0) {
141 			err(1, "asprintf");
142 		}
143 		ENTRY e = {.key = key};
144 		ENTRY *p;
145 
146 		p = hsearch(e, ENTER);
147 		if (p == NULL)
148 			errx(1, "hsearch");
149 		p->data = (void*)(intptr_t)type;
150 	}
151 
152 	if (isatty(STDIN_FILENO)) {
153 		(void) fprintf(stderr,
154 		    "Error: The send stream is a binary format "
155 		    "and can not be read from a\n"
156 		    "terminal.  Standard input must be redirected.\n");
157 		exit(1);
158 	}
159 
160 	fletcher_4_init();
161 	int begin = 0;
162 	boolean_t seen = B_FALSE;
163 	while (sfread(drr, sizeof (*drr), stdin) != 0) {
164 		struct drr_write *drrw;
165 		uint64_t payload_size = 0;
166 
167 		/*
168 		 * We need to regenerate the checksum.
169 		 */
170 		if (drr->drr_type != DRR_BEGIN) {
171 			memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
172 			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
173 		}
174 
175 		switch (drr->drr_type) {
176 		case DRR_BEGIN:
177 		{
178 			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
179 			VERIFY0(begin++);
180 			seen = B_TRUE;
181 
182 			uint32_t sz = drr->drr_payloadlen;
183 
184 			VERIFY3U(sz, <=, 1U << 28);
185 
186 			if (sz != 0) {
187 				if (sz > bufsz) {
188 					buf = realloc(buf, sz);
189 					if (buf == NULL)
190 						err(1, "realloc");
191 					bufsz = sz;
192 				}
193 				(void) sfread(buf, sz, stdin);
194 			}
195 			payload_size = sz;
196 			break;
197 		}
198 		case DRR_END:
199 		{
200 			struct drr_end *drre = &drr->drr_u.drr_end;
201 			/*
202 			 * We would prefer to just check --begin == 0, but
203 			 * replication streams have an end of stream END
204 			 * record, so we must avoid tripping it.
205 			 */
206 			VERIFY3B(seen, ==, B_TRUE);
207 			begin--;
208 			/*
209 			 * Use the recalculated checksum, unless this is
210 			 * the END record of a stream package, which has
211 			 * no checksum.
212 			 */
213 			if (!ZIO_CHECKSUM_IS_ZERO(&drre->drr_checksum))
214 				drre->drr_checksum = stream_cksum;
215 			break;
216 		}
217 
218 		case DRR_OBJECT:
219 		{
220 			struct drr_object *drro = &drr->drr_u.drr_object;
221 			VERIFY3S(begin, ==, 1);
222 
223 			if (drro->drr_bonuslen > 0) {
224 				payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
225 				(void) sfread(buf, payload_size, stdin);
226 			}
227 			break;
228 		}
229 
230 		case DRR_SPILL:
231 		{
232 			struct drr_spill *drrs = &drr->drr_u.drr_spill;
233 			VERIFY3S(begin, ==, 1);
234 			payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs);
235 			(void) sfread(buf, payload_size, stdin);
236 			break;
237 		}
238 
239 		case DRR_WRITE_BYREF:
240 			VERIFY3S(begin, ==, 1);
241 			fprintf(stderr,
242 			    "Deduplicated streams are not supported\n");
243 			exit(1);
244 			break;
245 
246 		case DRR_WRITE:
247 		{
248 			VERIFY3S(begin, ==, 1);
249 			drrw = &thedrr.drr_u.drr_write;
250 			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
251 			ENTRY *p;
252 			char key[KEYSIZE];
253 
254 			snprintf(key, KEYSIZE, "%llu,%llu",
255 			    (u_longlong_t)drrw->drr_object,
256 			    (u_longlong_t)drrw->drr_offset);
257 			ENTRY e = {.key = key};
258 
259 			p = hsearch(e, FIND);
260 			if (p != NULL) {
261 				zio_decompress_func_t *xfunc = NULL;
262 				switch ((enum zio_compress)(intptr_t)p->data) {
263 				case ZIO_COMPRESS_OFF:
264 					xfunc = NULL;
265 					break;
266 				case ZIO_COMPRESS_LZJB:
267 					xfunc = lzjb_decompress;
268 					break;
269 				case ZIO_COMPRESS_GZIP_1:
270 					xfunc = gzip_decompress;
271 					break;
272 				case ZIO_COMPRESS_ZLE:
273 					xfunc = zle_decompress;
274 					break;
275 				case ZIO_COMPRESS_LZ4:
276 					xfunc = lz4_decompress_zfs;
277 					break;
278 				case ZIO_COMPRESS_ZSTD:
279 					xfunc = zfs_zstd_decompress;
280 					break;
281 				default:
282 					assert(B_FALSE);
283 				}
284 
285 
286 				/*
287 				 * Read and decompress the block
288 				 */
289 				char *lzbuf = safe_calloc(payload_size);
290 				(void) sfread(lzbuf, payload_size, stdin);
291 				if (xfunc == NULL) {
292 					memcpy(buf, lzbuf, payload_size);
293 					drrw->drr_compressiontype =
294 					    ZIO_COMPRESS_OFF;
295 					if (verbose)
296 						fprintf(stderr, "Resetting "
297 						    "compression type to off "
298 						    "for ino %llu offset "
299 						    "%llu\n",
300 						    (u_longlong_t)
301 						    drrw->drr_object,
302 						    (u_longlong_t)
303 						    drrw->drr_offset);
304 				} else if (0 != xfunc(lzbuf, buf,
305 				    payload_size, payload_size, 0)) {
306 					/*
307 					 * The block must not be compressed,
308 					 * at least not with this compression
309 					 * type, possibly because it gets
310 					 * written multiple times in this
311 					 * stream.
312 					 */
313 					warnx("decompression failed for "
314 					    "ino %llu offset %llu",
315 					    (u_longlong_t)drrw->drr_object,
316 					    (u_longlong_t)drrw->drr_offset);
317 					memcpy(buf, lzbuf, payload_size);
318 				} else if (verbose) {
319 					drrw->drr_compressiontype =
320 					    ZIO_COMPRESS_OFF;
321 					fprintf(stderr, "successfully "
322 					    "decompressed ino %llu "
323 					    "offset %llu\n",
324 					    (u_longlong_t)drrw->drr_object,
325 					    (u_longlong_t)drrw->drr_offset);
326 				} else {
327 					drrw->drr_compressiontype =
328 					    ZIO_COMPRESS_OFF;
329 				}
330 				free(lzbuf);
331 			} else {
332 				/*
333 				 * Read the contents of the block unaltered
334 				 */
335 				(void) sfread(buf, payload_size, stdin);
336 			}
337 			break;
338 		}
339 
340 		case DRR_WRITE_EMBEDDED:
341 		{
342 			VERIFY3S(begin, ==, 1);
343 			struct drr_write_embedded *drrwe =
344 			    &drr->drr_u.drr_write_embedded;
345 			payload_size =
346 			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8);
347 			(void) sfread(buf, payload_size, stdin);
348 			break;
349 		}
350 
351 		case DRR_FREEOBJECTS:
352 		case DRR_FREE:
353 		case DRR_OBJECT_RANGE:
354 			VERIFY3S(begin, ==, 1);
355 			break;
356 
357 		default:
358 			(void) fprintf(stderr, "INVALID record type 0x%x\n",
359 			    drr->drr_type);
360 			/* should never happen, so assert */
361 			assert(B_FALSE);
362 		}
363 
364 		if (feof(stdout)) {
365 			fprintf(stderr, "Error: unexpected end-of-file\n");
366 			exit(1);
367 		}
368 		if (ferror(stdout)) {
369 			fprintf(stderr, "Error while reading file: %s\n",
370 			    strerror(errno));
371 			exit(1);
372 		}
373 
374 		/*
375 		 * We need to recalculate the checksum, and it needs to be
376 		 * initially zero to do that.  BEGIN records don't have
377 		 * a checksum.
378 		 */
379 		if (drr->drr_type != DRR_BEGIN) {
380 			memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
381 			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
382 		}
383 		if (dump_record(drr, buf, payload_size,
384 		    &stream_cksum, STDOUT_FILENO) != 0)
385 			break;
386 		if (drr->drr_type == DRR_END) {
387 			/*
388 			 * Typically the END record is either the last
389 			 * thing in the stream, or it is followed
390 			 * by a BEGIN record (which also zeros the checksum).
391 			 * However, a stream package ends with two END
392 			 * records.  The last END record's checksum starts
393 			 * from zero.
394 			 */
395 			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
396 		}
397 	}
398 	free(buf);
399 	fletcher_4_fini();
400 	hdestroy();
401 
402 	return (0);
403 }
404