xref: /freebsd/sys/contrib/openzfs/cmd/zstream/zstream_decompress.c (revision c07d6445eb89d9dd3950361b065b7bd110e3a043)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2022 Axcient.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <err.h>
28 #include <search.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32 #include <sys/zfs_ioctl.h>
33 #include <sys/zio_checksum.h>
34 #include <sys/zstd/zstd.h>
35 #include "zfs_fletcher.h"
36 #include "zstream.h"
37 
38 static int
39 dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
40     zio_cksum_t *zc, int outfd)
41 {
42 	assert(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum)
43 	    == sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
44 	fletcher_4_incremental_native(drr,
45 	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
46 	if (drr->drr_type != DRR_BEGIN) {
47 		assert(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
48 		    drr_checksum.drr_checksum));
49 		drr->drr_u.drr_checksum.drr_checksum = *zc;
50 	}
51 	fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
52 	    sizeof (zio_cksum_t), zc);
53 	if (write(outfd, drr, sizeof (*drr)) == -1)
54 		return (errno);
55 	if (payload_len != 0) {
56 		fletcher_4_incremental_native(payload, payload_len, zc);
57 		if (write(outfd, payload, payload_len) == -1)
58 			return (errno);
59 	}
60 	return (0);
61 }
62 
63 int
64 zstream_do_decompress(int argc, char *argv[])
65 {
66 	const int KEYSIZE = 64;
67 	int bufsz = SPA_MAXBLOCKSIZE;
68 	char *buf = safe_malloc(bufsz);
69 	dmu_replay_record_t thedrr;
70 	dmu_replay_record_t *drr = &thedrr;
71 	zio_cksum_t stream_cksum;
72 	int c;
73 	boolean_t verbose = B_FALSE;
74 
75 	while ((c = getopt(argc, argv, "v")) != -1) {
76 		switch (c) {
77 		case 'v':
78 			verbose = B_TRUE;
79 			break;
80 		case '?':
81 			(void) fprintf(stderr, "invalid option '%c'\n",
82 			    optopt);
83 			zstream_usage();
84 			break;
85 		}
86 	}
87 
88 	argc -= optind;
89 	argv += optind;
90 
91 	if (argc < 0)
92 		zstream_usage();
93 
94 	if (hcreate(argc) == 0)
95 		errx(1, "hcreate");
96 	for (int i = 0; i < argc; i++) {
97 		uint64_t object, offset;
98 		char *obj_str;
99 		char *offset_str;
100 		char *key;
101 		char *end;
102 		enum zio_compress type = ZIO_COMPRESS_LZ4;
103 
104 		obj_str = strsep(&argv[i], ",");
105 		if (argv[i] == NULL) {
106 			zstream_usage();
107 			exit(2);
108 		}
109 		errno = 0;
110 		object = strtoull(obj_str, &end, 0);
111 		if (errno || *end != '\0')
112 			errx(1, "invalid value for object");
113 		offset_str = strsep(&argv[i], ",");
114 		offset = strtoull(offset_str, &end, 0);
115 		if (errno || *end != '\0')
116 			errx(1, "invalid value for offset");
117 		if (argv[i]) {
118 			if (0 == strcmp("off", argv[i]))
119 				type = ZIO_COMPRESS_OFF;
120 			else if (0 == strcmp("lz4", argv[i]))
121 				type = ZIO_COMPRESS_LZ4;
122 			else if (0 == strcmp("lzjb", argv[i]))
123 				type = ZIO_COMPRESS_LZJB;
124 			else if (0 == strcmp("gzip", argv[i]))
125 				type = ZIO_COMPRESS_GZIP_1;
126 			else if (0 == strcmp("zle", argv[i]))
127 				type = ZIO_COMPRESS_ZLE;
128 			else if (0 == strcmp("zstd", argv[i]))
129 				type = ZIO_COMPRESS_ZSTD;
130 			else {
131 				fprintf(stderr, "Invalid compression type %s.\n"
132 				    "Supported types are off, lz4, lzjb, gzip, "
133 				    "zle, and zstd\n",
134 				    argv[i]);
135 				exit(2);
136 			}
137 		}
138 
139 		if (asprintf(&key, "%llu,%llu", (u_longlong_t)object,
140 		    (u_longlong_t)offset) < 0) {
141 			err(1, "asprintf");
142 		}
143 		ENTRY e = {.key = key};
144 		ENTRY *p;
145 
146 		p = hsearch(e, ENTER);
147 		if (p == NULL)
148 			errx(1, "hsearch");
149 		p->data = (void*)(intptr_t)type;
150 	}
151 
152 	if (isatty(STDIN_FILENO)) {
153 		(void) fprintf(stderr,
154 		    "Error: The send stream is a binary format "
155 		    "and can not be read from a\n"
156 		    "terminal.  Standard input must be redirected.\n");
157 		exit(1);
158 	}
159 
160 	fletcher_4_init();
161 	while (sfread(drr, sizeof (*drr), stdin) != 0) {
162 		struct drr_write *drrw;
163 		uint64_t payload_size = 0;
164 
165 		/*
166 		 * We need to regenerate the checksum.
167 		 */
168 		if (drr->drr_type != DRR_BEGIN) {
169 			memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
170 			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
171 		}
172 
173 		switch (drr->drr_type) {
174 		case DRR_BEGIN:
175 		{
176 			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
177 
178 			int sz = drr->drr_payloadlen;
179 			if (sz != 0) {
180 				if (sz > bufsz) {
181 					buf = realloc(buf, sz);
182 					if (buf == NULL)
183 						err(1, "realloc");
184 					bufsz = sz;
185 				}
186 				(void) sfread(buf, sz, stdin);
187 			}
188 			payload_size = sz;
189 			break;
190 		}
191 		case DRR_END:
192 		{
193 			struct drr_end *drre = &drr->drr_u.drr_end;
194 			/*
195 			 * Use the recalculated checksum, unless this is
196 			 * the END record of a stream package, which has
197 			 * no checksum.
198 			 */
199 			if (!ZIO_CHECKSUM_IS_ZERO(&drre->drr_checksum))
200 				drre->drr_checksum = stream_cksum;
201 			break;
202 		}
203 
204 		case DRR_OBJECT:
205 		{
206 			struct drr_object *drro = &drr->drr_u.drr_object;
207 
208 			if (drro->drr_bonuslen > 0) {
209 				payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
210 				(void) sfread(buf, payload_size, stdin);
211 			}
212 			break;
213 		}
214 
215 		case DRR_SPILL:
216 		{
217 			struct drr_spill *drrs = &drr->drr_u.drr_spill;
218 			payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs);
219 			(void) sfread(buf, payload_size, stdin);
220 			break;
221 		}
222 
223 		case DRR_WRITE_BYREF:
224 			fprintf(stderr,
225 			    "Deduplicated streams are not supported\n");
226 			exit(1);
227 			break;
228 
229 		case DRR_WRITE:
230 		{
231 			drrw = &thedrr.drr_u.drr_write;
232 			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
233 			ENTRY *p;
234 			char key[KEYSIZE];
235 
236 			snprintf(key, KEYSIZE, "%llu,%llu",
237 			    (u_longlong_t)drrw->drr_object,
238 			    (u_longlong_t)drrw->drr_offset);
239 			ENTRY e = {.key = key};
240 
241 			p = hsearch(e, FIND);
242 			if (p != NULL) {
243 				zio_decompress_func_t *xfunc = NULL;
244 				switch ((enum zio_compress)(intptr_t)p->data) {
245 				case ZIO_COMPRESS_OFF:
246 					xfunc = NULL;
247 					break;
248 				case ZIO_COMPRESS_LZJB:
249 					xfunc = lzjb_decompress;
250 					break;
251 				case ZIO_COMPRESS_GZIP_1:
252 					xfunc = gzip_decompress;
253 					break;
254 				case ZIO_COMPRESS_ZLE:
255 					xfunc = zle_decompress;
256 					break;
257 				case ZIO_COMPRESS_LZ4:
258 					xfunc = lz4_decompress_zfs;
259 					break;
260 				case ZIO_COMPRESS_ZSTD:
261 					xfunc = zfs_zstd_decompress;
262 					break;
263 				default:
264 					assert(B_FALSE);
265 				}
266 
267 
268 				/*
269 				 * Read and decompress the block
270 				 */
271 				char *lzbuf = safe_calloc(payload_size);
272 				(void) sfread(lzbuf, payload_size, stdin);
273 				if (xfunc == NULL) {
274 					memcpy(buf, lzbuf, payload_size);
275 					drrw->drr_compressiontype =
276 					    ZIO_COMPRESS_OFF;
277 					if (verbose)
278 						fprintf(stderr, "Resetting "
279 						    "compression type to off "
280 						    "for ino %llu offset "
281 						    "%llu\n",
282 						    (u_longlong_t)
283 						    drrw->drr_object,
284 						    (u_longlong_t)
285 						    drrw->drr_offset);
286 				} else if (0 != xfunc(lzbuf, buf,
287 				    payload_size, payload_size, 0)) {
288 					/*
289 					 * The block must not be compressed,
290 					 * at least not with this compression
291 					 * type, possibly because it gets
292 					 * written multiple times in this
293 					 * stream.
294 					 */
295 					warnx("decompression failed for "
296 					    "ino %llu offset %llu",
297 					    (u_longlong_t)drrw->drr_object,
298 					    (u_longlong_t)drrw->drr_offset);
299 					memcpy(buf, lzbuf, payload_size);
300 				} else if (verbose) {
301 					drrw->drr_compressiontype =
302 					    ZIO_COMPRESS_OFF;
303 					fprintf(stderr, "successfully "
304 					    "decompressed ino %llu "
305 					    "offset %llu\n",
306 					    (u_longlong_t)drrw->drr_object,
307 					    (u_longlong_t)drrw->drr_offset);
308 				} else {
309 					drrw->drr_compressiontype =
310 					    ZIO_COMPRESS_OFF;
311 				}
312 				free(lzbuf);
313 			} else {
314 				/*
315 				 * Read the contents of the block unaltered
316 				 */
317 				(void) sfread(buf, payload_size, stdin);
318 			}
319 			break;
320 		}
321 
322 		case DRR_WRITE_EMBEDDED:
323 		{
324 			struct drr_write_embedded *drrwe =
325 			    &drr->drr_u.drr_write_embedded;
326 			payload_size =
327 			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8);
328 			(void) sfread(buf, payload_size, stdin);
329 			break;
330 		}
331 
332 		case DRR_FREEOBJECTS:
333 		case DRR_FREE:
334 		case DRR_OBJECT_RANGE:
335 			break;
336 
337 		default:
338 			(void) fprintf(stderr, "INVALID record type 0x%x\n",
339 			    drr->drr_type);
340 			/* should never happen, so assert */
341 			assert(B_FALSE);
342 		}
343 
344 		if (feof(stdout)) {
345 			fprintf(stderr, "Error: unexpected end-of-file\n");
346 			exit(1);
347 		}
348 		if (ferror(stdout)) {
349 			fprintf(stderr, "Error while reading file: %s\n",
350 			    strerror(errno));
351 			exit(1);
352 		}
353 
354 		/*
355 		 * We need to recalculate the checksum, and it needs to be
356 		 * initially zero to do that.  BEGIN records don't have
357 		 * a checksum.
358 		 */
359 		if (drr->drr_type != DRR_BEGIN) {
360 			memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
361 			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
362 		}
363 		if (dump_record(drr, buf, payload_size,
364 		    &stream_cksum, STDOUT_FILENO) != 0)
365 			break;
366 		if (drr->drr_type == DRR_END) {
367 			/*
368 			 * Typically the END record is either the last
369 			 * thing in the stream, or it is followed
370 			 * by a BEGIN record (which also zeros the checksum).
371 			 * However, a stream package ends with two END
372 			 * records.  The last END record's checksum starts
373 			 * from zero.
374 			 */
375 			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
376 		}
377 	}
378 	free(buf);
379 	fletcher_4_fini();
380 	hdestroy();
381 
382 	return (0);
383 }
384