xref: /freebsd/sys/contrib/openzfs/cmd/zstream/zstream_decompress.c (revision 80aae8a3f8aa70712930664572be9e6885dc0be7)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2022 Axcient.  All rights reserved.
25  * Use is subject to license terms.
26  *
27  * Copyright (c) 2024, Klara, Inc.
28  */
29 
30 #include <err.h>
31 #include <search.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <unistd.h>
35 #include <sys/zfs_ioctl.h>
36 #include <sys/zio_checksum.h>
37 #include <sys/zstd/zstd.h>
38 #include "zfs_fletcher.h"
39 #include "zstream.h"
40 #include "zstream_util.h"
41 
42 int
zstream_do_decompress(int argc,char * argv[])43 zstream_do_decompress(int argc, char *argv[])
44 {
45 	const int KEYSIZE = 64;
46 	int bufsz = SPA_MAXBLOCKSIZE;
47 	char *buf = safe_malloc(bufsz);
48 	dmu_replay_record_t thedrr;
49 	dmu_replay_record_t *drr = &thedrr;
50 	zio_cksum_t stream_cksum;
51 	int c;
52 	boolean_t verbose = B_FALSE;
53 
54 	while ((c = getopt(argc, argv, "v")) != -1) {
55 		switch (c) {
56 		case 'v':
57 			verbose = B_TRUE;
58 			break;
59 		case '?':
60 			(void) fprintf(stderr, "invalid option '%c'\n",
61 			    optopt);
62 			zstream_usage();
63 			break;
64 		}
65 	}
66 
67 	argc -= optind;
68 	argv += optind;
69 
70 	if (argc < 0)
71 		zstream_usage();
72 
73 	if (hcreate(argc) == 0)
74 		errx(1, "hcreate");
75 	for (int i = 0; i < argc; i++) {
76 		uint64_t object, offset;
77 		char *obj_str;
78 		char *offset_str;
79 		char *key;
80 		char *end;
81 		enum zio_compress type = ZIO_COMPRESS_LZ4;
82 
83 		obj_str = strsep(&argv[i], ",");
84 		if (argv[i] == NULL) {
85 			zstream_usage();
86 			exit(2);
87 		}
88 		errno = 0;
89 		object = strtoull(obj_str, &end, 0);
90 		if (errno || *end != '\0')
91 			errx(1, "invalid value for object");
92 		offset_str = strsep(&argv[i], ",");
93 		offset = strtoull(offset_str, &end, 0);
94 		if (errno || *end != '\0')
95 			errx(1, "invalid value for offset");
96 		if (argv[i]) {
97 			if (0 == strcmp("off", argv[i]))
98 				type = ZIO_COMPRESS_OFF;
99 			else if (0 == strcmp("lz4", argv[i]))
100 				type = ZIO_COMPRESS_LZ4;
101 			else if (0 == strcmp("lzjb", argv[i]))
102 				type = ZIO_COMPRESS_LZJB;
103 			else if (0 == strcmp("gzip", argv[i]))
104 				type = ZIO_COMPRESS_GZIP_1;
105 			else if (0 == strcmp("zle", argv[i]))
106 				type = ZIO_COMPRESS_ZLE;
107 			else if (0 == strcmp("zstd", argv[i]))
108 				type = ZIO_COMPRESS_ZSTD;
109 			else {
110 				fprintf(stderr, "Invalid compression type %s.\n"
111 				    "Supported types are off, lz4, lzjb, gzip, "
112 				    "zle, and zstd\n",
113 				    argv[i]);
114 				exit(2);
115 			}
116 		}
117 
118 		if (asprintf(&key, "%llu,%llu", (u_longlong_t)object,
119 		    (u_longlong_t)offset) < 0) {
120 			err(1, "asprintf");
121 		}
122 		ENTRY e = {.key = key};
123 		ENTRY *p;
124 
125 		p = hsearch(e, ENTER);
126 		if (p == NULL)
127 			errx(1, "hsearch");
128 		p->data = (void*)(intptr_t)type;
129 	}
130 
131 	if (isatty(STDIN_FILENO)) {
132 		(void) fprintf(stderr,
133 		    "Error: The send stream is a binary format "
134 		    "and can not be read from a\n"
135 		    "terminal.  Standard input must be redirected.\n");
136 		exit(1);
137 	}
138 
139 	fletcher_4_init();
140 	int begin = 0;
141 	boolean_t seen = B_FALSE;
142 	while (sfread(drr, sizeof (*drr), stdin) != 0) {
143 		struct drr_write *drrw;
144 		uint64_t payload_size = 0;
145 
146 		/*
147 		 * We need to regenerate the checksum.
148 		 */
149 		if (drr->drr_type != DRR_BEGIN) {
150 			memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
151 			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
152 		}
153 
154 		switch (drr->drr_type) {
155 		case DRR_BEGIN:
156 		{
157 			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
158 			VERIFY0(begin++);
159 			seen = B_TRUE;
160 
161 			uint32_t sz = drr->drr_payloadlen;
162 
163 			VERIFY3U(sz, <=, 1U << 28);
164 
165 			if (sz != 0) {
166 				if (sz > bufsz) {
167 					buf = realloc(buf, sz);
168 					if (buf == NULL)
169 						err(1, "realloc");
170 					bufsz = sz;
171 				}
172 				(void) sfread(buf, sz, stdin);
173 			}
174 			payload_size = sz;
175 			break;
176 		}
177 		case DRR_END:
178 		{
179 			struct drr_end *drre = &drr->drr_u.drr_end;
180 			/*
181 			 * We would prefer to just check --begin == 0, but
182 			 * replication streams have an end of stream END
183 			 * record, so we must avoid tripping it.
184 			 */
185 			VERIFY3B(seen, ==, B_TRUE);
186 			begin--;
187 			/*
188 			 * Use the recalculated checksum, unless this is
189 			 * the END record of a stream package, which has
190 			 * no checksum.
191 			 */
192 			if (!ZIO_CHECKSUM_IS_ZERO(&drre->drr_checksum))
193 				drre->drr_checksum = stream_cksum;
194 			break;
195 		}
196 
197 		case DRR_OBJECT:
198 		{
199 			struct drr_object *drro = &drr->drr_u.drr_object;
200 			VERIFY3S(begin, ==, 1);
201 
202 			if (drro->drr_bonuslen > 0) {
203 				payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
204 				(void) sfread(buf, payload_size, stdin);
205 			}
206 			break;
207 		}
208 
209 		case DRR_SPILL:
210 		{
211 			struct drr_spill *drrs = &drr->drr_u.drr_spill;
212 			VERIFY3S(begin, ==, 1);
213 			payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs);
214 			(void) sfread(buf, payload_size, stdin);
215 			break;
216 		}
217 
218 		case DRR_WRITE_BYREF:
219 			VERIFY3S(begin, ==, 1);
220 			fprintf(stderr,
221 			    "Deduplicated streams are not supported\n");
222 			exit(1);
223 			break;
224 
225 		case DRR_WRITE:
226 		{
227 			VERIFY3S(begin, ==, 1);
228 			drrw = &thedrr.drr_u.drr_write;
229 			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
230 			ENTRY *p;
231 			char key[KEYSIZE];
232 
233 			snprintf(key, KEYSIZE, "%llu,%llu",
234 			    (u_longlong_t)drrw->drr_object,
235 			    (u_longlong_t)drrw->drr_offset);
236 			ENTRY e = {.key = key};
237 
238 			p = hsearch(e, FIND);
239 			if (p == NULL) {
240 				/*
241 				 * Read the contents of the block unaltered
242 				 */
243 				(void) sfread(buf, payload_size, stdin);
244 				break;
245 			}
246 
247 			/*
248 			 * Read and decompress the block
249 			 */
250 			enum zio_compress c =
251 			    (enum zio_compress)(intptr_t)p->data;
252 
253 			if (c == ZIO_COMPRESS_OFF) {
254 				(void) sfread(buf, payload_size, stdin);
255 				drrw->drr_compressiontype = 0;
256 				drrw->drr_compressed_size = 0;
257 				if (verbose)
258 					fprintf(stderr,
259 					    "Resetting compression type to "
260 					    "off for ino %llu offset %llu\n",
261 					    (u_longlong_t)drrw->drr_object,
262 					    (u_longlong_t)drrw->drr_offset);
263 				break;
264 			}
265 
266 			uint64_t lsize = drrw->drr_logical_size;
267 			ASSERT3U(payload_size, <=, lsize);
268 
269 			char *lzbuf = safe_calloc(payload_size);
270 			(void) sfread(lzbuf, payload_size, stdin);
271 
272 			abd_t sabd, dabd;
273 			abd_get_from_buf_struct(&sabd, lzbuf, payload_size);
274 			abd_get_from_buf_struct(&dabd, buf, lsize);
275 			int err = zio_decompress_data(c, &sabd, &dabd,
276 			    payload_size, lsize, NULL);
277 			abd_free(&dabd);
278 			abd_free(&sabd);
279 
280 			if (err == 0) {
281 				drrw->drr_compressiontype = 0;
282 				drrw->drr_compressed_size = 0;
283 				payload_size = lsize;
284 				if (verbose) {
285 					fprintf(stderr,
286 					    "successfully decompressed "
287 					    "ino %llu offset %llu\n",
288 					    (u_longlong_t)drrw->drr_object,
289 					    (u_longlong_t)drrw->drr_offset);
290 				}
291 			} else {
292 				/*
293 				 * The block must not be compressed, at least
294 				 * not with this compression type, possibly
295 				 * because it gets written multiple times in
296 				 * this stream.
297 				 */
298 				warnx("decompression failed for "
299 				    "ino %llu offset %llu",
300 				    (u_longlong_t)drrw->drr_object,
301 				    (u_longlong_t)drrw->drr_offset);
302 				memcpy(buf, lzbuf, payload_size);
303 			}
304 
305 			free(lzbuf);
306 			break;
307 		}
308 
309 		case DRR_WRITE_EMBEDDED:
310 		{
311 			VERIFY3S(begin, ==, 1);
312 			struct drr_write_embedded *drrwe =
313 			    &drr->drr_u.drr_write_embedded;
314 			payload_size =
315 			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8);
316 			(void) sfread(buf, payload_size, stdin);
317 			break;
318 		}
319 
320 		case DRR_FREEOBJECTS:
321 		case DRR_FREE:
322 		case DRR_OBJECT_RANGE:
323 			VERIFY3S(begin, ==, 1);
324 			break;
325 
326 		default:
327 			(void) fprintf(stderr, "INVALID record type 0x%x\n",
328 			    drr->drr_type);
329 			/* should never happen, so assert */
330 			assert(B_FALSE);
331 		}
332 
333 		if (feof(stdout)) {
334 			fprintf(stderr, "Error: unexpected end-of-file\n");
335 			exit(1);
336 		}
337 		if (ferror(stdout)) {
338 			fprintf(stderr, "Error while reading file: %s\n",
339 			    strerror(errno));
340 			exit(1);
341 		}
342 
343 		/*
344 		 * We need to recalculate the checksum, and it needs to be
345 		 * initially zero to do that.  BEGIN records don't have
346 		 * a checksum.
347 		 */
348 		if (drr->drr_type != DRR_BEGIN) {
349 			memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
350 			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
351 		}
352 		if (dump_record(drr, buf, payload_size,
353 		    &stream_cksum, STDOUT_FILENO) != 0)
354 			break;
355 		if (drr->drr_type == DRR_END) {
356 			/*
357 			 * Typically the END record is either the last
358 			 * thing in the stream, or it is followed
359 			 * by a BEGIN record (which also zeros the checksum).
360 			 * However, a stream package ends with two END
361 			 * records.  The last END record's checksum starts
362 			 * from zero.
363 			 */
364 			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
365 		}
366 	}
367 	free(buf);
368 	fletcher_4_fini();
369 	hdestroy();
370 
371 	return (0);
372 }
373