xref: /freebsd/sys/contrib/openzfs/cmd/zstream/zstream_recompress.c (revision 80aae8a3f8aa70712930664572be9e6885dc0be7)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2022 Axcient.  All rights reserved.
25  * Use is subject to license terms.
26  *
27  * Copyright (c) 2022 by Delphix. All rights reserved.
28  * Copyright (c) 2024, Klara, Inc.
29  */
30 
31 #include <err.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <unistd.h>
35 #include <sys/zfs_ioctl.h>
36 #include <sys/zio_checksum.h>
37 #include <sys/zstd/zstd.h>
38 #include "zfs_fletcher.h"
39 #include "zstream.h"
40 #include "zstream_util.h"
41 
42 int
zstream_do_recompress(int argc,char * argv[])43 zstream_do_recompress(int argc, char *argv[])
44 {
45 	int bufsz = SPA_MAXBLOCKSIZE;
46 	char *buf = safe_malloc(bufsz);
47 	dmu_replay_record_t thedrr;
48 	dmu_replay_record_t *drr = &thedrr;
49 	zio_cksum_t stream_cksum;
50 	int c;
51 	int level = 0;
52 
53 	while ((c = getopt(argc, argv, "l:")) != -1) {
54 		switch (c) {
55 		case 'l':
56 			if (sscanf(optarg, "%d", &level) != 1) {
57 				fprintf(stderr,
58 				    "failed to parse level '%s'\n",
59 				    optarg);
60 				zstream_usage();
61 			}
62 			break;
63 		case '?':
64 			(void) fprintf(stderr, "invalid option '%c'\n",
65 			    optopt);
66 			zstream_usage();
67 			break;
68 		}
69 	}
70 
71 	argc -= optind;
72 	argv += optind;
73 
74 	if (argc != 1)
75 		zstream_usage();
76 
77 	enum zio_compress ctype;
78 	if (strcmp(argv[0], "off") == 0) {
79 		ctype = ZIO_COMPRESS_OFF;
80 	} else {
81 		for (ctype = 0; ctype < ZIO_COMPRESS_FUNCTIONS; ctype++) {
82 			if (strcmp(argv[0],
83 			    zio_compress_table[ctype].ci_name) == 0)
84 				break;
85 		}
86 		if (ctype == ZIO_COMPRESS_FUNCTIONS ||
87 		    zio_compress_table[ctype].ci_compress == NULL) {
88 			fprintf(stderr, "Invalid compression type %s.\n",
89 			    argv[0]);
90 			exit(2);
91 		}
92 	}
93 
94 	if (isatty(STDIN_FILENO)) {
95 		(void) fprintf(stderr,
96 		    "Error: The send stream is a binary format "
97 		    "and can not be read from a\n"
98 		    "terminal.  Standard input must be redirected.\n");
99 		exit(1);
100 	}
101 
102 	abd_init();
103 	fletcher_4_init();
104 	zio_init();
105 	zstd_init();
106 	int begin = 0;
107 	boolean_t seen = B_FALSE;
108 	while (sfread(drr, sizeof (*drr), stdin) != 0) {
109 		struct drr_write *drrw;
110 		uint64_t payload_size = 0;
111 
112 		/*
113 		 * We need to regenerate the checksum.
114 		 */
115 		if (drr->drr_type != DRR_BEGIN) {
116 			memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
117 			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
118 		}
119 
120 
121 		switch (drr->drr_type) {
122 		case DRR_BEGIN:
123 		{
124 			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
125 			VERIFY0(begin++);
126 			seen = B_TRUE;
127 
128 			uint32_t sz = drr->drr_payloadlen;
129 
130 			VERIFY3U(sz, <=, 1U << 28);
131 
132 			if (sz != 0) {
133 				if (sz > bufsz) {
134 					buf = realloc(buf, sz);
135 					if (buf == NULL)
136 						err(1, "realloc");
137 					bufsz = sz;
138 				}
139 				(void) sfread(buf, sz, stdin);
140 			}
141 			payload_size = sz;
142 			break;
143 		}
144 		case DRR_END:
145 		{
146 			struct drr_end *drre = &drr->drr_u.drr_end;
147 			/*
148 			 * We would prefer to just check --begin == 0, but
149 			 * replication streams have an end of stream END
150 			 * record, so we must avoid tripping it.
151 			 */
152 			VERIFY3B(seen, ==, B_TRUE);
153 			begin--;
154 			/*
155 			 * Use the recalculated checksum, unless this is
156 			 * the END record of a stream package, which has
157 			 * no checksum.
158 			 */
159 			if (!ZIO_CHECKSUM_IS_ZERO(&drre->drr_checksum))
160 				drre->drr_checksum = stream_cksum;
161 			break;
162 		}
163 
164 		case DRR_OBJECT:
165 		{
166 			struct drr_object *drro = &drr->drr_u.drr_object;
167 			VERIFY3S(begin, ==, 1);
168 
169 			if (drro->drr_bonuslen > 0) {
170 				payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
171 				(void) sfread(buf, payload_size, stdin);
172 			}
173 			break;
174 		}
175 
176 		case DRR_SPILL:
177 		{
178 			struct drr_spill *drrs = &drr->drr_u.drr_spill;
179 			VERIFY3S(begin, ==, 1);
180 			payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs);
181 			(void) sfread(buf, payload_size, stdin);
182 			break;
183 		}
184 
185 		case DRR_WRITE_BYREF:
186 			VERIFY3S(begin, ==, 1);
187 			fprintf(stderr,
188 			    "Deduplicated streams are not supported\n");
189 			exit(1);
190 			break;
191 
192 		case DRR_WRITE:
193 		{
194 			VERIFY3S(begin, ==, 1);
195 			drrw = &thedrr.drr_u.drr_write;
196 			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
197 			/*
198 			 * In order to recompress an encrypted block, you have
199 			 * to decrypt, decompress, recompress, and
200 			 * re-encrypt. That can be a future enhancement (along
201 			 * with decryption or re-encryption), but for now we
202 			 * skip encrypted blocks.
203 			 */
204 			boolean_t encrypted = B_FALSE;
205 			for (int i = 0; i < ZIO_DATA_SALT_LEN; i++) {
206 				if (drrw->drr_salt[i] != 0) {
207 					encrypted = B_TRUE;
208 					break;
209 				}
210 			}
211 			if (encrypted) {
212 				(void) sfread(buf, payload_size, stdin);
213 				break;
214 			}
215 			enum zio_compress dtype = drrw->drr_compressiontype;
216 			if (dtype >= ZIO_COMPRESS_FUNCTIONS) {
217 				fprintf(stderr, "Invalid compression type in "
218 				    "stream: %d\n", dtype);
219 				exit(3);
220 			}
221 			if (zio_compress_table[dtype].ci_decompress == NULL)
222 				dtype = ZIO_COMPRESS_OFF;
223 
224 			/* Set up buffers to minimize memcpys */
225 			char *cbuf, *dbuf;
226 			if (ctype == ZIO_COMPRESS_OFF)
227 				dbuf = buf;
228 			else
229 				dbuf = safe_calloc(bufsz);
230 
231 			if (dtype == ZIO_COMPRESS_OFF)
232 				cbuf = dbuf;
233 			else
234 				cbuf = safe_calloc(payload_size);
235 
236 			/* Read and decompress the payload */
237 			(void) sfread(cbuf, payload_size, stdin);
238 			if (dtype != ZIO_COMPRESS_OFF) {
239 				abd_t cabd, dabd;
240 				abd_get_from_buf_struct(&cabd,
241 				    cbuf, payload_size);
242 				abd_get_from_buf_struct(&dabd, dbuf,
243 				    MIN(bufsz, drrw->drr_logical_size));
244 				if (zio_decompress_data(dtype, &cabd, &dabd,
245 				    payload_size, abd_get_size(&dabd),
246 				    NULL) != 0) {
247 					warnx("decompression type %d failed "
248 					    "for ino %llu offset %llu",
249 					    dtype,
250 					    (u_longlong_t)drrw->drr_object,
251 					    (u_longlong_t)drrw->drr_offset);
252 					exit(4);
253 				}
254 				payload_size = drrw->drr_logical_size;
255 				abd_free(&dabd);
256 				abd_free(&cabd);
257 				free(cbuf);
258 			}
259 
260 			/* Recompress the payload */
261 			if (ctype != ZIO_COMPRESS_OFF) {
262 				abd_t dabd, abd;
263 				abd_get_from_buf_struct(&dabd,
264 				    dbuf, drrw->drr_logical_size);
265 				abd_t *pabd =
266 				    abd_get_from_buf_struct(&abd, buf, bufsz);
267 				size_t csize = zio_compress_data(ctype, &dabd,
268 				    &pabd, drrw->drr_logical_size,
269 				    drrw->drr_logical_size, level);
270 				size_t rounded =
271 				    P2ROUNDUP(csize, SPA_MINBLOCKSIZE);
272 				if (rounded >= drrw->drr_logical_size) {
273 					memcpy(buf, dbuf, payload_size);
274 					drrw->drr_compressiontype = 0;
275 					drrw->drr_compressed_size = 0;
276 				} else {
277 					abd_zero_off(pabd, csize,
278 					    rounded - csize);
279 					drrw->drr_compressiontype = ctype;
280 					drrw->drr_compressed_size =
281 					    payload_size = rounded;
282 				}
283 				abd_free(&abd);
284 				abd_free(&dabd);
285 				free(dbuf);
286 			} else {
287 				drrw->drr_compressiontype = 0;
288 				drrw->drr_compressed_size = 0;
289 			}
290 			break;
291 		}
292 
293 		case DRR_WRITE_EMBEDDED:
294 		{
295 			struct drr_write_embedded *drrwe =
296 			    &drr->drr_u.drr_write_embedded;
297 			VERIFY3S(begin, ==, 1);
298 			payload_size =
299 			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8);
300 			(void) sfread(buf, payload_size, stdin);
301 			break;
302 		}
303 
304 		case DRR_FREEOBJECTS:
305 		case DRR_FREE:
306 		case DRR_OBJECT_RANGE:
307 			VERIFY3S(begin, ==, 1);
308 			break;
309 
310 		default:
311 			(void) fprintf(stderr, "INVALID record type 0x%x\n",
312 			    drr->drr_type);
313 			/* should never happen, so assert */
314 			assert(B_FALSE);
315 		}
316 
317 		if (feof(stdout)) {
318 			fprintf(stderr, "Error: unexpected end-of-file\n");
319 			exit(1);
320 		}
321 		if (ferror(stdout)) {
322 			fprintf(stderr, "Error while reading file: %s\n",
323 			    strerror(errno));
324 			exit(1);
325 		}
326 
327 		/*
328 		 * We need to recalculate the checksum, and it needs to be
329 		 * initially zero to do that.  BEGIN records don't have
330 		 * a checksum.
331 		 */
332 		if (drr->drr_type != DRR_BEGIN) {
333 			memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
334 			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
335 		}
336 		if (dump_record(drr, buf, payload_size,
337 		    &stream_cksum, STDOUT_FILENO) != 0)
338 			break;
339 		if (drr->drr_type == DRR_END) {
340 			/*
341 			 * Typically the END record is either the last
342 			 * thing in the stream, or it is followed
343 			 * by a BEGIN record (which also zeros the checksum).
344 			 * However, a stream package ends with two END
345 			 * records.  The last END record's checksum starts
346 			 * from zero.
347 			 */
348 			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
349 		}
350 	}
351 	free(buf);
352 	fletcher_4_fini();
353 	zio_fini();
354 	zstd_fini();
355 	abd_fini();
356 
357 	return (0);
358 }
359