xref: /freebsd/sys/contrib/openzfs/cmd/zstream/zstream_recompress.c (revision 657729a89dd578d8cfc70d6616f5c65a48a8b33a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2022 Axcient.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright (c) 2022 by Delphix. All rights reserved.
29  */
30 
#include <err.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/zfs_ioctl.h>
#include <sys/zio_checksum.h>
#include <sys/zstd/zstd.h>
#include "zfs_fletcher.h"
#include "zstream.h"
40 
41 static int
42 dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
43     zio_cksum_t *zc, int outfd)
44 {
45 	assert(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum)
46 	    == sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
47 	fletcher_4_incremental_native(drr,
48 	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
49 	if (drr->drr_type != DRR_BEGIN) {
50 		assert(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
51 		    drr_checksum.drr_checksum));
52 		drr->drr_u.drr_checksum.drr_checksum = *zc;
53 	}
54 	fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
55 	    sizeof (zio_cksum_t), zc);
56 	if (write(outfd, drr, sizeof (*drr)) == -1)
57 		return (errno);
58 	if (payload_len != 0) {
59 		fletcher_4_incremental_native(payload, payload_len, zc);
60 		if (write(outfd, payload, payload_len) == -1)
61 			return (errno);
62 	}
63 	return (0);
64 }
65 
66 int
67 zstream_do_recompress(int argc, char *argv[])
68 {
69 	int bufsz = SPA_MAXBLOCKSIZE;
70 	char *buf = safe_malloc(bufsz);
71 	dmu_replay_record_t thedrr;
72 	dmu_replay_record_t *drr = &thedrr;
73 	zio_cksum_t stream_cksum;
74 	int c;
75 	int level = -1;
76 
77 	while ((c = getopt(argc, argv, "l:")) != -1) {
78 		switch (c) {
79 		case 'l':
80 			if (sscanf(optarg, "%d", &level) != 0) {
81 				fprintf(stderr,
82 				    "failed to parse level '%s'\n",
83 				    optarg);
84 				zstream_usage();
85 			}
86 			break;
87 		case '?':
88 			(void) fprintf(stderr, "invalid option '%c'\n",
89 			    optopt);
90 			zstream_usage();
91 			break;
92 		}
93 	}
94 
95 	argc -= optind;
96 	argv += optind;
97 
98 	if (argc != 1)
99 		zstream_usage();
100 	int type = 0;
101 	zio_compress_info_t *cinfo = NULL;
102 	if (0 == strcmp(argv[0], "off")) {
103 		type = ZIO_COMPRESS_OFF;
104 		cinfo = &zio_compress_table[type];
105 	} else if (0 == strcmp(argv[0], "inherit") ||
106 	    0 == strcmp(argv[0], "empty") ||
107 	    0 == strcmp(argv[0], "on")) {
108 		// Fall through to invalid compression type case
109 	} else {
110 		for (int i = 0; i < ZIO_COMPRESS_FUNCTIONS; i++) {
111 			if (0 == strcmp(zio_compress_table[i].ci_name,
112 			    argv[0])) {
113 				cinfo = &zio_compress_table[i];
114 				type = i;
115 				break;
116 			}
117 		}
118 	}
119 	if (cinfo == NULL) {
120 		fprintf(stderr, "Invalid compression type %s.\n",
121 		    argv[0]);
122 		exit(2);
123 	}
124 
125 	if (cinfo->ci_compress == NULL) {
126 		type = 0;
127 		cinfo = &zio_compress_table[0];
128 	}
129 
130 	if (isatty(STDIN_FILENO)) {
131 		(void) fprintf(stderr,
132 		    "Error: The send stream is a binary format "
133 		    "and can not be read from a\n"
134 		    "terminal.  Standard input must be redirected.\n");
135 		exit(1);
136 	}
137 
138 	fletcher_4_init();
139 	zio_init();
140 	zstd_init();
141 	while (sfread(drr, sizeof (*drr), stdin) != 0) {
142 		struct drr_write *drrw;
143 		uint64_t payload_size = 0;
144 
145 		/*
146 		 * We need to regenerate the checksum.
147 		 */
148 		if (drr->drr_type != DRR_BEGIN) {
149 			memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
150 			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
151 		}
152 
153 
154 		switch (drr->drr_type) {
155 		case DRR_BEGIN:
156 		{
157 			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
158 
159 			int sz = drr->drr_payloadlen;
160 			if (sz != 0) {
161 				if (sz > bufsz) {
162 					buf = realloc(buf, sz);
163 					if (buf == NULL)
164 						err(1, "realloc");
165 					bufsz = sz;
166 				}
167 				(void) sfread(buf, sz, stdin);
168 			}
169 			payload_size = sz;
170 			break;
171 		}
172 		case DRR_END:
173 		{
174 			struct drr_end *drre = &drr->drr_u.drr_end;
175 			/*
176 			 * Use the recalculated checksum, unless this is
177 			 * the END record of a stream package, which has
178 			 * no checksum.
179 			 */
180 			if (!ZIO_CHECKSUM_IS_ZERO(&drre->drr_checksum))
181 				drre->drr_checksum = stream_cksum;
182 			break;
183 		}
184 
185 		case DRR_OBJECT:
186 		{
187 			struct drr_object *drro = &drr->drr_u.drr_object;
188 
189 			if (drro->drr_bonuslen > 0) {
190 				payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
191 				(void) sfread(buf, payload_size, stdin);
192 			}
193 			break;
194 		}
195 
196 		case DRR_SPILL:
197 		{
198 			struct drr_spill *drrs = &drr->drr_u.drr_spill;
199 			payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs);
200 			(void) sfread(buf, payload_size, stdin);
201 			break;
202 		}
203 
204 		case DRR_WRITE_BYREF:
205 			fprintf(stderr,
206 			    "Deduplicated streams are not supported\n");
207 			exit(1);
208 			break;
209 
210 		case DRR_WRITE:
211 		{
212 			drrw = &thedrr.drr_u.drr_write;
213 			payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
214 			/*
215 			 * In order to recompress an encrypted block, you have
216 			 * to decrypt, decompress, recompress, and
217 			 * re-encrypt. That can be a future enhancement (along
218 			 * with decryption or re-encryption), but for now we
219 			 * skip encrypted blocks.
220 			 */
221 			boolean_t encrypted = B_FALSE;
222 			for (int i = 0; i < ZIO_DATA_SALT_LEN; i++) {
223 				if (drrw->drr_salt[i] != 0) {
224 					encrypted = B_TRUE;
225 					break;
226 				}
227 			}
228 			if (encrypted) {
229 				(void) sfread(buf, payload_size, stdin);
230 				break;
231 			}
232 			if (drrw->drr_compressiontype >=
233 			    ZIO_COMPRESS_FUNCTIONS) {
234 				fprintf(stderr, "Invalid compression type in "
235 				    "stream: %d\n", drrw->drr_compressiontype);
236 				exit(3);
237 			}
238 			zio_compress_info_t *dinfo =
239 			    &zio_compress_table[drrw->drr_compressiontype];
240 
241 			/* Set up buffers to minimize memcpys */
242 			char *cbuf, *dbuf;
243 			if (cinfo->ci_compress == NULL)
244 				dbuf = buf;
245 			else
246 				dbuf = safe_calloc(bufsz);
247 
248 			if (dinfo->ci_decompress == NULL)
249 				cbuf = dbuf;
250 			else
251 				cbuf = safe_calloc(payload_size);
252 
253 			/* Read and decompress the payload */
254 			(void) sfread(cbuf, payload_size, stdin);
255 			if (dinfo->ci_decompress != NULL) {
256 				if (0 != dinfo->ci_decompress(cbuf, dbuf,
257 				    payload_size, MIN(bufsz,
258 				    drrw->drr_logical_size), dinfo->ci_level)) {
259 					warnx("decompression type %d failed "
260 					    "for ino %llu offset %llu",
261 					    type,
262 					    (u_longlong_t)drrw->drr_object,
263 					    (u_longlong_t)drrw->drr_offset);
264 					exit(4);
265 				}
266 				payload_size = drrw->drr_logical_size;
267 				free(cbuf);
268 			}
269 
270 			/* Recompress the payload */
271 			if (cinfo->ci_compress != NULL) {
272 				payload_size = P2ROUNDUP(cinfo->ci_compress(
273 				    dbuf, buf, drrw->drr_logical_size,
274 				    MIN(payload_size, bufsz), (level == -1 ?
275 				    cinfo->ci_level : level)),
276 				    SPA_MINBLOCKSIZE);
277 				if (payload_size != drrw->drr_logical_size) {
278 					drrw->drr_compressiontype = type;
279 					drrw->drr_compressed_size =
280 					    payload_size;
281 				} else {
282 					memcpy(buf, dbuf, payload_size);
283 					drrw->drr_compressiontype = 0;
284 					drrw->drr_compressed_size = 0;
285 				}
286 				free(dbuf);
287 			} else {
288 				drrw->drr_compressiontype = type;
289 				drrw->drr_compressed_size = 0;
290 			}
291 			break;
292 		}
293 
294 		case DRR_WRITE_EMBEDDED:
295 		{
296 			struct drr_write_embedded *drrwe =
297 			    &drr->drr_u.drr_write_embedded;
298 			payload_size =
299 			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8);
300 			(void) sfread(buf, payload_size, stdin);
301 			break;
302 		}
303 
304 		case DRR_FREEOBJECTS:
305 		case DRR_FREE:
306 		case DRR_OBJECT_RANGE:
307 			break;
308 
309 		default:
310 			(void) fprintf(stderr, "INVALID record type 0x%x\n",
311 			    drr->drr_type);
312 			/* should never happen, so assert */
313 			assert(B_FALSE);
314 		}
315 
316 		if (feof(stdout)) {
317 			fprintf(stderr, "Error: unexpected end-of-file\n");
318 			exit(1);
319 		}
320 		if (ferror(stdout)) {
321 			fprintf(stderr, "Error while reading file: %s\n",
322 			    strerror(errno));
323 			exit(1);
324 		}
325 
326 		/*
327 		 * We need to recalculate the checksum, and it needs to be
328 		 * initially zero to do that.  BEGIN records don't have
329 		 * a checksum.
330 		 */
331 		if (drr->drr_type != DRR_BEGIN) {
332 			memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
333 			    sizeof (drr->drr_u.drr_checksum.drr_checksum));
334 		}
335 		if (dump_record(drr, buf, payload_size,
336 		    &stream_cksum, STDOUT_FILENO) != 0)
337 			break;
338 		if (drr->drr_type == DRR_END) {
339 			/*
340 			 * Typically the END record is either the last
341 			 * thing in the stream, or it is followed
342 			 * by a BEGIN record (which also zeros the checksum).
343 			 * However, a stream package ends with two END
344 			 * records.  The last END record's checksum starts
345 			 * from zero.
346 			 */
347 			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
348 		}
349 	}
350 	free(buf);
351 	fletcher_4_fini();
352 	zio_fini();
353 	zstd_fini();
354 
355 	return (0);
356 }
357