1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or https://opensource.org/licenses/CDDL-1.0.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22
23 /*
24 * Copyright 2022 Axcient. All rights reserved.
25 * Use is subject to license terms.
26 *
27 * Copyright (c) 2022 by Delphix. All rights reserved.
28 * Copyright (c) 2024, Klara, Inc.
29 */
30
31 #include <err.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <unistd.h>
35 #include <sys/zfs_ioctl.h>
36 #include <sys/zio_checksum.h>
37 #include <sys/zstd/zstd.h>
38 #include "zfs_fletcher.h"
39 #include "zstream.h"
40 #include "zstream_util.h"
41
42 int
zstream_do_recompress(int argc,char * argv[])43 zstream_do_recompress(int argc, char *argv[])
44 {
45 int bufsz = SPA_MAXBLOCKSIZE;
46 char *buf = safe_malloc(bufsz);
47 dmu_replay_record_t thedrr;
48 dmu_replay_record_t *drr = &thedrr;
49 zio_cksum_t stream_cksum;
50 int c;
51 int level = 0;
52
53 while ((c = getopt(argc, argv, "l:")) != -1) {
54 switch (c) {
55 case 'l':
56 if (sscanf(optarg, "%d", &level) != 1) {
57 fprintf(stderr,
58 "failed to parse level '%s'\n",
59 optarg);
60 zstream_usage();
61 }
62 break;
63 case '?':
64 (void) fprintf(stderr, "invalid option '%c'\n",
65 optopt);
66 zstream_usage();
67 break;
68 }
69 }
70
71 argc -= optind;
72 argv += optind;
73
74 if (argc != 1)
75 zstream_usage();
76
77 enum zio_compress ctype;
78 if (strcmp(argv[0], "off") == 0) {
79 ctype = ZIO_COMPRESS_OFF;
80 } else {
81 for (ctype = 0; ctype < ZIO_COMPRESS_FUNCTIONS; ctype++) {
82 if (strcmp(argv[0],
83 zio_compress_table[ctype].ci_name) == 0)
84 break;
85 }
86 if (ctype == ZIO_COMPRESS_FUNCTIONS ||
87 zio_compress_table[ctype].ci_compress == NULL) {
88 fprintf(stderr, "Invalid compression type %s.\n",
89 argv[0]);
90 exit(2);
91 }
92 }
93
94 if (isatty(STDIN_FILENO)) {
95 (void) fprintf(stderr,
96 "Error: The send stream is a binary format "
97 "and can not be read from a\n"
98 "terminal. Standard input must be redirected.\n");
99 exit(1);
100 }
101
102 abd_init();
103 fletcher_4_init();
104 zio_init();
105 zstd_init();
106 int begin = 0;
107 boolean_t seen = B_FALSE;
108 while (sfread(drr, sizeof (*drr), stdin) != 0) {
109 struct drr_write *drrw;
110 uint64_t payload_size = 0;
111
112 /*
113 * We need to regenerate the checksum.
114 */
115 if (drr->drr_type != DRR_BEGIN) {
116 memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
117 sizeof (drr->drr_u.drr_checksum.drr_checksum));
118 }
119
120
121 switch (drr->drr_type) {
122 case DRR_BEGIN:
123 {
124 ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
125 VERIFY0(begin++);
126 seen = B_TRUE;
127
128 uint32_t sz = drr->drr_payloadlen;
129
130 VERIFY3U(sz, <=, 1U << 28);
131
132 if (sz != 0) {
133 if (sz > bufsz) {
134 buf = realloc(buf, sz);
135 if (buf == NULL)
136 err(1, "realloc");
137 bufsz = sz;
138 }
139 (void) sfread(buf, sz, stdin);
140 }
141 payload_size = sz;
142 break;
143 }
144 case DRR_END:
145 {
146 struct drr_end *drre = &drr->drr_u.drr_end;
147 /*
148 * We would prefer to just check --begin == 0, but
149 * replication streams have an end of stream END
150 * record, so we must avoid tripping it.
151 */
152 VERIFY3B(seen, ==, B_TRUE);
153 begin--;
154 /*
155 * Use the recalculated checksum, unless this is
156 * the END record of a stream package, which has
157 * no checksum.
158 */
159 if (!ZIO_CHECKSUM_IS_ZERO(&drre->drr_checksum))
160 drre->drr_checksum = stream_cksum;
161 break;
162 }
163
164 case DRR_OBJECT:
165 {
166 struct drr_object *drro = &drr->drr_u.drr_object;
167 VERIFY3S(begin, ==, 1);
168
169 if (drro->drr_bonuslen > 0) {
170 payload_size = DRR_OBJECT_PAYLOAD_SIZE(drro);
171 (void) sfread(buf, payload_size, stdin);
172 }
173 break;
174 }
175
176 case DRR_SPILL:
177 {
178 struct drr_spill *drrs = &drr->drr_u.drr_spill;
179 VERIFY3S(begin, ==, 1);
180 payload_size = DRR_SPILL_PAYLOAD_SIZE(drrs);
181 (void) sfread(buf, payload_size, stdin);
182 break;
183 }
184
185 case DRR_WRITE_BYREF:
186 VERIFY3S(begin, ==, 1);
187 fprintf(stderr,
188 "Deduplicated streams are not supported\n");
189 exit(1);
190 break;
191
192 case DRR_WRITE:
193 {
194 VERIFY3S(begin, ==, 1);
195 drrw = &thedrr.drr_u.drr_write;
196 payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
197 /*
198 * In order to recompress an encrypted block, you have
199 * to decrypt, decompress, recompress, and
200 * re-encrypt. That can be a future enhancement (along
201 * with decryption or re-encryption), but for now we
202 * skip encrypted blocks.
203 */
204 boolean_t encrypted = B_FALSE;
205 for (int i = 0; i < ZIO_DATA_SALT_LEN; i++) {
206 if (drrw->drr_salt[i] != 0) {
207 encrypted = B_TRUE;
208 break;
209 }
210 }
211 if (encrypted) {
212 (void) sfread(buf, payload_size, stdin);
213 break;
214 }
215 enum zio_compress dtype = drrw->drr_compressiontype;
216 if (dtype >= ZIO_COMPRESS_FUNCTIONS) {
217 fprintf(stderr, "Invalid compression type in "
218 "stream: %d\n", dtype);
219 exit(3);
220 }
221 if (zio_compress_table[dtype].ci_decompress == NULL)
222 dtype = ZIO_COMPRESS_OFF;
223
224 /* Set up buffers to minimize memcpys */
225 char *cbuf, *dbuf;
226 if (ctype == ZIO_COMPRESS_OFF)
227 dbuf = buf;
228 else
229 dbuf = safe_calloc(bufsz);
230
231 if (dtype == ZIO_COMPRESS_OFF)
232 cbuf = dbuf;
233 else
234 cbuf = safe_calloc(payload_size);
235
236 /* Read and decompress the payload */
237 (void) sfread(cbuf, payload_size, stdin);
238 if (dtype != ZIO_COMPRESS_OFF) {
239 abd_t cabd, dabd;
240 abd_get_from_buf_struct(&cabd,
241 cbuf, payload_size);
242 abd_get_from_buf_struct(&dabd, dbuf,
243 MIN(bufsz, drrw->drr_logical_size));
244 if (zio_decompress_data(dtype, &cabd, &dabd,
245 payload_size, abd_get_size(&dabd),
246 NULL) != 0) {
247 warnx("decompression type %d failed "
248 "for ino %llu offset %llu",
249 dtype,
250 (u_longlong_t)drrw->drr_object,
251 (u_longlong_t)drrw->drr_offset);
252 exit(4);
253 }
254 payload_size = drrw->drr_logical_size;
255 abd_free(&dabd);
256 abd_free(&cabd);
257 free(cbuf);
258 }
259
260 /* Recompress the payload */
261 if (ctype != ZIO_COMPRESS_OFF) {
262 abd_t dabd, abd;
263 abd_get_from_buf_struct(&dabd,
264 dbuf, drrw->drr_logical_size);
265 abd_t *pabd =
266 abd_get_from_buf_struct(&abd, buf, bufsz);
267 size_t csize = zio_compress_data(ctype, &dabd,
268 &pabd, drrw->drr_logical_size,
269 drrw->drr_logical_size, level);
270 size_t rounded =
271 P2ROUNDUP(csize, SPA_MINBLOCKSIZE);
272 if (rounded >= drrw->drr_logical_size) {
273 memcpy(buf, dbuf, payload_size);
274 drrw->drr_compressiontype = 0;
275 drrw->drr_compressed_size = 0;
276 } else {
277 abd_zero_off(pabd, csize,
278 rounded - csize);
279 drrw->drr_compressiontype = ctype;
280 drrw->drr_compressed_size =
281 payload_size = rounded;
282 }
283 abd_free(&abd);
284 abd_free(&dabd);
285 free(dbuf);
286 } else {
287 drrw->drr_compressiontype = 0;
288 drrw->drr_compressed_size = 0;
289 }
290 break;
291 }
292
293 case DRR_WRITE_EMBEDDED:
294 {
295 struct drr_write_embedded *drrwe =
296 &drr->drr_u.drr_write_embedded;
297 VERIFY3S(begin, ==, 1);
298 payload_size =
299 P2ROUNDUP((uint64_t)drrwe->drr_psize, 8);
300 (void) sfread(buf, payload_size, stdin);
301 break;
302 }
303
304 case DRR_FREEOBJECTS:
305 case DRR_FREE:
306 case DRR_OBJECT_RANGE:
307 VERIFY3S(begin, ==, 1);
308 break;
309
310 default:
311 (void) fprintf(stderr, "INVALID record type 0x%x\n",
312 drr->drr_type);
313 /* should never happen, so assert */
314 assert(B_FALSE);
315 }
316
317 if (feof(stdout)) {
318 fprintf(stderr, "Error: unexpected end-of-file\n");
319 exit(1);
320 }
321 if (ferror(stdout)) {
322 fprintf(stderr, "Error while reading file: %s\n",
323 strerror(errno));
324 exit(1);
325 }
326
327 /*
328 * We need to recalculate the checksum, and it needs to be
329 * initially zero to do that. BEGIN records don't have
330 * a checksum.
331 */
332 if (drr->drr_type != DRR_BEGIN) {
333 memset(&drr->drr_u.drr_checksum.drr_checksum, 0,
334 sizeof (drr->drr_u.drr_checksum.drr_checksum));
335 }
336 if (dump_record(drr, buf, payload_size,
337 &stream_cksum, STDOUT_FILENO) != 0)
338 break;
339 if (drr->drr_type == DRR_END) {
340 /*
341 * Typically the END record is either the last
342 * thing in the stream, or it is followed
343 * by a BEGIN record (which also zeros the checksum).
344 * However, a stream package ends with two END
345 * records. The last END record's checksum starts
346 * from zero.
347 */
348 ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
349 }
350 }
351 free(buf);
352 fletcher_4_fini();
353 zio_fini();
354 zstd_fini();
355 abd_fini();
356
357 return (0);
358 }
359