xref: /freebsd/sys/contrib/openzfs/cmd/raidz_test/raidz_test.c (revision 66e85755595a451db490d2fe24267d85db4b09c2)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
25  */
26 
27 #include <sys/zfs_context.h>
28 #include <sys/time.h>
29 #include <sys/wait.h>
30 #include <sys/zio.h>
31 #include <umem.h>
32 #include <sys/vdev_raidz.h>
33 #include <sys/vdev_raidz_impl.h>
34 #include <assert.h>
35 #include <stdio.h>
36 #include <libzpool.h>
37 #include "raidz_test.h"
38 
39 static int *rand_data;
40 raidz_test_opts_t rto_opts;
41 
42 static char pid_s[16];
43 
sig_handler(int signo)44 static void sig_handler(int signo)
45 {
46 	int old_errno = errno;
47 	struct sigaction action;
48 	/*
49 	 * Restore default action and re-raise signal so SIGSEGV and
50 	 * SIGABRT can trigger a core dump.
51 	 */
52 	action.sa_handler = SIG_DFL;
53 	sigemptyset(&action.sa_mask);
54 	action.sa_flags = 0;
55 	(void) sigaction(signo, &action, NULL);
56 
57 	if (rto_opts.rto_gdb) {
58 		pid_t pid = fork();
59 		if (pid == 0) {
60 			execlp("gdb", "gdb", "-ex", "set pagination 0",
61 			    "-p", pid_s, NULL);
62 			_exit(-1);
63 		} else if (pid > 0)
64 			while (waitpid(pid, NULL, 0) == -1 && errno == EINTR)
65 				;
66 	}
67 
68 	raise(signo);
69 	errno = old_errno;
70 }
71 
/*
 * Print the option set in opts to stdout.
 *
 * opts  - options to report
 * force - when true the report is always printed; otherwise it is printed
 *         only if opts->rto_v is at least D_INFO
 */
static void print_opts(raidz_test_opts_t *opts, boolean_t force)
{
	/* Any level other than D_ALL and D_INFO is reported as "debug" */
	const char *level_name = "debug";

	if (opts->rto_v == D_ALL)
		level_name = "no";
	else if (opts->rto_v == D_INFO)
		level_name = "info";

	/* Nothing to do unless forced or verbose enough */
	if (!force && opts->rto_v < D_INFO)
		return;

	(void) fprintf(stdout, DBLSEP "Running with options:\n"
	    "  (-a) zio ashift                   : %zu\n"
	    "  (-o) zio offset                   : 1 << %zu\n"
	    "  (-e) expanded map                 : %s\n"
	    "  (-r) reflow offset                : %llx\n"
	    "  (-d) number of raidz data columns : %zu\n"
	    "  (-s) size of DATA                 : 1 << %zu\n"
	    "  (-S) sweep parameters             : %s \n"
	    "  (-v) verbose                      : %s \n\n",
	    opts->rto_ashift,				/* -a */
	    ilog2(opts->rto_offset),			/* -o */
	    opts->rto_expand ? "yes" : "no",		/* -e */
	    (u_longlong_t)opts->rto_expand_offset,	/* -r */
	    opts->rto_dcols,				/* -d */
	    ilog2(opts->rto_dsize),			/* -s */
	    opts->rto_sweep ? "yes" : "no",		/* -S */
	    level_name);				/* -v */
}
108 
/*
 * Print the command line help and terminate the process.
 *
 * requested - B_TRUE when help was asked for explicitly: the text goes to
 *             stdout and the exit status is 0.  Otherwise the text goes to
 *             stderr and the exit status is 1.
 *
 * Every "(default: ...)" value is taken from rto_opts_defaults.  This
 * function can be called from the middle of option parsing, when rto_opts
 * may already hold user-supplied values, so rto_opts must not be used here.
 */
static void usage(boolean_t requested)
{
	const raidz_test_opts_t *o = &rto_opts_defaults;

	FILE *fp = requested ? stdout : stderr;

	(void) fprintf(fp, "Usage:\n"
	    "\t[-a zio ashift (default: %zu)]\n"
	    "\t[-o zio offset, exponent radix 2 (default: %zu)]\n"
	    "\t[-d number of raidz data columns (default: %zu)]\n"
	    "\t[-s zio size, exponent radix 2 (default: %zu)]\n"
	    "\t[-S parameter sweep (default: %s)]\n"
	    "\t[-t timeout for parameter sweep test]\n"
	    "\t[-B benchmark all raidz implementations]\n"
	    "\t[-e use expanded raidz map (default: %s)]\n"
	    "\t[-r expanded raidz map reflow offset (default: %llx)]\n"
	    "\t[-v increase verbosity (default: %d)]\n"
	    "\t[-h (print help)]\n"
	    "\t[-T test the test, see if failure would be detected]\n"
	    "\t[-D debug (attach gdb on SIGSEGV)]\n"
	    "",
	    o->rto_ashift,				/* -a */
	    ilog2(o->rto_offset),			/* -o */
	    o->rto_dcols,				/* -d */
	    ilog2(o->rto_dsize),			/* -s */
	    o->rto_sweep ? "yes" : "no",		/* -S */
	    o->rto_expand ? "yes" : "no",		/* -e */
	    (u_longlong_t)o->rto_expand_offset,		/* -r */
	    /*
	     * The declared type of rto_v is not visible in this file; the
	     * cast guarantees the argument is the int that %d expects.
	     */
	    (int)o->rto_v);				/* -v */

	exit(requested ? 0 : 1);
}
141 
process_options(int argc,char ** argv)142 static void process_options(int argc, char **argv)
143 {
144 	size_t value;
145 	int opt;
146 	raidz_test_opts_t *o = &rto_opts;
147 
148 	memcpy(o, &rto_opts_defaults, sizeof (*o));
149 
150 	while ((opt = getopt(argc, argv, "TDBSvha:er:o:d:s:t:")) != -1) {
151 		switch (opt) {
152 		case 'a':
153 			value = strtoull(optarg, NULL, 0);
154 			o->rto_ashift = MIN(13, MAX(9, value));
155 			break;
156 		case 'e':
157 			o->rto_expand = 1;
158 			break;
159 		case 'r':
160 			o->rto_expand_offset = strtoull(optarg, NULL, 0);
161 			break;
162 		case 'o':
163 			value = strtoull(optarg, NULL, 0);
164 			o->rto_offset = ((1ULL << MIN(12, value)) >> 9) << 9;
165 			break;
166 		case 'd':
167 			value = strtoull(optarg, NULL, 0);
168 			o->rto_dcols = MIN(255, MAX(1, value));
169 			break;
170 		case 's':
171 			value = strtoull(optarg, NULL, 0);
172 			o->rto_dsize = 1ULL <<  MIN(SPA_MAXBLOCKSHIFT,
173 			    MAX(SPA_MINBLOCKSHIFT, value));
174 			break;
175 		case 't':
176 			value = strtoull(optarg, NULL, 0);
177 			o->rto_sweep_timeout = value;
178 			break;
179 		case 'v':
180 			o->rto_v++;
181 			break;
182 		case 'S':
183 			o->rto_sweep = 1;
184 			break;
185 		case 'B':
186 			o->rto_benchmark = 1;
187 			break;
188 		case 'D':
189 			o->rto_gdb = 1;
190 			break;
191 		case 'T':
192 			o->rto_sanity = 1;
193 			break;
194 		case 'h':
195 			usage(B_TRUE);
196 			break;
197 		case '?':
198 		default:
199 			usage(B_FALSE);
200 			break;
201 		}
202 	}
203 }
204 
/*
 * Accessors for the columns of a raidz row.
 *
 * DATA_COL / DATA_COL_SIZE address the i-th column counted from the row's
 * rr_firstdatacol; CODE_COL / CODE_COL_SIZE address column i counted from
 * the start of rr_col.  Every use of a macro argument is parenthesized so
 * that an arbitrary expression (e.g. "rows + 1" or "&row") can be passed
 * as rr.
 */
#define	DATA_COL(rr, i) \
	((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_abd)
#define	DATA_COL_SIZE(rr, i) \
	((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_size)

#define	CODE_COL(rr, i) ((rr)->rr_col[(i)].rc_abd)
#define	CODE_COL_SIZE(rr, i) ((rr)->rr_col[(i)].rc_size)
210 
211 static int
cmp_code(raidz_test_opts_t * opts,const raidz_map_t * rm,const int parity)212 cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
213 {
214 	int r, i, ret = 0;
215 
216 	VERIFY(parity >= 1 && parity <= 3);
217 
218 	for (r = 0; r < rm->rm_nrows; r++) {
219 		raidz_row_t * const rr = rm->rm_row[r];
220 		raidz_row_t * const rrg = opts->rm_golden->rm_row[r];
221 		for (i = 0; i < parity; i++) {
222 			if (CODE_COL_SIZE(rrg, i) == 0) {
223 				VERIFY0(CODE_COL_SIZE(rr, i));
224 				continue;
225 			}
226 
227 			if (abd_cmp(CODE_COL(rr, i),
228 			    CODE_COL(rrg, i)) != 0) {
229 				ret++;
230 				LOG_OPT(D_DEBUG, opts,
231 				    "\nParity block [%d] different!\n", i);
232 			}
233 		}
234 	}
235 	return (ret);
236 }
237 
238 static int
cmp_data(raidz_test_opts_t * opts,raidz_map_t * rm)239 cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
240 {
241 	int r, i, dcols, ret = 0;
242 
243 	for (r = 0; r < rm->rm_nrows; r++) {
244 		raidz_row_t *rr = rm->rm_row[r];
245 		raidz_row_t *rrg = opts->rm_golden->rm_row[r];
246 		dcols = opts->rm_golden->rm_row[0]->rr_cols -
247 		    raidz_parity(opts->rm_golden);
248 		for (i = 0; i < dcols; i++) {
249 			if (DATA_COL_SIZE(rrg, i) == 0) {
250 				VERIFY0(DATA_COL_SIZE(rr, i));
251 				continue;
252 			}
253 
254 			if (abd_cmp(DATA_COL(rrg, i),
255 			    DATA_COL(rr, i)) != 0) {
256 				ret++;
257 
258 				LOG_OPT(D_DEBUG, opts,
259 				    "\nData block [%d] different!\n", i);
260 			}
261 		}
262 	}
263 	return (ret);
264 }
265 
266 static int
init_rand(void * data,size_t size,void * private)267 init_rand(void *data, size_t size, void *private)
268 {
269 	size_t *offsetp = (size_t *)private;
270 	size_t offset = *offsetp;
271 
272 	VERIFY3U(offset + size, <=, SPA_MAXBLOCKSIZE);
273 	memcpy(data, (char *)rand_data + offset, size);
274 	*offsetp = offset + size;
275 	return (0);
276 }
277 
/*
 * Callback in the shape expected by abd_iterate_func(): overwrite the
 * whole chunk with the byte 0xAA.
 *
 * data    - chunk to overwrite
 * size    - chunk length in bytes
 * private - unused
 *
 * Always returns 0.
 */
static int
corrupt_rand_fill(void *data, size_t size, void *private)
{
	static const int fill_byte = 0xAA;

	(void) private;
	(void) memset(data, fill_byte, size);

	return (0);
}
285 
286 static void
corrupt_colums(raidz_map_t * rm,const int * tgts,const int cnt)287 corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt)
288 {
289 	for (int r = 0; r < rm->rm_nrows; r++) {
290 		raidz_row_t *rr = rm->rm_row[r];
291 		for (int i = 0; i < cnt; i++) {
292 			raidz_col_t *col = &rr->rr_col[tgts[i]];
293 			abd_iterate_func(col->rc_abd, 0, col->rc_size,
294 			    corrupt_rand_fill, NULL);
295 		}
296 	}
297 }
298 
299 void
init_zio_abd(zio_t * zio)300 init_zio_abd(zio_t *zio)
301 {
302 	size_t offset = 0;
303 	abd_iterate_func(zio->io_abd, 0, zio->io_size, init_rand, &offset);
304 }
305 
306 static void
fini_raidz_map(zio_t ** zio,raidz_map_t ** rm)307 fini_raidz_map(zio_t **zio, raidz_map_t **rm)
308 {
309 	vdev_raidz_map_free(*rm);
310 	raidz_free((*zio)->io_abd, (*zio)->io_size);
311 	umem_free(*zio, sizeof (zio_t));
312 
313 	*zio = NULL;
314 	*rm = NULL;
315 }
316 
317 static int
init_raidz_golden_map(raidz_test_opts_t * opts,const int parity)318 init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
319 {
320 	int err = 0;
321 	zio_t *zio_test;
322 	raidz_map_t *rm_test;
323 	const size_t total_ncols = opts->rto_dcols + parity;
324 
325 	if (opts->rm_golden) {
326 		fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
327 	}
328 
329 	opts->zio_golden = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
330 	zio_test = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
331 
332 	opts->zio_golden->io_offset = zio_test->io_offset = opts->rto_offset;
333 	opts->zio_golden->io_size = zio_test->io_size = opts->rto_dsize;
334 
335 	opts->zio_golden->io_abd = raidz_alloc(opts->rto_dsize);
336 	zio_test->io_abd = raidz_alloc(opts->rto_dsize);
337 
338 	init_zio_abd(opts->zio_golden);
339 	init_zio_abd(zio_test);
340 
341 	VERIFY0(vdev_raidz_impl_set("original"));
342 
343 	if (opts->rto_expand) {
344 		opts->rm_golden =
345 		    vdev_raidz_map_alloc_expanded(opts->zio_golden,
346 		    opts->rto_ashift, total_ncols+1, total_ncols,
347 		    parity, opts->rto_expand_offset, 0, B_FALSE);
348 		rm_test = vdev_raidz_map_alloc_expanded(zio_test,
349 		    opts->rto_ashift, total_ncols+1, total_ncols,
350 		    parity, opts->rto_expand_offset, 0, B_FALSE);
351 	} else {
352 		opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
353 		    opts->rto_ashift, total_ncols, parity);
354 		rm_test = vdev_raidz_map_alloc(zio_test,
355 		    opts->rto_ashift, total_ncols, parity);
356 	}
357 
358 	VERIFY(opts->zio_golden);
359 	VERIFY(opts->rm_golden);
360 
361 	vdev_raidz_generate_parity(opts->rm_golden);
362 	vdev_raidz_generate_parity(rm_test);
363 
364 	/* sanity check */
365 	err |= cmp_data(opts, rm_test);
366 	err |= cmp_code(opts, rm_test, parity);
367 
368 	if (err)
369 		ERR("initializing the golden copy ... [FAIL]!\n");
370 
371 	/* tear down raidz_map of test zio */
372 	fini_raidz_map(&zio_test, &rm_test);
373 
374 	return (err);
375 }
376 
377 static raidz_map_t *
init_raidz_map(raidz_test_opts_t * opts,zio_t ** zio,const int parity)378 init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
379 {
380 	raidz_map_t *rm = NULL;
381 	const size_t alloc_dsize = opts->rto_dsize;
382 	const size_t total_ncols = opts->rto_dcols + parity;
383 	const int ccols[] = { 0, 1, 2 };
384 
385 	VERIFY(zio);
386 	VERIFY(parity <= 3 && parity >= 1);
387 
388 	*zio = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
389 
390 	(*zio)->io_offset = opts->rto_offset;
391 	(*zio)->io_size = alloc_dsize;
392 	(*zio)->io_abd = raidz_alloc(alloc_dsize);
393 	init_zio_abd(*zio);
394 
395 	if (opts->rto_expand) {
396 		rm = vdev_raidz_map_alloc_expanded(*zio,
397 		    opts->rto_ashift, total_ncols+1, total_ncols,
398 		    parity, opts->rto_expand_offset, 0, B_FALSE);
399 	} else {
400 		rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
401 		    total_ncols, parity);
402 	}
403 	VERIFY(rm);
404 
405 	/* Make sure code columns are destroyed */
406 	corrupt_colums(rm, ccols, parity);
407 
408 	return (rm);
409 }
410 
411 static int
run_gen_check(raidz_test_opts_t * opts)412 run_gen_check(raidz_test_opts_t *opts)
413 {
414 	char **impl_name;
415 	int fn, err = 0;
416 	zio_t *zio_test;
417 	raidz_map_t *rm_test;
418 
419 	err = init_raidz_golden_map(opts, PARITY_PQR);
420 	if (0 != err)
421 		return (err);
422 
423 	LOG(D_INFO, DBLSEP);
424 	LOG(D_INFO, "Testing parity generation...\n");
425 
426 	for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
427 	    impl_name++) {
428 
429 		LOG(D_INFO, SEP);
430 		LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);
431 
432 		if (0 != vdev_raidz_impl_set(*impl_name)) {
433 			LOG(D_INFO, "[SKIP]\n");
434 			continue;
435 		} else {
436 			LOG(D_INFO, "[SUPPORTED]\n");
437 		}
438 
439 		for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
440 
441 			/* Check if should stop */
442 			if (rto_opts.rto_should_stop)
443 				return (err);
444 
445 			/* create suitable raidz_map */
446 			rm_test = init_raidz_map(opts, &zio_test, fn+1);
447 			VERIFY(rm_test);
448 
449 			LOG(D_INFO, "\t\tTesting method [%s] ...",
450 			    raidz_gen_name[fn]);
451 
452 			if (!opts->rto_sanity)
453 				vdev_raidz_generate_parity(rm_test);
454 
455 			if (cmp_code(opts, rm_test, fn+1) != 0) {
456 				LOG(D_INFO, "[FAIL]\n");
457 				err++;
458 			} else
459 				LOG(D_INFO, "[PASS]\n");
460 
461 			fini_raidz_map(&zio_test, &rm_test);
462 		}
463 	}
464 
465 	fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
466 
467 	return (err);
468 }
469 
470 static int
run_rec_check_impl(raidz_test_opts_t * opts,raidz_map_t * rm,const int fn)471 run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
472 {
473 	int x0, x1, x2;
474 	int tgtidx[3];
475 	int err = 0;
476 	static const int rec_tgts[7][3] = {
477 		{1, 2, 3},	/* rec_p:   bad QR & D[0]	*/
478 		{0, 2, 3},	/* rec_q:   bad PR & D[0]	*/
479 		{0, 1, 3},	/* rec_r:   bad PQ & D[0]	*/
480 		{2, 3, 4},	/* rec_pq:  bad R  & D[0][1]	*/
481 		{1, 3, 4},	/* rec_pr:  bad Q  & D[0][1]	*/
482 		{0, 3, 4},	/* rec_qr:  bad P  & D[0][1]	*/
483 		{3, 4, 5}	/* rec_pqr: bad    & D[0][1][2] */
484 	};
485 
486 	memcpy(tgtidx, rec_tgts[fn], sizeof (tgtidx));
487 
488 	if (fn < RAIDZ_REC_PQ) {
489 		/* can reconstruct 1 failed data disk */
490 		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
491 			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
492 				continue;
493 
494 			/* Check if should stop */
495 			if (rto_opts.rto_should_stop)
496 				return (err);
497 
498 			LOG(D_DEBUG, "[%d] ", x0);
499 
500 			tgtidx[2] = x0 + raidz_parity(rm);
501 
502 			corrupt_colums(rm, tgtidx+2, 1);
503 
504 			if (!opts->rto_sanity)
505 				vdev_raidz_reconstruct(rm, tgtidx, 3);
506 
507 			if (cmp_data(opts, rm) != 0) {
508 				err++;
509 				LOG(D_DEBUG, "\nREC D[%d]... [FAIL]\n", x0);
510 			}
511 		}
512 
513 	} else if (fn < RAIDZ_REC_PQR) {
514 		/* can reconstruct 2 failed data disk */
515 		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
516 			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
517 				continue;
518 			for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
519 				if (x1 >= rm->rm_row[0]->rr_cols -
520 				    raidz_parity(rm))
521 					continue;
522 
523 				/* Check if should stop */
524 				if (rto_opts.rto_should_stop)
525 					return (err);
526 
527 				LOG(D_DEBUG, "[%d %d] ", x0, x1);
528 
529 				tgtidx[1] = x0 + raidz_parity(rm);
530 				tgtidx[2] = x1 + raidz_parity(rm);
531 
532 				corrupt_colums(rm, tgtidx+1, 2);
533 
534 				if (!opts->rto_sanity)
535 					vdev_raidz_reconstruct(rm, tgtidx, 3);
536 
537 				if (cmp_data(opts, rm) != 0) {
538 					err++;
539 					LOG(D_DEBUG, "\nREC D[%d %d]... "
540 					    "[FAIL]\n", x0, x1);
541 				}
542 			}
543 		}
544 	} else {
545 		/* can reconstruct 3 failed data disk */
546 		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
547 			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
548 				continue;
549 			for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
550 				if (x1 >= rm->rm_row[0]->rr_cols -
551 				    raidz_parity(rm))
552 					continue;
553 				for (x2 = x1 + 1; x2 < opts->rto_dcols; x2++) {
554 					if (x2 >= rm->rm_row[0]->rr_cols -
555 					    raidz_parity(rm))
556 						continue;
557 
558 					/* Check if should stop */
559 					if (rto_opts.rto_should_stop)
560 						return (err);
561 
562 					LOG(D_DEBUG, "[%d %d %d]", x0, x1, x2);
563 
564 					tgtidx[0] = x0 + raidz_parity(rm);
565 					tgtidx[1] = x1 + raidz_parity(rm);
566 					tgtidx[2] = x2 + raidz_parity(rm);
567 
568 					corrupt_colums(rm, tgtidx, 3);
569 
570 					if (!opts->rto_sanity)
571 						vdev_raidz_reconstruct(rm,
572 						    tgtidx, 3);
573 
574 					if (cmp_data(opts, rm) != 0) {
575 						err++;
576 						LOG(D_DEBUG,
577 						    "\nREC D[%d %d %d]... "
578 						    "[FAIL]\n", x0, x1, x2);
579 					}
580 				}
581 			}
582 		}
583 	}
584 	return (err);
585 }
586 
587 static int
run_rec_check(raidz_test_opts_t * opts)588 run_rec_check(raidz_test_opts_t *opts)
589 {
590 	char **impl_name;
591 	unsigned fn, err = 0;
592 	zio_t *zio_test;
593 	raidz_map_t *rm_test;
594 
595 	err = init_raidz_golden_map(opts, PARITY_PQR);
596 	if (0 != err)
597 		return (err);
598 
599 	LOG(D_INFO, DBLSEP);
600 	LOG(D_INFO, "Testing data reconstruction...\n");
601 
602 	for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
603 	    impl_name++) {
604 
605 		LOG(D_INFO, SEP);
606 		LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);
607 
608 		if (vdev_raidz_impl_set(*impl_name) != 0) {
609 			LOG(D_INFO, "[SKIP]\n");
610 			continue;
611 		} else
612 			LOG(D_INFO, "[SUPPORTED]\n");
613 
614 
615 		/* create suitable raidz_map */
616 		rm_test = init_raidz_map(opts, &zio_test, PARITY_PQR);
617 		/* generate parity */
618 		vdev_raidz_generate_parity(rm_test);
619 
620 		for (fn = 0; fn < RAIDZ_REC_NUM; fn++) {
621 
622 			LOG(D_INFO, "\t\tTesting method [%s] ...",
623 			    raidz_rec_name[fn]);
624 
625 			if (run_rec_check_impl(opts, rm_test, fn) != 0) {
626 				LOG(D_INFO, "[FAIL]\n");
627 				err++;
628 
629 			} else
630 				LOG(D_INFO, "[PASS]\n");
631 
632 		}
633 		/* tear down test raidz_map */
634 		fini_raidz_map(&zio_test, &rm_test);
635 	}
636 
637 	fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
638 
639 	return (err);
640 }
641 
642 static int
run_test(raidz_test_opts_t * opts)643 run_test(raidz_test_opts_t *opts)
644 {
645 	int err = 0;
646 
647 	if (opts == NULL)
648 		opts = &rto_opts;
649 
650 	print_opts(opts, B_FALSE);
651 
652 	err |= run_gen_check(opts);
653 	err |= run_rec_check(opts);
654 
655 	return (err);
656 }
657 
658 #define	SWEEP_RUNNING	0
659 #define	SWEEP_FINISHED	1
660 #define	SWEEP_ERROR	2
661 #define	SWEEP_TIMEOUT	3
662 
663 static int sweep_state = 0;
664 static raidz_test_opts_t failed_opts;
665 
666 static kmutex_t sem_mtx;
667 static kcondvar_t sem_cv;
668 static int max_free_slots;
669 static int free_slots;
670 
671 static __attribute__((noreturn)) void
sweep_thread(void * arg)672 sweep_thread(void *arg)
673 {
674 	int err = 0;
675 	raidz_test_opts_t *opts = (raidz_test_opts_t *)arg;
676 	VERIFY(opts != NULL);
677 
678 	err = run_test(opts);
679 
680 	if (rto_opts.rto_sanity) {
681 		/* 25% chance that a sweep test fails */
682 		if (rand() < (RAND_MAX/4))
683 			err = 1;
684 	}
685 
686 	if (0 != err) {
687 		mutex_enter(&sem_mtx);
688 		memcpy(&failed_opts, opts, sizeof (raidz_test_opts_t));
689 		sweep_state = SWEEP_ERROR;
690 		mutex_exit(&sem_mtx);
691 	}
692 
693 	umem_free(opts, sizeof (raidz_test_opts_t));
694 
695 	/* signal the next thread */
696 	mutex_enter(&sem_mtx);
697 	free_slots++;
698 	cv_signal(&sem_cv);
699 	mutex_exit(&sem_mtx);
700 
701 	thread_exit();
702 }
703 
704 static int
run_sweep(void)705 run_sweep(void)
706 {
707 	static const size_t dcols_v[] = { 1, 2, 3, 4, 5, 6, 7, 8, 12, 15, 16 };
708 	static const size_t ashift_v[] = { 9, 12, 14 };
709 	static const size_t size_v[] = { 1 << 9, 21 * (1 << 9), 13 * (1 << 12),
710 		1 << 17, (1 << 20) - (1 << 12), SPA_MAXBLOCKSIZE };
711 
712 	(void) setvbuf(stdout, NULL, _IONBF, 0);
713 
714 	ulong_t total_comb = ARRAY_SIZE(size_v) * ARRAY_SIZE(ashift_v) *
715 	    ARRAY_SIZE(dcols_v);
716 	ulong_t tried_comb = 0;
717 	hrtime_t time_diff, start_time = gethrtime();
718 	raidz_test_opts_t *opts;
719 	int a, d, s;
720 
721 	max_free_slots = free_slots = MAX(2, boot_ncpus);
722 
723 	mutex_init(&sem_mtx, NULL, MUTEX_DEFAULT, NULL);
724 	cv_init(&sem_cv, NULL, CV_DEFAULT, NULL);
725 
726 	for (s = 0; s < ARRAY_SIZE(size_v); s++)
727 	for (a = 0; a < ARRAY_SIZE(ashift_v); a++)
728 	for (d = 0; d < ARRAY_SIZE(dcols_v); d++) {
729 
730 		if (size_v[s] < (1 << ashift_v[a])) {
731 			total_comb--;
732 			continue;
733 		}
734 
735 		if (++tried_comb % 20 == 0)
736 			LOG(D_ALL, "%lu/%lu... ", tried_comb, total_comb);
737 
738 		/* wait for signal to start new thread */
739 		mutex_enter(&sem_mtx);
740 		while (cv_timedwait_sig(&sem_cv, &sem_mtx,
741 		    ddi_get_lbolt() + hz)) {
742 
743 			/* check if should stop the test (timeout) */
744 			time_diff = (gethrtime() - start_time) / NANOSEC;
745 			if (rto_opts.rto_sweep_timeout > 0 &&
746 			    time_diff >= rto_opts.rto_sweep_timeout) {
747 				sweep_state = SWEEP_TIMEOUT;
748 				rto_opts.rto_should_stop = B_TRUE;
749 				mutex_exit(&sem_mtx);
750 				goto exit;
751 			}
752 
753 			/* check if should stop the test (error) */
754 			if (sweep_state != SWEEP_RUNNING) {
755 				mutex_exit(&sem_mtx);
756 				goto exit;
757 			}
758 
759 			/* exit loop if a slot is available */
760 			if (free_slots > 0) {
761 				break;
762 			}
763 		}
764 
765 		free_slots--;
766 		mutex_exit(&sem_mtx);
767 
768 		opts = umem_zalloc(sizeof (raidz_test_opts_t), UMEM_NOFAIL);
769 		opts->rto_ashift = ashift_v[a];
770 		opts->rto_dcols = dcols_v[d];
771 		opts->rto_offset = (1ULL << ashift_v[a]) * rand();
772 		opts->rto_dsize = size_v[s];
773 		opts->rto_expand = rto_opts.rto_expand;
774 		opts->rto_expand_offset = rto_opts.rto_expand_offset;
775 		opts->rto_v = 0; /* be quiet */
776 
777 		VERIFY3P(thread_create(NULL, 0, sweep_thread, (void *) opts,
778 		    0, NULL, TS_RUN, defclsyspri), !=, NULL);
779 	}
780 
781 exit:
782 	LOG(D_ALL, "\nWaiting for test threads to finish...\n");
783 	mutex_enter(&sem_mtx);
784 	VERIFY(free_slots <= max_free_slots);
785 	while (free_slots < max_free_slots) {
786 		(void) cv_wait(&sem_cv, &sem_mtx);
787 	}
788 	mutex_exit(&sem_mtx);
789 
790 	if (sweep_state == SWEEP_ERROR) {
791 		ERR("Sweep test failed! Failed option: \n");
792 		print_opts(&failed_opts, B_TRUE);
793 	} else {
794 		if (sweep_state == SWEEP_TIMEOUT)
795 			LOG(D_ALL, "Test timeout (%lus). Stopping...\n",
796 			    (ulong_t)rto_opts.rto_sweep_timeout);
797 
798 		LOG(D_ALL, "Sweep test succeeded on %lu raidz maps!\n",
799 		    (ulong_t)tried_comb);
800 	}
801 
802 	mutex_destroy(&sem_mtx);
803 
804 	return (sweep_state == SWEEP_ERROR ? SWEEP_ERROR : 0);
805 }
806 
807 
808 int
main(int argc,char ** argv)809 main(int argc, char **argv)
810 {
811 	size_t i;
812 	struct sigaction action;
813 	int err = 0;
814 
815 	/* init gdb pid string early */
816 	(void) sprintf(pid_s, "%d", getpid());
817 
818 	action.sa_handler = sig_handler;
819 	sigemptyset(&action.sa_mask);
820 	action.sa_flags = 0;
821 
822 	if (sigaction(SIGSEGV, &action, NULL) < 0) {
823 		ERR("raidz_test: cannot catch SIGSEGV: %s.\n", strerror(errno));
824 		exit(EXIT_FAILURE);
825 	}
826 
827 	(void) setvbuf(stdout, NULL, _IOLBF, 0);
828 
829 	dprintf_setup(&argc, argv);
830 
831 	process_options(argc, argv);
832 
833 	kernel_init(SPA_MODE_READ);
834 
835 	/* setup random data because rand() is not reentrant */
836 	rand_data = (int *)umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
837 	srand((unsigned)time(NULL) * getpid());
838 	for (i = 0; i < SPA_MAXBLOCKSIZE / sizeof (int); i++)
839 		rand_data[i] = rand();
840 
841 	mprotect(rand_data, SPA_MAXBLOCKSIZE, PROT_READ);
842 
843 	if (rto_opts.rto_benchmark) {
844 		run_raidz_benchmark();
845 	} else if (rto_opts.rto_sweep) {
846 		err = run_sweep();
847 	} else {
848 		err = run_test(NULL);
849 	}
850 
851 	mprotect(rand_data, SPA_MAXBLOCKSIZE, PROT_READ | PROT_WRITE);
852 
853 	umem_free(rand_data, SPA_MAXBLOCKSIZE);
854 	kernel_fini();
855 
856 	return (err);
857 }
858