xref: /freebsd/sys/contrib/openzfs/cmd/raidz_test/raidz_test.c (revision 8ccc0d235c226d84112561d453c49904398d085c)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
25  */
26 
27 #include <sys/zfs_context.h>
28 #include <sys/time.h>
29 #include <sys/wait.h>
30 #include <sys/zio.h>
31 #include <umem.h>
32 #include <sys/vdev_raidz.h>
33 #include <sys/vdev_raidz_impl.h>
34 #include <assert.h>
35 #include <stdio.h>
36 #include <libzpool.h>
37 #include "raidz_test.h"
38 
39 static int *rand_data;
40 raidz_test_opts_t rto_opts;
41 
42 static char pid_s[16];
43 
44 static void sig_handler(int signo)
45 {
46 	int old_errno = errno;
47 	struct sigaction action;
48 	/*
49 	 * Restore default action and re-raise signal so SIGSEGV and
50 	 * SIGABRT can trigger a core dump.
51 	 */
52 	action.sa_handler = SIG_DFL;
53 	sigemptyset(&action.sa_mask);
54 	action.sa_flags = 0;
55 	(void) sigaction(signo, &action, NULL);
56 
57 	if (rto_opts.rto_gdb) {
58 		pid_t pid = fork();
59 		if (pid == 0) {
60 			execlp("gdb", "gdb", "-ex", "set pagination 0",
61 			    "-p", pid_s, NULL);
62 			_exit(-1);
63 		} else if (pid > 0)
64 			while (waitpid(pid, NULL, 0) == -1 && errno == EINTR)
65 				;
66 	}
67 
68 	raise(signo);
69 	errno = old_errno;
70 }
71 
72 static void print_opts(raidz_test_opts_t *opts, boolean_t force)
73 {
74 	const char *verbose;
75 	switch (opts->rto_v) {
76 		case D_ALL:
77 			verbose = "no";
78 			break;
79 		case D_INFO:
80 			verbose = "info";
81 			break;
82 		case D_DEBUG:
83 		default:
84 			verbose = "debug";
85 			break;
86 	}
87 
88 	if (force || opts->rto_v >= D_INFO) {
89 		(void) fprintf(stdout, DBLSEP "Running with options:\n"
90 		    "  (-a) zio ashift                   : %zu\n"
91 		    "  (-o) zio offset                   : 1 << %zu\n"
92 		    "  (-e) expanded map                 : %s\n"
93 		    "  (-r) reflow offset                : %llx\n"
94 		    "  (-d) number of raidz data columns : %zu\n"
95 		    "  (-s) size of DATA                 : 1 << %zu\n"
96 		    "  (-S) sweep parameters             : %s \n"
97 		    "  (-v) verbose                      : %s \n\n",
98 		    opts->rto_ashift,				/* -a */
99 		    ilog2(opts->rto_offset),			/* -o */
100 		    opts->rto_expand ? "yes" : "no",		/* -e */
101 		    (u_longlong_t)opts->rto_expand_offset,	/* -r */
102 		    opts->rto_dcols,				/* -d */
103 		    ilog2(opts->rto_dsize),			/* -s */
104 		    opts->rto_sweep ? "yes" : "no",		/* -S */
105 		    verbose);					/* -v */
106 	}
107 }
108 
109 static void usage(boolean_t requested)
110 {
111 	const raidz_test_opts_t *o = &rto_opts_defaults;
112 
113 	FILE *fp = requested ? stdout : stderr;
114 
115 	(void) fprintf(fp, "Usage:\n"
116 	    "\t[-a zio ashift (default: %zu)]\n"
117 	    "\t[-o zio offset, exponent radix 2 (default: %zu)]\n"
118 	    "\t[-d number of raidz data columns (default: %zu)]\n"
119 	    "\t[-s zio size, exponent radix 2 (default: %zu)]\n"
120 	    "\t[-S parameter sweep (default: %s)]\n"
121 	    "\t[-t timeout for parameter sweep test]\n"
122 	    "\t[-B benchmark all raidz implementations]\n"
123 	    "\t[-e use expanded raidz map (default: %s)]\n"
124 	    "\t[-r expanded raidz map reflow offset (default: %llx)]\n"
125 	    "\t[-v increase verbosity (default: %d)]\n"
126 	    "\t[-h (print help)]\n"
127 	    "\t[-T test the test, see if failure would be detected]\n"
128 	    "\t[-D debug (attach gdb on SIGSEGV)]\n"
129 	    "",
130 	    o->rto_ashift,				/* -a */
131 	    ilog2(o->rto_offset),			/* -o */
132 	    o->rto_dcols,				/* -d */
133 	    ilog2(o->rto_dsize),			/* -s */
134 	    rto_opts.rto_sweep ? "yes" : "no",		/* -S */
135 	    rto_opts.rto_expand ? "yes" : "no",		/* -e */
136 	    (u_longlong_t)o->rto_expand_offset,		/* -r */
137 	    o->rto_v);					/* -v */
138 
139 	exit(requested ? 0 : 1);
140 }
141 
142 static void process_options(int argc, char **argv)
143 {
144 	size_t value;
145 	int opt;
146 	raidz_test_opts_t *o = &rto_opts;
147 
148 	memcpy(o, &rto_opts_defaults, sizeof (*o));
149 
150 	while ((opt = getopt(argc, argv, "TDBSvha:er:o:d:s:t:")) != -1) {
151 		switch (opt) {
152 		case 'a':
153 			value = strtoull(optarg, NULL, 0);
154 			o->rto_ashift = MIN(13, MAX(9, value));
155 			break;
156 		case 'e':
157 			o->rto_expand = 1;
158 			break;
159 		case 'r':
160 			o->rto_expand_offset = strtoull(optarg, NULL, 0);
161 			break;
162 		case 'o':
163 			value = strtoull(optarg, NULL, 0);
164 			o->rto_offset = ((1ULL << MIN(12, value)) >> 9) << 9;
165 			break;
166 		case 'd':
167 			value = strtoull(optarg, NULL, 0);
168 			o->rto_dcols = MIN(255, MAX(1, value));
169 			break;
170 		case 's':
171 			value = strtoull(optarg, NULL, 0);
172 			o->rto_dsize = 1ULL <<  MIN(SPA_MAXBLOCKSHIFT,
173 			    MAX(SPA_MINBLOCKSHIFT, value));
174 			break;
175 		case 't':
176 			value = strtoull(optarg, NULL, 0);
177 			o->rto_sweep_timeout = value;
178 			break;
179 		case 'v':
180 			o->rto_v++;
181 			break;
182 		case 'S':
183 			o->rto_sweep = 1;
184 			break;
185 		case 'B':
186 			o->rto_benchmark = 1;
187 			break;
188 		case 'D':
189 			o->rto_gdb = 1;
190 			break;
191 		case 'T':
192 			o->rto_sanity = 1;
193 			break;
194 		case 'h':
195 			usage(B_TRUE);
196 			break;
197 		case '?':
198 		default:
199 			usage(B_FALSE);
200 			break;
201 		}
202 	}
203 }
204 
/*
 * Accessors for the columns of a raidz row.
 *
 * DATA_COL / DATA_COL_SIZE address the i-th column counted from
 * rr_firstdatacol; CODE_COL / CODE_COL_SIZE address column i directly.
 * Every use of a macro argument is parenthesized so that an arbitrary
 * pointer expression can be passed as rr.
 */
#define	DATA_COL(rr, i) ((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_abd)
#define	DATA_COL_SIZE(rr, i) \
	((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_size)

#define	CODE_COL(rr, i) ((rr)->rr_col[(i)].rc_abd)
#define	CODE_COL_SIZE(rr, i) ((rr)->rr_col[(i)].rc_size)
210 
211 static int
212 cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
213 {
214 	int r, i, ret = 0;
215 
216 	VERIFY(parity >= 1 && parity <= 3);
217 
218 	for (r = 0; r < rm->rm_nrows; r++) {
219 		raidz_row_t * const rr = rm->rm_row[r];
220 		raidz_row_t * const rrg = opts->rm_golden->rm_row[r];
221 		for (i = 0; i < parity; i++) {
222 			if (CODE_COL_SIZE(rrg, i) == 0) {
223 				VERIFY0(CODE_COL_SIZE(rr, i));
224 				continue;
225 			}
226 
227 			if (abd_cmp(CODE_COL(rr, i),
228 			    CODE_COL(rrg, i)) != 0) {
229 				ret++;
230 				LOG_OPT(D_DEBUG, opts,
231 				    "\nParity block [%d] different!\n", i);
232 			}
233 		}
234 	}
235 	return (ret);
236 }
237 
238 static int
239 cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
240 {
241 	int r, i, dcols, ret = 0;
242 
243 	for (r = 0; r < rm->rm_nrows; r++) {
244 		raidz_row_t *rr = rm->rm_row[r];
245 		raidz_row_t *rrg = opts->rm_golden->rm_row[r];
246 		dcols = opts->rm_golden->rm_row[0]->rr_cols -
247 		    raidz_parity(opts->rm_golden);
248 		for (i = 0; i < dcols; i++) {
249 			if (DATA_COL_SIZE(rrg, i) == 0) {
250 				VERIFY0(DATA_COL_SIZE(rr, i));
251 				continue;
252 			}
253 
254 			if (abd_cmp(DATA_COL(rrg, i),
255 			    DATA_COL(rr, i)) != 0) {
256 				ret++;
257 
258 				LOG_OPT(D_DEBUG, opts,
259 				    "\nData block [%d] different!\n", i);
260 			}
261 		}
262 	}
263 	return (ret);
264 }
265 
266 static int
267 init_rand(void *data, size_t size, void *private)
268 {
269 	(void) private;
270 	memcpy(data, rand_data, size);
271 	return (0);
272 }
273 
274 static void
275 corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt)
276 {
277 	for (int r = 0; r < rm->rm_nrows; r++) {
278 		raidz_row_t *rr = rm->rm_row[r];
279 		for (int i = 0; i < cnt; i++) {
280 			raidz_col_t *col = &rr->rr_col[tgts[i]];
281 			abd_iterate_func(col->rc_abd, 0, col->rc_size,
282 			    init_rand, NULL);
283 		}
284 	}
285 }
286 
287 void
288 init_zio_abd(zio_t *zio)
289 {
290 	abd_iterate_func(zio->io_abd, 0, zio->io_size, init_rand, NULL);
291 }
292 
293 static void
294 fini_raidz_map(zio_t **zio, raidz_map_t **rm)
295 {
296 	vdev_raidz_map_free(*rm);
297 	raidz_free((*zio)->io_abd, (*zio)->io_size);
298 	umem_free(*zio, sizeof (zio_t));
299 
300 	*zio = NULL;
301 	*rm = NULL;
302 }
303 
304 static int
305 init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
306 {
307 	int err = 0;
308 	zio_t *zio_test;
309 	raidz_map_t *rm_test;
310 	const size_t total_ncols = opts->rto_dcols + parity;
311 
312 	if (opts->rm_golden) {
313 		fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
314 	}
315 
316 	opts->zio_golden = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
317 	zio_test = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
318 
319 	opts->zio_golden->io_offset = zio_test->io_offset = opts->rto_offset;
320 	opts->zio_golden->io_size = zio_test->io_size = opts->rto_dsize;
321 
322 	opts->zio_golden->io_abd = raidz_alloc(opts->rto_dsize);
323 	zio_test->io_abd = raidz_alloc(opts->rto_dsize);
324 
325 	init_zio_abd(opts->zio_golden);
326 	init_zio_abd(zio_test);
327 
328 	VERIFY0(vdev_raidz_impl_set("original"));
329 
330 	if (opts->rto_expand) {
331 		opts->rm_golden =
332 		    vdev_raidz_map_alloc_expanded(opts->zio_golden,
333 		    opts->rto_ashift, total_ncols+1, total_ncols,
334 		    parity, opts->rto_expand_offset, 0, B_FALSE);
335 		rm_test = vdev_raidz_map_alloc_expanded(zio_test,
336 		    opts->rto_ashift, total_ncols+1, total_ncols,
337 		    parity, opts->rto_expand_offset, 0, B_FALSE);
338 	} else {
339 		opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
340 		    opts->rto_ashift, total_ncols, parity);
341 		rm_test = vdev_raidz_map_alloc(zio_test,
342 		    opts->rto_ashift, total_ncols, parity);
343 	}
344 
345 	VERIFY(opts->zio_golden);
346 	VERIFY(opts->rm_golden);
347 
348 	vdev_raidz_generate_parity(opts->rm_golden);
349 	vdev_raidz_generate_parity(rm_test);
350 
351 	/* sanity check */
352 	err |= cmp_data(opts, rm_test);
353 	err |= cmp_code(opts, rm_test, parity);
354 
355 	if (err)
356 		ERR("initializing the golden copy ... [FAIL]!\n");
357 
358 	/* tear down raidz_map of test zio */
359 	fini_raidz_map(&zio_test, &rm_test);
360 
361 	return (err);
362 }
363 
364 static raidz_map_t *
365 init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
366 {
367 	raidz_map_t *rm = NULL;
368 	const size_t alloc_dsize = opts->rto_dsize;
369 	const size_t total_ncols = opts->rto_dcols + parity;
370 	const int ccols[] = { 0, 1, 2 };
371 
372 	VERIFY(zio);
373 	VERIFY(parity <= 3 && parity >= 1);
374 
375 	*zio = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
376 
377 	(*zio)->io_offset = 0;
378 	(*zio)->io_size = alloc_dsize;
379 	(*zio)->io_abd = raidz_alloc(alloc_dsize);
380 	init_zio_abd(*zio);
381 
382 	if (opts->rto_expand) {
383 		rm = vdev_raidz_map_alloc_expanded(*zio,
384 		    opts->rto_ashift, total_ncols+1, total_ncols,
385 		    parity, opts->rto_expand_offset, 0, B_FALSE);
386 	} else {
387 		rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
388 		    total_ncols, parity);
389 	}
390 	VERIFY(rm);
391 
392 	/* Make sure code columns are destroyed */
393 	corrupt_colums(rm, ccols, parity);
394 
395 	return (rm);
396 }
397 
398 static int
399 run_gen_check(raidz_test_opts_t *opts)
400 {
401 	char **impl_name;
402 	int fn, err = 0;
403 	zio_t *zio_test;
404 	raidz_map_t *rm_test;
405 
406 	err = init_raidz_golden_map(opts, PARITY_PQR);
407 	if (0 != err)
408 		return (err);
409 
410 	LOG(D_INFO, DBLSEP);
411 	LOG(D_INFO, "Testing parity generation...\n");
412 
413 	for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
414 	    impl_name++) {
415 
416 		LOG(D_INFO, SEP);
417 		LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);
418 
419 		if (0 != vdev_raidz_impl_set(*impl_name)) {
420 			LOG(D_INFO, "[SKIP]\n");
421 			continue;
422 		} else {
423 			LOG(D_INFO, "[SUPPORTED]\n");
424 		}
425 
426 		for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
427 
428 			/* Check if should stop */
429 			if (rto_opts.rto_should_stop)
430 				return (err);
431 
432 			/* create suitable raidz_map */
433 			rm_test = init_raidz_map(opts, &zio_test, fn+1);
434 			VERIFY(rm_test);
435 
436 			LOG(D_INFO, "\t\tTesting method [%s] ...",
437 			    raidz_gen_name[fn]);
438 
439 			if (!opts->rto_sanity)
440 				vdev_raidz_generate_parity(rm_test);
441 
442 			if (cmp_code(opts, rm_test, fn+1) != 0) {
443 				LOG(D_INFO, "[FAIL]\n");
444 				err++;
445 			} else
446 				LOG(D_INFO, "[PASS]\n");
447 
448 			fini_raidz_map(&zio_test, &rm_test);
449 		}
450 	}
451 
452 	fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
453 
454 	return (err);
455 }
456 
457 static int
458 run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
459 {
460 	int x0, x1, x2;
461 	int tgtidx[3];
462 	int err = 0;
463 	static const int rec_tgts[7][3] = {
464 		{1, 2, 3},	/* rec_p:   bad QR & D[0]	*/
465 		{0, 2, 3},	/* rec_q:   bad PR & D[0]	*/
466 		{0, 1, 3},	/* rec_r:   bad PQ & D[0]	*/
467 		{2, 3, 4},	/* rec_pq:  bad R  & D[0][1]	*/
468 		{1, 3, 4},	/* rec_pr:  bad Q  & D[0][1]	*/
469 		{0, 3, 4},	/* rec_qr:  bad P  & D[0][1]	*/
470 		{3, 4, 5}	/* rec_pqr: bad    & D[0][1][2] */
471 	};
472 
473 	memcpy(tgtidx, rec_tgts[fn], sizeof (tgtidx));
474 
475 	if (fn < RAIDZ_REC_PQ) {
476 		/* can reconstruct 1 failed data disk */
477 		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
478 			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
479 				continue;
480 
481 			/* Check if should stop */
482 			if (rto_opts.rto_should_stop)
483 				return (err);
484 
485 			LOG(D_DEBUG, "[%d] ", x0);
486 
487 			tgtidx[2] = x0 + raidz_parity(rm);
488 
489 			corrupt_colums(rm, tgtidx+2, 1);
490 
491 			if (!opts->rto_sanity)
492 				vdev_raidz_reconstruct(rm, tgtidx, 3);
493 
494 			if (cmp_data(opts, rm) != 0) {
495 				err++;
496 				LOG(D_DEBUG, "\nREC D[%d]... [FAIL]\n", x0);
497 			}
498 		}
499 
500 	} else if (fn < RAIDZ_REC_PQR) {
501 		/* can reconstruct 2 failed data disk */
502 		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
503 			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
504 				continue;
505 			for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
506 				if (x1 >= rm->rm_row[0]->rr_cols -
507 				    raidz_parity(rm))
508 					continue;
509 
510 				/* Check if should stop */
511 				if (rto_opts.rto_should_stop)
512 					return (err);
513 
514 				LOG(D_DEBUG, "[%d %d] ", x0, x1);
515 
516 				tgtidx[1] = x0 + raidz_parity(rm);
517 				tgtidx[2] = x1 + raidz_parity(rm);
518 
519 				corrupt_colums(rm, tgtidx+1, 2);
520 
521 				if (!opts->rto_sanity)
522 					vdev_raidz_reconstruct(rm, tgtidx, 3);
523 
524 				if (cmp_data(opts, rm) != 0) {
525 					err++;
526 					LOG(D_DEBUG, "\nREC D[%d %d]... "
527 					    "[FAIL]\n", x0, x1);
528 				}
529 			}
530 		}
531 	} else {
532 		/* can reconstruct 3 failed data disk */
533 		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
534 			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
535 				continue;
536 			for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
537 				if (x1 >= rm->rm_row[0]->rr_cols -
538 				    raidz_parity(rm))
539 					continue;
540 				for (x2 = x1 + 1; x2 < opts->rto_dcols; x2++) {
541 					if (x2 >= rm->rm_row[0]->rr_cols -
542 					    raidz_parity(rm))
543 						continue;
544 
545 					/* Check if should stop */
546 					if (rto_opts.rto_should_stop)
547 						return (err);
548 
549 					LOG(D_DEBUG, "[%d %d %d]", x0, x1, x2);
550 
551 					tgtidx[0] = x0 + raidz_parity(rm);
552 					tgtidx[1] = x1 + raidz_parity(rm);
553 					tgtidx[2] = x2 + raidz_parity(rm);
554 
555 					corrupt_colums(rm, tgtidx, 3);
556 
557 					if (!opts->rto_sanity)
558 						vdev_raidz_reconstruct(rm,
559 						    tgtidx, 3);
560 
561 					if (cmp_data(opts, rm) != 0) {
562 						err++;
563 						LOG(D_DEBUG,
564 						    "\nREC D[%d %d %d]... "
565 						    "[FAIL]\n", x0, x1, x2);
566 					}
567 				}
568 			}
569 		}
570 	}
571 	return (err);
572 }
573 
574 static int
575 run_rec_check(raidz_test_opts_t *opts)
576 {
577 	char **impl_name;
578 	unsigned fn, err = 0;
579 	zio_t *zio_test;
580 	raidz_map_t *rm_test;
581 
582 	err = init_raidz_golden_map(opts, PARITY_PQR);
583 	if (0 != err)
584 		return (err);
585 
586 	LOG(D_INFO, DBLSEP);
587 	LOG(D_INFO, "Testing data reconstruction...\n");
588 
589 	for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
590 	    impl_name++) {
591 
592 		LOG(D_INFO, SEP);
593 		LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);
594 
595 		if (vdev_raidz_impl_set(*impl_name) != 0) {
596 			LOG(D_INFO, "[SKIP]\n");
597 			continue;
598 		} else
599 			LOG(D_INFO, "[SUPPORTED]\n");
600 
601 
602 		/* create suitable raidz_map */
603 		rm_test = init_raidz_map(opts, &zio_test, PARITY_PQR);
604 		/* generate parity */
605 		vdev_raidz_generate_parity(rm_test);
606 
607 		for (fn = 0; fn < RAIDZ_REC_NUM; fn++) {
608 
609 			LOG(D_INFO, "\t\tTesting method [%s] ...",
610 			    raidz_rec_name[fn]);
611 
612 			if (run_rec_check_impl(opts, rm_test, fn) != 0) {
613 				LOG(D_INFO, "[FAIL]\n");
614 				err++;
615 
616 			} else
617 				LOG(D_INFO, "[PASS]\n");
618 
619 		}
620 		/* tear down test raidz_map */
621 		fini_raidz_map(&zio_test, &rm_test);
622 	}
623 
624 	fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
625 
626 	return (err);
627 }
628 
629 static int
630 run_test(raidz_test_opts_t *opts)
631 {
632 	int err = 0;
633 
634 	if (opts == NULL)
635 		opts = &rto_opts;
636 
637 	print_opts(opts, B_FALSE);
638 
639 	err |= run_gen_check(opts);
640 	err |= run_rec_check(opts);
641 
642 	return (err);
643 }
644 
645 #define	SWEEP_RUNNING	0
646 #define	SWEEP_FINISHED	1
647 #define	SWEEP_ERROR	2
648 #define	SWEEP_TIMEOUT	3
649 
650 static int sweep_state = 0;
651 static raidz_test_opts_t failed_opts;
652 
653 static kmutex_t sem_mtx;
654 static kcondvar_t sem_cv;
655 static int max_free_slots;
656 static int free_slots;
657 
658 static __attribute__((noreturn)) void
659 sweep_thread(void *arg)
660 {
661 	int err = 0;
662 	raidz_test_opts_t *opts = (raidz_test_opts_t *)arg;
663 	VERIFY(opts != NULL);
664 
665 	err = run_test(opts);
666 
667 	if (rto_opts.rto_sanity) {
668 		/* 25% chance that a sweep test fails */
669 		if (rand() < (RAND_MAX/4))
670 			err = 1;
671 	}
672 
673 	if (0 != err) {
674 		mutex_enter(&sem_mtx);
675 		memcpy(&failed_opts, opts, sizeof (raidz_test_opts_t));
676 		sweep_state = SWEEP_ERROR;
677 		mutex_exit(&sem_mtx);
678 	}
679 
680 	umem_free(opts, sizeof (raidz_test_opts_t));
681 
682 	/* signal the next thread */
683 	mutex_enter(&sem_mtx);
684 	free_slots++;
685 	cv_signal(&sem_cv);
686 	mutex_exit(&sem_mtx);
687 
688 	thread_exit();
689 }
690 
691 static int
692 run_sweep(void)
693 {
694 	static const size_t dcols_v[] = { 1, 2, 3, 4, 5, 6, 7, 8, 12, 15, 16 };
695 	static const size_t ashift_v[] = { 9, 12, 14 };
696 	static const size_t size_v[] = { 1 << 9, 21 * (1 << 9), 13 * (1 << 12),
697 		1 << 17, (1 << 20) - (1 << 12), SPA_MAXBLOCKSIZE };
698 
699 	(void) setvbuf(stdout, NULL, _IONBF, 0);
700 
701 	ulong_t total_comb = ARRAY_SIZE(size_v) * ARRAY_SIZE(ashift_v) *
702 	    ARRAY_SIZE(dcols_v);
703 	ulong_t tried_comb = 0;
704 	hrtime_t time_diff, start_time = gethrtime();
705 	raidz_test_opts_t *opts;
706 	int a, d, s;
707 
708 	max_free_slots = free_slots = MAX(2, boot_ncpus);
709 
710 	mutex_init(&sem_mtx, NULL, MUTEX_DEFAULT, NULL);
711 	cv_init(&sem_cv, NULL, CV_DEFAULT, NULL);
712 
713 	for (s = 0; s < ARRAY_SIZE(size_v); s++)
714 	for (a = 0; a < ARRAY_SIZE(ashift_v); a++)
715 	for (d = 0; d < ARRAY_SIZE(dcols_v); d++) {
716 
717 		if (size_v[s] < (1 << ashift_v[a])) {
718 			total_comb--;
719 			continue;
720 		}
721 
722 		if (++tried_comb % 20 == 0)
723 			LOG(D_ALL, "%lu/%lu... ", tried_comb, total_comb);
724 
725 		/* wait for signal to start new thread */
726 		mutex_enter(&sem_mtx);
727 		while (cv_timedwait_sig(&sem_cv, &sem_mtx,
728 		    ddi_get_lbolt() + hz)) {
729 
730 			/* check if should stop the test (timeout) */
731 			time_diff = (gethrtime() - start_time) / NANOSEC;
732 			if (rto_opts.rto_sweep_timeout > 0 &&
733 			    time_diff >= rto_opts.rto_sweep_timeout) {
734 				sweep_state = SWEEP_TIMEOUT;
735 				rto_opts.rto_should_stop = B_TRUE;
736 				mutex_exit(&sem_mtx);
737 				goto exit;
738 			}
739 
740 			/* check if should stop the test (error) */
741 			if (sweep_state != SWEEP_RUNNING) {
742 				mutex_exit(&sem_mtx);
743 				goto exit;
744 			}
745 
746 			/* exit loop if a slot is available */
747 			if (free_slots > 0) {
748 				break;
749 			}
750 		}
751 
752 		free_slots--;
753 		mutex_exit(&sem_mtx);
754 
755 		opts = umem_zalloc(sizeof (raidz_test_opts_t), UMEM_NOFAIL);
756 		opts->rto_ashift = ashift_v[a];
757 		opts->rto_dcols = dcols_v[d];
758 		opts->rto_offset = (1ULL << ashift_v[a]) * rand();
759 		opts->rto_dsize = size_v[s];
760 		opts->rto_expand = rto_opts.rto_expand;
761 		opts->rto_expand_offset = rto_opts.rto_expand_offset;
762 		opts->rto_v = 0; /* be quiet */
763 
764 		VERIFY3P(thread_create(NULL, 0, sweep_thread, (void *) opts,
765 		    0, NULL, TS_RUN, defclsyspri), !=, NULL);
766 	}
767 
768 exit:
769 	LOG(D_ALL, "\nWaiting for test threads to finish...\n");
770 	mutex_enter(&sem_mtx);
771 	VERIFY(free_slots <= max_free_slots);
772 	while (free_slots < max_free_slots) {
773 		(void) cv_wait(&sem_cv, &sem_mtx);
774 	}
775 	mutex_exit(&sem_mtx);
776 
777 	if (sweep_state == SWEEP_ERROR) {
778 		ERR("Sweep test failed! Failed option: \n");
779 		print_opts(&failed_opts, B_TRUE);
780 	} else {
781 		if (sweep_state == SWEEP_TIMEOUT)
782 			LOG(D_ALL, "Test timeout (%lus). Stopping...\n",
783 			    (ulong_t)rto_opts.rto_sweep_timeout);
784 
785 		LOG(D_ALL, "Sweep test succeeded on %lu raidz maps!\n",
786 		    (ulong_t)tried_comb);
787 	}
788 
789 	mutex_destroy(&sem_mtx);
790 
791 	return (sweep_state == SWEEP_ERROR ? SWEEP_ERROR : 0);
792 }
793 
794 
795 int
796 main(int argc, char **argv)
797 {
798 	size_t i;
799 	struct sigaction action;
800 	int err = 0;
801 
802 	/* init gdb pid string early */
803 	(void) sprintf(pid_s, "%d", getpid());
804 
805 	action.sa_handler = sig_handler;
806 	sigemptyset(&action.sa_mask);
807 	action.sa_flags = 0;
808 
809 	if (sigaction(SIGSEGV, &action, NULL) < 0) {
810 		ERR("raidz_test: cannot catch SIGSEGV: %s.\n", strerror(errno));
811 		exit(EXIT_FAILURE);
812 	}
813 
814 	(void) setvbuf(stdout, NULL, _IOLBF, 0);
815 
816 	dprintf_setup(&argc, argv);
817 
818 	process_options(argc, argv);
819 
820 	kernel_init(SPA_MODE_READ);
821 
822 	/* setup random data because rand() is not reentrant */
823 	rand_data = (int *)umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
824 	srand((unsigned)time(NULL) * getpid());
825 	for (i = 0; i < SPA_MAXBLOCKSIZE / sizeof (int); i++)
826 		rand_data[i] = rand();
827 
828 	mprotect(rand_data, SPA_MAXBLOCKSIZE, PROT_READ);
829 
830 	if (rto_opts.rto_benchmark) {
831 		run_raidz_benchmark();
832 	} else if (rto_opts.rto_sweep) {
833 		err = run_sweep();
834 	} else {
835 		err = run_test(NULL);
836 	}
837 
838 	umem_free(rand_data, SPA_MAXBLOCKSIZE);
839 	kernel_fini();
840 
841 	return (err);
842 }
843