xref: /freebsd/sys/contrib/openzfs/cmd/raidz_test/raidz_test.c (revision e0c4386e7e71d93b0edc0c8fa156263fc4a8b0b6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
24  */
25 
26 #include <sys/zfs_context.h>
27 #include <sys/time.h>
28 #include <sys/wait.h>
29 #include <sys/zio.h>
30 #include <umem.h>
31 #include <sys/vdev_raidz.h>
32 #include <sys/vdev_raidz_impl.h>
33 #include <assert.h>
34 #include <stdio.h>
35 #include "raidz_test.h"
36 
37 static int *rand_data;
38 raidz_test_opts_t rto_opts;
39 
40 static char pid_s[16];
41 
42 static void sig_handler(int signo)
43 {
44 	int old_errno = errno;
45 	struct sigaction action;
46 	/*
47 	 * Restore default action and re-raise signal so SIGSEGV and
48 	 * SIGABRT can trigger a core dump.
49 	 */
50 	action.sa_handler = SIG_DFL;
51 	sigemptyset(&action.sa_mask);
52 	action.sa_flags = 0;
53 	(void) sigaction(signo, &action, NULL);
54 
55 	if (rto_opts.rto_gdb) {
56 		pid_t pid = fork();
57 		if (pid == 0) {
58 			execlp("gdb", "gdb", "-ex", "set pagination 0",
59 			    "-p", pid_s, NULL);
60 			_exit(-1);
61 		} else if (pid > 0)
62 			while (waitpid(pid, NULL, 0) == -1 && errno == EINTR)
63 				;
64 	}
65 
66 	raise(signo);
67 	errno = old_errno;
68 }
69 
70 static void print_opts(raidz_test_opts_t *opts, boolean_t force)
71 {
72 	const char *verbose;
73 	switch (opts->rto_v) {
74 		case D_ALL:
75 			verbose = "no";
76 			break;
77 		case D_INFO:
78 			verbose = "info";
79 			break;
80 		case D_DEBUG:
81 		default:
82 			verbose = "debug";
83 			break;
84 	}
85 
86 	if (force || opts->rto_v >= D_INFO) {
87 		(void) fprintf(stdout, DBLSEP "Running with options:\n"
88 		    "  (-a) zio ashift                   : %zu\n"
89 		    "  (-o) zio offset                   : 1 << %zu\n"
90 		    "  (-e) expanded map                 : %s\n"
91 		    "  (-r) reflow offset                : %llx\n"
92 		    "  (-d) number of raidz data columns : %zu\n"
93 		    "  (-s) size of DATA                 : 1 << %zu\n"
94 		    "  (-S) sweep parameters             : %s \n"
95 		    "  (-v) verbose                      : %s \n\n",
96 		    opts->rto_ashift,				/* -a */
97 		    ilog2(opts->rto_offset),			/* -o */
98 		    opts->rto_expand ? "yes" : "no",		/* -e */
99 		    (u_longlong_t)opts->rto_expand_offset,	/* -r */
100 		    opts->rto_dcols,				/* -d */
101 		    ilog2(opts->rto_dsize),			/* -s */
102 		    opts->rto_sweep ? "yes" : "no",		/* -S */
103 		    verbose);					/* -v */
104 	}
105 }
106 
107 static void usage(boolean_t requested)
108 {
109 	const raidz_test_opts_t *o = &rto_opts_defaults;
110 
111 	FILE *fp = requested ? stdout : stderr;
112 
113 	(void) fprintf(fp, "Usage:\n"
114 	    "\t[-a zio ashift (default: %zu)]\n"
115 	    "\t[-o zio offset, exponent radix 2 (default: %zu)]\n"
116 	    "\t[-d number of raidz data columns (default: %zu)]\n"
117 	    "\t[-s zio size, exponent radix 2 (default: %zu)]\n"
118 	    "\t[-S parameter sweep (default: %s)]\n"
119 	    "\t[-t timeout for parameter sweep test]\n"
120 	    "\t[-B benchmark all raidz implementations]\n"
121 	    "\t[-e use expanded raidz map (default: %s)]\n"
122 	    "\t[-r expanded raidz map reflow offset (default: %llx)]\n"
123 	    "\t[-v increase verbosity (default: %d)]\n"
124 	    "\t[-h (print help)]\n"
125 	    "\t[-T test the test, see if failure would be detected]\n"
126 	    "\t[-D debug (attach gdb on SIGSEGV)]\n"
127 	    "",
128 	    o->rto_ashift,				/* -a */
129 	    ilog2(o->rto_offset),			/* -o */
130 	    o->rto_dcols,				/* -d */
131 	    ilog2(o->rto_dsize),			/* -s */
132 	    rto_opts.rto_sweep ? "yes" : "no",		/* -S */
133 	    rto_opts.rto_expand ? "yes" : "no",		/* -e */
134 	    (u_longlong_t)o->rto_expand_offset,		/* -r */
135 	    o->rto_v);					/* -v */
136 
137 	exit(requested ? 0 : 1);
138 }
139 
140 static void process_options(int argc, char **argv)
141 {
142 	size_t value;
143 	int opt;
144 	raidz_test_opts_t *o = &rto_opts;
145 
146 	memcpy(o, &rto_opts_defaults, sizeof (*o));
147 
148 	while ((opt = getopt(argc, argv, "TDBSvha:er:o:d:s:t:")) != -1) {
149 		switch (opt) {
150 		case 'a':
151 			value = strtoull(optarg, NULL, 0);
152 			o->rto_ashift = MIN(13, MAX(9, value));
153 			break;
154 		case 'e':
155 			o->rto_expand = 1;
156 			break;
157 		case 'r':
158 			o->rto_expand_offset = strtoull(optarg, NULL, 0);
159 			break;
160 		case 'o':
161 			value = strtoull(optarg, NULL, 0);
162 			o->rto_offset = ((1ULL << MIN(12, value)) >> 9) << 9;
163 			break;
164 		case 'd':
165 			value = strtoull(optarg, NULL, 0);
166 			o->rto_dcols = MIN(255, MAX(1, value));
167 			break;
168 		case 's':
169 			value = strtoull(optarg, NULL, 0);
170 			o->rto_dsize = 1ULL <<  MIN(SPA_MAXBLOCKSHIFT,
171 			    MAX(SPA_MINBLOCKSHIFT, value));
172 			break;
173 		case 't':
174 			value = strtoull(optarg, NULL, 0);
175 			o->rto_sweep_timeout = value;
176 			break;
177 		case 'v':
178 			o->rto_v++;
179 			break;
180 		case 'S':
181 			o->rto_sweep = 1;
182 			break;
183 		case 'B':
184 			o->rto_benchmark = 1;
185 			break;
186 		case 'D':
187 			o->rto_gdb = 1;
188 			break;
189 		case 'T':
190 			o->rto_sanity = 1;
191 			break;
192 		case 'h':
193 			usage(B_TRUE);
194 			break;
195 		case '?':
196 		default:
197 			usage(B_FALSE);
198 			break;
199 		}
200 	}
201 }
202 
/*
 * Column accessors for a raidz row pointer `rr`.
 *
 * DATA_COL / DATA_COL_SIZE index relative to rr_firstdatacol;
 * CODE_COL / CODE_COL_SIZE index rr_col[] directly.
 * Every use of an argument is parenthesized so that `rr` may be an
 * arbitrary pointer expression (e.g. `rows + 1`).
 */
#define	DATA_COL(rr, i) ((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_abd)
#define	DATA_COL_SIZE(rr, i) \
	((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_size)

#define	CODE_COL(rr, i) ((rr)->rr_col[(i)].rc_abd)
#define	CODE_COL_SIZE(rr, i) ((rr)->rr_col[(i)].rc_size)
208 
209 static int
210 cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
211 {
212 	int r, i, ret = 0;
213 
214 	VERIFY(parity >= 1 && parity <= 3);
215 
216 	for (r = 0; r < rm->rm_nrows; r++) {
217 		raidz_row_t * const rr = rm->rm_row[r];
218 		raidz_row_t * const rrg = opts->rm_golden->rm_row[r];
219 		for (i = 0; i < parity; i++) {
220 			if (CODE_COL_SIZE(rrg, i) == 0) {
221 				VERIFY0(CODE_COL_SIZE(rr, i));
222 				continue;
223 			}
224 
225 			if (abd_cmp(CODE_COL(rr, i),
226 			    CODE_COL(rrg, i)) != 0) {
227 				ret++;
228 				LOG_OPT(D_DEBUG, opts,
229 				    "\nParity block [%d] different!\n", i);
230 			}
231 		}
232 	}
233 	return (ret);
234 }
235 
236 static int
237 cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
238 {
239 	int r, i, dcols, ret = 0;
240 
241 	for (r = 0; r < rm->rm_nrows; r++) {
242 		raidz_row_t *rr = rm->rm_row[r];
243 		raidz_row_t *rrg = opts->rm_golden->rm_row[r];
244 		dcols = opts->rm_golden->rm_row[0]->rr_cols -
245 		    raidz_parity(opts->rm_golden);
246 		for (i = 0; i < dcols; i++) {
247 			if (DATA_COL_SIZE(rrg, i) == 0) {
248 				VERIFY0(DATA_COL_SIZE(rr, i));
249 				continue;
250 			}
251 
252 			if (abd_cmp(DATA_COL(rrg, i),
253 			    DATA_COL(rr, i)) != 0) {
254 				ret++;
255 
256 				LOG_OPT(D_DEBUG, opts,
257 				    "\nData block [%d] different!\n", i);
258 			}
259 		}
260 	}
261 	return (ret);
262 }
263 
264 static int
265 init_rand(void *data, size_t size, void *private)
266 {
267 	(void) private;
268 	memcpy(data, rand_data, size);
269 	return (0);
270 }
271 
272 static void
273 corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt)
274 {
275 	for (int r = 0; r < rm->rm_nrows; r++) {
276 		raidz_row_t *rr = rm->rm_row[r];
277 		for (int i = 0; i < cnt; i++) {
278 			raidz_col_t *col = &rr->rr_col[tgts[i]];
279 			abd_iterate_func(col->rc_abd, 0, col->rc_size,
280 			    init_rand, NULL);
281 		}
282 	}
283 }
284 
285 void
286 init_zio_abd(zio_t *zio)
287 {
288 	abd_iterate_func(zio->io_abd, 0, zio->io_size, init_rand, NULL);
289 }
290 
291 static void
292 fini_raidz_map(zio_t **zio, raidz_map_t **rm)
293 {
294 	vdev_raidz_map_free(*rm);
295 	raidz_free((*zio)->io_abd, (*zio)->io_size);
296 	umem_free(*zio, sizeof (zio_t));
297 
298 	*zio = NULL;
299 	*rm = NULL;
300 }
301 
302 static int
303 init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
304 {
305 	int err = 0;
306 	zio_t *zio_test;
307 	raidz_map_t *rm_test;
308 	const size_t total_ncols = opts->rto_dcols + parity;
309 
310 	if (opts->rm_golden) {
311 		fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
312 	}
313 
314 	opts->zio_golden = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
315 	zio_test = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
316 
317 	opts->zio_golden->io_offset = zio_test->io_offset = opts->rto_offset;
318 	opts->zio_golden->io_size = zio_test->io_size = opts->rto_dsize;
319 
320 	opts->zio_golden->io_abd = raidz_alloc(opts->rto_dsize);
321 	zio_test->io_abd = raidz_alloc(opts->rto_dsize);
322 
323 	init_zio_abd(opts->zio_golden);
324 	init_zio_abd(zio_test);
325 
326 	VERIFY0(vdev_raidz_impl_set("original"));
327 
328 	if (opts->rto_expand) {
329 		opts->rm_golden =
330 		    vdev_raidz_map_alloc_expanded(opts->zio_golden,
331 		    opts->rto_ashift, total_ncols+1, total_ncols,
332 		    parity, opts->rto_expand_offset, 0, B_FALSE);
333 		rm_test = vdev_raidz_map_alloc_expanded(zio_test,
334 		    opts->rto_ashift, total_ncols+1, total_ncols,
335 		    parity, opts->rto_expand_offset, 0, B_FALSE);
336 	} else {
337 		opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
338 		    opts->rto_ashift, total_ncols, parity);
339 		rm_test = vdev_raidz_map_alloc(zio_test,
340 		    opts->rto_ashift, total_ncols, parity);
341 	}
342 
343 	VERIFY(opts->zio_golden);
344 	VERIFY(opts->rm_golden);
345 
346 	vdev_raidz_generate_parity(opts->rm_golden);
347 	vdev_raidz_generate_parity(rm_test);
348 
349 	/* sanity check */
350 	err |= cmp_data(opts, rm_test);
351 	err |= cmp_code(opts, rm_test, parity);
352 
353 	if (err)
354 		ERR("initializing the golden copy ... [FAIL]!\n");
355 
356 	/* tear down raidz_map of test zio */
357 	fini_raidz_map(&zio_test, &rm_test);
358 
359 	return (err);
360 }
361 
362 static raidz_map_t *
363 init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
364 {
365 	raidz_map_t *rm = NULL;
366 	const size_t alloc_dsize = opts->rto_dsize;
367 	const size_t total_ncols = opts->rto_dcols + parity;
368 	const int ccols[] = { 0, 1, 2 };
369 
370 	VERIFY(zio);
371 	VERIFY(parity <= 3 && parity >= 1);
372 
373 	*zio = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
374 
375 	(*zio)->io_offset = 0;
376 	(*zio)->io_size = alloc_dsize;
377 	(*zio)->io_abd = raidz_alloc(alloc_dsize);
378 	init_zio_abd(*zio);
379 
380 	if (opts->rto_expand) {
381 		rm = vdev_raidz_map_alloc_expanded(*zio,
382 		    opts->rto_ashift, total_ncols+1, total_ncols,
383 		    parity, opts->rto_expand_offset, 0, B_FALSE);
384 	} else {
385 		rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
386 		    total_ncols, parity);
387 	}
388 	VERIFY(rm);
389 
390 	/* Make sure code columns are destroyed */
391 	corrupt_colums(rm, ccols, parity);
392 
393 	return (rm);
394 }
395 
396 static int
397 run_gen_check(raidz_test_opts_t *opts)
398 {
399 	char **impl_name;
400 	int fn, err = 0;
401 	zio_t *zio_test;
402 	raidz_map_t *rm_test;
403 
404 	err = init_raidz_golden_map(opts, PARITY_PQR);
405 	if (0 != err)
406 		return (err);
407 
408 	LOG(D_INFO, DBLSEP);
409 	LOG(D_INFO, "Testing parity generation...\n");
410 
411 	for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
412 	    impl_name++) {
413 
414 		LOG(D_INFO, SEP);
415 		LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);
416 
417 		if (0 != vdev_raidz_impl_set(*impl_name)) {
418 			LOG(D_INFO, "[SKIP]\n");
419 			continue;
420 		} else {
421 			LOG(D_INFO, "[SUPPORTED]\n");
422 		}
423 
424 		for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
425 
426 			/* Check if should stop */
427 			if (rto_opts.rto_should_stop)
428 				return (err);
429 
430 			/* create suitable raidz_map */
431 			rm_test = init_raidz_map(opts, &zio_test, fn+1);
432 			VERIFY(rm_test);
433 
434 			LOG(D_INFO, "\t\tTesting method [%s] ...",
435 			    raidz_gen_name[fn]);
436 
437 			if (!opts->rto_sanity)
438 				vdev_raidz_generate_parity(rm_test);
439 
440 			if (cmp_code(opts, rm_test, fn+1) != 0) {
441 				LOG(D_INFO, "[FAIL]\n");
442 				err++;
443 			} else
444 				LOG(D_INFO, "[PASS]\n");
445 
446 			fini_raidz_map(&zio_test, &rm_test);
447 		}
448 	}
449 
450 	fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
451 
452 	return (err);
453 }
454 
455 static int
456 run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
457 {
458 	int x0, x1, x2;
459 	int tgtidx[3];
460 	int err = 0;
461 	static const int rec_tgts[7][3] = {
462 		{1, 2, 3},	/* rec_p:   bad QR & D[0]	*/
463 		{0, 2, 3},	/* rec_q:   bad PR & D[0]	*/
464 		{0, 1, 3},	/* rec_r:   bad PQ & D[0]	*/
465 		{2, 3, 4},	/* rec_pq:  bad R  & D[0][1]	*/
466 		{1, 3, 4},	/* rec_pr:  bad Q  & D[0][1]	*/
467 		{0, 3, 4},	/* rec_qr:  bad P  & D[0][1]	*/
468 		{3, 4, 5}	/* rec_pqr: bad    & D[0][1][2] */
469 	};
470 
471 	memcpy(tgtidx, rec_tgts[fn], sizeof (tgtidx));
472 
473 	if (fn < RAIDZ_REC_PQ) {
474 		/* can reconstruct 1 failed data disk */
475 		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
476 			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
477 				continue;
478 
479 			/* Check if should stop */
480 			if (rto_opts.rto_should_stop)
481 				return (err);
482 
483 			LOG(D_DEBUG, "[%d] ", x0);
484 
485 			tgtidx[2] = x0 + raidz_parity(rm);
486 
487 			corrupt_colums(rm, tgtidx+2, 1);
488 
489 			if (!opts->rto_sanity)
490 				vdev_raidz_reconstruct(rm, tgtidx, 3);
491 
492 			if (cmp_data(opts, rm) != 0) {
493 				err++;
494 				LOG(D_DEBUG, "\nREC D[%d]... [FAIL]\n", x0);
495 			}
496 		}
497 
498 	} else if (fn < RAIDZ_REC_PQR) {
499 		/* can reconstruct 2 failed data disk */
500 		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
501 			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
502 				continue;
503 			for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
504 				if (x1 >= rm->rm_row[0]->rr_cols -
505 				    raidz_parity(rm))
506 					continue;
507 
508 				/* Check if should stop */
509 				if (rto_opts.rto_should_stop)
510 					return (err);
511 
512 				LOG(D_DEBUG, "[%d %d] ", x0, x1);
513 
514 				tgtidx[1] = x0 + raidz_parity(rm);
515 				tgtidx[2] = x1 + raidz_parity(rm);
516 
517 				corrupt_colums(rm, tgtidx+1, 2);
518 
519 				if (!opts->rto_sanity)
520 					vdev_raidz_reconstruct(rm, tgtidx, 3);
521 
522 				if (cmp_data(opts, rm) != 0) {
523 					err++;
524 					LOG(D_DEBUG, "\nREC D[%d %d]... "
525 					    "[FAIL]\n", x0, x1);
526 				}
527 			}
528 		}
529 	} else {
530 		/* can reconstruct 3 failed data disk */
531 		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
532 			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
533 				continue;
534 			for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
535 				if (x1 >= rm->rm_row[0]->rr_cols -
536 				    raidz_parity(rm))
537 					continue;
538 				for (x2 = x1 + 1; x2 < opts->rto_dcols; x2++) {
539 					if (x2 >= rm->rm_row[0]->rr_cols -
540 					    raidz_parity(rm))
541 						continue;
542 
543 					/* Check if should stop */
544 					if (rto_opts.rto_should_stop)
545 						return (err);
546 
547 					LOG(D_DEBUG, "[%d %d %d]", x0, x1, x2);
548 
549 					tgtidx[0] = x0 + raidz_parity(rm);
550 					tgtidx[1] = x1 + raidz_parity(rm);
551 					tgtidx[2] = x2 + raidz_parity(rm);
552 
553 					corrupt_colums(rm, tgtidx, 3);
554 
555 					if (!opts->rto_sanity)
556 						vdev_raidz_reconstruct(rm,
557 						    tgtidx, 3);
558 
559 					if (cmp_data(opts, rm) != 0) {
560 						err++;
561 						LOG(D_DEBUG,
562 						    "\nREC D[%d %d %d]... "
563 						    "[FAIL]\n", x0, x1, x2);
564 					}
565 				}
566 			}
567 		}
568 	}
569 	return (err);
570 }
571 
572 static int
573 run_rec_check(raidz_test_opts_t *opts)
574 {
575 	char **impl_name;
576 	unsigned fn, err = 0;
577 	zio_t *zio_test;
578 	raidz_map_t *rm_test;
579 
580 	err = init_raidz_golden_map(opts, PARITY_PQR);
581 	if (0 != err)
582 		return (err);
583 
584 	LOG(D_INFO, DBLSEP);
585 	LOG(D_INFO, "Testing data reconstruction...\n");
586 
587 	for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
588 	    impl_name++) {
589 
590 		LOG(D_INFO, SEP);
591 		LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);
592 
593 		if (vdev_raidz_impl_set(*impl_name) != 0) {
594 			LOG(D_INFO, "[SKIP]\n");
595 			continue;
596 		} else
597 			LOG(D_INFO, "[SUPPORTED]\n");
598 
599 
600 		/* create suitable raidz_map */
601 		rm_test = init_raidz_map(opts, &zio_test, PARITY_PQR);
602 		/* generate parity */
603 		vdev_raidz_generate_parity(rm_test);
604 
605 		for (fn = 0; fn < RAIDZ_REC_NUM; fn++) {
606 
607 			LOG(D_INFO, "\t\tTesting method [%s] ...",
608 			    raidz_rec_name[fn]);
609 
610 			if (run_rec_check_impl(opts, rm_test, fn) != 0) {
611 				LOG(D_INFO, "[FAIL]\n");
612 				err++;
613 
614 			} else
615 				LOG(D_INFO, "[PASS]\n");
616 
617 		}
618 		/* tear down test raidz_map */
619 		fini_raidz_map(&zio_test, &rm_test);
620 	}
621 
622 	fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
623 
624 	return (err);
625 }
626 
627 static int
628 run_test(raidz_test_opts_t *opts)
629 {
630 	int err = 0;
631 
632 	if (opts == NULL)
633 		opts = &rto_opts;
634 
635 	print_opts(opts, B_FALSE);
636 
637 	err |= run_gen_check(opts);
638 	err |= run_rec_check(opts);
639 
640 	return (err);
641 }
642 
643 #define	SWEEP_RUNNING	0
644 #define	SWEEP_FINISHED	1
645 #define	SWEEP_ERROR	2
646 #define	SWEEP_TIMEOUT	3
647 
648 static int sweep_state = 0;
649 static raidz_test_opts_t failed_opts;
650 
651 static kmutex_t sem_mtx;
652 static kcondvar_t sem_cv;
653 static int max_free_slots;
654 static int free_slots;
655 
656 static __attribute__((noreturn)) void
657 sweep_thread(void *arg)
658 {
659 	int err = 0;
660 	raidz_test_opts_t *opts = (raidz_test_opts_t *)arg;
661 	VERIFY(opts != NULL);
662 
663 	err = run_test(opts);
664 
665 	if (rto_opts.rto_sanity) {
666 		/* 25% chance that a sweep test fails */
667 		if (rand() < (RAND_MAX/4))
668 			err = 1;
669 	}
670 
671 	if (0 != err) {
672 		mutex_enter(&sem_mtx);
673 		memcpy(&failed_opts, opts, sizeof (raidz_test_opts_t));
674 		sweep_state = SWEEP_ERROR;
675 		mutex_exit(&sem_mtx);
676 	}
677 
678 	umem_free(opts, sizeof (raidz_test_opts_t));
679 
680 	/* signal the next thread */
681 	mutex_enter(&sem_mtx);
682 	free_slots++;
683 	cv_signal(&sem_cv);
684 	mutex_exit(&sem_mtx);
685 
686 	thread_exit();
687 }
688 
689 static int
690 run_sweep(void)
691 {
692 	static const size_t dcols_v[] = { 1, 2, 3, 4, 5, 6, 7, 8, 12, 15, 16 };
693 	static const size_t ashift_v[] = { 9, 12, 14 };
694 	static const size_t size_v[] = { 1 << 9, 21 * (1 << 9), 13 * (1 << 12),
695 		1 << 17, (1 << 20) - (1 << 12), SPA_MAXBLOCKSIZE };
696 
697 	(void) setvbuf(stdout, NULL, _IONBF, 0);
698 
699 	ulong_t total_comb = ARRAY_SIZE(size_v) * ARRAY_SIZE(ashift_v) *
700 	    ARRAY_SIZE(dcols_v);
701 	ulong_t tried_comb = 0;
702 	hrtime_t time_diff, start_time = gethrtime();
703 	raidz_test_opts_t *opts;
704 	int a, d, s;
705 
706 	max_free_slots = free_slots = MAX(2, boot_ncpus);
707 
708 	mutex_init(&sem_mtx, NULL, MUTEX_DEFAULT, NULL);
709 	cv_init(&sem_cv, NULL, CV_DEFAULT, NULL);
710 
711 	for (s = 0; s < ARRAY_SIZE(size_v); s++)
712 	for (a = 0; a < ARRAY_SIZE(ashift_v); a++)
713 	for (d = 0; d < ARRAY_SIZE(dcols_v); d++) {
714 
715 		if (size_v[s] < (1 << ashift_v[a])) {
716 			total_comb--;
717 			continue;
718 		}
719 
720 		if (++tried_comb % 20 == 0)
721 			LOG(D_ALL, "%lu/%lu... ", tried_comb, total_comb);
722 
723 		/* wait for signal to start new thread */
724 		mutex_enter(&sem_mtx);
725 		while (cv_timedwait_sig(&sem_cv, &sem_mtx,
726 		    ddi_get_lbolt() + hz)) {
727 
728 			/* check if should stop the test (timeout) */
729 			time_diff = (gethrtime() - start_time) / NANOSEC;
730 			if (rto_opts.rto_sweep_timeout > 0 &&
731 			    time_diff >= rto_opts.rto_sweep_timeout) {
732 				sweep_state = SWEEP_TIMEOUT;
733 				rto_opts.rto_should_stop = B_TRUE;
734 				mutex_exit(&sem_mtx);
735 				goto exit;
736 			}
737 
738 			/* check if should stop the test (error) */
739 			if (sweep_state != SWEEP_RUNNING) {
740 				mutex_exit(&sem_mtx);
741 				goto exit;
742 			}
743 
744 			/* exit loop if a slot is available */
745 			if (free_slots > 0) {
746 				break;
747 			}
748 		}
749 
750 		free_slots--;
751 		mutex_exit(&sem_mtx);
752 
753 		opts = umem_zalloc(sizeof (raidz_test_opts_t), UMEM_NOFAIL);
754 		opts->rto_ashift = ashift_v[a];
755 		opts->rto_dcols = dcols_v[d];
756 		opts->rto_offset = (1ULL << ashift_v[a]) * rand();
757 		opts->rto_dsize = size_v[s];
758 		opts->rto_expand = rto_opts.rto_expand;
759 		opts->rto_expand_offset = rto_opts.rto_expand_offset;
760 		opts->rto_v = 0; /* be quiet */
761 
762 		VERIFY3P(thread_create(NULL, 0, sweep_thread, (void *) opts,
763 		    0, NULL, TS_RUN, defclsyspri), !=, NULL);
764 	}
765 
766 exit:
767 	LOG(D_ALL, "\nWaiting for test threads to finish...\n");
768 	mutex_enter(&sem_mtx);
769 	VERIFY(free_slots <= max_free_slots);
770 	while (free_slots < max_free_slots) {
771 		(void) cv_wait(&sem_cv, &sem_mtx);
772 	}
773 	mutex_exit(&sem_mtx);
774 
775 	if (sweep_state == SWEEP_ERROR) {
776 		ERR("Sweep test failed! Failed option: \n");
777 		print_opts(&failed_opts, B_TRUE);
778 	} else {
779 		if (sweep_state == SWEEP_TIMEOUT)
780 			LOG(D_ALL, "Test timeout (%lus). Stopping...\n",
781 			    (ulong_t)rto_opts.rto_sweep_timeout);
782 
783 		LOG(D_ALL, "Sweep test succeeded on %lu raidz maps!\n",
784 		    (ulong_t)tried_comb);
785 	}
786 
787 	mutex_destroy(&sem_mtx);
788 
789 	return (sweep_state == SWEEP_ERROR ? SWEEP_ERROR : 0);
790 }
791 
792 
793 int
794 main(int argc, char **argv)
795 {
796 	size_t i;
797 	struct sigaction action;
798 	int err = 0;
799 
800 	/* init gdb pid string early */
801 	(void) sprintf(pid_s, "%d", getpid());
802 
803 	action.sa_handler = sig_handler;
804 	sigemptyset(&action.sa_mask);
805 	action.sa_flags = 0;
806 
807 	if (sigaction(SIGSEGV, &action, NULL) < 0) {
808 		ERR("raidz_test: cannot catch SIGSEGV: %s.\n", strerror(errno));
809 		exit(EXIT_FAILURE);
810 	}
811 
812 	(void) setvbuf(stdout, NULL, _IOLBF, 0);
813 
814 	dprintf_setup(&argc, argv);
815 
816 	process_options(argc, argv);
817 
818 	kernel_init(SPA_MODE_READ);
819 
820 	/* setup random data because rand() is not reentrant */
821 	rand_data = (int *)umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
822 	srand((unsigned)time(NULL) * getpid());
823 	for (i = 0; i < SPA_MAXBLOCKSIZE / sizeof (int); i++)
824 		rand_data[i] = rand();
825 
826 	mprotect(rand_data, SPA_MAXBLOCKSIZE, PROT_READ);
827 
828 	if (rto_opts.rto_benchmark) {
829 		run_raidz_benchmark();
830 	} else if (rto_opts.rto_sweep) {
831 		err = run_sweep();
832 	} else {
833 		err = run_test(NULL);
834 	}
835 
836 	umem_free(rand_data, SPA_MAXBLOCKSIZE);
837 	kernel_fini();
838 
839 	return (err);
840 }
841