xref: /freebsd/sys/contrib/openzfs/cmd/raidz_test/raidz_test.c (revision 4b15965daa99044daf184221b7c283bf7f2d7e66)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
25  */
26 
27 #include <sys/zfs_context.h>
28 #include <sys/time.h>
29 #include <sys/wait.h>
30 #include <sys/zio.h>
31 #include <umem.h>
32 #include <sys/vdev_raidz.h>
33 #include <sys/vdev_raidz_impl.h>
34 #include <assert.h>
35 #include <stdio.h>
36 #include "raidz_test.h"
37 
/*
 * Buffer of pre-generated rand() values. main() allocates and fills it;
 * init_rand() copies from it into ABD buffers.
 */
static int *rand_data;
/* Options for this run; process_options() fills it from the defaults. */
raidz_test_opts_t rto_opts;

/* This process's PID as a decimal string; handed to "gdb -p" on a crash. */
static char pid_s[16];
42 
43 static void sig_handler(int signo)
44 {
45 	int old_errno = errno;
46 	struct sigaction action;
47 	/*
48 	 * Restore default action and re-raise signal so SIGSEGV and
49 	 * SIGABRT can trigger a core dump.
50 	 */
51 	action.sa_handler = SIG_DFL;
52 	sigemptyset(&action.sa_mask);
53 	action.sa_flags = 0;
54 	(void) sigaction(signo, &action, NULL);
55 
56 	if (rto_opts.rto_gdb) {
57 		pid_t pid = fork();
58 		if (pid == 0) {
59 			execlp("gdb", "gdb", "-ex", "set pagination 0",
60 			    "-p", pid_s, NULL);
61 			_exit(-1);
62 		} else if (pid > 0)
63 			while (waitpid(pid, NULL, 0) == -1 && errno == EINTR)
64 				;
65 	}
66 
67 	raise(signo);
68 	errno = old_errno;
69 }
70 
71 static void print_opts(raidz_test_opts_t *opts, boolean_t force)
72 {
73 	const char *verbose;
74 	switch (opts->rto_v) {
75 		case D_ALL:
76 			verbose = "no";
77 			break;
78 		case D_INFO:
79 			verbose = "info";
80 			break;
81 		case D_DEBUG:
82 		default:
83 			verbose = "debug";
84 			break;
85 	}
86 
87 	if (force || opts->rto_v >= D_INFO) {
88 		(void) fprintf(stdout, DBLSEP "Running with options:\n"
89 		    "  (-a) zio ashift                   : %zu\n"
90 		    "  (-o) zio offset                   : 1 << %zu\n"
91 		    "  (-e) expanded map                 : %s\n"
92 		    "  (-r) reflow offset                : %llx\n"
93 		    "  (-d) number of raidz data columns : %zu\n"
94 		    "  (-s) size of DATA                 : 1 << %zu\n"
95 		    "  (-S) sweep parameters             : %s \n"
96 		    "  (-v) verbose                      : %s \n\n",
97 		    opts->rto_ashift,				/* -a */
98 		    ilog2(opts->rto_offset),			/* -o */
99 		    opts->rto_expand ? "yes" : "no",		/* -e */
100 		    (u_longlong_t)opts->rto_expand_offset,	/* -r */
101 		    opts->rto_dcols,				/* -d */
102 		    ilog2(opts->rto_dsize),			/* -s */
103 		    opts->rto_sweep ? "yes" : "no",		/* -S */
104 		    verbose);					/* -v */
105 	}
106 }
107 
108 static void usage(boolean_t requested)
109 {
110 	const raidz_test_opts_t *o = &rto_opts_defaults;
111 
112 	FILE *fp = requested ? stdout : stderr;
113 
114 	(void) fprintf(fp, "Usage:\n"
115 	    "\t[-a zio ashift (default: %zu)]\n"
116 	    "\t[-o zio offset, exponent radix 2 (default: %zu)]\n"
117 	    "\t[-d number of raidz data columns (default: %zu)]\n"
118 	    "\t[-s zio size, exponent radix 2 (default: %zu)]\n"
119 	    "\t[-S parameter sweep (default: %s)]\n"
120 	    "\t[-t timeout for parameter sweep test]\n"
121 	    "\t[-B benchmark all raidz implementations]\n"
122 	    "\t[-e use expanded raidz map (default: %s)]\n"
123 	    "\t[-r expanded raidz map reflow offset (default: %llx)]\n"
124 	    "\t[-v increase verbosity (default: %d)]\n"
125 	    "\t[-h (print help)]\n"
126 	    "\t[-T test the test, see if failure would be detected]\n"
127 	    "\t[-D debug (attach gdb on SIGSEGV)]\n"
128 	    "",
129 	    o->rto_ashift,				/* -a */
130 	    ilog2(o->rto_offset),			/* -o */
131 	    o->rto_dcols,				/* -d */
132 	    ilog2(o->rto_dsize),			/* -s */
133 	    rto_opts.rto_sweep ? "yes" : "no",		/* -S */
134 	    rto_opts.rto_expand ? "yes" : "no",		/* -e */
135 	    (u_longlong_t)o->rto_expand_offset,		/* -r */
136 	    o->rto_v);					/* -v */
137 
138 	exit(requested ? 0 : 1);
139 }
140 
141 static void process_options(int argc, char **argv)
142 {
143 	size_t value;
144 	int opt;
145 	raidz_test_opts_t *o = &rto_opts;
146 
147 	memcpy(o, &rto_opts_defaults, sizeof (*o));
148 
149 	while ((opt = getopt(argc, argv, "TDBSvha:er:o:d:s:t:")) != -1) {
150 		switch (opt) {
151 		case 'a':
152 			value = strtoull(optarg, NULL, 0);
153 			o->rto_ashift = MIN(13, MAX(9, value));
154 			break;
155 		case 'e':
156 			o->rto_expand = 1;
157 			break;
158 		case 'r':
159 			o->rto_expand_offset = strtoull(optarg, NULL, 0);
160 			break;
161 		case 'o':
162 			value = strtoull(optarg, NULL, 0);
163 			o->rto_offset = ((1ULL << MIN(12, value)) >> 9) << 9;
164 			break;
165 		case 'd':
166 			value = strtoull(optarg, NULL, 0);
167 			o->rto_dcols = MIN(255, MAX(1, value));
168 			break;
169 		case 's':
170 			value = strtoull(optarg, NULL, 0);
171 			o->rto_dsize = 1ULL <<  MIN(SPA_MAXBLOCKSHIFT,
172 			    MAX(SPA_MINBLOCKSHIFT, value));
173 			break;
174 		case 't':
175 			value = strtoull(optarg, NULL, 0);
176 			o->rto_sweep_timeout = value;
177 			break;
178 		case 'v':
179 			o->rto_v++;
180 			break;
181 		case 'S':
182 			o->rto_sweep = 1;
183 			break;
184 		case 'B':
185 			o->rto_benchmark = 1;
186 			break;
187 		case 'D':
188 			o->rto_gdb = 1;
189 			break;
190 		case 'T':
191 			o->rto_sanity = 1;
192 			break;
193 		case 'h':
194 			usage(B_TRUE);
195 			break;
196 		case '?':
197 		default:
198 			usage(B_FALSE);
199 			break;
200 		}
201 	}
202 }
203 
/*
 * Column accessors for a raidz row `rr`.
 *
 * DATA_COL / DATA_COL_SIZE index relative to the row's first data column
 * (rr_firstdatacol); CODE_COL / CODE_COL_SIZE index from column 0.
 * Every use of a macro argument is parenthesized so that an expression
 * argument (e.g. `rows + 1`) expands correctly.
 */
#define	DATA_COL(rr, i) \
	((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_abd)
#define	DATA_COL_SIZE(rr, i) \
	((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_size)

#define	CODE_COL(rr, i) ((rr)->rr_col[(i)].rc_abd)
#define	CODE_COL_SIZE(rr, i) ((rr)->rr_col[(i)].rc_size)
209 
210 static int
211 cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
212 {
213 	int r, i, ret = 0;
214 
215 	VERIFY(parity >= 1 && parity <= 3);
216 
217 	for (r = 0; r < rm->rm_nrows; r++) {
218 		raidz_row_t * const rr = rm->rm_row[r];
219 		raidz_row_t * const rrg = opts->rm_golden->rm_row[r];
220 		for (i = 0; i < parity; i++) {
221 			if (CODE_COL_SIZE(rrg, i) == 0) {
222 				VERIFY0(CODE_COL_SIZE(rr, i));
223 				continue;
224 			}
225 
226 			if (abd_cmp(CODE_COL(rr, i),
227 			    CODE_COL(rrg, i)) != 0) {
228 				ret++;
229 				LOG_OPT(D_DEBUG, opts,
230 				    "\nParity block [%d] different!\n", i);
231 			}
232 		}
233 	}
234 	return (ret);
235 }
236 
237 static int
238 cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
239 {
240 	int r, i, dcols, ret = 0;
241 
242 	for (r = 0; r < rm->rm_nrows; r++) {
243 		raidz_row_t *rr = rm->rm_row[r];
244 		raidz_row_t *rrg = opts->rm_golden->rm_row[r];
245 		dcols = opts->rm_golden->rm_row[0]->rr_cols -
246 		    raidz_parity(opts->rm_golden);
247 		for (i = 0; i < dcols; i++) {
248 			if (DATA_COL_SIZE(rrg, i) == 0) {
249 				VERIFY0(DATA_COL_SIZE(rr, i));
250 				continue;
251 			}
252 
253 			if (abd_cmp(DATA_COL(rrg, i),
254 			    DATA_COL(rr, i)) != 0) {
255 				ret++;
256 
257 				LOG_OPT(D_DEBUG, opts,
258 				    "\nData block [%d] different!\n", i);
259 			}
260 		}
261 	}
262 	return (ret);
263 }
264 
265 static int
266 init_rand(void *data, size_t size, void *private)
267 {
268 	(void) private;
269 	memcpy(data, rand_data, size);
270 	return (0);
271 }
272 
273 static void
274 corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt)
275 {
276 	for (int r = 0; r < rm->rm_nrows; r++) {
277 		raidz_row_t *rr = rm->rm_row[r];
278 		for (int i = 0; i < cnt; i++) {
279 			raidz_col_t *col = &rr->rr_col[tgts[i]];
280 			abd_iterate_func(col->rc_abd, 0, col->rc_size,
281 			    init_rand, NULL);
282 		}
283 	}
284 }
285 
286 void
287 init_zio_abd(zio_t *zio)
288 {
289 	abd_iterate_func(zio->io_abd, 0, zio->io_size, init_rand, NULL);
290 }
291 
292 static void
293 fini_raidz_map(zio_t **zio, raidz_map_t **rm)
294 {
295 	vdev_raidz_map_free(*rm);
296 	raidz_free((*zio)->io_abd, (*zio)->io_size);
297 	umem_free(*zio, sizeof (zio_t));
298 
299 	*zio = NULL;
300 	*rm = NULL;
301 }
302 
303 static int
304 init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
305 {
306 	int err = 0;
307 	zio_t *zio_test;
308 	raidz_map_t *rm_test;
309 	const size_t total_ncols = opts->rto_dcols + parity;
310 
311 	if (opts->rm_golden) {
312 		fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
313 	}
314 
315 	opts->zio_golden = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
316 	zio_test = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
317 
318 	opts->zio_golden->io_offset = zio_test->io_offset = opts->rto_offset;
319 	opts->zio_golden->io_size = zio_test->io_size = opts->rto_dsize;
320 
321 	opts->zio_golden->io_abd = raidz_alloc(opts->rto_dsize);
322 	zio_test->io_abd = raidz_alloc(opts->rto_dsize);
323 
324 	init_zio_abd(opts->zio_golden);
325 	init_zio_abd(zio_test);
326 
327 	VERIFY0(vdev_raidz_impl_set("original"));
328 
329 	if (opts->rto_expand) {
330 		opts->rm_golden =
331 		    vdev_raidz_map_alloc_expanded(opts->zio_golden,
332 		    opts->rto_ashift, total_ncols+1, total_ncols,
333 		    parity, opts->rto_expand_offset, 0, B_FALSE);
334 		rm_test = vdev_raidz_map_alloc_expanded(zio_test,
335 		    opts->rto_ashift, total_ncols+1, total_ncols,
336 		    parity, opts->rto_expand_offset, 0, B_FALSE);
337 	} else {
338 		opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
339 		    opts->rto_ashift, total_ncols, parity);
340 		rm_test = vdev_raidz_map_alloc(zio_test,
341 		    opts->rto_ashift, total_ncols, parity);
342 	}
343 
344 	VERIFY(opts->zio_golden);
345 	VERIFY(opts->rm_golden);
346 
347 	vdev_raidz_generate_parity(opts->rm_golden);
348 	vdev_raidz_generate_parity(rm_test);
349 
350 	/* sanity check */
351 	err |= cmp_data(opts, rm_test);
352 	err |= cmp_code(opts, rm_test, parity);
353 
354 	if (err)
355 		ERR("initializing the golden copy ... [FAIL]!\n");
356 
357 	/* tear down raidz_map of test zio */
358 	fini_raidz_map(&zio_test, &rm_test);
359 
360 	return (err);
361 }
362 
363 static raidz_map_t *
364 init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
365 {
366 	raidz_map_t *rm = NULL;
367 	const size_t alloc_dsize = opts->rto_dsize;
368 	const size_t total_ncols = opts->rto_dcols + parity;
369 	const int ccols[] = { 0, 1, 2 };
370 
371 	VERIFY(zio);
372 	VERIFY(parity <= 3 && parity >= 1);
373 
374 	*zio = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
375 
376 	(*zio)->io_offset = 0;
377 	(*zio)->io_size = alloc_dsize;
378 	(*zio)->io_abd = raidz_alloc(alloc_dsize);
379 	init_zio_abd(*zio);
380 
381 	if (opts->rto_expand) {
382 		rm = vdev_raidz_map_alloc_expanded(*zio,
383 		    opts->rto_ashift, total_ncols+1, total_ncols,
384 		    parity, opts->rto_expand_offset, 0, B_FALSE);
385 	} else {
386 		rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
387 		    total_ncols, parity);
388 	}
389 	VERIFY(rm);
390 
391 	/* Make sure code columns are destroyed */
392 	corrupt_colums(rm, ccols, parity);
393 
394 	return (rm);
395 }
396 
397 static int
398 run_gen_check(raidz_test_opts_t *opts)
399 {
400 	char **impl_name;
401 	int fn, err = 0;
402 	zio_t *zio_test;
403 	raidz_map_t *rm_test;
404 
405 	err = init_raidz_golden_map(opts, PARITY_PQR);
406 	if (0 != err)
407 		return (err);
408 
409 	LOG(D_INFO, DBLSEP);
410 	LOG(D_INFO, "Testing parity generation...\n");
411 
412 	for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
413 	    impl_name++) {
414 
415 		LOG(D_INFO, SEP);
416 		LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);
417 
418 		if (0 != vdev_raidz_impl_set(*impl_name)) {
419 			LOG(D_INFO, "[SKIP]\n");
420 			continue;
421 		} else {
422 			LOG(D_INFO, "[SUPPORTED]\n");
423 		}
424 
425 		for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
426 
427 			/* Check if should stop */
428 			if (rto_opts.rto_should_stop)
429 				return (err);
430 
431 			/* create suitable raidz_map */
432 			rm_test = init_raidz_map(opts, &zio_test, fn+1);
433 			VERIFY(rm_test);
434 
435 			LOG(D_INFO, "\t\tTesting method [%s] ...",
436 			    raidz_gen_name[fn]);
437 
438 			if (!opts->rto_sanity)
439 				vdev_raidz_generate_parity(rm_test);
440 
441 			if (cmp_code(opts, rm_test, fn+1) != 0) {
442 				LOG(D_INFO, "[FAIL]\n");
443 				err++;
444 			} else
445 				LOG(D_INFO, "[PASS]\n");
446 
447 			fini_raidz_map(&zio_test, &rm_test);
448 		}
449 	}
450 
451 	fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
452 
453 	return (err);
454 }
455 
456 static int
457 run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
458 {
459 	int x0, x1, x2;
460 	int tgtidx[3];
461 	int err = 0;
462 	static const int rec_tgts[7][3] = {
463 		{1, 2, 3},	/* rec_p:   bad QR & D[0]	*/
464 		{0, 2, 3},	/* rec_q:   bad PR & D[0]	*/
465 		{0, 1, 3},	/* rec_r:   bad PQ & D[0]	*/
466 		{2, 3, 4},	/* rec_pq:  bad R  & D[0][1]	*/
467 		{1, 3, 4},	/* rec_pr:  bad Q  & D[0][1]	*/
468 		{0, 3, 4},	/* rec_qr:  bad P  & D[0][1]	*/
469 		{3, 4, 5}	/* rec_pqr: bad    & D[0][1][2] */
470 	};
471 
472 	memcpy(tgtidx, rec_tgts[fn], sizeof (tgtidx));
473 
474 	if (fn < RAIDZ_REC_PQ) {
475 		/* can reconstruct 1 failed data disk */
476 		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
477 			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
478 				continue;
479 
480 			/* Check if should stop */
481 			if (rto_opts.rto_should_stop)
482 				return (err);
483 
484 			LOG(D_DEBUG, "[%d] ", x0);
485 
486 			tgtidx[2] = x0 + raidz_parity(rm);
487 
488 			corrupt_colums(rm, tgtidx+2, 1);
489 
490 			if (!opts->rto_sanity)
491 				vdev_raidz_reconstruct(rm, tgtidx, 3);
492 
493 			if (cmp_data(opts, rm) != 0) {
494 				err++;
495 				LOG(D_DEBUG, "\nREC D[%d]... [FAIL]\n", x0);
496 			}
497 		}
498 
499 	} else if (fn < RAIDZ_REC_PQR) {
500 		/* can reconstruct 2 failed data disk */
501 		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
502 			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
503 				continue;
504 			for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
505 				if (x1 >= rm->rm_row[0]->rr_cols -
506 				    raidz_parity(rm))
507 					continue;
508 
509 				/* Check if should stop */
510 				if (rto_opts.rto_should_stop)
511 					return (err);
512 
513 				LOG(D_DEBUG, "[%d %d] ", x0, x1);
514 
515 				tgtidx[1] = x0 + raidz_parity(rm);
516 				tgtidx[2] = x1 + raidz_parity(rm);
517 
518 				corrupt_colums(rm, tgtidx+1, 2);
519 
520 				if (!opts->rto_sanity)
521 					vdev_raidz_reconstruct(rm, tgtidx, 3);
522 
523 				if (cmp_data(opts, rm) != 0) {
524 					err++;
525 					LOG(D_DEBUG, "\nREC D[%d %d]... "
526 					    "[FAIL]\n", x0, x1);
527 				}
528 			}
529 		}
530 	} else {
531 		/* can reconstruct 3 failed data disk */
532 		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
533 			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
534 				continue;
535 			for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
536 				if (x1 >= rm->rm_row[0]->rr_cols -
537 				    raidz_parity(rm))
538 					continue;
539 				for (x2 = x1 + 1; x2 < opts->rto_dcols; x2++) {
540 					if (x2 >= rm->rm_row[0]->rr_cols -
541 					    raidz_parity(rm))
542 						continue;
543 
544 					/* Check if should stop */
545 					if (rto_opts.rto_should_stop)
546 						return (err);
547 
548 					LOG(D_DEBUG, "[%d %d %d]", x0, x1, x2);
549 
550 					tgtidx[0] = x0 + raidz_parity(rm);
551 					tgtidx[1] = x1 + raidz_parity(rm);
552 					tgtidx[2] = x2 + raidz_parity(rm);
553 
554 					corrupt_colums(rm, tgtidx, 3);
555 
556 					if (!opts->rto_sanity)
557 						vdev_raidz_reconstruct(rm,
558 						    tgtidx, 3);
559 
560 					if (cmp_data(opts, rm) != 0) {
561 						err++;
562 						LOG(D_DEBUG,
563 						    "\nREC D[%d %d %d]... "
564 						    "[FAIL]\n", x0, x1, x2);
565 					}
566 				}
567 			}
568 		}
569 	}
570 	return (err);
571 }
572 
573 static int
574 run_rec_check(raidz_test_opts_t *opts)
575 {
576 	char **impl_name;
577 	unsigned fn, err = 0;
578 	zio_t *zio_test;
579 	raidz_map_t *rm_test;
580 
581 	err = init_raidz_golden_map(opts, PARITY_PQR);
582 	if (0 != err)
583 		return (err);
584 
585 	LOG(D_INFO, DBLSEP);
586 	LOG(D_INFO, "Testing data reconstruction...\n");
587 
588 	for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
589 	    impl_name++) {
590 
591 		LOG(D_INFO, SEP);
592 		LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);
593 
594 		if (vdev_raidz_impl_set(*impl_name) != 0) {
595 			LOG(D_INFO, "[SKIP]\n");
596 			continue;
597 		} else
598 			LOG(D_INFO, "[SUPPORTED]\n");
599 
600 
601 		/* create suitable raidz_map */
602 		rm_test = init_raidz_map(opts, &zio_test, PARITY_PQR);
603 		/* generate parity */
604 		vdev_raidz_generate_parity(rm_test);
605 
606 		for (fn = 0; fn < RAIDZ_REC_NUM; fn++) {
607 
608 			LOG(D_INFO, "\t\tTesting method [%s] ...",
609 			    raidz_rec_name[fn]);
610 
611 			if (run_rec_check_impl(opts, rm_test, fn) != 0) {
612 				LOG(D_INFO, "[FAIL]\n");
613 				err++;
614 
615 			} else
616 				LOG(D_INFO, "[PASS]\n");
617 
618 		}
619 		/* tear down test raidz_map */
620 		fini_raidz_map(&zio_test, &rm_test);
621 	}
622 
623 	fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
624 
625 	return (err);
626 }
627 
628 static int
629 run_test(raidz_test_opts_t *opts)
630 {
631 	int err = 0;
632 
633 	if (opts == NULL)
634 		opts = &rto_opts;
635 
636 	print_opts(opts, B_FALSE);
637 
638 	err |= run_gen_check(opts);
639 	err |= run_rec_check(opts);
640 
641 	return (err);
642 }
643 
/* Values held by sweep_state while a parameter sweep runs. */
#define	SWEEP_RUNNING	0
#define	SWEEP_FINISHED	1
#define	SWEEP_ERROR	2
#define	SWEEP_TIMEOUT	3

/* Overall sweep status; set to SWEEP_ERROR by a failing sweep_thread(). */
static int sweep_state = 0;
/* Copy of the option set used by a sweep thread that reported failure. */
static raidz_test_opts_t failed_opts;

/*
 * Thread throttle for run_sweep(): free_slots is decremented before a
 * thread is created and incremented (with a cv_signal on sem_cv) when a
 * thread finishes; both are protected by sem_mtx. max_free_slots is the
 * initial value of free_slots.
 */
static kmutex_t sem_mtx;
static kcondvar_t sem_cv;
static int max_free_slots;
static int free_slots;
656 
657 static __attribute__((noreturn)) void
658 sweep_thread(void *arg)
659 {
660 	int err = 0;
661 	raidz_test_opts_t *opts = (raidz_test_opts_t *)arg;
662 	VERIFY(opts != NULL);
663 
664 	err = run_test(opts);
665 
666 	if (rto_opts.rto_sanity) {
667 		/* 25% chance that a sweep test fails */
668 		if (rand() < (RAND_MAX/4))
669 			err = 1;
670 	}
671 
672 	if (0 != err) {
673 		mutex_enter(&sem_mtx);
674 		memcpy(&failed_opts, opts, sizeof (raidz_test_opts_t));
675 		sweep_state = SWEEP_ERROR;
676 		mutex_exit(&sem_mtx);
677 	}
678 
679 	umem_free(opts, sizeof (raidz_test_opts_t));
680 
681 	/* signal the next thread */
682 	mutex_enter(&sem_mtx);
683 	free_slots++;
684 	cv_signal(&sem_cv);
685 	mutex_exit(&sem_mtx);
686 
687 	thread_exit();
688 }
689 
690 static int
691 run_sweep(void)
692 {
693 	static const size_t dcols_v[] = { 1, 2, 3, 4, 5, 6, 7, 8, 12, 15, 16 };
694 	static const size_t ashift_v[] = { 9, 12, 14 };
695 	static const size_t size_v[] = { 1 << 9, 21 * (1 << 9), 13 * (1 << 12),
696 		1 << 17, (1 << 20) - (1 << 12), SPA_MAXBLOCKSIZE };
697 
698 	(void) setvbuf(stdout, NULL, _IONBF, 0);
699 
700 	ulong_t total_comb = ARRAY_SIZE(size_v) * ARRAY_SIZE(ashift_v) *
701 	    ARRAY_SIZE(dcols_v);
702 	ulong_t tried_comb = 0;
703 	hrtime_t time_diff, start_time = gethrtime();
704 	raidz_test_opts_t *opts;
705 	int a, d, s;
706 
707 	max_free_slots = free_slots = MAX(2, boot_ncpus);
708 
709 	mutex_init(&sem_mtx, NULL, MUTEX_DEFAULT, NULL);
710 	cv_init(&sem_cv, NULL, CV_DEFAULT, NULL);
711 
712 	for (s = 0; s < ARRAY_SIZE(size_v); s++)
713 	for (a = 0; a < ARRAY_SIZE(ashift_v); a++)
714 	for (d = 0; d < ARRAY_SIZE(dcols_v); d++) {
715 
716 		if (size_v[s] < (1 << ashift_v[a])) {
717 			total_comb--;
718 			continue;
719 		}
720 
721 		if (++tried_comb % 20 == 0)
722 			LOG(D_ALL, "%lu/%lu... ", tried_comb, total_comb);
723 
724 		/* wait for signal to start new thread */
725 		mutex_enter(&sem_mtx);
726 		while (cv_timedwait_sig(&sem_cv, &sem_mtx,
727 		    ddi_get_lbolt() + hz)) {
728 
729 			/* check if should stop the test (timeout) */
730 			time_diff = (gethrtime() - start_time) / NANOSEC;
731 			if (rto_opts.rto_sweep_timeout > 0 &&
732 			    time_diff >= rto_opts.rto_sweep_timeout) {
733 				sweep_state = SWEEP_TIMEOUT;
734 				rto_opts.rto_should_stop = B_TRUE;
735 				mutex_exit(&sem_mtx);
736 				goto exit;
737 			}
738 
739 			/* check if should stop the test (error) */
740 			if (sweep_state != SWEEP_RUNNING) {
741 				mutex_exit(&sem_mtx);
742 				goto exit;
743 			}
744 
745 			/* exit loop if a slot is available */
746 			if (free_slots > 0) {
747 				break;
748 			}
749 		}
750 
751 		free_slots--;
752 		mutex_exit(&sem_mtx);
753 
754 		opts = umem_zalloc(sizeof (raidz_test_opts_t), UMEM_NOFAIL);
755 		opts->rto_ashift = ashift_v[a];
756 		opts->rto_dcols = dcols_v[d];
757 		opts->rto_offset = (1ULL << ashift_v[a]) * rand();
758 		opts->rto_dsize = size_v[s];
759 		opts->rto_expand = rto_opts.rto_expand;
760 		opts->rto_expand_offset = rto_opts.rto_expand_offset;
761 		opts->rto_v = 0; /* be quiet */
762 
763 		VERIFY3P(thread_create(NULL, 0, sweep_thread, (void *) opts,
764 		    0, NULL, TS_RUN, defclsyspri), !=, NULL);
765 	}
766 
767 exit:
768 	LOG(D_ALL, "\nWaiting for test threads to finish...\n");
769 	mutex_enter(&sem_mtx);
770 	VERIFY(free_slots <= max_free_slots);
771 	while (free_slots < max_free_slots) {
772 		(void) cv_wait(&sem_cv, &sem_mtx);
773 	}
774 	mutex_exit(&sem_mtx);
775 
776 	if (sweep_state == SWEEP_ERROR) {
777 		ERR("Sweep test failed! Failed option: \n");
778 		print_opts(&failed_opts, B_TRUE);
779 	} else {
780 		if (sweep_state == SWEEP_TIMEOUT)
781 			LOG(D_ALL, "Test timeout (%lus). Stopping...\n",
782 			    (ulong_t)rto_opts.rto_sweep_timeout);
783 
784 		LOG(D_ALL, "Sweep test succeeded on %lu raidz maps!\n",
785 		    (ulong_t)tried_comb);
786 	}
787 
788 	mutex_destroy(&sem_mtx);
789 
790 	return (sweep_state == SWEEP_ERROR ? SWEEP_ERROR : 0);
791 }
792 
793 
794 int
795 main(int argc, char **argv)
796 {
797 	size_t i;
798 	struct sigaction action;
799 	int err = 0;
800 
801 	/* init gdb pid string early */
802 	(void) sprintf(pid_s, "%d", getpid());
803 
804 	action.sa_handler = sig_handler;
805 	sigemptyset(&action.sa_mask);
806 	action.sa_flags = 0;
807 
808 	if (sigaction(SIGSEGV, &action, NULL) < 0) {
809 		ERR("raidz_test: cannot catch SIGSEGV: %s.\n", strerror(errno));
810 		exit(EXIT_FAILURE);
811 	}
812 
813 	(void) setvbuf(stdout, NULL, _IOLBF, 0);
814 
815 	dprintf_setup(&argc, argv);
816 
817 	process_options(argc, argv);
818 
819 	kernel_init(SPA_MODE_READ);
820 
821 	/* setup random data because rand() is not reentrant */
822 	rand_data = (int *)umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
823 	srand((unsigned)time(NULL) * getpid());
824 	for (i = 0; i < SPA_MAXBLOCKSIZE / sizeof (int); i++)
825 		rand_data[i] = rand();
826 
827 	mprotect(rand_data, SPA_MAXBLOCKSIZE, PROT_READ);
828 
829 	if (rto_opts.rto_benchmark) {
830 		run_raidz_benchmark();
831 	} else if (rto_opts.rto_sweep) {
832 		err = run_sweep();
833 	} else {
834 		err = run_test(NULL);
835 	}
836 
837 	umem_free(rand_data, SPA_MAXBLOCKSIZE);
838 	kernel_fini();
839 
840 	return (err);
841 }
842