/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <sys/zio.h>
#include <umem.h>
#include <sys/vdev_raidz.h>
#include <sys/vdev_raidz_impl.h>
#include <assert.h>
#include <stdio.h>
#include "raidz_test.h"

static int *rand_data;
raidz_test_opts_t rto_opts;

static char gdb[256];
static const char gdb_tmpl[] = "gdb -ex \"set pagination 0\" -p %d";

static void sig_handler(int signo)
{
	struct sigaction action;
	/*
	 * Restore default action and re-raise signal so SIGSEGV and
	 * SIGABRT can trigger a core dump.
	 */
	action.sa_handler = SIG_DFL;
	sigemptyset(&action.sa_mask);
	action.sa_flags = 0;
	(void) sigaction(signo, &action, NULL);

	/* The empty block silences the unused-result warning. */
	if (rto_opts.rto_gdb)
		if (system(gdb)) { }

	raise(signo);
}

static void print_opts(raidz_test_opts_t *opts, boolean_t force)
{
	const char *verbose;
	switch (opts->rto_v) {
	case 0:
		verbose = "no";
		break;
	case 1:
		verbose = "info";
		break;
	default:
		verbose = "debug";
		break;
	}

	if (force || opts->rto_v >= D_INFO) {
		(void) fprintf(stdout, DBLSEP "Running with options:\n"
		    "  (-a) zio ashift                   : %zu\n"
		    "  (-o) zio offset                   : 1 << %zu\n"
		    "  (-e) expanded map                 : %s\n"
		    "  (-r) reflow offset                : %llx\n"
		    "  (-d) number of raidz data columns : %zu\n"
		    "  (-s) size of DATA                 : 1 << %zu\n"
		    "  (-S) sweep parameters             : %s\n"
		    "  (-v) verbose                      : %s\n\n",
		    opts->rto_ashift,				/* -a */
		    ilog2(opts->rto_offset),			/* -o */
		    opts->rto_expand ? "yes" : "no",		/* -e */
		    (u_longlong_t)opts->rto_expand_offset,	/* -r */
		    opts->rto_dcols,				/* -d */
		    ilog2(opts->rto_dsize),			/* -s */
		    opts->rto_sweep ? "yes" : "no",		/* -S */
		    verbose);					/* -v */
	}
}

static void usage(boolean_t requested)
{
	const raidz_test_opts_t *o = &rto_opts_defaults;

	FILE *fp = requested ? stdout : stderr;

	(void) fprintf(fp, "Usage:\n"
	    "\t[-a zio ashift (default: %zu)]\n"
	    "\t[-o zio offset, exponent radix 2 (default: %zu)]\n"
	    "\t[-d number of raidz data columns (default: %zu)]\n"
	    "\t[-s zio size, exponent radix 2 (default: %zu)]\n"
	    "\t[-S parameter sweep (default: %s)]\n"
	    "\t[-t timeout for parameter sweep test]\n"
	    "\t[-B benchmark all raidz implementations]\n"
	    "\t[-e use expanded raidz map (default: %s)]\n"
	    "\t[-r expanded raidz map reflow offset (default: %llx)]\n"
	    "\t[-v increase verbosity (default: %zu)]\n"
	    "\t[-h (print help)]\n"
	    "\t[-T test the test, see if failure would be detected]\n"
	    "\t[-D debug (attach gdb on SIGSEGV)]\n"
	    "",
	    o->rto_ashift,				/* -a */
	    ilog2(o->rto_offset),			/* -o */
	    o->rto_dcols,				/* -d */
	    ilog2(o->rto_dsize),			/* -s */
	    o->rto_sweep ? "yes" : "no",		/* -S */
	    o->rto_expand ? "yes" : "no",		/* -e */
	    (u_longlong_t)o->rto_expand_offset,	/* -r */
	    o->rto_v);					/* -v */

	exit(requested ? 0 : 1);
}

static void process_options(int argc, char **argv)
{
	size_t value;
	int opt;

	raidz_test_opts_t *o = &rto_opts;

	memcpy(o, &rto_opts_defaults, sizeof (*o));

	while ((opt = getopt(argc, argv, "TDBSvha:er:o:d:s:t:")) != -1) {
		value = 0;

		switch (opt) {
		case 'a':
			value = strtoull(optarg, NULL, 0);
			o->rto_ashift = MIN(13, MAX(9, value));
			break;
		case 'e':
			o->rto_expand = 1;
			break;
		case 'r':
			o->rto_expand_offset = strtoull(optarg, NULL, 0);
			break;
		case 'o':
			value = strtoull(optarg, NULL, 0);
			o->rto_offset = ((1ULL << MIN(12, value)) >> 9) << 9;
			break;
		case 'd':
			value = strtoull(optarg, NULL, 0);
			o->rto_dcols = MIN(255, MAX(1, value));
			break;
		case 's':
			value = strtoull(optarg, NULL, 0);
			o->rto_dsize = 1ULL << MIN(SPA_MAXBLOCKSHIFT,
			    MAX(SPA_MINBLOCKSHIFT, value));
			break;
		case 't':
			value = strtoull(optarg, NULL, 0);
			o->rto_sweep_timeout = value;
			break;
		case 'v':
			o->rto_v++;
			break;
		case 'S':
			o->rto_sweep = 1;
			break;
		case 'B':
			o->rto_benchmark = 1;
			break;
		case 'D':
			o->rto_gdb = 1;
			break;
		case 'T':
			o->rto_sanity = 1;
			break;
		case 'h':
			usage(B_TRUE);
			break;
		case '?':
		default:
			usage(B_FALSE);
			break;
		}
	}
}
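
/*
 * Example invocations (illustrative; flag meanings as defined above):
 *
 *	raidz_test -a 12 -d 8 -s 17	# one run: ashift 12, 8 data
 *					# columns, 128 KiB zio
 *	raidz_test -S -t 120		# parameter sweep, stop after 120 s
 *	raidz_test -B			# benchmark all implementations
 */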

#define	DATA_COL(rr, i) ((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_abd)
#define	DATA_COL_SIZE(rr, i) ((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_size)

#define	CODE_COL(rr, i) ((rr)->rr_col[(i)].rc_abd)
#define	CODE_COL_SIZE(rr, i) ((rr)->rr_col[(i)].rc_size)

static int
cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
{
	int r, i, ret = 0;

	VERIFY(parity >= 1 && parity <= 3);

	for (r = 0; r < rm->rm_nrows; r++) {
		raidz_row_t * const rr = rm->rm_row[r];
		raidz_row_t * const rrg = opts->rm_golden->rm_row[r];
		for (i = 0; i < parity; i++) {
			if (CODE_COL_SIZE(rrg, i) == 0) {
				VERIFY0(CODE_COL_SIZE(rr, i));
				continue;
			}

			if (abd_cmp(CODE_COL(rr, i),
			    CODE_COL(rrg, i)) != 0) {
				ret++;
				LOG_OPT(D_DEBUG, opts,
				    "\nParity block [%d] different!\n", i);
			}
		}
	}
	return (ret);
}

static int
cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
{
	int r, i, dcols, ret = 0;

	for (r = 0; r < rm->rm_nrows; r++) {
		raidz_row_t *rr = rm->rm_row[r];
		raidz_row_t *rrg = opts->rm_golden->rm_row[r];
		dcols = opts->rm_golden->rm_row[0]->rr_cols -
		    raidz_parity(opts->rm_golden);
		for (i = 0; i < dcols; i++) {
			if (DATA_COL_SIZE(rrg, i) == 0) {
				VERIFY0(DATA_COL_SIZE(rr, i));
				continue;
			}

			if (abd_cmp(DATA_COL(rrg, i),
			    DATA_COL(rr, i)) != 0) {
				ret++;

				LOG_OPT(D_DEBUG, opts,
				    "\nData block [%d] different!\n", i);
			}
		}
	}
	return (ret);
}

/* abd_iterate_func() callback: fill a buffer from the random data pool */
static int
init_rand(void *data, size_t size, void *private)
{
	size_t i;
	int *dst = (int *)data;

	for (i = 0; i < size / sizeof (int); i++)
		dst[i] = rand_data[i];

	return (0);
}

static void
corrupt_columns(raidz_map_t *rm, const int *tgts, const int cnt)
{
	for (int r = 0; r < rm->rm_nrows; r++) {
		raidz_row_t *rr = rm->rm_row[r];
		for (int i = 0; i < cnt; i++) {
			raidz_col_t *col = &rr->rr_col[tgts[i]];
			abd_iterate_func(col->rc_abd, 0, col->rc_size,
			    init_rand, NULL);
		}
	}
}

void
init_zio_abd(zio_t *zio)
{
	abd_iterate_func(zio->io_abd, 0, zio->io_size, init_rand, NULL);
}

static void
fini_raidz_map(zio_t **zio, raidz_map_t **rm)
{
	vdev_raidz_map_free(*rm);
	raidz_free((*zio)->io_abd, (*zio)->io_size);
	umem_free(*zio, sizeof (zio_t));

	*zio = NULL;
	*rm = NULL;
}

static int
init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
{
	int err = 0;
	zio_t *zio_test;
	raidz_map_t *rm_test;
	const size_t total_ncols = opts->rto_dcols + parity;

	if (opts->rm_golden) {
		fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
	}

	opts->zio_golden = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
	zio_test = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);

	opts->zio_golden->io_offset = zio_test->io_offset = opts->rto_offset;
	opts->zio_golden->io_size = zio_test->io_size = opts->rto_dsize;

	opts->zio_golden->io_abd = raidz_alloc(opts->rto_dsize);
	zio_test->io_abd = raidz_alloc(opts->rto_dsize);

	init_zio_abd(opts->zio_golden);
	init_zio_abd(zio_test);

	VERIFY0(vdev_raidz_impl_set("original"));

	if (opts->rto_expand) {
		opts->rm_golden =
		    vdev_raidz_map_alloc_expanded(opts->zio_golden->io_abd,
		    opts->zio_golden->io_size, opts->zio_golden->io_offset,
		    opts->rto_ashift, total_ncols + 1, total_ncols,
		    parity, opts->rto_expand_offset);
		rm_test = vdev_raidz_map_alloc_expanded(zio_test->io_abd,
		    zio_test->io_size, zio_test->io_offset,
		    opts->rto_ashift, total_ncols + 1, total_ncols,
		    parity, opts->rto_expand_offset);
	} else {
		opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
		    opts->rto_ashift, total_ncols, parity);
		rm_test = vdev_raidz_map_alloc(zio_test,
		    opts->rto_ashift, total_ncols, parity);
	}

	VERIFY(opts->zio_golden);
	VERIFY(opts->rm_golden);

	vdev_raidz_generate_parity(opts->rm_golden);
	vdev_raidz_generate_parity(rm_test);

	/* sanity check */
	err |= cmp_data(opts, rm_test);
	err |= cmp_code(opts, rm_test, parity);

	if (err)
		ERR("initializing the golden copy ... [FAIL]!\n");

	/* tear down raidz_map of test zio */
	fini_raidz_map(&zio_test, &rm_test);

	return (err);
}

/*
 * If reflow is not in progress, reflow_offset should be UINT64_MAX.
 * For each row, if the row is entirely before reflow_offset, it will
 * come from the new location. Otherwise this row will come from the
 * old location. Therefore, rows that straddle the reflow_offset will
 * come from the old location.
 *
 * NOTE: Until raidz expansion is implemented, this function is only
 * needed by raidz_test.c to test the multi-row raidz_map_t
 * functionality.
 */
raidz_map_t *
vdev_raidz_map_alloc_expanded(abd_t *abd, uint64_t size, uint64_t offset,
    uint64_t ashift, uint64_t physical_cols, uint64_t logical_cols,
    uint64_t nparity, uint64_t reflow_offset)
{
	/* The zio's size in units of the vdev's minimum sector size. */
	uint64_t s = size >> ashift;
	uint64_t q, r, bc, devidx, asize = 0, tot;

	/*
	 * "Quotient": The number of data sectors for this stripe on all but
	 * the "big column" child vdevs that also contain "remainder" data.
	 * AKA "full rows"
	 */
	q = s / (logical_cols - nparity);

	/*
	 * "Remainder": The number of partial stripe data sectors in this I/O.
	 * This will add a sector to some, but not all, child vdevs.
	 */
	r = s - q * (logical_cols - nparity);

	/* The number of "big columns" - those which contain remainder data. */
	bc = (r == 0 ? 0 : r + nparity);

	/*
	 * The total number of data and parity sectors associated with
	 * this I/O.
	 */
	tot = s + nparity * (q + (r == 0 ? 0 : 1));

	/* How many rows contain data (not just skip sectors). */
	uint64_t rows = howmany(tot, logical_cols);
	int cols = MIN(tot, logical_cols);
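
	/*
	 * For example: s = 10 data sectors, logical_cols = 5, and
	 * nparity = 1 (i.e. 4 data columns) gives q = 2, r = 2, bc = 3,
	 * tot = 10 + 1 * (2 + 1) = 13, rows = howmany(13, 5) = 3, and
	 * cols = 5.
	 */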

	raidz_map_t *rm = kmem_zalloc(offsetof(raidz_map_t, rm_row[rows]),
	    KM_SLEEP);
	rm->rm_nrows = rows;

	for (uint64_t row = 0; row < rows; row++) {
		raidz_row_t *rr = kmem_alloc(offsetof(raidz_row_t,
		    rr_col[cols]), KM_SLEEP);
		rm->rm_row[row] = rr;

		/* The starting RAIDZ (parent) vdev sector of the row. */
		uint64_t b = (offset >> ashift) + row * logical_cols;

		/*
		 * If we are in the middle of a reflow, and any part of this
		 * row has not been copied, then use the old location of
		 * this row.
		 */
		int row_phys_cols = physical_cols;
		if (b + (logical_cols - nparity) > reflow_offset >> ashift)
			row_phys_cols--;
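
		/*
		 * For example, with ashift = 9, logical_cols = 5,
		 * nparity = 1, and reflow_offset = 0x10000 (sector 128):
		 * a row starting at b = 120 ends within the copied region
		 * (120 + 4 <= 128) and keeps all physical_cols children,
		 * while a row starting at b = 125 extends past sector 128
		 * (125 + 4 > 128) and maps onto the old, one-narrower
		 * layout.
		 */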

		/* starting child of this row */
		uint64_t child_id = b % row_phys_cols;
		/* The starting byte offset on each child vdev. */
		uint64_t child_offset = (b / row_phys_cols) << ashift;

		/*
		 * We set cols to the entire width of the block, even
		 * if this row is shorter. This is needed because parity
		 * generation (for Q and R) needs to know the entire width,
		 * because it treats the short row as though it was
		 * full-width (and the "phantom" sectors were zero-filled).
		 *
		 * Another approach to this would be to set cols shorter
		 * (to just the number of columns that we might do i/o to)
		 * and have another mechanism to tell the parity generation
		 * about the "entire width". Reconstruction (at least
		 * vdev_raidz_reconstruct_general()) would also need to
		 * know about the "entire width".
		 */
		rr->rr_cols = cols;
		rr->rr_bigcols = bc;
		rr->rr_missingdata = 0;
		rr->rr_missingparity = 0;
		rr->rr_firstdatacol = nparity;
		rr->rr_abd_copy = NULL;
		rr->rr_abd_empty = NULL;
		rr->rr_nempty = 0;

		for (int c = 0; c < rr->rr_cols; c++, child_id++) {
			if (child_id >= row_phys_cols) {
				child_id -= row_phys_cols;
				child_offset += 1ULL << ashift;
			}
			rr->rr_col[c].rc_devidx = child_id;
			rr->rr_col[c].rc_offset = child_offset;
			rr->rr_col[c].rc_gdata = NULL;
			rr->rr_col[c].rc_orig_data = NULL;
			rr->rr_col[c].rc_error = 0;
			rr->rr_col[c].rc_tried = 0;
			rr->rr_col[c].rc_skipped = 0;
			rr->rr_col[c].rc_need_orig_restore = B_FALSE;

			uint64_t dc = c - rr->rr_firstdatacol;
			if (c < rr->rr_firstdatacol) {
				rr->rr_col[c].rc_size = 1ULL << ashift;
				rr->rr_col[c].rc_abd =
				    abd_alloc_linear(rr->rr_col[c].rc_size,
				    B_TRUE);
			} else if (row == rows - 1 && bc != 0 && c >= bc) {
				/*
				 * Past the end; this column exists only so
				 * parity generation sees the full row width.
				 */
				rr->rr_col[c].rc_size = 0;
				rr->rr_col[c].rc_abd = NULL;
			} else {
				/*
				 * A "data column" (col excluding parity);
				 * the abd layout is illustrated below.
				 */
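				/*
				 * Continuing the example above (rows = 3,
				 * nparity = 1, bc = 3, r = 2), the ten
				 * data sectors D0..D9 of the source abd
				 * map onto the rows as:
				 *
				 *	row 0:	P  D0  D3  D6  D8
				 *	row 1:	P  D1  D4  D7  D9
				 *	row 2:	P  D2  D5   -   -
				 *
				 * so each data column occupies a contiguous
				 * run of the abd.
				 */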
				uint64_t off;

				if (c < bc || r == 0) {
					off = dc * rows + row;
				} else {
					off = r * rows +
					    (dc - r) * (rows - 1) + row;
				}
				rr->rr_col[c].rc_size = 1ULL << ashift;
				rr->rr_col[c].rc_abd = abd_get_offset_struct(
				    &rr->rr_col[c].rc_abdstruct,
				    abd, off << ashift, 1ULL << ashift);
			}

			asize += rr->rr_col[c].rc_size;
		}
		/*
		 * If all data stored spans all columns, there's a danger that
		 * parity will always be on the same device and, since parity
		 * isn't read during normal operation, that that device's I/O
		 * bandwidth won't be used effectively. We therefore switch
		 * the parity every 1MB.
		 *
		 * ...at least that was, ostensibly, the theory. As a practical
		 * matter unless we juggle the parity between all devices
		 * evenly, we won't see any benefit. Further, occasional writes
		 * that aren't a multiple of the LCM of the number of children
		 * and the minimum stripe width are sufficient to avoid
		 * pessimal behavior. Unfortunately, this decision created an
		 * implicit on-disk format requirement that we need to support
		 * for all eternity, but only for single-parity RAID-Z.
		 *
		 * If we intend to skip a sector in the zeroth column for
		 * padding we must make sure to note this swap. We will never
		 * intend to skip the first column since at least one data and
		 * one parity column must appear in each row.
		 */
		if (rr->rr_firstdatacol == 1 && rr->rr_cols > 1 &&
		    (offset & (1ULL << 20))) {
			ASSERT(rr->rr_cols >= 2);
			ASSERT(rr->rr_col[0].rc_size == rr->rr_col[1].rc_size);
			devidx = rr->rr_col[0].rc_devidx;
			uint64_t o = rr->rr_col[0].rc_offset;
			rr->rr_col[0].rc_devidx = rr->rr_col[1].rc_devidx;
			rr->rr_col[0].rc_offset = rr->rr_col[1].rc_offset;
			rr->rr_col[1].rc_devidx = devidx;
			rr->rr_col[1].rc_offset = o;
		}
	}
	ASSERT3U(asize, ==, tot << ashift);

	/* init RAIDZ parity ops */
	rm->rm_ops = vdev_raidz_math_get_ops();

	return (rm);
}

static raidz_map_t *
init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
{
	raidz_map_t *rm = NULL;
	const size_t alloc_dsize = opts->rto_dsize;
	const size_t total_ncols = opts->rto_dcols + parity;
	const int ccols[] = { 0, 1, 2 };

	VERIFY(zio);
	VERIFY(parity <= 3 && parity >= 1);

	*zio = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);

	(*zio)->io_offset = 0;
	(*zio)->io_size = alloc_dsize;
	(*zio)->io_abd = raidz_alloc(alloc_dsize);
	init_zio_abd(*zio);

	if (opts->rto_expand) {
		rm = vdev_raidz_map_alloc_expanded((*zio)->io_abd,
		    (*zio)->io_size, (*zio)->io_offset,
		    opts->rto_ashift, total_ncols + 1, total_ncols,
		    parity, opts->rto_expand_offset);
	} else {
		rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
		    total_ncols, parity);
	}
	VERIFY(rm);

	/* Make sure code columns are destroyed */
	corrupt_columns(rm, ccols, parity);

	return (rm);
}

static int
run_gen_check(raidz_test_opts_t *opts)
{
	char **impl_name;
	int fn, err = 0;
	zio_t *zio_test;
	raidz_map_t *rm_test;

	err = init_raidz_golden_map(opts, PARITY_PQR);
	if (err != 0)
		return (err);

	LOG(D_INFO, DBLSEP);
	LOG(D_INFO, "Testing parity generation...\n");

	for (impl_name = (char **)raidz_impl_names + 1; *impl_name != NULL;
	    impl_name++) {

		LOG(D_INFO, SEP);
		LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);

		if (vdev_raidz_impl_set(*impl_name) != 0) {
			LOG(D_INFO, "[SKIP]\n");
			continue;
		} else {
			LOG(D_INFO, "[SUPPORTED]\n");
		}

		for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {

			/* Check if we should stop */
			if (rto_opts.rto_should_stop)
				return (err);

			/* create a suitable raidz_map */
			rm_test = init_raidz_map(opts, &zio_test, fn + 1);
			VERIFY(rm_test);

			LOG(D_INFO, "\t\tTesting method [%s] ...",
			    raidz_gen_name[fn]);

			if (!opts->rto_sanity)
				vdev_raidz_generate_parity(rm_test);

			if (cmp_code(opts, rm_test, fn + 1) != 0) {
				LOG(D_INFO, "[FAIL]\n");
				err++;
			} else
				LOG(D_INFO, "[PASS]\n");

			fini_raidz_map(&zio_test, &rm_test);
		}
	}

	fini_raidz_map(&opts->zio_golden, &opts->rm_golden);

	return (err);
}
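
/*
 * Reconstruction test for one method. rec_tgts[fn] holds the base target
 * columns for method fn (the parity columns to treat as bad); the remaining
 * slots are overwritten below with the data-column indices under test,
 * offset by the parity count.
 */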
static int
run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
{
	int x0, x1, x2;
	int tgtidx[3];
	int err = 0;
	static const int rec_tgts[7][3] = {
		{1, 2, 3},	/* rec_p:   bad QR & D[0] */
		{0, 2, 3},	/* rec_q:   bad PR & D[0] */
		{0, 1, 3},	/* rec_r:   bad PQ & D[0] */
		{2, 3, 4},	/* rec_pq:  bad R  & D[0][1] */
		{1, 3, 4},	/* rec_pr:  bad Q  & D[0][1] */
		{0, 3, 4},	/* rec_qr:  bad P  & D[0][1] */
		{3, 4, 5}	/* rec_pqr: bad D[0][1][2] */
	};

	memcpy(tgtidx, rec_tgts[fn], sizeof (tgtidx));

	if (fn < RAIDZ_REC_PQ) {
		/* can reconstruct 1 failed data disk */
		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
				continue;

			/* Check if we should stop */
			if (rto_opts.rto_should_stop)
				return (err);

			LOG(D_DEBUG, "[%d] ", x0);

			tgtidx[2] = x0 + raidz_parity(rm);

			corrupt_columns(rm, tgtidx + 2, 1);

			if (!opts->rto_sanity)
				vdev_raidz_reconstruct(rm, tgtidx, 3);

			if (cmp_data(opts, rm) != 0) {
				err++;
				LOG(D_DEBUG, "\nREC D[%d]... [FAIL]\n", x0);
			}
		}

	} else if (fn < RAIDZ_REC_PQR) {
		/* can reconstruct 2 failed data disks */
		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
				continue;
			for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
				if (x1 >= rm->rm_row[0]->rr_cols -
				    raidz_parity(rm))
					continue;

				/* Check if we should stop */
				if (rto_opts.rto_should_stop)
					return (err);

				LOG(D_DEBUG, "[%d %d] ", x0, x1);

				tgtidx[1] = x0 + raidz_parity(rm);
				tgtidx[2] = x1 + raidz_parity(rm);

				corrupt_columns(rm, tgtidx + 1, 2);

				if (!opts->rto_sanity)
					vdev_raidz_reconstruct(rm, tgtidx, 3);

				if (cmp_data(opts, rm) != 0) {
					err++;
					LOG(D_DEBUG, "\nREC D[%d %d]... "
					    "[FAIL]\n", x0, x1);
				}
			}
		}
	} else {
		/* can reconstruct 3 failed data disks */
		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
				continue;
			for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
				if (x1 >= rm->rm_row[0]->rr_cols -
				    raidz_parity(rm))
					continue;
				for (x2 = x1 + 1; x2 < opts->rto_dcols; x2++) {
					if (x2 >= rm->rm_row[0]->rr_cols -
					    raidz_parity(rm))
						continue;

					/* Check if we should stop */
					if (rto_opts.rto_should_stop)
						return (err);

					LOG(D_DEBUG, "[%d %d %d]", x0, x1, x2);

					tgtidx[0] = x0 + raidz_parity(rm);
					tgtidx[1] = x1 + raidz_parity(rm);
					tgtidx[2] = x2 + raidz_parity(rm);

					corrupt_columns(rm, tgtidx, 3);

					if (!opts->rto_sanity)
						vdev_raidz_reconstruct(rm,
						    tgtidx, 3);

					if (cmp_data(opts, rm) != 0) {
						err++;
						LOG(D_DEBUG,
						    "\nREC D[%d %d %d]... "
						    "[FAIL]\n", x0, x1, x2);
					}
				}
			}
		}
	}
	return (err);
}

static int
run_rec_check(raidz_test_opts_t *opts)
{
	char **impl_name;
	unsigned fn, err = 0;
	zio_t *zio_test;
	raidz_map_t *rm_test;

	err = init_raidz_golden_map(opts, PARITY_PQR);
	if (err != 0)
		return (err);

	LOG(D_INFO, DBLSEP);
	LOG(D_INFO, "Testing data reconstruction...\n");

	for (impl_name = (char **)raidz_impl_names + 1; *impl_name != NULL;
	    impl_name++) {

		LOG(D_INFO, SEP);
		LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);

		if (vdev_raidz_impl_set(*impl_name) != 0) {
			LOG(D_INFO, "[SKIP]\n");
			continue;
		} else
			LOG(D_INFO, "[SUPPORTED]\n");

		/* create a suitable raidz_map */
		rm_test = init_raidz_map(opts, &zio_test, PARITY_PQR);
		/* generate parity */
		vdev_raidz_generate_parity(rm_test);

		for (fn = 0; fn < RAIDZ_REC_NUM; fn++) {

			LOG(D_INFO, "\t\tTesting method [%s] ...",
			    raidz_rec_name[fn]);

			if (run_rec_check_impl(opts, rm_test, fn) != 0) {
				LOG(D_INFO, "[FAIL]\n");
				err++;
			} else
				LOG(D_INFO, "[PASS]\n");
		}
		/* tear down test raidz_map */
		fini_raidz_map(&zio_test, &rm_test);
	}

	fini_raidz_map(&opts->zio_golden, &opts->rm_golden);

	return (err);
}

static int
run_test(raidz_test_opts_t *opts)
{
	int err = 0;

	if (opts == NULL)
		opts = &rto_opts;

	print_opts(opts, B_FALSE);

	err |= run_gen_check(opts);
	err |= run_rec_check(opts);

	return (err);
}

#define	SWEEP_RUNNING	0
#define	SWEEP_FINISHED	1
#define	SWEEP_ERROR	2
#define	SWEEP_TIMEOUT	3

static int sweep_state = SWEEP_RUNNING;
static raidz_test_opts_t failed_opts;

static kmutex_t sem_mtx;
static kcondvar_t sem_cv;
static int max_free_slots;
static int free_slots;
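
/*
 * Sweep worker. Threads are throttled with a counting semaphore built from
 * sem_mtx/sem_cv: run_sweep() consumes a free slot before spawning a worker,
 * and each worker returns its slot (and signals the sweeper) on exit.
 */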
static void
sweep_thread(void *arg)
{
	int err = 0;
	raidz_test_opts_t *opts = (raidz_test_opts_t *)arg;
	VERIFY(opts != NULL);

	err = run_test(opts);

	if (rto_opts.rto_sanity) {
		/* 25% chance that a sweep test fails */
		if (rand() < (RAND_MAX / 4))
			err = 1;
	}

	if (err != 0) {
		mutex_enter(&sem_mtx);
		memcpy(&failed_opts, opts, sizeof (raidz_test_opts_t));
		sweep_state = SWEEP_ERROR;
		mutex_exit(&sem_mtx);
	}

	umem_free(opts, sizeof (raidz_test_opts_t));

	/* signal the next thread */
	mutex_enter(&sem_mtx);
	free_slots++;
	cv_signal(&sem_cv);
	mutex_exit(&sem_mtx);

	thread_exit();
}

static int
run_sweep(void)
{
	static const size_t dcols_v[] = { 1, 2, 3, 4, 5, 6, 7, 8, 12, 15, 16 };
	static const size_t ashift_v[] = { 9, 12, 14 };
	static const size_t size_v[] = { 1 << 9, 21 * (1 << 9), 13 * (1 << 12),
		1 << 17, (1 << 20) - (1 << 12), SPA_MAXBLOCKSIZE };

	(void) setvbuf(stdout, NULL, _IONBF, 0);

	ulong_t total_comb = ARRAY_SIZE(size_v) * ARRAY_SIZE(ashift_v) *
	    ARRAY_SIZE(dcols_v);
	ulong_t tried_comb = 0;
	hrtime_t time_diff, start_time = gethrtime();
	raidz_test_opts_t *opts;
	int a, d, s;

	max_free_slots = free_slots = MAX(2, boot_ncpus);

	mutex_init(&sem_mtx, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&sem_cv, NULL, CV_DEFAULT, NULL);

	for (s = 0; s < ARRAY_SIZE(size_v); s++)
	for (a = 0; a < ARRAY_SIZE(ashift_v); a++)
	for (d = 0; d < ARRAY_SIZE(dcols_v); d++) {

		if (size_v[s] < (1 << ashift_v[a])) {
			total_comb--;
			continue;
		}

		if (++tried_comb % 20 == 0)
			LOG(D_ALL, "%lu/%lu... ", tried_comb, total_comb);

		/* wait for signal to start new thread */
		mutex_enter(&sem_mtx);
		while (cv_timedwait_sig(&sem_cv, &sem_mtx,
		    ddi_get_lbolt() + hz)) {

			/* check if we should stop the test (timeout) */
			time_diff = (gethrtime() - start_time) / NANOSEC;
			if (rto_opts.rto_sweep_timeout > 0 &&
			    time_diff >= rto_opts.rto_sweep_timeout) {
				sweep_state = SWEEP_TIMEOUT;
				rto_opts.rto_should_stop = B_TRUE;
				mutex_exit(&sem_mtx);
				goto exit;
			}

			/* check if we should stop the test (error) */
			if (sweep_state != SWEEP_RUNNING) {
				mutex_exit(&sem_mtx);
				goto exit;
			}

			/* exit loop if a slot is available */
			if (free_slots > 0) {
				break;
			}
		}

		free_slots--;
		mutex_exit(&sem_mtx);

		opts = umem_zalloc(sizeof (raidz_test_opts_t), UMEM_NOFAIL);
		opts->rto_ashift = ashift_v[a];
		opts->rto_dcols = dcols_v[d];
		opts->rto_offset = (1 << ashift_v[a]) * rand();
		opts->rto_dsize = size_v[s];
		opts->rto_expand = rto_opts.rto_expand;
		opts->rto_expand_offset = rto_opts.rto_expand_offset;
		opts->rto_v = 0; /* be quiet */

		VERIFY3P(thread_create(NULL, 0, sweep_thread, (void *)opts,
		    0, NULL, TS_RUN, defclsyspri), !=, NULL);
	}

exit:
	LOG(D_ALL, "\nWaiting for test threads to finish...\n");
	mutex_enter(&sem_mtx);
	VERIFY(free_slots <= max_free_slots);
	while (free_slots < max_free_slots) {
		(void) cv_wait(&sem_cv, &sem_mtx);
	}
	mutex_exit(&sem_mtx);

	if (sweep_state == SWEEP_ERROR) {
		ERR("Sweep test failed! Failed option: \n");
		print_opts(&failed_opts, B_TRUE);
	} else {
		if (sweep_state == SWEEP_TIMEOUT)
			LOG(D_ALL, "Test timeout (%lus). Stopping...\n",
			    (ulong_t)rto_opts.rto_sweep_timeout);

		LOG(D_ALL, "Sweep test succeeded on %lu raidz maps!\n",
		    (ulong_t)tried_comb);
	}

	mutex_destroy(&sem_mtx);

	return (sweep_state == SWEEP_ERROR ? SWEEP_ERROR : 0);
}

int
main(int argc, char **argv)
{
	size_t i;
	struct sigaction action;
	int err = 0;

	/* init gdb string early */
	(void) snprintf(gdb, sizeof (gdb), gdb_tmpl, getpid());

	action.sa_handler = sig_handler;
	sigemptyset(&action.sa_mask);
	action.sa_flags = 0;

	if (sigaction(SIGSEGV, &action, NULL) < 0 ||
	    sigaction(SIGABRT, &action, NULL) < 0) {
		ERR("raidz_test: cannot catch SIGSEGV or SIGABRT: %s.\n",
		    strerror(errno));
		exit(EXIT_FAILURE);
	}

	(void) setvbuf(stdout, NULL, _IOLBF, 0);

	dprintf_setup(&argc, argv);

	process_options(argc, argv);

	kernel_init(SPA_MODE_READ);

	/* setup random data because rand() is not reentrant */
	rand_data = (int *)umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
	srand((unsigned)time(NULL) * getpid());
	for (i = 0; i < SPA_MAXBLOCKSIZE / sizeof (int); i++)
		rand_data[i] = rand();

	/* the pool is read-only from here on; catch stray writes */
	mprotect(rand_data, SPA_MAXBLOCKSIZE, PROT_READ);

	if (rto_opts.rto_benchmark) {
		run_raidz_benchmark();
	} else if (rto_opts.rto_sweep) {
		err = run_sweep();
	} else {
		err = run_test(NULL);
	}

	umem_free(rand_data, SPA_MAXBLOCKSIZE);
	kernel_fini();

	return (err);
}