1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 /* 28 * Just in case we're not in a build environment, make sure that 29 * TEXT_DOMAIN gets set to something. 30 */ 31 #if !defined(TEXT_DOMAIN) 32 #define TEXT_DOMAIN "SYS_TEST" 33 #endif 34 35 /* 36 * RAID operations 37 */ 38 39 #include <stdlib.h> 40 #include <meta.h> 41 #include <sys/lvm/md_raid.h> 42 #include <sys/lvm/mdvar.h> 43 #include <sys/lvm/md_convert.h> 44 #include <stddef.h> 45 46 /* 47 * FUNCTION: meta_get_raid_names() 48 * INPUT: sp - the set name to get raid from 49 * options - options from the command line 50 * OUTPUT: nlpp - list of all raid names 51 * ep - return error pointer 52 * RETURNS: int - -1 if error, 0 success 53 * PURPOSE: returns a list of all raid in the metadb 54 * for all devices in the specified set 55 */ 56 int 57 meta_get_raid_names( 58 mdsetname_t *sp, 59 mdnamelist_t **nlpp, 60 int options, 61 md_error_t *ep 62 ) 63 { 64 return (meta_get_names(MD_RAID, sp, nlpp, options, ep)); 65 } 66 67 /* 68 * free raid unit 69 */ 70 void 71 meta_free_raid( 72 md_raid_t *raidp 73 ) 74 { 75 if (raidp->cols.cols_val != NULL) { 76 assert(raidp->cols.cols_len > 0); 77 Free(raidp->cols.cols_val); 78 } 79 Free(raidp); 80 } 81 82 /* 83 * get raid (common) 84 */ 85 md_raid_t * 86 meta_get_raid_common( 87 mdsetname_t *sp, 88 mdname_t *raidnp, 89 int fast, 90 md_error_t *ep 91 ) 92 { 93 mddrivename_t *dnp = raidnp->drivenamep; 94 char *miscname; 95 mr_unit_t *mr; 96 md_raid_t *raidp; 97 uint_t ncol; 98 uint_t col; 99 md_resync_ioctl_t ri; 100 101 /* must have set */ 102 assert(sp != NULL); 103 assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))); 104 105 /* short circuit */ 106 if (dnp->unitp != NULL) { 107 assert(dnp->unitp->type == MD_METARAID); 108 return ((md_raid_t *)dnp->unitp); 109 } 110 111 /* get miscname and unit */ 112 if ((miscname = metagetmiscname(raidnp, ep)) == NULL) 113 return (NULL); 114 if (strcmp(miscname, MD_RAID) != 0) { 115 (void) mdmderror(ep, MDE_NOT_RAID, meta_getminor(raidnp->dev), 116 raidnp->cname); 117 return (NULL); 118 } 119 if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL) 120 return (NULL); 121 assert(mr->c.un_type == MD_METARAID); 122 123 /* allocate raid */ 124 raidp = Zalloc(sizeof (*raidp)); 125 126 /* allocate columns */ 127 ncol = mr->un_totalcolumncnt; 128 assert(ncol >= MD_RAID_MIN); 129 raidp->cols.cols_len = ncol; 130 raidp->cols.cols_val = Zalloc(raidp->cols.cols_len * 131 sizeof (*raidp->cols.cols_val)); 132 133 /* get common info */ 134 raidp->common.namep = raidnp; 135 raidp->common.type = mr->c.un_type; 136 raidp->common.state = mr->c.un_status; 137 raidp->common.capabilities = mr->c.un_capabilities; 138 raidp->common.parent = mr->c.un_parent; 139 raidp->common.size = mr->c.un_total_blocks; 140 raidp->common.user_flags = mr->c.un_user_flags; 141 raidp->common.revision = mr->c.un_revision; 142 143 /* get options */ 144 raidp->state = mr->un_state; 145 raidp->timestamp = mr->un_timestamp; 146 raidp->interlace = mr->un_segsize; 147 raidp->orig_ncol = mr->un_origcolumncnt; 148 raidp->column_size = mr->un_segsize * mr->un_segsincolumn; 149 raidp->pw_count = mr->un_pwcnt; 150 assert(raidp->orig_ncol <= ncol); 151 if ((mr->un_hsp_id != MD_HSP_NONE) && 152 ((raidp->hspnamep = metahsphspname(&sp, mr->un_hsp_id, 153 ep)) == NULL)) { 154 goto out; 155 } 156 157 /* get columns, update unit state */ 158 for (col = 0; (col < ncol); ++col) { 159 mr_column_t *rcp = &mr->un_column[col]; 160 md_raidcol_t *mdrcp = &raidp->cols.cols_val[col]; 161 162 /* get column name */ 163 mdrcp->colnamep = metakeyname(&sp, rcp->un_orig_key, fast, ep); 164 if (mdrcp->colnamep == NULL) 165 goto out; 166 167 /* override any start_blk */ 168 #ifdef DEBUG 169 if (metagetstart(sp, mdrcp->colnamep, ep) != 170 MD_DISKADDR_ERROR) { 171 assert(mdrcp->colnamep->start_blk <= 172 rcp->un_orig_devstart); 173 } else { 174 mdclrerror(ep); 175 } 176 #endif /* DEBUG */ 177 mdrcp->colnamep->start_blk = rcp->un_orig_devstart; 178 179 /* if hotspared */ 180 if (HOTSPARED(mr, col)) { 181 /* get hotspare name */ 182 mdrcp->hsnamep = metakeyname(&sp, rcp->un_hs_key, 183 fast, ep); 184 if (mdrcp->hsnamep == NULL) 185 goto out; 186 187 if (getenv("META_DEBUG_START_BLK") != NULL) { 188 if (metagetstart(sp, mdrcp->hsnamep, ep) == 189 MD_DISKADDR_ERROR) 190 mdclrerror(ep); 191 192 if ((mdrcp->hsnamep->start_blk == 0) && 193 (rcp->un_hs_pwstart != 0)) 194 md_eprintf(dgettext(TEXT_DOMAIN, 195 "%s: suspected bad start block," 196 " seems labelled [raid]\n"), 197 mdrcp->hsnamep->cname); 198 199 if ((mdrcp->hsnamep->start_blk > 0) && 200 (rcp->un_hs_pwstart == 0)) 201 md_eprintf(dgettext(TEXT_DOMAIN, 202 "%s: suspected bad start block, " 203 " seems unlabelled [raid]\n"), 204 mdrcp->hsnamep->cname); 205 } 206 207 /* override any start_blk */ 208 mdrcp->hsnamep->start_blk = rcp->un_hs_devstart; 209 } 210 211 /* get state, flags, and timestamp */ 212 mdrcp->state = rcp->un_devstate; 213 mdrcp->flags = rcp->un_devflags; 214 mdrcp->timestamp = rcp->un_devtimestamp; 215 } 216 217 /* get resync info */ 218 (void) memset(&ri, 0, sizeof (ri)); 219 ri.ri_mnum = meta_getminor(raidnp->dev); 220 MD_SETDRIVERNAME(&ri, MD_RAID, sp->setno); 221 if (metaioctl(MD_IOCGETSYNC, &ri, &ri.mde, raidnp->cname) != 0) { 222 (void) mdstealerror(ep, &ri.mde); 223 goto out; 224 } 225 raidp->resync_flags = ri.ri_flags; 226 raidp->percent_dirty = ri.ri_percent_dirty; 227 raidp->percent_done = ri.ri_percent_done; 228 229 /* cleanup, return success */ 230 Free(mr); 231 dnp->unitp = (md_common_t *)raidp; 232 return (raidp); 233 234 /* cleanup, return error */ 235 out: 236 Free(mr); 237 meta_free_raid(raidp); 238 return (NULL); 239 } 240 241 /* 242 * get raid 243 */ 244 md_raid_t * 245 meta_get_raid( 246 mdsetname_t *sp, 247 mdname_t *raidnp, 248 md_error_t *ep 249 ) 250 { 251 return (meta_get_raid_common(sp, raidnp, 0, ep)); 252 } 253 254 /* 255 * check raid for dev 256 */ 257 static int 258 in_raid( 259 mdsetname_t *sp, 260 mdname_t *raidnp, 261 mdname_t *np, 262 diskaddr_t slblk, 263 diskaddr_t nblks, 264 md_error_t *ep 265 ) 266 { 267 md_raid_t *raidp; 268 uint_t col; 269 270 /* should be in the same set */ 271 assert(sp != NULL); 272 assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))); 273 274 /* get unit */ 275 if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL) 276 return (-1); 277 278 /* look in columns */ 279 for (col = 0; (col < raidp->cols.cols_len); ++col) { 280 md_raidcol_t *cp = &raidp->cols.cols_val[col]; 281 mdname_t *colnp = cp->colnamep; 282 diskaddr_t col_sblk; 283 int err; 284 285 /* check same drive since metagetstart() can fail */ 286 if ((err = meta_check_samedrive(np, colnp, ep)) < 0) 287 return (-1); 288 else if (err == 0) 289 continue; 290 291 /* check overlap */ 292 if ((col_sblk = metagetstart(sp, colnp, ep)) == 293 MD_DISKADDR_ERROR) 294 return (-1); 295 if (meta_check_overlap(raidnp->cname, np, slblk, nblks, 296 colnp, col_sblk, -1, ep) != 0) { 297 return (-1); 298 } 299 } 300 301 /* return success */ 302 return (0); 303 } 304 305 /* 306 * check to see if we're in a raid 307 */ 308 int 309 meta_check_inraid( 310 mdsetname_t *sp, 311 mdname_t *np, 312 diskaddr_t slblk, 313 diskaddr_t nblks, 314 md_error_t *ep 315 ) 316 { 317 mdnamelist_t *raidnlp = NULL; 318 mdnamelist_t *p; 319 int rval = 0; 320 321 /* should have a set */ 322 assert(sp != NULL); 323 324 /* for each raid */ 325 if (meta_get_raid_names(sp, &raidnlp, 0, ep) < 0) 326 return (-1); 327 for (p = raidnlp; (p != NULL); p = p->next) { 328 mdname_t *raidnp = p->namep; 329 330 /* check raid */ 331 if (in_raid(sp, raidnp, np, slblk, nblks, ep) != 0) { 332 rval = -1; 333 break; 334 } 335 } 336 337 /* cleanup, return success */ 338 metafreenamelist(raidnlp); 339 return (rval); 340 } 341 342 /* 343 * check column 344 */ 345 int 346 meta_check_column( 347 mdsetname_t *sp, 348 mdname_t *np, 349 md_error_t *ep 350 ) 351 { 352 mdchkopts_t options = (MDCHK_ALLOW_MDDB); 353 354 /* check for soft partitions */ 355 if (meta_sp_issp(sp, np, ep) != 0) { 356 /* make sure we have a disk */ 357 if (metachkcomp(np, ep) != 0) 358 return (-1); 359 } 360 361 /* check to ensure that it is not already in use */ 362 if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) { 363 return (-1); 364 } 365 366 /* make sure it is in the set */ 367 if (meta_check_inset(sp, np, ep) != 0) 368 return (-1); 369 370 /* make sure its not in a metadevice */ 371 if (meta_check_inmeta(sp, np, options, 0, -1, ep) != 0) 372 return (-1); 373 374 /* return success */ 375 return (0); 376 } 377 378 /* 379 * print raid 380 */ 381 static int 382 raid_print( 383 md_raid_t *raidp, 384 char *fname, 385 FILE *fp, 386 mdprtopts_t options, 387 md_error_t *ep 388 ) 389 { 390 uint_t col; 391 int rval = -1; 392 393 394 if (options & PRINT_LARGEDEVICES) { 395 if ((raidp->common.revision & MD_64BIT_META_DEV) == 0) { 396 rval = 0; 397 goto out; 398 } 399 } 400 401 if (options & PRINT_FN) { 402 if ((raidp->common.revision & MD_FN_META_DEV) == 0) { 403 rval = 0; 404 goto out; 405 } 406 } 407 408 /* print name and -r */ 409 if (fprintf(fp, "%s -r", raidp->common.namep->cname) == EOF) 410 goto out; 411 412 /* 413 * Print columns. Always print the full path. 414 */ 415 for (col = 0; (col < raidp->cols.cols_len); ++col) { 416 md_raidcol_t *mdrcp = &raidp->cols.cols_val[col]; 417 418 if (fprintf(fp, " %s", mdrcp->colnamep->rname) == EOF) 419 goto out; 420 } 421 422 if (fprintf(fp, " -k") == EOF) 423 goto out; 424 425 /* print options */ 426 if (fprintf(fp, " -i %lldb", raidp->interlace) == EOF) 427 goto out; 428 429 if (raidp->pw_count != PWCNT_MIN) 430 if (fprintf(fp, " -w %d", raidp->pw_count) == EOF) 431 goto out; 432 433 if (raidp->hspnamep != NULL) { 434 if (fprintf(fp, " -h %s", raidp->hspnamep->hspname) == EOF) 435 goto out; 436 } 437 if (raidp->orig_ncol != raidp->cols.cols_len) { 438 assert(raidp->orig_ncol < raidp->cols.cols_len); 439 if (fprintf(fp, " -o %u", raidp->orig_ncol) == EOF) 440 goto out; 441 } 442 443 /* terminate last line */ 444 if (fprintf(fp, "\n") == EOF) 445 goto out; 446 447 /* success */ 448 rval = 0; 449 450 /* cleanup, return error */ 451 out: 452 if (rval != 0) 453 (void) mdsyserror(ep, errno, fname); 454 return (rval); 455 } 456 457 static int 458 find_resyncing_column( 459 md_raid_t *raidp 460 ) 461 { 462 int col; 463 464 for (col = 0; (col < raidp->cols.cols_len); ++col) { 465 md_raidcol_t *cp = &raidp->cols.cols_val[col]; 466 if (cp->state & RCS_RESYNC) 467 return (col); 468 } 469 470 /* No resyncing columns */ 471 return (-1); 472 } 473 474 /* 475 * convert raid state to name 476 */ 477 char * 478 raid_state_to_name( 479 md_raid_t *raidp, 480 md_timeval32_t *tvp, 481 uint_t tstate /* Errored tstate flags */ 482 ) 483 { 484 485 /* grab time */ 486 if (tvp != NULL) 487 *tvp = raidp->timestamp; 488 489 /* 490 * If the device has a transient error state (due to it being DR'ed or 491 * failed) and there has been no I/O to it (the actual device is still 492 * marked as 'Okay') then we cannot know what the state is or what 493 * action to take on it. Therefore report the device as 'Unavailable'. 494 * A subsequent I/O to the device will cause the 'Okay' status to 495 * disappear if the device is actually gone and then we will print out 496 * the appropriate status. The MD_INACCESSIBLE state is only set 497 * on the raid when we open it or probe it. One the raid is open 498 * then we will just have regular error status on the device. 499 */ 500 if (tstate & MD_INACCESSIBLE) { 501 return (dgettext(TEXT_DOMAIN, "Unavailable")); 502 } 503 504 /* resyncing */ 505 if (find_resyncing_column(raidp) >= 0) 506 return (dgettext(TEXT_DOMAIN, "Resyncing")); 507 508 /* everything else */ 509 switch (raidp->state) { 510 case RUS_INIT : 511 return (dgettext(TEXT_DOMAIN, "Initializing")); 512 case RUS_OKAY : 513 return (dgettext(TEXT_DOMAIN, "Okay")); 514 case RUS_ERRED : 515 /*FALLTHROUGH*/ 516 case RUS_LAST_ERRED : 517 return (dgettext(TEXT_DOMAIN, "Needs Maintenance")); 518 case RUS_DOI : 519 return (dgettext(TEXT_DOMAIN, "Initialization Failed")); 520 case RUS_REGEN : 521 return (dgettext(TEXT_DOMAIN, "Regen")); 522 default : 523 return (dgettext(TEXT_DOMAIN, "invalid")); 524 } /* switch */ 525 } 526 527 static int 528 find_erred_column(md_raid_t *raidp, rcs_state_t state) 529 { 530 int col; 531 532 for (col = 0; (col < raidp->cols.cols_len); ++col) { 533 md_raidcol_t *cp = &raidp->cols.cols_val[col]; 534 if (cp->state & state) 535 return (col); 536 } 537 538 /* No erred columns */ 539 return (-1); 540 } 541 542 /* 543 * convert raid state to repair action 544 */ 545 char * 546 raid_state_to_action(md_raid_t *raidp) 547 { 548 static char emsg[1024]; 549 mdname_t *raidnp = raidp->common.namep; 550 int err_col; 551 552 /* first check for full init failure */ 553 if (raidp->state & RUS_DOI) { 554 (void) snprintf(emsg, sizeof (emsg), 555 "metaclear -f %s", raidnp->cname); 556 return (emsg); 557 } 558 559 /* replace errored or init errored raid column */ 560 if ((err_col = find_erred_column(raidp, 561 (RCS_ERRED | RCS_INIT_ERRED))) >= 0) { 562 mdname_t *colnp; 563 564 /* get column with error */ 565 assert(err_col < raidp->cols.cols_len); 566 colnp = raidp->cols.cols_val[err_col].colnamep; 567 (void) snprintf(emsg, sizeof (emsg), 568 "metareplace %s%s %s <%s>", 569 ((raidp->state == RUS_LAST_ERRED) ? "-f " : ""), 570 raidnp->cname, colnp->cname, 571 dgettext(TEXT_DOMAIN, "new device")); 572 return (emsg); 573 } 574 575 576 /* replace last errored raid column */ 577 if ((err_col = find_erred_column(raidp, RCS_LAST_ERRED)) >= 0) { 578 mdname_t *colnp; 579 580 assert(err_col < raidp->cols.cols_len); 581 colnp = raidp->cols.cols_val[err_col].colnamep; 582 (void) snprintf(emsg, sizeof (emsg), 583 "metareplace %s %s %s <%s>", 584 ((raidp->state == RUS_LAST_ERRED) ? "-f " : ""), 585 raidnp->cname, colnp->cname, 586 dgettext(TEXT_DOMAIN, "new device")); 587 return (emsg); 588 } 589 590 /* OK */ 591 return (NULL); 592 } 593 594 /* 595 * get printable raid column state 596 */ 597 char * 598 raid_col_state_to_name( 599 md_raidcol_t *colp, 600 md_timeval32_t *tvp, 601 uint_t tstate 602 ) 603 { 604 /* grab time */ 605 if (tvp != NULL) 606 *tvp = colp->timestamp; 607 608 if (tstate != 0) { 609 return (dgettext(TEXT_DOMAIN, "Unavailable")); 610 } 611 612 /* everything else */ 613 switch (colp->state) { 614 case RCS_INIT: 615 return (dgettext(TEXT_DOMAIN, "Initializing")); 616 617 case RCS_OKAY: 618 return (dgettext(TEXT_DOMAIN, "Okay")); 619 620 case RCS_INIT_ERRED: 621 /*FALLTHROUGH*/ 622 case RCS_ERRED: 623 return (dgettext(TEXT_DOMAIN, "Maintenance")); 624 625 case RCS_LAST_ERRED: 626 return (dgettext(TEXT_DOMAIN, "Last Erred")); 627 628 case RCS_RESYNC: 629 return (dgettext(TEXT_DOMAIN, "Resyncing")); 630 631 default: 632 return (dgettext(TEXT_DOMAIN, "Unknown")); 633 } 634 } 635 636 /* 637 * print raid column 638 */ 639 static int 640 display_raid_device_info( 641 mdsetname_t *sp, 642 md_raidcol_t *colp, 643 char *fname, 644 FILE *fp, 645 mdprtopts_t options, 646 int print_len, 647 uint_t top_tstate, /* Errored tstate flags */ 648 md_error_t *ep 649 ) 650 { 651 mdname_t *namep = ((colp->hsnamep != NULL) ? 652 colp->hsnamep : colp->colnamep); 653 char *devid = ""; 654 char *cname = colp->colnamep->cname; 655 diskaddr_t start_blk; 656 int has_mddb; 657 char *has_mddb_str; 658 char *col_state; 659 md_timeval32_t tv; 660 char *hsname = ((colp->hsnamep != NULL) ? 661 colp->hsnamep->cname : ""); 662 int rval = -1; 663 mdname_t *didnp = NULL; 664 ddi_devid_t dtp; 665 uint_t tstate = 0; 666 667 /* get info */ 668 if ((start_blk = metagetstart(sp, namep, ep)) == MD_DISKADDR_ERROR) 669 return (-1); 670 if ((has_mddb = metahasmddb(sp, namep, ep)) < 0) 671 return (-1); 672 if (has_mddb) 673 has_mddb_str = dgettext(TEXT_DOMAIN, "Yes"); 674 else 675 has_mddb_str = dgettext(TEXT_DOMAIN, "No"); 676 677 if (metaismeta(namep)) { 678 if (meta_get_tstate(namep->dev, &tstate, ep) != 0) 679 return (-1); 680 col_state = raid_col_state_to_name(colp, &tv, 681 tstate & MD_DEV_ERRORED); 682 } else { 683 /* 684 * if top_tstate is set, that implies that you have 685 * a ctd type device with an unavailable metadevice 686 * on top of it. If so, print a - for it's state 687 */ 688 if (top_tstate != 0) 689 col_state = "-"; 690 else 691 col_state = raid_col_state_to_name(colp, &tv, tstate); 692 } 693 694 /* populate the key in the name_p structure */ 695 if ((didnp = metadevname(&sp, namep->dev, ep)) == NULL) 696 return (-1); 697 698 /* determine if devid does NOT exist */ 699 if (options & PRINT_DEVID) { 700 if ((dtp = meta_getdidbykey(sp->setno, getmyside(sp, ep), 701 didnp->key, ep)) == NULL) 702 devid = dgettext(TEXT_DOMAIN, "No "); 703 else { 704 devid = dgettext(TEXT_DOMAIN, "Yes"); 705 free(dtp); 706 } 707 } 708 /* print column */ 709 /* 710 * Building a format string on the fly that will 711 * be used in (f)printf. This allows the length 712 * of the ctd to vary from small to large without 713 * looking horrible. 714 */ 715 if (! (options & PRINT_TIMES)) { 716 if (fprintf(fp, 717 "\t%-*.*s %8lld %5.5s %12.12s %5.5s %s\n", 718 print_len, print_len, cname, start_blk, has_mddb_str, 719 col_state, devid, hsname) == EOF) { 720 goto out; 721 } 722 } else { 723 char *timep = meta_print_time(&tv); 724 725 if (fprintf(fp, 726 "\t%-*s %5lld %-5s %-11s %-5s %-9s %s\n", 727 print_len, cname, start_blk, has_mddb_str, 728 col_state, devid, hsname, timep) == EOF) { 729 goto out; 730 } 731 } 732 733 /* success */ 734 rval = 0; 735 736 /* cleanup, return error */ 737 out: 738 if (rval != 0) 739 (void) mdsyserror(ep, errno, fname); 740 741 return (rval); 742 } 743 744 /* 745 * print raid options 746 */ 747 int 748 meta_print_raid_options( 749 mdhspname_t *hspnamep, 750 char *fname, 751 FILE *fp, 752 md_error_t *ep 753 ) 754 { 755 char *hspname = ((hspnamep != NULL) ? hspnamep->hspname : 756 dgettext(TEXT_DOMAIN, "none")); 757 int rval = -1; 758 759 /* print options */ 760 if (fprintf(fp, dgettext(TEXT_DOMAIN, 761 " Hot spare pool: %s\n"), hspname) == EOF) { 762 goto out; 763 } 764 765 /* success */ 766 rval = 0; 767 768 /* cleanup, return error */ 769 out: 770 if (rval != 0) 771 (void) mdsyserror(ep, errno, fname); 772 return (rval); 773 } 774 775 /* 776 * report raid 777 */ 778 static int 779 raid_report( 780 mdsetname_t *sp, 781 md_raid_t *raidp, 782 char *fname, 783 FILE *fp, 784 mdprtopts_t options, 785 md_error_t *ep 786 ) 787 { 788 char *p; 789 uint_t ncol = raidp->cols.cols_len; 790 uint_t orig_ncol = raidp->orig_ncol; 791 diskaddr_t column_size = raidp->column_size; 792 char *raid_state; 793 md_timeval32_t tv; 794 char *timep; 795 uint_t col; 796 int rval = -1; 797 int len = 0; 798 uint_t tstate = 0; 799 800 if (options & PRINT_LARGEDEVICES) { 801 if ((raidp->common.revision & MD_64BIT_META_DEV) == 0) { 802 rval = 0; 803 goto out; 804 } 805 } 806 807 if (options & PRINT_FN) { 808 if ((raidp->common.revision & MD_FN_META_DEV) == 0) { 809 rval = 0; 810 goto out; 811 } 812 } 813 814 /* print header */ 815 if (options & PRINT_HEADER) { 816 if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: RAID\n"), 817 raidp->common.namep->cname) == EOF) { 818 goto out; 819 } 820 821 } 822 823 /* print state */ 824 if (metaismeta(raidp->common.namep)) { 825 if (meta_get_tstate(raidp->common.namep->dev, &tstate, ep) != 0) 826 return (-1); 827 } 828 tstate &= MD_DEV_ERRORED; /* extract the errored tstate bits */ 829 raid_state = raid_state_to_name(raidp, &tv, tstate); 830 if (options & PRINT_TIMES) { 831 timep = meta_print_time(&tv); 832 } else { 833 timep = ""; 834 } 835 836 if (fprintf(fp, dgettext(TEXT_DOMAIN, " State: %-12s %s\n"), 837 raid_state, timep) == EOF) { 838 goto out; 839 } 840 841 /* 842 * Display recovery action if we're marked in the Unavailable state. 843 */ 844 if ((tstate == 0) || (tstate & MD_INACCESSIBLE)) { 845 /* print what to do */ 846 if (tstate & MD_INACCESSIBLE) { 847 char sname[MD_MAX_SETNAME + 3]; /* 3 = sizeof("-s ") */ 848 849 if (metaislocalset(sp)) { 850 sname[0] = '\0'; 851 } else { 852 (void) snprintf(sname, MD_MAX_SETNAME + 3, 853 "-s %s", sp->setname); 854 } 855 if (fprintf(fp, dgettext(TEXT_DOMAIN, 856 " Invoke: metastat -i %s\n"), sname) == EOF) { 857 goto out; 858 } 859 } else if ((p = raid_state_to_action(raidp)) != NULL) { 860 if (fprintf(fp, dgettext(TEXT_DOMAIN, 861 " Invoke: %s\n"), p) == EOF) { 862 goto out; 863 } 864 } 865 866 /* resync status */ 867 if (raidp->resync_flags & MD_RI_INPROGRESS) { 868 if (fprintf(fp, dgettext(TEXT_DOMAIN, 869 " Resync in progress: %2d.%1d%% done\n"), 870 raidp->percent_done/10, 871 raidp->percent_done % 10) == EOF) { 872 goto out; 873 } 874 } else if (raidp->resync_flags & MD_GROW_INPROGRESS) { 875 if (fprintf(fp, dgettext(TEXT_DOMAIN, 876 " Initialization in progress: %2d.%1d%% " 877 "done\n"), 878 raidp->percent_done/10, 879 raidp->percent_done % 10) == EOF) { 880 goto out; 881 } 882 } else if (raidp->state & RUS_REGEN) { 883 if (fprintf(fp, dgettext(TEXT_DOMAIN, 884 " Parity regeneration in progress: %2d.%1d%% " 885 "done\n"), 886 raidp->percent_done/10, 887 raidp->percent_done % 10) == EOF) { 888 goto out; 889 } 890 } 891 } 892 893 /* print hotspare pool */ 894 if (raidp->hspnamep != NULL) { 895 if (meta_print_raid_options(raidp->hspnamep, 896 fname, fp, ep) != 0) { 897 return (-1); 898 } 899 } 900 901 /* print interlace */ 902 if (fprintf(fp, dgettext(TEXT_DOMAIN, " Interlace: %lld blocks\n"), 903 raidp->interlace) == EOF) { 904 goto out; 905 } 906 907 /* print size */ 908 if (fprintf(fp, dgettext(TEXT_DOMAIN, " Size: %lld blocks (%s)\n"), 909 raidp->common.size, 910 meta_number_to_string(raidp->common.size, DEV_BSIZE)) == EOF) { 911 goto out; 912 } 913 914 /* MD_DEBUG stuff */ 915 if (options & PRINT_DEBUG) { 916 mdname_t *raidnp = raidp->common.namep; 917 mr_unit_t *mr; 918 919 /* get additional info */ 920 if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL) 921 return (-1); 922 assert(mr->c.un_type == MD_METARAID); 923 924 /* print prewrite count and size */ 925 if (fprintf(fp, dgettext(TEXT_DOMAIN, 926 " Prewrite Count: %u slots\n"), 927 mr->un_pwcnt) == EOF) { 928 Free(mr); 929 goto out; 930 } 931 if (fprintf(fp, dgettext(TEXT_DOMAIN, 932 " Prewrite Slot Size: %u blocks\n"), 933 (mr->un_pwsize / mr->un_pwcnt)) == EOF) { 934 Free(mr); 935 goto out; 936 } 937 if (fprintf(fp, dgettext(TEXT_DOMAIN, 938 " Prewrite Total Size: %u blocks\n"), 939 mr->un_pwsize) == EOF) { 940 Free(mr); 941 goto out; 942 } 943 Free(mr); 944 } 945 946 /* print original devices */ 947 if (fprintf(fp, dgettext(TEXT_DOMAIN, "Original device:\n")) == EOF) 948 goto out; 949 if (fprintf(fp, dgettext(TEXT_DOMAIN, " Size: %lld blocks (%s)\n"), 950 column_size * (orig_ncol - 1), 951 meta_number_to_string(column_size * (orig_ncol - 1), DEV_BSIZE)) 952 == EOF) { 953 goto out; 954 } 955 /* 956 * Building a format string on the fly that will 957 * be used in (f)printf. This allows the length 958 * of the ctd to vary from small to large without 959 * looking horrible. 960 */ 961 for (col = 0; (col < orig_ncol); ++col) { 962 len = max(len, 963 strlen(raidp->cols.cols_val[col].colnamep->cname)); 964 } 965 966 len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device"))); 967 len += 2; 968 969 if (! (options & PRINT_TIMES)) { 970 if (fprintf(fp, 971 "\t%-*.*s %-12.12s %-5.5s %12.12s %-5.5s %s\n", 972 len, len, 973 dgettext(TEXT_DOMAIN, "Device"), 974 dgettext(TEXT_DOMAIN, "Start Block"), 975 dgettext(TEXT_DOMAIN, "Dbase"), 976 dgettext(TEXT_DOMAIN, "State"), 977 dgettext(TEXT_DOMAIN, "Reloc"), 978 dgettext(TEXT_DOMAIN, "Hot Spare")) == EOF) { 979 goto out; 980 } 981 } else { 982 if (fprintf(fp, 983 "\t%-*s %5s %-5s %-11s %-5s %-9s %s\n", 984 len, 985 dgettext(TEXT_DOMAIN, "Device"), 986 dgettext(TEXT_DOMAIN, "Start"), 987 dgettext(TEXT_DOMAIN, "Dbase"), 988 dgettext(TEXT_DOMAIN, "State"), 989 dgettext(TEXT_DOMAIN, "Reloc"), 990 dgettext(TEXT_DOMAIN, "Hot Spare"), 991 dgettext(TEXT_DOMAIN, "Time")) == EOF) { 992 goto out; 993 } 994 } 995 for (col = 0; (col < orig_ncol); ++col) { 996 md_raidcol_t *mdrcp = &raidp->cols.cols_val[col]; 997 998 if (display_raid_device_info(sp, mdrcp, fname, fp, options, 999 len, tstate, ep) != 0) { 1000 return (-1); 1001 } 1002 } 1003 1004 /* print concatenated devices */ 1005 if (col < ncol) { 1006 if (fprintf(fp, dgettext(TEXT_DOMAIN, 1007 "Concatenated Devices:\n")) == EOF) { 1008 goto out; 1009 } 1010 if (fprintf(fp, dgettext(TEXT_DOMAIN, 1011 " Size: %lld blocks (%s)\n"), 1012 column_size * (ncol - orig_ncol), 1013 meta_number_to_string(column_size * (ncol - orig_ncol), 1014 DEV_BSIZE)) 1015 == EOF) { 1016 goto out; 1017 } 1018 /* 1019 * This allows the length 1020 * of the ctd to vary from small to large without 1021 * looking horrible. 1022 */ 1023 if (! (options & PRINT_TIMES)) { 1024 if (fprintf(fp, 1025 "\t%-*.*s %-12.12s %-5.5s %-12.12s %5.5s %s\n", 1026 len, len, 1027 dgettext(TEXT_DOMAIN, "Device"), 1028 dgettext(TEXT_DOMAIN, "Start Block"), 1029 dgettext(TEXT_DOMAIN, "Dbase"), 1030 dgettext(TEXT_DOMAIN, "State"), 1031 dgettext(TEXT_DOMAIN, "Reloc"), 1032 dgettext(TEXT_DOMAIN, "Hot Spare")) == EOF) { 1033 goto out; 1034 } 1035 } else { 1036 if (fprintf(fp, 1037 "\t%-*s %5s %-5s %-11s %-9s %s\t%s\n", 1038 len, 1039 dgettext(TEXT_DOMAIN, "Device"), 1040 dgettext(TEXT_DOMAIN, "Start"), 1041 dgettext(TEXT_DOMAIN, "Dbase"), 1042 dgettext(TEXT_DOMAIN, "State"), 1043 dgettext(TEXT_DOMAIN, "Reloc"), 1044 dgettext(TEXT_DOMAIN, "Hot Spare"), 1045 dgettext(TEXT_DOMAIN, "Time")) == EOF) { 1046 goto out; 1047 } 1048 } 1049 assert(col == orig_ncol); 1050 for (/* void */; (col < ncol); col++) { 1051 md_raidcol_t *mdrcp = &raidp->cols.cols_val[col]; 1052 1053 if (display_raid_device_info(sp, mdrcp, fname, fp, 1054 options, len, tstate, ep) != 0) { 1055 return (-1); 1056 } 1057 } 1058 } 1059 1060 /* add extra line */ 1061 if (fprintf(fp, "\n") == EOF) 1062 goto out; 1063 1064 /* success */ 1065 rval = 0; 1066 1067 /* cleanup, return error */ 1068 out: 1069 if (rval != 0) 1070 (void) mdsyserror(ep, errno, fname); 1071 return (rval); 1072 } 1073 1074 /* 1075 * print/report raid 1076 */ 1077 int 1078 meta_raid_print( 1079 mdsetname_t *sp, 1080 mdname_t *raidnp, 1081 mdnamelist_t **nlpp, 1082 char *fname, 1083 FILE *fp, 1084 mdprtopts_t options, 1085 md_error_t *ep 1086 ) 1087 { 1088 md_raid_t *raidp; 1089 int col; 1090 1091 /* should have same set */ 1092 assert(sp != NULL); 1093 assert((raidnp == NULL) || 1094 (sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)))); 1095 1096 /* print all raids */ 1097 if (raidnp == NULL) { 1098 mdnamelist_t *nlp = NULL; 1099 mdnamelist_t *p; 1100 int cnt; 1101 int rval = 0; 1102 1103 /* get list */ 1104 if ((cnt = meta_get_raid_names(sp, &nlp, options, ep)) < 0) 1105 return (-1); 1106 else if (cnt == 0) 1107 return (0); 1108 1109 /* recurse */ 1110 for (p = nlp; (p != NULL); p = p->next) { 1111 mdname_t *np = p->namep; 1112 1113 if (meta_raid_print(sp, np, nlpp, fname, fp, 1114 options, ep) != 0) 1115 rval = -1; 1116 } 1117 1118 /* cleanup, return success */ 1119 metafreenamelist(nlp); 1120 return (rval); 1121 } 1122 1123 /* get unit structure */ 1124 if ((raidp = meta_get_raid_common(sp, raidnp, 1125 ((options & PRINT_FAST) ? 1 : 0), ep)) == NULL) 1126 return (-1); 1127 1128 /* check for parented */ 1129 if ((! (options & PRINT_SUBDEVS)) && 1130 (MD_HAS_PARENT(raidp->common.parent))) { 1131 return (0); 1132 } 1133 1134 /* print appropriate detail */ 1135 if (options & PRINT_SHORT) { 1136 if (raid_print(raidp, fname, fp, options, ep) != 0) 1137 return (-1); 1138 } else { 1139 if (raid_report(sp, raidp, fname, fp, options, ep) != 0) 1140 return (-1); 1141 } 1142 1143 /* Recurse on components that are metadevices */ 1144 for (col = 0; col < raidp->cols.cols_len; ++col) { 1145 md_raidcol_t *colp = &raidp->cols.cols_val[col]; 1146 mdname_t *namep = colp->colnamep; 1147 1148 if ((metaismeta(namep)) && 1149 (meta_print_name(sp, namep, nlpp, fname, fp, 1150 (options | PRINT_HEADER | PRINT_SUBDEVS), 1151 NULL, ep) != 0)) { 1152 return (-1); 1153 } 1154 } 1155 1156 return (0); 1157 } 1158 1159 /* 1160 * adjust raid geometry 1161 */ 1162 static int 1163 adjust_geom( 1164 mdname_t *raidnp, 1165 mdname_t *colnp, 1166 mr_unit_t *mr, 1167 md_error_t *ep 1168 ) 1169 { 1170 uint_t round_cyl = 1; 1171 mdgeom_t *geomp; 1172 1173 /* get reinstructs */ 1174 if ((geomp = metagetgeom(colnp, ep)) == NULL) 1175 return (-1); 1176 1177 /* adjust geometry */ 1178 if (meta_adjust_geom((md_unit_t *)mr, raidnp, geomp->write_reinstruct, 1179 geomp->read_reinstruct, round_cyl, ep) != 0) 1180 return (-1); 1181 1182 /* return success */ 1183 return (0); 1184 } 1185 1186 /* 1187 * add another column to the raid unit structure 1188 */ 1189 static int 1190 attach_raid_col( 1191 mdsetname_t *sp, 1192 mdname_t *raidnp, 1193 mr_unit_t *mr, 1194 mr_column_t *mdc, 1195 mdname_t *colnp, 1196 rcs_state_t state, 1197 mdnamelist_t **keynlpp, 1198 mdcmdopts_t options, 1199 md_error_t *ep 1200 ) 1201 { 1202 diskaddr_t column_size = mr->un_segsize * mr->un_segsincolumn; 1203 diskaddr_t size; 1204 uint_t maxio; 1205 mdcinfo_t *cinfop; 1206 md_timeval32_t tmp_time; 1207 1208 /* setup state and timestamp */ 1209 mdc->un_devstate = state; 1210 if (meta_gettimeofday(&tmp_time) == -1) 1211 return (mdsyserror(ep, errno, NULL)); 1212 1213 mdc->un_devtimestamp = tmp_time; 1214 /* get start, size, and maxio */ 1215 if ((mdc->un_orig_devstart = metagetstart(sp, colnp, ep)) == 1216 MD_DISKADDR_ERROR) 1217 return (-1); 1218 if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR) 1219 return (-1); 1220 if ((cinfop = metagetcinfo(colnp, ep)) == NULL) 1221 return (-1); 1222 maxio = cinfop->maxtransfer; 1223 1224 /* adjust start and size by prewrite */ 1225 mdc->un_orig_pwstart = mdc->un_orig_devstart; 1226 mdc->un_orig_devstart += mr->un_pwsize; 1227 1228 /* make sure we still have something left */ 1229 if ((mdc->un_orig_devstart >= size) || 1230 ((size - mdc->un_orig_devstart) < column_size)) { 1231 return (mdsyserror(ep, ENOSPC, colnp->cname)); 1232 } 1233 size -= mdc->un_orig_devstart; 1234 if (maxio < mr->un_maxio) { 1235 return (mdcomperror(ep, MDE_MAXIO, 1236 meta_getminor(raidnp->dev), colnp->dev, colnp->cname)); 1237 } 1238 1239 if (options & MDCMD_DOIT) { 1240 /* store name in namespace */ 1241 if (add_key_name(sp, colnp, keynlpp, ep) != 0) 1242 return (-1); 1243 } 1244 1245 /* setup column */ 1246 mdc->un_orig_dev = colnp->dev; 1247 mdc->un_orig_key = colnp->key; 1248 mdc->un_dev = colnp->dev; 1249 mdc->un_pwstart = mdc->un_orig_pwstart; 1250 mdc->un_devstart = mdc->un_orig_devstart; 1251 mdc->un_alt_dev = NODEV64; 1252 mdc->un_alt_pwstart = 0; 1253 mdc->un_alt_devstart = 0; 1254 mdc->un_hs_id = 0; 1255 1256 /* add the size (we use) of the device to the total */ 1257 mr->c.un_actual_tb += column_size; 1258 1259 /* adjust geometry */ 1260 if (adjust_geom(raidnp, colnp, mr, ep) != 0) 1261 return (-1); 1262 1263 /* count column */ 1264 mr->un_totalcolumncnt++; 1265 1266 /* return success */ 1267 return (0); 1268 } 1269 1270 /* 1271 * invalidate column names 1272 */ 1273 static int 1274 invalidate_columns( 1275 mdsetname_t *sp, 1276 mdname_t *raidnp, 1277 md_error_t *ep 1278 ) 1279 { 1280 md_raid_t *raidp; 1281 uint_t col; 1282 1283 if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL) 1284 return (-1); 1285 for (col = 0; (col < raidp->cols.cols_len); ++col) { 1286 md_raidcol_t *cp = &raidp->cols.cols_val[col]; 1287 mdname_t *colnp = cp->colnamep; 1288 1289 meta_invalidate_name(colnp); 1290 } 1291 return (0); 1292 } 1293 1294 /* 1295 * attach columns to raid 1296 */ 1297 int 1298 meta_raid_attach( 1299 mdsetname_t *sp, 1300 mdname_t *raidnp, 1301 mdnamelist_t *colnlp, 1302 mdcmdopts_t options, 1303 md_error_t *ep 1304 ) 1305 { 1306 uint_t concat_cnt = 0; 1307 mdnamelist_t *p; 1308 mr_unit_t *old_mr; 1309 mr_unit_t *new_mr; 1310 size_t old_rusize; 1311 size_t new_rusize; 1312 mdnamelist_t *keynlp = NULL; 1313 md_grow_params_t mgp; 1314 int rval = -1; 1315 int create_flag = MD_CRO_32BIT; 1316 1317 /* should have a set */ 1318 assert(sp != NULL); 1319 assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))); 1320 1321 /* check type */ 1322 if (metachkmeta(raidnp, ep) != 0) 1323 return (-1); 1324 1325 /* check and count new columns */ 1326 for (p = colnlp; (p != NULL); p = p->next) { 1327 mdname_t *np = p->namep; 1328 mdnamelist_t *p2; 1329 1330 /* check against existing devices */ 1331 if (meta_check_column(sp, np, ep) != 0) 1332 return (-1); 1333 1334 /* check against ourselves */ 1335 for (p2 = p->next; (p2 != NULL); p2 = p2->next) { 1336 if (meta_check_overlap(np->cname, np, 0, -1, 1337 p2->namep, 0, -1, ep) != 0) { 1338 return (-1); 1339 } 1340 } 1341 1342 /* count */ 1343 ++concat_cnt; 1344 } 1345 1346 /* get old unit */ 1347 if ((old_mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL) 1348 return (-1); 1349 1350 /* 1351 * calculate the size needed for the new raid unit and allocate 1352 * the appropriate structure. allocate new unit. 1353 */ 1354 old_rusize = sizeof (*old_mr) - sizeof (old_mr->un_column[0]); 1355 old_rusize += old_mr->un_totalcolumncnt * sizeof (old_mr->un_column[0]); 1356 new_rusize = sizeof (*new_mr) - sizeof (new_mr->un_column[0]); 1357 new_rusize += (old_mr->un_totalcolumncnt + concat_cnt) 1358 * sizeof (new_mr->un_column[0]); 1359 new_mr = Zalloc(new_rusize); 1360 (void) memcpy(new_mr, old_mr, old_rusize); 1361 1362 /* We always want a do-it, this is for attach_raid_col below */ 1363 options |= MDCMD_DOIT; 1364 1365 /* build new unit structure */ 1366 for (p = colnlp; (p != NULL); p = p->next) { 1367 mdname_t *colnp = p->namep; 1368 mr_column_t *mdc; 1369 1370 /* attach column */ 1371 mdc = &new_mr->un_column[new_mr->un_totalcolumncnt]; 1372 if (attach_raid_col(sp, raidnp, new_mr, mdc, colnp, 1373 RCS_INIT, &keynlp, options, ep) != 0) { 1374 goto out; 1375 } 1376 } 1377 assert(new_mr->un_totalcolumncnt 1378 == (old_mr->un_totalcolumncnt + concat_cnt)); 1379 1380 1381 create_flag = meta_check_devicesize(new_mr->c.un_total_blocks); 1382 1383 /* grow raid */ 1384 (void) memset(&mgp, 0, sizeof (mgp)); 1385 mgp.mnum = MD_SID(new_mr); 1386 MD_SETDRIVERNAME(&mgp, MD_RAID, sp->setno); 1387 mgp.size = new_rusize; 1388 mgp.mdp = (uintptr_t)new_mr; 1389 1390 if (create_flag == MD_CRO_32BIT) { 1391 mgp.options = MD_CRO_32BIT; 1392 new_mr->c.un_revision &= ~MD_64BIT_META_DEV; 1393 } else { 1394 mgp.options = MD_CRO_64BIT; 1395 new_mr->c.un_revision |= MD_64BIT_META_DEV; 1396 } 1397 if (metaioctl(MD_IOCGROW, &mgp, &mgp.mde, NULL) != 0) { 1398 (void) mdstealerror(ep, &mgp.mde); 1399 goto out; 1400 } 1401 1402 /* clear cache */ 1403 if (invalidate_columns(sp, raidnp, ep) != 0) 1404 goto out; 1405 meta_invalidate_name(raidnp); 1406 1407 /* let em know */ 1408 if (options & MDCMD_PRINT) { 1409 if (concat_cnt == 1) { 1410 (void) printf(dgettext(TEXT_DOMAIN, 1411 "%s: component is attached\n"), 1412 raidnp->cname); 1413 } else { 1414 (void) printf(dgettext(TEXT_DOMAIN, 1415 "%s: components are attached\n"), 1416 raidnp->cname); 1417 } 1418 (void) fflush(stdout); 1419 } 1420 1421 1422 /* grow any parents */ 1423 if (meta_concat_parent(sp, raidnp, ep) != 0) 1424 goto out; 1425 rval = 0; /* success */ 1426 1427 /* cleanup, return error */ 1428 out: 1429 Free(old_mr); 1430 Free(new_mr); 1431 if (rval != 0) 1432 (void) del_key_names(sp, keynlp, NULL); 1433 metafreenamelist(keynlp); 1434 return (rval); 1435 } 1436 1437 /* 1438 * get raid parameters 1439 */ 1440 int 1441 meta_raid_get_params( 1442 mdsetname_t *sp, 1443 mdname_t *raidnp, 1444 mr_params_t *paramsp, 1445 md_error_t *ep 1446 ) 1447 { 1448 md_raid_t *raidp; 1449 1450 /* should have a set */ 1451 assert(sp != NULL); 1452 assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))); 1453 1454 /* check name */ 1455 if (metachkmeta(raidnp, ep) != 0) 1456 return (-1); 1457 1458 /* get unit */ 1459 if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL) 1460 return (-1); 1461 1462 /* return parameters */ 1463 (void) memset(paramsp, 0, sizeof (*paramsp)); 1464 if (raidp->hspnamep == NULL) 1465 paramsp->hsp_id = MD_HSP_NONE; 1466 else 1467 paramsp->hsp_id = raidp->hspnamep->hsp; 1468 return (0); 1469 } 1470 1471 /* 1472 * set raid parameters 1473 */ 1474 int 1475 meta_raid_set_params( 1476 mdsetname_t *sp, 1477 mdname_t *raidnp, 1478 mr_params_t *paramsp, 1479 md_error_t *ep 1480 ) 1481 { 1482 md_raid_params_t msp; 1483 1484 /* should have a set */ 1485 assert(sp != NULL); 1486 assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))); 1487 1488 /* check name */ 1489 if (metachkmeta(raidnp, ep) != 0) 1490 return (-1); 1491 1492 /* set parameters */ 1493 (void) memset(&msp, 0, sizeof (msp)); 1494 MD_SETDRIVERNAME(&msp, MD_RAID, sp->setno); 1495 msp.mnum = meta_getminor(raidnp->dev); 1496 msp.params = *paramsp; 1497 if (metaioctl(MD_IOCCHANGE, &msp, &msp.mde, raidnp->cname) != 0) 1498 return (mdstealerror(ep, &msp.mde)); 1499 1500 /* clear cache */ 1501 meta_invalidate_name(raidnp); 1502 1503 /* return success */ 1504 return (0); 1505 } 1506 1507 /* 1508 * validate raid replace column 1509 */ 1510 static int 1511 validate_new_raid( 1512 mdsetname_t *sp, 1513 mdname_t *raidnp, 1514 mdname_t *colnp, 1515 replace_params_t *paramsp, 1516 int dup_ok, 1517 md_error_t *ep 1518 ) 1519 { 1520 mr_unit_t *mr; 1521 diskaddr_t column_size; 1522 diskaddr_t label; 1523 mdcinfo_t *cinfop; 1524 int rval = -1; 1525 1526 /* get raid unit */ 1527 if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL) 1528 return (-1); 1529 column_size = mr->un_segsize * mr->un_segsincolumn; 1530 1531 /* check it out */ 1532 if (meta_check_column(sp, colnp, ep) != 0) { 1533 if ((! dup_ok) || (! mdisuseerror(ep, MDE_ALREADY))) 1534 goto out; 1535 mdclrerror(ep); 1536 } 1537 if ((paramsp->number_blks = metagetsize(colnp, ep)) == 1538 MD_DISKADDR_ERROR) 1539 goto out; 1540 if ((label = metagetlabel(colnp, ep)) == MD_DISKADDR_ERROR) 1541 goto out; 1542 paramsp->has_label = ((label > 0) ? 1 : 0); 1543 if ((paramsp->start_blk = metagetstart(sp, colnp, ep)) == 1544 MD_DISKADDR_ERROR) 1545 goto out; 1546 if ((paramsp->number_blks - paramsp->start_blk) < column_size) { 1547 (void) mdsyserror(ep, ENOSPC, colnp->cname); 1548 goto out; 1549 } 1550 if ((cinfop = metagetcinfo(colnp, ep)) == NULL) 1551 goto out; 1552 if (cinfop->maxtransfer < mr->un_maxio) { 1553 (void) mdcomperror(ep, MDE_MAXIO, meta_getminor(raidnp->dev), 1554 colnp->dev, colnp->cname); 1555 goto out; 1556 } 1557 1558 /* success */ 1559 rval = 0; 1560 1561 /* cleanup, return error */ 1562 out: 1563 Free(mr); 1564 return (rval); 1565 } 1566 1567 /* 1568 * replace raid column 1569 */ 1570 int 1571 meta_raid_replace( 1572 mdsetname_t *sp, 1573 mdname_t *raidnp, 1574 mdname_t *oldnp, 1575 mdname_t *newnp, 1576 mdcmdopts_t options, 1577 md_error_t *ep 1578 ) 1579 { 1580 int force = ((options & MDCMD_FORCE) ? 1 : 0); 1581 replace_params_t params; 1582 md_dev64_t old_dev, new_dev; 1583 diskaddr_t new_start_blk, new_end_blk; 1584 int rebind; 1585 char *new_devidp = NULL; 1586 md_error_t xep = mdnullerror; 1587 int ret; 1588 md_set_desc *sd; 1589 uint_t tstate; 1590 1591 /* should have same set */ 1592 assert(sp != NULL); 1593 assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))); 1594 1595 /* check name */ 1596 if (metachkmeta(raidnp, ep) != 0) 1597 return (-1); 1598 1599 /* save new binding incase this is a rebind where oldnp==newnp */ 1600 new_dev = newnp->dev; 1601 new_start_blk = newnp->start_blk; 1602 new_end_blk = newnp->end_blk; 1603 1604 /* invalidate, then get the raid (fill in oldnp from metadb) */ 1605 meta_invalidate_name(raidnp); 1606 if (meta_get_raid(sp, raidnp, ep) == NULL) 1607 return (-1); 1608 1609 /* can't replace a component if the raid inaccessible */ 1610 if (meta_get_tstate(raidnp->dev, &tstate, ep) != 0) { 1611 return (-1); 1612 } 1613 if (tstate & MD_INACCESSIBLE) { 1614 return (mdmderror(ep, MDE_IN_UNAVAIL_STATE, 1615 meta_getminor(raidnp->dev), raidnp->cname)); 1616 } 1617 1618 /* the old device binding is now established */ 1619 if ((old_dev = oldnp->dev) == NODEV64) 1620 return (mdsyserror(ep, ENODEV, oldnp->cname)); 1621 1622 1623 /* setup raid info */ 1624 (void) memset(¶ms, 0, sizeof (params)); 1625 params.mnum = meta_getminor(raidnp->dev); 1626 MD_SETDRIVERNAME(¶ms, MD_RAID, sp->setno); 1627 params.old_dev = old_dev; 1628 params.cmd = force ? FORCE_REPLACE_COMP : REPLACE_COMP; 1629 1630 if ((strcmp(oldnp->rname, newnp->rname) == 0) && 1631 (old_dev != new_dev)) { 1632 rebind = 1; 1633 } else { 1634 rebind = 0; 1635 } 1636 if (rebind) { 1637 newnp->dev = new_dev; 1638 newnp->start_blk = new_start_blk; 1639 newnp->end_blk = new_end_blk; 1640 } 1641 1642 /* 1643 * Save a copy of the devid associated with the new disk, the 1644 * reason is that the checks for the column (meta_check_column) 1645 * via validate_new_raid(), could cause the disk's devid to be 1646 * changed to that of the devid that is currently stored in the 1647 * replica namespace for the disk in question. This devid could 1648 * be stale if we are replacing the disk. The actual function 1649 * that overwrites the devid is dr2drivedesc(). 1650 */ 1651 1652 /* don't setup new_devid if no devid's or MN diskset */ 1653 if (newnp->drivenamep->devid != NULL) 1654 new_devidp = Strdup(newnp->drivenamep->devid); 1655 1656 if (!metaislocalset(sp)) { 1657 if ((sd = metaget_setdesc(sp, ep)) == NULL) 1658 return (-1); 1659 if (MD_MNSET_DESC(sd)) 1660 new_devidp = NULL; 1661 } 1662 1663 /* check out new (sets up start_blk, has_label, number_blks) */ 1664 if (validate_new_raid(sp, raidnp, newnp, ¶ms, rebind, 1665 ep) != 0) { 1666 Free(new_devidp); 1667 return (-1); 1668 } 1669 1670 /* 1671 * Copy back the saved devid. 1672 */ 1673 Free(newnp->drivenamep->devid); 1674 if (new_devidp) { 1675 newnp->drivenamep->devid = Strdup(new_devidp); 1676 Free(new_devidp); 1677 } 1678 1679 /* store name in namespace, allocate new key */ 1680 if (add_key_name(sp, newnp, NULL, ep) != 0) 1681 return (-1); 1682 1683 if (rebind && !metaislocalset(sp)) { 1684 /* 1685 * We are 'rebind'ing a disk that is in a diskset so as well 1686 * as updating the diskset's namespace the local set needs 1687 * to be updated because it also contains a reference to the 1688 * disk in question. 1689 */ 1690 ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET, 1691 newnp->cname, ep); 1692 1693 if (ret != METADEVADM_SUCCESS) { 1694 (void) del_key_name(sp, newnp, &xep); 1695 return (-1); 1696 } 1697 } 1698 1699 /* replace column */ 1700 params.new_dev = new_dev; 1701 params.new_key = newnp->key; 1702 if (metaioctl(MD_IOCREPLACE, ¶ms, ¶ms.mde, NULL) != 0) { 1703 (void) del_key_name(sp, newnp, ep); 1704 return (mdstealerror(ep, ¶ms.mde)); 1705 } 1706 1707 /* clear cache */ 1708 meta_invalidate_name(oldnp); 1709 meta_invalidate_name(newnp); 1710 meta_invalidate_name(raidnp); 1711 1712 /* let em know */ 1713 if (options & MDCMD_PRINT) { 1714 (void) printf(dgettext(TEXT_DOMAIN, 1715 "%s: device %s is replaced with %s\n"), 1716 raidnp->cname, oldnp->cname, newnp->cname); 1717 (void) fflush(stdout); 1718 } 1719 1720 /* return success */ 1721 return (0); 1722 } 1723 1724 /* 1725 * enable raid column 1726 */ 1727 int 1728 meta_raid_enable( 1729 mdsetname_t *sp, 1730 mdname_t *raidnp, 1731 mdname_t *colnp, 1732 mdcmdopts_t options, 1733 md_error_t *ep 1734 ) 1735 { 1736 int force = ((options & MDCMD_FORCE) ? 1 : 0); 1737 replace_params_t params; 1738 md_dev64_t fs_dev, del_dev; 1739 int err = 0; 1740 char *devnm; 1741 int ret; 1742 uint_t tstate; 1743 1744 /* should have same set */ 1745 assert(sp != NULL); 1746 assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))); 1747 1748 /* check name */ 1749 if (metachkmeta(raidnp, ep) != 0) 1750 return (-1); 1751 1752 /* get the file_system dev binding */ 1753 if (meta_getdev(sp, colnp, ep) != 0) 1754 return (-1); 1755 fs_dev = colnp->dev; 1756 1757 /* get the raid unit (fill in colnp->dev with metadb version) */ 1758 meta_invalidate_name(raidnp); 1759 if (meta_get_raid(sp, raidnp, ep) == NULL) 1760 return (-1); 1761 1762 /* enabling a component can't work if the raid inaccessible */ 1763 if (meta_get_tstate(raidnp->dev, &tstate, ep) != 0) { 1764 return (-1); 1765 } 1766 if (tstate & MD_INACCESSIBLE) { 1767 return (mdmderror(ep, MDE_IN_UNAVAIL_STATE, 1768 meta_getminor(raidnp->dev), raidnp->cname)); 1769 } 1770 1771 /* the metadb device binding is now established */ 1772 if (colnp->dev == NODEV64) 1773 return (mdsyserror(ep, ENODEV, colnp->cname)); 1774 1775 /* 1776 * check for the case where the dev_t has changed between the 1777 * filesystem and the metadb. This is called a rebind, and 1778 * is handled by meta_raid_replace. 1779 */ 1780 if (fs_dev != colnp->dev) { 1781 /* 1782 * Save the devt of mddb version 1783 */ 1784 del_dev = colnp->dev; 1785 1786 /* establish file system binding with invalid start/end */ 1787 colnp->dev = fs_dev; 1788 colnp->start_blk = -1; 1789 colnp->end_blk = -1; 1790 err = meta_raid_replace(sp, raidnp, colnp, colnp, options, ep); 1791 1792 /* 1793 * Don't do it if meta_raid_replace returns an error 1794 */ 1795 if (!err && (devnm = meta_getnmentbydev(sp->setno, MD_SIDEWILD, 1796 del_dev, NULL, NULL, &colnp->key, ep)) != NULL) { 1797 (void) del_key_name(sp, colnp, ep); 1798 Free(devnm); 1799 } 1800 return (err); 1801 } 1802 1803 /* setup raid info */ 1804 (void) memset(¶ms, 0, sizeof (params)); 1805 params.mnum = meta_getminor(raidnp->dev); 1806 MD_SETDRIVERNAME(¶ms, MD_RAID, sp->setno); 1807 params.old_dev = params.new_dev = colnp->dev; 1808 if (force) 1809 params.cmd = FORCE_ENABLE_COMP; 1810 else 1811 params.cmd = ENABLE_COMP; 1812 1813 /* check it out */ 1814 if (validate_new_raid(sp, raidnp, colnp, ¶ms, 1, ep) != 0) 1815 return (-1); 1816 1817 /* enable column */ 1818 if (metaioctl(MD_IOCREPLACE, ¶ms, ¶ms.mde, NULL) != 0) 1819 return (mdstealerror(ep, ¶ms.mde)); 1820 1821 /* 1822 * are we dealing with a non-local set? If so need to update the 1823 * local namespace so that the disk record has the correct devid. 1824 */ 1825 if (!metaislocalset(sp)) { 1826 ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET, colnp->cname, 1827 ep); 1828 1829 if (ret != METADEVADM_SUCCESS) { 1830 /* 1831 * Failed to update the local set. Nothing to do here 1832 * apart from report the error. The namespace is 1833 * most likely broken and some form of remedial 1834 * recovery is going to be required. 1835 */ 1836 mde_perror(ep, ""); 1837 mdclrerror(ep); 1838 } 1839 } 1840 1841 /* clear cache */ 1842 meta_invalidate_name(colnp); 1843 meta_invalidate_name(raidnp); 1844 1845 /* let em know */ 1846 if (options & MDCMD_PRINT) { 1847 (void) printf(dgettext(TEXT_DOMAIN, 1848 "%s: device %s is enabled\n"), 1849 raidnp->cname, colnp->cname); 1850 (void) fflush(stdout); 1851 } 1852 1853 /* return success */ 1854 return (0); 1855 } 1856 1857 /* 1858 * check for dups in the raid itself 1859 */ 1860 static int 1861 check_twice( 1862 md_raid_t *raidp, 1863 uint_t col, 1864 md_error_t *ep 1865 ) 1866 { 1867 mdname_t *raidnp = raidp->common.namep; 1868 mdname_t *thisnp; 1869 uint_t c; 1870 1871 thisnp = raidp->cols.cols_val[col].colnamep; 1872 for (c = 0; (c < col); ++c) { 1873 md_raidcol_t *mdcp = &raidp->cols.cols_val[c]; 1874 mdname_t *colnp = mdcp->colnamep; 1875 1876 if (meta_check_overlap(raidnp->cname, thisnp, 0, -1, 1877 colnp, 0, -1, ep) != 0) { 1878 return (-1); 1879 } 1880 } 1881 return (0); 1882 } 1883 1884 /* 1885 * default raid interlace 1886 */ 1887 diskaddr_t 1888 meta_default_raid_interlace(void) 1889 { 1890 diskaddr_t interlace; 1891 1892 /* default to 512k, round up if necessary */ 1893 interlace = btodb(512 * 1024); 1894 if (interlace < lbtodb(MININTERLACE)) 1895 interlace = roundup(MININTERLACE, interlace); 1896 return (interlace); 1897 } 1898 1899 /* 1900 * convert interlaces 1901 */ 1902 int 1903 meta_raid_check_interlace( 1904 diskaddr_t interlace, 1905 char *uname, 1906 md_error_t *ep 1907 ) 1908 { 1909 if ((interlace < btodb(RAID_MIN_INTERLACE)) || 1910 (interlace > btodb(MAXINTERLACE))) { 1911 return (mderror(ep, MDE_BAD_INTERLACE, uname)); 1912 } 1913 return (0); 1914 } 1915 1916 /* 1917 * check raid 1918 */ 1919 int 1920 meta_check_raid( 1921 mdsetname_t *sp, 1922 md_raid_t *raidp, 1923 mdcmdopts_t options, 1924 md_error_t *ep 1925 ) 1926 { 1927 mdname_t *raidnp = raidp->common.namep; 1928 int doit = ((options & MDCMD_DOIT) ? 1 : 0); 1929 int updateit = ((options & MDCMD_UPDATE) ? 1 : 0); 1930 uint_t ncol; 1931 uint_t col; 1932 minor_t mnum = meta_getminor(raidnp->dev); 1933 1934 /* check number */ 1935 if (((ncol = raidp->cols.cols_len) < MD_RAID_MIN) || 1936 (raidp->orig_ncol > ncol)) { 1937 return (mdmderror(ep, MDE_BAD_RAID, mnum, raidnp->cname)); 1938 } 1939 1940 /* compute default interlace */ 1941 if (raidp->interlace == 0) { 1942 raidp->interlace = meta_default_raid_interlace(); 1943 } 1944 1945 /* check state */ 1946 switch (raidp->state) { 1947 case RUS_INIT: 1948 case RUS_OKAY: 1949 break; 1950 1951 default: 1952 return (mdmderror(ep, MDE_BAD_RAID, mnum, raidnp->cname)); 1953 } 1954 1955 /* check interlace */ 1956 if (meta_raid_check_interlace(raidp->interlace, raidnp->cname, ep) != 0) 1957 return (-1); 1958 1959 /* check hotspare pool name */ 1960 if (doit) { 1961 if ((raidp->hspnamep != NULL) && 1962 (metachkhsp(sp, raidp->hspnamep, ep) != 0)) { 1963 return (-1); 1964 } 1965 } 1966 1967 /* check columns */ 1968 for (col = 0; (col < ncol); ++col) { 1969 md_raidcol_t *mdcp = &raidp->cols.cols_val[col]; 1970 mdname_t *colnp = mdcp->colnamep; 1971 diskaddr_t start_blk, size; 1972 1973 /* setup column */ 1974 if (raidp->state == RUS_INIT) 1975 mdcp->state = RCS_INIT; 1976 else 1977 mdcp->state = RCS_OKAY; 1978 1979 /* check column */ 1980 if (!updateit) { 1981 if (meta_check_column(sp, colnp, ep) != 0) 1982 return (-1); 1983 if (((start_blk = metagetstart(sp, colnp, ep)) == 1984 MD_DISKADDR_ERROR) || ((size = metagetsize(colnp, 1985 ep)) == MD_DISKADDR_ERROR)) { 1986 return (-1); 1987 } 1988 if (start_blk >= size) 1989 return (mdsyserror(ep, ENOSPC, colnp->cname)); 1990 size -= start_blk; 1991 size = rounddown(size, raidp->interlace); 1992 if (size == 0) 1993 return (mdsyserror(ep, ENOSPC, colnp->cname)); 1994 } 1995 1996 /* check this raid too */ 1997 if (check_twice(raidp, col, ep) != 0) 1998 return (-1); 1999 } 2000 2001 /* return success */ 2002 return (0); 2003 } 2004 2005 /* 2006 * setup raid geometry 2007 */ 2008 static int 2009 raid_geom( 2010 md_raid_t *raidp, 2011 mr_unit_t *mr, 2012 md_error_t *ep 2013 ) 2014 { 2015 uint_t write_reinstruct = 0; 2016 uint_t read_reinstruct = 0; 2017 uint_t round_cyl = 1; 2018 uint_t col; 2019 mdgeom_t *geomp; 2020 2021 /* get worst reinstructs */ 2022 for (col = 0; (col < raidp->cols.cols_len); ++col) { 2023 md_raidcol_t *mdcp = &raidp->cols.cols_val[col]; 2024 mdname_t *colnp = mdcp->colnamep; 2025 2026 if ((geomp = metagetgeom(colnp, ep)) == NULL) 2027 return (-1); 2028 if (geomp->write_reinstruct > write_reinstruct) 2029 write_reinstruct = geomp->write_reinstruct; 2030 if (geomp->read_reinstruct > read_reinstruct) 2031 read_reinstruct = geomp->read_reinstruct; 2032 } 2033 2034 /* setup geometry from first column */ 2035 assert(raidp->cols.cols_len > 0); 2036 if ((geomp = metagetgeom(raidp->cols.cols_val[0].colnamep, 2037 ep)) == NULL) { 2038 return (-1); 2039 } 2040 if (meta_setup_geom((md_unit_t *)mr, raidp->common.namep, geomp, 2041 write_reinstruct, read_reinstruct, round_cyl, ep) != 0) 2042 return (-1); 2043 2044 /* return success */ 2045 return (0); 2046 } 2047 2048 int 2049 meta_raid_state_cnt(mr_unit_t *mr, rcs_state_t state) 2050 { 2051 int statecnt = 0; 2052 int col; 2053 2054 for (col = 0; col < mr->un_totalcolumncnt; col++) 2055 if (mr->un_column[col].un_devstate & state) 2056 statecnt++; 2057 return (statecnt); 2058 } 2059 /* 2060 * validate that a raid device being created with the -k flag is a real 2061 * raid device 2062 */ 2063 int 2064 meta_raid_valid(md_raid_t *raidp, mr_unit_t *mr) 2065 { 2066 long long buf[DEV_BSIZE / sizeof (long long)]; 2067 raid_pwhdr_t pwhdr; 2068 raid_pwhdr_t *rpw = &pwhdr; 2069 minor_t mnum; 2070 int col; 2071 int fd; 2072 2073 for (col = 0; col < mr->un_totalcolumncnt; col++) { 2074 md_raidcol_t *cp = &raidp->cols.cols_val[col]; 2075 mdname_t *colnp = cp->colnamep; 2076 2077 if ((fd = open(colnp->rname, O_RDONLY)) < 0) 2078 goto error_exit; 2079 2080 if (lseek64(fd, 2081 (mr->un_column[col].un_pwstart * DEV_BSIZE), SEEK_SET) < 0) 2082 goto error_exit; 2083 2084 if (read(fd, buf, DEV_BSIZE) < 0) 2085 goto error_exit; 2086 2087 /* 2088 * If our raid device is a 64 bit device, we can accept the 2089 * pw header we just read in. 2090 * Otherwise it's of type raid_pwhdr32_od_t and has to 2091 * be converted. 2092 */ 2093 if (mr->c.un_revision & MD_64BIT_META_DEV) { 2094 rpw = (raid_pwhdr_t *)buf; 2095 } else { 2096 RAID_CONVERT_RPW((raid_pwhdr32_od_t *)buf, rpw); 2097 } 2098 2099 if (rpw->rpw_column != col) 2100 goto error_exit; 2101 2102 if (col == 0) 2103 mnum = rpw->rpw_unit; 2104 2105 if (rpw->rpw_unit != mnum) 2106 goto error_exit; 2107 2108 if (rpw->rpw_magic_ext == RAID_PWMAGIC) { 2109 /* 4.1 prewrite header */ 2110 if ((rpw->rpw_origcolumncnt != mr->un_origcolumncnt) || 2111 (rpw->rpw_totalcolumncnt 2112 != mr->un_totalcolumncnt) || 2113 (rpw->rpw_segsize != mr->un_segsize) || 2114 (rpw->rpw_segsincolumn != mr->un_segsincolumn) || 2115 (rpw->rpw_pwcnt != mr->un_pwcnt) || 2116 (rpw->rpw_pwstart != 2117 mr->un_column[col].un_pwstart) || 2118 (rpw->rpw_devstart != 2119 mr->un_column[col].un_devstart) || 2120 (rpw->rpw_pwsize != mr->un_pwsize)) 2121 goto error_exit; 2122 } 2123 /* 2124 * this is an old prewrite header (4.0) the unit structure 2125 * will have to be trusted. 2126 */ 2127 (void) close(fd); 2128 } 2129 2130 return (0); 2131 2132 error_exit: 2133 (void) close(fd); 2134 return (-1); 2135 } 2136 2137 /* 2138 * create raid 2139 */ 2140 int 2141 meta_create_raid( 2142 mdsetname_t *sp, 2143 md_raid_t *raidp, 2144 mdcmdopts_t options, 2145 md_error_t *ep 2146 ) 2147 { 2148 mdname_t *raidnp = raidp->common.namep; 2149 uint_t ncol = raidp->cols.cols_len; 2150 uint_t orig_ncol = raidp->orig_ncol; 2151 size_t rdsize; 2152 mr_unit_t *mr; 2153 uint_t col; 2154 diskaddr_t disk_size = 0; 2155 uint_t disk_maxio = 0; 2156 uint_t pwes; 2157 diskaddr_t non_pw_blks, column_size; 2158 mdnamelist_t *keynlp = NULL; 2159 md_set_params_t set_params; 2160 int rval = -1; 2161 md_timeval32_t creation_time; 2162 int create_flag = MD_CRO_32BIT; 2163 2164 /* validate raid */ 2165 if (meta_check_raid(sp, raidp, options, ep) != 0) 2166 return (-1); 2167 2168 /* allocate raid unit */ 2169 rdsize = sizeof (*mr) - sizeof (mr->un_column[0]); 2170 rdsize += ncol * sizeof (mr->un_column[0]); 2171 mr = Zalloc(rdsize); 2172 2173 if (meta_gettimeofday(&creation_time) == -1) 2174 return (mdsyserror(ep, errno, NULL)); 2175 /* 2176 * initialize the top level mr_unit_t structure 2177 * setup the unit state to indicate whether to retain 2178 * any data currently on the metadevice or to clear it 2179 */ 2180 mr->c.un_type = MD_METARAID; 2181 MD_SID(mr) = meta_getminor(raidnp->dev); 2182 mr->c.un_size = rdsize; 2183 mr->un_magic = RAID_UNMAGIC; 2184 mr->un_state = raidp->state; 2185 mr->un_timestamp = creation_time; 2186 mr->un_origcolumncnt = orig_ncol; 2187 mr->un_segsize = (uint_t)raidp->interlace; 2188 if (raidp->hspnamep != NULL) { 2189 mr->un_hsp_id = raidp->hspnamep->hsp; 2190 } else { 2191 mr->un_hsp_id = MD_HSP_NONE; 2192 } 2193 /* 2194 * setup original columns, saving start_block and 2195 * finding smallest size and maxio 2196 */ 2197 for (col = 0; (col < orig_ncol); ++col) { 2198 md_raidcol_t *cp = &raidp->cols.cols_val[col]; 2199 mdname_t *colnp = cp->colnamep; 2200 mr_column_t *mdc = &mr->un_column[col]; 2201 diskaddr_t size; 2202 uint_t maxio; 2203 mdcinfo_t *cinfop; 2204 2205 /* setup state */ 2206 mdc->un_devstate = cp->state; 2207 2208 /* setup creation time */ 2209 mdc->un_devtimestamp = creation_time; 2210 2211 /* get start, size, and maxio */ 2212 if ((mdc->un_orig_devstart = metagetstart(sp, colnp, ep)) == 2213 MD_DISKADDR_ERROR) 2214 goto out; 2215 if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR) 2216 goto out; 2217 size -= mdc->un_orig_devstart; 2218 if ((cinfop = metagetcinfo(colnp, ep)) == NULL) 2219 goto out; 2220 maxio = cinfop->maxtransfer; 2221 2222 if (options & MDCMD_DOIT) { 2223 /* store name in namespace */ 2224 if (add_key_name(sp, colnp, &keynlp, ep) != 0) 2225 goto out; 2226 } 2227 2228 /* setup column */ 2229 mdc->un_orig_key = colnp->key; 2230 mdc->un_orig_dev = colnp->dev; 2231 mdc->un_dev = mdc->un_orig_dev; 2232 mdc->un_pwstart = mdc->un_orig_pwstart; 2233 mdc->un_devstart = mdc->un_orig_devstart; 2234 mdc->un_alt_dev = NODEV64; 2235 mdc->un_alt_pwstart = 0; 2236 mdc->un_alt_devstart = 0; 2237 mdc->un_hs_id = 0; 2238 if (mr->un_state == RUS_INIT) 2239 mdc->un_devstate = RCS_INIT; 2240 else 2241 mdc->un_devstate = RCS_OKAY; 2242 2243 /* adjust for smallest disk */ 2244 if (disk_size == 0) { 2245 disk_size = size; 2246 } else if (size < disk_size) { 2247 disk_size = size; 2248 } 2249 if (disk_maxio == 0) { 2250 disk_maxio = maxio; 2251 } else if (maxio < disk_maxio) { 2252 disk_maxio = maxio; 2253 } 2254 } 2255 assert(col == mr->un_origcolumncnt); 2256 2257 /* 2258 * before processing any of the attached column(s) 2259 * set up the composition of the metadevice for column 2260 * sizes and pre-write information 2261 */ 2262 mr->un_maxio = disk_maxio; /* smallest maxio */ 2263 mr->un_iosize = min(mr->un_maxio, (mr->un_segsize + 1)); 2264 pwes = mr->un_iosize; 2265 if (raidp->pw_count) 2266 mr->un_pwcnt = raidp->pw_count; 2267 else 2268 mr->un_pwcnt = PWCNT_MIN; 2269 if ((mr->un_pwcnt < PWCNT_MIN) || (mr->un_pwcnt > PWCNT_MAX)) { 2270 (void) mderror(ep, MDE_RAID_BAD_PW_CNT, raidnp->cname); 2271 goto out; 2272 } 2273 mr->un_pwsize = roundup((mr->un_pwcnt * pwes), 2); 2274 2275 /* now calculate the number of segments per column */ 2276 non_pw_blks = disk_size - mr->un_pwsize; /* smallest disk */ 2277 if ((mr->un_pwsize > disk_size) || 2278 (non_pw_blks < (diskaddr_t)mr->un_segsize)) { 2279 (void) mdsyserror(ep, ENOSPC, raidnp->cname); 2280 goto out; 2281 } 2282 mr->un_segsincolumn = non_pw_blks / mr->un_segsize; 2283 column_size = mr->un_segsize * mr->un_segsincolumn; 2284 2285 /* 2286 * adjust the pw_cnt, pw_size, to fit into any fragmentation 2287 * left over after column_size has been computed 2288 */ 2289 mr->un_pwsize = rounddown(((uint_t)(disk_size - column_size)), 2); 2290 mr->un_pwcnt = mr->un_pwsize / pwes; 2291 assert(mr->un_pwcnt >= PWCNT_MIN); 2292 mr->un_pwsize = roundup((mr->un_pwcnt * pwes), 2); 2293 assert((mr->un_pwsize + column_size) <= disk_size); 2294 2295 /* 2296 * calculate the actual block count available based on the 2297 * segment size and the number of segments per column ... 2298 * ... and adjust for the number of parity segments 2299 */ 2300 mr->c.un_actual_tb = column_size * (mr->un_origcolumncnt - 1); 2301 2302 if (raid_geom(raidp, mr, ep) != 0) 2303 goto out; 2304 2305 create_flag = meta_check_devicesize(mr->c.un_total_blocks); 2306 2307 /* 2308 * now calculate the pre-write offset and update the column 2309 * structures to include the address of the individual pre-write 2310 * areas 2311 */ 2312 for (col = 0; (col < orig_ncol); ++col) { 2313 md_raidcol_t *cp = &raidp->cols.cols_val[col]; 2314 mdname_t *colnp = cp->colnamep; 2315 mr_column_t *mdc = &mr->un_column[col]; 2316 diskaddr_t size; 2317 2318 /* get size */ 2319 if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR) 2320 goto out; 2321 2322 /* adjust start and size by prewrite */ 2323 mdc->un_orig_pwstart = mdc->un_orig_devstart; 2324 mdc->un_orig_devstart += mr->un_pwsize; 2325 mdc->un_pwstart = mdc->un_orig_pwstart; 2326 mdc->un_devstart = mdc->un_orig_devstart; 2327 2328 assert(size >= mdc->un_orig_devstart); 2329 size -= mdc->un_orig_devstart; 2330 2331 /* make sure we still have something left */ 2332 assert(size >= column_size); 2333 } 2334 2335 /* do concat cols */ 2336 mr->un_totalcolumncnt = mr->un_origcolumncnt; 2337 assert(col == mr->un_origcolumncnt); 2338 for (col = orig_ncol; (col < ncol); ++col) { 2339 md_raidcol_t *cp = &raidp->cols.cols_val[col]; 2340 mdname_t *colnp = cp->colnamep; 2341 mr_column_t *mdc = &mr->un_column[col]; 2342 2343 /* attach column */ 2344 if (attach_raid_col(sp, raidnp, mr, mdc, colnp, 2345 cp->state, &keynlp, options, ep) != 0) { 2346 goto out; 2347 } 2348 } 2349 assert(mr->un_totalcolumncnt == ncol); 2350 2351 /* fill in the size of the raid */ 2352 if (options & MDCMD_UPDATE) { 2353 raidp->common.size = mr->c.un_total_blocks; 2354 raidp->column_size = mr->un_segsize * mr->un_segsincolumn; 2355 } 2356 2357 /* if we're not doing anything, return success */ 2358 if (! (options & MDCMD_DOIT)) { 2359 rval = 0; /* success */ 2360 goto out; 2361 } 2362 2363 if ((mr->un_state & RUS_OKAY) && 2364 (meta_raid_valid(raidp, mr) != 0)) { 2365 (void) mderror(ep, MDE_RAID_INVALID, raidnp->cname); 2366 goto out; 2367 } 2368 2369 /* create raid */ 2370 (void) memset(&set_params, 0, sizeof (set_params)); 2371 /* did the user tell us to generate a large device? */ 2372 if (create_flag == MD_CRO_64BIT) { 2373 mr->c.un_revision |= MD_64BIT_META_DEV; 2374 set_params.options = MD_CRO_64BIT; 2375 } else { 2376 mr->c.un_revision &= ~MD_64BIT_META_DEV; 2377 set_params.options = MD_CRO_32BIT; 2378 } 2379 set_params.mnum = MD_SID(mr); 2380 set_params.size = mr->c.un_size; 2381 set_params.mdp = (uintptr_t)mr; 2382 MD_SETDRIVERNAME(&set_params, MD_RAID, MD_MIN2SET(set_params.mnum)); 2383 if (metaioctl(MD_IOCSET, &set_params, &set_params.mde, 2384 raidnp->cname) != 0) { 2385 (void) mdstealerror(ep, &set_params.mde); 2386 goto out; 2387 } 2388 rval = 0; /* success */ 2389 2390 /* cleanup, return success */ 2391 out: 2392 Free(mr); 2393 if (rval != 0) { 2394 (void) del_key_names(sp, keynlp, NULL); 2395 } 2396 metafreenamelist(keynlp); 2397 if ((rval == 0) && (options & MDCMD_DOIT)) { 2398 if (invalidate_columns(sp, raidnp, ep) != 0) 2399 rval = -1; 2400 meta_invalidate_name(raidnp); 2401 } 2402 return (rval); 2403 } 2404 2405 /* 2406 * initialize raid 2407 * NOTE: this functions is metainit(1m)'s command line parser! 2408 */ 2409 int 2410 meta_init_raid( 2411 mdsetname_t **spp, 2412 int argc, 2413 char *argv[], 2414 mdcmdopts_t options, 2415 md_error_t *ep 2416 ) 2417 { 2418 char *uname = argv[0]; 2419 mdname_t *raidnp = NULL; 2420 int old_optind; 2421 int c; 2422 md_raid_t *raidp = NULL; 2423 uint_t ncol, col; 2424 int rval = -1; 2425 md_set_desc *sd; 2426 2427 /* get raid name */ 2428 assert(argc > 0); 2429 if (argc < 1) 2430 goto syntax; 2431 if ((raidnp = metaname(spp, uname, META_DEVICE, ep)) == NULL) 2432 goto out; 2433 assert(*spp != NULL); 2434 2435 /* 2436 * Raid metadevice not allowed on multi-node diskset. 2437 */ 2438 if (! metaislocalset(*spp)) { 2439 if ((sd = metaget_setdesc(*spp, ep)) == NULL) 2440 goto out; 2441 if (MD_MNSET_DESC(sd)) { 2442 rval = meta_cook_syntax(ep, MDE_MNSET_NORAID, uname, 2443 argc, argv); 2444 goto out; 2445 } 2446 } 2447 2448 uname = raidnp->cname; 2449 if (metachkmeta(raidnp, ep) != 0) 2450 goto out; 2451 2452 if (!(options & MDCMD_NOLOCK)) { 2453 /* grab set lock */ 2454 if (meta_lock(*spp, TRUE, ep) != 0) 2455 goto out; 2456 2457 if (meta_check_ownership(*spp, ep) != 0) 2458 goto out; 2459 } 2460 2461 /* see if it exists already */ 2462 if (metagetmiscname(raidnp, ep) != NULL) { 2463 (void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP, 2464 meta_getminor(raidnp->dev), uname); 2465 goto out; 2466 } else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) { 2467 goto out; 2468 } else { 2469 mdclrerror(ep); 2470 } 2471 --argc, ++argv; 2472 2473 /* grab -r */ 2474 if ((argc < 1) || (strcmp(argv[0], "-r") != 0)) 2475 goto syntax; 2476 --argc, ++argv; 2477 2478 /* parse general options */ 2479 optind = 0; 2480 opterr = 0; 2481 if (getopt(argc, argv, "") != -1) 2482 goto options; 2483 2484 /* allocate raid */ 2485 raidp = Zalloc(sizeof (*raidp)); 2486 2487 /* setup common */ 2488 raidp->common.namep = raidnp; 2489 raidp->common.type = MD_METARAID; 2490 raidp->state = RUS_INIT; 2491 2492 /* allocate and parse cols */ 2493 for (ncol = 0; ((ncol < argc) && (argv[ncol][0] != '-')); ++ncol) 2494 ; 2495 raidp->cols.cols_len = ncol; 2496 if (ncol != 0) { 2497 raidp->cols.cols_val = 2498 Zalloc(ncol * sizeof (*raidp->cols.cols_val)); 2499 } 2500 for (col = 0; ((argc > 0) && (col < ncol)); ++col) { 2501 md_raidcol_t *mdc = &raidp->cols.cols_val[col]; 2502 mdname_t *colnp; 2503 2504 /* parse column name */ 2505 if ((colnp = metaname(spp, argv[0], UNKNOWN, ep)) == NULL) 2506 goto out; 2507 /* check for soft partitions */ 2508 if (meta_sp_issp(*spp, colnp, ep) != 0) { 2509 /* check disks */ 2510 if (metachkcomp(colnp, ep) != 0) 2511 goto out; 2512 } 2513 mdc->colnamep = colnp; 2514 --argc, ++argv; 2515 } 2516 2517 /* parse raid options */ 2518 old_optind = optind = 0; 2519 opterr = 0; 2520 while ((c = getopt(argc, argv, "h:i:ko:w:")) != -1) { 2521 switch (c) { 2522 case 'h': 2523 if ((raidp->hspnamep = metahspname(spp, optarg, 2524 ep)) == NULL) { 2525 goto out; 2526 } 2527 break; 2528 2529 case 'i': 2530 if (parse_interlace(uname, optarg, &raidp->interlace, 2531 ep) != 0) { 2532 goto out; 2533 } 2534 if (meta_raid_check_interlace(raidp->interlace, 2535 uname, ep)) 2536 goto out; 2537 break; 2538 2539 case 'k': 2540 raidp->state = RUS_OKAY; 2541 break; 2542 2543 case 'o': 2544 if ((sscanf(optarg, "%u", &raidp->orig_ncol) != 1) || 2545 ((int)raidp->orig_ncol < 0)) { 2546 goto syntax; 2547 } 2548 if ((raidp->orig_ncol < MD_RAID_MIN) || 2549 (raidp->orig_ncol > ncol)) { 2550 rval = mderror(ep, MDE_BAD_ORIG_NCOL, uname); 2551 goto out; 2552 } 2553 break; 2554 case 'w': 2555 if ((sscanf(optarg, "%d", &raidp->pw_count) != 1) || 2556 ((int)raidp->pw_count < 0)) 2557 goto syntax; 2558 if (((int)raidp->pw_count < PWCNT_MIN) || 2559 ((int)raidp->pw_count > PWCNT_MAX)) { 2560 rval = mderror(ep, MDE_RAID_BAD_PW_CNT, uname); 2561 goto out; 2562 } 2563 break; 2564 default: 2565 argc += old_optind; 2566 argv -= old_optind; 2567 goto options; 2568 } 2569 old_optind = optind; 2570 } 2571 argc -= optind; 2572 argv += optind; 2573 2574 /* we should be at the end */ 2575 if (argc != 0) 2576 goto syntax; 2577 2578 /* default to all original columns */ 2579 if (raidp->orig_ncol == 0) 2580 raidp->orig_ncol = ncol; 2581 2582 /* create raid */ 2583 if (meta_create_raid(*spp, raidp, options, ep) != 0) 2584 goto out; 2585 rval = 0; /* success */ 2586 2587 /* let em know */ 2588 if (options & MDCMD_PRINT) { 2589 (void) printf(dgettext(TEXT_DOMAIN, "%s: RAID is setup\n"), 2590 uname); 2591 (void) fflush(stdout); 2592 } 2593 goto out; 2594 2595 /* syntax error */ 2596 syntax: 2597 rval = meta_cook_syntax(ep, MDE_SYNTAX, uname, argc, argv); 2598 goto out; 2599 2600 /* options error */ 2601 options: 2602 rval = meta_cook_syntax(ep, MDE_OPTION, uname, argc, argv); 2603 goto out; 2604 2605 /* cleanup, return error */ 2606 out: 2607 if (raidp != NULL) 2608 meta_free_raid(raidp); 2609 return (rval); 2610 } 2611 2612 /* 2613 * reset RAIDs 2614 */ 2615 int 2616 meta_raid_reset( 2617 mdsetname_t *sp, 2618 mdname_t *raidnp, 2619 mdcmdopts_t options, 2620 md_error_t *ep 2621 ) 2622 { 2623 md_raid_t *raidp; 2624 int rval = -1; 2625 int col; 2626 2627 /* should have same set */ 2628 assert(sp != NULL); 2629 assert((raidnp == NULL) || 2630 (sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)))); 2631 2632 /* reset all raids */ 2633 if (raidnp == NULL) { 2634 mdnamelist_t *raidnlp = NULL; 2635 mdnamelist_t *p; 2636 2637 /* for each raid */ 2638 rval = 0; 2639 if (meta_get_raid_names(sp, &raidnlp, 0, ep) < 0) 2640 return (-1); 2641 for (p = raidnlp; (p != NULL); p = p->next) { 2642 /* reset RAID */ 2643 raidnp = p->namep; 2644 if (meta_raid_reset(sp, raidnp, options, ep) != 0) { 2645 rval = -1; 2646 break; 2647 } 2648 } 2649 2650 /* cleanup, return success */ 2651 metafreenamelist(raidnlp); 2652 return (rval); 2653 } 2654 2655 /* check name */ 2656 if (metachkmeta(raidnp, ep) != 0) 2657 return (-1); 2658 2659 /* get unit structure */ 2660 if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL) 2661 return (-1); 2662 2663 /* make sure nobody owns us */ 2664 if (MD_HAS_PARENT(raidp->common.parent)) { 2665 return (mdmderror(ep, MDE_IN_USE, meta_getminor(raidnp->dev), 2666 raidnp->cname)); 2667 } 2668 2669 /* clear subdevices cache */ 2670 if (invalidate_columns(sp, raidnp, ep) != 0) 2671 return (-1); 2672 2673 /* clear metadevice */ 2674 if (meta_reset(sp, raidnp, options, ep) != 0) 2675 goto out; 2676 rval = 0; /* success */ 2677 2678 /* let em know */ 2679 if (options & MDCMD_PRINT) { 2680 (void) printf(dgettext(TEXT_DOMAIN, "%s: RAID is cleared\n"), 2681 raidnp->cname); 2682 (void) fflush(stdout); 2683 } 2684 2685 /* clear subdevices */ 2686 if (! (options & MDCMD_RECURSE)) 2687 goto out; 2688 2689 for (col = 0; (col < raidp->cols.cols_len); ++col) { 2690 md_raidcol_t *cp = &raidp->cols.cols_val[col]; 2691 mdname_t *colnp = cp->colnamep; 2692 2693 /* only recurse on metadevices */ 2694 if (! metaismeta(colnp)) 2695 continue; 2696 2697 if (meta_reset_by_name(sp, colnp, options, ep) != 0) 2698 rval = -1; 2699 } 2700 2701 /* cleanup, return success */ 2702 out: 2703 meta_invalidate_name(raidnp); 2704 return (rval); 2705 } 2706 2707 /* 2708 * reports TRUE if any RAID component is in error 2709 */ 2710 int 2711 meta_raid_anycomp_is_err(mdsetname_t *sp, mdnamelist_t *raid_names) 2712 { 2713 mdnamelist_t *nlp; 2714 md_error_t status = mdnullerror; 2715 md_error_t *ep = &status; 2716 int any_errs = FALSE; 2717 2718 for (nlp = raid_names; nlp; nlp = nlp->next) { 2719 md_raid_t *raidp; 2720 2721 if ((raidp = meta_get_raid(sp, nlp->namep, ep)) == NULL) { 2722 any_errs |= TRUE; 2723 goto out; 2724 } 2725 if (raidp->state != RUS_OKAY && raidp->state != RUS_INIT) { 2726 any_errs |= TRUE; 2727 goto out; 2728 } 2729 } 2730 out: 2731 if (!mdisok(ep)) 2732 mdclrerror(ep); 2733 2734 return (any_errs); 2735 } 2736 /* 2737 * regen parity on a raid 2738 */ 2739 int 2740 meta_raid_regen_byname(mdsetname_t *sp, mdname_t *raidnp, diskaddr_t size, 2741 md_error_t *ep) 2742 { 2743 char *miscname; 2744 md_resync_ioctl_t ri; 2745 2746 /* should have a set */ 2747 assert(sp != NULL); 2748 assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))); 2749 2750 /* make sure we have a raid */ 2751 if ((miscname = metagetmiscname(raidnp, ep)) == NULL) 2752 return (-1); 2753 if (strcmp(miscname, MD_RAID) != 0) { 2754 return (mdmderror(ep, MDE_NOT_RAID, meta_getminor(raidnp->dev), 2755 raidnp->cname)); 2756 } 2757 2758 /* start resync */ 2759 (void) memset(&ri, 0, sizeof (ri)); 2760 MD_SETDRIVERNAME(&ri, MD_RAID, sp->setno); 2761 ri.ri_mnum = meta_getminor(raidnp->dev); 2762 ri.ri_copysize = size; 2763 if (metaioctl(MD_IOCSETREGEN, &ri, &ri.mde, raidnp->cname) != 0) 2764 return (mdstealerror(ep, &ri.mde)); 2765 2766 /* return success */ 2767 return (0); 2768 } 2769