1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 /* 29 * Just in case we're not in a build environment, make sure that 30 * TEXT_DOMAIN gets set to something. 31 */ 32 #if !defined(TEXT_DOMAIN) 33 #define TEXT_DOMAIN "SYS_TEST" 34 #endif 35 36 /* 37 * RAID operations 38 */ 39 40 #include <stdlib.h> 41 #include <meta.h> 42 #include <sys/lvm/md_raid.h> 43 #include <sys/lvm/mdvar.h> 44 #include <sys/lvm/md_convert.h> 45 #include <stddef.h> 46 47 /* 48 * FUNCTION: meta_get_raid_names() 49 * INPUT: sp - the set name to get raid from 50 * options - options from the command line 51 * OUTPUT: nlpp - list of all raid names 52 * ep - return error pointer 53 * RETURNS: int - -1 if error, 0 success 54 * PURPOSE: returns a list of all raid in the metadb 55 * for all devices in the specified set 56 */ 57 int 58 meta_get_raid_names( 59 mdsetname_t *sp, 60 mdnamelist_t **nlpp, 61 int options, 62 md_error_t *ep 63 ) 64 { 65 return (meta_get_names(MD_RAID, sp, nlpp, options, ep)); 66 } 67 68 /* 69 * free raid unit 70 */ 71 void 72 meta_free_raid( 73 md_raid_t *raidp 74 ) 75 { 76 if (raidp->cols.cols_val != NULL) { 77 assert(raidp->cols.cols_len > 0); 78 Free(raidp->cols.cols_val); 79 } 80 Free(raidp); 81 } 82 83 /* 84 * get raid (common) 85 */ 86 md_raid_t * 87 meta_get_raid_common( 88 mdsetname_t *sp, 89 mdname_t *raidnp, 90 int fast, 91 md_error_t *ep 92 ) 93 { 94 mddrivename_t *dnp = raidnp->drivenamep; 95 char *miscname; 96 mr_unit_t *mr; 97 md_raid_t *raidp; 98 uint_t ncol; 99 uint_t col; 100 md_resync_ioctl_t ri; 101 102 /* must have set */ 103 assert(sp != NULL); 104 assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))); 105 106 /* short circuit */ 107 if (dnp->unitp != NULL) { 108 assert(dnp->unitp->type == MD_METARAID); 109 return ((md_raid_t *)dnp->unitp); 110 } 111 112 /* get miscname and unit */ 113 if ((miscname = metagetmiscname(raidnp, ep)) == NULL) 114 return (NULL); 115 if (strcmp(miscname, MD_RAID) != 0) { 116 (void) mdmderror(ep, MDE_NOT_RAID, meta_getminor(raidnp->dev), 117 raidnp->cname); 118 return (NULL); 119 } 120 if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL) 121 return (NULL); 122 assert(mr->c.un_type == MD_METARAID); 123 124 /* allocate raid */ 125 raidp = Zalloc(sizeof (*raidp)); 126 127 /* allocate columns */ 128 ncol = mr->un_totalcolumncnt; 129 assert(ncol >= MD_RAID_MIN); 130 raidp->cols.cols_len = ncol; 131 raidp->cols.cols_val = Zalloc(raidp->cols.cols_len * 132 sizeof (*raidp->cols.cols_val)); 133 134 /* get common info */ 135 raidp->common.namep = raidnp; 136 raidp->common.type = mr->c.un_type; 137 raidp->common.state = mr->c.un_status; 138 raidp->common.capabilities = mr->c.un_capabilities; 139 raidp->common.parent = mr->c.un_parent; 140 raidp->common.size = mr->c.un_total_blocks; 141 raidp->common.user_flags = mr->c.un_user_flags; 142 raidp->common.revision = mr->c.un_revision; 143 144 /* get options */ 145 raidp->state = mr->un_state; 146 raidp->timestamp = mr->un_timestamp; 147 raidp->interlace = mr->un_segsize; 148 raidp->orig_ncol = mr->un_origcolumncnt; 149 raidp->column_size = mr->un_segsize * mr->un_segsincolumn; 150 raidp->pw_count = mr->un_pwcnt; 151 assert(raidp->orig_ncol <= ncol); 152 if ((mr->un_hsp_id != MD_HSP_NONE) && 153 ((raidp->hspnamep = metahsphspname(&sp, mr->un_hsp_id, 154 ep)) == NULL)) { 155 goto out; 156 } 157 158 /* get columns, update unit state */ 159 for (col = 0; (col < ncol); ++col) { 160 mr_column_t *rcp = &mr->un_column[col]; 161 md_raidcol_t *mdrcp = &raidp->cols.cols_val[col]; 162 163 /* get column name */ 164 mdrcp->colnamep = metakeyname(&sp, rcp->un_orig_key, fast, ep); 165 if (mdrcp->colnamep == NULL) 166 goto out; 167 168 /* override any start_blk */ 169 #ifdef DEBUG 170 if (metagetstart(sp, mdrcp->colnamep, ep) != 171 MD_DISKADDR_ERROR) { 172 assert(mdrcp->colnamep->start_blk <= 173 rcp->un_orig_devstart); 174 } else { 175 mdclrerror(ep); 176 } 177 #endif /* DEBUG */ 178 mdrcp->colnamep->start_blk = rcp->un_orig_devstart; 179 180 /* if hotspared */ 181 if (HOTSPARED(mr, col)) { 182 /* get hotspare name */ 183 mdrcp->hsnamep = metakeyname(&sp, rcp->un_hs_key, 184 fast, ep); 185 if (mdrcp->hsnamep == NULL) 186 goto out; 187 188 if (getenv("META_DEBUG_START_BLK") != NULL) { 189 if (metagetstart(sp, mdrcp->hsnamep, ep) == 190 MD_DISKADDR_ERROR) 191 mdclrerror(ep); 192 193 if ((mdrcp->hsnamep->start_blk == 0) && 194 (rcp->un_hs_pwstart != 0)) 195 md_eprintf(dgettext(TEXT_DOMAIN, 196 "%s: suspected bad start block," 197 " seems labelled [raid]\n"), 198 mdrcp->hsnamep->cname); 199 200 if ((mdrcp->hsnamep->start_blk > 0) && 201 (rcp->un_hs_pwstart == 0)) 202 md_eprintf(dgettext(TEXT_DOMAIN, 203 "%s: suspected bad start block, " 204 " seems unlabelled [raid]\n"), 205 mdrcp->hsnamep->cname); 206 } 207 208 /* override any start_blk */ 209 mdrcp->hsnamep->start_blk = rcp->un_hs_devstart; 210 } 211 212 /* get state, flags, and timestamp */ 213 mdrcp->state = rcp->un_devstate; 214 mdrcp->flags = rcp->un_devflags; 215 mdrcp->timestamp = rcp->un_devtimestamp; 216 } 217 218 /* get resync info */ 219 (void) memset(&ri, 0, sizeof (ri)); 220 ri.ri_mnum = meta_getminor(raidnp->dev); 221 MD_SETDRIVERNAME(&ri, MD_RAID, sp->setno); 222 if (metaioctl(MD_IOCGETSYNC, &ri, &ri.mde, raidnp->cname) != 0) { 223 (void) mdstealerror(ep, &ri.mde); 224 goto out; 225 } 226 raidp->resync_flags = ri.ri_flags; 227 raidp->percent_dirty = ri.ri_percent_dirty; 228 raidp->percent_done = ri.ri_percent_done; 229 230 /* cleanup, return success */ 231 Free(mr); 232 dnp->unitp = (md_common_t *)raidp; 233 return (raidp); 234 235 /* cleanup, return error */ 236 out: 237 Free(mr); 238 meta_free_raid(raidp); 239 return (NULL); 240 } 241 242 /* 243 * get raid 244 */ 245 md_raid_t * 246 meta_get_raid( 247 mdsetname_t *sp, 248 mdname_t *raidnp, 249 md_error_t *ep 250 ) 251 { 252 return (meta_get_raid_common(sp, raidnp, 0, ep)); 253 } 254 255 /* 256 * check raid for dev 257 */ 258 static int 259 in_raid( 260 mdsetname_t *sp, 261 mdname_t *raidnp, 262 mdname_t *np, 263 diskaddr_t slblk, 264 diskaddr_t nblks, 265 md_error_t *ep 266 ) 267 { 268 md_raid_t *raidp; 269 uint_t col; 270 271 /* should be in the same set */ 272 assert(sp != NULL); 273 assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))); 274 275 /* get unit */ 276 if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL) 277 return (-1); 278 279 /* look in columns */ 280 for (col = 0; (col < raidp->cols.cols_len); ++col) { 281 md_raidcol_t *cp = &raidp->cols.cols_val[col]; 282 mdname_t *colnp = cp->colnamep; 283 diskaddr_t col_sblk; 284 int err; 285 286 /* check same drive since metagetstart() can fail */ 287 if ((err = meta_check_samedrive(np, colnp, ep)) < 0) 288 return (-1); 289 else if (err == 0) 290 continue; 291 292 /* check overlap */ 293 if ((col_sblk = metagetstart(sp, colnp, ep)) == 294 MD_DISKADDR_ERROR) 295 return (-1); 296 if (meta_check_overlap(raidnp->cname, np, slblk, nblks, 297 colnp, col_sblk, -1, ep) != 0) { 298 return (-1); 299 } 300 } 301 302 /* return success */ 303 return (0); 304 } 305 306 /* 307 * check to see if we're in a raid 308 */ 309 int 310 meta_check_inraid( 311 mdsetname_t *sp, 312 mdname_t *np, 313 diskaddr_t slblk, 314 diskaddr_t nblks, 315 md_error_t *ep 316 ) 317 { 318 mdnamelist_t *raidnlp = NULL; 319 mdnamelist_t *p; 320 int rval = 0; 321 322 /* should have a set */ 323 assert(sp != NULL); 324 325 /* for each raid */ 326 if (meta_get_raid_names(sp, &raidnlp, 0, ep) < 0) 327 return (-1); 328 for (p = raidnlp; (p != NULL); p = p->next) { 329 mdname_t *raidnp = p->namep; 330 331 /* check raid */ 332 if (in_raid(sp, raidnp, np, slblk, nblks, ep) != 0) { 333 rval = -1; 334 break; 335 } 336 } 337 338 /* cleanup, return success */ 339 metafreenamelist(raidnlp); 340 return (rval); 341 } 342 343 /* 344 * check column 345 */ 346 int 347 meta_check_column( 348 mdsetname_t *sp, 349 mdname_t *np, 350 md_error_t *ep 351 ) 352 { 353 mdchkopts_t options = (MDCHK_ALLOW_MDDB); 354 355 /* check for soft partitions */ 356 if (meta_sp_issp(sp, np, ep) != 0) { 357 /* make sure we have a disk */ 358 if (metachkcomp(np, ep) != 0) 359 return (-1); 360 } 361 362 /* check to ensure that it is not already in use */ 363 if (meta_check_inuse(sp, np, MDCHK_INUSE, ep) != 0) { 364 return (-1); 365 } 366 367 /* make sure it is in the set */ 368 if (meta_check_inset(sp, np, ep) != 0) 369 return (-1); 370 371 /* make sure its not in a metadevice */ 372 if (meta_check_inmeta(sp, np, options, 0, -1, ep) != 0) 373 return (-1); 374 375 /* return success */ 376 return (0); 377 } 378 379 /* 380 * print raid 381 */ 382 static int 383 raid_print( 384 md_raid_t *raidp, 385 char *fname, 386 FILE *fp, 387 mdprtopts_t options, 388 md_error_t *ep 389 ) 390 { 391 uint_t col; 392 int rval = -1; 393 394 395 if (options & PRINT_LARGEDEVICES) { 396 if (raidp->common.revision != MD_64BIT_META_DEV) { 397 rval = 0; 398 goto out; 399 } 400 } 401 402 /* print name and -r */ 403 if (fprintf(fp, "%s -r", raidp->common.namep->cname) == EOF) 404 goto out; 405 406 /* print columns */ 407 for (col = 0; (col < raidp->cols.cols_len); ++col) { 408 md_raidcol_t *mdrcp = &raidp->cols.cols_val[col]; 409 410 /* print column */ 411 /* 412 * If the path is our standard /dev/rdsk or /dev/md/rdsk 413 * then just print out the cxtxdxsx or the dx, metainit 414 * will assume the default, otherwise we need the full 415 * pathname to make sure this works as we intend. 416 */ 417 if ((strstr(mdrcp->colnamep->rname, "/dev/rdsk") == NULL) && 418 (strstr(mdrcp->colnamep->rname, "/dev/md/rdsk") == NULL) && 419 (strstr(mdrcp->colnamep->rname, "/dev/td/") == NULL)) { 420 /* not standard path, print full pathname */ 421 if (fprintf(fp, " %s", mdrcp->colnamep->rname) == EOF) 422 goto out; 423 } else { 424 /* standard path so print ctd or d number */ 425 if (fprintf(fp, " %s", mdrcp->colnamep->cname) == EOF) 426 goto out; 427 } 428 } 429 430 if (fprintf(fp, " -k") == EOF) 431 goto out; 432 433 /* print options */ 434 if (fprintf(fp, " -i %lldb", raidp->interlace) == EOF) 435 goto out; 436 437 if (raidp->pw_count != PWCNT_MIN) 438 if (fprintf(fp, " -w %d", raidp->pw_count) == EOF) 439 goto out; 440 441 if (raidp->hspnamep != NULL) { 442 if (fprintf(fp, " -h %s", raidp->hspnamep->hspname) == EOF) 443 goto out; 444 } 445 if (raidp->orig_ncol != raidp->cols.cols_len) { 446 assert(raidp->orig_ncol < raidp->cols.cols_len); 447 if (fprintf(fp, " -o %u", raidp->orig_ncol) == EOF) 448 goto out; 449 } 450 451 /* terminate last line */ 452 if (fprintf(fp, "\n") == EOF) 453 goto out; 454 455 /* success */ 456 rval = 0; 457 458 /* cleanup, return error */ 459 out: 460 if (rval != 0) 461 (void) mdsyserror(ep, errno, fname); 462 return (rval); 463 } 464 465 static int 466 find_resyncing_column( 467 md_raid_t *raidp 468 ) 469 { 470 int col; 471 472 for (col = 0; (col < raidp->cols.cols_len); ++col) { 473 md_raidcol_t *cp = &raidp->cols.cols_val[col]; 474 if (cp->state & RCS_RESYNC) 475 return (col); 476 } 477 478 /* No resyncing columns */ 479 return (-1); 480 } 481 482 /* 483 * convert raid state to name 484 */ 485 char * 486 raid_state_to_name( 487 md_raid_t *raidp, 488 md_timeval32_t *tvp, 489 uint_t tstate /* Errored tstate flags */ 490 ) 491 { 492 493 /* grab time */ 494 if (tvp != NULL) 495 *tvp = raidp->timestamp; 496 497 /* 498 * If the device has a transient error state (due to it being DR'ed or 499 * failed) and there has been no I/O to it (the actual device is still 500 * marked as 'Okay') then we cannot know what the state is or what 501 * action to take on it. Therefore report the device as 'Unavailable'. 502 * A subsequent I/O to the device will cause the 'Okay' status to 503 * disappear if the device is actually gone and then we will print out 504 * the appropriate status. The MD_INACCESSIBLE state is only set 505 * on the raid when we open it or probe it. One the raid is open 506 * then we will just have regular error status on the device. 507 */ 508 if (tstate & MD_INACCESSIBLE) { 509 return (dgettext(TEXT_DOMAIN, "Unavailable")); 510 } 511 512 /* resyncing */ 513 if (find_resyncing_column(raidp) >= 0) 514 return (dgettext(TEXT_DOMAIN, "Resyncing")); 515 516 /* everything else */ 517 switch (raidp->state) { 518 case RUS_INIT : 519 return (dgettext(TEXT_DOMAIN, "Initializing")); 520 case RUS_OKAY : 521 return (dgettext(TEXT_DOMAIN, "Okay")); 522 case RUS_ERRED : 523 /*FALLTHROUGH*/ 524 case RUS_LAST_ERRED : 525 return (dgettext(TEXT_DOMAIN, "Needs Maintenance")); 526 case RUS_DOI : 527 return (dgettext(TEXT_DOMAIN, "Initialization Failed")); 528 case RUS_REGEN : 529 return (dgettext(TEXT_DOMAIN, "Regen")); 530 default : 531 return (dgettext(TEXT_DOMAIN, "invalid")); 532 } /* switch */ 533 } 534 535 static int 536 find_erred_column(md_raid_t *raidp, rcs_state_t state) 537 { 538 int col; 539 540 for (col = 0; (col < raidp->cols.cols_len); ++col) { 541 md_raidcol_t *cp = &raidp->cols.cols_val[col]; 542 if (cp->state & state) 543 return (col); 544 } 545 546 /* No erred columns */ 547 return (-1); 548 } 549 550 /* 551 * convert raid state to repair action 552 */ 553 char * 554 raid_state_to_action(md_raid_t *raidp) 555 { 556 static char emsg[1024]; 557 mdname_t *raidnp = raidp->common.namep; 558 int err_col; 559 560 /* first check for full init failure */ 561 if (raidp->state & RUS_DOI) { 562 (void) snprintf(emsg, sizeof (emsg), 563 "metaclear -f %s", raidnp->cname); 564 return (emsg); 565 } 566 567 /* replace errored or init errored raid column */ 568 if ((err_col = find_erred_column(raidp, 569 (RCS_ERRED | RCS_INIT_ERRED))) >= 0) { 570 mdname_t *colnp; 571 572 /* get column with error */ 573 assert(err_col < raidp->cols.cols_len); 574 colnp = raidp->cols.cols_val[err_col].colnamep; 575 (void) snprintf(emsg, sizeof (emsg), 576 "metareplace %s%s %s <%s>", 577 ((raidp->state == RUS_LAST_ERRED) ? "-f " : ""), 578 raidnp->cname, colnp->cname, 579 dgettext(TEXT_DOMAIN, "new device")); 580 return (emsg); 581 } 582 583 584 /* replace last errored raid column */ 585 if ((err_col = find_erred_column(raidp, RCS_LAST_ERRED)) >= 0) { 586 mdname_t *colnp; 587 588 assert(err_col < raidp->cols.cols_len); 589 colnp = raidp->cols.cols_val[err_col].colnamep; 590 (void) snprintf(emsg, sizeof (emsg), 591 "metareplace %s %s %s <%s>", 592 ((raidp->state == RUS_LAST_ERRED) ? "-f " : ""), 593 raidnp->cname, colnp->cname, 594 dgettext(TEXT_DOMAIN, "new device")); 595 return (emsg); 596 } 597 598 /* OK */ 599 return (NULL); 600 } 601 602 /* 603 * get printable raid column state 604 */ 605 char * 606 raid_col_state_to_name( 607 md_raidcol_t *colp, 608 md_timeval32_t *tvp, 609 uint_t tstate 610 ) 611 { 612 /* grab time */ 613 if (tvp != NULL) 614 *tvp = colp->timestamp; 615 616 if (tstate != 0) { 617 return (dgettext(TEXT_DOMAIN, "Unavailable")); 618 } 619 620 /* everything else */ 621 switch (colp->state) { 622 case RCS_INIT: 623 return (dgettext(TEXT_DOMAIN, "Initializing")); 624 625 case RCS_OKAY: 626 return (dgettext(TEXT_DOMAIN, "Okay")); 627 628 case RCS_INIT_ERRED: 629 /*FALLTHROUGH*/ 630 case RCS_ERRED: 631 return (dgettext(TEXT_DOMAIN, "Maintenance")); 632 633 case RCS_LAST_ERRED: 634 return (dgettext(TEXT_DOMAIN, "Last Erred")); 635 636 case RCS_RESYNC: 637 return (dgettext(TEXT_DOMAIN, "Resyncing")); 638 639 default: 640 return (dgettext(TEXT_DOMAIN, "Unknown")); 641 } 642 } 643 644 /* 645 * print raid column 646 */ 647 static int 648 display_raid_device_info( 649 mdsetname_t *sp, 650 md_raidcol_t *colp, 651 char *fname, 652 FILE *fp, 653 mdprtopts_t options, 654 int print_len, 655 uint_t top_tstate, /* Errored tstate flags */ 656 md_error_t *ep 657 ) 658 { 659 mdname_t *namep = ((colp->hsnamep != NULL) ? 660 colp->hsnamep : colp->colnamep); 661 char *devid = ""; 662 char *cname = colp->colnamep->cname; 663 diskaddr_t start_blk; 664 int has_mddb; 665 char *has_mddb_str; 666 char *col_state; 667 md_timeval32_t tv; 668 char *hsname = ((colp->hsnamep != NULL) ? 669 colp->hsnamep->cname : ""); 670 int rval = -1; 671 mdname_t *didnp = NULL; 672 ddi_devid_t dtp; 673 uint_t tstate = 0; 674 675 /* get info */ 676 if ((start_blk = metagetstart(sp, namep, ep)) == MD_DISKADDR_ERROR) 677 return (-1); 678 if ((has_mddb = metahasmddb(sp, namep, ep)) < 0) 679 return (-1); 680 if (has_mddb) 681 has_mddb_str = dgettext(TEXT_DOMAIN, "Yes"); 682 else 683 has_mddb_str = dgettext(TEXT_DOMAIN, "No"); 684 685 if (metaismeta(namep)) { 686 if (meta_get_tstate(namep->dev, &tstate, ep) != 0) 687 return (-1); 688 col_state = raid_col_state_to_name(colp, &tv, 689 tstate & MD_DEV_ERRORED); 690 } else { 691 /* 692 * if top_tstate is set, that implies that you have 693 * a ctd type device with an unavailable metadevice 694 * on top of it. If so, print a - for it's state 695 */ 696 if (top_tstate != 0) 697 col_state = "-"; 698 else 699 col_state = raid_col_state_to_name(colp, &tv, tstate); 700 } 701 702 /* populate the key in the name_p structure */ 703 if ((didnp = metadevname(&sp, namep->dev, ep)) == NULL) 704 return (-1); 705 706 /* determine if devid does NOT exist */ 707 if (options & PRINT_DEVID) { 708 if ((dtp = meta_getdidbykey(sp->setno, getmyside(sp, ep), 709 didnp->key, ep)) == NULL) 710 devid = dgettext(TEXT_DOMAIN, "No "); 711 else { 712 devid = dgettext(TEXT_DOMAIN, "Yes"); 713 free(dtp); 714 } 715 } 716 /* print column */ 717 /* 718 * Building a format string on the fly that will 719 * be used in (f)printf. This allows the length 720 * of the ctd to vary from small to large without 721 * looking horrible. 722 */ 723 if (! (options & PRINT_TIMES)) { 724 if (fprintf(fp, 725 "\t%-*.*s %8lld %5.5s %12.12s %5.5s %s\n", 726 print_len, print_len, cname, start_blk, has_mddb_str, 727 col_state, devid, hsname) == EOF) { 728 goto out; 729 } 730 } else { 731 char *timep = meta_print_time(&tv); 732 733 if (fprintf(fp, 734 "\t%-*s %5lld %-5s %-11s %-5s %-9s %s\n", 735 print_len, cname, start_blk, has_mddb_str, 736 col_state, devid, hsname, timep) == EOF) { 737 goto out; 738 } 739 } 740 741 /* success */ 742 rval = 0; 743 744 /* cleanup, return error */ 745 out: 746 if (rval != 0) 747 (void) mdsyserror(ep, errno, fname); 748 749 return (rval); 750 } 751 752 /* 753 * print raid options 754 */ 755 int 756 meta_print_raid_options( 757 mdhspname_t *hspnamep, 758 char *fname, 759 FILE *fp, 760 md_error_t *ep 761 ) 762 { 763 char *hspname = ((hspnamep != NULL) ? hspnamep->hspname : 764 dgettext(TEXT_DOMAIN, "none")); 765 int rval = -1; 766 767 /* print options */ 768 if (fprintf(fp, dgettext(TEXT_DOMAIN, 769 " Hot spare pool: %s\n"), hspname) == EOF) { 770 goto out; 771 } 772 773 /* success */ 774 rval = 0; 775 776 /* cleanup, return error */ 777 out: 778 if (rval != 0) 779 (void) mdsyserror(ep, errno, fname); 780 return (rval); 781 } 782 783 /* 784 * report raid 785 */ 786 static int 787 raid_report( 788 mdsetname_t *sp, 789 md_raid_t *raidp, 790 char *fname, 791 FILE *fp, 792 mdprtopts_t options, 793 md_error_t *ep 794 ) 795 { 796 char *p; 797 uint_t ncol = raidp->cols.cols_len; 798 uint_t orig_ncol = raidp->orig_ncol; 799 diskaddr_t column_size = raidp->column_size; 800 char *raid_state; 801 md_timeval32_t tv; 802 char *timep; 803 uint_t col; 804 int rval = -1; 805 int len = 0; 806 uint_t tstate = 0; 807 808 if (options & PRINT_LARGEDEVICES) { 809 if (raidp->common.revision != MD_64BIT_META_DEV) { 810 rval = 0; 811 goto out; 812 } 813 } 814 815 /* print header */ 816 if (options & PRINT_HEADER) { 817 if (fprintf(fp, dgettext(TEXT_DOMAIN, "%s: RAID\n"), 818 raidp->common.namep->cname) == EOF) { 819 goto out; 820 } 821 822 } 823 824 /* print state */ 825 if (metaismeta(raidp->common.namep)) { 826 if (meta_get_tstate(raidp->common.namep->dev, &tstate, ep) != 0) 827 return (-1); 828 } 829 tstate &= MD_DEV_ERRORED; /* extract the errored tstate bits */ 830 raid_state = raid_state_to_name(raidp, &tv, tstate); 831 if (options & PRINT_TIMES) { 832 timep = meta_print_time(&tv); 833 } else { 834 timep = ""; 835 } 836 837 if (fprintf(fp, dgettext(TEXT_DOMAIN, " State: %-12s %s\n"), 838 raid_state, timep) == EOF) { 839 goto out; 840 } 841 842 /* 843 * Display recovery action if we're marked in the Unavailable state. 844 */ 845 if ((tstate == 0) || (tstate & MD_INACCESSIBLE)) { 846 /* print what to do */ 847 if (tstate & MD_INACCESSIBLE) { 848 char sname[MD_MAX_SETNAME + 3]; /* 3 = sizeof("-s ") */ 849 850 if (metaislocalset(sp)) { 851 sname[0] = '\0'; 852 } else { 853 (void) snprintf(sname, MD_MAX_SETNAME + 3, 854 "-s %s", sp->setname); 855 } 856 if (fprintf(fp, dgettext(TEXT_DOMAIN, 857 " Invoke: metastat -i %s\n"), sname) == EOF) { 858 goto out; 859 } 860 } else if ((p = raid_state_to_action(raidp)) != NULL) { 861 if (fprintf(fp, dgettext(TEXT_DOMAIN, 862 " Invoke: %s\n"), p) == EOF) { 863 goto out; 864 } 865 } 866 867 /* resync status */ 868 if (raidp->resync_flags & MD_RI_INPROGRESS) { 869 if (fprintf(fp, dgettext(TEXT_DOMAIN, 870 " Resync in progress: %2d.%1d%% done\n"), 871 raidp->percent_done/10, 872 raidp->percent_done % 10) == EOF) { 873 goto out; 874 } 875 } else if (raidp->resync_flags & MD_GROW_INPROGRESS) { 876 if (fprintf(fp, dgettext(TEXT_DOMAIN, 877 " Initialization in progress: %2d.%1d%% " 878 "done\n"), 879 raidp->percent_done/10, 880 raidp->percent_done % 10) == EOF) { 881 goto out; 882 } 883 } else if (raidp->state & RUS_REGEN) { 884 if (fprintf(fp, dgettext(TEXT_DOMAIN, 885 " Parity regeneration in progress: %2d.%1d%% " 886 "done\n"), 887 raidp->percent_done/10, 888 raidp->percent_done % 10) == EOF) { 889 goto out; 890 } 891 } 892 } 893 894 /* print hotspare pool */ 895 if (raidp->hspnamep != NULL) { 896 if (meta_print_raid_options(raidp->hspnamep, 897 fname, fp, ep) != 0) { 898 return (-1); 899 } 900 } 901 902 /* print interlace */ 903 if (fprintf(fp, dgettext(TEXT_DOMAIN, " Interlace: %lld blocks\n"), 904 raidp->interlace) == EOF) { 905 goto out; 906 } 907 908 /* print size */ 909 if (fprintf(fp, dgettext(TEXT_DOMAIN, " Size: %lld blocks (%s)\n"), 910 raidp->common.size, 911 meta_number_to_string(raidp->common.size, DEV_BSIZE)) == EOF) { 912 goto out; 913 } 914 915 /* MD_DEBUG stuff */ 916 if (options & PRINT_DEBUG) { 917 mdname_t *raidnp = raidp->common.namep; 918 mr_unit_t *mr; 919 920 /* get additional info */ 921 if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL) 922 return (-1); 923 assert(mr->c.un_type == MD_METARAID); 924 925 /* print prewrite count and size */ 926 if (fprintf(fp, dgettext(TEXT_DOMAIN, 927 " Prewrite Count: %u slots\n"), 928 mr->un_pwcnt) == EOF) { 929 Free(mr); 930 goto out; 931 } 932 if (fprintf(fp, dgettext(TEXT_DOMAIN, 933 " Prewrite Slot Size: %u blocks\n"), 934 (mr->un_pwsize / mr->un_pwcnt)) == EOF) { 935 Free(mr); 936 goto out; 937 } 938 if (fprintf(fp, dgettext(TEXT_DOMAIN, 939 " Prewrite Total Size: %u blocks\n"), 940 mr->un_pwsize) == EOF) { 941 Free(mr); 942 goto out; 943 } 944 Free(mr); 945 } 946 947 /* print original devices */ 948 if (fprintf(fp, dgettext(TEXT_DOMAIN, "Original device:\n")) == EOF) 949 goto out; 950 if (fprintf(fp, dgettext(TEXT_DOMAIN, " Size: %lld blocks (%s)\n"), 951 column_size * (orig_ncol - 1), 952 meta_number_to_string(column_size * (orig_ncol - 1), DEV_BSIZE)) 953 == EOF) { 954 goto out; 955 } 956 /* 957 * Building a format string on the fly that will 958 * be used in (f)printf. This allows the length 959 * of the ctd to vary from small to large without 960 * looking horrible. 961 */ 962 for (col = 0; (col < orig_ncol); ++col) { 963 len = max(len, 964 strlen(raidp->cols.cols_val[col].colnamep->cname)); 965 } 966 967 len = max(len, strlen(dgettext(TEXT_DOMAIN, "Device"))); 968 len += 2; 969 970 if (! (options & PRINT_TIMES)) { 971 if (fprintf(fp, 972 "\t%-*.*s %-12.12s %-5.5s %12.12s %-5.5s %s\n", 973 len, len, 974 dgettext(TEXT_DOMAIN, "Device"), 975 dgettext(TEXT_DOMAIN, "Start Block"), 976 dgettext(TEXT_DOMAIN, "Dbase"), 977 dgettext(TEXT_DOMAIN, "State"), 978 dgettext(TEXT_DOMAIN, "Reloc"), 979 dgettext(TEXT_DOMAIN, "Hot Spare")) == EOF) { 980 goto out; 981 } 982 } else { 983 if (fprintf(fp, 984 "\t%-*s %5s %-5s %-11s %-5s %-9s %s\n", 985 len, 986 dgettext(TEXT_DOMAIN, "Device"), 987 dgettext(TEXT_DOMAIN, "Start"), 988 dgettext(TEXT_DOMAIN, "Dbase"), 989 dgettext(TEXT_DOMAIN, "State"), 990 dgettext(TEXT_DOMAIN, "Reloc"), 991 dgettext(TEXT_DOMAIN, "Hot Spare"), 992 dgettext(TEXT_DOMAIN, "Time")) == EOF) { 993 goto out; 994 } 995 } 996 for (col = 0; (col < orig_ncol); ++col) { 997 md_raidcol_t *mdrcp = &raidp->cols.cols_val[col]; 998 999 if (display_raid_device_info(sp, mdrcp, fname, fp, options, 1000 len, tstate, ep) != 0) { 1001 return (-1); 1002 } 1003 } 1004 1005 /* print concatenated devices */ 1006 if (col < ncol) { 1007 if (fprintf(fp, dgettext(TEXT_DOMAIN, 1008 "Concatenated Devices:\n")) == EOF) { 1009 goto out; 1010 } 1011 if (fprintf(fp, dgettext(TEXT_DOMAIN, 1012 " Size: %lld blocks (%s)\n"), 1013 column_size * (ncol - orig_ncol), 1014 meta_number_to_string(column_size * (ncol - orig_ncol), 1015 DEV_BSIZE)) 1016 == EOF) { 1017 goto out; 1018 } 1019 /* 1020 * This allows the length 1021 * of the ctd to vary from small to large without 1022 * looking horrible. 1023 */ 1024 if (! (options & PRINT_TIMES)) { 1025 if (fprintf(fp, 1026 "\t%-*.*s %-12.12s %-5.5s %-12.12s %5.5s %s\n", 1027 len, len, 1028 dgettext(TEXT_DOMAIN, "Device"), 1029 dgettext(TEXT_DOMAIN, "Start Block"), 1030 dgettext(TEXT_DOMAIN, "Dbase"), 1031 dgettext(TEXT_DOMAIN, "State"), 1032 dgettext(TEXT_DOMAIN, "Reloc"), 1033 dgettext(TEXT_DOMAIN, "Hot Spare")) == EOF) { 1034 goto out; 1035 } 1036 } else { 1037 if (fprintf(fp, 1038 "\t%-*s %5s %-5s %-11s %-9s %s\t%s\n", 1039 len, 1040 dgettext(TEXT_DOMAIN, "Device"), 1041 dgettext(TEXT_DOMAIN, "Start"), 1042 dgettext(TEXT_DOMAIN, "Dbase"), 1043 dgettext(TEXT_DOMAIN, "State"), 1044 dgettext(TEXT_DOMAIN, "Reloc"), 1045 dgettext(TEXT_DOMAIN, "Hot Spare"), 1046 dgettext(TEXT_DOMAIN, "Time")) == EOF) { 1047 goto out; 1048 } 1049 } 1050 assert(col == orig_ncol); 1051 for (/* void */; (col < ncol); col++) { 1052 md_raidcol_t *mdrcp = &raidp->cols.cols_val[col]; 1053 1054 if (display_raid_device_info(sp, mdrcp, fname, fp, 1055 options, len, tstate, ep) != 0) { 1056 return (-1); 1057 } 1058 } 1059 } 1060 1061 /* add extra line */ 1062 if (fprintf(fp, "\n") == EOF) 1063 goto out; 1064 1065 /* success */ 1066 rval = 0; 1067 1068 /* cleanup, return error */ 1069 out: 1070 if (rval != 0) 1071 (void) mdsyserror(ep, errno, fname); 1072 return (rval); 1073 } 1074 1075 /* 1076 * print/report raid 1077 */ 1078 int 1079 meta_raid_print( 1080 mdsetname_t *sp, 1081 mdname_t *raidnp, 1082 mdnamelist_t **nlpp, 1083 char *fname, 1084 FILE *fp, 1085 mdprtopts_t options, 1086 md_error_t *ep 1087 ) 1088 { 1089 md_raid_t *raidp; 1090 int col; 1091 1092 /* should have same set */ 1093 assert(sp != NULL); 1094 assert((raidnp == NULL) || 1095 (sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)))); 1096 1097 /* print all raids */ 1098 if (raidnp == NULL) { 1099 mdnamelist_t *nlp = NULL; 1100 mdnamelist_t *p; 1101 int cnt; 1102 int rval = 0; 1103 1104 /* get list */ 1105 if ((cnt = meta_get_raid_names(sp, &nlp, options, ep)) < 0) 1106 return (-1); 1107 else if (cnt == 0) 1108 return (0); 1109 1110 /* recurse */ 1111 for (p = nlp; (p != NULL); p = p->next) { 1112 mdname_t *np = p->namep; 1113 1114 if (meta_raid_print(sp, np, nlpp, fname, fp, 1115 options, ep) != 0) 1116 rval = -1; 1117 } 1118 1119 /* cleanup, return success */ 1120 metafreenamelist(nlp); 1121 return (rval); 1122 } 1123 1124 /* get unit structure */ 1125 if ((raidp = meta_get_raid_common(sp, raidnp, 1126 ((options & PRINT_FAST) ? 1 : 0), ep)) == NULL) 1127 return (-1); 1128 1129 /* check for parented */ 1130 if ((! (options & PRINT_SUBDEVS)) && 1131 (MD_HAS_PARENT(raidp->common.parent))) { 1132 return (0); 1133 } 1134 1135 /* print appropriate detail */ 1136 if (options & PRINT_SHORT) { 1137 if (raid_print(raidp, fname, fp, options, ep) != 0) 1138 return (-1); 1139 } else { 1140 if (raid_report(sp, raidp, fname, fp, options, ep) != 0) 1141 return (-1); 1142 } 1143 1144 /* Recurse on components that are metadevices */ 1145 for (col = 0; col < raidp->cols.cols_len; ++col) { 1146 md_raidcol_t *colp = &raidp->cols.cols_val[col]; 1147 mdname_t *namep = colp->colnamep; 1148 1149 if ((metaismeta(namep)) && 1150 (meta_print_name(sp, namep, nlpp, fname, fp, 1151 (options | PRINT_HEADER | PRINT_SUBDEVS), 1152 NULL, ep) != 0)) { 1153 return (-1); 1154 } 1155 } 1156 1157 return (0); 1158 } 1159 1160 /* 1161 * adjust raid geometry 1162 */ 1163 static int 1164 adjust_geom( 1165 mdname_t *raidnp, 1166 mdname_t *colnp, 1167 mr_unit_t *mr, 1168 md_error_t *ep 1169 ) 1170 { 1171 uint_t round_cyl = 1; 1172 mdgeom_t *geomp; 1173 1174 /* get reinstructs */ 1175 if ((geomp = metagetgeom(colnp, ep)) == NULL) 1176 return (-1); 1177 1178 /* adjust geometry */ 1179 if (meta_adjust_geom((md_unit_t *)mr, raidnp, geomp->write_reinstruct, 1180 geomp->read_reinstruct, round_cyl, ep) != 0) 1181 return (-1); 1182 1183 /* return success */ 1184 return (0); 1185 } 1186 1187 /* 1188 * add another column to the raid unit structure 1189 */ 1190 static int 1191 attach_raid_col( 1192 mdsetname_t *sp, 1193 mdname_t *raidnp, 1194 mr_unit_t *mr, 1195 mr_column_t *mdc, 1196 mdname_t *colnp, 1197 rcs_state_t state, 1198 mdnamelist_t **keynlpp, 1199 mdcmdopts_t options, 1200 md_error_t *ep 1201 ) 1202 { 1203 diskaddr_t column_size = mr->un_segsize * mr->un_segsincolumn; 1204 diskaddr_t size; 1205 uint_t maxio; 1206 mdcinfo_t *cinfop; 1207 md_timeval32_t tmp_time; 1208 1209 /* setup state and timestamp */ 1210 mdc->un_devstate = state; 1211 if (meta_gettimeofday(&tmp_time) == -1) 1212 return (mdsyserror(ep, errno, NULL)); 1213 1214 mdc->un_devtimestamp = tmp_time; 1215 /* get start, size, and maxio */ 1216 if ((mdc->un_orig_devstart = metagetstart(sp, colnp, ep)) == 1217 MD_DISKADDR_ERROR) 1218 return (-1); 1219 if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR) 1220 return (-1); 1221 if ((cinfop = metagetcinfo(colnp, ep)) == NULL) 1222 return (-1); 1223 maxio = cinfop->maxtransfer; 1224 1225 /* adjust start and size by prewrite */ 1226 mdc->un_orig_pwstart = mdc->un_orig_devstart; 1227 mdc->un_orig_devstart += mr->un_pwsize; 1228 1229 /* make sure we still have something left */ 1230 if ((mdc->un_orig_devstart >= size) || 1231 ((size - mdc->un_orig_devstart) < column_size)) { 1232 return (mdsyserror(ep, ENOSPC, colnp->cname)); 1233 } 1234 size -= mdc->un_orig_devstart; 1235 if (maxio < mr->un_maxio) { 1236 return (mdcomperror(ep, MDE_MAXIO, 1237 meta_getminor(raidnp->dev), colnp->dev, colnp->cname)); 1238 } 1239 1240 if (options & MDCMD_DOIT) { 1241 /* store name in namespace */ 1242 if (add_key_name(sp, colnp, keynlpp, ep) != 0) 1243 return (-1); 1244 } 1245 1246 /* setup column */ 1247 mdc->un_orig_dev = colnp->dev; 1248 mdc->un_orig_key = colnp->key; 1249 mdc->un_dev = colnp->dev; 1250 mdc->un_pwstart = mdc->un_orig_pwstart; 1251 mdc->un_devstart = mdc->un_orig_devstart; 1252 mdc->un_alt_dev = NODEV64; 1253 mdc->un_alt_pwstart = 0; 1254 mdc->un_alt_devstart = 0; 1255 mdc->un_hs_id = 0; 1256 1257 /* add the size (we use) of the device to the total */ 1258 mr->c.un_actual_tb += column_size; 1259 1260 /* adjust geometry */ 1261 if (adjust_geom(raidnp, colnp, mr, ep) != 0) 1262 return (-1); 1263 1264 /* count column */ 1265 mr->un_totalcolumncnt++; 1266 1267 /* return success */ 1268 return (0); 1269 } 1270 1271 /* 1272 * invalidate column names 1273 */ 1274 static int 1275 invalidate_columns( 1276 mdsetname_t *sp, 1277 mdname_t *raidnp, 1278 md_error_t *ep 1279 ) 1280 { 1281 md_raid_t *raidp; 1282 uint_t col; 1283 1284 if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL) 1285 return (-1); 1286 for (col = 0; (col < raidp->cols.cols_len); ++col) { 1287 md_raidcol_t *cp = &raidp->cols.cols_val[col]; 1288 mdname_t *colnp = cp->colnamep; 1289 1290 meta_invalidate_name(colnp); 1291 } 1292 return (0); 1293 } 1294 1295 /* 1296 * attach columns to raid 1297 */ 1298 int 1299 meta_raid_attach( 1300 mdsetname_t *sp, 1301 mdname_t *raidnp, 1302 mdnamelist_t *colnlp, 1303 mdcmdopts_t options, 1304 md_error_t *ep 1305 ) 1306 { 1307 uint_t concat_cnt = 0; 1308 mdnamelist_t *p; 1309 mr_unit_t *old_mr; 1310 mr_unit_t *new_mr; 1311 size_t old_rusize; 1312 size_t new_rusize; 1313 mdnamelist_t *keynlp = NULL; 1314 md_grow_params_t mgp; 1315 int rval = -1; 1316 int create_flag = MD_CRO_32BIT; 1317 1318 /* should have a set */ 1319 assert(sp != NULL); 1320 assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))); 1321 1322 /* check type */ 1323 if (metachkmeta(raidnp, ep) != 0) 1324 return (-1); 1325 1326 /* check and count new columns */ 1327 for (p = colnlp; (p != NULL); p = p->next) { 1328 mdname_t *np = p->namep; 1329 mdnamelist_t *p2; 1330 1331 /* check against existing devices */ 1332 if (meta_check_column(sp, np, ep) != 0) 1333 return (-1); 1334 1335 /* check against ourselves */ 1336 for (p2 = p->next; (p2 != NULL); p2 = p2->next) { 1337 if (meta_check_overlap(np->cname, np, 0, -1, 1338 p2->namep, 0, -1, ep) != 0) { 1339 return (-1); 1340 } 1341 } 1342 1343 /* count */ 1344 ++concat_cnt; 1345 } 1346 1347 /* get old unit */ 1348 if ((old_mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL) 1349 return (-1); 1350 1351 /* 1352 * calculate the size needed for the new raid unit and allocate 1353 * the appropriate structure. allocate new unit. 1354 */ 1355 old_rusize = sizeof (*old_mr) - sizeof (old_mr->un_column[0]); 1356 old_rusize += old_mr->un_totalcolumncnt * sizeof (old_mr->un_column[0]); 1357 new_rusize = sizeof (*new_mr) - sizeof (new_mr->un_column[0]); 1358 new_rusize += (old_mr->un_totalcolumncnt + concat_cnt) 1359 * sizeof (new_mr->un_column[0]); 1360 new_mr = Zalloc(new_rusize); 1361 (void) memcpy(new_mr, old_mr, old_rusize); 1362 1363 /* We always want a do-it, this is for attach_raid_col below */ 1364 options |= MDCMD_DOIT; 1365 1366 /* build new unit structure */ 1367 for (p = colnlp; (p != NULL); p = p->next) { 1368 mdname_t *colnp = p->namep; 1369 mr_column_t *mdc; 1370 1371 /* attach column */ 1372 mdc = &new_mr->un_column[new_mr->un_totalcolumncnt]; 1373 if (attach_raid_col(sp, raidnp, new_mr, mdc, colnp, 1374 RCS_INIT, &keynlp, options, ep) != 0) { 1375 goto out; 1376 } 1377 } 1378 assert(new_mr->un_totalcolumncnt 1379 == (old_mr->un_totalcolumncnt + concat_cnt)); 1380 1381 1382 create_flag = meta_check_devicesize(new_mr->c.un_total_blocks); 1383 1384 /* grow raid */ 1385 (void) memset(&mgp, 0, sizeof (mgp)); 1386 mgp.mnum = MD_SID(new_mr); 1387 MD_SETDRIVERNAME(&mgp, MD_RAID, sp->setno); 1388 mgp.size = new_rusize; 1389 mgp.mdp = (uintptr_t)new_mr; 1390 1391 if (create_flag == MD_CRO_32BIT) { 1392 mgp.options = MD_CRO_32BIT; 1393 new_mr->c.un_revision = MD_32BIT_META_DEV; 1394 } else { 1395 mgp.options = MD_CRO_64BIT; 1396 new_mr->c.un_revision = MD_64BIT_META_DEV; 1397 } 1398 if (metaioctl(MD_IOCGROW, &mgp, &mgp.mde, NULL) != 0) { 1399 (void) mdstealerror(ep, &mgp.mde); 1400 goto out; 1401 } 1402 1403 /* clear cache */ 1404 if (invalidate_columns(sp, raidnp, ep) != 0) 1405 goto out; 1406 meta_invalidate_name(raidnp); 1407 1408 /* let em know */ 1409 if (options & MDCMD_PRINT) { 1410 if (concat_cnt == 1) { 1411 (void) printf(dgettext(TEXT_DOMAIN, 1412 "%s: component is attached\n"), 1413 raidnp->cname); 1414 } else { 1415 (void) printf(dgettext(TEXT_DOMAIN, 1416 "%s: components are attached\n"), 1417 raidnp->cname); 1418 } 1419 (void) fflush(stdout); 1420 } 1421 1422 1423 /* grow any parents */ 1424 if (meta_concat_parent(sp, raidnp, ep) != 0) 1425 goto out; 1426 rval = 0; /* success */ 1427 1428 /* cleanup, return error */ 1429 out: 1430 Free(old_mr); 1431 Free(new_mr); 1432 if (rval != 0) 1433 (void) del_key_names(sp, keynlp, NULL); 1434 metafreenamelist(keynlp); 1435 return (rval); 1436 } 1437 1438 /* 1439 * get raid parameters 1440 */ 1441 int 1442 meta_raid_get_params( 1443 mdsetname_t *sp, 1444 mdname_t *raidnp, 1445 mr_params_t *paramsp, 1446 md_error_t *ep 1447 ) 1448 { 1449 md_raid_t *raidp; 1450 1451 /* should have a set */ 1452 assert(sp != NULL); 1453 assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))); 1454 1455 /* check name */ 1456 if (metachkmeta(raidnp, ep) != 0) 1457 return (-1); 1458 1459 /* get unit */ 1460 if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL) 1461 return (-1); 1462 1463 /* return parameters */ 1464 (void) memset(paramsp, 0, sizeof (*paramsp)); 1465 if (raidp->hspnamep == NULL) 1466 paramsp->hsp_id = MD_HSP_NONE; 1467 else 1468 paramsp->hsp_id = raidp->hspnamep->hsp; 1469 return (0); 1470 } 1471 1472 /* 1473 * set raid parameters 1474 */ 1475 int 1476 meta_raid_set_params( 1477 mdsetname_t *sp, 1478 mdname_t *raidnp, 1479 mr_params_t *paramsp, 1480 md_error_t *ep 1481 ) 1482 { 1483 md_raid_params_t msp; 1484 1485 /* should have a set */ 1486 assert(sp != NULL); 1487 assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))); 1488 1489 /* check name */ 1490 if (metachkmeta(raidnp, ep) != 0) 1491 return (-1); 1492 1493 /* set parameters */ 1494 (void) memset(&msp, 0, sizeof (msp)); 1495 MD_SETDRIVERNAME(&msp, MD_RAID, sp->setno); 1496 msp.mnum = meta_getminor(raidnp->dev); 1497 msp.params = *paramsp; 1498 if (metaioctl(MD_IOCCHANGE, &msp, &msp.mde, raidnp->cname) != 0) 1499 return (mdstealerror(ep, &msp.mde)); 1500 1501 /* clear cache */ 1502 meta_invalidate_name(raidnp); 1503 1504 /* return success */ 1505 return (0); 1506 } 1507 1508 /* 1509 * validate raid replace column 1510 */ 1511 static int 1512 validate_new_raid( 1513 mdsetname_t *sp, 1514 mdname_t *raidnp, 1515 mdname_t *colnp, 1516 replace_params_t *paramsp, 1517 int dup_ok, 1518 md_error_t *ep 1519 ) 1520 { 1521 mr_unit_t *mr; 1522 diskaddr_t column_size; 1523 diskaddr_t label; 1524 mdcinfo_t *cinfop; 1525 int rval = -1; 1526 1527 /* get raid unit */ 1528 if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL) 1529 return (-1); 1530 column_size = mr->un_segsize * mr->un_segsincolumn; 1531 1532 /* check it out */ 1533 if (meta_check_column(sp, colnp, ep) != 0) { 1534 if ((! dup_ok) || (! mdisuseerror(ep, MDE_ALREADY))) 1535 goto out; 1536 mdclrerror(ep); 1537 } 1538 if ((paramsp->number_blks = metagetsize(colnp, ep)) == 1539 MD_DISKADDR_ERROR) 1540 goto out; 1541 if ((label = metagetlabel(colnp, ep)) == MD_DISKADDR_ERROR) 1542 goto out; 1543 paramsp->has_label = ((label > 0) ? 1 : 0); 1544 if ((paramsp->start_blk = metagetstart(sp, colnp, ep)) == 1545 MD_DISKADDR_ERROR) 1546 goto out; 1547 if ((paramsp->number_blks - paramsp->start_blk) < column_size) { 1548 (void) mdsyserror(ep, ENOSPC, colnp->cname); 1549 goto out; 1550 } 1551 if ((cinfop = metagetcinfo(colnp, ep)) == NULL) 1552 goto out; 1553 if (cinfop->maxtransfer < mr->un_maxio) { 1554 (void) mdcomperror(ep, MDE_MAXIO, meta_getminor(raidnp->dev), 1555 colnp->dev, colnp->cname); 1556 goto out; 1557 } 1558 1559 /* success */ 1560 rval = 0; 1561 1562 /* cleanup, return error */ 1563 out: 1564 Free(mr); 1565 return (rval); 1566 } 1567 1568 /* 1569 * replace raid column 1570 */ 1571 int 1572 meta_raid_replace( 1573 mdsetname_t *sp, 1574 mdname_t *raidnp, 1575 mdname_t *oldnp, 1576 mdname_t *newnp, 1577 mdcmdopts_t options, 1578 md_error_t *ep 1579 ) 1580 { 1581 int force = ((options & MDCMD_FORCE) ? 1 : 0); 1582 replace_params_t params; 1583 md_dev64_t old_dev, new_dev; 1584 diskaddr_t new_start_blk, new_end_blk; 1585 int rebind; 1586 mr_unit_t *mr; 1587 char *new_devidp = NULL; 1588 md_error_t xep = mdnullerror; 1589 int ret; 1590 md_set_desc *sd; 1591 uint_t tstate; 1592 1593 /* should have same set */ 1594 assert(sp != NULL); 1595 assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))); 1596 1597 /* check name */ 1598 if (metachkmeta(raidnp, ep) != 0) 1599 return (-1); 1600 1601 /* save new binding incase this is a rebind where oldnp==newnp */ 1602 new_dev = newnp->dev; 1603 new_start_blk = newnp->start_blk; 1604 new_end_blk = newnp->end_blk; 1605 1606 /* invalidate, then get the raid (fill in oldnp from metadb) */ 1607 meta_invalidate_name(raidnp); 1608 if (meta_get_raid(sp, raidnp, ep) == NULL) 1609 return (-1); 1610 1611 /* can't replace a component if the raid inaccessible */ 1612 if (meta_get_tstate(raidnp->dev, &tstate, ep) != 0) { 1613 return (-1); 1614 } 1615 if (tstate & MD_INACCESSIBLE) { 1616 return (mdmderror(ep, MDE_IN_UNAVAIL_STATE, 1617 meta_getminor(raidnp->dev), raidnp->cname)); 1618 } 1619 1620 /* the old device binding is now established */ 1621 if ((old_dev = oldnp->dev) == NODEV64) 1622 return (mdsyserror(ep, ENODEV, oldnp->cname)); 1623 1624 1625 /* setup raid info */ 1626 (void) memset(¶ms, 0, sizeof (params)); 1627 params.mnum = meta_getminor(raidnp->dev); 1628 MD_SETDRIVERNAME(¶ms, MD_RAID, sp->setno); 1629 params.old_dev = old_dev; 1630 params.cmd = force ? FORCE_REPLACE_COMP : REPLACE_COMP; 1631 1632 if (options & MDCMD_CLUSTER_REPLACE) { 1633 if ((mr = (mr_unit_t *)meta_get_mdunit(sp, raidnp, ep)) == NULL) 1634 return (NULL); 1635 Free(mr); 1636 params.options = MDIOCTL_NO_RESYNC_RAID; 1637 params.number_blks = metagetsize(newnp, ep); 1638 if ((metagetlabel(newnp, ep) == MD_DISKADDR_ERROR) || 1639 (metagetlabel(newnp, ep) == 0)) 1640 params.has_label = 0; 1641 else 1642 params.has_label = 1; 1643 params.start_blk = metagetstart(sp, newnp, ep); 1644 } else { 1645 if ((strcmp(oldnp->rname, newnp->rname) == 0) && 1646 (old_dev != new_dev)) { 1647 rebind = 1; 1648 } else { 1649 rebind = 0; 1650 } 1651 if (rebind) { 1652 newnp->dev = new_dev; 1653 newnp->start_blk = new_start_blk; 1654 newnp->end_blk = new_end_blk; 1655 } 1656 1657 /* 1658 * Save a copy of the devid associated with the new disk, the 1659 * reason is that the checks for the column (meta_check_column) 1660 * via validate_new_raid(), could cause the disk's devid to be 1661 * changed to that of the devid that is currently stored in the 1662 * replica namespace for the disk in question. This devid could 1663 * be stale if we are replacing the disk. The actual function 1664 * that overwrites the devid is dr2drivedesc(). 1665 */ 1666 1667 /* don't setup new_devid if no devid's or MN diskset */ 1668 if (newnp->drivenamep->devid != NULL) 1669 new_devidp = Strdup(newnp->drivenamep->devid); 1670 1671 if (!metaislocalset(sp)) { 1672 if ((sd = metaget_setdesc(sp, ep)) == NULL) 1673 return (-1); 1674 if (MD_MNSET_DESC(sd)) 1675 new_devidp = NULL; 1676 } 1677 1678 /* check out new (sets up start_blk, has_label, number_blks) */ 1679 if (validate_new_raid(sp, raidnp, newnp, ¶ms, rebind, 1680 ep) != 0) { 1681 Free(new_devidp); 1682 return (-1); 1683 } 1684 1685 /* 1686 * Copy back the saved devid. 1687 */ 1688 Free(newnp->drivenamep->devid); 1689 if (new_devidp) { 1690 newnp->drivenamep->devid = Strdup(new_devidp); 1691 Free(new_devidp); 1692 } 1693 } 1694 1695 /* store name in namespace, allocate new key */ 1696 if (add_key_name(sp, newnp, NULL, ep) != 0) 1697 return (-1); 1698 1699 if (rebind && !metaislocalset(sp)) { 1700 /* 1701 * We are 'rebind'ing a disk that is in a diskset so as well 1702 * as updating the diskset's namespace the local set needs 1703 * to be updated because it also contains a reference to the 1704 * disk in question. 1705 */ 1706 ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET, 1707 newnp->cname, ep); 1708 1709 if (ret != METADEVADM_SUCCESS) { 1710 (void) del_key_name(sp, newnp, &xep); 1711 return (-1); 1712 } 1713 } 1714 1715 /* replace column */ 1716 params.new_dev = new_dev; 1717 params.new_key = newnp->key; 1718 if (metaioctl(MD_IOCREPLACE, ¶ms, ¶ms.mde, NULL) != 0) { 1719 (void) del_key_name(sp, newnp, ep); 1720 return (mdstealerror(ep, ¶ms.mde)); 1721 } 1722 1723 /* clear cache */ 1724 meta_invalidate_name(oldnp); 1725 meta_invalidate_name(newnp); 1726 meta_invalidate_name(raidnp); 1727 1728 /* let em know */ 1729 if (options & MDCMD_PRINT) { 1730 (void) printf(dgettext(TEXT_DOMAIN, 1731 "%s: device %s is replaced with %s\n"), 1732 raidnp->cname, oldnp->cname, newnp->cname); 1733 (void) fflush(stdout); 1734 } 1735 1736 /* return success */ 1737 return (0); 1738 } 1739 1740 /* 1741 * enable raid column 1742 */ 1743 int 1744 meta_raid_enable( 1745 mdsetname_t *sp, 1746 mdname_t *raidnp, 1747 mdname_t *colnp, 1748 mdcmdopts_t options, 1749 md_error_t *ep 1750 ) 1751 { 1752 int force = ((options & MDCMD_FORCE) ? 1 : 0); 1753 replace_params_t params; 1754 md_dev64_t fs_dev, del_dev; 1755 int err = 0; 1756 char *devnm; 1757 int ret; 1758 uint_t tstate; 1759 1760 /* should have same set */ 1761 assert(sp != NULL); 1762 assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))); 1763 1764 /* check name */ 1765 if (metachkmeta(raidnp, ep) != 0) 1766 return (-1); 1767 1768 /* get the file_system dev binding */ 1769 if (meta_getdev(sp, colnp, ep) != 0) 1770 return (-1); 1771 fs_dev = colnp->dev; 1772 1773 /* get the raid unit (fill in colnp->dev with metadb version) */ 1774 meta_invalidate_name(raidnp); 1775 if (meta_get_raid(sp, raidnp, ep) == NULL) 1776 return (-1); 1777 1778 /* enabling a component can't work if the raid inaccessible */ 1779 if (meta_get_tstate(raidnp->dev, &tstate, ep) != 0) { 1780 return (-1); 1781 } 1782 if (tstate & MD_INACCESSIBLE) { 1783 return (mdmderror(ep, MDE_IN_UNAVAIL_STATE, 1784 meta_getminor(raidnp->dev), raidnp->cname)); 1785 } 1786 1787 /* the metadb device binding is now established */ 1788 if (colnp->dev == NODEV64) 1789 return (mdsyserror(ep, ENODEV, colnp->cname)); 1790 1791 /* 1792 * check for the case where the dev_t has changed between the 1793 * filesystem and the metadb. This is called a rebind, and 1794 * is handled by meta_raid_replace. 1795 */ 1796 if (fs_dev != colnp->dev) { 1797 /* 1798 * Save the devt of mddb version 1799 */ 1800 del_dev = colnp->dev; 1801 1802 /* establish file system binding with invalid start/end */ 1803 colnp->dev = fs_dev; 1804 colnp->start_blk = -1; 1805 colnp->end_blk = -1; 1806 err = meta_raid_replace(sp, raidnp, colnp, colnp, options, ep); 1807 1808 /* 1809 * Don't do it if meta_raid_replace returns an error 1810 */ 1811 if (!err && (devnm = meta_getnmentbydev(sp->setno, MD_SIDEWILD, 1812 del_dev, NULL, NULL, &colnp->key, ep)) != NULL) { 1813 (void) del_key_name(sp, colnp, ep); 1814 Free(devnm); 1815 } 1816 return (err); 1817 } 1818 1819 /* setup raid info */ 1820 (void) memset(¶ms, 0, sizeof (params)); 1821 params.mnum = meta_getminor(raidnp->dev); 1822 MD_SETDRIVERNAME(¶ms, MD_RAID, sp->setno); 1823 params.old_dev = params.new_dev = colnp->dev; 1824 if (force) 1825 params.cmd = FORCE_ENABLE_COMP; 1826 else 1827 params.cmd = ENABLE_COMP; 1828 1829 /* check it out */ 1830 if (validate_new_raid(sp, raidnp, colnp, ¶ms, 1, ep) != 0) 1831 return (-1); 1832 1833 /* enable column */ 1834 if (metaioctl(MD_IOCREPLACE, ¶ms, ¶ms.mde, NULL) != 0) 1835 return (mdstealerror(ep, ¶ms.mde)); 1836 1837 /* 1838 * are we dealing with a non-local set? If so need to update the 1839 * local namespace so that the disk record has the correct devid. 1840 */ 1841 if (!metaislocalset(sp)) { 1842 ret = meta_fixdevid(sp, DEV_UPDATE|DEV_LOCAL_SET, colnp->cname, 1843 ep); 1844 1845 if (ret != METADEVADM_SUCCESS) { 1846 /* 1847 * Failed to update the local set. Nothing to do here 1848 * apart from report the error. The namespace is 1849 * most likely broken and some form of remedial 1850 * recovery is going to be required. 1851 */ 1852 mde_perror(ep, ""); 1853 mdclrerror(ep); 1854 } 1855 } 1856 1857 /* clear cache */ 1858 meta_invalidate_name(colnp); 1859 meta_invalidate_name(raidnp); 1860 1861 /* let em know */ 1862 if (options & MDCMD_PRINT) { 1863 (void) printf(dgettext(TEXT_DOMAIN, 1864 "%s: device %s is enabled\n"), 1865 raidnp->cname, colnp->cname); 1866 (void) fflush(stdout); 1867 } 1868 1869 /* return success */ 1870 return (0); 1871 } 1872 1873 /* 1874 * check for dups in the raid itself 1875 */ 1876 static int 1877 check_twice( 1878 md_raid_t *raidp, 1879 uint_t col, 1880 md_error_t *ep 1881 ) 1882 { 1883 mdname_t *raidnp = raidp->common.namep; 1884 mdname_t *thisnp; 1885 uint_t c; 1886 1887 thisnp = raidp->cols.cols_val[col].colnamep; 1888 for (c = 0; (c < col); ++c) { 1889 md_raidcol_t *mdcp = &raidp->cols.cols_val[c]; 1890 mdname_t *colnp = mdcp->colnamep; 1891 1892 if (meta_check_overlap(raidnp->cname, thisnp, 0, -1, 1893 colnp, 0, -1, ep) != 0) { 1894 return (-1); 1895 } 1896 } 1897 return (0); 1898 } 1899 1900 /* 1901 * default raid interlace 1902 */ 1903 diskaddr_t 1904 meta_default_raid_interlace(void) 1905 { 1906 diskaddr_t interlace; 1907 1908 /* default to 512k, round up if necessary */ 1909 interlace = btodb(512 * 1024); 1910 if (interlace < lbtodb(MININTERLACE)) 1911 interlace = roundup(MININTERLACE, interlace); 1912 return (interlace); 1913 } 1914 1915 /* 1916 * convert interlaces 1917 */ 1918 int 1919 meta_raid_check_interlace( 1920 diskaddr_t interlace, 1921 char *uname, 1922 md_error_t *ep 1923 ) 1924 { 1925 if ((interlace < btodb(RAID_MIN_INTERLACE)) || 1926 (interlace > btodb(MAXINTERLACE))) { 1927 return (mderror(ep, MDE_BAD_INTERLACE, uname)); 1928 } 1929 return (0); 1930 } 1931 1932 /* 1933 * check raid 1934 */ 1935 int 1936 meta_check_raid( 1937 mdsetname_t *sp, 1938 md_raid_t *raidp, 1939 mdcmdopts_t options, 1940 md_error_t *ep 1941 ) 1942 { 1943 mdname_t *raidnp = raidp->common.namep; 1944 int doit = ((options & MDCMD_DOIT) ? 1 : 0); 1945 int updateit = ((options & MDCMD_UPDATE) ? 1 : 0); 1946 uint_t ncol; 1947 uint_t col; 1948 minor_t mnum = meta_getminor(raidnp->dev); 1949 1950 /* check number */ 1951 if (((ncol = raidp->cols.cols_len) < MD_RAID_MIN) || 1952 (raidp->orig_ncol > ncol)) { 1953 return (mdmderror(ep, MDE_BAD_RAID, mnum, raidnp->cname)); 1954 } 1955 1956 /* compute default interlace */ 1957 if (raidp->interlace == 0) { 1958 raidp->interlace = meta_default_raid_interlace(); 1959 } 1960 1961 /* check state */ 1962 switch (raidp->state) { 1963 case RUS_INIT: 1964 case RUS_OKAY: 1965 break; 1966 1967 default: 1968 return (mdmderror(ep, MDE_BAD_RAID, mnum, raidnp->cname)); 1969 } 1970 1971 /* check interlace */ 1972 if (meta_raid_check_interlace(raidp->interlace, raidnp->cname, ep) != 0) 1973 return (-1); 1974 1975 /* check hotspare pool name */ 1976 if (doit) { 1977 if ((raidp->hspnamep != NULL) && 1978 (metachkhsp(sp, raidp->hspnamep, ep) != 0)) { 1979 return (-1); 1980 } 1981 } 1982 1983 /* check columns */ 1984 for (col = 0; (col < ncol); ++col) { 1985 md_raidcol_t *mdcp = &raidp->cols.cols_val[col]; 1986 mdname_t *colnp = mdcp->colnamep; 1987 diskaddr_t start_blk, size; 1988 1989 /* setup column */ 1990 if (raidp->state == RUS_INIT) 1991 mdcp->state = RCS_INIT; 1992 else 1993 mdcp->state = RCS_OKAY; 1994 1995 /* check column */ 1996 if (!updateit) { 1997 if (meta_check_column(sp, colnp, ep) != 0) 1998 return (-1); 1999 if (((start_blk = metagetstart(sp, colnp, ep)) == 2000 MD_DISKADDR_ERROR) || ((size = metagetsize(colnp, 2001 ep)) == MD_DISKADDR_ERROR)) { 2002 return (-1); 2003 } 2004 if (start_blk >= size) 2005 return (mdsyserror(ep, ENOSPC, colnp->cname)); 2006 size -= start_blk; 2007 size = rounddown(size, raidp->interlace); 2008 if (size == 0) 2009 return (mdsyserror(ep, ENOSPC, colnp->cname)); 2010 } 2011 2012 /* check this raid too */ 2013 if (check_twice(raidp, col, ep) != 0) 2014 return (-1); 2015 } 2016 2017 /* return success */ 2018 return (0); 2019 } 2020 2021 /* 2022 * setup raid geometry 2023 */ 2024 static int 2025 raid_geom( 2026 md_raid_t *raidp, 2027 mr_unit_t *mr, 2028 md_error_t *ep 2029 ) 2030 { 2031 uint_t write_reinstruct = 0; 2032 uint_t read_reinstruct = 0; 2033 uint_t round_cyl = 1; 2034 uint_t col; 2035 mdgeom_t *geomp; 2036 2037 /* get worst reinstructs */ 2038 for (col = 0; (col < raidp->cols.cols_len); ++col) { 2039 md_raidcol_t *mdcp = &raidp->cols.cols_val[col]; 2040 mdname_t *colnp = mdcp->colnamep; 2041 2042 if ((geomp = metagetgeom(colnp, ep)) == NULL) 2043 return (-1); 2044 if (geomp->write_reinstruct > write_reinstruct) 2045 write_reinstruct = geomp->write_reinstruct; 2046 if (geomp->read_reinstruct > read_reinstruct) 2047 read_reinstruct = geomp->read_reinstruct; 2048 } 2049 2050 /* setup geometry from first column */ 2051 assert(raidp->cols.cols_len > 0); 2052 if ((geomp = metagetgeom(raidp->cols.cols_val[0].colnamep, 2053 ep)) == NULL) { 2054 return (-1); 2055 } 2056 if (meta_setup_geom((md_unit_t *)mr, raidp->common.namep, geomp, 2057 write_reinstruct, read_reinstruct, round_cyl, ep) != 0) 2058 return (-1); 2059 2060 /* return success */ 2061 return (0); 2062 } 2063 2064 int 2065 meta_raid_state_cnt(mr_unit_t *mr, rcs_state_t state) 2066 { 2067 int statecnt = 0; 2068 int col; 2069 2070 for (col = 0; col < mr->un_totalcolumncnt; col++) 2071 if (mr->un_column[col].un_devstate & state) 2072 statecnt++; 2073 return (statecnt); 2074 } 2075 /* 2076 * validate that a raid device being created with the -k flag is a real 2077 * raid device 2078 */ 2079 int 2080 meta_raid_valid(md_raid_t *raidp, mr_unit_t *mr) 2081 { 2082 long long buf[DEV_BSIZE / sizeof (long long)]; 2083 raid_pwhdr_t pwhdr; 2084 raid_pwhdr_t *rpw = &pwhdr; 2085 minor_t mnum; 2086 int col; 2087 int fd; 2088 2089 for (col = 0; col < mr->un_totalcolumncnt; col++) { 2090 md_raidcol_t *cp = &raidp->cols.cols_val[col]; 2091 mdname_t *colnp = cp->colnamep; 2092 2093 if ((fd = open(colnp->rname, O_RDONLY)) < 0) 2094 goto error_exit; 2095 2096 if (lseek64(fd, 2097 (mr->un_column[col].un_pwstart * DEV_BSIZE), SEEK_SET) < 0) 2098 goto error_exit; 2099 2100 if (read(fd, buf, DEV_BSIZE) < 0) 2101 goto error_exit; 2102 2103 /* 2104 * If our raid device is a 64 bit device, we can accept the 2105 * pw header we just read in. 2106 * Otherwise it's of type raid_pwhdr32_od_t and has to 2107 * be converted. 2108 */ 2109 if (mr->c.un_revision == MD_64BIT_META_DEV) { 2110 rpw = (raid_pwhdr_t *)buf; 2111 } else { 2112 RAID_CONVERT_RPW((raid_pwhdr32_od_t *)buf, rpw); 2113 } 2114 2115 if (rpw->rpw_column != col) 2116 goto error_exit; 2117 2118 if (col == 0) 2119 mnum = rpw->rpw_unit; 2120 2121 if (rpw->rpw_unit != mnum) 2122 goto error_exit; 2123 2124 if (rpw->rpw_magic_ext == RAID_PWMAGIC) { 2125 /* 4.1 prewrite header */ 2126 if ((rpw->rpw_origcolumncnt != mr->un_origcolumncnt) || 2127 (rpw->rpw_totalcolumncnt 2128 != mr->un_totalcolumncnt) || 2129 (rpw->rpw_segsize != mr->un_segsize) || 2130 (rpw->rpw_segsincolumn != mr->un_segsincolumn) || 2131 (rpw->rpw_pwcnt != mr->un_pwcnt) || 2132 (rpw->rpw_pwstart != 2133 mr->un_column[col].un_pwstart) || 2134 (rpw->rpw_devstart != 2135 mr->un_column[col].un_devstart) || 2136 (rpw->rpw_pwsize != mr->un_pwsize)) 2137 goto error_exit; 2138 } 2139 /* 2140 * this is an old prewrite header (4.0) the unit structure 2141 * will have to be trusted. 2142 */ 2143 (void) close(fd); 2144 } 2145 2146 return (0); 2147 2148 error_exit: 2149 (void) close(fd); 2150 return (-1); 2151 } 2152 2153 /* 2154 * create raid 2155 */ 2156 int 2157 meta_create_raid( 2158 mdsetname_t *sp, 2159 md_raid_t *raidp, 2160 mdcmdopts_t options, 2161 md_error_t *ep 2162 ) 2163 { 2164 mdname_t *raidnp = raidp->common.namep; 2165 uint_t ncol = raidp->cols.cols_len; 2166 uint_t orig_ncol = raidp->orig_ncol; 2167 size_t rdsize; 2168 mr_unit_t *mr; 2169 uint_t col; 2170 diskaddr_t disk_size = 0; 2171 uint_t disk_maxio = 0; 2172 uint_t pwes; 2173 diskaddr_t non_pw_blks, column_size; 2174 mdnamelist_t *keynlp = NULL; 2175 md_set_params_t set_params; 2176 int rval = -1; 2177 md_timeval32_t creation_time; 2178 int create_flag = MD_CRO_32BIT; 2179 2180 /* validate raid */ 2181 if (meta_check_raid(sp, raidp, options, ep) != 0) 2182 return (-1); 2183 2184 /* allocate raid unit */ 2185 rdsize = sizeof (*mr) - sizeof (mr->un_column[0]); 2186 rdsize += ncol * sizeof (mr->un_column[0]); 2187 mr = Zalloc(rdsize); 2188 2189 if (meta_gettimeofday(&creation_time) == -1) 2190 return (mdsyserror(ep, errno, NULL)); 2191 /* 2192 * initialize the top level mr_unit_t structure 2193 * setup the unit state to indicate whether to retain 2194 * any data currently on the metadevice or to clear it 2195 */ 2196 mr->c.un_type = MD_METARAID; 2197 MD_SID(mr) = meta_getminor(raidnp->dev); 2198 mr->c.un_size = rdsize; 2199 mr->un_magic = RAID_UNMAGIC; 2200 mr->un_state = raidp->state; 2201 mr->un_timestamp = creation_time; 2202 mr->un_origcolumncnt = orig_ncol; 2203 mr->un_segsize = (uint_t)raidp->interlace; 2204 if (raidp->hspnamep != NULL) { 2205 mr->un_hsp_id = raidp->hspnamep->hsp; 2206 } else { 2207 mr->un_hsp_id = MD_HSP_NONE; 2208 } 2209 /* 2210 * setup original columns, saving start_block and 2211 * finding smallest size and maxio 2212 */ 2213 for (col = 0; (col < orig_ncol); ++col) { 2214 md_raidcol_t *cp = &raidp->cols.cols_val[col]; 2215 mdname_t *colnp = cp->colnamep; 2216 mr_column_t *mdc = &mr->un_column[col]; 2217 diskaddr_t size; 2218 uint_t maxio; 2219 mdcinfo_t *cinfop; 2220 2221 /* setup state */ 2222 mdc->un_devstate = cp->state; 2223 2224 /* setup creation time */ 2225 mdc->un_devtimestamp = creation_time; 2226 2227 /* get start, size, and maxio */ 2228 if ((mdc->un_orig_devstart = metagetstart(sp, colnp, ep)) == 2229 MD_DISKADDR_ERROR) 2230 goto out; 2231 if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR) 2232 goto out; 2233 size -= mdc->un_orig_devstart; 2234 if ((cinfop = metagetcinfo(colnp, ep)) == NULL) 2235 goto out; 2236 maxio = cinfop->maxtransfer; 2237 2238 if (options & MDCMD_DOIT) { 2239 /* store name in namespace */ 2240 if (add_key_name(sp, colnp, &keynlp, ep) != 0) 2241 goto out; 2242 } 2243 2244 /* setup column */ 2245 mdc->un_orig_key = colnp->key; 2246 mdc->un_orig_dev = colnp->dev; 2247 mdc->un_dev = mdc->un_orig_dev; 2248 mdc->un_pwstart = mdc->un_orig_pwstart; 2249 mdc->un_devstart = mdc->un_orig_devstart; 2250 mdc->un_alt_dev = NODEV64; 2251 mdc->un_alt_pwstart = 0; 2252 mdc->un_alt_devstart = 0; 2253 mdc->un_hs_id = 0; 2254 if (mr->un_state == RUS_INIT) 2255 mdc->un_devstate = RCS_INIT; 2256 else 2257 mdc->un_devstate = RCS_OKAY; 2258 2259 /* adjust for smallest disk */ 2260 if (disk_size == 0) { 2261 disk_size = size; 2262 } else if (size < disk_size) { 2263 disk_size = size; 2264 } 2265 if (disk_maxio == 0) { 2266 disk_maxio = maxio; 2267 } else if (maxio < disk_maxio) { 2268 disk_maxio = maxio; 2269 } 2270 } 2271 assert(col == mr->un_origcolumncnt); 2272 2273 /* 2274 * before processing any of the attached column(s) 2275 * set up the composition of the metadevice for column 2276 * sizes and pre-write information 2277 */ 2278 mr->un_maxio = disk_maxio; /* smallest maxio */ 2279 mr->un_iosize = min(mr->un_maxio, (mr->un_segsize + 1)); 2280 pwes = mr->un_iosize; 2281 if (raidp->pw_count) 2282 mr->un_pwcnt = raidp->pw_count; 2283 else 2284 mr->un_pwcnt = PWCNT_MIN; 2285 if ((mr->un_pwcnt < PWCNT_MIN) || (mr->un_pwcnt > PWCNT_MAX)) { 2286 (void) mderror(ep, MDE_RAID_BAD_PW_CNT, raidnp->cname); 2287 goto out; 2288 } 2289 mr->un_pwsize = roundup((mr->un_pwcnt * pwes), 2); 2290 2291 /* now calculate the number of segments per column */ 2292 non_pw_blks = disk_size - mr->un_pwsize; /* smallest disk */ 2293 if ((mr->un_pwsize > disk_size) || 2294 (non_pw_blks < (diskaddr_t)mr->un_segsize)) { 2295 (void) mdsyserror(ep, ENOSPC, raidnp->cname); 2296 goto out; 2297 } 2298 mr->un_segsincolumn = non_pw_blks / mr->un_segsize; 2299 column_size = mr->un_segsize * mr->un_segsincolumn; 2300 2301 /* 2302 * adjust the pw_cnt, pw_size, to fit into any fragmentation 2303 * left over after column_size has been computed 2304 */ 2305 mr->un_pwsize = rounddown(((uint_t)(disk_size - column_size)), 2); 2306 mr->un_pwcnt = mr->un_pwsize / pwes; 2307 assert(mr->un_pwcnt >= PWCNT_MIN); 2308 mr->un_pwsize = roundup((mr->un_pwcnt * pwes), 2); 2309 assert((mr->un_pwsize + column_size) <= disk_size); 2310 2311 /* 2312 * calculate the actual block count available based on the 2313 * segment size and the number of segments per column ... 2314 * ... and adjust for the number of parity segments 2315 */ 2316 mr->c.un_actual_tb = column_size * (mr->un_origcolumncnt - 1); 2317 2318 if (raid_geom(raidp, mr, ep) != 0) 2319 goto out; 2320 2321 create_flag = meta_check_devicesize(mr->c.un_total_blocks); 2322 2323 /* 2324 * now calculate the pre-write offset and update the column 2325 * structures to include the address of the individual pre-write 2326 * areas 2327 */ 2328 for (col = 0; (col < orig_ncol); ++col) { 2329 md_raidcol_t *cp = &raidp->cols.cols_val[col]; 2330 mdname_t *colnp = cp->colnamep; 2331 mr_column_t *mdc = &mr->un_column[col]; 2332 diskaddr_t size; 2333 2334 /* get size */ 2335 if ((size = metagetsize(colnp, ep)) == MD_DISKADDR_ERROR) 2336 goto out; 2337 2338 /* adjust start and size by prewrite */ 2339 mdc->un_orig_pwstart = mdc->un_orig_devstart; 2340 mdc->un_orig_devstart += mr->un_pwsize; 2341 mdc->un_pwstart = mdc->un_orig_pwstart; 2342 mdc->un_devstart = mdc->un_orig_devstart; 2343 2344 assert(size >= mdc->un_orig_devstart); 2345 size -= mdc->un_orig_devstart; 2346 2347 /* make sure we still have something left */ 2348 assert(size >= column_size); 2349 } 2350 2351 /* do concat cols */ 2352 mr->un_totalcolumncnt = mr->un_origcolumncnt; 2353 assert(col == mr->un_origcolumncnt); 2354 for (col = orig_ncol; (col < ncol); ++col) { 2355 md_raidcol_t *cp = &raidp->cols.cols_val[col]; 2356 mdname_t *colnp = cp->colnamep; 2357 mr_column_t *mdc = &mr->un_column[col]; 2358 2359 /* attach column */ 2360 if (attach_raid_col(sp, raidnp, mr, mdc, colnp, 2361 cp->state, &keynlp, options, ep) != 0) { 2362 goto out; 2363 } 2364 } 2365 assert(mr->un_totalcolumncnt == ncol); 2366 2367 /* fill in the size of the raid */ 2368 if (options & MDCMD_UPDATE) { 2369 raidp->common.size = mr->c.un_total_blocks; 2370 raidp->column_size = mr->un_segsize * mr->un_segsincolumn; 2371 } 2372 2373 /* if we're not doing anything, return success */ 2374 if (! (options & MDCMD_DOIT)) { 2375 rval = 0; /* success */ 2376 goto out; 2377 } 2378 2379 if ((mr->un_state & RUS_OKAY) && 2380 (meta_raid_valid(raidp, mr) != 0)) { 2381 (void) mderror(ep, MDE_RAID_INVALID, raidnp->cname); 2382 goto out; 2383 } 2384 2385 /* create raid */ 2386 (void) memset(&set_params, 0, sizeof (set_params)); 2387 /* did the user tell us to generate a large device? */ 2388 if (create_flag == MD_CRO_64BIT) { 2389 mr->c.un_revision = MD_64BIT_META_DEV; 2390 set_params.options = MD_CRO_64BIT; 2391 } else { 2392 mr->c.un_revision = MD_32BIT_META_DEV; 2393 set_params.options = MD_CRO_32BIT; 2394 } 2395 set_params.mnum = MD_SID(mr); 2396 set_params.size = mr->c.un_size; 2397 set_params.mdp = (uintptr_t)mr; 2398 MD_SETDRIVERNAME(&set_params, MD_RAID, MD_MIN2SET(set_params.mnum)); 2399 if (metaioctl(MD_IOCSET, &set_params, &set_params.mde, 2400 raidnp->cname) != 0) { 2401 (void) mdstealerror(ep, &set_params.mde); 2402 goto out; 2403 } 2404 rval = 0; /* success */ 2405 2406 /* cleanup, return success */ 2407 out: 2408 Free(mr); 2409 if (rval != 0) { 2410 (void) del_key_names(sp, keynlp, NULL); 2411 } 2412 metafreenamelist(keynlp); 2413 if ((rval == 0) && (options & MDCMD_DOIT)) { 2414 if (invalidate_columns(sp, raidnp, ep) != 0) 2415 rval = -1; 2416 meta_invalidate_name(raidnp); 2417 } 2418 return (rval); 2419 } 2420 2421 /* 2422 * initialize raid 2423 * NOTE: this functions is metainit(1m)'s command line parser! 2424 */ 2425 int 2426 meta_init_raid( 2427 mdsetname_t **spp, 2428 int argc, 2429 char *argv[], 2430 mdcmdopts_t options, 2431 md_error_t *ep 2432 ) 2433 { 2434 char *uname = argv[0]; 2435 mdname_t *raidnp = NULL; 2436 int old_optind; 2437 int c; 2438 md_raid_t *raidp = NULL; 2439 uint_t ncol, col; 2440 int rval = -1; 2441 md_set_desc *sd; 2442 2443 /* get raid name */ 2444 assert(argc > 0); 2445 if (argc < 1) 2446 goto syntax; 2447 if ((raidnp = metaname(spp, uname, ep)) == NULL) 2448 goto out; 2449 assert(*spp != NULL); 2450 2451 /* 2452 * Raid metadevice not allowed on multi-node diskset. 2453 */ 2454 if (! metaislocalset(*spp)) { 2455 if ((sd = metaget_setdesc(*spp, ep)) == NULL) 2456 goto out; 2457 if (MD_MNSET_DESC(sd)) { 2458 rval = meta_cook_syntax(ep, MDE_MNSET_NORAID, uname, 2459 argc, argv); 2460 goto out; 2461 } 2462 } 2463 2464 uname = raidnp->cname; 2465 if (metachkmeta(raidnp, ep) != 0) 2466 goto out; 2467 2468 if (!(options & MDCMD_NOLOCK)) { 2469 /* grab set lock */ 2470 if (meta_lock(*spp, TRUE, ep) != 0) 2471 goto out; 2472 2473 if (meta_check_ownership(*spp, ep) != 0) 2474 goto out; 2475 } 2476 2477 /* see if it exists already */ 2478 if (metagetmiscname(raidnp, ep) != NULL) { 2479 (void) mdmderror(ep, MDE_UNIT_ALREADY_SETUP, 2480 meta_getminor(raidnp->dev), uname); 2481 goto out; 2482 } else if (! mdismderror(ep, MDE_UNIT_NOT_SETUP)) { 2483 goto out; 2484 } else { 2485 mdclrerror(ep); 2486 } 2487 --argc, ++argv; 2488 2489 /* grab -r */ 2490 if ((argc < 1) || (strcmp(argv[0], "-r") != 0)) 2491 goto syntax; 2492 --argc, ++argv; 2493 2494 /* parse general options */ 2495 optind = 0; 2496 opterr = 0; 2497 if (getopt(argc, argv, "") != -1) 2498 goto options; 2499 2500 /* allocate raid */ 2501 raidp = Zalloc(sizeof (*raidp)); 2502 2503 /* setup common */ 2504 raidp->common.namep = raidnp; 2505 raidp->common.type = MD_METARAID; 2506 raidp->state = RUS_INIT; 2507 2508 /* allocate and parse cols */ 2509 for (ncol = 0; ((ncol < argc) && (argv[ncol][0] != '-')); ++ncol) 2510 ; 2511 raidp->cols.cols_len = ncol; 2512 if (ncol != 0) { 2513 raidp->cols.cols_val = 2514 Zalloc(ncol * sizeof (*raidp->cols.cols_val)); 2515 } 2516 for (col = 0; ((argc > 0) && (col < ncol)); ++col) { 2517 md_raidcol_t *mdc = &raidp->cols.cols_val[col]; 2518 mdname_t *colnp; 2519 2520 /* parse column name */ 2521 if ((colnp = metaname(spp, argv[0], ep)) == NULL) 2522 goto out; 2523 /* check for soft partitions */ 2524 if (meta_sp_issp(*spp, colnp, ep) != 0) { 2525 /* check disks */ 2526 if (metachkcomp(colnp, ep) != 0) 2527 goto out; 2528 } 2529 mdc->colnamep = colnp; 2530 --argc, ++argv; 2531 } 2532 2533 /* parse raid options */ 2534 old_optind = optind = 0; 2535 opterr = 0; 2536 while ((c = getopt(argc, argv, "h:i:ko:w:")) != -1) { 2537 switch (c) { 2538 case 'h': 2539 if ((raidp->hspnamep = metahspname(spp, optarg, 2540 ep)) == NULL) { 2541 goto out; 2542 } 2543 break; 2544 2545 case 'i': 2546 if (parse_interlace(uname, optarg, &raidp->interlace, 2547 ep) != 0) { 2548 goto out; 2549 } 2550 if (meta_raid_check_interlace(raidp->interlace, 2551 uname, ep)) 2552 goto out; 2553 break; 2554 2555 case 'k': 2556 raidp->state = RUS_OKAY; 2557 break; 2558 2559 case 'o': 2560 if ((sscanf(optarg, "%u", &raidp->orig_ncol) != 1) || 2561 ((int)raidp->orig_ncol < 0)) { 2562 goto syntax; 2563 } 2564 if ((raidp->orig_ncol < MD_RAID_MIN) || 2565 (raidp->orig_ncol > ncol)) { 2566 rval = mderror(ep, MDE_BAD_ORIG_NCOL, uname); 2567 goto out; 2568 } 2569 break; 2570 case 'w': 2571 if ((sscanf(optarg, "%d", &raidp->pw_count) != 1) || 2572 ((int)raidp->pw_count < 0)) 2573 goto syntax; 2574 if (((int)raidp->pw_count < PWCNT_MIN) || 2575 ((int)raidp->pw_count > PWCNT_MAX)) { 2576 rval = mderror(ep, MDE_RAID_BAD_PW_CNT, uname); 2577 goto out; 2578 } 2579 break; 2580 default: 2581 argc += old_optind; 2582 argv -= old_optind; 2583 goto options; 2584 } 2585 old_optind = optind; 2586 } 2587 argc -= optind; 2588 argv += optind; 2589 2590 /* we should be at the end */ 2591 if (argc != 0) 2592 goto syntax; 2593 2594 /* default to all original columns */ 2595 if (raidp->orig_ncol == 0) 2596 raidp->orig_ncol = ncol; 2597 2598 /* create raid */ 2599 if (meta_create_raid(*spp, raidp, options, ep) != 0) 2600 goto out; 2601 rval = 0; /* success */ 2602 2603 /* let em know */ 2604 if (options & MDCMD_PRINT) { 2605 (void) printf(dgettext(TEXT_DOMAIN, "%s: RAID is setup\n"), 2606 uname); 2607 (void) fflush(stdout); 2608 } 2609 goto out; 2610 2611 /* syntax error */ 2612 syntax: 2613 rval = meta_cook_syntax(ep, MDE_SYNTAX, uname, argc, argv); 2614 goto out; 2615 2616 /* options error */ 2617 options: 2618 rval = meta_cook_syntax(ep, MDE_OPTION, uname, argc, argv); 2619 goto out; 2620 2621 /* cleanup, return error */ 2622 out: 2623 if (raidp != NULL) 2624 meta_free_raid(raidp); 2625 return (rval); 2626 } 2627 2628 /* 2629 * reset RAIDs 2630 */ 2631 int 2632 meta_raid_reset( 2633 mdsetname_t *sp, 2634 mdname_t *raidnp, 2635 mdcmdopts_t options, 2636 md_error_t *ep 2637 ) 2638 { 2639 md_raid_t *raidp; 2640 int rval = -1; 2641 int col; 2642 2643 /* should have same set */ 2644 assert(sp != NULL); 2645 assert((raidnp == NULL) || 2646 (sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev)))); 2647 2648 /* reset all raids */ 2649 if (raidnp == NULL) { 2650 mdnamelist_t *raidnlp = NULL; 2651 mdnamelist_t *p; 2652 2653 /* for each raid */ 2654 rval = 0; 2655 if (meta_get_raid_names(sp, &raidnlp, 0, ep) < 0) 2656 return (-1); 2657 for (p = raidnlp; (p != NULL); p = p->next) { 2658 /* reset RAID */ 2659 raidnp = p->namep; 2660 if (meta_raid_reset(sp, raidnp, options, ep) != 0) { 2661 rval = -1; 2662 break; 2663 } 2664 } 2665 2666 /* cleanup, return success */ 2667 metafreenamelist(raidnlp); 2668 return (rval); 2669 } 2670 2671 /* check name */ 2672 if (metachkmeta(raidnp, ep) != 0) 2673 return (-1); 2674 2675 /* get unit structure */ 2676 if ((raidp = meta_get_raid(sp, raidnp, ep)) == NULL) 2677 return (-1); 2678 2679 /* make sure nobody owns us */ 2680 if (MD_HAS_PARENT(raidp->common.parent)) { 2681 return (mdmderror(ep, MDE_IN_USE, meta_getminor(raidnp->dev), 2682 raidnp->cname)); 2683 } 2684 2685 /* clear subdevices cache */ 2686 if (invalidate_columns(sp, raidnp, ep) != 0) 2687 return (-1); 2688 2689 /* clear metadevice */ 2690 if (meta_reset(sp, raidnp, options, ep) != 0) 2691 goto out; 2692 rval = 0; /* success */ 2693 2694 /* let em know */ 2695 if (options & MDCMD_PRINT) { 2696 (void) printf(dgettext(TEXT_DOMAIN, "%s: RAID is cleared\n"), 2697 raidnp->cname); 2698 (void) fflush(stdout); 2699 } 2700 2701 /* clear subdevices */ 2702 if (! (options & MDCMD_RECURSE)) 2703 goto out; 2704 2705 for (col = 0; (col < raidp->cols.cols_len); ++col) { 2706 md_raidcol_t *cp = &raidp->cols.cols_val[col]; 2707 mdname_t *colnp = cp->colnamep; 2708 2709 /* only recurse on metadevices */ 2710 if (! metaismeta(colnp)) 2711 continue; 2712 2713 if (meta_reset_by_name(sp, colnp, options, ep) != 0) 2714 rval = -1; 2715 } 2716 2717 /* cleanup, return success */ 2718 out: 2719 meta_invalidate_name(raidnp); 2720 return (rval); 2721 } 2722 2723 /* 2724 * reports TRUE if any RAID component is in error 2725 */ 2726 int 2727 meta_raid_anycomp_is_err(mdsetname_t *sp, mdnamelist_t *raid_names) 2728 { 2729 mdnamelist_t *nlp; 2730 md_error_t status = mdnullerror; 2731 md_error_t *ep = &status; 2732 int any_errs = FALSE; 2733 2734 for (nlp = raid_names; nlp; nlp = nlp->next) { 2735 md_raid_t *raidp; 2736 2737 if ((raidp = meta_get_raid(sp, nlp->namep, ep)) == NULL) { 2738 any_errs |= TRUE; 2739 goto out; 2740 } 2741 if (raidp->state != RUS_OKAY && raidp->state != RUS_INIT) { 2742 any_errs |= TRUE; 2743 goto out; 2744 } 2745 } 2746 out: 2747 if (!mdisok(ep)) 2748 mdclrerror(ep); 2749 2750 return (any_errs); 2751 } 2752 /* 2753 * regen parity on a raid 2754 */ 2755 int 2756 meta_raid_regen_byname(mdsetname_t *sp, mdname_t *raidnp, diskaddr_t size, 2757 md_error_t *ep) 2758 { 2759 char *miscname; 2760 md_resync_ioctl_t ri; 2761 2762 /* should have a set */ 2763 assert(sp != NULL); 2764 assert(sp->setno == MD_MIN2SET(meta_getminor(raidnp->dev))); 2765 2766 /* make sure we have a raid */ 2767 if ((miscname = metagetmiscname(raidnp, ep)) == NULL) 2768 return (-1); 2769 if (strcmp(miscname, MD_RAID) != 0) { 2770 return (mdmderror(ep, MDE_NOT_RAID, meta_getminor(raidnp->dev), 2771 raidnp->cname)); 2772 } 2773 2774 /* start resync */ 2775 (void) memset(&ri, 0, sizeof (ri)); 2776 MD_SETDRIVERNAME(&ri, MD_RAID, sp->setno); 2777 ri.ri_mnum = meta_getminor(raidnp->dev); 2778 ri.ri_copysize = size; 2779 if (metaioctl(MD_IOCSETREGEN, &ri, &ri.mde, raidnp->cname) != 0) 2780 return (mdstealerror(ep, &ri.mde)); 2781 2782 /* return success */ 2783 return (0); 2784 } 2785