1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/param.h> 29 #include <sys/systm.h> 30 #include <sys/conf.h> 31 #include <sys/file.h> 32 #include <sys/user.h> 33 #include <sys/uio.h> 34 #include <sys/t_lock.h> 35 #include <sys/buf.h> 36 #include <sys/dkio.h> 37 #include <sys/vtoc.h> 38 #include <sys/kmem.h> 39 #include <vm/page.h> 40 #include <sys/cmn_err.h> 41 #include <sys/sysmacros.h> 42 #include <sys/types.h> 43 #include <sys/mkdev.h> 44 #include <sys/stat.h> 45 #include <sys/open.h> 46 #include <sys/lvm/mdio.h> 47 #include <sys/lvm/mdvar.h> 48 #include <sys/lvm/md_stripe.h> 49 #include <sys/lvm/md_convert.h> 50 #include <sys/lvm/md_notify.h> 51 #include <sys/modctl.h> 52 #include <sys/ddi.h> 53 #include <sys/sunddi.h> 54 #include <sys/debug.h> 55 #include <sys/sysevent/eventdefs.h> 56 #include <sys/sysevent/svm.h> 57 58 md_ops_t stripe_md_ops; 59 #ifndef lint 60 char _depends_on[] = "drv/md"; 61 static md_ops_t *md_interface_ops = &stripe_md_ops; 62 #endif 63 64 extern unit_t md_nunits; 65 extern set_t md_nsets; 66 extern md_set_t md_set[]; 67 68 extern kmutex_t md_mx; 69 extern kcondvar_t md_cv; 70 71 extern int md_status; 72 extern major_t md_major; 73 extern mdq_anchor_t md_done_daemon; 74 75 static int md_stripe_mcs_buf_off; 76 static kmem_cache_t *stripe_parent_cache = NULL; 77 static kmem_cache_t *stripe_child_cache = NULL; 78 79 /*ARGSUSED1*/ 80 static int 81 stripe_parent_constructor(void *p, void *d1, int d2) 82 { 83 mutex_init(&((md_sps_t *)p)->ps_mx, 84 NULL, MUTEX_DEFAULT, NULL); 85 return (0); 86 } 87 88 static void 89 stripe_parent_init(void *ps) 90 { 91 bzero(ps, offsetof(md_sps_t, ps_mx)); 92 } 93 94 /*ARGSUSED1*/ 95 static void 96 stripe_parent_destructor(void *p, void *d) 97 { 98 mutex_destroy(&((md_sps_t *)p)->ps_mx); 99 } 100 101 /*ARGSUSED1*/ 102 static int 103 stripe_child_constructor(void *p, void *d1, int d2) 104 { 105 bioinit(&((md_scs_t *)p)->cs_buf); 106 return (0); 107 } 108 109 static void 110 stripe_child_init(md_scs_t *cs) 111 { 112 cs->cs_mdunit = 0; 113 cs->cs_ps = NULL; 114 cs->cs_comp = NULL; 115 md_bioreset(&cs->cs_buf); 116 } 117 118 /*ARGSUSED1*/ 119 static void 120 stripe_child_destructor(void *p, void *d) 121 { 122 biofini(&((md_scs_t *)p)->cs_buf); 123 } 124 125 /*ARGSUSED*/ 126 static void 127 stripe_run_queue(void *d) 128 { 129 if (!(md_status & MD_GBL_DAEMONS_LIVE)) 130 md_daemon(1, &md_done_daemon); 131 } 132 133 static void 134 stripe_close_all_devs(ms_unit_t *un, int md_cflags) 135 { 136 int row; 137 int i; 138 int c; 139 struct ms_comp *mdcomp; 140 141 mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 142 for (row = 0; row < un->un_nrows; row++) { 143 struct ms_row *mdr = &un->un_row[row]; 144 for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) { 145 struct ms_comp *mdc; 146 mdc = &mdcomp[c++]; 147 if (md_cflags & MD_OFLG_PROBEDEV) { 148 149 /* 150 * It is possible that the md_layered_open 151 * failed because the stripe unit structure 152 * contained a NODEV. In such a case since 153 * there is nothing to open, there is nothing 154 * to close. 155 */ 156 if (mdc->un_dev == NODEV64) 157 continue; 158 } 159 if ((md_cflags & MD_OFLG_PROBEDEV) && 160 (mdc->un_mirror.ms_flags & MDM_S_PROBEOPEN)) { 161 md_layered_close(mdc->un_dev, 162 md_cflags); 163 mdc->un_mirror.ms_flags &= 164 ~MDM_S_PROBEOPEN; 165 } else if (mdc->un_mirror.ms_flags & MDM_S_ISOPEN) { 166 md_layered_close(mdc->un_dev, md_cflags); 167 mdc->un_mirror.ms_flags &= ~MDM_S_ISOPEN; 168 } 169 } 170 } 171 } 172 173 static int 174 stripe_open_all_devs(ms_unit_t *un, int md_oflags) 175 { 176 minor_t mnum = MD_SID(un); 177 int row; 178 int i; 179 int c; 180 struct ms_comp *mdcomp; 181 int err; 182 int cont_on_errors = (md_oflags & MD_OFLG_CONT_ERRS); 183 int probe_err_cnt = 0; 184 int total_comp_cnt = 0; 185 set_t setno = MD_MIN2SET(MD_SID(un)); 186 side_t side = mddb_getsidenum(setno); 187 mdkey_t key; 188 189 mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 190 191 /* 192 * For a probe call, if any component of a stripe or a concat 193 * can be opened, it is considered to be a success. The total number 194 * of components in a stripe are computed prior to starting a probe. 195 * This number is then compared against the number of components 196 * that could be be successfully opened. If none of the components 197 * in a stripe can be opened, only then an ENXIO is returned for a 198 * probe type open. 199 */ 200 201 for (row = 0; row < un->un_nrows; row++) { 202 struct ms_row *mdr = &un->un_row[row]; 203 204 if (md_oflags & MD_OFLG_PROBEDEV) 205 total_comp_cnt += mdr->un_ncomp; 206 207 for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) { 208 struct ms_comp *mdc; 209 md_dev64_t tmpdev; 210 211 mdc = &mdcomp[c++]; 212 tmpdev = mdc->un_dev; 213 /* 214 * Do the open by device id 215 * Check if this comp is hotspared and 216 * if it is then use the key for hotspare. 217 * MN disksets don't use devids, so we better don't use 218 * md_devid_found/md_resolve_bydevid there. Rather do, 219 * what's done in stripe_build_incore() 220 */ 221 if (MD_MNSET_SETNO(setno)) { 222 if (mdc->un_mirror.ms_hs_id != 0) { 223 (void) md_hot_spare_ifc(HS_MKDEV, 0, 0, 224 0, &mdc->un_mirror.ms_hs_id, NULL, 225 &tmpdev, NULL); 226 } 227 } else { 228 key = mdc->un_mirror.ms_hs_id ? 229 mdc->un_mirror.ms_hs_key : mdc->un_key; 230 if ((md_getmajor(tmpdev) != md_major) && 231 md_devid_found(setno, side, key) == 1) { 232 tmpdev = md_resolve_bydevid(mnum, 233 tmpdev, key); 234 } 235 } 236 237 /* 238 * For a submirror, we only want to open those devices 239 * that are not errored. If the device is errored then 240 * then there is no reason to open it and leaving it 241 * closed allows the RCM/DR code to work so that the 242 * errored device can be replaced. 243 */ 244 if ((md_oflags & MD_OFLG_PROBEDEV) || 245 ! (mdc->un_mirror.ms_state & CS_ERRED)) { 246 247 err = md_layered_open(mnum, &tmpdev, md_oflags); 248 } else { 249 err = ENXIO; 250 } 251 252 /* 253 * Only set the un_dev if the tmpdev != NODEV64. If 254 * it is NODEV64 then the md_layered_open() will have 255 * failed in some manner. 256 */ 257 if (tmpdev != NODEV64) 258 mdc->un_dev = tmpdev; 259 260 if (err) { 261 if (!cont_on_errors) { 262 stripe_close_all_devs(un, md_oflags); 263 return (ENXIO); 264 } 265 266 if (md_oflags & MD_OFLG_PROBEDEV) 267 probe_err_cnt++; 268 } else { 269 if (md_oflags & MD_OFLG_PROBEDEV) { 270 mdc->un_mirror.ms_flags |= 271 MDM_S_PROBEOPEN; 272 } else 273 mdc->un_mirror.ms_flags |= MDM_S_ISOPEN; 274 } 275 } 276 } 277 278 /* If every component in a stripe could not be opened fail */ 279 if ((md_oflags & MD_OFLG_PROBEDEV) && 280 (probe_err_cnt == total_comp_cnt)) 281 return (ENXIO); 282 else 283 return (0); 284 } 285 286 int 287 stripe_build_incore(void *p, int snarfing) 288 { 289 ms_unit_t *un = (ms_unit_t *)p; 290 struct ms_comp *mdcomp; 291 minor_t mnum; 292 int row; 293 int i; 294 int c; 295 int ncomps; 296 297 mnum = MD_SID(un); 298 299 if (MD_UNIT(mnum) != NULL) 300 return (0); 301 302 MD_STATUS(un) = 0; 303 304 /* 305 * Reset all the is_open flags, these are probably set 306 * cause they just came out of the database. 307 */ 308 mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 309 310 ncomps = 0; 311 for (row = 0; row < un->un_nrows; row++) { 312 struct ms_row *mdr = &un->un_row[row]; 313 ncomps += mdr->un_ncomp; 314 } 315 316 for (row = 0; row < un->un_nrows; row++) { 317 struct ms_row *mdr = &un->un_row[row]; 318 for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) { 319 struct ms_comp *mdc; 320 set_t setno; 321 md_dev64_t tmpdev; 322 323 mdc = &mdcomp[c++]; 324 mdc->un_mirror.ms_flags &= 325 ~(MDM_S_ISOPEN | MDM_S_IOERR | MDM_S_RS_TRIED); 326 327 if (!snarfing) 328 continue; 329 330 setno = MD_MIN2SET(mnum); 331 332 tmpdev = md_getdevnum(setno, mddb_getsidenum(setno), 333 mdc->un_key, MD_NOTRUST_DEVT); 334 mdc->un_dev = tmpdev; 335 /* 336 * Check for hotspares. If the hotspares haven't been 337 * snarfed yet, stripe_open_all_devs() will do the 338 * remapping of the dev's later. 339 */ 340 if (mdc->un_mirror.ms_hs_id != 0) { 341 mdc->un_mirror.ms_orig_dev = mdc->un_dev; 342 (void) md_hot_spare_ifc(HS_MKDEV, 0, 0, 343 0, &mdc->un_mirror.ms_hs_id, NULL, 344 &tmpdev, NULL); 345 mdc->un_dev = tmpdev; 346 } 347 } 348 } 349 350 MD_UNIT(mnum) = un; 351 return (0); 352 } 353 354 void 355 reset_stripe(ms_unit_t *un, minor_t mnum, int removing) 356 { 357 ms_comp_t *mdcomp; 358 struct ms_row *mdr; 359 int i, c; 360 int row; 361 int nsv; 362 int isv; 363 sv_dev_t *sv; 364 mddb_recid_t *recids; 365 mddb_recid_t vtoc_id; 366 int rid = 0; 367 368 md_destroy_unit_incore(mnum, &stripe_md_ops); 369 370 MD_UNIT(mnum) = NULL; 371 372 if (!removing) 373 return; 374 375 nsv = 0; 376 /* Count the number of devices */ 377 for (row = 0; row < un->un_nrows; row++) { 378 mdr = &un->un_row[row]; 379 nsv += mdr->un_ncomp; 380 } 381 sv = (sv_dev_t *)kmem_alloc(sizeof (sv_dev_t) * nsv, KM_SLEEP); 382 383 /* 384 * allocate recids array. since we may have to commit 385 * underlying soft partition records, we need an array 386 * of size: total number of components in stripe + 3 387 * (one for the stripe itself, one for the hotspare, one 388 * for the end marker). 389 */ 390 recids = kmem_alloc(sizeof (mddb_recid_t) * (nsv + 3), KM_SLEEP); 391 392 /* 393 * Save the md_dev64_t's and driver nm indexes. 394 * Because after the mddb_deleterec() we will 395 * not be able to access the unit structure. 396 * 397 * NOTE: Deleting the names before deleting the 398 * unit structure would cause problems if 399 * the machine crashed in between the two. 400 */ 401 isv = 0; 402 mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 403 404 for (row = 0; row < un->un_nrows; row++) { 405 mdr = &un->un_row[row]; 406 for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) { 407 struct ms_comp *mdc; 408 md_dev64_t child_dev; 409 md_unit_t *child_un; 410 411 mdc = &mdcomp[c++]; 412 if (mdc->un_mirror.ms_hs_id != 0) { 413 mdkey_t hs_key; 414 415 hs_key = mdc->un_mirror.ms_hs_key; 416 417 mdc->un_dev = mdc->un_mirror.ms_orig_dev; 418 mdc->un_start_block = 419 mdc->un_mirror.ms_orig_blk; 420 mdc->un_mirror.ms_hs_id = 0; 421 mdc->un_mirror.ms_hs_key = 0; 422 mdc->un_mirror.ms_orig_dev = 0; 423 recids[0] = 0; 424 recids[1] = 0; /* recids[1] filled in below */ 425 recids[2] = 0; 426 (void) md_hot_spare_ifc(HS_FREE, un->un_hsp_id, 427 0, 0, &recids[0], &hs_key, NULL, NULL); 428 mddb_commitrecs_wrapper(recids); 429 } 430 431 /* 432 * check if we've got metadevice below us and 433 * deparent it if we do. 434 * NOTE: currently soft partitions are the 435 * the only metadevices stripes can be 436 * built on top of. 437 */ 438 child_dev = mdc->un_dev; 439 if (md_getmajor(child_dev) == md_major) { 440 child_un = MD_UNIT(md_getminor(child_dev)); 441 md_reset_parent(child_dev); 442 recids[rid++] = MD_RECID(child_un); 443 } 444 445 sv[isv].setno = MD_MIN2SET(mnum); 446 sv[isv++].key = mdc->un_key; 447 } 448 } 449 450 recids[rid++] = un->c.un_record_id; 451 recids[rid] = 0; /* filled in below */ 452 453 /* 454 * Decrement the HSP reference count and 455 * remove the knowledge of the HSP from the unit struct. 456 * This is done atomically to remove a window. 457 */ 458 if (un->un_hsp_id != -1) { 459 (void) md_hot_spare_ifc(HSP_DECREF, un->un_hsp_id, 0, 0, 460 &recids[rid++], NULL, NULL, NULL); 461 un->un_hsp_id = -1; 462 } 463 464 /* set end marker and commit records */ 465 recids[rid] = 0; 466 mddb_commitrecs_wrapper(recids); 467 468 vtoc_id = un->c.un_vtoc_id; 469 470 /* Remove the unit structure */ 471 mddb_deleterec_wrapper(un->c.un_record_id); 472 473 /* Remove the vtoc, if present */ 474 if (vtoc_id) 475 mddb_deleterec_wrapper(vtoc_id); 476 477 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_METADEVICE, 478 MD_MIN2SET(mnum), MD_MIN2UNIT(mnum)); 479 md_rem_names(sv, nsv); 480 kmem_free(sv, sizeof (sv_dev_t) * nsv); 481 kmem_free(recids, sizeof (mddb_recid_t) * (nsv + 3)); 482 } 483 484 static void 485 stripe_error(md_sps_t *ps) 486 { 487 struct buf *pb = ps->ps_bp; 488 mdi_unit_t *ui = ps->ps_ui; 489 md_dev64_t dev = ps->ps_errcomp->un_dev; 490 md_dev64_t md_dev = md_expldev(pb->b_edev); 491 char *str; 492 493 if (pb->b_flags & B_READ) { 494 ps->ps_errcomp->un_mirror.ms_flags |= MDM_S_READERR; 495 str = "read"; 496 } else { 497 ps->ps_errcomp->un_mirror.ms_flags |= MDM_S_WRTERR; 498 str = "write"; 499 } 500 if (!(ps->ps_flags & MD_SPS_DONTFREE)) { 501 if (MUTEX_HELD(&ps->ps_mx)) { 502 mutex_exit(&ps->ps_mx); 503 } 504 } else { 505 ASSERT(panicstr); 506 } 507 SPS_FREE(stripe_parent_cache, ps); 508 pb->b_flags |= B_ERROR; 509 510 md_kstat_done(ui, pb, 0); 511 md_unit_readerexit(ui); 512 md_biodone(pb); 513 514 cmn_err(CE_WARN, "md: %s: %s error on %s", 515 md_shortname(md_getminor(md_dev)), str, 516 md_devname(MD_DEV2SET(md_dev), dev, NULL, 0)); 517 } 518 519 static int 520 stripe_done(struct buf *cb) 521 { 522 struct buf *pb; 523 mdi_unit_t *ui; 524 md_sps_t *ps; 525 md_scs_t *cs; 526 527 /*LINTED*/ 528 cs = (md_scs_t *)((caddr_t)cb - md_stripe_mcs_buf_off); 529 ps = cs->cs_ps; 530 pb = ps->ps_bp; 531 532 mutex_enter(&ps->ps_mx); 533 if (cb->b_flags & B_ERROR) { 534 ps->ps_flags |= MD_SPS_ERROR; 535 pb->b_error = cb->b_error; 536 ps->ps_errcomp = cs->cs_comp; 537 } 538 539 if (cb->b_flags & B_REMAPPED) 540 bp_mapout(cb); 541 542 ps->ps_frags--; 543 if (ps->ps_frags != 0) { 544 mutex_exit(&ps->ps_mx); 545 kmem_cache_free(stripe_child_cache, cs); 546 return (1); 547 } 548 kmem_cache_free(stripe_child_cache, cs); 549 if (ps->ps_flags & MD_SPS_ERROR) { 550 stripe_error(ps); 551 return (1); 552 } 553 ui = ps->ps_ui; 554 if (!(ps->ps_flags & MD_SPS_DONTFREE)) { 555 mutex_exit(&ps->ps_mx); 556 } else { 557 ASSERT(panicstr); 558 } 559 SPS_FREE(stripe_parent_cache, ps); 560 md_kstat_done(ui, pb, 0); 561 md_unit_readerexit(ui); 562 md_biodone(pb); 563 return (0); 564 } 565 566 567 /* 568 * This routine does the mapping from virtual (dev, blkno) of a metapartition 569 * to the real (dev, blkno) of a real disk partition. 570 * It goes to the md_conf[] table to find out the correct real partition 571 * dev and block number for this buffer. 572 * 573 * A single buf request can not go across real disk partition boundary. 574 * When the virtual request specified by (dev, blkno) spans more than one 575 * real partition, md_mapbuf will return 1. Then the caller should prepare 576 * another real buf and continue calling md_mapbuf to do the mapping until 577 * it returns 0. 578 * 579 */ 580 581 static int 582 md_mapbuf( 583 ms_unit_t *un, 584 diskaddr_t blkno, 585 u_longlong_t bcount, 586 buf_t *bp, /* if bp==NULL, skip bp updates */ 587 ms_comp_t **mdc) /* if bp==NULL, skip mdc update */ 588 { 589 struct ms_row *mdr; 590 struct ms_comp *mdcomp; 591 diskaddr_t stripe_blk; 592 diskaddr_t fragment, blk_in_row, endblk; 593 offset_t interlace; 594 size_t dev_index; 595 int row_index, more; 596 extern unsigned md_maxphys; 597 /* Work var's when bp==NULL */ 598 u_longlong_t wb_bcount; 599 diskaddr_t wb_blkno; 600 md_dev64_t wb_edev; 601 ms_comp_t *wmdc; 602 603 /* 604 * Do a real calculation to derive the minor device of the 605 * Virtual Disk, which in turn will let us derive the 606 * device/minor of the underlying real device. 607 */ 608 609 610 for (row_index = 0; row_index < un->un_nrows; row_index++) { 611 mdr = &un->un_row[row_index]; 612 if (blkno < mdr->un_cum_blocks) 613 break; 614 } 615 ASSERT(row_index != un->un_nrows); 616 617 mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 618 619 blk_in_row = blkno - mdr->un_cum_blocks + mdr->un_blocks; 620 endblk = (diskaddr_t)(blkno + howmany(bcount, DEV_BSIZE)); 621 if (mdr->un_ncomp == 1) { /* No striping */ 622 if (endblk > mdr->un_cum_blocks) { 623 wb_bcount = ldbtob(mdr->un_cum_blocks - blkno); 624 if ((row_index + 1) == un->un_nrows) 625 more = 0; 626 else 627 more = 1; 628 } else { 629 wb_bcount = bcount; 630 more = 0; 631 } 632 wmdc = &mdcomp[mdr->un_icomp]; 633 wb_blkno = blk_in_row; 634 } else { /* Have striping */ 635 interlace = mdr->un_interlace; 636 fragment = blk_in_row % interlace; 637 if (bcount > ldbtob(interlace - fragment)) { 638 more = 1; 639 wb_bcount = ldbtob(interlace - fragment); 640 } else { 641 more = 0; 642 wb_bcount = bcount; 643 } 644 645 stripe_blk = blk_in_row / interlace; 646 dev_index = (size_t)(stripe_blk % mdr->un_ncomp); 647 wmdc = &mdcomp[mdr->un_icomp + dev_index]; 648 wb_blkno = (diskaddr_t)(((stripe_blk / mdr->un_ncomp) 649 * interlace) + fragment); 650 } 651 652 wb_blkno += wmdc->un_start_block; 653 wb_edev = wmdc->un_dev; 654 655 /* only break up the I/O if we're not built on another metadevice */ 656 if ((md_getmajor(wb_edev) != md_major) && (wb_bcount > md_maxphys)) { 657 wb_bcount = md_maxphys; 658 more = 1; 659 } 660 if (bp != (buf_t *)NULL) { 661 /* 662 * wb_bcount is limited by md_maxphys which is 'int' 663 */ 664 bp->b_bcount = (size_t)wb_bcount; 665 bp->b_lblkno = wb_blkno; 666 bp->b_edev = md_dev64_to_dev(wb_edev); 667 *mdc = wmdc; 668 } 669 return (more); 670 } 671 672 static void 673 md_stripe_strategy(buf_t *pb, int flag, void *private) 674 { 675 md_sps_t *ps; 676 md_scs_t *cs; 677 int doing_writes; 678 int more; 679 ms_unit_t *un; 680 mdi_unit_t *ui; 681 size_t current_count; 682 diskaddr_t current_blkno; 683 off_t current_offset; 684 buf_t *cb; /* child buf pointer */ 685 set_t setno; 686 687 setno = MD_MIN2SET(getminor(pb->b_edev)); 688 689 /* 690 * When doing IO to a multi owner meta device, check if set is halted. 691 * We do this check without the needed lock held, for performance 692 * reasons. 693 * If an IO just slips through while the set is locked via an 694 * MD_MN_SUSPEND_SET, we don't care about it. 695 * Only check for a suspended set if we are a top-level i/o request 696 * (MD_STR_NOTTOP is cleared in 'flag'). 697 */ 698 if ((md_set[setno].s_status & (MD_SET_HALTED | MD_SET_MNSET)) == 699 (MD_SET_HALTED | MD_SET_MNSET)) { 700 if ((flag & MD_STR_NOTTOP) == 0) { 701 mutex_enter(&md_mx); 702 /* Here we loop until the set is no longer halted */ 703 while (md_set[setno].s_status & MD_SET_HALTED) { 704 cv_wait(&md_cv, &md_mx); 705 } 706 mutex_exit(&md_mx); 707 } 708 } 709 710 ui = MDI_UNIT(getminor(pb->b_edev)); 711 712 md_kstat_waitq_enter(ui); 713 714 un = (ms_unit_t *)md_unit_readerlock(ui); 715 716 if ((flag & MD_NOBLOCK) == 0) { 717 if (md_inc_iocount(setno) != 0) { 718 pb->b_flags |= B_ERROR; 719 pb->b_error = ENXIO; 720 pb->b_resid = pb->b_bcount; 721 md_unit_readerexit(ui); 722 biodone(pb); 723 return; 724 } 725 } else { 726 md_inc_iocount_noblock(setno); 727 } 728 729 if (!(flag & MD_STR_NOTTOP)) { 730 if (md_checkbuf(ui, (md_unit_t *)un, pb) != 0) { 731 md_kstat_waitq_exit(ui); 732 return; 733 } 734 } 735 736 ps = kmem_cache_alloc(stripe_parent_cache, MD_ALLOCFLAGS); 737 stripe_parent_init(ps); 738 739 /* 740 * Save essential information from the original buffhdr 741 * in the md_save structure. 742 */ 743 ps->ps_un = un; 744 ps->ps_ui = ui; 745 ps->ps_bp = pb; 746 ps->ps_addr = pb->b_un.b_addr; 747 748 if ((pb->b_flags & B_READ) == 0) 749 doing_writes = 1; 750 else 751 doing_writes = 0; 752 753 754 current_count = pb->b_bcount; 755 current_blkno = pb->b_lblkno; 756 current_offset = 0; 757 758 if (!(flag & MD_STR_NOTTOP) && panicstr) 759 ps->ps_flags |= MD_SPS_DONTFREE; 760 761 md_kstat_waitq_to_runq(ui); 762 763 ps->ps_frags++; 764 do { 765 cs = kmem_cache_alloc(stripe_child_cache, MD_ALLOCFLAGS); 766 stripe_child_init(cs); 767 cb = &cs->cs_buf; 768 cs->cs_ps = ps; 769 more = md_mapbuf(un, current_blkno, current_count, cb, 770 &cs->cs_comp); 771 772 cb = md_bioclone(pb, current_offset, cb->b_bcount, cb->b_edev, 773 cb->b_lblkno, stripe_done, cb, KM_NOSLEEP); 774 /* 775 * Do these calculations now, 776 * so that we pickup a valid b_bcount from the chld_bp. 777 */ 778 current_offset += cb->b_bcount; 779 current_count -= cb->b_bcount; 780 current_blkno += (diskaddr_t)(lbtodb(cb->b_bcount)); 781 782 if (more) { 783 mutex_enter(&ps->ps_mx); 784 ps->ps_frags++; 785 mutex_exit(&ps->ps_mx); 786 } 787 788 if (doing_writes && 789 cs->cs_comp->un_mirror.ms_flags & MDM_S_NOWRITE) { 790 (void) stripe_done(cb); 791 continue; 792 } 793 md_call_strategy(cb, flag, private); 794 } while (more); 795 796 if (!(flag & MD_STR_NOTTOP) && panicstr) { 797 while (!(ps->ps_flags & MD_SPS_DONE)) { 798 md_daemon(1, &md_done_daemon); 799 drv_usecwait(10); 800 } 801 kmem_cache_free(stripe_parent_cache, ps); 802 } 803 } 804 805 static int 806 stripe_snarf(md_snarfcmd_t cmd, set_t setno) 807 { 808 ms_unit_t *un; 809 mddb_recid_t recid; 810 int gotsomething; 811 int all_stripes_gotten; 812 mddb_type_t typ1; 813 mddb_de_ic_t *dep; 814 mddb_rb32_t *rbp; 815 size_t newreqsize; 816 ms_unit_t *big_un; 817 ms_unit32_od_t *small_un; 818 819 820 if (cmd == MD_SNARF_CLEANUP) 821 return (0); 822 823 all_stripes_gotten = 1; 824 gotsomething = 0; 825 826 typ1 = (mddb_type_t)md_getshared_key(setno, 827 stripe_md_ops.md_driver.md_drivername); 828 recid = mddb_makerecid(setno, 0); 829 830 while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) { 831 if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 832 continue; 833 834 dep = mddb_getrecdep(recid); 835 dep->de_flags = MDDB_F_STRIPE; 836 rbp = dep->de_rb; 837 838 if ((rbp->rb_revision == MDDB_REV_RB) && 839 ((rbp->rb_private & MD_PRV_CONVD) == 0)) { 840 /* 841 * This means, we have an old and small record 842 * and this record hasn't already been converted. 843 * Before we create an incore metadevice from this 844 * we have to convert it to a big record. 845 */ 846 small_un = (ms_unit32_od_t *)mddb_getrecaddr(recid); 847 newreqsize = get_big_stripe_req_size(small_un, 848 COMPLETE_STRUCTURE); 849 big_un = (ms_unit_t *)kmem_zalloc(newreqsize, KM_SLEEP); 850 stripe_convert((caddr_t)small_un, (caddr_t)big_un, 851 SMALL_2_BIG); 852 kmem_free(small_un, dep->de_reqsize); 853 dep->de_rb_userdata = big_un; 854 dep->de_reqsize = newreqsize; 855 un = big_un; 856 rbp->rb_private |= MD_PRV_CONVD; 857 } else { 858 /* Big device */ 859 un = (ms_unit_t *)mddb_getrecaddr(recid); 860 } 861 862 /* Set revision and flag accordingly */ 863 if (rbp->rb_revision == MDDB_REV_RB) { 864 un->c.un_revision = MD_32BIT_META_DEV; 865 } else { 866 un->c.un_revision = MD_64BIT_META_DEV; 867 un->c.un_flag |= MD_EFILABEL; 868 } 869 870 /* Create minor node for snarfed unit. */ 871 (void) md_create_minor_node(MD_MIN2SET(MD_SID(un)), MD_SID(un)); 872 873 if (MD_UNIT(MD_SID(un)) != NULL) { 874 mddb_setrecprivate(recid, MD_PRV_PENDDEL); 875 continue; 876 } 877 all_stripes_gotten = 0; 878 if (stripe_build_incore((void *)un, 1) == 0) { 879 mddb_setrecprivate(recid, MD_PRV_GOTIT); 880 md_create_unit_incore(MD_SID(un), &stripe_md_ops, 0); 881 gotsomething = 1; 882 } 883 } 884 885 if (!all_stripes_gotten) 886 return (gotsomething); 887 888 recid = mddb_makerecid(setno, 0); 889 while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) 890 if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT)) 891 mddb_setrecprivate(recid, MD_PRV_PENDDEL); 892 893 return (0); 894 } 895 896 static int 897 stripe_halt(md_haltcmd_t cmd, set_t setno) 898 { 899 int i; 900 mdi_unit_t *ui; 901 minor_t mnum; 902 903 if (cmd == MD_HALT_CLOSE) 904 return (0); 905 906 if (cmd == MD_HALT_OPEN) 907 return (0); 908 909 if (cmd == MD_HALT_UNLOAD) 910 return (0); 911 912 if (cmd == MD_HALT_CHECK) { 913 for (i = 0; i < md_nunits; i++) { 914 mnum = MD_MKMIN(setno, i); 915 if ((ui = MDI_UNIT(mnum)) == NULL) 916 continue; 917 if (ui->ui_opsindex != stripe_md_ops.md_selfindex) 918 continue; 919 if (md_unit_isopen(ui)) 920 return (1); 921 } 922 return (0); 923 } 924 925 if (cmd != MD_HALT_DOIT) 926 return (1); 927 928 for (i = 0; i < md_nunits; i++) { 929 mnum = MD_MKMIN(setno, i); 930 if ((ui = MDI_UNIT(mnum)) == NULL) 931 continue; 932 if (ui->ui_opsindex != stripe_md_ops.md_selfindex) 933 continue; 934 reset_stripe((ms_unit_t *)MD_UNIT(mnum), mnum, 0); 935 } 936 937 return (0); 938 } 939 940 /*ARGSUSED3*/ 941 static int 942 stripe_open(dev_t *dev, int flag, int otyp, cred_t *cred_p, int md_oflags) 943 { 944 minor_t mnum = getminor(*dev); 945 mdi_unit_t *ui = MDI_UNIT(mnum); 946 ms_unit_t *un; 947 int err = 0; 948 set_t setno; 949 950 /* 951 * When doing an open of a multi owner metadevice, check to see if this 952 * node is a starting node and if a reconfig cycle is underway. 953 * If so, the system isn't sufficiently set up enough to handle the 954 * open (which involves I/O during sp_validate), so fail with ENXIO. 955 */ 956 setno = MD_MIN2SET(mnum); 957 if ((md_set[setno].s_status & (MD_SET_MNSET | MD_SET_MN_START_RC)) == 958 (MD_SET_MNSET | MD_SET_MN_START_RC)) { 959 return (ENXIO); 960 } 961 962 /* single thread */ 963 un = (ms_unit_t *)md_unit_openclose_enter(ui); 964 965 /* open devices, if necessary */ 966 if (! md_unit_isopen(ui) || (md_oflags & MD_OFLG_PROBEDEV)) { 967 if ((err = stripe_open_all_devs(un, md_oflags)) != 0) { 968 goto out; 969 } 970 } 971 972 /* count open */ 973 if ((err = md_unit_incopen(mnum, flag, otyp)) != 0) 974 goto out; 975 976 /* unlock, return success */ 977 out: 978 md_unit_openclose_exit(ui); 979 return (err); 980 } 981 982 /*ARGSUSED1*/ 983 static int 984 stripe_close( 985 dev_t dev, 986 int flag, 987 int otyp, 988 cred_t *cred_p, 989 int md_cflags 990 ) 991 { 992 minor_t mnum = getminor(dev); 993 mdi_unit_t *ui = MDI_UNIT(mnum); 994 ms_unit_t *un; 995 int err = 0; 996 997 /* single thread */ 998 un = (ms_unit_t *)md_unit_openclose_enter(ui); 999 1000 /* count closed */ 1001 if ((err = md_unit_decopen(mnum, otyp)) != 0) 1002 goto out; 1003 1004 /* close devices, if necessary */ 1005 if (! md_unit_isopen(ui) || (md_cflags & MD_OFLG_PROBEDEV)) { 1006 stripe_close_all_devs(un, md_cflags); 1007 } 1008 1009 /* unlock, return success */ 1010 out: 1011 md_unit_openclose_exit(ui); 1012 return (err); 1013 } 1014 1015 1016 static struct buf dumpbuf; 1017 1018 /* 1019 * This routine dumps memory to the disk. It assumes that the memory has 1020 * already been mapped into mainbus space. It is called at disk interrupt 1021 * priority when the system is in trouble. 1022 * 1023 */ 1024 static int 1025 stripe_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 1026 { 1027 ms_unit_t *un; 1028 buf_t *bp; 1029 ms_comp_t *mdc; 1030 u_longlong_t nb; 1031 diskaddr_t mapblk; 1032 int result; 1033 int more; 1034 int saveresult = 0; 1035 1036 /* 1037 * Don't need to grab the unit lock. 1038 * Cause nothing else is suppose to be happenning. 1039 * Also dump is not suppose to sleep. 1040 */ 1041 un = (ms_unit_t *)MD_UNIT(getminor(dev)); 1042 1043 if ((diskaddr_t)blkno >= un->c.un_total_blocks) 1044 return (EINVAL); 1045 1046 if ((diskaddr_t)blkno + nblk > un->c.un_total_blocks) 1047 return (EINVAL); 1048 1049 bp = &dumpbuf; 1050 nb = ldbtob(nblk); 1051 do { 1052 bzero((caddr_t)bp, sizeof (*bp)); 1053 more = md_mapbuf(un, (diskaddr_t)blkno, nb, bp, &mdc); 1054 nblk = btodb(bp->b_bcount); 1055 mapblk = bp->b_lblkno; 1056 if (!(mdc->un_mirror.ms_flags & MDM_S_NOWRITE)) { 1057 /* 1058 * bdev_dump() is currently only able to take 1059 * 32 bit wide blkno's. 1060 */ 1061 result = bdev_dump(bp->b_edev, addr, (daddr_t)mapblk, 1062 nblk); 1063 if (result) 1064 saveresult = result; 1065 } 1066 1067 nb -= bp->b_bcount; 1068 addr += bp->b_bcount; 1069 blkno += nblk; 1070 } while (more); 1071 1072 return (saveresult); 1073 } 1074 1075 /*ARGSUSED*/ 1076 static intptr_t 1077 stripe_shared_by_blk( 1078 md_dev64_t dev, 1079 void *junk, 1080 diskaddr_t blkno, 1081 u_longlong_t *cnt) 1082 { 1083 ms_unit_t *un; 1084 buf_t bp; 1085 ms_comp_t *comp; 1086 1087 un = MD_UNIT(md_getminor(dev)); 1088 (void) md_mapbuf(un, blkno, ldbtob(*cnt), &bp, &comp); 1089 *cnt = (u_longlong_t)lbtodb(bp.b_bcount); 1090 return ((intptr_t)&comp->un_mirror); 1091 } 1092 1093 /* 1094 * stripe_block_count_skip_size() returns the following values 1095 * so that the logical to physical block mappings can 1096 * be calculated without intimate knowledge of the underpinnings. 1097 * 1098 * block - first logical block number of the device. 1099 * block = [ # of blocks before THE row ] + 1100 * [ # of blocks in THE row before the component ] 1101 * count - # of segments (interlaced size). 1102 * skip - # of logical blocks between segments, or delta to 1103 * get to next segment 1104 * size - interlace size used for the block, count, skip. 1105 */ 1106 /*ARGSUSED*/ 1107 static intptr_t 1108 stripe_block_count_skip_size( 1109 md_dev64_t dev, 1110 void *junk, 1111 int ci, 1112 diskaddr_t *block, 1113 size_t *count, 1114 u_longlong_t *skip, 1115 u_longlong_t *size) 1116 { 1117 ms_unit_t *un; 1118 int row; 1119 struct ms_row *mdr; 1120 int cmpcount = 0; 1121 1122 un = MD_UNIT(md_getminor(dev)); 1123 1124 for (row = 0; row < un->un_nrows; row++) { 1125 mdr = &un->un_row[row]; 1126 if ((mdr->un_ncomp + cmpcount) > ci) 1127 break; 1128 cmpcount += mdr->un_ncomp; 1129 } 1130 ASSERT(row != un->un_nrows); 1131 1132 /* 1133 * Concatenations are always contiguous blocks, 1134 * you cannot depend on the interlace being a usable 1135 * value (except for stripes). 1136 */ 1137 if (mdr->un_ncomp == 1) { /* Concats */ 1138 *block = mdr->un_cum_blocks - mdr->un_blocks; 1139 *count = 1; 1140 *skip = 0; 1141 *size = mdr->un_blocks; 1142 } else { /* Stripes */ 1143 *block = (mdr->un_cum_blocks - mdr->un_blocks) + 1144 ((ci - cmpcount) * mdr->un_interlace); 1145 *count = (size_t)(mdr->un_blocks / (mdr->un_interlace 1146 * mdr->un_ncomp)); 1147 *skip = (mdr->un_interlace * mdr->un_ncomp) - mdr->un_interlace; 1148 *size = mdr->un_interlace; 1149 } 1150 1151 return (0); 1152 } 1153 1154 /*ARGSUSED*/ 1155 static intptr_t 1156 stripe_shared_by_indx(md_dev64_t dev, void *junk, int indx) 1157 { 1158 ms_unit_t *un; 1159 ms_comp_t *comp; 1160 1161 un = MD_UNIT(md_getminor(dev)); 1162 comp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 1163 comp += indx; 1164 return ((intptr_t)&comp->un_mirror); 1165 } 1166 1167 /*ARGSUSED*/ 1168 intptr_t 1169 stripe_component_count(md_dev64_t dev, void *junk) 1170 { 1171 /* 1172 * See comments for stripe_get_dev 1173 */ 1174 1175 ms_unit_t *un; 1176 int count = 0; 1177 int row; 1178 1179 un = MD_UNIT(md_getminor(dev)); 1180 for (row = 0; row < un->un_nrows; row++) 1181 count += un->un_row[row].un_ncomp; 1182 return (count); 1183 } 1184 1185 /*ARGSUSED*/ 1186 intptr_t 1187 stripe_get_dev(md_dev64_t dev, void *junk, int indx, ms_cd_info_t *cd) 1188 { 1189 /* 1190 * It should be noted that stripe_replace in stripe_ioctl.c calls this 1191 * routine using makedevice(0, minor) for the first argument. 1192 * 1193 * If this routine at some point in the future needs to use the major 1194 * number stripe_replace must be changed. 1195 */ 1196 1197 ms_unit_t *un; 1198 ms_comp_t *comp; 1199 md_dev64_t tmpdev; 1200 1201 un = MD_UNIT(md_getminor(dev)); 1202 comp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 1203 comp += indx; 1204 tmpdev = comp->un_dev; 1205 /* 1206 * Try to resolve devt again if NODEV64 1207 * Check if this comp is hotspared and if it is 1208 * then use key for hotspare 1209 */ 1210 if (tmpdev == NODEV64) { 1211 tmpdev = md_resolve_bydevid(md_getminor(dev), tmpdev, 1212 comp->un_mirror.ms_hs_id ? 1213 comp->un_mirror.ms_hs_key : 1214 comp->un_key); 1215 comp->un_dev = tmpdev; 1216 } 1217 1218 cd->cd_dev = comp->un_dev; 1219 cd->cd_orig_dev = comp->un_mirror.ms_orig_dev; 1220 return (0); 1221 } 1222 1223 /*ARGSUSED*/ 1224 void 1225 stripe_replace_done(md_dev64_t dev, sv_dev_t *sv) 1226 { 1227 /* 1228 * See comments for stripe_get_dev 1229 */ 1230 1231 minor_t mnum = md_getminor(dev); 1232 1233 if (sv != NULL) { 1234 md_rem_names(sv, 1); 1235 kmem_free(sv, sizeof (sv_dev_t)); 1236 } 1237 1238 md_unit_writerexit(MDI_UNIT(mnum)); 1239 } 1240 1241 /*ARGSUSED*/ 1242 intptr_t 1243 stripe_replace_dev(md_dev64_t dev, void *junk, int ci, ms_new_dev_t *nd, 1244 mddb_recid_t *recids, int nrecids, void (**replace_done)(), 1245 void **replace_data) 1246 { 1247 minor_t mnum; 1248 ms_unit_t *un; 1249 mdi_unit_t *ui; 1250 ms_comp_t *comp; 1251 diskaddr_t dev_size; 1252 int row; 1253 int ncomps = 0; 1254 int cmpcount = 0; 1255 int rid = 0; 1256 struct ms_row *mdr; 1257 sv_dev_t *sv = NULL; 1258 mddb_recid_t hs_id = 0; 1259 set_t setno; 1260 side_t side; 1261 md_dev64_t this_dev; 1262 1263 mnum = md_getminor(dev); 1264 ui = MDI_UNIT(mnum); 1265 setno = MD_MIN2SET(mnum); 1266 side = mddb_getsidenum(setno); 1267 1268 un = md_unit_writerlock(ui); 1269 1270 *replace_data = NULL; 1271 comp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 1272 1273 comp += ci; 1274 1275 /* 1276 * Count the number of components 1277 */ 1278 for (row = 0; row < un->un_nrows; row++) { 1279 struct ms_row *mdr = &un->un_row[row]; 1280 ncomps += mdr->un_ncomp; 1281 } 1282 1283 recids[0] = 0; 1284 /* 1285 * No need of checking size of new device, 1286 * when hotsparing (it has already been done), or 1287 * when enabling the device. 1288 */ 1289 if ((nd != NULL) && (nd->nd_hs_id == 0)) { 1290 for (row = 0; row < un->un_nrows; row++) { 1291 mdr = &un->un_row[row]; 1292 if ((mdr->un_ncomp + cmpcount) > ci) 1293 break; 1294 cmpcount += mdr->un_ncomp; 1295 } 1296 ASSERT(row != un->un_nrows); 1297 1298 /* Concatenations have a ncomp = 1 */ 1299 dev_size = mdr->un_blocks / mdr->un_ncomp; 1300 1301 /* 1302 * now check to see if new comp can be used in 1303 * place of old comp 1304 */ 1305 if ((un->c.un_flag & MD_LABELED) && (ci == 0) && 1306 nd->nd_labeled) 1307 nd->nd_start_blk = 0; 1308 else 1309 nd->nd_nblks -= nd->nd_start_blk; 1310 1311 if (dev_size > nd->nd_nblks) { 1312 md_unit_writerexit(ui); 1313 return (MDE_COMP_TOO_SMALL); 1314 } 1315 1316 sv = (sv_dev_t *)kmem_alloc(sizeof (sv_dev_t), KM_SLEEP); 1317 sv->setno = MD_MIN2SET(mnum); 1318 sv->key = comp->un_key; 1319 } 1320 1321 /* 1322 * Close this component. 1323 */ 1324 if (comp->un_mirror.ms_flags & MDM_S_ISOPEN) { 1325 md_layered_close(comp->un_dev, MD_OFLG_NULL); 1326 comp->un_mirror.ms_flags &= ~MDM_S_ISOPEN; 1327 } 1328 1329 /* 1330 * If the component is hotspared, return to the pool. 1331 */ 1332 if (comp->un_mirror.ms_hs_id != 0) { 1333 hs_cmds_t cmd; 1334 mdkey_t hs_key; 1335 1336 hs_key = comp->un_mirror.ms_hs_key; 1337 comp->un_dev = comp->un_mirror.ms_orig_dev; 1338 comp->un_start_block = comp->un_mirror.ms_orig_blk; 1339 comp->un_mirror.ms_hs_key = 0; 1340 comp->un_mirror.ms_hs_id = 0; 1341 comp->un_mirror.ms_orig_dev = 0; 1342 1343 cmd = HS_FREE; 1344 if ((comp->un_mirror.ms_state != CS_OKAY) && 1345 (comp->un_mirror.ms_state != CS_RESYNC)) 1346 cmd = HS_BAD; 1347 (void) md_hot_spare_ifc(cmd, un->un_hsp_id, 0, 0, &hs_id, 1348 &hs_key, NULL, NULL); 1349 } 1350 1351 /* 1352 * Open by device id; for enable (indicated by a NULL 1353 * nd pointer), use the existing component info. For 1354 * replace, use the new device. 1355 */ 1356 if (nd == NULL) { 1357 this_dev = md_resolve_bydevid(mnum, comp->un_dev, comp->un_key); 1358 /* 1359 * If someone replaced a new disk in the same slot 1360 * we get NODEV64 since old device id cannot be 1361 * resolved. The new devt is obtained from the 1362 * mddb since devt is going to be unchanged for the 1363 * enable case. No need to check for multiple 1364 * keys here because the caller (comp_replace) 1365 * has already sanity checked it for us. 1366 */ 1367 if (this_dev == NODEV64) { 1368 this_dev = md_getdevnum(setno, side, comp->un_key, 1369 MD_TRUST_DEVT); 1370 } 1371 } else { 1372 /* 1373 * If this is a hotspare, save the original dev_t for later 1374 * use. If this has occured during boot then the value of 1375 * comp->un_dev will be NODEV64 because of the failure to look 1376 * up the devid of the device. 1377 */ 1378 if (nd->nd_hs_id != 0) 1379 comp->un_mirror.ms_orig_dev = comp->un_dev; 1380 this_dev = md_resolve_bydevid(mnum, nd->nd_dev, nd->nd_key); 1381 } 1382 1383 comp->un_dev = this_dev; 1384 1385 /* 1386 * Now open the new device if required. Note for a single component 1387 * stripe it will not be open - leave this for the mirror driver to 1388 * deal with. 1389 */ 1390 if (md_unit_isopen(ui)) { 1391 if (md_layered_open(mnum, &this_dev, MD_OFLG_NULL)) { 1392 mddb_recid_t ids[3]; 1393 1394 ids[0] = un->c.un_record_id; 1395 ids[1] = hs_id; 1396 ids[2] = 0; 1397 mddb_commitrecs_wrapper(ids); 1398 if ((nd != NULL) && (nd->nd_hs_id != 0)) { 1399 /* 1400 * Revert back to the original device. 1401 */ 1402 comp->un_dev = comp->un_mirror.ms_orig_dev; 1403 1404 cmn_err(CE_WARN, 1405 "md: %s: open error of hotspare %s", 1406 md_shortname(mnum), 1407 md_devname(MD_MIN2SET(mnum), nd->nd_dev, 1408 NULL, 0)); 1409 SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, 1410 SVM_TAG_HS, MD_MIN2SET(mnum), nd->nd_dev); 1411 } 1412 md_unit_writerexit(ui); 1413 return (MDE_COMP_OPEN_ERR); 1414 } 1415 if (nd != NULL) 1416 nd->nd_dev = this_dev; 1417 1418 comp->un_mirror.ms_flags |= MDM_S_ISOPEN; 1419 } 1420 1421 if (nd == NULL) { 1422 recids[0] = un->c.un_record_id; 1423 recids[1] = hs_id; 1424 recids[2] = 0; 1425 *replace_done = stripe_replace_done; 1426 return (0); 1427 } 1428 1429 /* if hot sparing this device */ 1430 if (nd->nd_hs_id != 0) { 1431 char devname[MD_MAX_CTDLEN]; 1432 char hs_devname[MD_MAX_CTDLEN]; 1433 set_t setno; 1434 1435 comp->un_mirror.ms_hs_id = nd->nd_hs_id; 1436 comp->un_mirror.ms_hs_key = nd->nd_key; 1437 1438 comp->un_mirror.ms_orig_blk = comp->un_start_block; 1439 1440 setno = MD_MIN2SET(mnum); 1441 1442 (void) md_devname(setno, comp->un_mirror.ms_orig_dev, devname, 1443 sizeof (devname)); 1444 (void) md_devname(setno, nd->nd_dev, hs_devname, 1445 sizeof (hs_devname)); 1446 1447 cmn_err(CE_NOTE, "md: %s: hotspared device %s with %s", 1448 md_shortname(mnum), devname, hs_devname); 1449 1450 } else { /* replacing the device */ 1451 comp->un_key = nd->nd_key; 1452 *replace_data = (void *)sv; 1453 1454 /* 1455 * For the old device, make sure to reset the parent 1456 * if it's a metadevice. 1457 */ 1458 if (md_getmajor(comp->un_dev) == md_major) { 1459 minor_t comp_mnum = md_getminor(comp->un_dev); 1460 md_unit_t *comp_un = MD_UNIT(comp_mnum); 1461 1462 md_reset_parent(comp->un_dev); 1463 recids[rid++] = MD_RECID(comp_un); 1464 } 1465 } 1466 1467 comp->un_dev = nd->nd_dev; 1468 comp->un_start_block = nd->nd_start_blk; 1469 1470 /* 1471 * For the new device, make sure to set the parent if it's a 1472 * metadevice. 1473 * 1474 * If we ever support using metadevices as hot spares, this 1475 * will need to be tested, and possibly moved into the 1476 * preceding "else" clause, immediately following the parent 1477 * reset block. For now, it's convenient to leave it here and 1478 * only compress nd->nd_dev once. 1479 */ 1480 if (md_getmajor(comp->un_dev) == md_major) { 1481 minor_t comp_mnum = md_getminor(comp->un_dev); 1482 md_unit_t *comp_un = MD_UNIT(comp_mnum); 1483 1484 md_set_parent(comp->un_dev, MD_SID(un)); 1485 recids[rid++] = MD_RECID(comp_un); 1486 } 1487 1488 recids[rid++] = un->c.un_record_id; 1489 recids[rid++] = hs_id; 1490 recids[rid] = 0; 1491 *replace_done = stripe_replace_done; 1492 return (0); 1493 } 1494 1495 /*ARGSUSED*/ 1496 static intptr_t 1497 stripe_hotspare_dev( 1498 md_dev64_t dev, 1499 void *junk, 1500 int ci, 1501 mddb_recid_t *recids, 1502 int nrecids, 1503 void (**replace_done)(), 1504 void **replace_data) 1505 { 1506 ms_unit_t *un; 1507 mdi_unit_t *ui; 1508 ms_comp_t *comp; 1509 int row; 1510 struct ms_row *mdr; 1511 ms_new_dev_t nd; 1512 int err; 1513 int i; 1514 minor_t mnum; 1515 set_t setno; 1516 int cmpcount = 0; 1517 1518 mnum = md_getminor(dev); 1519 ui = MDI_UNIT(mnum); 1520 un = MD_UNIT(mnum); 1521 setno = MD_MIN2SET(mnum); 1522 1523 if (md_get_setstatus(setno) & MD_SET_STALE) 1524 return (1); 1525 1526 if (un->un_hsp_id == -1) 1527 return (1); 1528 1529 for (row = 0; row < un->un_nrows; row++) { 1530 mdr = &un->un_row[row]; 1531 if ((mdr->un_ncomp + cmpcount) > ci) 1532 break; 1533 cmpcount += mdr->un_ncomp; 1534 } 1535 ASSERT(row != un->un_nrows); 1536 1537 comp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 1538 comp += ci; 1539 /* Concatenations have a ncomp = 1 */ 1540 nd.nd_nblks = mdr->un_blocks / mdr->un_ncomp; 1541 1542 if ((un->c.un_flag & MD_LABELED) && (ci == 0)) 1543 nd.nd_labeled = 1; 1544 else 1545 nd.nd_labeled = 0; 1546 1547 again: 1548 err = md_hot_spare_ifc(HS_GET, un->un_hsp_id, nd.nd_nblks, 1549 nd.nd_labeled, &nd.nd_hs_id, &nd.nd_key, &nd.nd_dev, 1550 &nd.nd_start_blk); 1551 1552 if (err) { 1553 if (!stripe_replace_dev(dev, junk, ci, NULL, recids, nrecids, 1554 replace_done, replace_data)) { 1555 mddb_commitrecs_wrapper(recids); 1556 md_unit_writerexit(ui); 1557 } 1558 recids[0] = 0; 1559 return (1); 1560 } 1561 1562 if (stripe_replace_dev(dev, junk, ci, &nd, recids, nrecids, 1563 replace_done, replace_data)) { 1564 1565 (void) md_hot_spare_ifc(HS_BAD, un->un_hsp_id, 0, 0, 1566 &nd.nd_hs_id, &nd.nd_key, NULL, NULL); 1567 mddb_commitrec_wrapper(nd.nd_hs_id); 1568 goto again; 1569 } 1570 1571 /* Leave a slot for the null recid */ 1572 for (i = 0; i < (nrecids - 1); i++) { 1573 if (recids[i] == 0) { 1574 recids[i++] = nd.nd_hs_id; 1575 recids[i] = 0; 1576 } 1577 } 1578 return (0); 1579 } 1580 1581 static int 1582 stripe_imp_set( 1583 set_t setno 1584 ) 1585 { 1586 1587 mddb_recid_t recid; 1588 int i, row, c, gotsomething; 1589 mddb_type_t typ1; 1590 mddb_de_ic_t *dep; 1591 mddb_rb32_t *rbp; 1592 ms_unit32_od_t *un32; 1593 ms_unit_t *un64; 1594 minor_t *self_id; /* minor needs to be updated */ 1595 md_parent_t *parent_id; /* parent needs to be updated */ 1596 mddb_recid_t *record_id; /* record id needs to be updated */ 1597 mddb_recid_t *hsp_id; 1598 ms_comp32_od_t *comp32; 1599 ms_comp_t *comp64; 1600 1601 1602 gotsomething = 0; 1603 1604 typ1 = (mddb_type_t)md_getshared_key(setno, 1605 stripe_md_ops.md_driver.md_drivername); 1606 recid = mddb_makerecid(setno, 0); 1607 1608 while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) { 1609 if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 1610 continue; 1611 1612 dep = mddb_getrecdep(recid); 1613 rbp = dep->de_rb; 1614 1615 if (rbp->rb_revision == MDDB_REV_RB) { 1616 /* 1617 * Small device 1618 */ 1619 un32 = (ms_unit32_od_t *)mddb_getrecaddr(recid); 1620 self_id = &(un32->c.un_self_id); 1621 parent_id = &(un32->c.un_parent); 1622 record_id = &(un32->c.un_record_id); 1623 hsp_id = &(un32->un_hsp_id); 1624 1625 comp32 = (ms_comp32_od_t *)((void *)&((char *)un32) 1626 [un32->un_ocomp]); 1627 for (row = 0; row < un32->un_nrows; row++) { 1628 struct ms_row32_od *mdr = &un32->un_row[row]; 1629 for (i = 0, c = mdr->un_icomp; 1630 i < mdr->un_ncomp; i++) { 1631 ms_comp32_od_t *mdc; 1632 mdc = &comp32[c++]; 1633 1634 if (!md_update_minor(setno, mddb_getsidenum 1635 (setno), mdc->un_key)) 1636 goto out; 1637 1638 if (mdc->un_mirror.ms_hs_id != 0) 1639 mdc->un_mirror.ms_hs_id = MAKERECID( 1640 setno, mdc->un_mirror.ms_hs_id); 1641 } 1642 } 1643 } else { 1644 un64 = (ms_unit_t *)mddb_getrecaddr(recid); 1645 self_id = &(un64->c.un_self_id); 1646 parent_id = &(un64->c.un_parent); 1647 record_id = &(un64->c.un_record_id); 1648 hsp_id = &(un64->un_hsp_id); 1649 1650 comp64 = (ms_comp_t *)((void *)&((char *)un64) 1651 [un64->un_ocomp]); 1652 for (row = 0; row < un64->un_nrows; row++) { 1653 struct ms_row *mdr = &un64->un_row[row]; 1654 for (i = 0, c = mdr->un_icomp; 1655 i < mdr->un_ncomp; i++) { 1656 ms_comp_t *mdc; 1657 mdc = &comp64[c++]; 1658 1659 if (!md_update_minor(setno, mddb_getsidenum 1660 (setno), mdc->un_key)) 1661 goto out; 1662 1663 if (mdc->un_mirror.ms_hs_id != 0) 1664 mdc->un_mirror.ms_hs_id = MAKERECID( 1665 setno, mdc->un_mirror.ms_hs_id); 1666 } 1667 } 1668 } 1669 1670 /* 1671 * Update unit with the imported setno 1672 * 1673 */ 1674 mddb_setrecprivate(recid, MD_PRV_GOTIT); 1675 1676 *self_id = MD_MKMIN(setno, MD_MIN2UNIT(*self_id)); 1677 1678 if (*hsp_id != -1) 1679 *hsp_id = MAKERECID(setno, DBID(*hsp_id)); 1680 1681 if (*parent_id != MD_NO_PARENT) 1682 *parent_id = MD_MKMIN(setno, MD_MIN2UNIT(*parent_id)); 1683 *record_id = MAKERECID(setno, DBID(*record_id)); 1684 1685 gotsomething = 1; 1686 } 1687 1688 out: 1689 return (gotsomething); 1690 } 1691 1692 static md_named_services_t stripe_named_services[] = { 1693 {stripe_shared_by_blk, "shared by blk" }, 1694 {stripe_shared_by_indx, "shared by indx" }, 1695 {stripe_component_count, "get component count" }, 1696 {stripe_block_count_skip_size, "get block count skip size" }, 1697 {stripe_get_dev, "get device" }, 1698 {stripe_replace_dev, "replace device" }, 1699 {stripe_hotspare_dev, "hotspare device" }, 1700 {stripe_rename_check, MDRNM_CHECK }, 1701 {NULL, 0} 1702 }; 1703 1704 md_ops_t stripe_md_ops = { 1705 stripe_open, /* open */ 1706 stripe_close, /* close */ 1707 md_stripe_strategy, /* strategy */ 1708 NULL, /* print */ 1709 stripe_dump, /* dump */ 1710 NULL, /* read */ 1711 NULL, /* write */ 1712 md_stripe_ioctl, /* stripe_ioctl, */ 1713 stripe_snarf, /* stripe_snarf */ 1714 stripe_halt, /* stripe_halt */ 1715 NULL, /* aread */ 1716 NULL, /* awrite */ 1717 stripe_imp_set, /* import set */ 1718 stripe_named_services 1719 }; 1720 1721 static void 1722 init_init() 1723 { 1724 md_stripe_mcs_buf_off = sizeof (md_scs_t) - sizeof (buf_t); 1725 1726 stripe_parent_cache = kmem_cache_create("md_stripe_parent", 1727 sizeof (md_sps_t), 0, stripe_parent_constructor, 1728 stripe_parent_destructor, stripe_run_queue, NULL, NULL, 1729 0); 1730 stripe_child_cache = kmem_cache_create("md_stripe_child", 1731 sizeof (md_scs_t) - sizeof (buf_t) + biosize(), 0, 1732 stripe_child_constructor, stripe_child_destructor, 1733 stripe_run_queue, NULL, NULL, 0); 1734 } 1735 1736 static void 1737 fini_uninit() 1738 { 1739 kmem_cache_destroy(stripe_parent_cache); 1740 kmem_cache_destroy(stripe_child_cache); 1741 stripe_parent_cache = stripe_child_cache = NULL; 1742 } 1743 1744 /* define the module linkage */ 1745 MD_PLUGIN_MISC_MODULE("stripes module %I%", init_init(), fini_uninit()) 1746