1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/param.h> 28 #include <sys/systm.h> 29 #include <sys/conf.h> 30 #include <sys/file.h> 31 #include <sys/user.h> 32 #include <sys/uio.h> 33 #include <sys/t_lock.h> 34 #include <sys/buf.h> 35 #include <sys/dkio.h> 36 #include <sys/vtoc.h> 37 #include <sys/kmem.h> 38 #include <vm/page.h> 39 #include <sys/cmn_err.h> 40 #include <sys/sysmacros.h> 41 #include <sys/types.h> 42 #include <sys/mkdev.h> 43 #include <sys/stat.h> 44 #include <sys/open.h> 45 #include <sys/lvm/mdio.h> 46 #include <sys/lvm/mdvar.h> 47 #include <sys/lvm/md_stripe.h> 48 #include <sys/lvm/md_convert.h> 49 #include <sys/lvm/md_notify.h> 50 #include <sys/modctl.h> 51 #include <sys/ddi.h> 52 #include <sys/sunddi.h> 53 #include <sys/debug.h> 54 #include <sys/sysevent/eventdefs.h> 55 #include <sys/sysevent/svm.h> 56 57 md_ops_t stripe_md_ops; 58 #ifndef lint 59 char _depends_on[] = "drv/md"; 60 md_ops_t *md_interface_ops = &stripe_md_ops; 61 #endif 62 63 extern unit_t md_nunits; 64 extern set_t md_nsets; 65 extern md_set_t md_set[]; 66 67 extern kmutex_t md_mx; 68 extern kcondvar_t md_cv; 69 70 extern int md_status; 71 extern major_t md_major; 72 extern mdq_anchor_t md_done_daemon; 73 74 static int md_stripe_mcs_buf_off; 75 static kmem_cache_t *stripe_parent_cache = NULL; 76 static kmem_cache_t *stripe_child_cache = NULL; 77 78 /*ARGSUSED1*/ 79 static int 80 stripe_parent_constructor(void *p, void *d1, int d2) 81 { 82 mutex_init(&((md_sps_t *)p)->ps_mx, 83 NULL, MUTEX_DEFAULT, NULL); 84 return (0); 85 } 86 87 static void 88 stripe_parent_init(void *ps) 89 { 90 bzero(ps, offsetof(md_sps_t, ps_mx)); 91 } 92 93 /*ARGSUSED1*/ 94 static void 95 stripe_parent_destructor(void *p, void *d) 96 { 97 mutex_destroy(&((md_sps_t *)p)->ps_mx); 98 } 99 100 /*ARGSUSED1*/ 101 static int 102 stripe_child_constructor(void *p, void *d1, int d2) 103 { 104 bioinit(&((md_scs_t *)p)->cs_buf); 105 return (0); 106 } 107 108 static void 109 stripe_child_init(md_scs_t *cs) 110 { 111 cs->cs_mdunit = 0; 112 cs->cs_ps = NULL; 113 cs->cs_comp = NULL; 114 md_bioreset(&cs->cs_buf); 115 } 116 117 /*ARGSUSED1*/ 118 static void 119 stripe_child_destructor(void *p, void *d) 120 { 121 biofini(&((md_scs_t *)p)->cs_buf); 122 } 123 124 /*ARGSUSED*/ 125 static void 126 stripe_run_queue(void *d) 127 { 128 if (!(md_status & MD_GBL_DAEMONS_LIVE)) 129 md_daemon(1, &md_done_daemon); 130 } 131 132 static void 133 stripe_close_all_devs(ms_unit_t *un, int md_cflags) 134 { 135 int row; 136 int i; 137 int c; 138 struct ms_comp *mdcomp; 139 140 mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 141 for (row = 0; row < un->un_nrows; row++) { 142 struct ms_row *mdr = &un->un_row[row]; 143 for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) { 144 struct ms_comp *mdc; 145 mdc = &mdcomp[c++]; 146 if (md_cflags & MD_OFLG_PROBEDEV) { 147 148 /* 149 * It is possible that the md_layered_open 150 * failed because the stripe unit structure 151 * contained a NODEV. In such a case since 152 * there is nothing to open, there is nothing 153 * to close. 154 */ 155 if (mdc->un_dev == NODEV64) 156 continue; 157 } 158 if ((md_cflags & MD_OFLG_PROBEDEV) && 159 (mdc->un_mirror.ms_flags & MDM_S_PROBEOPEN)) { 160 md_layered_close(mdc->un_dev, 161 md_cflags); 162 mdc->un_mirror.ms_flags &= ~MDM_S_PROBEOPEN; 163 } else if (mdc->un_mirror.ms_flags & MDM_S_ISOPEN) { 164 md_layered_close(mdc->un_dev, md_cflags); 165 mdc->un_mirror.ms_flags &= ~MDM_S_ISOPEN; 166 } 167 } 168 } 169 } 170 171 static int 172 stripe_open_all_devs(ms_unit_t *un, int md_oflags) 173 { 174 minor_t mnum = MD_SID(un); 175 int row; 176 int i; 177 int c; 178 struct ms_comp *mdcomp; 179 int err; 180 int cont_on_errors = (md_oflags & MD_OFLG_CONT_ERRS); 181 int probe_err_cnt = 0; 182 int total_comp_cnt = 0; 183 set_t setno = MD_MIN2SET(MD_SID(un)); 184 side_t side = mddb_getsidenum(setno); 185 mdkey_t key; 186 187 mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 188 189 /* 190 * For a probe call, if any component of a stripe or a concat 191 * can be opened, it is considered to be a success. The total number 192 * of components in a stripe are computed prior to starting a probe. 193 * This number is then compared against the number of components 194 * that could be be successfully opened. If none of the components 195 * in a stripe can be opened, only then an ENXIO is returned for a 196 * probe type open. 197 */ 198 199 for (row = 0; row < un->un_nrows; row++) { 200 struct ms_row *mdr = &un->un_row[row]; 201 202 if (md_oflags & MD_OFLG_PROBEDEV) 203 total_comp_cnt += mdr->un_ncomp; 204 205 for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) { 206 struct ms_comp *mdc; 207 md_dev64_t tmpdev; 208 209 mdc = &mdcomp[c++]; 210 tmpdev = mdc->un_dev; 211 /* 212 * Do the open by device id 213 * Check if this comp is hotspared and 214 * if it is then use the key for hotspare. 215 * MN disksets don't use devids, so we better don't use 216 * md_devid_found/md_resolve_bydevid there. Rather do, 217 * what's done in stripe_build_incore() 218 */ 219 if (MD_MNSET_SETNO(setno)) { 220 if (mdc->un_mirror.ms_hs_id != 0) { 221 (void) md_hot_spare_ifc(HS_MKDEV, 0, 0, 222 0, &mdc->un_mirror.ms_hs_id, NULL, 223 &tmpdev, NULL); 224 } 225 } else { 226 key = mdc->un_mirror.ms_hs_id ? 227 mdc->un_mirror.ms_hs_key : mdc->un_key; 228 if ((md_getmajor(tmpdev) != md_major) && 229 md_devid_found(setno, side, key) == 1) { 230 tmpdev = md_resolve_bydevid(mnum, 231 tmpdev, key); 232 } 233 } 234 235 /* 236 * For a submirror, we only want to open those devices 237 * that are not errored. If the device is errored then 238 * then there is no reason to open it and leaving it 239 * closed allows the RCM/DR code to work so that the 240 * errored device can be replaced. 241 */ 242 if ((md_oflags & MD_OFLG_PROBEDEV) || 243 ! (mdc->un_mirror.ms_state & CS_ERRED)) { 244 245 err = md_layered_open(mnum, &tmpdev, md_oflags); 246 } else { 247 err = ENXIO; 248 } 249 250 /* 251 * Only set the un_dev if the tmpdev != NODEV64. If 252 * it is NODEV64 then the md_layered_open() will have 253 * failed in some manner. 254 */ 255 if (tmpdev != NODEV64) 256 mdc->un_dev = tmpdev; 257 258 if (err) { 259 if (!cont_on_errors) { 260 stripe_close_all_devs(un, md_oflags); 261 return (ENXIO); 262 } 263 264 if (md_oflags & MD_OFLG_PROBEDEV) 265 probe_err_cnt++; 266 } else { 267 if (md_oflags & MD_OFLG_PROBEDEV) { 268 mdc->un_mirror.ms_flags |= 269 MDM_S_PROBEOPEN; 270 } else 271 mdc->un_mirror.ms_flags |= MDM_S_ISOPEN; 272 } 273 } 274 } 275 276 /* If every component in a stripe could not be opened fail */ 277 if ((md_oflags & MD_OFLG_PROBEDEV) && 278 (probe_err_cnt == total_comp_cnt)) 279 return (ENXIO); 280 else 281 return (0); 282 } 283 284 int 285 stripe_build_incore(void *p, int snarfing) 286 { 287 ms_unit_t *un = (ms_unit_t *)p; 288 struct ms_comp *mdcomp; 289 minor_t mnum; 290 int row; 291 int i; 292 int c; 293 int ncomps; 294 295 mnum = MD_SID(un); 296 297 if (MD_UNIT(mnum) != NULL) 298 return (0); 299 300 MD_STATUS(un) = 0; 301 302 /* 303 * Reset all the is_open flags, these are probably set 304 * cause they just came out of the database. 305 */ 306 mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 307 308 ncomps = 0; 309 for (row = 0; row < un->un_nrows; row++) { 310 struct ms_row *mdr = &un->un_row[row]; 311 ncomps += mdr->un_ncomp; 312 } 313 314 for (row = 0; row < un->un_nrows; row++) { 315 struct ms_row *mdr = &un->un_row[row]; 316 for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) { 317 struct ms_comp *mdc; 318 set_t setno; 319 md_dev64_t tmpdev; 320 321 mdc = &mdcomp[c++]; 322 mdc->un_mirror.ms_flags &= 323 ~(MDM_S_ISOPEN | MDM_S_IOERR | MDM_S_RS_TRIED); 324 325 if (!snarfing) 326 continue; 327 328 setno = MD_MIN2SET(mnum); 329 330 tmpdev = md_getdevnum(setno, mddb_getsidenum(setno), 331 mdc->un_key, MD_NOTRUST_DEVT); 332 mdc->un_dev = tmpdev; 333 /* 334 * Check for hotspares. If the hotspares haven't been 335 * snarfed yet, stripe_open_all_devs() will do the 336 * remapping of the dev's later. 337 */ 338 if (mdc->un_mirror.ms_hs_id != 0) { 339 mdc->un_mirror.ms_orig_dev = mdc->un_dev; 340 (void) md_hot_spare_ifc(HS_MKDEV, 0, 0, 341 0, &mdc->un_mirror.ms_hs_id, NULL, 342 &tmpdev, NULL); 343 mdc->un_dev = tmpdev; 344 } 345 } 346 } 347 348 /* place various information in the in-core data structures */ 349 md_nblocks_set(mnum, un->c.un_total_blocks); 350 MD_UNIT(mnum) = un; 351 352 return (0); 353 } 354 355 void 356 reset_stripe(ms_unit_t *un, minor_t mnum, int removing) 357 { 358 ms_comp_t *mdcomp; 359 struct ms_row *mdr; 360 int i, c; 361 int row; 362 int nsv; 363 int isv; 364 sv_dev_t *sv; 365 mddb_recid_t *recids; 366 mddb_recid_t vtoc_id; 367 int rid = 0; 368 369 md_destroy_unit_incore(mnum, &stripe_md_ops); 370 371 md_nblocks_set(mnum, -1ULL); 372 MD_UNIT(mnum) = NULL; 373 374 /* 375 * Attempt release of its minor node 376 */ 377 md_remove_minor_node(mnum); 378 379 if (!removing) 380 return; 381 382 nsv = 0; 383 /* Count the number of devices */ 384 for (row = 0; row < un->un_nrows; row++) { 385 mdr = &un->un_row[row]; 386 nsv += mdr->un_ncomp; 387 } 388 sv = (sv_dev_t *)kmem_alloc(sizeof (sv_dev_t) * nsv, KM_SLEEP); 389 390 /* 391 * allocate recids array. since we may have to commit 392 * underlying soft partition records, we need an array 393 * of size: total number of components in stripe + 3 394 * (one for the stripe itself, one for the hotspare, one 395 * for the end marker). 396 */ 397 recids = kmem_alloc(sizeof (mddb_recid_t) * (nsv + 3), KM_SLEEP); 398 399 /* 400 * Save the md_dev64_t's and driver nm indexes. 401 * Because after the mddb_deleterec() we will 402 * not be able to access the unit structure. 403 * 404 * NOTE: Deleting the names before deleting the 405 * unit structure would cause problems if 406 * the machine crashed in between the two. 407 */ 408 isv = 0; 409 mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 410 411 for (row = 0; row < un->un_nrows; row++) { 412 mdr = &un->un_row[row]; 413 for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) { 414 struct ms_comp *mdc; 415 md_dev64_t child_dev; 416 md_unit_t *child_un; 417 418 mdc = &mdcomp[c++]; 419 if (mdc->un_mirror.ms_hs_id != 0) { 420 mdkey_t hs_key; 421 422 hs_key = mdc->un_mirror.ms_hs_key; 423 424 mdc->un_dev = mdc->un_mirror.ms_orig_dev; 425 mdc->un_start_block = 426 mdc->un_mirror.ms_orig_blk; 427 mdc->un_mirror.ms_hs_id = 0; 428 mdc->un_mirror.ms_hs_key = 0; 429 mdc->un_mirror.ms_orig_dev = 0; 430 recids[0] = 0; 431 recids[1] = 0; /* recids[1] filled in below */ 432 recids[2] = 0; 433 (void) md_hot_spare_ifc(HS_FREE, un->un_hsp_id, 434 0, 0, &recids[0], &hs_key, NULL, NULL); 435 mddb_commitrecs_wrapper(recids); 436 } 437 438 /* 439 * check if we've got metadevice below us and 440 * deparent it if we do. 441 * NOTE: currently soft partitions are the 442 * the only metadevices stripes can be 443 * built on top of. 444 */ 445 child_dev = mdc->un_dev; 446 if (md_getmajor(child_dev) == md_major) { 447 child_un = MD_UNIT(md_getminor(child_dev)); 448 md_reset_parent(child_dev); 449 recids[rid++] = MD_RECID(child_un); 450 } 451 452 sv[isv].setno = MD_MIN2SET(mnum); 453 sv[isv++].key = mdc->un_key; 454 } 455 } 456 457 recids[rid++] = un->c.un_record_id; 458 recids[rid] = 0; /* filled in below */ 459 460 /* 461 * Decrement the HSP reference count and 462 * remove the knowledge of the HSP from the unit struct. 463 * This is done atomically to remove a window. 464 */ 465 if (un->un_hsp_id != -1) { 466 (void) md_hot_spare_ifc(HSP_DECREF, un->un_hsp_id, 0, 0, 467 &recids[rid++], NULL, NULL, NULL); 468 un->un_hsp_id = -1; 469 } 470 471 /* set end marker and commit records */ 472 recids[rid] = 0; 473 mddb_commitrecs_wrapper(recids); 474 475 vtoc_id = un->c.un_vtoc_id; 476 477 /* 478 * Remove self from the namespace 479 */ 480 if (un->c.un_revision & MD_FN_META_DEV) { 481 (void) md_rem_selfname(un->c.un_self_id); 482 } 483 484 /* Remove the unit structure */ 485 mddb_deleterec_wrapper(un->c.un_record_id); 486 487 /* Remove the vtoc, if present */ 488 if (vtoc_id) 489 mddb_deleterec_wrapper(vtoc_id); 490 491 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_METADEVICE, 492 MD_MIN2SET(mnum), MD_MIN2UNIT(mnum)); 493 md_rem_names(sv, nsv); 494 kmem_free(sv, sizeof (sv_dev_t) * nsv); 495 kmem_free(recids, sizeof (mddb_recid_t) * (nsv + 3)); 496 } 497 498 static void 499 stripe_error(md_sps_t *ps) 500 { 501 struct buf *pb = ps->ps_bp; 502 mdi_unit_t *ui = ps->ps_ui; 503 md_dev64_t dev = ps->ps_errcomp->un_dev; 504 md_dev64_t md_dev = md_expldev(pb->b_edev); 505 char *str; 506 507 if (pb->b_flags & B_READ) { 508 ps->ps_errcomp->un_mirror.ms_flags |= MDM_S_READERR; 509 str = "read"; 510 } else { 511 ps->ps_errcomp->un_mirror.ms_flags |= MDM_S_WRTERR; 512 str = "write"; 513 } 514 if (!(ps->ps_flags & MD_SPS_DONTFREE)) { 515 if (MUTEX_HELD(&ps->ps_mx)) { 516 mutex_exit(&ps->ps_mx); 517 } 518 } else { 519 ASSERT(panicstr); 520 } 521 SPS_FREE(stripe_parent_cache, ps); 522 pb->b_flags |= B_ERROR; 523 524 md_kstat_done(ui, pb, 0); 525 md_unit_readerexit(ui); 526 md_biodone(pb); 527 528 cmn_err(CE_WARN, "md: %s: %s error on %s", 529 md_shortname(md_getminor(md_dev)), str, 530 md_devname(MD_DEV2SET(md_dev), dev, NULL, 0)); 531 } 532 533 static int 534 stripe_done(struct buf *cb) 535 { 536 struct buf *pb; 537 mdi_unit_t *ui; 538 md_sps_t *ps; 539 md_scs_t *cs; 540 541 /*LINTED*/ 542 cs = (md_scs_t *)((caddr_t)cb - md_stripe_mcs_buf_off); 543 ps = cs->cs_ps; 544 pb = ps->ps_bp; 545 546 mutex_enter(&ps->ps_mx); 547 if (cb->b_flags & B_ERROR) { 548 ps->ps_flags |= MD_SPS_ERROR; 549 pb->b_error = cb->b_error; 550 ps->ps_errcomp = cs->cs_comp; 551 } 552 553 if (cb->b_flags & B_REMAPPED) 554 bp_mapout(cb); 555 556 ps->ps_frags--; 557 if (ps->ps_frags != 0) { 558 mutex_exit(&ps->ps_mx); 559 kmem_cache_free(stripe_child_cache, cs); 560 return (1); 561 } 562 kmem_cache_free(stripe_child_cache, cs); 563 if (ps->ps_flags & MD_SPS_ERROR) { 564 stripe_error(ps); 565 return (1); 566 } 567 ui = ps->ps_ui; 568 if (!(ps->ps_flags & MD_SPS_DONTFREE)) { 569 mutex_exit(&ps->ps_mx); 570 } else { 571 ASSERT(panicstr); 572 } 573 SPS_FREE(stripe_parent_cache, ps); 574 md_kstat_done(ui, pb, 0); 575 md_unit_readerexit(ui); 576 md_biodone(pb); 577 return (0); 578 } 579 580 581 /* 582 * This routine does the mapping from virtual (dev, blkno) of a metapartition 583 * to the real (dev, blkno) of a real disk partition. 584 * It goes to the md_conf[] table to find out the correct real partition 585 * dev and block number for this buffer. 586 * 587 * A single buf request can not go across real disk partition boundary. 588 * When the virtual request specified by (dev, blkno) spans more than one 589 * real partition, md_mapbuf will return 1. Then the caller should prepare 590 * another real buf and continue calling md_mapbuf to do the mapping until 591 * it returns 0. 592 * 593 */ 594 595 static int 596 md_mapbuf( 597 ms_unit_t *un, 598 diskaddr_t blkno, 599 u_longlong_t bcount, 600 buf_t *bp, /* if bp==NULL, skip bp updates */ 601 ms_comp_t **mdc) /* if bp==NULL, skip mdc update */ 602 { 603 struct ms_row *mdr; 604 struct ms_comp *mdcomp; 605 diskaddr_t stripe_blk; 606 diskaddr_t fragment, blk_in_row, endblk; 607 offset_t interlace; 608 size_t dev_index; 609 int row_index, more; 610 extern unsigned md_maxphys; 611 /* Work var's when bp==NULL */ 612 u_longlong_t wb_bcount; 613 diskaddr_t wb_blkno; 614 md_dev64_t wb_edev; 615 ms_comp_t *wmdc; 616 617 /* 618 * Do a real calculation to derive the minor device of the 619 * Virtual Disk, which in turn will let us derive the 620 * device/minor of the underlying real device. 621 */ 622 623 624 for (row_index = 0; row_index < un->un_nrows; row_index++) { 625 mdr = &un->un_row[row_index]; 626 if (blkno < mdr->un_cum_blocks) 627 break; 628 } 629 ASSERT(row_index != un->un_nrows); 630 631 mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 632 633 blk_in_row = blkno - mdr->un_cum_blocks + mdr->un_blocks; 634 endblk = (diskaddr_t)(blkno + howmany(bcount, DEV_BSIZE)); 635 if (mdr->un_ncomp == 1) { /* No striping */ 636 if (endblk > mdr->un_cum_blocks) { 637 wb_bcount = ldbtob(mdr->un_cum_blocks - blkno); 638 if ((row_index + 1) == un->un_nrows) 639 more = 0; 640 else 641 more = 1; 642 } else { 643 wb_bcount = bcount; 644 more = 0; 645 } 646 wmdc = &mdcomp[mdr->un_icomp]; 647 wb_blkno = blk_in_row; 648 } else { /* Have striping */ 649 interlace = mdr->un_interlace; 650 fragment = blk_in_row % interlace; 651 if (bcount > ldbtob(interlace - fragment)) { 652 more = 1; 653 wb_bcount = ldbtob(interlace - fragment); 654 } else { 655 more = 0; 656 wb_bcount = bcount; 657 } 658 659 stripe_blk = blk_in_row / interlace; 660 dev_index = (size_t)(stripe_blk % mdr->un_ncomp); 661 wmdc = &mdcomp[mdr->un_icomp + dev_index]; 662 wb_blkno = (diskaddr_t)(((stripe_blk / mdr->un_ncomp) * 663 interlace) + fragment); 664 } 665 666 wb_blkno += wmdc->un_start_block; 667 wb_edev = wmdc->un_dev; 668 669 /* only break up the I/O if we're not built on another metadevice */ 670 if ((md_getmajor(wb_edev) != md_major) && (wb_bcount > md_maxphys)) { 671 wb_bcount = md_maxphys; 672 more = 1; 673 } 674 if (bp != (buf_t *)NULL) { 675 /* 676 * wb_bcount is limited by md_maxphys which is 'int' 677 */ 678 bp->b_bcount = (size_t)wb_bcount; 679 bp->b_lblkno = wb_blkno; 680 bp->b_edev = md_dev64_to_dev(wb_edev); 681 *mdc = wmdc; 682 } 683 return (more); 684 } 685 686 static void 687 md_stripe_strategy(buf_t *pb, int flag, void *private) 688 { 689 md_sps_t *ps; 690 md_scs_t *cs; 691 int doing_writes; 692 int more; 693 ms_unit_t *un; 694 mdi_unit_t *ui; 695 size_t current_count; 696 diskaddr_t current_blkno; 697 off_t current_offset; 698 buf_t *cb; /* child buf pointer */ 699 set_t setno; 700 701 setno = MD_MIN2SET(getminor(pb->b_edev)); 702 703 /* 704 * When doing IO to a multi owner meta device, check if set is halted. 705 * We do this check without the needed lock held, for performance 706 * reasons. 707 * If an IO just slips through while the set is locked via an 708 * MD_MN_SUSPEND_SET, we don't care about it. 709 * Only check for a suspended set if we are a top-level i/o request 710 * (MD_STR_NOTTOP is cleared in 'flag'). 711 */ 712 if ((md_set[setno].s_status & (MD_SET_HALTED | MD_SET_MNSET)) == 713 (MD_SET_HALTED | MD_SET_MNSET)) { 714 if ((flag & MD_STR_NOTTOP) == 0) { 715 mutex_enter(&md_mx); 716 /* Here we loop until the set is no longer halted */ 717 while (md_set[setno].s_status & MD_SET_HALTED) { 718 cv_wait(&md_cv, &md_mx); 719 } 720 mutex_exit(&md_mx); 721 } 722 } 723 724 ui = MDI_UNIT(getminor(pb->b_edev)); 725 726 md_kstat_waitq_enter(ui); 727 728 un = (ms_unit_t *)md_unit_readerlock(ui); 729 730 if ((flag & MD_NOBLOCK) == 0) { 731 if (md_inc_iocount(setno) != 0) { 732 pb->b_flags |= B_ERROR; 733 pb->b_error = ENXIO; 734 pb->b_resid = pb->b_bcount; 735 md_kstat_waitq_exit(ui); 736 md_unit_readerexit(ui); 737 biodone(pb); 738 return; 739 } 740 } else { 741 md_inc_iocount_noblock(setno); 742 } 743 744 if (!(flag & MD_STR_NOTTOP)) { 745 if (md_checkbuf(ui, (md_unit_t *)un, pb) != 0) { 746 md_kstat_waitq_exit(ui); 747 return; 748 } 749 } 750 751 ps = kmem_cache_alloc(stripe_parent_cache, MD_ALLOCFLAGS); 752 stripe_parent_init(ps); 753 754 /* 755 * Save essential information from the original buffhdr 756 * in the md_save structure. 757 */ 758 ps->ps_un = un; 759 ps->ps_ui = ui; 760 ps->ps_bp = pb; 761 ps->ps_addr = pb->b_un.b_addr; 762 763 if ((pb->b_flags & B_READ) == 0) 764 doing_writes = 1; 765 else 766 doing_writes = 0; 767 768 769 current_count = pb->b_bcount; 770 current_blkno = pb->b_lblkno; 771 current_offset = 0; 772 773 if (!(flag & MD_STR_NOTTOP) && panicstr) 774 ps->ps_flags |= MD_SPS_DONTFREE; 775 776 md_kstat_waitq_to_runq(ui); 777 778 ps->ps_frags++; 779 do { 780 cs = kmem_cache_alloc(stripe_child_cache, MD_ALLOCFLAGS); 781 stripe_child_init(cs); 782 cb = &cs->cs_buf; 783 cs->cs_ps = ps; 784 more = md_mapbuf(un, current_blkno, current_count, cb, 785 &cs->cs_comp); 786 787 cb = md_bioclone(pb, current_offset, cb->b_bcount, cb->b_edev, 788 cb->b_lblkno, stripe_done, cb, KM_NOSLEEP); 789 /* 790 * Do these calculations now, 791 * so that we pickup a valid b_bcount from the chld_bp. 792 */ 793 current_offset += cb->b_bcount; 794 current_count -= cb->b_bcount; 795 current_blkno += (diskaddr_t)(lbtodb(cb->b_bcount)); 796 797 if (more) { 798 mutex_enter(&ps->ps_mx); 799 ps->ps_frags++; 800 mutex_exit(&ps->ps_mx); 801 } 802 803 if (doing_writes && 804 cs->cs_comp->un_mirror.ms_flags & MDM_S_NOWRITE) { 805 (void) stripe_done(cb); 806 continue; 807 } 808 md_call_strategy(cb, flag, private); 809 } while (more); 810 811 if (!(flag & MD_STR_NOTTOP) && panicstr) { 812 while (!(ps->ps_flags & MD_SPS_DONE)) { 813 md_daemon(1, &md_done_daemon); 814 drv_usecwait(10); 815 } 816 kmem_cache_free(stripe_parent_cache, ps); 817 } 818 } 819 820 static int 821 stripe_snarf(md_snarfcmd_t cmd, set_t setno) 822 { 823 ms_unit_t *un; 824 mddb_recid_t recid; 825 int gotsomething; 826 int all_stripes_gotten; 827 mddb_type_t typ1; 828 mddb_de_ic_t *dep; 829 mddb_rb32_t *rbp; 830 size_t newreqsize; 831 ms_unit_t *big_un; 832 ms_unit32_od_t *small_un; 833 834 835 if (cmd == MD_SNARF_CLEANUP) 836 return (0); 837 838 all_stripes_gotten = 1; 839 gotsomething = 0; 840 841 typ1 = (mddb_type_t)md_getshared_key(setno, 842 stripe_md_ops.md_driver.md_drivername); 843 recid = mddb_makerecid(setno, 0); 844 845 while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) { 846 if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 847 continue; 848 849 dep = mddb_getrecdep(recid); 850 dep->de_flags = MDDB_F_STRIPE; 851 rbp = dep->de_rb; 852 853 switch (rbp->rb_revision) { 854 case MDDB_REV_RB: 855 case MDDB_REV_RBFN: 856 if ((rbp->rb_private & MD_PRV_CONVD) == 0) { 857 /* 858 * This means, we have an old and small record 859 * and this record hasn't already been 860 * converted. Before we create an incore 861 * metadevice from this we have to convert it to 862 * a big record. 863 */ 864 small_un = 865 (ms_unit32_od_t *)mddb_getrecaddr(recid); 866 newreqsize = get_big_stripe_req_size(small_un, 867 COMPLETE_STRUCTURE); 868 big_un = (ms_unit_t *)kmem_zalloc(newreqsize, 869 KM_SLEEP); 870 stripe_convert((caddr_t)small_un, 871 (caddr_t)big_un, SMALL_2_BIG); 872 kmem_free(small_un, dep->de_reqsize); 873 dep->de_rb_userdata = big_un; 874 dep->de_reqsize = newreqsize; 875 un = big_un; 876 rbp->rb_private |= MD_PRV_CONVD; 877 } else { 878 /* Small device had already been converted */ 879 un = (ms_unit_t *)mddb_getrecaddr(recid); 880 } 881 un->c.un_revision &= ~MD_64BIT_META_DEV; 882 break; 883 case MDDB_REV_RB64: 884 case MDDB_REV_RB64FN: 885 /* Big device */ 886 un = (ms_unit_t *)mddb_getrecaddr(recid); 887 un->c.un_revision |= MD_64BIT_META_DEV; 888 un->c.un_flag |= MD_EFILABEL; 889 break; 890 } 891 MDDB_NOTE_FN(rbp->rb_revision, un->c.un_revision); 892 893 /* Create minor node for snarfed unit. */ 894 (void) md_create_minor_node(MD_MIN2SET(MD_SID(un)), MD_SID(un)); 895 896 if (MD_UNIT(MD_SID(un)) != NULL) { 897 mddb_setrecprivate(recid, MD_PRV_PENDDEL); 898 continue; 899 } 900 all_stripes_gotten = 0; 901 if (stripe_build_incore((void *)un, 1) == 0) { 902 mddb_setrecprivate(recid, MD_PRV_GOTIT); 903 md_create_unit_incore(MD_SID(un), &stripe_md_ops, 0); 904 gotsomething = 1; 905 } 906 } 907 908 if (!all_stripes_gotten) 909 return (gotsomething); 910 911 recid = mddb_makerecid(setno, 0); 912 while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) 913 if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT)) 914 mddb_setrecprivate(recid, MD_PRV_PENDDEL); 915 916 return (0); 917 } 918 919 static int 920 stripe_halt(md_haltcmd_t cmd, set_t setno) 921 { 922 int i; 923 mdi_unit_t *ui; 924 minor_t mnum; 925 926 if (cmd == MD_HALT_CLOSE) 927 return (0); 928 929 if (cmd == MD_HALT_OPEN) 930 return (0); 931 932 if (cmd == MD_HALT_UNLOAD) 933 return (0); 934 935 if (cmd == MD_HALT_CHECK) { 936 for (i = 0; i < md_nunits; i++) { 937 mnum = MD_MKMIN(setno, i); 938 if ((ui = MDI_UNIT(mnum)) == NULL) 939 continue; 940 if (ui->ui_opsindex != stripe_md_ops.md_selfindex) 941 continue; 942 if (md_unit_isopen(ui)) 943 return (1); 944 } 945 return (0); 946 } 947 948 if (cmd != MD_HALT_DOIT) 949 return (1); 950 951 for (i = 0; i < md_nunits; i++) { 952 mnum = MD_MKMIN(setno, i); 953 if ((ui = MDI_UNIT(mnum)) == NULL) 954 continue; 955 if (ui->ui_opsindex != stripe_md_ops.md_selfindex) 956 continue; 957 reset_stripe((ms_unit_t *)MD_UNIT(mnum), mnum, 0); 958 } 959 960 return (0); 961 } 962 963 /*ARGSUSED3*/ 964 static int 965 stripe_open(dev_t *dev, int flag, int otyp, cred_t *cred_p, int md_oflags) 966 { 967 minor_t mnum = getminor(*dev); 968 mdi_unit_t *ui = MDI_UNIT(mnum); 969 ms_unit_t *un; 970 int err = 0; 971 set_t setno; 972 973 /* 974 * When doing an open of a multi owner metadevice, check to see if this 975 * node is a starting node and if a reconfig cycle is underway. 976 * If so, the system isn't sufficiently set up enough to handle the 977 * open (which involves I/O during sp_validate), so fail with ENXIO. 978 */ 979 setno = MD_MIN2SET(mnum); 980 if ((md_set[setno].s_status & (MD_SET_MNSET | MD_SET_MN_START_RC)) == 981 (MD_SET_MNSET | MD_SET_MN_START_RC)) { 982 return (ENXIO); 983 } 984 985 /* single thread */ 986 un = (ms_unit_t *)md_unit_openclose_enter(ui); 987 988 /* open devices, if necessary */ 989 if (! md_unit_isopen(ui) || (md_oflags & MD_OFLG_PROBEDEV)) { 990 if ((err = stripe_open_all_devs(un, md_oflags)) != 0) { 991 goto out; 992 } 993 } 994 995 /* count open */ 996 if ((err = md_unit_incopen(mnum, flag, otyp)) != 0) 997 goto out; 998 999 /* unlock, return success */ 1000 out: 1001 md_unit_openclose_exit(ui); 1002 return (err); 1003 } 1004 1005 /*ARGSUSED1*/ 1006 static int 1007 stripe_close( 1008 dev_t dev, 1009 int flag, 1010 int otyp, 1011 cred_t *cred_p, 1012 int md_cflags 1013 ) 1014 { 1015 minor_t mnum = getminor(dev); 1016 mdi_unit_t *ui = MDI_UNIT(mnum); 1017 ms_unit_t *un; 1018 int err = 0; 1019 1020 /* single thread */ 1021 un = (ms_unit_t *)md_unit_openclose_enter(ui); 1022 1023 /* count closed */ 1024 if ((err = md_unit_decopen(mnum, otyp)) != 0) 1025 goto out; 1026 1027 /* close devices, if necessary */ 1028 if (! md_unit_isopen(ui) || (md_cflags & MD_OFLG_PROBEDEV)) { 1029 stripe_close_all_devs(un, md_cflags); 1030 } 1031 1032 /* unlock, return success */ 1033 out: 1034 md_unit_openclose_exit(ui); 1035 return (err); 1036 } 1037 1038 1039 static struct buf dumpbuf; 1040 1041 /* 1042 * This routine dumps memory to the disk. It assumes that the memory has 1043 * already been mapped into mainbus space. It is called at disk interrupt 1044 * priority when the system is in trouble. 1045 * 1046 */ 1047 static int 1048 stripe_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 1049 { 1050 ms_unit_t *un; 1051 buf_t *bp; 1052 ms_comp_t *mdc; 1053 u_longlong_t nb; 1054 diskaddr_t mapblk; 1055 int result; 1056 int more; 1057 int saveresult = 0; 1058 1059 /* 1060 * Don't need to grab the unit lock. 1061 * Cause nothing else is suppose to be happenning. 1062 * Also dump is not suppose to sleep. 1063 */ 1064 un = (ms_unit_t *)MD_UNIT(getminor(dev)); 1065 1066 if ((diskaddr_t)blkno >= un->c.un_total_blocks) 1067 return (EINVAL); 1068 1069 if ((diskaddr_t)blkno + nblk > un->c.un_total_blocks) 1070 return (EINVAL); 1071 1072 bp = &dumpbuf; 1073 nb = ldbtob(nblk); 1074 do { 1075 bzero((caddr_t)bp, sizeof (*bp)); 1076 more = md_mapbuf(un, (diskaddr_t)blkno, nb, bp, &mdc); 1077 nblk = btodb(bp->b_bcount); 1078 mapblk = bp->b_lblkno; 1079 if (!(mdc->un_mirror.ms_flags & MDM_S_NOWRITE)) { 1080 /* 1081 * bdev_dump() is currently only able to take 1082 * 32 bit wide blkno's. 1083 */ 1084 result = bdev_dump(bp->b_edev, addr, (daddr_t)mapblk, 1085 nblk); 1086 if (result) 1087 saveresult = result; 1088 } 1089 1090 nb -= bp->b_bcount; 1091 addr += bp->b_bcount; 1092 blkno += nblk; 1093 } while (more); 1094 1095 return (saveresult); 1096 } 1097 1098 /*ARGSUSED*/ 1099 static intptr_t 1100 stripe_shared_by_blk( 1101 md_dev64_t dev, 1102 void *junk, 1103 diskaddr_t blkno, 1104 u_longlong_t *cnt) 1105 { 1106 ms_unit_t *un; 1107 buf_t bp; 1108 ms_comp_t *comp; 1109 1110 un = MD_UNIT(md_getminor(dev)); 1111 (void) md_mapbuf(un, blkno, ldbtob(*cnt), &bp, &comp); 1112 *cnt = (u_longlong_t)lbtodb(bp.b_bcount); 1113 return ((intptr_t)&comp->un_mirror); 1114 } 1115 1116 /* 1117 * stripe_block_count_skip_size() returns the following values 1118 * so that the logical to physical block mappings can 1119 * be calculated without intimate knowledge of the underpinnings. 1120 * 1121 * block - first logical block number of the device. 1122 * block = [ # of blocks before THE row ] + 1123 * [ # of blocks in THE row before the component ] 1124 * count - # of segments (interlaced size). 1125 * skip - # of logical blocks between segments, or delta to 1126 * get to next segment 1127 * size - interlace size used for the block, count, skip. 1128 */ 1129 /*ARGSUSED*/ 1130 static intptr_t 1131 stripe_block_count_skip_size( 1132 md_dev64_t dev, 1133 void *junk, 1134 int ci, 1135 diskaddr_t *block, 1136 size_t *count, 1137 u_longlong_t *skip, 1138 u_longlong_t *size) 1139 { 1140 ms_unit_t *un; 1141 int row; 1142 struct ms_row *mdr; 1143 int cmpcount = 0; 1144 1145 un = MD_UNIT(md_getminor(dev)); 1146 1147 for (row = 0; row < un->un_nrows; row++) { 1148 mdr = &un->un_row[row]; 1149 if ((mdr->un_ncomp + cmpcount) > ci) 1150 break; 1151 cmpcount += mdr->un_ncomp; 1152 } 1153 ASSERT(row != un->un_nrows); 1154 1155 /* 1156 * Concatenations are always contiguous blocks, 1157 * you cannot depend on the interlace being a usable 1158 * value (except for stripes). 1159 */ 1160 if (mdr->un_ncomp == 1) { /* Concats */ 1161 *block = mdr->un_cum_blocks - mdr->un_blocks; 1162 *count = 1; 1163 *skip = 0; 1164 *size = mdr->un_blocks; 1165 } else { /* Stripes */ 1166 *block = (mdr->un_cum_blocks - mdr->un_blocks) + 1167 ((ci - cmpcount) * mdr->un_interlace); 1168 *count = (size_t)(mdr->un_blocks / (mdr->un_interlace * 1169 mdr->un_ncomp)); 1170 *skip = (mdr->un_interlace * mdr->un_ncomp) - mdr->un_interlace; 1171 *size = mdr->un_interlace; 1172 } 1173 1174 return (0); 1175 } 1176 1177 /*ARGSUSED*/ 1178 static intptr_t 1179 stripe_shared_by_indx(md_dev64_t dev, void *junk, int indx) 1180 { 1181 ms_unit_t *un; 1182 ms_comp_t *comp; 1183 1184 un = MD_UNIT(md_getminor(dev)); 1185 comp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 1186 comp += indx; 1187 return ((intptr_t)&comp->un_mirror); 1188 } 1189 1190 /*ARGSUSED*/ 1191 intptr_t 1192 stripe_component_count(md_dev64_t dev, void *junk) 1193 { 1194 /* 1195 * See comments for stripe_get_dev 1196 */ 1197 1198 ms_unit_t *un; 1199 int count = 0; 1200 int row; 1201 1202 un = MD_UNIT(md_getminor(dev)); 1203 for (row = 0; row < un->un_nrows; row++) 1204 count += un->un_row[row].un_ncomp; 1205 return (count); 1206 } 1207 1208 /*ARGSUSED*/ 1209 intptr_t 1210 stripe_get_dev(md_dev64_t dev, void *junk, int indx, ms_cd_info_t *cd) 1211 { 1212 /* 1213 * It should be noted that stripe_replace in stripe_ioctl.c calls this 1214 * routine using makedevice(0, minor) for the first argument. 1215 * 1216 * If this routine at some point in the future needs to use the major 1217 * number stripe_replace must be changed. 1218 */ 1219 1220 ms_unit_t *un; 1221 ms_comp_t *comp; 1222 md_dev64_t tmpdev; 1223 1224 un = MD_UNIT(md_getminor(dev)); 1225 comp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 1226 comp += indx; 1227 tmpdev = comp->un_dev; 1228 /* 1229 * Try to resolve devt again if NODEV64 1230 * Check if this comp is hotspared and if it is 1231 * then use key for hotspare 1232 */ 1233 if (tmpdev == NODEV64) { 1234 tmpdev = md_resolve_bydevid(md_getminor(dev), tmpdev, 1235 comp->un_mirror.ms_hs_id ? 1236 comp->un_mirror.ms_hs_key : 1237 comp->un_key); 1238 comp->un_dev = tmpdev; 1239 } 1240 1241 cd->cd_dev = comp->un_dev; 1242 cd->cd_orig_dev = comp->un_mirror.ms_orig_dev; 1243 return (0); 1244 } 1245 1246 /*ARGSUSED*/ 1247 void 1248 stripe_replace_done(md_dev64_t dev, sv_dev_t *sv) 1249 { 1250 /* 1251 * See comments for stripe_get_dev 1252 */ 1253 1254 minor_t mnum = md_getminor(dev); 1255 1256 if (sv != NULL) { 1257 md_rem_names(sv, 1); 1258 kmem_free(sv, sizeof (sv_dev_t)); 1259 } 1260 1261 md_unit_writerexit(MDI_UNIT(mnum)); 1262 } 1263 1264 /*ARGSUSED*/ 1265 intptr_t 1266 stripe_replace_dev(md_dev64_t dev, void *junk, int ci, ms_new_dev_t *nd, 1267 mddb_recid_t *recids, int nrecids, void (**replace_done)(), 1268 void **replace_data) 1269 { 1270 minor_t mnum; 1271 ms_unit_t *un; 1272 mdi_unit_t *ui; 1273 ms_comp_t *comp; 1274 diskaddr_t dev_size; 1275 int row; 1276 int ncomps = 0; 1277 int cmpcount = 0; 1278 int rid = 0; 1279 struct ms_row *mdr; 1280 sv_dev_t *sv = NULL; 1281 mddb_recid_t hs_id = 0; 1282 set_t setno; 1283 side_t side; 1284 md_dev64_t this_dev; 1285 1286 mnum = md_getminor(dev); 1287 ui = MDI_UNIT(mnum); 1288 setno = MD_MIN2SET(mnum); 1289 side = mddb_getsidenum(setno); 1290 1291 un = md_unit_writerlock(ui); 1292 1293 *replace_data = NULL; 1294 comp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 1295 1296 comp += ci; 1297 1298 /* 1299 * Count the number of components 1300 */ 1301 for (row = 0; row < un->un_nrows; row++) { 1302 struct ms_row *mdr = &un->un_row[row]; 1303 ncomps += mdr->un_ncomp; 1304 } 1305 1306 recids[0] = 0; 1307 /* 1308 * No need of checking size of new device, 1309 * when hotsparing (it has already been done), or 1310 * when enabling the device. 1311 */ 1312 if ((nd != NULL) && (nd->nd_hs_id == 0)) { 1313 for (row = 0; row < un->un_nrows; row++) { 1314 mdr = &un->un_row[row]; 1315 if ((mdr->un_ncomp + cmpcount) > ci) 1316 break; 1317 cmpcount += mdr->un_ncomp; 1318 } 1319 ASSERT(row != un->un_nrows); 1320 1321 /* Concatenations have a ncomp = 1 */ 1322 dev_size = mdr->un_blocks / mdr->un_ncomp; 1323 1324 /* 1325 * now check to see if new comp can be used in 1326 * place of old comp 1327 */ 1328 if ((un->c.un_flag & MD_LABELED) && (ci == 0) && 1329 nd->nd_labeled) 1330 nd->nd_start_blk = 0; 1331 else 1332 nd->nd_nblks -= nd->nd_start_blk; 1333 1334 if (dev_size > nd->nd_nblks) { 1335 md_unit_writerexit(ui); 1336 return (MDE_COMP_TOO_SMALL); 1337 } 1338 1339 sv = (sv_dev_t *)kmem_alloc(sizeof (sv_dev_t), KM_SLEEP); 1340 sv->setno = MD_MIN2SET(mnum); 1341 sv->key = comp->un_key; 1342 } 1343 1344 /* 1345 * Close this component. 1346 */ 1347 if (comp->un_mirror.ms_flags & MDM_S_ISOPEN) { 1348 md_layered_close(comp->un_dev, MD_OFLG_NULL); 1349 comp->un_mirror.ms_flags &= ~MDM_S_ISOPEN; 1350 } 1351 1352 /* 1353 * If the component is hotspared, return to the pool. 1354 */ 1355 if (comp->un_mirror.ms_hs_id != 0) { 1356 hs_cmds_t cmd; 1357 mdkey_t hs_key; 1358 1359 hs_key = comp->un_mirror.ms_hs_key; 1360 comp->un_dev = comp->un_mirror.ms_orig_dev; 1361 comp->un_start_block = comp->un_mirror.ms_orig_blk; 1362 comp->un_mirror.ms_hs_key = 0; 1363 comp->un_mirror.ms_hs_id = 0; 1364 comp->un_mirror.ms_orig_dev = 0; 1365 1366 cmd = HS_FREE; 1367 if ((comp->un_mirror.ms_state != CS_OKAY) && 1368 (comp->un_mirror.ms_state != CS_RESYNC)) 1369 cmd = HS_BAD; 1370 (void) md_hot_spare_ifc(cmd, un->un_hsp_id, 0, 0, &hs_id, 1371 &hs_key, NULL, NULL); 1372 } 1373 1374 /* 1375 * Open by device id; for enable (indicated by a NULL 1376 * nd pointer), use the existing component info. For 1377 * replace, use the new device. 1378 */ 1379 if (nd == NULL) { 1380 this_dev = md_resolve_bydevid(mnum, comp->un_dev, comp->un_key); 1381 /* 1382 * If someone replaced a new disk in the same slot 1383 * we get NODEV64 since old device id cannot be 1384 * resolved. The new devt is obtained from the 1385 * mddb since devt is going to be unchanged for the 1386 * enable case. No need to check for multiple 1387 * keys here because the caller (comp_replace) 1388 * has already sanity checked it for us. 1389 */ 1390 if (this_dev == NODEV64) { 1391 this_dev = md_getdevnum(setno, side, comp->un_key, 1392 MD_TRUST_DEVT); 1393 } 1394 } else { 1395 /* 1396 * If this is a hotspare, save the original dev_t for later 1397 * use. If this has occured during boot then the value of 1398 * comp->un_dev will be NODEV64 because of the failure to look 1399 * up the devid of the device. 1400 */ 1401 if (nd->nd_hs_id != 0) 1402 comp->un_mirror.ms_orig_dev = comp->un_dev; 1403 this_dev = md_resolve_bydevid(mnum, nd->nd_dev, nd->nd_key); 1404 } 1405 1406 comp->un_dev = this_dev; 1407 1408 /* 1409 * Now open the new device if required. Note for a single component 1410 * stripe it will not be open - leave this for the mirror driver to 1411 * deal with. 1412 */ 1413 if (md_unit_isopen(ui)) { 1414 if (md_layered_open(mnum, &this_dev, MD_OFLG_NULL)) { 1415 mddb_recid_t ids[3]; 1416 1417 ids[0] = un->c.un_record_id; 1418 ids[1] = hs_id; 1419 ids[2] = 0; 1420 mddb_commitrecs_wrapper(ids); 1421 if ((nd != NULL) && (nd->nd_hs_id != 0)) { 1422 /* 1423 * Revert back to the original device. 1424 */ 1425 comp->un_dev = comp->un_mirror.ms_orig_dev; 1426 1427 cmn_err(CE_WARN, 1428 "md: %s: open error of hotspare %s", 1429 md_shortname(mnum), 1430 md_devname(MD_MIN2SET(mnum), nd->nd_dev, 1431 NULL, 0)); 1432 SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, 1433 SVM_TAG_HS, MD_MIN2SET(mnum), nd->nd_dev); 1434 } 1435 md_unit_writerexit(ui); 1436 return (MDE_COMP_OPEN_ERR); 1437 } 1438 if (nd != NULL) 1439 nd->nd_dev = this_dev; 1440 1441 comp->un_mirror.ms_flags |= MDM_S_ISOPEN; 1442 } 1443 1444 if (nd == NULL) { 1445 recids[0] = un->c.un_record_id; 1446 recids[1] = hs_id; 1447 recids[2] = 0; 1448 *replace_done = stripe_replace_done; 1449 return (0); 1450 } 1451 1452 /* if hot sparing this device */ 1453 if (nd->nd_hs_id != 0) { 1454 char devname[MD_MAX_CTDLEN]; 1455 char hs_devname[MD_MAX_CTDLEN]; 1456 set_t setno; 1457 1458 comp->un_mirror.ms_hs_id = nd->nd_hs_id; 1459 comp->un_mirror.ms_hs_key = nd->nd_key; 1460 1461 comp->un_mirror.ms_orig_blk = comp->un_start_block; 1462 1463 setno = MD_MIN2SET(mnum); 1464 1465 (void) md_devname(setno, comp->un_mirror.ms_orig_dev, devname, 1466 sizeof (devname)); 1467 (void) md_devname(setno, nd->nd_dev, hs_devname, 1468 sizeof (hs_devname)); 1469 1470 cmn_err(CE_NOTE, "md: %s: hotspared device %s with %s", 1471 md_shortname(mnum), devname, hs_devname); 1472 1473 } else { /* replacing the device */ 1474 comp->un_key = nd->nd_key; 1475 *replace_data = (void *)sv; 1476 1477 /* 1478 * For the old device, make sure to reset the parent 1479 * if it's a metadevice. 1480 */ 1481 if (md_getmajor(comp->un_dev) == md_major) { 1482 minor_t comp_mnum = md_getminor(comp->un_dev); 1483 md_unit_t *comp_un = MD_UNIT(comp_mnum); 1484 1485 md_reset_parent(comp->un_dev); 1486 recids[rid++] = MD_RECID(comp_un); 1487 } 1488 } 1489 1490 comp->un_dev = nd->nd_dev; 1491 comp->un_start_block = nd->nd_start_blk; 1492 1493 /* 1494 * For the new device, make sure to set the parent if it's a 1495 * metadevice. 1496 * 1497 * If we ever support using metadevices as hot spares, this 1498 * will need to be tested, and possibly moved into the 1499 * preceding "else" clause, immediately following the parent 1500 * reset block. For now, it's convenient to leave it here and 1501 * only compress nd->nd_dev once. 1502 */ 1503 if (md_getmajor(comp->un_dev) == md_major) { 1504 minor_t comp_mnum = md_getminor(comp->un_dev); 1505 md_unit_t *comp_un = MD_UNIT(comp_mnum); 1506 1507 md_set_parent(comp->un_dev, MD_SID(un)); 1508 recids[rid++] = MD_RECID(comp_un); 1509 } 1510 1511 recids[rid++] = un->c.un_record_id; 1512 recids[rid++] = hs_id; 1513 recids[rid] = 0; 1514 *replace_done = stripe_replace_done; 1515 return (0); 1516 } 1517 1518 /*ARGSUSED*/ 1519 static intptr_t 1520 stripe_hotspare_dev( 1521 md_dev64_t dev, 1522 void *junk, 1523 int ci, 1524 mddb_recid_t *recids, 1525 int nrecids, 1526 void (**replace_done)(), 1527 void **replace_data) 1528 { 1529 ms_unit_t *un; 1530 mdi_unit_t *ui; 1531 ms_comp_t *comp; 1532 int row; 1533 struct ms_row *mdr; 1534 ms_new_dev_t nd; 1535 int err; 1536 int i; 1537 minor_t mnum; 1538 set_t setno; 1539 int cmpcount = 0; 1540 1541 mnum = md_getminor(dev); 1542 ui = MDI_UNIT(mnum); 1543 un = MD_UNIT(mnum); 1544 setno = MD_MIN2SET(mnum); 1545 1546 if (md_get_setstatus(setno) & MD_SET_STALE) 1547 return (1); 1548 1549 if (un->un_hsp_id == -1) 1550 return (1); 1551 1552 for (row = 0; row < un->un_nrows; row++) { 1553 mdr = &un->un_row[row]; 1554 if ((mdr->un_ncomp + cmpcount) > ci) 1555 break; 1556 cmpcount += mdr->un_ncomp; 1557 } 1558 ASSERT(row != un->un_nrows); 1559 1560 comp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 1561 comp += ci; 1562 /* Concatenations have a ncomp = 1 */ 1563 nd.nd_nblks = mdr->un_blocks / mdr->un_ncomp; 1564 1565 if ((un->c.un_flag & MD_LABELED) && (ci == 0)) 1566 nd.nd_labeled = 1; 1567 else 1568 nd.nd_labeled = 0; 1569 1570 again: 1571 err = md_hot_spare_ifc(HS_GET, un->un_hsp_id, nd.nd_nblks, 1572 nd.nd_labeled, &nd.nd_hs_id, &nd.nd_key, &nd.nd_dev, 1573 &nd.nd_start_blk); 1574 1575 if (err) { 1576 if (!stripe_replace_dev(dev, junk, ci, NULL, recids, nrecids, 1577 replace_done, replace_data)) { 1578 mddb_commitrecs_wrapper(recids); 1579 md_unit_writerexit(ui); 1580 } 1581 recids[0] = 0; 1582 return (1); 1583 } 1584 1585 if (stripe_replace_dev(dev, junk, ci, &nd, recids, nrecids, 1586 replace_done, replace_data)) { 1587 1588 (void) md_hot_spare_ifc(HS_BAD, un->un_hsp_id, 0, 0, 1589 &nd.nd_hs_id, &nd.nd_key, NULL, NULL); 1590 mddb_commitrec_wrapper(nd.nd_hs_id); 1591 goto again; 1592 } 1593 1594 /* Leave a slot for the null recid */ 1595 for (i = 0; i < (nrecids - 1); i++) { 1596 if (recids[i] == 0) { 1597 recids[i++] = nd.nd_hs_id; 1598 recids[i] = 0; 1599 } 1600 } 1601 return (0); 1602 } 1603 1604 static int 1605 stripe_imp_set( 1606 set_t setno 1607 ) 1608 { 1609 1610 mddb_recid_t recid; 1611 int i, row, c, gotsomething; 1612 mddb_type_t typ1; 1613 mddb_de_ic_t *dep; 1614 mddb_rb32_t *rbp; 1615 ms_unit32_od_t *un32; 1616 ms_unit_t *un64; 1617 md_dev64_t self_devt; 1618 minor_t *self_id; /* minor needs to be updated */ 1619 md_parent_t *parent_id; /* parent needs to be updated */ 1620 mddb_recid_t *record_id; /* record id needs to be updated */ 1621 mddb_recid_t *hsp_id; 1622 ms_comp32_od_t *comp32; 1623 ms_comp_t *comp64; 1624 1625 1626 gotsomething = 0; 1627 1628 typ1 = (mddb_type_t)md_getshared_key(setno, 1629 stripe_md_ops.md_driver.md_drivername); 1630 recid = mddb_makerecid(setno, 0); 1631 1632 while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) { 1633 if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 1634 continue; 1635 1636 dep = mddb_getrecdep(recid); 1637 rbp = dep->de_rb; 1638 1639 switch (rbp->rb_revision) { 1640 case MDDB_REV_RB: 1641 case MDDB_REV_RBFN: 1642 /* 1643 * Small device 1644 */ 1645 un32 = (ms_unit32_od_t *)mddb_getrecaddr(recid); 1646 self_id = &(un32->c.un_self_id); 1647 parent_id = &(un32->c.un_parent); 1648 record_id = &(un32->c.un_record_id); 1649 hsp_id = &(un32->un_hsp_id); 1650 1651 comp32 = (ms_comp32_od_t *) 1652 ((void *)&((char *)un32)[un32->un_ocomp]); 1653 for (row = 0; row < un32->un_nrows; row++) { 1654 struct ms_row32_od *mdr = &un32->un_row[row]; 1655 for (i = 0, c = mdr->un_icomp; 1656 i < mdr->un_ncomp; i++) { 1657 ms_comp32_od_t *mdc; 1658 1659 mdc = &comp32[c++]; 1660 1661 if (!md_update_minor(setno, 1662 mddb_getsidenum(setno), 1663 mdc->un_key)) 1664 goto out; 1665 1666 if (mdc->un_mirror.ms_hs_id != 0) 1667 mdc->un_mirror.ms_hs_id = 1668 MAKERECID(setno, 1669 mdc->un_mirror.ms_hs_id); 1670 } 1671 } 1672 break; 1673 case MDDB_REV_RB64: 1674 case MDDB_REV_RB64FN: 1675 un64 = (ms_unit_t *)mddb_getrecaddr(recid); 1676 self_id = &(un64->c.un_self_id); 1677 parent_id = &(un64->c.un_parent); 1678 record_id = &(un64->c.un_record_id); 1679 hsp_id = &(un64->un_hsp_id); 1680 1681 comp64 = (ms_comp_t *) 1682 ((void *)&((char *)un64)[un64->un_ocomp]); 1683 for (row = 0; row < un64->un_nrows; row++) { 1684 struct ms_row *mdr = &un64->un_row[row]; 1685 1686 for (i = 0, c = mdr->un_icomp; 1687 i < mdr->un_ncomp; i++) { 1688 ms_comp_t *mdc; 1689 1690 mdc = &comp64[c++]; 1691 1692 if (!md_update_minor(setno, 1693 mddb_getsidenum(setno), 1694 mdc->un_key)) 1695 goto out; 1696 1697 if (mdc->un_mirror.ms_hs_id != 0) 1698 mdc->un_mirror.ms_hs_id = 1699 MAKERECID(setno, 1700 mdc->un_mirror.ms_hs_id); 1701 } 1702 } 1703 break; 1704 } 1705 1706 /* 1707 * If this is a top level and a friendly name metadevice, 1708 * update its minor in the namespace. 1709 */ 1710 if ((*parent_id == MD_NO_PARENT) && 1711 ((rbp->rb_revision == MDDB_REV_RBFN) || 1712 (rbp->rb_revision == MDDB_REV_RB64FN))) { 1713 1714 self_devt = md_makedevice(md_major, *self_id); 1715 if (!md_update_top_device_minor(setno, 1716 mddb_getsidenum(setno), self_devt)) 1717 goto out; 1718 } 1719 1720 /* 1721 * Update unit with the imported setno 1722 * 1723 */ 1724 mddb_setrecprivate(recid, MD_PRV_GOTIT); 1725 1726 *self_id = MD_MKMIN(setno, MD_MIN2UNIT(*self_id)); 1727 1728 if (*hsp_id != -1) 1729 *hsp_id = MAKERECID(setno, DBID(*hsp_id)); 1730 1731 if (*parent_id != MD_NO_PARENT) 1732 *parent_id = MD_MKMIN(setno, MD_MIN2UNIT(*parent_id)); 1733 *record_id = MAKERECID(setno, DBID(*record_id)); 1734 1735 gotsomething = 1; 1736 } 1737 1738 out: 1739 return (gotsomething); 1740 } 1741 1742 static md_named_services_t stripe_named_services[] = { 1743 {stripe_shared_by_blk, "shared by blk" }, 1744 {stripe_shared_by_indx, "shared by indx" }, 1745 {stripe_component_count, "get component count" }, 1746 {stripe_block_count_skip_size, "get block count skip size" }, 1747 {stripe_get_dev, "get device" }, 1748 {stripe_replace_dev, "replace device" }, 1749 {stripe_hotspare_dev, "hotspare device" }, 1750 {stripe_rename_check, MDRNM_CHECK }, 1751 {NULL, 0} 1752 }; 1753 1754 md_ops_t stripe_md_ops = { 1755 stripe_open, /* open */ 1756 stripe_close, /* close */ 1757 md_stripe_strategy, /* strategy */ 1758 NULL, /* print */ 1759 stripe_dump, /* dump */ 1760 NULL, /* read */ 1761 NULL, /* write */ 1762 md_stripe_ioctl, /* stripe_ioctl, */ 1763 stripe_snarf, /* stripe_snarf */ 1764 stripe_halt, /* stripe_halt */ 1765 NULL, /* aread */ 1766 NULL, /* awrite */ 1767 stripe_imp_set, /* import set */ 1768 stripe_named_services 1769 }; 1770 1771 static void 1772 init_init() 1773 { 1774 md_stripe_mcs_buf_off = sizeof (md_scs_t) - sizeof (buf_t); 1775 1776 stripe_parent_cache = kmem_cache_create("md_stripe_parent", 1777 sizeof (md_sps_t), 0, stripe_parent_constructor, 1778 stripe_parent_destructor, stripe_run_queue, NULL, NULL, 1779 0); 1780 stripe_child_cache = kmem_cache_create("md_stripe_child", 1781 sizeof (md_scs_t) - sizeof (buf_t) + biosize(), 0, 1782 stripe_child_constructor, stripe_child_destructor, 1783 stripe_run_queue, NULL, NULL, 0); 1784 } 1785 1786 static void 1787 fini_uninit() 1788 { 1789 kmem_cache_destroy(stripe_parent_cache); 1790 kmem_cache_destroy(stripe_child_cache); 1791 stripe_parent_cache = stripe_child_cache = NULL; 1792 } 1793 1794 /* define the module linkage */ 1795 MD_PLUGIN_MISC_MODULE("stripes module", init_init(), fini_uninit()) 1796