1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/conf.h> 32 #include <sys/file.h> 33 #include <sys/user.h> 34 #include <sys/uio.h> 35 #include <sys/t_lock.h> 36 #include <sys/buf.h> 37 #include <sys/dkio.h> 38 #include <sys/vtoc.h> 39 #include <sys/kmem.h> 40 #include <vm/page.h> 41 #include <sys/cmn_err.h> 42 #include <sys/sysmacros.h> 43 #include <sys/types.h> 44 #include <sys/mkdev.h> 45 #include <sys/stat.h> 46 #include <sys/open.h> 47 #include <sys/lvm/mdio.h> 48 #include <sys/lvm/mdvar.h> 49 #include <sys/lvm/md_stripe.h> 50 #include <sys/lvm/md_convert.h> 51 #include <sys/lvm/md_notify.h> 52 #include <sys/modctl.h> 53 #include <sys/ddi.h> 54 #include <sys/sunddi.h> 55 #include <sys/debug.h> 56 #include <sys/sysevent/eventdefs.h> 57 #include <sys/sysevent/svm.h> 58 59 md_ops_t stripe_md_ops; 60 #ifndef lint 61 static char _depends_on[] = "drv/md"; 62 static md_ops_t *md_interface_ops = &stripe_md_ops; 63 #endif 64 65 extern unit_t md_nunits; 66 extern set_t md_nsets; 67 extern md_set_t md_set[]; 68 69 extern kmutex_t md_mx; 70 extern kcondvar_t md_cv; 71 72 extern int md_status; 73 extern major_t md_major; 74 extern mdq_anchor_t md_done_daemon; 75 76 static int md_stripe_mcs_buf_off; 77 static kmem_cache_t *stripe_parent_cache = NULL; 78 static kmem_cache_t *stripe_child_cache = NULL; 79 80 /*ARGSUSED1*/ 81 static int 82 stripe_parent_constructor(void *p, void *d1, int d2) 83 { 84 mutex_init(&((md_sps_t *)p)->ps_mx, 85 NULL, MUTEX_DEFAULT, NULL); 86 return (0); 87 } 88 89 static void 90 stripe_parent_init(void *ps) 91 { 92 bzero(ps, offsetof(md_sps_t, ps_mx)); 93 } 94 95 /*ARGSUSED1*/ 96 static void 97 stripe_parent_destructor(void *p, void *d) 98 { 99 mutex_destroy(&((md_sps_t *)p)->ps_mx); 100 } 101 102 /*ARGSUSED1*/ 103 static int 104 stripe_child_constructor(void *p, void *d1, int d2) 105 { 106 bioinit(&((md_scs_t *)p)->cs_buf); 107 return (0); 108 } 109 110 static void 111 stripe_child_init(md_scs_t *cs) 112 { 113 cs->cs_mdunit = 0; 114 cs->cs_ps = NULL; 115 cs->cs_comp = NULL; 116 md_bioreset(&cs->cs_buf); 117 } 118 119 /*ARGSUSED1*/ 120 static void 121 stripe_child_destructor(void *p, void *d) 122 { 123 biofini(&((md_scs_t *)p)->cs_buf); 124 } 125 126 /*ARGSUSED*/ 127 static void 128 stripe_run_queue(void *d) 129 { 130 if (!(md_status & MD_GBL_DAEMONS_LIVE)) 131 md_daemon(1, &md_done_daemon); 132 } 133 134 static void 135 stripe_close_all_devs(ms_unit_t *un, int md_cflags) 136 { 137 int row; 138 int i; 139 int c; 140 struct ms_comp *mdcomp; 141 142 mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 143 for (row = 0; row < un->un_nrows; row++) { 144 struct ms_row *mdr = &un->un_row[row]; 145 for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) { 146 struct ms_comp *mdc; 147 mdc = &mdcomp[c++]; 148 if (md_cflags & MD_OFLG_PROBEDEV) { 149 150 /* 151 * It is possible that the md_layered_open 152 * failed because the stripe unit structure 153 * contained a NODEV. In such a case since 154 * there is nothing to open, there is nothing 155 * to close. 156 */ 157 if (mdc->un_dev == NODEV64) 158 continue; 159 } 160 if ((md_cflags & MD_OFLG_PROBEDEV) && 161 (mdc->un_mirror.ms_flags & MDM_S_PROBEOPEN)) { 162 md_layered_close(mdc->un_dev, 163 md_cflags); 164 mdc->un_mirror.ms_flags &= 165 ~MDM_S_PROBEOPEN; 166 } else if (mdc->un_mirror.ms_flags & MDM_S_ISOPEN) { 167 md_layered_close(mdc->un_dev, md_cflags); 168 mdc->un_mirror.ms_flags &= ~MDM_S_ISOPEN; 169 } 170 } 171 } 172 } 173 174 static int 175 stripe_open_all_devs(ms_unit_t *un, int md_oflags) 176 { 177 minor_t mnum = MD_SID(un); 178 int row; 179 int i; 180 int c; 181 struct ms_comp *mdcomp; 182 int err; 183 int cont_on_errors = (md_oflags & MD_OFLG_CONT_ERRS); 184 int probe_err_cnt = 0; 185 int total_comp_cnt = 0; 186 set_t setno = MD_MIN2SET(MD_SID(un)); 187 side_t side = mddb_getsidenum(setno); 188 mdkey_t key; 189 190 mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 191 192 /* 193 * For a probe call, if any component of a stripe or a concat 194 * can be opened, it is considered to be a success. The total number 195 * of components in a stripe are computed prior to starting a probe. 196 * This number is then compared against the number of components 197 * that could be be successfully opened. If none of the components 198 * in a stripe can be opened, only then an ENXIO is returned for a 199 * probe type open. 200 */ 201 202 for (row = 0; row < un->un_nrows; row++) { 203 struct ms_row *mdr = &un->un_row[row]; 204 205 if (md_oflags & MD_OFLG_PROBEDEV) 206 total_comp_cnt += mdr->un_ncomp; 207 208 for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) { 209 struct ms_comp *mdc; 210 md_dev64_t tmpdev; 211 212 mdc = &mdcomp[c++]; 213 tmpdev = mdc->un_dev; 214 /* 215 * Do the open by device id 216 * Check if this comp is hotspared and 217 * if it is then use the key for hotspare. 218 * MN disksets don't use devids, so we better don't use 219 * md_devid_found/md_resolve_bydevid there. Rather do, 220 * what's done in stripe_build_incore() 221 */ 222 if (MD_MNSET_SETNO(setno)) { 223 if (mdc->un_mirror.ms_hs_id != 0) { 224 (void) md_hot_spare_ifc(HS_MKDEV, 0, 0, 225 0, &mdc->un_mirror.ms_hs_id, NULL, 226 &tmpdev, NULL); 227 } 228 } else { 229 key = mdc->un_mirror.ms_hs_id ? 230 mdc->un_mirror.ms_hs_key : mdc->un_key; 231 if ((md_getmajor(tmpdev) != md_major) && 232 md_devid_found(setno, side, key) == 1) { 233 tmpdev = md_resolve_bydevid(mnum, 234 tmpdev, key); 235 } 236 } 237 238 /* 239 * For a submirror, we only want to open those devices 240 * that are not errored. If the device is errored then 241 * then there is no reason to open it and leaving it 242 * closed allows the RCM/DR code to work so that the 243 * errored device can be replaced. 244 */ 245 if ((md_oflags & MD_OFLG_PROBEDEV) || 246 ! (mdc->un_mirror.ms_state & CS_ERRED)) { 247 248 err = md_layered_open(mnum, &tmpdev, md_oflags); 249 } else { 250 err = ENXIO; 251 } 252 253 /* 254 * Only set the un_dev if the tmpdev != NODEV64. If 255 * it is NODEV64 then the md_layered_open() will have 256 * failed in some manner. 257 */ 258 if (tmpdev != NODEV64) 259 mdc->un_dev = tmpdev; 260 261 if (err) { 262 if (!cont_on_errors) { 263 stripe_close_all_devs(un, md_oflags); 264 return (ENXIO); 265 } 266 267 if (md_oflags & MD_OFLG_PROBEDEV) 268 probe_err_cnt++; 269 } else { 270 if (md_oflags & MD_OFLG_PROBEDEV) { 271 mdc->un_mirror.ms_flags |= 272 MDM_S_PROBEOPEN; 273 } else 274 mdc->un_mirror.ms_flags |= MDM_S_ISOPEN; 275 } 276 } 277 } 278 279 /* If every component in a stripe could not be opened fail */ 280 if ((md_oflags & MD_OFLG_PROBEDEV) && 281 (probe_err_cnt == total_comp_cnt)) 282 return (ENXIO); 283 else 284 return (0); 285 } 286 287 int 288 stripe_build_incore(void *p, int snarfing) 289 { 290 ms_unit_t *un = (ms_unit_t *)p; 291 struct ms_comp *mdcomp; 292 minor_t mnum; 293 int row; 294 int i; 295 int c; 296 int ncomps; 297 298 mnum = MD_SID(un); 299 300 if (MD_UNIT(mnum) != NULL) 301 return (0); 302 303 MD_STATUS(un) = 0; 304 305 /* 306 * Reset all the is_open flags, these are probably set 307 * cause they just came out of the database. 308 */ 309 mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 310 311 ncomps = 0; 312 for (row = 0; row < un->un_nrows; row++) { 313 struct ms_row *mdr = &un->un_row[row]; 314 ncomps += mdr->un_ncomp; 315 } 316 317 for (row = 0; row < un->un_nrows; row++) { 318 struct ms_row *mdr = &un->un_row[row]; 319 for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) { 320 struct ms_comp *mdc; 321 set_t setno; 322 md_dev64_t tmpdev; 323 324 mdc = &mdcomp[c++]; 325 mdc->un_mirror.ms_flags &= 326 ~(MDM_S_ISOPEN | MDM_S_IOERR | MDM_S_RS_TRIED); 327 328 if (!snarfing) 329 continue; 330 331 setno = MD_MIN2SET(mnum); 332 333 tmpdev = md_getdevnum(setno, mddb_getsidenum(setno), 334 mdc->un_key, MD_NOTRUST_DEVT); 335 mdc->un_dev = tmpdev; 336 /* 337 * Check for hotspares. If the hotspares haven't been 338 * snarfed yet, stripe_open_all_devs() will do the 339 * remapping of the dev's later. 340 */ 341 if (mdc->un_mirror.ms_hs_id != 0) { 342 mdc->un_mirror.ms_orig_dev = mdc->un_dev; 343 (void) md_hot_spare_ifc(HS_MKDEV, 0, 0, 344 0, &mdc->un_mirror.ms_hs_id, NULL, 345 &tmpdev, NULL); 346 mdc->un_dev = tmpdev; 347 } 348 } 349 } 350 351 MD_UNIT(mnum) = un; 352 return (0); 353 } 354 355 void 356 reset_stripe(ms_unit_t *un, minor_t mnum, int removing) 357 { 358 ms_comp_t *mdcomp; 359 struct ms_row *mdr; 360 int i, c; 361 int row; 362 int nsv; 363 int isv; 364 sv_dev_t *sv; 365 mddb_recid_t *recids; 366 mddb_recid_t vtoc_id; 367 int rid = 0; 368 369 md_destroy_unit_incore(mnum, &stripe_md_ops); 370 371 MD_UNIT(mnum) = NULL; 372 373 if (!removing) 374 return; 375 376 nsv = 0; 377 /* Count the number of devices */ 378 for (row = 0; row < un->un_nrows; row++) { 379 mdr = &un->un_row[row]; 380 nsv += mdr->un_ncomp; 381 } 382 sv = (sv_dev_t *)kmem_alloc(sizeof (sv_dev_t) * nsv, KM_SLEEP); 383 384 /* 385 * allocate recids array. since we may have to commit 386 * underlying soft partition records, we need an array 387 * of size: total number of components in stripe + 3 388 * (one for the stripe itself, one for the hotspare, one 389 * for the end marker). 390 */ 391 recids = kmem_alloc(sizeof (mddb_recid_t) * (nsv + 3), KM_SLEEP); 392 393 /* 394 * Save the md_dev64_t's and driver nm indexes. 395 * Because after the mddb_deleterec() we will 396 * not be able to access the unit structure. 397 * 398 * NOTE: Deleting the names before deleting the 399 * unit structure would cause problems if 400 * the machine crashed in between the two. 401 */ 402 isv = 0; 403 mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 404 405 for (row = 0; row < un->un_nrows; row++) { 406 mdr = &un->un_row[row]; 407 for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) { 408 struct ms_comp *mdc; 409 md_dev64_t child_dev; 410 md_unit_t *child_un; 411 412 mdc = &mdcomp[c++]; 413 if (mdc->un_mirror.ms_hs_id != 0) { 414 mdkey_t hs_key; 415 416 hs_key = mdc->un_mirror.ms_hs_key; 417 418 mdc->un_dev = mdc->un_mirror.ms_orig_dev; 419 mdc->un_start_block = 420 mdc->un_mirror.ms_orig_blk; 421 mdc->un_mirror.ms_hs_id = 0; 422 mdc->un_mirror.ms_hs_key = 0; 423 mdc->un_mirror.ms_orig_dev = 0; 424 recids[0] = 0; 425 recids[1] = 0; /* recids[1] filled in below */ 426 recids[2] = 0; 427 (void) md_hot_spare_ifc(HS_FREE, un->un_hsp_id, 428 0, 0, &recids[0], &hs_key, NULL, NULL); 429 mddb_commitrecs_wrapper(recids); 430 } 431 432 /* 433 * check if we've got metadevice below us and 434 * deparent it if we do. 435 * NOTE: currently soft partitions are the 436 * the only metadevices stripes can be 437 * built on top of. 438 */ 439 child_dev = mdc->un_dev; 440 if (md_getmajor(child_dev) == md_major) { 441 child_un = MD_UNIT(md_getminor(child_dev)); 442 md_reset_parent(child_dev); 443 recids[rid++] = MD_RECID(child_un); 444 } 445 446 sv[isv].setno = MD_MIN2SET(mnum); 447 sv[isv++].key = mdc->un_key; 448 } 449 } 450 451 recids[rid++] = un->c.un_record_id; 452 recids[rid] = 0; /* filled in below */ 453 454 /* 455 * Decrement the HSP reference count and 456 * remove the knowledge of the HSP from the unit struct. 457 * This is done atomically to remove a window. 458 */ 459 if (un->un_hsp_id != -1) { 460 (void) md_hot_spare_ifc(HSP_DECREF, un->un_hsp_id, 0, 0, 461 &recids[rid++], NULL, NULL, NULL); 462 un->un_hsp_id = -1; 463 } 464 465 /* set end marker and commit records */ 466 recids[rid] = 0; 467 mddb_commitrecs_wrapper(recids); 468 469 vtoc_id = un->c.un_vtoc_id; 470 471 /* Remove the unit structure */ 472 mddb_deleterec_wrapper(un->c.un_record_id); 473 474 /* Remove the vtoc, if present */ 475 if (vtoc_id) 476 mddb_deleterec_wrapper(vtoc_id); 477 478 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_METADEVICE, 479 MD_MIN2SET(mnum), MD_MIN2UNIT(mnum)); 480 md_rem_names(sv, nsv); 481 kmem_free(sv, sizeof (sv_dev_t) * nsv); 482 kmem_free(recids, sizeof (mddb_recid_t) * (nsv + 3)); 483 } 484 485 static void 486 stripe_error(md_sps_t *ps) 487 { 488 struct buf *pb = ps->ps_bp; 489 mdi_unit_t *ui = ps->ps_ui; 490 md_dev64_t dev = ps->ps_errcomp->un_dev; 491 md_dev64_t md_dev = md_expldev(pb->b_edev); 492 char *str; 493 494 if (pb->b_flags & B_READ) { 495 ps->ps_errcomp->un_mirror.ms_flags |= MDM_S_READERR; 496 str = "read"; 497 } else { 498 ps->ps_errcomp->un_mirror.ms_flags |= MDM_S_WRTERR; 499 str = "write"; 500 } 501 if (!(ps->ps_flags & MD_SPS_DONTFREE)) { 502 if (MUTEX_HELD(&ps->ps_mx)) { 503 mutex_exit(&ps->ps_mx); 504 } 505 } else { 506 ASSERT(panicstr); 507 } 508 SPS_FREE(stripe_parent_cache, ps); 509 pb->b_flags |= B_ERROR; 510 511 md_kstat_done(ui, pb, 0); 512 md_unit_readerexit(ui); 513 md_biodone(pb); 514 515 cmn_err(CE_WARN, "md: %s: %s error on %s", 516 md_shortname(md_getminor(md_dev)), str, 517 md_devname(MD_DEV2SET(md_dev), dev, NULL, 0)); 518 } 519 520 static int 521 stripe_done(struct buf *cb) 522 { 523 struct buf *pb; 524 mdi_unit_t *ui; 525 md_sps_t *ps; 526 md_scs_t *cs; 527 528 /*LINTED*/ 529 cs = (md_scs_t *)((caddr_t)cb - md_stripe_mcs_buf_off); 530 ps = cs->cs_ps; 531 pb = ps->ps_bp; 532 533 mutex_enter(&ps->ps_mx); 534 if (cb->b_flags & B_ERROR) { 535 ps->ps_flags |= MD_SPS_ERROR; 536 pb->b_error = cb->b_error; 537 ps->ps_errcomp = cs->cs_comp; 538 } 539 540 if (cb->b_flags & B_REMAPPED) 541 bp_mapout(cb); 542 543 ps->ps_frags--; 544 if (ps->ps_frags != 0) { 545 mutex_exit(&ps->ps_mx); 546 kmem_cache_free(stripe_child_cache, cs); 547 return (1); 548 } 549 kmem_cache_free(stripe_child_cache, cs); 550 if (ps->ps_flags & MD_SPS_ERROR) { 551 stripe_error(ps); 552 return (1); 553 } 554 ui = ps->ps_ui; 555 if (!(ps->ps_flags & MD_SPS_DONTFREE)) { 556 mutex_exit(&ps->ps_mx); 557 } else { 558 ASSERT(panicstr); 559 } 560 SPS_FREE(stripe_parent_cache, ps); 561 md_kstat_done(ui, pb, 0); 562 md_unit_readerexit(ui); 563 md_biodone(pb); 564 return (0); 565 } 566 567 568 /* 569 * This routine does the mapping from virtual (dev, blkno) of a metapartition 570 * to the real (dev, blkno) of a real disk partition. 571 * It goes to the md_conf[] table to find out the correct real partition 572 * dev and block number for this buffer. 573 * 574 * A single buf request can not go across real disk partition boundary. 575 * When the virtual request specified by (dev, blkno) spans more than one 576 * real partition, md_mapbuf will return 1. Then the caller should prepare 577 * another real buf and continue calling md_mapbuf to do the mapping until 578 * it returns 0. 579 * 580 */ 581 582 static int 583 md_mapbuf( 584 ms_unit_t *un, 585 diskaddr_t blkno, 586 u_longlong_t bcount, 587 buf_t *bp, /* if bp==NULL, skip bp updates */ 588 ms_comp_t **mdc) /* if bp==NULL, skip mdc update */ 589 { 590 struct ms_row *mdr; 591 struct ms_comp *mdcomp; 592 diskaddr_t stripe_blk; 593 diskaddr_t fragment, blk_in_row, endblk; 594 offset_t interlace; 595 size_t dev_index; 596 int row_index, more; 597 extern unsigned md_maxphys; 598 /* Work var's when bp==NULL */ 599 u_longlong_t wb_bcount; 600 diskaddr_t wb_blkno; 601 md_dev64_t wb_edev; 602 ms_comp_t *wmdc; 603 604 /* 605 * Do a real calculation to derive the minor device of the 606 * Virtual Disk, which in turn will let us derive the 607 * device/minor of the underlying real device. 608 */ 609 610 611 for (row_index = 0; row_index < un->un_nrows; row_index++) { 612 mdr = &un->un_row[row_index]; 613 if (blkno < mdr->un_cum_blocks) 614 break; 615 } 616 ASSERT(row_index != un->un_nrows); 617 618 mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 619 620 blk_in_row = blkno - mdr->un_cum_blocks + mdr->un_blocks; 621 endblk = (diskaddr_t)(blkno + howmany(bcount, DEV_BSIZE)); 622 if (mdr->un_ncomp == 1) { /* No striping */ 623 if (endblk > mdr->un_cum_blocks) { 624 wb_bcount = ldbtob(mdr->un_cum_blocks - blkno); 625 if ((row_index + 1) == un->un_nrows) 626 more = 0; 627 else 628 more = 1; 629 } else { 630 wb_bcount = bcount; 631 more = 0; 632 } 633 wmdc = &mdcomp[mdr->un_icomp]; 634 wb_blkno = blk_in_row; 635 } else { /* Have striping */ 636 interlace = mdr->un_interlace; 637 fragment = blk_in_row % interlace; 638 if (bcount > ldbtob(interlace - fragment)) { 639 more = 1; 640 wb_bcount = ldbtob(interlace - fragment); 641 } else { 642 more = 0; 643 wb_bcount = bcount; 644 } 645 646 stripe_blk = blk_in_row / interlace; 647 dev_index = (size_t)(stripe_blk % mdr->un_ncomp); 648 wmdc = &mdcomp[mdr->un_icomp + dev_index]; 649 wb_blkno = (diskaddr_t)(((stripe_blk / mdr->un_ncomp) 650 * interlace) + fragment); 651 } 652 653 wb_blkno += wmdc->un_start_block; 654 wb_edev = wmdc->un_dev; 655 656 /* only break up the I/O if we're not built on another metadevice */ 657 if ((md_getmajor(wb_edev) != md_major) && (wb_bcount > md_maxphys)) { 658 wb_bcount = md_maxphys; 659 more = 1; 660 } 661 if (bp != (buf_t *)NULL) { 662 /* 663 * wb_bcount is limited by md_maxphys which is 'int' 664 */ 665 bp->b_bcount = (size_t)wb_bcount; 666 bp->b_lblkno = wb_blkno; 667 bp->b_edev = md_dev64_to_dev(wb_edev); 668 *mdc = wmdc; 669 } 670 return (more); 671 } 672 673 static void 674 md_stripe_strategy(buf_t *pb, int flag, void *private) 675 { 676 md_sps_t *ps; 677 md_scs_t *cs; 678 int doing_writes; 679 int more; 680 ms_unit_t *un; 681 mdi_unit_t *ui; 682 size_t current_count; 683 diskaddr_t current_blkno; 684 off_t current_offset; 685 buf_t *cb; /* child buf pointer */ 686 set_t setno; 687 688 setno = MD_MIN2SET(getminor(pb->b_edev)); 689 690 /* 691 * When doing IO to a multi owner meta device, check if set is halted. 692 * We do this check without the needed lock held, for performance 693 * reasons. 694 * If an IO just slips through while the set is locked via an 695 * MD_MN_SUSPEND_SET, we don't care about it. 696 * Only check for a suspended set if we are a top-level i/o request 697 * (MD_STR_NOTTOP is cleared in 'flag'). 698 */ 699 if ((md_set[setno].s_status & (MD_SET_HALTED | MD_SET_MNSET)) == 700 (MD_SET_HALTED | MD_SET_MNSET)) { 701 if ((flag & MD_STR_NOTTOP) == 0) { 702 mutex_enter(&md_mx); 703 /* Here we loop until the set is no longer halted */ 704 while (md_set[setno].s_status & MD_SET_HALTED) { 705 cv_wait(&md_cv, &md_mx); 706 } 707 mutex_exit(&md_mx); 708 } 709 } 710 711 ui = MDI_UNIT(getminor(pb->b_edev)); 712 713 md_kstat_waitq_enter(ui); 714 715 un = (ms_unit_t *)md_unit_readerlock(ui); 716 717 if ((flag & MD_NOBLOCK) == 0) { 718 if (md_inc_iocount(setno) != 0) { 719 pb->b_flags |= B_ERROR; 720 pb->b_error = ENXIO; 721 pb->b_resid = pb->b_bcount; 722 md_unit_readerexit(ui); 723 biodone(pb); 724 return; 725 } 726 } else { 727 md_inc_iocount_noblock(setno); 728 } 729 730 if (!(flag & MD_STR_NOTTOP)) { 731 if (md_checkbuf(ui, (md_unit_t *)un, pb) != 0) { 732 md_kstat_waitq_exit(ui); 733 return; 734 } 735 } 736 737 ps = kmem_cache_alloc(stripe_parent_cache, MD_ALLOCFLAGS); 738 stripe_parent_init(ps); 739 740 /* 741 * Save essential information from the original buffhdr 742 * in the md_save structure. 743 */ 744 ps->ps_un = un; 745 ps->ps_ui = ui; 746 ps->ps_bp = pb; 747 ps->ps_addr = pb->b_un.b_addr; 748 749 if ((pb->b_flags & B_READ) == 0) 750 doing_writes = 1; 751 else 752 doing_writes = 0; 753 754 755 current_count = pb->b_bcount; 756 current_blkno = pb->b_lblkno; 757 current_offset = 0; 758 759 if (!(flag & MD_STR_NOTTOP) && panicstr) 760 ps->ps_flags |= MD_SPS_DONTFREE; 761 762 md_kstat_waitq_to_runq(ui); 763 764 ps->ps_frags++; 765 do { 766 cs = kmem_cache_alloc(stripe_child_cache, MD_ALLOCFLAGS); 767 stripe_child_init(cs); 768 cb = &cs->cs_buf; 769 cs->cs_ps = ps; 770 more = md_mapbuf(un, current_blkno, current_count, cb, 771 &cs->cs_comp); 772 773 cb = md_bioclone(pb, current_offset, cb->b_bcount, cb->b_edev, 774 cb->b_lblkno, stripe_done, cb, KM_NOSLEEP); 775 /* 776 * Do these calculations now, 777 * so that we pickup a valid b_bcount from the chld_bp. 778 */ 779 current_offset += cb->b_bcount; 780 current_count -= cb->b_bcount; 781 current_blkno += (diskaddr_t)(lbtodb(cb->b_bcount)); 782 783 if (more) { 784 mutex_enter(&ps->ps_mx); 785 ps->ps_frags++; 786 mutex_exit(&ps->ps_mx); 787 } 788 789 if (doing_writes && 790 cs->cs_comp->un_mirror.ms_flags & MDM_S_NOWRITE) { 791 (void) stripe_done(cb); 792 continue; 793 } 794 md_call_strategy(cb, flag, private); 795 } while (more); 796 797 if (!(flag & MD_STR_NOTTOP) && panicstr) { 798 while (!(ps->ps_flags & MD_SPS_DONE)) { 799 md_daemon(1, &md_done_daemon); 800 drv_usecwait(10); 801 } 802 kmem_cache_free(stripe_parent_cache, ps); 803 } 804 } 805 806 static int 807 stripe_snarf(md_snarfcmd_t cmd, set_t setno) 808 { 809 ms_unit_t *un; 810 mddb_recid_t recid; 811 int gotsomething; 812 int all_stripes_gotten; 813 mddb_type_t typ1; 814 mddb_de_ic_t *dep; 815 mddb_rb32_t *rbp; 816 size_t newreqsize; 817 ms_unit_t *big_un; 818 ms_unit32_od_t *small_un; 819 820 821 if (cmd == MD_SNARF_CLEANUP) 822 return (0); 823 824 all_stripes_gotten = 1; 825 gotsomething = 0; 826 827 typ1 = (mddb_type_t)md_getshared_key(setno, 828 stripe_md_ops.md_driver.md_drivername); 829 recid = mddb_makerecid(setno, 0); 830 831 while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) { 832 if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 833 continue; 834 835 dep = mddb_getrecdep(recid); 836 dep->de_flags = MDDB_F_STRIPE; 837 rbp = dep->de_rb; 838 839 if ((rbp->rb_revision == MDDB_REV_RB) && 840 ((rbp->rb_private & MD_PRV_CONVD) == 0)) { 841 /* 842 * This means, we have an old and small record 843 * and this record hasn't already been converted. 844 * Before we create an incore metadevice from this 845 * we have to convert it to a big record. 846 */ 847 small_un = (ms_unit32_od_t *)mddb_getrecaddr(recid); 848 newreqsize = get_big_stripe_req_size(small_un, 849 COMPLETE_STRUCTURE); 850 big_un = (ms_unit_t *)kmem_zalloc(newreqsize, KM_SLEEP); 851 stripe_convert((caddr_t)small_un, (caddr_t)big_un, 852 SMALL_2_BIG); 853 kmem_free(small_un, dep->de_reqsize); 854 dep->de_rb_userdata = big_un; 855 dep->de_reqsize = newreqsize; 856 un = big_un; 857 rbp->rb_private |= MD_PRV_CONVD; 858 } else { 859 /* Big device */ 860 un = (ms_unit_t *)mddb_getrecaddr(recid); 861 } 862 863 /* Set revision and flag accordingly */ 864 if (rbp->rb_revision == MDDB_REV_RB) { 865 un->c.un_revision = MD_32BIT_META_DEV; 866 } else { 867 un->c.un_revision = MD_64BIT_META_DEV; 868 un->c.un_flag |= MD_EFILABEL; 869 } 870 871 /* Create minor node for snarfed unit. */ 872 (void) md_create_minor_node(MD_MIN2SET(MD_SID(un)), MD_SID(un)); 873 874 if (MD_UNIT(MD_SID(un)) != NULL) { 875 mddb_setrecprivate(recid, MD_PRV_PENDDEL); 876 continue; 877 } 878 all_stripes_gotten = 0; 879 if (stripe_build_incore((void *)un, 1) == 0) { 880 mddb_setrecprivate(recid, MD_PRV_GOTIT); 881 md_create_unit_incore(MD_SID(un), &stripe_md_ops, 0); 882 gotsomething = 1; 883 } 884 } 885 886 if (!all_stripes_gotten) 887 return (gotsomething); 888 889 recid = mddb_makerecid(setno, 0); 890 while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) 891 if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT)) 892 mddb_setrecprivate(recid, MD_PRV_PENDDEL); 893 894 return (0); 895 } 896 897 static int 898 stripe_halt(md_haltcmd_t cmd, set_t setno) 899 { 900 int i; 901 mdi_unit_t *ui; 902 minor_t mnum; 903 904 if (cmd == MD_HALT_CLOSE) 905 return (0); 906 907 if (cmd == MD_HALT_OPEN) 908 return (0); 909 910 if (cmd == MD_HALT_UNLOAD) 911 return (0); 912 913 if (cmd == MD_HALT_CHECK) { 914 for (i = 0; i < md_nunits; i++) { 915 mnum = MD_MKMIN(setno, i); 916 if ((ui = MDI_UNIT(mnum)) == NULL) 917 continue; 918 if (ui->ui_opsindex != stripe_md_ops.md_selfindex) 919 continue; 920 if (md_unit_isopen(ui)) 921 return (1); 922 } 923 return (0); 924 } 925 926 if (cmd != MD_HALT_DOIT) 927 return (1); 928 929 for (i = 0; i < md_nunits; i++) { 930 mnum = MD_MKMIN(setno, i); 931 if ((ui = MDI_UNIT(mnum)) == NULL) 932 continue; 933 if (ui->ui_opsindex != stripe_md_ops.md_selfindex) 934 continue; 935 reset_stripe((ms_unit_t *)MD_UNIT(mnum), mnum, 0); 936 } 937 938 return (0); 939 } 940 941 /*ARGSUSED3*/ 942 static int 943 stripe_open(dev_t *dev, int flag, int otyp, cred_t *cred_p, int md_oflags) 944 { 945 minor_t mnum = getminor(*dev); 946 mdi_unit_t *ui = MDI_UNIT(mnum); 947 ms_unit_t *un; 948 int err = 0; 949 set_t setno; 950 951 /* 952 * When doing an open of a multi owner metadevice, check to see if this 953 * node is a starting node and if a reconfig cycle is underway. 954 * If so, the system isn't sufficiently set up enough to handle the 955 * open (which involves I/O during sp_validate), so fail with ENXIO. 956 */ 957 setno = MD_MIN2SET(mnum); 958 if ((md_set[setno].s_status & (MD_SET_MNSET | MD_SET_MN_START_RC)) == 959 (MD_SET_MNSET | MD_SET_MN_START_RC)) { 960 return (ENXIO); 961 } 962 963 /* single thread */ 964 un = (ms_unit_t *)md_unit_openclose_enter(ui); 965 966 /* open devices, if necessary */ 967 if (! md_unit_isopen(ui) || (md_oflags & MD_OFLG_PROBEDEV)) { 968 if ((err = stripe_open_all_devs(un, md_oflags)) != 0) { 969 goto out; 970 } 971 } 972 973 /* count open */ 974 if ((err = md_unit_incopen(mnum, flag, otyp)) != 0) 975 goto out; 976 977 /* unlock, return success */ 978 out: 979 md_unit_openclose_exit(ui); 980 return (err); 981 } 982 983 /*ARGSUSED1*/ 984 static int 985 stripe_close( 986 dev_t dev, 987 int flag, 988 int otyp, 989 cred_t *cred_p, 990 int md_cflags 991 ) 992 { 993 minor_t mnum = getminor(dev); 994 mdi_unit_t *ui = MDI_UNIT(mnum); 995 ms_unit_t *un; 996 int err = 0; 997 998 /* single thread */ 999 un = (ms_unit_t *)md_unit_openclose_enter(ui); 1000 1001 /* count closed */ 1002 if ((err = md_unit_decopen(mnum, otyp)) != 0) 1003 goto out; 1004 1005 /* close devices, if necessary */ 1006 if (! md_unit_isopen(ui) || (md_cflags & MD_OFLG_PROBEDEV)) { 1007 stripe_close_all_devs(un, md_cflags); 1008 } 1009 1010 /* unlock, return success */ 1011 out: 1012 md_unit_openclose_exit(ui); 1013 return (err); 1014 } 1015 1016 1017 static struct buf dumpbuf; 1018 1019 /* 1020 * This routine dumps memory to the disk. It assumes that the memory has 1021 * already been mapped into mainbus space. It is called at disk interrupt 1022 * priority when the system is in trouble. 1023 * 1024 */ 1025 static int 1026 stripe_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 1027 { 1028 ms_unit_t *un; 1029 buf_t *bp; 1030 ms_comp_t *mdc; 1031 u_longlong_t nb; 1032 diskaddr_t mapblk; 1033 int result; 1034 int more; 1035 int saveresult = 0; 1036 1037 /* 1038 * Don't need to grab the unit lock. 1039 * Cause nothing else is suppose to be happenning. 1040 * Also dump is not suppose to sleep. 1041 */ 1042 un = (ms_unit_t *)MD_UNIT(getminor(dev)); 1043 1044 if ((diskaddr_t)blkno >= un->c.un_total_blocks) 1045 return (EINVAL); 1046 1047 if ((diskaddr_t)blkno + nblk > un->c.un_total_blocks) 1048 return (EINVAL); 1049 1050 bp = &dumpbuf; 1051 nb = ldbtob(nblk); 1052 do { 1053 bzero((caddr_t)bp, sizeof (*bp)); 1054 more = md_mapbuf(un, (diskaddr_t)blkno, nb, bp, &mdc); 1055 nblk = btodb(bp->b_bcount); 1056 mapblk = bp->b_lblkno; 1057 if (!(mdc->un_mirror.ms_flags & MDM_S_NOWRITE)) { 1058 /* 1059 * bdev_dump() is currently only able to take 1060 * 32 bit wide blkno's. 1061 */ 1062 result = bdev_dump(bp->b_edev, addr, (daddr_t)mapblk, 1063 nblk); 1064 if (result) 1065 saveresult = result; 1066 } 1067 1068 nb -= bp->b_bcount; 1069 addr += bp->b_bcount; 1070 blkno += nblk; 1071 } while (more); 1072 1073 return (saveresult); 1074 } 1075 1076 /*ARGSUSED*/ 1077 static intptr_t 1078 stripe_shared_by_blk( 1079 md_dev64_t dev, 1080 void *junk, 1081 diskaddr_t blkno, 1082 u_longlong_t *cnt) 1083 { 1084 ms_unit_t *un; 1085 buf_t bp; 1086 ms_comp_t *comp; 1087 1088 un = MD_UNIT(md_getminor(dev)); 1089 (void) md_mapbuf(un, blkno, ldbtob(*cnt), &bp, &comp); 1090 *cnt = (u_longlong_t)lbtodb(bp.b_bcount); 1091 return ((intptr_t)&comp->un_mirror); 1092 } 1093 1094 /* 1095 * stripe_block_count_skip_size() returns the following values 1096 * so that the logical to physical block mappings can 1097 * be calculated without intimate knowledge of the underpinnings. 1098 * 1099 * block - first logical block number of the device. 1100 * block = [ # of blocks before THE row ] + 1101 * [ # of blocks in THE row before the component ] 1102 * count - # of segments (interlaced size). 1103 * skip - # of logical blocks between segments, or delta to 1104 * get to next segment 1105 * size - interlace size used for the block, count, skip. 1106 */ 1107 /*ARGSUSED*/ 1108 static intptr_t 1109 stripe_block_count_skip_size( 1110 md_dev64_t dev, 1111 void *junk, 1112 int ci, 1113 diskaddr_t *block, 1114 size_t *count, 1115 u_longlong_t *skip, 1116 u_longlong_t *size) 1117 { 1118 ms_unit_t *un; 1119 int row; 1120 struct ms_row *mdr; 1121 int cmpcount = 0; 1122 1123 un = MD_UNIT(md_getminor(dev)); 1124 1125 for (row = 0; row < un->un_nrows; row++) { 1126 mdr = &un->un_row[row]; 1127 if ((mdr->un_ncomp + cmpcount) > ci) 1128 break; 1129 cmpcount += mdr->un_ncomp; 1130 } 1131 ASSERT(row != un->un_nrows); 1132 1133 /* 1134 * Concatenations are always contiguous blocks, 1135 * you cannot depend on the interlace being a usable 1136 * value (except for stripes). 1137 */ 1138 if (mdr->un_ncomp == 1) { /* Concats */ 1139 *block = mdr->un_cum_blocks - mdr->un_blocks; 1140 *count = 1; 1141 *skip = 0; 1142 *size = mdr->un_blocks; 1143 } else { /* Stripes */ 1144 *block = (mdr->un_cum_blocks - mdr->un_blocks) + 1145 ((ci - cmpcount) * mdr->un_interlace); 1146 *count = (size_t)(mdr->un_blocks / (mdr->un_interlace 1147 * mdr->un_ncomp)); 1148 *skip = (mdr->un_interlace * mdr->un_ncomp) - mdr->un_interlace; 1149 *size = mdr->un_interlace; 1150 } 1151 1152 return (0); 1153 } 1154 1155 /*ARGSUSED*/ 1156 static intptr_t 1157 stripe_shared_by_indx(md_dev64_t dev, void *junk, int indx) 1158 { 1159 ms_unit_t *un; 1160 ms_comp_t *comp; 1161 1162 un = MD_UNIT(md_getminor(dev)); 1163 comp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 1164 comp += indx; 1165 return ((intptr_t)&comp->un_mirror); 1166 } 1167 1168 /*ARGSUSED*/ 1169 intptr_t 1170 stripe_component_count(md_dev64_t dev, void *junk) 1171 { 1172 /* 1173 * See comments for stripe_get_dev 1174 */ 1175 1176 ms_unit_t *un; 1177 int count = 0; 1178 int row; 1179 1180 un = MD_UNIT(md_getminor(dev)); 1181 for (row = 0; row < un->un_nrows; row++) 1182 count += un->un_row[row].un_ncomp; 1183 return (count); 1184 } 1185 1186 /*ARGSUSED*/ 1187 intptr_t 1188 stripe_get_dev(md_dev64_t dev, void *junk, int indx, ms_cd_info_t *cd) 1189 { 1190 /* 1191 * It should be noted that stripe_replace in stripe_ioctl.c calls this 1192 * routine using makedevice(0, minor) for the first argument. 1193 * 1194 * If this routine at some point in the future needs to use the major 1195 * number stripe_replace must be changed. 1196 */ 1197 1198 ms_unit_t *un; 1199 ms_comp_t *comp; 1200 md_dev64_t tmpdev; 1201 1202 un = MD_UNIT(md_getminor(dev)); 1203 comp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 1204 comp += indx; 1205 tmpdev = comp->un_dev; 1206 /* 1207 * Try to resolve devt again if NODEV64 1208 * Check if this comp is hotspared and if it is 1209 * then use key for hotspare 1210 */ 1211 if (tmpdev == NODEV64) { 1212 tmpdev = md_resolve_bydevid(md_getminor(dev), tmpdev, 1213 comp->un_mirror.ms_hs_id ? 1214 comp->un_mirror.ms_hs_key : 1215 comp->un_key); 1216 comp->un_dev = tmpdev; 1217 } 1218 1219 cd->cd_dev = comp->un_dev; 1220 cd->cd_orig_dev = comp->un_mirror.ms_orig_dev; 1221 return (0); 1222 } 1223 1224 /*ARGSUSED*/ 1225 void 1226 stripe_replace_done(md_dev64_t dev, sv_dev_t *sv) 1227 { 1228 /* 1229 * See comments for stripe_get_dev 1230 */ 1231 1232 minor_t mnum = md_getminor(dev); 1233 1234 if (sv != NULL) { 1235 md_rem_names(sv, 1); 1236 kmem_free(sv, sizeof (sv_dev_t)); 1237 } 1238 1239 md_unit_writerexit(MDI_UNIT(mnum)); 1240 } 1241 1242 /*ARGSUSED*/ 1243 intptr_t 1244 stripe_replace_dev(md_dev64_t dev, void *junk, int ci, ms_new_dev_t *nd, 1245 mddb_recid_t *recids, int nrecids, void (**replace_done)(), 1246 void **replace_data) 1247 { 1248 minor_t mnum; 1249 ms_unit_t *un; 1250 mdi_unit_t *ui; 1251 ms_comp_t *comp; 1252 diskaddr_t dev_size; 1253 int row; 1254 int ncomps = 0; 1255 int cmpcount = 0; 1256 int rid = 0; 1257 struct ms_row *mdr; 1258 sv_dev_t *sv = NULL; 1259 mddb_recid_t hs_id = 0; 1260 set_t setno; 1261 side_t side; 1262 md_dev64_t this_dev; 1263 1264 mnum = md_getminor(dev); 1265 ui = MDI_UNIT(mnum); 1266 setno = MD_MIN2SET(mnum); 1267 side = mddb_getsidenum(setno); 1268 1269 un = md_unit_writerlock(ui); 1270 1271 *replace_data = NULL; 1272 comp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 1273 1274 comp += ci; 1275 1276 /* 1277 * Count the number of components 1278 */ 1279 for (row = 0; row < un->un_nrows; row++) { 1280 struct ms_row *mdr = &un->un_row[row]; 1281 ncomps += mdr->un_ncomp; 1282 } 1283 1284 recids[0] = 0; 1285 /* 1286 * No need of checking size of new device, 1287 * when hotsparing (it has already been done), or 1288 * when enabling the device. 1289 */ 1290 if ((nd != NULL) && (nd->nd_hs_id == 0)) { 1291 for (row = 0; row < un->un_nrows; row++) { 1292 mdr = &un->un_row[row]; 1293 if ((mdr->un_ncomp + cmpcount) > ci) 1294 break; 1295 cmpcount += mdr->un_ncomp; 1296 } 1297 ASSERT(row != un->un_nrows); 1298 1299 /* Concatenations have a ncomp = 1 */ 1300 dev_size = mdr->un_blocks / mdr->un_ncomp; 1301 1302 /* 1303 * now check to see if new comp can be used in 1304 * place of old comp 1305 */ 1306 if ((un->c.un_flag & MD_LABELED) && (ci == 0) && 1307 nd->nd_labeled) 1308 nd->nd_start_blk = 0; 1309 else 1310 nd->nd_nblks -= nd->nd_start_blk; 1311 1312 if (dev_size > nd->nd_nblks) { 1313 md_unit_writerexit(ui); 1314 return (MDE_COMP_TOO_SMALL); 1315 } 1316 1317 sv = (sv_dev_t *)kmem_alloc(sizeof (sv_dev_t), KM_SLEEP); 1318 sv->setno = MD_MIN2SET(mnum); 1319 sv->key = comp->un_key; 1320 } 1321 1322 /* 1323 * Close this component. 1324 */ 1325 if (comp->un_mirror.ms_flags & MDM_S_ISOPEN) { 1326 md_layered_close(comp->un_dev, MD_OFLG_NULL); 1327 comp->un_mirror.ms_flags &= ~MDM_S_ISOPEN; 1328 } 1329 1330 /* 1331 * If the component is hotspared, return to the pool. 1332 */ 1333 if (comp->un_mirror.ms_hs_id != 0) { 1334 hs_cmds_t cmd; 1335 mdkey_t hs_key; 1336 1337 hs_key = comp->un_mirror.ms_hs_key; 1338 comp->un_dev = comp->un_mirror.ms_orig_dev; 1339 comp->un_start_block = comp->un_mirror.ms_orig_blk; 1340 comp->un_mirror.ms_hs_key = 0; 1341 comp->un_mirror.ms_hs_id = 0; 1342 comp->un_mirror.ms_orig_dev = 0; 1343 1344 cmd = HS_FREE; 1345 if ((comp->un_mirror.ms_state != CS_OKAY) && 1346 (comp->un_mirror.ms_state != CS_RESYNC)) 1347 cmd = HS_BAD; 1348 (void) md_hot_spare_ifc(cmd, un->un_hsp_id, 0, 0, &hs_id, 1349 &hs_key, NULL, NULL); 1350 } 1351 1352 /* 1353 * Open by device id; for enable (indicated by a NULL 1354 * nd pointer), use the existing component info. For 1355 * replace, use the new device. 1356 */ 1357 if (nd == NULL) { 1358 this_dev = md_resolve_bydevid(mnum, comp->un_dev, comp->un_key); 1359 /* 1360 * If someone replaced a new disk in the same slot 1361 * we get NODEV64 since old device id cannot be 1362 * resolved. The new devt is obtained from the 1363 * mddb since devt is going to be unchanged for the 1364 * enable case. No need to check for multiple 1365 * keys here because the caller (comp_replace) 1366 * has already sanity checked it for us. 1367 */ 1368 if (this_dev == NODEV64) { 1369 this_dev = md_getdevnum(setno, side, comp->un_key, 1370 MD_TRUST_DEVT); 1371 } 1372 } else { 1373 /* 1374 * If this is a hotspare, save the original dev_t for later 1375 * use. If this has occured during boot then the value of 1376 * comp->un_dev will be NODEV64 because of the failure to look 1377 * up the devid of the device. 1378 */ 1379 if (nd->nd_hs_id != 0) 1380 comp->un_mirror.ms_orig_dev = comp->un_dev; 1381 this_dev = md_resolve_bydevid(mnum, nd->nd_dev, nd->nd_key); 1382 } 1383 1384 comp->un_dev = this_dev; 1385 1386 /* 1387 * Now open the new device if required. Note for a single component 1388 * stripe it will not be open - leave this for the mirror driver to 1389 * deal with. 1390 */ 1391 if (md_unit_isopen(ui)) { 1392 if (md_layered_open(mnum, &this_dev, MD_OFLG_NULL)) { 1393 mddb_recid_t ids[3]; 1394 1395 ids[0] = un->c.un_record_id; 1396 ids[1] = hs_id; 1397 ids[2] = 0; 1398 mddb_commitrecs_wrapper(ids); 1399 if ((nd != NULL) && (nd->nd_hs_id != 0)) { 1400 /* 1401 * Revert back to the original device. 1402 */ 1403 comp->un_dev = comp->un_mirror.ms_orig_dev; 1404 1405 cmn_err(CE_WARN, 1406 "md: %s: open error of hotspare %s", 1407 md_shortname(mnum), 1408 md_devname(MD_MIN2SET(mnum), nd->nd_dev, 1409 NULL, 0)); 1410 SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OPEN_FAIL, 1411 SVM_TAG_HS, MD_MIN2SET(mnum), nd->nd_dev); 1412 } 1413 md_unit_writerexit(ui); 1414 return (MDE_COMP_OPEN_ERR); 1415 } 1416 if (nd != NULL) 1417 nd->nd_dev = this_dev; 1418 1419 comp->un_mirror.ms_flags |= MDM_S_ISOPEN; 1420 } 1421 1422 if (nd == NULL) { 1423 recids[0] = un->c.un_record_id; 1424 recids[1] = hs_id; 1425 recids[2] = 0; 1426 *replace_done = stripe_replace_done; 1427 return (0); 1428 } 1429 1430 /* if hot sparing this device */ 1431 if (nd->nd_hs_id != 0) { 1432 char devname[MD_MAX_CTDLEN]; 1433 char hs_devname[MD_MAX_CTDLEN]; 1434 set_t setno; 1435 1436 comp->un_mirror.ms_hs_id = nd->nd_hs_id; 1437 comp->un_mirror.ms_hs_key = nd->nd_key; 1438 1439 comp->un_mirror.ms_orig_blk = comp->un_start_block; 1440 1441 setno = MD_MIN2SET(mnum); 1442 1443 (void) md_devname(setno, comp->un_mirror.ms_orig_dev, devname, 1444 sizeof (devname)); 1445 (void) md_devname(setno, nd->nd_dev, hs_devname, 1446 sizeof (hs_devname)); 1447 1448 cmn_err(CE_NOTE, "md: %s: hotspared device %s with %s", 1449 md_shortname(mnum), devname, hs_devname); 1450 1451 } else { /* replacing the device */ 1452 comp->un_key = nd->nd_key; 1453 *replace_data = (void *)sv; 1454 1455 /* 1456 * For the old device, make sure to reset the parent 1457 * if it's a metadevice. 1458 */ 1459 if (md_getmajor(comp->un_dev) == md_major) { 1460 minor_t comp_mnum = md_getminor(comp->un_dev); 1461 md_unit_t *comp_un = MD_UNIT(comp_mnum); 1462 1463 md_reset_parent(comp->un_dev); 1464 recids[rid++] = MD_RECID(comp_un); 1465 } 1466 } 1467 1468 comp->un_dev = nd->nd_dev; 1469 comp->un_start_block = nd->nd_start_blk; 1470 1471 /* 1472 * For the new device, make sure to set the parent if it's a 1473 * metadevice. 1474 * 1475 * If we ever support using metadevices as hot spares, this 1476 * will need to be tested, and possibly moved into the 1477 * preceding "else" clause, immediately following the parent 1478 * reset block. For now, it's convenient to leave it here and 1479 * only compress nd->nd_dev once. 1480 */ 1481 if (md_getmajor(comp->un_dev) == md_major) { 1482 minor_t comp_mnum = md_getminor(comp->un_dev); 1483 md_unit_t *comp_un = MD_UNIT(comp_mnum); 1484 1485 md_set_parent(comp->un_dev, MD_SID(un)); 1486 recids[rid++] = MD_RECID(comp_un); 1487 } 1488 1489 recids[rid++] = un->c.un_record_id; 1490 recids[rid++] = hs_id; 1491 recids[rid] = 0; 1492 *replace_done = stripe_replace_done; 1493 return (0); 1494 } 1495 1496 /*ARGSUSED*/ 1497 static intptr_t 1498 stripe_hotspare_dev( 1499 md_dev64_t dev, 1500 void *junk, 1501 int ci, 1502 mddb_recid_t *recids, 1503 int nrecids, 1504 void (**replace_done)(), 1505 void **replace_data) 1506 { 1507 ms_unit_t *un; 1508 mdi_unit_t *ui; 1509 ms_comp_t *comp; 1510 int row; 1511 struct ms_row *mdr; 1512 ms_new_dev_t nd; 1513 int err; 1514 int i; 1515 minor_t mnum; 1516 set_t setno; 1517 int cmpcount = 0; 1518 1519 mnum = md_getminor(dev); 1520 ui = MDI_UNIT(mnum); 1521 un = MD_UNIT(mnum); 1522 setno = MD_MIN2SET(mnum); 1523 1524 if (md_get_setstatus(setno) & MD_SET_STALE) 1525 return (1); 1526 1527 if (un->un_hsp_id == -1) 1528 return (1); 1529 1530 for (row = 0; row < un->un_nrows; row++) { 1531 mdr = &un->un_row[row]; 1532 if ((mdr->un_ncomp + cmpcount) > ci) 1533 break; 1534 cmpcount += mdr->un_ncomp; 1535 } 1536 ASSERT(row != un->un_nrows); 1537 1538 comp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]); 1539 comp += ci; 1540 /* Concatenations have a ncomp = 1 */ 1541 nd.nd_nblks = mdr->un_blocks / mdr->un_ncomp; 1542 1543 if ((un->c.un_flag & MD_LABELED) && (ci == 0)) 1544 nd.nd_labeled = 1; 1545 else 1546 nd.nd_labeled = 0; 1547 1548 again: 1549 err = md_hot_spare_ifc(HS_GET, un->un_hsp_id, nd.nd_nblks, 1550 nd.nd_labeled, &nd.nd_hs_id, &nd.nd_key, &nd.nd_dev, 1551 &nd.nd_start_blk); 1552 1553 if (err) { 1554 if (!stripe_replace_dev(dev, junk, ci, NULL, recids, nrecids, 1555 replace_done, replace_data)) { 1556 mddb_commitrecs_wrapper(recids); 1557 md_unit_writerexit(ui); 1558 } 1559 recids[0] = 0; 1560 return (1); 1561 } 1562 1563 if (stripe_replace_dev(dev, junk, ci, &nd, recids, nrecids, 1564 replace_done, replace_data)) { 1565 1566 (void) md_hot_spare_ifc(HS_BAD, un->un_hsp_id, 0, 0, 1567 &nd.nd_hs_id, &nd.nd_key, NULL, NULL); 1568 mddb_commitrec_wrapper(nd.nd_hs_id); 1569 goto again; 1570 } 1571 1572 /* Leave a slot for the null recid */ 1573 for (i = 0; i < (nrecids - 1); i++) { 1574 if (recids[i] == 0) { 1575 recids[i++] = nd.nd_hs_id; 1576 recids[i] = 0; 1577 } 1578 } 1579 return (0); 1580 } 1581 1582 static int 1583 stripe_imp_set( 1584 set_t setno 1585 ) 1586 { 1587 1588 mddb_recid_t recid; 1589 int i, row, c, gotsomething; 1590 mddb_type_t typ1; 1591 mddb_de_ic_t *dep; 1592 mddb_rb32_t *rbp; 1593 ms_unit32_od_t *un32; 1594 ms_unit_t *un64; 1595 minor_t *self_id; /* minor needs to be updated */ 1596 md_parent_t *parent_id; /* parent needs to be updated */ 1597 mddb_recid_t *record_id; /* record id needs to be updated */ 1598 mddb_recid_t *hsp_id; 1599 ms_comp32_od_t *comp32; 1600 ms_comp_t *comp64; 1601 1602 1603 gotsomething = 0; 1604 1605 typ1 = (mddb_type_t)md_getshared_key(setno, 1606 stripe_md_ops.md_driver.md_drivername); 1607 recid = mddb_makerecid(setno, 0); 1608 1609 while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) { 1610 if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 1611 continue; 1612 1613 dep = mddb_getrecdep(recid); 1614 rbp = dep->de_rb; 1615 1616 if (rbp->rb_revision == MDDB_REV_RB) { 1617 /* 1618 * Small device 1619 */ 1620 un32 = (ms_unit32_od_t *)mddb_getrecaddr(recid); 1621 self_id = &(un32->c.un_self_id); 1622 parent_id = &(un32->c.un_parent); 1623 record_id = &(un32->c.un_record_id); 1624 hsp_id = &(un32->un_hsp_id); 1625 1626 comp32 = (ms_comp32_od_t *)((void *)&((char *)un32) 1627 [un32->un_ocomp]); 1628 for (row = 0; row < un32->un_nrows; row++) { 1629 struct ms_row32_od *mdr = &un32->un_row[row]; 1630 for (i = 0, c = mdr->un_icomp; 1631 i < mdr->un_ncomp; i++) { 1632 ms_comp32_od_t *mdc; 1633 mdc = &comp32[c++]; 1634 1635 if (!md_update_minor(setno, mddb_getsidenum 1636 (setno), mdc->un_key)) 1637 goto out; 1638 1639 if (mdc->un_mirror.ms_hs_id != 0) 1640 mdc->un_mirror.ms_hs_id = MAKERECID( 1641 setno, mdc->un_mirror.ms_hs_id); 1642 } 1643 } 1644 } else { 1645 un64 = (ms_unit_t *)mddb_getrecaddr(recid); 1646 self_id = &(un64->c.un_self_id); 1647 parent_id = &(un64->c.un_parent); 1648 record_id = &(un64->c.un_record_id); 1649 hsp_id = &(un64->un_hsp_id); 1650 1651 comp64 = (ms_comp_t *)((void *)&((char *)un64) 1652 [un64->un_ocomp]); 1653 for (row = 0; row < un64->un_nrows; row++) { 1654 struct ms_row *mdr = &un64->un_row[row]; 1655 for (i = 0, c = mdr->un_icomp; 1656 i < mdr->un_ncomp; i++) { 1657 ms_comp_t *mdc; 1658 mdc = &comp64[c++]; 1659 1660 if (!md_update_minor(setno, mddb_getsidenum 1661 (setno), mdc->un_key)) 1662 goto out; 1663 1664 if (mdc->un_mirror.ms_hs_id != 0) 1665 mdc->un_mirror.ms_hs_id = MAKERECID( 1666 setno, mdc->un_mirror.ms_hs_id); 1667 } 1668 } 1669 } 1670 1671 /* 1672 * Update unit with the imported setno 1673 * 1674 */ 1675 mddb_setrecprivate(recid, MD_PRV_GOTIT); 1676 1677 *self_id = MD_MKMIN(setno, MD_MIN2UNIT(*self_id)); 1678 1679 if (*hsp_id != -1) 1680 *hsp_id = MAKERECID(setno, DBID(*hsp_id)); 1681 1682 if (*parent_id != MD_NO_PARENT) 1683 *parent_id = MD_MKMIN(setno, MD_MIN2UNIT(*parent_id)); 1684 *record_id = MAKERECID(setno, DBID(*record_id)); 1685 1686 gotsomething = 1; 1687 } 1688 1689 out: 1690 return (gotsomething); 1691 } 1692 1693 static md_named_services_t stripe_named_services[] = { 1694 {stripe_shared_by_blk, "shared by blk" }, 1695 {stripe_shared_by_indx, "shared by indx" }, 1696 {stripe_component_count, "get component count" }, 1697 {stripe_block_count_skip_size, "get block count skip size" }, 1698 {stripe_get_dev, "get device" }, 1699 {stripe_replace_dev, "replace device" }, 1700 {stripe_hotspare_dev, "hotspare device" }, 1701 {stripe_rename_check, MDRNM_CHECK }, 1702 {NULL, 0} 1703 }; 1704 1705 md_ops_t stripe_md_ops = { 1706 stripe_open, /* open */ 1707 stripe_close, /* close */ 1708 md_stripe_strategy, /* strategy */ 1709 NULL, /* print */ 1710 stripe_dump, /* dump */ 1711 NULL, /* read */ 1712 NULL, /* write */ 1713 md_stripe_ioctl, /* stripe_ioctl, */ 1714 stripe_snarf, /* stripe_snarf */ 1715 stripe_halt, /* stripe_halt */ 1716 NULL, /* aread */ 1717 NULL, /* awrite */ 1718 stripe_imp_set, /* import set */ 1719 stripe_named_services 1720 }; 1721 1722 static void 1723 init_init() 1724 { 1725 md_stripe_mcs_buf_off = sizeof (md_scs_t) - sizeof (buf_t); 1726 1727 stripe_parent_cache = kmem_cache_create("md_stripe_parent", 1728 sizeof (md_sps_t), 0, stripe_parent_constructor, 1729 stripe_parent_destructor, stripe_run_queue, NULL, NULL, 1730 0); 1731 stripe_child_cache = kmem_cache_create("md_stripe_child", 1732 sizeof (md_scs_t) - sizeof (buf_t) + biosize(), 0, 1733 stripe_child_constructor, stripe_child_destructor, 1734 stripe_run_queue, NULL, NULL, 0); 1735 } 1736 1737 static void 1738 fini_uninit() 1739 { 1740 kmem_cache_destroy(stripe_parent_cache); 1741 kmem_cache_destroy(stripe_child_cache); 1742 stripe_parent_cache = stripe_child_cache = NULL; 1743 } 1744 1745 /* define the module linkage */ 1746 MD_PLUGIN_MISC_MODULE("stripes module %I%", init_init(), fini_uninit()) 1747