/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/debug.h>
#include <sys/file.h>
#include <sys/user.h>
#include <sys/uio.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/kmem.h>
#include <vm/page.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/mkdev.h>
#include <sys/stat.h>
#include <sys/open.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/disp.h>
#include <sys/buf.h>

#include <sys/lvm/mdvar.h>
#include <sys/lvm/md_trans.h>
#include <sys/lvm/md_notify.h>
#include <sys/lvm/md_convert.h>

#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/svm.h>

md_ops_t		trans_md_ops;
#ifndef	lint
char			_depends_on[] = "drv/md fs/ufs";
md_ops_t		*md_interface_ops = &trans_md_ops;
#endif	/* lint */

extern unit_t		md_nunits;
extern set_t		md_nsets;
extern md_set_t		md_set[];
extern int		md_status;
extern major_t		md_major;

extern int		md_trans_ioctl();
extern md_krwlock_t	md_unit_array_rw;

extern mdq_anchor_t	md_done_daemon;

extern int		md_in_upgrade;

static kmem_cache_t	*trans_parent_cache = NULL;
kmem_cache_t		*trans_child_cache = NULL;

#ifdef	DEBUG
/*
 * ROUTINES FOR TESTING:
 */
static int
_init_debug()
{
	extern int	_init_ioctl();

	return (_init_ioctl());
}
static int
_fini_debug()
{
	extern int	_fini_ioctl();
	int	err = 0;

	err = _fini_ioctl();
	return (err);
}

#endif	/* DEBUG */

/*
 * BEGIN RELEASE DEBUG
 *	The following routines remain in the released product for testability
 */
int
trans_done_shadow(buf_t *bp)
{
	buf_t		*pb;
	md_tps_t	*ps = (md_tps_t *)bp->b_chain;
	int		rv = 0;

	pb = ps->ps_bp;
	mutex_enter(&ps->ps_mx);
	ps->ps_count--;
	if (ps->ps_count > 0) {
		if ((bp->b_flags & B_ERROR) != 0) {
			pb->b_flags |= B_ERROR;
			pb->b_error = bp->b_error;
		}
		mutex_exit(&ps->ps_mx);
		kmem_cache_free(trans_child_cache, bp);
	} else {
		mutex_exit(&ps->ps_mx);
		mutex_destroy(&ps->ps_mx);
		rv = trans_done(bp);
	}
	return (rv);
}
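/*
 * Clone the parent buf and issue a second write to the shadow device
 * (un->un_s_dev).  ps_count is set to 2 so that trans_done_shadow()
 * completes the parent only after both the master and shadow child
 * bufs have finished.
 */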
static void
shadow_debug(mt_unit_t	*un,		/* trans unit info */
		buf_t	*pb,		/* primary buffer */
		md_tps_t	*ps,	/* trans parent save */
		buf_t	*cb,		/* buffer for writing to master */
		int	flag,
		void	*private)
{
	buf_t		*sb;		/* Shadow buffer */

	mutex_init(&ps->ps_mx, NULL, MUTEX_DEFAULT, NULL);
	ps->ps_count = 2;		/* Write child buffer & shadow */
	cb->b_iodone = trans_done_shadow;
	sb = kmem_cache_alloc(trans_child_cache, MD_ALLOCFLAGS);
	trans_child_init(sb);
	sb = bioclone(pb, 0, pb->b_bcount, md_dev64_to_dev(un->un_s_dev),
	    pb->b_blkno, trans_done_shadow, sb, KM_NOSLEEP);

	sb->b_flags |= B_ASYNC;
	sb->b_chain = (void *)ps;
	md_call_strategy(sb, flag | MD_STR_MAPPED, private);
}
/*
 * END RELEASE DEBUG
 */

/*
 * COMMON MEMORY ALLOCATION ROUTINES (so that we can discover leaks)
 */
void *
md_trans_zalloc(size_t nb)
{
	TRANSSTATS(ts_trans_zalloc);
	TRANSSTATSADD(ts_trans_alloced, nb);
	return (kmem_zalloc(nb, KM_SLEEP));
}
void *
md_trans_alloc(size_t nb)
{
	TRANSSTATS(ts_trans_alloc);
	TRANSSTATSADD(ts_trans_alloced, nb);
	return (kmem_alloc(nb, KM_SLEEP));
}
void
md_trans_free(void *va, size_t nb)
{
	TRANSSTATS(ts_trans_free);
	TRANSSTATSADD(ts_trans_freed, nb);
	if (nb)
		kmem_free(va, nb);
}

static void
trans_parent_init(md_tps_t *ps)
{
	bzero(ps, sizeof (md_tps_t));
}

/*ARGSUSED1*/
int
trans_child_constructor(void *p, void *d1, int d2)
{
	bioinit(p);
	return (0);
}

void
trans_child_init(struct buf *bp)
{
	md_bioreset(bp);
}

/*ARGSUSED1*/
void
trans_child_destructor(void *p, void *d)
{
	biofini(p);
}

void
trans_commit(mt_unit_t *un, int domstr)
{
	mddb_recid_t	recids[4];
	md_unit_t	*su;
	int		ri = 0;

	if (md_get_setstatus(MD_UN2SET(un)) & MD_SET_STALE)
		return;

	recids[ri++] = un->c.un_record_id;

	if (domstr)
		if (md_getmajor(un->un_m_dev) == md_major) {
			su = MD_UNIT(md_getminor(un->un_m_dev));
			recids[ri++] = su->c.un_record_id;
		}

	if (ri == 0)
		return;
	recids[ri] = 0;

	uniqtime32(&un->un_timestamp);
	mddb_commitrecs_wrapper(recids);
}

void
trans_close_all_devs(mt_unit_t *un)
{
	if ((un->un_flags & TRANS_NEED_OPEN) == 0) {
		md_layered_close(un->un_m_dev, MD_OFLG_NULL);
		if (un->un_l_unit)
			ldl_close_dev(un->un_l_unit);
		un->un_flags |= TRANS_NEED_OPEN;
	}
}

int
trans_open_all_devs(mt_unit_t *un)
{
	int		err;
	minor_t		mnum = MD_SID(un);
	md_dev64_t	tmpdev = un->un_m_dev;
	set_t		setno = MD_MIN2SET(MD_SID(un));
	side_t		side = mddb_getsidenum(setno);

	/*
	 * Do the open by device id if it is a regular device
	 */
	if ((md_getmajor(tmpdev) != md_major) &&
	    md_devid_found(setno, side, un->un_m_key) == 1) {
		tmpdev = md_resolve_bydevid(mnum, tmpdev, un->un_m_key);
	}
	err = md_layered_open(mnum, &tmpdev, MD_OFLG_NULL);
	un->un_m_dev = tmpdev;

	if (err)
		return (ENXIO);

	if (un->un_l_unit) {
		err = ldl_open_dev(un, un->un_l_unit);
		if (err) {
			md_layered_close(tmpdev, MD_OFLG_NULL);
			return (ENXIO);
		}
	}
	return (0);
}

uint_t	mt_debug	= 0;

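/*
 * Build the incore state for a metatrans unit.  A non-zero return tells
 * the caller that the unit could not be built (e.g. the record is being
 * reset or the master device cannot be resolved yet) and should be left
 * pending.
 */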
int
trans_build_incore(void *p, int snarfing)
{
	mt_unit_t	*un = (mt_unit_t *)p;
	minor_t		mnum;
	set_t		setno;

	/*
	 * initialize debug mode and always start with no shadowing.
	 */
	if (!snarfing)
		un->un_debug = mt_debug;
	un->un_s_dev = NODEV64;

	mnum = MD_SID(un);

	if (MD_UNIT(mnum) != NULL)
		return (0);

	setno = MD_MIN2SET(mnum);

	/*
	 * If snarfing the metatrans device,
	 * then remake the device number
	 */
	if (snarfing) {
		un->un_m_dev = md_getdevnum(setno, mddb_getsidenum(setno),
		    un->un_m_key, MD_NOTRUST_DEVT);
	}

	/*
	 * db rec is partially deleted; finish the db delete later
	 */
	if (MD_STATUS(un) & MD_UN_BEING_RESET) {
		mddb_setrecprivate(un->c.un_record_id, MD_PRV_PENDCLEAN);
		return (1);
	}

	/*
	 * With the current device id implementation there is a possibility
	 * that we may have NODEV if the underlying device can't be resolved
	 * at snarf time.  If this is the case we want to be consistent with
	 * the normal behavior and continue to allow the snarf of the unit
	 * and resolve the devt at open time.
	 */
	if ((md_getmajor(un->un_m_dev) == md_major) &&
	    (md_dev_exists(un->un_m_dev) == 0)) {
		return (1);
	}

	/*
	 * retain the detach status; reset open status
	 */
	un->un_flags &= (TRANS_DETACHING | TRANS_DETACHED);
	un->un_flags |= TRANS_NEED_OPEN;
	if ((un->un_flags & TRANS_DETACHED) == 0)
		un->un_flags |= TRANS_ATTACHING;

	/*
	 * log device not set up yet; try again later
	 */
	if ((un->un_flags & TRANS_DETACHED) == 0)
		if (ldl_findlog(un->un_l_recid) == NULL)
			return (1);

	/*
	 * initialize incore fields
	 */
	un->un_next = NULL;
	un->un_l_unit = NULL;
	un->un_deltamap = NULL;
	un->un_udmap = NULL;
	un->un_logmap = NULL;
	un->un_matamap = NULL;
	un->un_shadowmap = NULL;
	un->un_ut = NULL;
	un->un_logreset = 0;
	un->un_dev = md_makedevice(md_major, mnum);
	MD_STATUS(un) = 0;

	/* necessary because capability didn't exist pre-4.1 */
	MD_CAPAB(un) = (MD_CAN_META_CHILD & ~MD_CAN_PARENT);

	/*
	 * attach the log
	 */
	trans_attach(un, 0);

	/*
	 * check for master dev dynconcat
	 */
	if (md_getmajor(un->un_m_dev) == md_major) {
		struct mdc_unit	*c;

		c = MD_UNIT(md_getminor(un->un_m_dev));
		un->c.un_total_blocks = c->un_total_blocks;
	}

	MD_UNIT(mnum) = un;

	return (0);
}

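/*
 * Detach the log from a metatrans device.  The unit must not be open
 * (EBUSY otherwise, even when force is set); on success ldl_reset()
 * clears un_l_unit and the TRANS_DETACHED state is committed to the
 * database.
 */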
int
trans_detach(mt_unit_t *un, int force)
{
	mdi_unit_t	*ui = MDI_UNIT(MD_SID(un));
	int		error	= 0;

	/*
	 * The caller is responsible for single-threading this routine.
	 */

	if (ui == NULL)
		return (0);

	/*
	 * already detached or the log isn't attached yet; do nothing
	 */
	if (un->un_flags & (TRANS_DETACHED | TRANS_ATTACHING))
		return (0);

	/*
	 * set state to detaching
	 */
	if (force || !md_unit_isopen(ui)) {
		un->un_flags |= TRANS_DETACHING;
		if (!MD_UPGRADE) {
			trans_commit(un, 0);
		}
		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DETACHING, TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	}

	/*
	 * device is busy
	 */
	if (md_unit_isopen(ui))
		return (EBUSY);

	/*
	 * detach the log
	 *	if successful
	 *		flags committed to TRANS_DETACHED in database
	 *		un->un_l_unit set to NULL
	 *		no error returned
	 */
	error = ldl_reset(un, 1, force);
	if (error)
		return (error);

	/*
	 * commit to database
	 */
	if (!MD_UPGRADE) {
		trans_commit(un, 0);
	}
	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DETACH, TAG_METADEVICE, MD_UN2SET(un),
	    MD_SID(un));

	return (0);
}

void
trans_attach(mt_unit_t *un, int attaching)
{
	mdi_unit_t	*ui = MDI_UNIT(MD_SID(un));
	ml_unit_t	*ul;

	/*
	 * called from snarf, set, and attach.  Hence, the attaching param
	 * The caller is responsible for single-threading this routine.
	 */

	/*
	 * not attaching; do nothing
	 */
	if ((un->un_flags & TRANS_ATTACHING) == 0)
		return;

	/*
	 * find log unit struct
	 */
	ul = ldl_findlog(un->un_l_recid);
	if (ul == NULL)
		return;
	un->un_l_dev = ul->un_dev;

	/*
	 * device is busy; do nothing
	 */
	if (attaching && md_unit_isopen(ui))
		return;
	/*
	 * other functions use non-NULL un_l_unit as detach/attach flag
	 */
	un->un_l_unit = ul;

	/*
	 * add metatrans device to the log's list of mt devices
	 */
	ldl_utadd(un);

	/*
	 * attached
	 */
	un->un_flags &= ~TRANS_ATTACHING;

}

int
trans_reset(mt_unit_t *un, minor_t mnum, int removing, int force)
{
	sv_dev_t	sv;
	mddb_recid_t	vtoc_id;
	int		error	= 0;

	/*
	 * reset log, maps, and ufs interface
	 */
	error = ldl_reset(un, removing, force);
	if (error)
		return (error);

	/*
	 * done with underlying devices
	 */
	trans_close_all_devs(un);

	md_destroy_unit_incore(mnum, &trans_md_ops);

	MD_UNIT(mnum) = NULL;

	if (!removing)
		return (0);

	md_reset_parent(un->un_m_dev);
	MD_STATUS(un) |= MD_UN_BEING_RESET;
	trans_commit(un, 1);
	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, TAG_METADEVICE, MD_UN2SET(un),
	    MD_SID(un));

	/* Save the mstr key */
	sv.setno = MD_MIN2SET(mnum);
	sv.key = un->un_m_key;

	vtoc_id = un->c.un_vtoc_id;

	mddb_deleterec_wrapper(un->c.un_record_id);

	/* Remove the vtoc, if present */
	if (vtoc_id)
		mddb_deleterec_wrapper(vtoc_id);
	md_rem_names(&sv, 1);
	return (0);
}

static void
trans_wait_panic(struct buf *cb)
{
	while ((cb->b_flags & B_DONE) == 0) {
		md_daemon(1, &md_done_daemon);
		drv_usecwait(10);
	}
}

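/*
 * Error handler queued on md_done_daemon by trans_done(): release the
 * resources held for the failed request, biodone the parent buf, and
 * log a warning identifying the metatrans device and the master device.
 */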
static void
trans_error(md_tps_t *ps)
{
	md_dev64_t	md_dev;
	md_dev64_t	m_dev;
	char		*str;
	struct buf	*pb;
	mdi_unit_t	*ui;

	pb = ps->ps_bp;
	ui = ps->ps_ui;

	/*
	 * gather up params for cmn_err
	 */
	if (pb->b_flags & B_READ)
		str = "read";
	else
		str = "write";
	md_dev = md_expldev(pb->b_edev);
	m_dev = ps->ps_un->un_m_dev;

	/*
	 * free up the resources for this request and biodone the errored buf
	 */
	md_kstat_done(ui, pb, 0);
	kmem_cache_free(trans_parent_cache, ps);
	md_unit_readerexit(ui);
	md_biodone(pb);

	/*
	 * print pretty error message
	 */
	cmn_err(CE_WARN, "md: %s: %s error on %s",
	    md_shortname(md_getminor(md_dev)), str,
	    md_devname(MD_DEV2SET(md_dev), m_dev, NULL, 0));
}

int
trans_done(struct buf *cb)
{
	struct buf	*pb;
	mdi_unit_t	*ui;
	md_tps_t	*ps;

	ps = (md_tps_t *)cb->b_chain;
	pb = ps->ps_bp;
	ui = ps->ps_ui;

	if (cb->b_flags & B_ERROR) {
		pb->b_flags |= B_ERROR;
		pb->b_error = cb->b_error;
		/*
		 * device not in hard error state; report error
		 */
		if (!ldl_isherror(ps->ps_un->un_l_unit)) {
			daemon_request(&md_done_daemon, trans_error,
			    (daemon_queue_t *)ps, REQ_OLD);

			if (cb->b_flags & B_REMAPPED)
				bp_mapout(cb);
			if (panicstr)
				cb->b_flags |= B_DONE;
			else
				kmem_cache_free(trans_child_cache, cb);

			return (1);
		}
	}

	if (cb->b_flags & B_REMAPPED)
		bp_mapout(cb);

	if (panicstr)
		cb->b_flags |= B_DONE;
	else
		kmem_cache_free(trans_child_cache, cb);
	kmem_cache_free(trans_parent_cache, ps);
	md_kstat_done(ui, pb, 0);
	md_unit_readerexit(ui);
	md_biodone(pb);

	return (0);
}

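/*
 * Strategy routine for the metatrans device: account for the request,
 * clone the parent buf against the master device, and pass the clone
 * to the underlying driver.  Completion and error handling happen in
 * trans_done().
 */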
static void
md_trans_strategy(buf_t *pb, int flag, void *private)
{
	md_tps_t	*ps;
	buf_t		*cb;		/* child buf pointer */
	mt_unit_t	*un;
	mdi_unit_t	*ui;

	ui = MDI_UNIT(getminor(pb->b_edev));

	md_kstat_waitq_enter(ui);

	un = (mt_unit_t *)md_unit_readerlock(ui);

	if (md_inc_iocount(MD_MIN2SET(getminor(pb->b_edev))) != 0) {
		pb->b_flags |= B_ERROR;
		pb->b_error = ENXIO;
		pb->b_resid = pb->b_bcount;
		md_kstat_waitq_exit(ui);
		md_unit_readerexit(ui);
		biodone(pb);
		return;
	}

	ASSERT(!(flag & MD_STR_NOTTOP));

	/* check and map */
	if (md_checkbuf(ui, (md_unit_t *)un, pb) != 0) {
		md_kstat_waitq_exit(ui);
		return;
	}

	bp_mapin(pb);

	ps = kmem_cache_alloc(trans_parent_cache, MD_ALLOCFLAGS);
	trans_parent_init(ps);

	/*
	 * Save essential information from the original buffhdr
	 * in the md_save structure.
	 */
	ps->ps_un = un;
	ps->ps_ui = ui;
	ps->ps_bp = pb;

	cb = kmem_cache_alloc(trans_child_cache, MD_ALLOCFLAGS);
	trans_child_init(cb);

	cb = bioclone(pb, 0, pb->b_bcount, md_dev64_to_dev(un->un_m_dev),
	    pb->b_blkno, trans_done, cb, KM_NOSLEEP);

	cb->b_chain = (void *)ps;

	/*
	 * RELEASE DEBUG
	 * The following calls shadow debug for testing purposes if we are
	 * writing and if shadowing is turned on.
	 */
	if ((un->un_s_dev != NODEV64) &&
	    ((pb->b_flags & B_READ) == 0))
		shadow_debug(un, pb, ps, cb, flag, private);

	md_kstat_waitq_to_runq(ui);

	(void) md_call_strategy(cb, flag | MD_STR_MAPPED | MD_NOBLOCK, private);

	/*
	 * panic in progress; process daemon queues
	 */
	if (panicstr) {
		trans_wait_panic(cb);
		kmem_cache_free(trans_child_cache, cb);
	}
}

/* ARGSUSED */
static int
md_trans_read(dev_t dev, struct uio *uio, cred_t *credp)
{
	int	error;

	if ((error = md_chk_uio(uio)) != 0)
		return (error);

	return (physio(mdstrategy, NULL, dev, B_READ, minphys, uio));
}

/* ARGSUSED */
static int
md_trans_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	int	error;

	if ((error = md_chk_uio(aio->aio_uio)) != 0)
		return (error);

	return (aphysio(mdstrategy, anocancel, dev, B_READ, minphys, aio));
}

/* ARGSUSED */
static int
md_trans_write(dev_t dev, struct uio *uio, cred_t *credp)
{
	int	error;

	if ((error = md_chk_uio(uio)) != 0)
		return (error);

	return (physio(mdstrategy, NULL, dev, B_WRITE, minphys, uio));
}

/* ARGSUSED */
static int
md_trans_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	int	error;

	if ((error = md_chk_uio(aio->aio_uio)) != 0)
		return (error);

	return (aphysio(mdstrategy, anocancel, dev, B_WRITE, minphys, aio));
}

static void
trans_cleanup(mt_unit_t *un)
{
	sv_dev_t	sv;

	MD_STATUS(un) |= MD_UN_LOG_DELETED;
	trans_commit(un, 0);

	/* Save the mstr key */
	sv.setno = MD_UN2SET(un);
	sv.key = un->un_m_key;

	mddb_deleterec_wrapper(un->c.un_record_id);

	md_rem_names(&sv, 1);
}

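/*
 * Snarf entry point.  MD_SNARF_CLEANUP finishes the removal of partially
 * deleted trans and log records.  Otherwise the (always old, 32-bit
 * format) log records are converted and built incore first, followed by
 * the metatrans master records; trans metadevices are obsolete, so a
 * warning is printed the first time one is found unless upgrading.
 * Records that cannot be completed are left pending or marked for
 * deletion.
 */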
static int
trans_snarf(md_snarfcmd_t cmd, set_t setno)
{
	mt_unit_t	*un;
	ml_unit_t	*ul;
	mddb_recid_t	recid;
	int		gotsomething;
	mddb_type_t	typ1;
	int		all_trans_gotten;
	mddb_de_ic_t	*dep;
	mddb_rb32_t	*rbp;
	size_t		newreqsize;
	static int	trans_found = 0;


	if (cmd == MD_SNARF_CLEANUP) {

		if (md_get_setstatus(setno) & MD_SET_STALE)
			return (0);

		/*
		 * clean up partially cleared trans devices
		 */
		typ1 = (mddb_type_t)md_getshared_key(setno,
		    trans_md_ops.md_driver.md_drivername);
		recid = mddb_makerecid(setno, 0);
		while ((recid = mddb_getnextrec(recid, typ1, TRANS_REC)) > 0) {
			un = (mt_unit_t *)mddb_getrecaddr(recid);
			(void) trans_detach(un, 1);
			if (mddb_getrecprivate(recid) & MD_PRV_CLEANUP) {
				trans_cleanup(un);
				recid = mddb_makerecid(setno, 0);
			}
		}
		/*
		 * clean up partially cleared log devices
		 */
		recid = mddb_makerecid(setno, 0);
		while ((recid = mddb_getnextrec(recid, typ1, LOG_REC)) > 0) {
			if (mddb_getrecprivate(recid) & MD_PRV_CLEANUP) {
				ul = (ml_unit_t *)mddb_getrecaddr(recid);
				ldl_cleanup(ul);
				recid = mddb_makerecid(setno, 0);
			}
		}

		return (0);
	}

	/*
	 * must snarf up the log devices first
	 */
	gotsomething = 0;
	all_trans_gotten = 1;
	typ1 = (mddb_type_t)md_getshared_key(setno,
	    trans_md_ops.md_driver.md_drivername);
	recid = mddb_makerecid(setno, 0);
	while ((recid = mddb_getnextrec(recid, typ1, LOG_REC)) > 0) {
		ml_unit_t	*big_ul;
		ml_unit32_od_t	*small_ul;

		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
			continue;

		small_ul = (ml_unit32_od_t *)mddb_getrecaddr(recid);
		dep = mddb_getrecdep(recid);
		dep->de_flags = MDDB_F_TRANS_LOG;
		rbp = dep->de_rb;
		/*
		 * As trans records are always old records,
		 * we have to check if this record already has been converted.
		 * We don't want to do that work twice.
		 */
		if ((rbp->rb_private & MD_PRV_CONVD) == 0) {
			newreqsize = sizeof (ml_unit_t);
			big_ul = (ml_unit_t *)kmem_zalloc(newreqsize, KM_SLEEP);
			trans_log_convert((caddr_t)small_ul, (caddr_t)big_ul,
			    SMALL_2_BIG);
			kmem_free(small_ul, dep->de_reqsize);
			/*
			 * Update userdata and incore userdata
			 * incores are at the end of ul
			 */
			dep->de_rb_userdata_ic = big_ul;
			dep->de_rb_userdata = big_ul;
			dep->de_icreqsize = newreqsize;
			rbp->rb_private |= MD_PRV_CONVD;
			ul = big_ul;
		} else {
			/* already converted, just set the pointer */
			ul = dep->de_rb_userdata;
		}
		all_trans_gotten = 0;
		if (ldl_build_incore(ul, 1) == 0) {
			mddb_setrecprivate(recid, MD_PRV_GOTIT);
			gotsomething = 1;
		}
	}

	/*
	 * now snarf up metatrans devices
	 */
	gotsomething = 0;
	recid = mddb_makerecid(setno, 0);
	while ((recid = mddb_getnextrec(recid, typ1, TRANS_REC)) > 0) {
		mt_unit_t	*big_un;
		mt_unit32_od_t	*small_un;

		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
			continue;

		if ((trans_found == 0) && (!MD_UPGRADE)) {
			cmn_err(CE_WARN, MD_EOF_TRANS_MSG MD_EOF_TRANS_WARNING);
			trans_found = 1;
		}

		small_un = (mt_unit32_od_t *)mddb_getrecaddr(recid);

		dep = mddb_getrecdep(recid);
		dep->de_flags = MDDB_F_TRANS_MASTER;
		rbp = dep->de_rb;
		/*
		 * As trans records are always old records,
		 * we have to check if this record already has been converted.
		 * We don't want to do that work twice.
		 */
		if ((rbp->rb_private & MD_PRV_CONVD) == 0) {
			newreqsize = sizeof (mt_unit_t);
			big_un = (mt_unit_t *)kmem_zalloc(newreqsize, KM_SLEEP);
			trans_master_convert((caddr_t)small_un, (caddr_t)big_un,
			    SMALL_2_BIG);
			kmem_free(small_un, dep->de_reqsize);
			/*
			 * Update userdata and incore userdata
			 * incores are at the end of ul
			 */
			dep->de_rb_userdata_ic = big_un;
			dep->de_rb_userdata = big_un;
			dep->de_icreqsize = newreqsize;
			rbp->rb_private |= MD_PRV_CONVD;
			un = big_un;
			un->c.un_revision &= ~MD_64BIT_META_DEV;
		} else {
			/* already converted, just set the pointer */
			un = dep->de_rb_userdata;
		}

		/*
		 * Create minor node for snarfed entry.
		 */
		(void) md_create_minor_node(MD_MIN2SET(MD_SID(un)), MD_SID(un));

		if (MD_UNIT(MD_SID(un)) != NULL) {
			mddb_setrecprivate(recid, MD_PRV_PENDDEL);
			continue;
		}

		all_trans_gotten = 0;
		if (trans_build_incore(un, 1) == 0) {
			mddb_setrecprivate(recid, MD_PRV_GOTIT);
			md_create_unit_incore(MD_SID(un), &trans_md_ops, 0);
			gotsomething = 1;
		}
	}

	if (!all_trans_gotten)
		return (gotsomething);

	recid = mddb_makerecid(setno, 0);
	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0)
		if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT))
			mddb_setrecprivate(recid, MD_PRV_PENDDEL);
	return (0);
}

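/*
 * Halt entry point.  MD_HALT_CLOSE fails if any trans unit is still
 * open and otherwise closes the underlying devices; MD_HALT_OPEN
 * re-opens them; MD_HALT_CHECK reports whether any unit is open;
 * MD_HALT_DOIT resets every unit; MD_HALT_UNLOAD is a no-op.
 */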
static int
trans_halt(md_haltcmd_t cmd, set_t setno)
{
	unit_t		i;
	mdi_unit_t	*ui;
	minor_t		mnum;
	mt_unit_t	*un;

	if (cmd == MD_HALT_CLOSE) {
		for (i = 0; i < md_nunits; i++) {
			mnum = MD_MKMIN(setno, i);
			if ((ui = MDI_UNIT(mnum)) == NULL)
				continue;
			if (ui->ui_opsindex != trans_md_ops.md_selfindex)
				continue;
			if (md_unit_isopen(ui)) {
				return (1);
			}
		}
		for (i = 0; i < md_nunits; i++) {
			mnum = MD_MKMIN(setno, i);
			if ((ui = MDI_UNIT(mnum)) == NULL)
				continue;
			if (ui->ui_opsindex != trans_md_ops.md_selfindex)
				continue;
			un = (mt_unit_t *)MD_UNIT(mnum);
			if ((un->un_flags & TRANS_NEED_OPEN) == 0) {
				trans_close_all_devs(un);
			}
		}
		return (0);
	}

	if (cmd == MD_HALT_OPEN) {
		for (i = 0; i < md_nunits; i++) {
			mnum = MD_MKMIN(setno, i);
			if ((ui = MDI_UNIT(mnum)) == NULL)
				continue;
			if (ui->ui_opsindex != trans_md_ops.md_selfindex)
				continue;
			ldl_open_underlying((mt_unit_t *)MD_UNIT(mnum));
		}
		return (0);
	}

	if (cmd == MD_HALT_CHECK) {
		for (i = 0; i < md_nunits; i++) {
			mnum = MD_MKMIN(setno, i);
			if ((ui = MDI_UNIT(mnum)) == NULL)
				continue;
			if (ui->ui_opsindex != trans_md_ops.md_selfindex)
				continue;
			if (md_unit_isopen(ui)) {
				return (1);
			}
		}
		return (0);
	}
	if (cmd == MD_HALT_DOIT) {
		for (i = 0; i < md_nunits; i++) {
			mnum = MD_MKMIN(setno, i);
			if ((ui = MDI_UNIT(mnum)) == NULL)
				continue;
			if (ui->ui_opsindex != trans_md_ops.md_selfindex)
				continue;
			(void) trans_reset((mt_unit_t *)MD_UNIT(mnum), mnum,
			    0, 1);
		}
		return (0);
	}
	if (cmd == MD_HALT_UNLOAD)
		return (0);

	return (1);
}

/*ARGSUSED3*/
static int
trans_open(
	dev_t		*dev,
	int		flag,
	int		otyp,
	cred_t		*cred_p,
	int		md_oflags
)
{
	minor_t		mnum = getminor(*dev);
	mdi_unit_t	*ui = MDI_UNIT(mnum);
	mt_unit_t	*un;
	int		err;

	/* disallow layered opens (e.g., PrestoServe) */
	if (otyp == OTYP_LYR)
		return (EINVAL);

	/* single thread */
	un = (mt_unit_t *)md_unit_openclose_enter(ui);

	/* if already open, count open, return success */
	if (md_unit_isopen(ui)) {
		err = md_unit_incopen(mnum, flag, otyp);
		md_unit_openclose_exit(ui);
		if (err != 0)
			return (err);
		return (0);
	}

	/*
	 * For some reason, not all of the metatrans devices attached to
	 * this log were openable at snarf; try again now.  All of the
	 * underlying devices have to be openable for the roll thread to work.
	 */
	if (un->un_flags & TRANS_NEED_OPEN) {
		md_unit_openclose_exit(ui);
		ldl_open_underlying(un);
		if (un->un_flags & TRANS_NEED_OPEN)
			return (EINVAL);
		un = (mt_unit_t *)md_unit_openclose_enter(ui);
	}

	/* count open */
	err = md_unit_incopen(mnum, flag, otyp);
	md_unit_openclose_exit(ui);
	if (err != 0)
		return (err);

	/* return success */
	return (0);
}

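/*
 * Last-close processing: once the final reference is dropped, finish
 * any pending log detach or attach under the unit-array writer lock.
 */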
/*ARGSUSED1*/
static int
trans_close(
	dev_t		dev,
	int		flag,
	int		otyp,
	cred_t		*cred_p,
	int		md_oflags
)
{
	minor_t		mnum = getminor(dev);
	mdi_unit_t	*ui = MDI_UNIT(mnum);
	mt_unit_t	*un;
	int		err = 0;

	/* single thread */
	un = (mt_unit_t *)md_unit_openclose_enter(ui);

	/* count closed */
	if ((err = md_unit_decopen(mnum, otyp)) != 0) {
		md_unit_openclose_exit(ui);
		return (err);
	}

	/* if still open */
	if (md_unit_isopen(ui)) {
		md_unit_openclose_exit(ui);
		return (0);
	}
	md_unit_openclose_exit(ui);

	if (un->un_flags & TRANS_DETACHING) {
		/*
		 * prevent new opens and try to detach the log
		 */
		rw_enter(&md_unit_array_rw.lock, RW_WRITER);
		(void) trans_detach(un, 0);
		rw_exit(&md_unit_array_rw.lock);
	}
	if (un->un_flags & TRANS_ATTACHING) {
		/*
		 * prevent new opens and try to attach the log
		 */
		rw_enter(&md_unit_array_rw.lock, RW_WRITER);
		trans_attach(un, 1);
		rw_exit(&md_unit_array_rw.lock);
	}

	return (0);
}

static int
trans_imp_set(
	set_t	setno
)
{
	mt_unit32_od_t	*un32;
	ml_unit32_od_t	*ul32;
	mddb_recid_t	recid;
	int		gotsomething = 0;
	mddb_type_t	typ1;
	minor_t		*self_id;	/* minor needs to be updated */
	mddb_recid_t	*record_id;	/* record id needs to be updated */

	/*
	 * Do log first if there is any
	 * Note that trans record is always 32 bit
	 */
	typ1 = (mddb_type_t)md_getshared_key(setno,
	    trans_md_ops.md_driver.md_drivername);
	recid = mddb_makerecid(setno, 0);

	while ((recid = mddb_getnextrec(recid, typ1, LOG_REC)) > 0) {
		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
			continue;

		ul32 = (ml_unit32_od_t *)mddb_getrecaddr(recid);

		/*
		 * Trans log record is always in the old format
		 * Go ahead and update the record with the new set info
		 */
		record_id = &(ul32->un_recid);

		/*
		 * Mark the record and update it
		 */
		*record_id = MAKERECID(setno, DBID(*record_id));
		if (!md_update_minor(setno, mddb_getsidenum
		    (setno), ul32->un_key))
			goto out;
		mddb_setrecprivate(recid, MD_PRV_GOTIT);
	}


	/*
	 * Now do the master
	 */
	recid = mddb_makerecid(setno, 0);
	while ((recid = mddb_getnextrec(recid, typ1, TRANS_REC)) > 0) {
		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
			continue;

		un32 = (mt_unit32_od_t *)mddb_getrecaddr(recid);

		/*
		 * Trans master record is always in the old format
		 */
		self_id = &(un32->c.un_self_id);
		record_id = &(un32->c.un_record_id);

		/*
		 * Mark the record and update it
		 */
		*record_id = MAKERECID(setno, DBID(*record_id));
		*self_id = MD_MKMIN(setno, MD_MIN2UNIT(*self_id));
		if (!md_update_minor(setno, mddb_getsidenum
		    (setno), un32->un_m_key))
			goto out;
		mddb_setrecprivate(recid, MD_PRV_GOTIT);

		gotsomething = 1;
	}

out:
	return (gotsomething);
}

static md_named_services_t	trans_named_services[] = {
	{(intptr_t (*)()) trans_rename_listkids,	MDRNM_LIST_URKIDS },
	{(intptr_t (*)()) trans_rename_check,		MDRNM_CHECK },
	{(intptr_t (*)()) trans_renexch_update_kids,	MDRNM_UPDATE_KIDS },
	{(intptr_t (*)()) trans_rename_update_self,	MDRNM_UPDATE_SELF },
	{(intptr_t (*)()) trans_exchange_self_update_from_down,
						MDRNM_SELF_UPDATE_FROM_DOWN },
	{(intptr_t (*)()) trans_exchange_parent_update_to,
						MDRNM_PARENT_UPDATE_TO },
	{NULL,						0 }
};

md_ops_t trans_md_ops = {
	trans_open,		/* open */
	trans_close,		/* close */
	md_trans_strategy,	/* strategy */
	NULL,			/* print */
	NULL,			/* dump */
	md_trans_read,		/* read */
	md_trans_write,		/* write */
	md_trans_ioctl,		/* trans ioctl */
	trans_snarf,		/* trans_snarf */
	trans_halt,		/* halt */
	md_trans_aread,		/* aread */
	md_trans_awrite,	/* awrite */
	trans_imp_set,		/* import set */
	trans_named_services
};

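/*
 * Module init/fini helpers, invoked through MD_PLUGIN_MISC_MODULE:
 * bring up the log layer and create (or destroy) the parent and child
 * buf kmem caches used by the strategy path.
 */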
static void
init_init(void)
{
	_init_ldl();
	ASSERT(_init_debug());
	trans_parent_cache = kmem_cache_create("md_trans_parent",
	    sizeof (md_tps_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	trans_child_cache = kmem_cache_create("md_trans_child", biosize(), 0,
	    trans_child_constructor, trans_child_destructor,
	    NULL, NULL, NULL, 0);
}

static void
fini_uninit(void)
{
	ASSERT(_fini_debug());
	_fini_ldl();
	kmem_cache_destroy(trans_parent_cache);
	kmem_cache_destroy(trans_child_cache);
	trans_parent_cache = trans_child_cache = NULL;
}

/* define the module linkage */
MD_PLUGIN_MISC_MODULE("trans module", init_init(), fini_uninit())