1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/param.h> 28 #include <sys/systm.h> 29 #include <sys/conf.h> 30 #include <sys/debug.h> 31 #include <sys/file.h> 32 #include <sys/user.h> 33 #include <sys/uio.h> 34 #include <sys/dkio.h> 35 #include <sys/vtoc.h> 36 #include <sys/kmem.h> 37 #include <vm/page.h> 38 #include <sys/cmn_err.h> 39 #include <sys/sysmacros.h> 40 #include <sys/types.h> 41 #include <sys/mkdev.h> 42 #include <sys/stat.h> 43 #include <sys/open.h> 44 #include <sys/modctl.h> 45 #include <sys/ddi.h> 46 #include <sys/sunddi.h> 47 #include <sys/disp.h> 48 #include <sys/buf.h> 49 50 #include <sys/lvm/mdvar.h> 51 #include <sys/lvm/md_trans.h> 52 #include <sys/lvm/md_notify.h> 53 #include <sys/lvm/md_convert.h> 54 55 #include <sys/sysevent/eventdefs.h> 56 #include <sys/sysevent/svm.h> 57 58 md_ops_t trans_md_ops; 59 #ifndef lint 60 char _depends_on[] = "drv/md fs/ufs"; 61 md_ops_t *md_interface_ops = &trans_md_ops; 62 #endif /* lint */ 63 64 extern unit_t md_nunits; 65 extern set_t md_nsets; 66 extern md_set_t md_set[]; 67 extern int md_status; 68 extern major_t md_major; 69 70 extern int md_trans_ioctl(); 71 extern md_krwlock_t md_unit_array_rw; 72 73 extern mdq_anchor_t md_done_daemon; 74 75 extern int md_in_upgrade; 76 77 static kmem_cache_t *trans_parent_cache = NULL; 78 kmem_cache_t *trans_child_cache = NULL; 79 80 #ifdef DEBUG 81 /* 82 * ROUTINES FOR TESTING: 83 */ 84 static int 85 _init_debug() 86 { 87 extern int _init_ioctl(); 88 89 return (_init_ioctl()); 90 } 91 static int 92 _fini_debug() 93 { 94 extern int _fini_ioctl(); 95 int err = 0; 96 97 err = _fini_ioctl(); 98 return (err); 99 } 100 101 #endif /* DEBUG */ 102 103 /* 104 * BEGIN RELEASE DEBUG 105 * The following routines remain in the released product for testability 106 */ 107 int 108 trans_done_shadow(buf_t *bp) 109 { 110 buf_t *pb; 111 md_tps_t *ps = (md_tps_t *)bp->b_chain; 112 int rv = 0; 113 114 pb = ps->ps_bp; 115 mutex_enter(&ps->ps_mx); 116 ps->ps_count--; 117 if (ps->ps_count > 0) { 118 if ((bp->b_flags & B_ERROR) != 0) { 119 pb->b_flags |= B_ERROR; 120 pb->b_error = bp->b_error; 121 } 122 mutex_exit(&ps->ps_mx); 123 kmem_cache_free(trans_child_cache, bp); 124 } else { 125 mutex_exit(&ps->ps_mx); 126 mutex_destroy(&ps->ps_mx); 127 rv = trans_done(bp); 128 } 129 return (rv); 130 } 131 132 static void 133 shadow_debug(mt_unit_t *un, /* trans unit info */ 134 buf_t *pb, /* primary buffer */ 135 md_tps_t *ps, /* trans parent save */ 136 buf_t *cb, /* buffer for writing to master */ 137 int flag, 138 void *private) 139 { 140 buf_t *sb; /* Shadow buffer */ 141 142 mutex_init(&ps->ps_mx, NULL, MUTEX_DEFAULT, NULL); 143 ps->ps_count = 2; /* Write child buffer & shadow */ 144 cb->b_iodone = trans_done_shadow; 145 sb = kmem_cache_alloc(trans_child_cache, MD_ALLOCFLAGS); 146 trans_child_init(sb); 147 sb = bioclone(pb, 0, pb->b_bcount, md_dev64_to_dev(un->un_s_dev), 148 pb->b_blkno, trans_done_shadow, sb, KM_NOSLEEP); 149 150 sb->b_flags |= B_ASYNC; 151 sb->b_chain = (void *)ps; 152 md_call_strategy(sb, flag | MD_STR_MAPPED, private); 153 } 154 /* 155 * END RELEASE DEBUG 156 */ 157 158 /* 159 * COMMON MEMORY ALLOCATION ROUTINES (so that we can discover leaks) 160 */ 161 void * 162 md_trans_zalloc(size_t nb) 163 { 164 TRANSSTATS(ts_trans_zalloc); 165 TRANSSTATSADD(ts_trans_alloced, nb); 166 return (kmem_zalloc(nb, KM_SLEEP)); 167 } 168 void * 169 md_trans_alloc(size_t nb) 170 { 171 TRANSSTATS(ts_trans_alloc); 172 TRANSSTATSADD(ts_trans_alloced, nb); 173 return (kmem_alloc(nb, KM_SLEEP)); 174 } 175 void 176 md_trans_free(void *va, size_t nb) 177 { 178 TRANSSTATS(ts_trans_free); 179 TRANSSTATSADD(ts_trans_freed, nb); 180 if (nb) 181 kmem_free(va, nb); 182 } 183 184 static void 185 trans_parent_init(md_tps_t *ps) 186 { 187 bzero(ps, sizeof (md_tps_t)); 188 } 189 190 /*ARGSUSED1*/ 191 int 192 trans_child_constructor(void *p, void *d1, int d2) 193 { 194 bioinit(p); 195 return (0); 196 } 197 198 void 199 trans_child_init(struct buf *bp) 200 { 201 md_bioreset(bp); 202 } 203 204 /*ARGSUSED1*/ 205 void 206 trans_child_destructor(void *p, void *d) 207 { 208 biofini(p); 209 } 210 211 void 212 trans_commit(mt_unit_t *un, int domstr) 213 { 214 mddb_recid_t recids[4]; 215 md_unit_t *su; 216 int ri = 0; 217 218 if (md_get_setstatus(MD_UN2SET(un)) & MD_SET_STALE) 219 return; 220 221 recids[ri++] = un->c.un_record_id; 222 223 if (domstr) 224 if (md_getmajor(un->un_m_dev) == md_major) { 225 su = MD_UNIT(md_getminor(un->un_m_dev)); 226 recids[ri++] = su->c.un_record_id; 227 } 228 229 if (ri == 0) 230 return; 231 recids[ri] = 0; 232 233 uniqtime32(&un->un_timestamp); 234 mddb_commitrecs_wrapper(recids); 235 } 236 237 void 238 trans_close_all_devs(mt_unit_t *un) 239 { 240 if ((un->un_flags & TRANS_NEED_OPEN) == 0) { 241 md_layered_close(un->un_m_dev, MD_OFLG_NULL); 242 if (un->un_l_unit) 243 ldl_close_dev(un->un_l_unit); 244 un->un_flags |= TRANS_NEED_OPEN; 245 } 246 } 247 248 int 249 trans_open_all_devs(mt_unit_t *un) 250 { 251 int err; 252 minor_t mnum = MD_SID(un); 253 md_dev64_t tmpdev = un->un_m_dev; 254 set_t setno = MD_MIN2SET(MD_SID(un)); 255 side_t side = mddb_getsidenum(setno); 256 257 /* 258 * Do the open by device id if it is regular device 259 */ 260 if ((md_getmajor(tmpdev) != md_major) && 261 md_devid_found(setno, side, un->un_m_key) == 1) { 262 tmpdev = md_resolve_bydevid(mnum, tmpdev, un->un_m_key); 263 } 264 err = md_layered_open(mnum, &tmpdev, MD_OFLG_NULL); 265 un->un_m_dev = tmpdev; 266 267 if (err) 268 return (ENXIO); 269 270 if (un->un_l_unit) { 271 err = ldl_open_dev(un, un->un_l_unit); 272 if (err) { 273 md_layered_close(tmpdev, MD_OFLG_NULL); 274 return (ENXIO); 275 } 276 } 277 return (0); 278 } 279 280 uint_t mt_debug = 0; 281 282 int 283 trans_build_incore(void *p, int snarfing) 284 { 285 mt_unit_t *un = (mt_unit_t *)p; 286 minor_t mnum; 287 set_t setno; 288 289 /* 290 * initialize debug mode and always start with no shadowing. 291 */ 292 if (!snarfing) 293 un->un_debug = mt_debug; 294 un->un_s_dev = NODEV64; 295 296 mnum = MD_SID(un); 297 298 if (MD_UNIT(mnum) != NULL) 299 return (0); 300 301 setno = MD_MIN2SET(mnum); 302 303 /* 304 * If snarfing the metatrans device, 305 * then remake the device number 306 */ 307 if (snarfing) { 308 un->un_m_dev = md_getdevnum(setno, mddb_getsidenum(setno), 309 un->un_m_key, MD_NOTRUST_DEVT); 310 } 311 312 /* 313 * db rec is partially deleted; finish the db delete later 314 */ 315 if (MD_STATUS(un) & MD_UN_BEING_RESET) { 316 mddb_setrecprivate(un->c.un_record_id, MD_PRV_PENDCLEAN); 317 return (1); 318 } 319 320 /* 321 * With the current device id implementation there is possibility 322 * that we may have NODEV if the underlying can't be resolved at 323 * snarf time. If this is the case we want to be consistent with 324 * the normal behavior and continue to allow the snarf of unit 325 * and resolve the devt at the open time 326 */ 327 if ((md_getmajor(un->un_m_dev) == md_major) && 328 (md_dev_exists(un->un_m_dev) == 0)) { 329 return (1); 330 } 331 332 /* 333 * retain the detach status; reset open status 334 */ 335 un->un_flags &= (TRANS_DETACHING | TRANS_DETACHED); 336 un->un_flags |= TRANS_NEED_OPEN; 337 if ((un->un_flags & TRANS_DETACHED) == 0) 338 un->un_flags |= TRANS_ATTACHING; 339 340 /* 341 * log device not set up yet; try again later 342 */ 343 if ((un->un_flags & TRANS_DETACHED) == 0) 344 if (ldl_findlog(un->un_l_recid) == NULL) 345 return (1); 346 347 /* 348 * initialize incore fields 349 */ 350 un->un_next = NULL; 351 un->un_l_unit = NULL; 352 un->un_deltamap = NULL; 353 un->un_udmap = NULL; 354 un->un_logmap = NULL; 355 un->un_matamap = NULL; 356 un->un_shadowmap = NULL; 357 un->un_ut = NULL; 358 un->un_logreset = 0; 359 un->un_dev = md_makedevice(md_major, mnum); 360 MD_STATUS(un) = 0; 361 362 /* necessary because capability didn't exist pre-4.1 */ 363 MD_CAPAB(un) = (MD_CAN_META_CHILD & ~MD_CAN_PARENT); 364 365 /* 366 * attach the log 367 */ 368 trans_attach(un, 0); 369 370 /* 371 * check for master dev dynconcat 372 */ 373 if (md_getmajor(un->un_m_dev) == md_major) { 374 struct mdc_unit *c; 375 376 c = MD_UNIT(md_getminor(un->un_m_dev)); 377 un->c.un_total_blocks = c->un_total_blocks; 378 } 379 380 /* place various information in the in-core data structures */ 381 md_nblocks_set(mnum, un->c.un_total_blocks); 382 MD_UNIT(mnum) = un; 383 384 return (0); 385 } 386 387 int 388 trans_detach(mt_unit_t *un, int force) 389 { 390 mdi_unit_t *ui = MDI_UNIT(MD_SID(un)); 391 int error = 0; 392 393 /* 394 * The caller is responsible for single-threading this routine. 395 */ 396 397 if (ui == NULL) 398 return (0); 399 400 /* 401 * already detached or the log isn't attached yet; do nothing 402 */ 403 if (un->un_flags & (TRANS_DETACHED | TRANS_ATTACHING)) 404 return (0); 405 406 /* 407 * set state to detaching 408 */ 409 if (force || !md_unit_isopen(ui)) { 410 un->un_flags |= TRANS_DETACHING; 411 if (!MD_UPGRADE) { 412 trans_commit(un, 0); 413 } 414 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DETACHING, TAG_METADEVICE, 415 MD_UN2SET(un), MD_SID(un)); 416 } 417 418 /* 419 * device is busy 420 */ 421 if (md_unit_isopen(ui)) 422 return (EBUSY); 423 424 /* 425 * detach the log 426 * if successful 427 * flags committed to TRANS_DETACHED in database 428 * un->un_l_unit set to NULL 429 * no error returned 430 */ 431 error = ldl_reset(un, 1, force); 432 if (error) 433 return (error); 434 435 /* 436 * commit to database 437 */ 438 if (!MD_UPGRADE) { 439 trans_commit(un, 0); 440 } 441 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DETACH, TAG_METADEVICE, MD_UN2SET(un), 442 MD_SID(un)); 443 444 return (0); 445 } 446 447 void 448 trans_attach(mt_unit_t *un, int attaching) 449 { 450 mdi_unit_t *ui = MDI_UNIT(MD_SID(un)); 451 ml_unit_t *ul; 452 453 /* 454 * called from snarf, set, and attach. Hence, the attaching param 455 * The caller is responsible for single-threading this routine. 456 */ 457 458 /* 459 * not attaching; do nothing 460 */ 461 if ((un->un_flags & TRANS_ATTACHING) == 0) 462 return; 463 464 /* 465 * find log unit struct 466 */ 467 ul = ldl_findlog(un->un_l_recid); 468 if (ul == NULL) 469 return; 470 un->un_l_dev = ul->un_dev; 471 472 /* 473 * device is busy; do nothing 474 */ 475 if (attaching && md_unit_isopen(ui)) 476 return; 477 /* 478 * other functions use non-NULL un_l_unit as detach/attach flag 479 */ 480 un->un_l_unit = ul; 481 482 /* 483 * add metatrans device to the log's list of mt devices 484 */ 485 ldl_utadd(un); 486 487 /* 488 * attached 489 */ 490 un->un_flags &= ~TRANS_ATTACHING; 491 492 } 493 494 int 495 trans_reset(mt_unit_t *un, minor_t mnum, int removing, int force) 496 { 497 sv_dev_t sv; 498 mddb_recid_t vtoc_id; 499 int error = 0; 500 501 /* 502 * reset log, maps, and ufs interface 503 */ 504 error = ldl_reset(un, removing, force); 505 if (error) 506 return (error); 507 508 /* 509 * done with underyling devices 510 */ 511 trans_close_all_devs(un); 512 513 md_destroy_unit_incore(mnum, &trans_md_ops); 514 515 md_nblocks_set(mnum, -1ULL); 516 MD_UNIT(mnum) = NULL; 517 518 if (!removing) 519 return (0); 520 521 md_reset_parent(un->un_m_dev); 522 MD_STATUS(un) |= MD_UN_BEING_RESET; 523 trans_commit(un, 1); 524 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, TAG_METADEVICE, MD_UN2SET(un), 525 MD_SID(un)); 526 527 /* Save the mstr key */ 528 sv.setno = MD_MIN2SET(mnum); 529 sv.key = un->un_m_key; 530 531 vtoc_id = un->c.un_vtoc_id; 532 533 mddb_deleterec_wrapper(un->c.un_record_id); 534 535 /* Remove the vtoc, if present */ 536 if (vtoc_id) 537 mddb_deleterec_wrapper(vtoc_id); 538 md_rem_names(&sv, 1); 539 return (0); 540 } 541 542 static void 543 trans_wait_panic(struct buf *cb) 544 { 545 while ((cb->b_flags & B_DONE) == 0) { 546 md_daemon(1, &md_done_daemon); 547 drv_usecwait(10); 548 } 549 } 550 551 static void 552 trans_error(md_tps_t *ps) 553 { 554 md_dev64_t md_dev; 555 md_dev64_t m_dev; 556 char *str; 557 struct buf *pb; 558 mdi_unit_t *ui; 559 560 pb = ps->ps_bp; 561 ui = ps->ps_ui; 562 563 /* 564 * gather up params for cmn_err 565 */ 566 if (pb->b_flags & B_READ) 567 str = "read"; 568 else 569 str = "write"; 570 md_dev = md_expldev(pb->b_edev); 571 m_dev = ps->ps_un->un_m_dev; 572 573 /* 574 * free up the resources for this request and done the errored buf 575 */ 576 md_kstat_done(ui, pb, 0); 577 kmem_cache_free(trans_parent_cache, ps); 578 md_unit_readerexit(ui); 579 md_biodone(pb); 580 581 /* 582 * print pretty error message 583 */ 584 cmn_err(CE_WARN, "md: %s: %s error on %s", 585 md_shortname(md_getminor(md_dev)), str, 586 md_devname(MD_DEV2SET(md_dev), m_dev, NULL, 0)); 587 } 588 589 int 590 trans_done(struct buf *cb) 591 { 592 struct buf *pb; 593 mdi_unit_t *ui; 594 md_tps_t *ps; 595 596 ps = (md_tps_t *)cb->b_chain; 597 pb = ps->ps_bp; 598 ui = ps->ps_ui; 599 600 if (cb->b_flags & B_ERROR) { 601 pb->b_flags |= B_ERROR; 602 pb->b_error = cb->b_error; 603 /* 604 * device not in hard error state; report error 605 */ 606 if (!ldl_isherror(ps->ps_un->un_l_unit)) { 607 daemon_request(&md_done_daemon, trans_error, 608 (daemon_queue_t *)ps, REQ_OLD); 609 610 if (cb->b_flags & B_REMAPPED) 611 bp_mapout(cb); 612 if (panicstr) 613 cb->b_flags |= B_DONE; 614 else 615 kmem_cache_free(trans_child_cache, cb); 616 617 return (1); 618 } 619 } 620 621 if (cb->b_flags & B_REMAPPED) 622 bp_mapout(cb); 623 624 if (panicstr) 625 cb->b_flags |= B_DONE; 626 else 627 kmem_cache_free(trans_child_cache, cb); 628 kmem_cache_free(trans_parent_cache, ps); 629 md_kstat_done(ui, pb, 0); 630 md_unit_readerexit(ui); 631 md_biodone(pb); 632 633 return (0); 634 } 635 636 static void 637 md_trans_strategy(buf_t *pb, int flag, void *private) 638 { 639 md_tps_t *ps; 640 buf_t *cb; /* child buf pointer */ 641 mt_unit_t *un; 642 mdi_unit_t *ui; 643 644 ui = MDI_UNIT(getminor(pb->b_edev)); 645 646 md_kstat_waitq_enter(ui); 647 648 un = (mt_unit_t *)md_unit_readerlock(ui); 649 650 if (md_inc_iocount(MD_MIN2SET(getminor(pb->b_edev))) != 0) { 651 pb->b_flags |= B_ERROR; 652 pb->b_error = ENXIO; 653 pb->b_resid = pb->b_bcount; 654 md_kstat_waitq_exit(ui); 655 md_unit_readerexit(ui); 656 biodone(pb); 657 return; 658 } 659 660 ASSERT(!(flag & MD_STR_NOTTOP)); 661 662 /* check and map */ 663 if (md_checkbuf(ui, (md_unit_t *)un, pb) != 0) { 664 md_kstat_waitq_exit(ui); 665 return; 666 } 667 668 bp_mapin(pb); 669 670 ps = kmem_cache_alloc(trans_parent_cache, MD_ALLOCFLAGS); 671 trans_parent_init(ps); 672 673 /* 674 * Save essential information from the original buffhdr 675 * in the md_save structure. 676 */ 677 ps->ps_un = un; 678 ps->ps_ui = ui; 679 ps->ps_bp = pb; 680 681 cb = kmem_cache_alloc(trans_child_cache, MD_ALLOCFLAGS); 682 trans_child_init(cb); 683 684 cb = bioclone(pb, 0, pb->b_bcount, md_dev64_to_dev(un->un_m_dev), 685 pb->b_blkno, trans_done, cb, KM_NOSLEEP); 686 687 cb->b_chain = (void *)ps; 688 689 /* 690 * RELEASE DEBUG 691 * The following calls shadow debug for testing purposes if we are 692 * writing and if shadowing is turned on. 693 */ 694 if ((un->un_s_dev != NODEV64) && 695 ((pb->b_flags & B_READ) == 0)) 696 shadow_debug(un, pb, ps, cb, flag, private); 697 698 md_kstat_waitq_to_runq(ui); 699 700 (void) md_call_strategy(cb, flag | MD_STR_MAPPED | MD_NOBLOCK, private); 701 702 /* 703 * panic in progress; process daemon queues 704 */ 705 if (panicstr) { 706 trans_wait_panic(cb); 707 kmem_cache_free(trans_child_cache, cb); 708 } 709 } 710 711 /* ARGSUSED */ 712 static int 713 md_trans_read(dev_t dev, struct uio *uio, cred_t *credp) 714 { 715 int error; 716 717 if ((error = md_chk_uio(uio)) != 0) 718 return (error); 719 720 return (physio(mdstrategy, NULL, dev, B_READ, minphys, uio)); 721 } 722 723 /* ARGSUSED */ 724 static int 725 md_trans_aread(dev_t dev, struct aio_req *aio, cred_t *credp) 726 { 727 int error; 728 729 if ((error = md_chk_uio(aio->aio_uio)) != 0) 730 return (error); 731 732 return (aphysio(mdstrategy, anocancel, dev, B_READ, minphys, aio)); 733 } 734 735 /* ARGSUSED */ 736 static int 737 md_trans_write(dev_t dev, struct uio *uio, cred_t *credp) 738 { 739 int error; 740 741 if ((error = md_chk_uio(uio)) != 0) 742 return (error); 743 744 return (physio(mdstrategy, NULL, dev, B_WRITE, minphys, uio)); 745 } 746 747 /* ARGSUSED */ 748 static int 749 md_trans_awrite(dev_t dev, struct aio_req *aio, cred_t *credp) 750 { 751 int error; 752 753 if ((error = md_chk_uio(aio->aio_uio)) != 0) 754 return (error); 755 756 return (aphysio(mdstrategy, anocancel, dev, B_WRITE, minphys, aio)); 757 } 758 759 static void 760 trans_cleanup(mt_unit_t *un) 761 { 762 sv_dev_t sv; 763 764 MD_STATUS(un) |= MD_UN_LOG_DELETED; 765 trans_commit(un, 0); 766 767 /* Save the mstr key */ 768 sv.setno = MD_UN2SET(un); 769 sv.key = un->un_m_key; 770 771 mddb_deleterec_wrapper(un->c.un_record_id); 772 773 md_rem_names(&sv, 1); 774 } 775 776 static int 777 trans_snarf(md_snarfcmd_t cmd, set_t setno) 778 { 779 mt_unit_t *un; 780 ml_unit_t *ul; 781 mddb_recid_t recid; 782 int gotsomething; 783 mddb_type_t typ1; 784 int all_trans_gotten; 785 mddb_de_ic_t *dep; 786 mddb_rb32_t *rbp; 787 size_t newreqsize; 788 static int trans_found = 0; 789 790 791 792 if (cmd == MD_SNARF_CLEANUP) { 793 794 if (md_get_setstatus(setno) & MD_SET_STALE) 795 return (0); 796 797 /* 798 * clean up partially cleared trans devices 799 */ 800 typ1 = (mddb_type_t)md_getshared_key(setno, 801 trans_md_ops.md_driver.md_drivername); 802 recid = mddb_makerecid(setno, 0); 803 while ((recid = mddb_getnextrec(recid, typ1, TRANS_REC)) > 0) { 804 un = (mt_unit_t *)mddb_getrecaddr(recid); 805 (void) trans_detach(un, 1); 806 if (mddb_getrecprivate(recid) & MD_PRV_CLEANUP) { 807 trans_cleanup(un); 808 recid = mddb_makerecid(setno, 0); 809 } 810 } 811 /* 812 * clean up partially cleared log devices 813 */ 814 recid = mddb_makerecid(setno, 0); 815 while ((recid = mddb_getnextrec(recid, typ1, LOG_REC)) > 0) { 816 if (mddb_getrecprivate(recid) & MD_PRV_CLEANUP) { 817 ul = (ml_unit_t *)mddb_getrecaddr(recid); 818 ldl_cleanup(ul); 819 recid = mddb_makerecid(setno, 0); 820 } 821 } 822 823 return (0); 824 } 825 826 /* 827 * must snarf up the log devices first 828 */ 829 gotsomething = 0; 830 all_trans_gotten = 1; 831 typ1 = (mddb_type_t)md_getshared_key(setno, 832 trans_md_ops.md_driver.md_drivername); 833 recid = mddb_makerecid(setno, 0); 834 while ((recid = mddb_getnextrec(recid, typ1, LOG_REC)) > 0) { 835 ml_unit_t *big_ul; 836 ml_unit32_od_t *small_ul; 837 838 if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 839 continue; 840 841 small_ul = (ml_unit32_od_t *)mddb_getrecaddr(recid); 842 dep = mddb_getrecdep(recid); 843 dep->de_flags = MDDB_F_TRANS_LOG; 844 rbp = dep->de_rb; 845 /* 846 * As trans records are always old records, 847 * we have to check if this record already has been converted. 848 * We don't want to do that work twice. 849 */ 850 if ((rbp->rb_private & MD_PRV_CONVD) == 0) { 851 newreqsize = sizeof (ml_unit_t); 852 big_ul = (ml_unit_t *)kmem_zalloc(newreqsize, KM_SLEEP); 853 trans_log_convert((caddr_t)small_ul, (caddr_t)big_ul, 854 SMALL_2_BIG); 855 kmem_free(small_ul, dep->de_reqsize); 856 /* 857 * Update userdata and incore userdata 858 * incores are at the end of ul 859 */ 860 dep->de_rb_userdata_ic = big_ul; 861 dep->de_rb_userdata = big_ul; 862 dep->de_icreqsize = newreqsize; 863 rbp->rb_private |= MD_PRV_CONVD; 864 ul = big_ul; 865 } else { 866 /* already converted, just set the pointer */ 867 ul = dep->de_rb_userdata; 868 } 869 all_trans_gotten = 0; 870 if (ldl_build_incore(ul, 1) == 0) { 871 mddb_setrecprivate(recid, MD_PRV_GOTIT); 872 gotsomething = 1; 873 } 874 } 875 876 /* 877 * now snarf up metatrans devices 878 */ 879 gotsomething = 0; 880 recid = mddb_makerecid(setno, 0); 881 while ((recid = mddb_getnextrec(recid, typ1, TRANS_REC)) > 0) { 882 mt_unit_t *big_un; 883 mt_unit32_od_t *small_un; 884 885 if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 886 continue; 887 888 if ((trans_found == 0) && (!MD_UPGRADE)) { 889 cmn_err(CE_WARN, MD_EOF_TRANS_MSG MD_EOF_TRANS_WARNING); 890 trans_found = 1; 891 } 892 893 small_un = (mt_unit32_od_t *)mddb_getrecaddr(recid); 894 895 dep = mddb_getrecdep(recid); 896 dep->de_flags = MDDB_F_TRANS_MASTER; 897 rbp = dep->de_rb; 898 /* 899 * As trans records are always old records, 900 * we have to check if this record already has been converted. 901 * We don't want to do that work twice. 902 */ 903 if ((rbp->rb_private & MD_PRV_CONVD) == 0) { 904 newreqsize = sizeof (mt_unit_t); 905 big_un = (mt_unit_t *)kmem_zalloc(newreqsize, KM_SLEEP); 906 trans_master_convert((caddr_t)small_un, (caddr_t)big_un, 907 SMALL_2_BIG); 908 kmem_free(small_un, dep->de_reqsize); 909 /* 910 * Update userdata and incore userdata 911 * incores are at the end of ul 912 */ 913 dep->de_rb_userdata_ic = big_un; 914 dep->de_rb_userdata = big_un; 915 dep->de_icreqsize = newreqsize; 916 rbp->rb_private |= MD_PRV_CONVD; 917 un = big_un; 918 un->c.un_revision &= ~MD_64BIT_META_DEV; 919 } else { 920 /* already converted, just set the pointer */ 921 un = dep->de_rb_userdata; 922 } 923 924 /* 925 * Create minor node for snarfed entry. 926 */ 927 (void) md_create_minor_node(MD_MIN2SET(MD_SID(un)), MD_SID(un)); 928 929 if (MD_UNIT(MD_SID(un)) != NULL) { 930 mddb_setrecprivate(recid, MD_PRV_PENDDEL); 931 continue; 932 } 933 934 all_trans_gotten = 0; 935 if (trans_build_incore(un, 1) == 0) { 936 mddb_setrecprivate(recid, MD_PRV_GOTIT); 937 md_create_unit_incore(MD_SID(un), &trans_md_ops, 0); 938 gotsomething = 1; 939 } 940 } 941 942 if (!all_trans_gotten) 943 return (gotsomething); 944 945 recid = mddb_makerecid(setno, 0); 946 while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) 947 if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT)) 948 mddb_setrecprivate(recid, MD_PRV_PENDDEL); 949 return (0); 950 } 951 952 static int 953 trans_halt(md_haltcmd_t cmd, set_t setno) 954 { 955 unit_t i; 956 mdi_unit_t *ui; 957 minor_t mnum; 958 mt_unit_t *un; 959 960 if (cmd == MD_HALT_CLOSE) { 961 for (i = 0; i < md_nunits; i++) { 962 mnum = MD_MKMIN(setno, i); 963 if ((ui = MDI_UNIT(mnum)) == NULL) 964 continue; 965 if (ui->ui_opsindex != trans_md_ops.md_selfindex) 966 continue; 967 if (md_unit_isopen(ui)) { 968 return (1); 969 } 970 } 971 for (i = 0; i < md_nunits; i++) { 972 mnum = MD_MKMIN(setno, i); 973 if ((ui = MDI_UNIT(mnum)) == NULL) 974 continue; 975 if (ui->ui_opsindex != trans_md_ops.md_selfindex) 976 continue; 977 un = (mt_unit_t *)MD_UNIT(mnum); 978 if ((un->un_flags & TRANS_NEED_OPEN) == 0) { 979 trans_close_all_devs(un); 980 } 981 } 982 return (0); 983 } 984 985 if (cmd == MD_HALT_OPEN) { 986 for (i = 0; i < md_nunits; i++) { 987 mnum = MD_MKMIN(setno, i); 988 if ((ui = MDI_UNIT(mnum)) == NULL) 989 continue; 990 if (ui->ui_opsindex != trans_md_ops.md_selfindex) 991 continue; 992 ldl_open_underlying((mt_unit_t *)MD_UNIT(mnum)); 993 } 994 return (0); 995 } 996 997 if (cmd == MD_HALT_CHECK) { 998 for (i = 0; i < md_nunits; i++) { 999 mnum = MD_MKMIN(setno, i); 1000 if ((ui = MDI_UNIT(mnum)) == NULL) 1001 continue; 1002 if (ui->ui_opsindex != trans_md_ops.md_selfindex) 1003 continue; 1004 if (md_unit_isopen(ui)) { 1005 return (1); 1006 } 1007 } 1008 return (0); 1009 } 1010 if (cmd == MD_HALT_DOIT) { 1011 for (i = 0; i < md_nunits; i++) { 1012 mnum = MD_MKMIN(setno, i); 1013 if ((ui = MDI_UNIT(mnum)) == NULL) 1014 continue; 1015 if (ui->ui_opsindex != trans_md_ops.md_selfindex) 1016 continue; 1017 (void) trans_reset((mt_unit_t *)MD_UNIT(mnum), mnum, 1018 0, 1); 1019 } 1020 return (0); 1021 } 1022 if (cmd == MD_HALT_UNLOAD) 1023 return (0); 1024 1025 return (1); 1026 } 1027 1028 /*ARGSUSED3*/ 1029 static int 1030 trans_open( 1031 dev_t *dev, 1032 int flag, 1033 int otyp, 1034 cred_t *cred_p, 1035 int md_oflags 1036 ) 1037 { 1038 minor_t mnum = getminor(*dev); 1039 mdi_unit_t *ui = MDI_UNIT(mnum); 1040 mt_unit_t *un; 1041 int err; 1042 1043 /* disallow layered opens (e.g., PrestoServe) */ 1044 if (otyp == OTYP_LYR) 1045 return (EINVAL); 1046 1047 /* single thread */ 1048 un = (mt_unit_t *)md_unit_openclose_enter(ui); 1049 1050 /* if already open, count open, return success */ 1051 if (md_unit_isopen(ui)) { 1052 err = md_unit_incopen(mnum, flag, otyp); 1053 md_unit_openclose_exit(ui); 1054 if (err != 0) 1055 return (err); 1056 return (0); 1057 } 1058 1059 /* 1060 * For some reason, not all of the metatrans devices attached to 1061 * this log were openable at snarf; try again now. All of the 1062 * underlying devices have to be openable for the roll thread to work. 1063 */ 1064 if (un->un_flags & TRANS_NEED_OPEN) { 1065 md_unit_openclose_exit(ui); 1066 ldl_open_underlying(un); 1067 if (un->un_flags & TRANS_NEED_OPEN) 1068 return (EINVAL); 1069 un = (mt_unit_t *)md_unit_openclose_enter(ui); 1070 } 1071 1072 /* count open */ 1073 err = md_unit_incopen(mnum, flag, otyp); 1074 md_unit_openclose_exit(ui); 1075 if (err != 0) 1076 return (err); 1077 1078 /* return success */ 1079 return (0); 1080 } 1081 1082 /*ARGSUSED1*/ 1083 static int 1084 trans_close( 1085 dev_t dev, 1086 int flag, 1087 int otyp, 1088 cred_t *cred_p, 1089 int md_oflags 1090 ) 1091 { 1092 minor_t mnum = getminor(dev); 1093 mdi_unit_t *ui = MDI_UNIT(mnum); 1094 mt_unit_t *un; 1095 int err = 0; 1096 1097 /* single thread */ 1098 un = (mt_unit_t *)md_unit_openclose_enter(ui); 1099 1100 /* count closed */ 1101 if ((err = md_unit_decopen(mnum, otyp)) != 0) { 1102 md_unit_openclose_exit(ui); 1103 return (err); 1104 } 1105 1106 /* if still open */ 1107 if (md_unit_isopen(ui)) { 1108 md_unit_openclose_exit(ui); 1109 return (0); 1110 } 1111 md_unit_openclose_exit(ui); 1112 1113 if (un->un_flags & TRANS_DETACHING) { 1114 /* 1115 * prevent new opens and try to detach the log 1116 */ 1117 rw_enter(&md_unit_array_rw.lock, RW_WRITER); 1118 (void) trans_detach(un, 0); 1119 rw_exit(&md_unit_array_rw.lock); 1120 } 1121 if (un->un_flags & TRANS_ATTACHING) { 1122 /* 1123 * prevent new opens and try to attach the log 1124 */ 1125 rw_enter(&md_unit_array_rw.lock, RW_WRITER); 1126 trans_attach(un, 1); 1127 rw_exit(&md_unit_array_rw.lock); 1128 } 1129 1130 return (0); 1131 } 1132 1133 static int 1134 trans_imp_set( 1135 set_t setno 1136 ) 1137 { 1138 mt_unit32_od_t *un32; 1139 ml_unit32_od_t *ul32; 1140 mddb_recid_t recid; 1141 int gotsomething = 0; 1142 mddb_type_t typ1; 1143 minor_t *self_id; /* minor needs to be updated */ 1144 mddb_recid_t *record_id; /* record id needs to be updated */ 1145 1146 /* 1147 * Do log first if there is any 1148 * Note that trans record is always 32 bit 1149 */ 1150 typ1 = (mddb_type_t)md_getshared_key(setno, 1151 trans_md_ops.md_driver.md_drivername); 1152 recid = mddb_makerecid(setno, 0); 1153 1154 while ((recid = mddb_getnextrec(recid, typ1, LOG_REC)) > 0) { 1155 if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 1156 continue; 1157 1158 ul32 = (ml_unit32_od_t *)mddb_getrecaddr(recid); 1159 1160 /* 1161 * Trans log record always is old format 1162 * Go ahead update the record with the new set info 1163 */ 1164 record_id = &(ul32->un_recid); 1165 1166 /* 1167 * Mark the record and update it 1168 */ 1169 *record_id = MAKERECID(setno, DBID(*record_id)); 1170 if (!md_update_minor(setno, mddb_getsidenum 1171 (setno), ul32->un_key)) 1172 goto out; 1173 mddb_setrecprivate(recid, MD_PRV_GOTIT); 1174 } 1175 1176 1177 /* 1178 * Now do the master 1179 */ 1180 recid = mddb_makerecid(setno, 0); 1181 while ((recid = mddb_getnextrec(recid, typ1, TRANS_REC)) > 0) { 1182 if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 1183 continue; 1184 1185 un32 = (mt_unit32_od_t *)mddb_getrecaddr(recid); 1186 1187 /* 1188 * Trans master record always is old format 1189 */ 1190 self_id = &(un32->c.un_self_id); 1191 record_id = &(un32->c.un_record_id); 1192 1193 /* 1194 * Mark the record and update it 1195 */ 1196 *record_id = MAKERECID(setno, DBID(*record_id)); 1197 *self_id = MD_MKMIN(setno, MD_MIN2UNIT(*self_id)); 1198 if (!md_update_minor(setno, mddb_getsidenum 1199 (setno), un32->un_m_key)) 1200 goto out; 1201 mddb_setrecprivate(recid, MD_PRV_GOTIT); 1202 1203 gotsomething = 1; 1204 } 1205 1206 out: 1207 return (gotsomething); 1208 } 1209 1210 static md_named_services_t trans_named_services[] = { 1211 {(intptr_t (*)()) trans_rename_listkids, MDRNM_LIST_URKIDS }, 1212 {(intptr_t (*)()) trans_rename_check, MDRNM_CHECK }, 1213 {(intptr_t (*)()) trans_renexch_update_kids, MDRNM_UPDATE_KIDS }, 1214 {(intptr_t (*)()) trans_rename_update_self, MDRNM_UPDATE_SELF }, 1215 {(intptr_t (*)()) trans_exchange_self_update_from_down, 1216 MDRNM_SELF_UPDATE_FROM_DOWN }, 1217 {(intptr_t (*)()) trans_exchange_parent_update_to, 1218 MDRNM_PARENT_UPDATE_TO }, 1219 {NULL, 0 } 1220 }; 1221 1222 md_ops_t trans_md_ops = { 1223 trans_open, /* open */ 1224 trans_close, /* close */ 1225 md_trans_strategy, /* strategy */ 1226 NULL, /* print */ 1227 NULL, /* dump */ 1228 md_trans_read, /* read */ 1229 md_trans_write, /* write */ 1230 md_trans_ioctl, /* trans ioctl */ 1231 trans_snarf, /* trans_snarf */ 1232 trans_halt, /* halt */ 1233 md_trans_aread, /* aread */ 1234 md_trans_awrite, /* awrite */ 1235 trans_imp_set, /* import set */ 1236 trans_named_services 1237 }; 1238 1239 static void 1240 init_init(void) 1241 { 1242 _init_ldl(); 1243 ASSERT(_init_debug()); 1244 trans_parent_cache = kmem_cache_create("md_trans_parent", 1245 sizeof (md_tps_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 1246 trans_child_cache = kmem_cache_create("md_trans_child", biosize(), 0, 1247 trans_child_constructor, trans_child_destructor, 1248 NULL, NULL, NULL, 0); 1249 } 1250 1251 static void 1252 fini_uninit(void) 1253 { 1254 ASSERT(_fini_debug()); 1255 _fini_ldl(); 1256 kmem_cache_destroy(trans_parent_cache); 1257 kmem_cache_destroy(trans_child_cache); 1258 trans_parent_cache = trans_child_cache = NULL; 1259 } 1260 1261 /* define the module linkage */ 1262 MD_PLUGIN_MISC_MODULE("trans module", init_init(), fini_uninit()) 1263