1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/param.h> 29 #include <sys/systm.h> 30 #include <sys/conf.h> 31 #include <sys/debug.h> 32 #include <sys/file.h> 33 #include <sys/user.h> 34 #include <sys/uio.h> 35 #include <sys/dkio.h> 36 #include <sys/vtoc.h> 37 #include <sys/kmem.h> 38 #include <vm/page.h> 39 #include <sys/cmn_err.h> 40 #include <sys/sysmacros.h> 41 #include <sys/types.h> 42 #include <sys/mkdev.h> 43 #include <sys/stat.h> 44 #include <sys/open.h> 45 #include <sys/modctl.h> 46 #include <sys/ddi.h> 47 #include <sys/sunddi.h> 48 #include <sys/disp.h> 49 #include <sys/buf.h> 50 51 #include <sys/lvm/mdvar.h> 52 #include <sys/lvm/md_trans.h> 53 #include <sys/lvm/md_notify.h> 54 #include <sys/lvm/md_convert.h> 55 56 #include <sys/sysevent/eventdefs.h> 57 #include <sys/sysevent/svm.h> 58 59 md_ops_t trans_md_ops; 60 #ifndef lint 61 char _depends_on[] = "drv/md fs/ufs"; 62 md_ops_t *md_interface_ops = &trans_md_ops; 63 #endif /* lint */ 64 65 extern unit_t md_nunits; 66 extern set_t md_nsets; 67 extern md_set_t md_set[]; 68 extern int md_status; 69 extern major_t md_major; 70 71 extern int md_trans_ioctl(); 72 extern md_krwlock_t md_unit_array_rw; 73 74 extern mdq_anchor_t md_done_daemon; 75 76 extern int md_in_upgrade; 77 78 static kmem_cache_t *trans_parent_cache = NULL; 79 kmem_cache_t *trans_child_cache = NULL; 80 81 #ifdef DEBUG 82 /* 83 * ROUTINES FOR TESTING: 84 */ 85 static int 86 _init_debug() 87 { 88 extern int _init_ioctl(); 89 90 return (_init_ioctl()); 91 } 92 static int 93 _fini_debug() 94 { 95 extern int _fini_ioctl(); 96 int err = 0; 97 98 err = _fini_ioctl(); 99 return (err); 100 } 101 102 #endif /* DEBUG */ 103 104 /* 105 * BEGIN RELEASE DEBUG 106 * The following routines remain in the released product for testability 107 */ 108 int 109 trans_done_shadow(buf_t *bp) 110 { 111 buf_t *pb; 112 md_tps_t *ps = (md_tps_t *)bp->b_chain; 113 int rv = 0; 114 115 pb = ps->ps_bp; 116 mutex_enter(&ps->ps_mx); 117 ps->ps_count--; 118 if (ps->ps_count > 0) { 119 if ((bp->b_flags & B_ERROR) != 0) { 120 pb->b_flags |= B_ERROR; 121 pb->b_error = bp->b_error; 122 } 123 mutex_exit(&ps->ps_mx); 124 kmem_cache_free(trans_child_cache, bp); 125 } else { 126 mutex_exit(&ps->ps_mx); 127 mutex_destroy(&ps->ps_mx); 128 rv = trans_done(bp); 129 } 130 return (rv); 131 } 132 133 static void 134 shadow_debug(mt_unit_t *un, /* trans unit info */ 135 buf_t *pb, /* primary buffer */ 136 md_tps_t *ps, /* trans parent save */ 137 buf_t *cb, /* buffer for writing to master */ 138 int flag, 139 void *private) 140 { 141 buf_t *sb; /* Shadow buffer */ 142 143 mutex_init(&ps->ps_mx, NULL, MUTEX_DEFAULT, NULL); 144 ps->ps_count = 2; /* Write child buffer & shadow */ 145 cb->b_iodone = trans_done_shadow; 146 sb = kmem_cache_alloc(trans_child_cache, MD_ALLOCFLAGS); 147 trans_child_init(sb); 148 sb = bioclone(pb, 0, pb->b_bcount, md_dev64_to_dev(un->un_s_dev), 149 pb->b_blkno, trans_done_shadow, sb, KM_NOSLEEP); 150 151 sb->b_flags |= B_ASYNC; 152 sb->b_chain = (void *)ps; 153 md_call_strategy(sb, flag | MD_STR_MAPPED, private); 154 } 155 /* 156 * END RELEASE DEBUG 157 */ 158 159 /* 160 * COMMON MEMORY ALLOCATION ROUTINES (so that we can discover leaks) 161 */ 162 void * 163 md_trans_zalloc(size_t nb) 164 { 165 TRANSSTATS(ts_trans_zalloc); 166 TRANSSTATSADD(ts_trans_alloced, nb); 167 return (kmem_zalloc(nb, KM_SLEEP)); 168 } 169 void * 170 md_trans_alloc(size_t nb) 171 { 172 TRANSSTATS(ts_trans_alloc); 173 TRANSSTATSADD(ts_trans_alloced, nb); 174 return (kmem_alloc(nb, KM_SLEEP)); 175 } 176 void 177 md_trans_free(void *va, size_t nb) 178 { 179 TRANSSTATS(ts_trans_free); 180 TRANSSTATSADD(ts_trans_freed, nb); 181 if (nb) 182 kmem_free(va, nb); 183 } 184 185 static void 186 trans_parent_init(md_tps_t *ps) 187 { 188 bzero(ps, sizeof (md_tps_t)); 189 } 190 191 /*ARGSUSED1*/ 192 int 193 trans_child_constructor(void *p, void *d1, int d2) 194 { 195 bioinit(p); 196 return (0); 197 } 198 199 void 200 trans_child_init(struct buf *bp) 201 { 202 md_bioreset(bp); 203 } 204 205 /*ARGSUSED1*/ 206 void 207 trans_child_destructor(void *p, void *d) 208 { 209 biofini(p); 210 } 211 212 void 213 trans_commit(mt_unit_t *un, int domstr) 214 { 215 mddb_recid_t recids[4]; 216 md_unit_t *su; 217 int ri = 0; 218 219 if (md_get_setstatus(MD_UN2SET(un)) & MD_SET_STALE) 220 return; 221 222 recids[ri++] = un->c.un_record_id; 223 224 if (domstr) 225 if (md_getmajor(un->un_m_dev) == md_major) { 226 su = MD_UNIT(md_getminor(un->un_m_dev)); 227 recids[ri++] = su->c.un_record_id; 228 } 229 230 if (ri == 0) 231 return; 232 recids[ri] = 0; 233 234 uniqtime32(&un->un_timestamp); 235 mddb_commitrecs_wrapper(recids); 236 } 237 238 void 239 trans_close_all_devs(mt_unit_t *un) 240 { 241 if ((un->un_flags & TRANS_NEED_OPEN) == 0) { 242 md_layered_close(un->un_m_dev, MD_OFLG_NULL); 243 if (un->un_l_unit) 244 ldl_close_dev(un->un_l_unit); 245 un->un_flags |= TRANS_NEED_OPEN; 246 } 247 } 248 249 int 250 trans_open_all_devs(mt_unit_t *un) 251 { 252 int err; 253 minor_t mnum = MD_SID(un); 254 md_dev64_t tmpdev = un->un_m_dev; 255 set_t setno = MD_MIN2SET(MD_SID(un)); 256 side_t side = mddb_getsidenum(setno); 257 258 /* 259 * Do the open by device id if it is regular device 260 */ 261 if ((md_getmajor(tmpdev) != md_major) && 262 md_devid_found(setno, side, un->un_m_key) == 1) { 263 tmpdev = md_resolve_bydevid(mnum, tmpdev, un->un_m_key); 264 } 265 err = md_layered_open(mnum, &tmpdev, MD_OFLG_NULL); 266 un->un_m_dev = tmpdev; 267 268 if (err) 269 return (ENXIO); 270 271 if (un->un_l_unit) { 272 err = ldl_open_dev(un, un->un_l_unit); 273 if (err) { 274 md_layered_close(tmpdev, MD_OFLG_NULL); 275 return (ENXIO); 276 } 277 } 278 return (0); 279 } 280 281 uint_t mt_debug = 0; 282 283 int 284 trans_build_incore(void *p, int snarfing) 285 { 286 mt_unit_t *un = (mt_unit_t *)p; 287 minor_t mnum; 288 set_t setno; 289 290 /* 291 * initialize debug mode and always start with no shadowing. 292 */ 293 if (!snarfing) 294 un->un_debug = mt_debug; 295 un->un_s_dev = NODEV64; 296 297 mnum = MD_SID(un); 298 299 if (MD_UNIT(mnum) != NULL) 300 return (0); 301 302 setno = MD_MIN2SET(mnum); 303 304 /* 305 * If snarfing the metatrans device, 306 * then remake the device number 307 */ 308 if (snarfing) { 309 un->un_m_dev = md_getdevnum(setno, mddb_getsidenum(setno), 310 un->un_m_key, MD_NOTRUST_DEVT); 311 } 312 313 /* 314 * db rec is partially deleted; finish the db delete later 315 */ 316 if (MD_STATUS(un) & MD_UN_BEING_RESET) { 317 mddb_setrecprivate(un->c.un_record_id, MD_PRV_PENDCLEAN); 318 return (1); 319 } 320 321 /* 322 * With the current device id implementation there is possibility 323 * that we may have NODEV if the underlying can't be resolved at 324 * snarf time. If this is the case we want to be consistent with 325 * the normal behavior and continue to allow the snarf of unit 326 * and resolve the devt at the open time 327 */ 328 if ((md_getmajor(un->un_m_dev) == md_major) && 329 (md_dev_exists(un->un_m_dev) == 0)) { 330 return (1); 331 } 332 333 /* 334 * retain the detach status; reset open status 335 */ 336 un->un_flags &= (TRANS_DETACHING | TRANS_DETACHED); 337 un->un_flags |= TRANS_NEED_OPEN; 338 if ((un->un_flags & TRANS_DETACHED) == 0) 339 un->un_flags |= TRANS_ATTACHING; 340 341 /* 342 * log device not set up yet; try again later 343 */ 344 if ((un->un_flags & TRANS_DETACHED) == 0) 345 if (ldl_findlog(un->un_l_recid) == NULL) 346 return (1); 347 348 /* 349 * initialize incore fields 350 */ 351 un->un_next = NULL; 352 un->un_l_unit = NULL; 353 un->un_deltamap = NULL; 354 un->un_udmap = NULL; 355 un->un_logmap = NULL; 356 un->un_matamap = NULL; 357 un->un_shadowmap = NULL; 358 un->un_ut = NULL; 359 un->un_logreset = 0; 360 un->un_dev = md_makedevice(md_major, mnum); 361 MD_STATUS(un) = 0; 362 363 /* necessary because capability didn't exist pre-4.1 */ 364 MD_CAPAB(un) = (MD_CAN_META_CHILD & ~MD_CAN_PARENT); 365 366 /* 367 * attach the log 368 */ 369 trans_attach(un, 0); 370 371 /* 372 * check for master dev dynconcat 373 */ 374 if (md_getmajor(un->un_m_dev) == md_major) { 375 struct mdc_unit *c; 376 377 c = MD_UNIT(md_getminor(un->un_m_dev)); 378 un->c.un_total_blocks = c->un_total_blocks; 379 } 380 381 MD_UNIT(mnum) = un; 382 383 return (0); 384 } 385 386 int 387 trans_detach(mt_unit_t *un, int force) 388 { 389 mdi_unit_t *ui = MDI_UNIT(MD_SID(un)); 390 int error = 0; 391 392 /* 393 * The caller is responsible for single-threading this routine. 394 */ 395 396 if (ui == NULL) 397 return (0); 398 399 /* 400 * already detached or the log isn't attached yet; do nothing 401 */ 402 if (un->un_flags & (TRANS_DETACHED | TRANS_ATTACHING)) 403 return (0); 404 405 /* 406 * set state to detaching 407 */ 408 if (force || !md_unit_isopen(ui)) { 409 un->un_flags |= TRANS_DETACHING; 410 if (!MD_UPGRADE) { 411 trans_commit(un, 0); 412 } 413 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DETACHING, TAG_METADEVICE, 414 MD_UN2SET(un), MD_SID(un)); 415 } 416 417 /* 418 * device is busy 419 */ 420 if (md_unit_isopen(ui)) 421 return (EBUSY); 422 423 /* 424 * detach the log 425 * if successful 426 * flags committed to TRANS_DETACHED in database 427 * un->un_l_unit set to NULL 428 * no error returned 429 */ 430 error = ldl_reset(un, 1, force); 431 if (error) 432 return (error); 433 434 /* 435 * commit to database 436 */ 437 if (!MD_UPGRADE) { 438 trans_commit(un, 0); 439 } 440 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DETACH, TAG_METADEVICE, MD_UN2SET(un), 441 MD_SID(un)); 442 443 return (0); 444 } 445 446 void 447 trans_attach(mt_unit_t *un, int attaching) 448 { 449 mdi_unit_t *ui = MDI_UNIT(MD_SID(un)); 450 ml_unit_t *ul; 451 452 /* 453 * called from snarf, set, and attach. Hence, the attaching param 454 * The caller is responsible for single-threading this routine. 455 */ 456 457 /* 458 * not attaching; do nothing 459 */ 460 if ((un->un_flags & TRANS_ATTACHING) == 0) 461 return; 462 463 /* 464 * find log unit struct 465 */ 466 ul = ldl_findlog(un->un_l_recid); 467 if (ul == NULL) 468 return; 469 un->un_l_dev = ul->un_dev; 470 471 /* 472 * device is busy; do nothing 473 */ 474 if (attaching && md_unit_isopen(ui)) 475 return; 476 /* 477 * other functions use non-NULL un_l_unit as detach/attach flag 478 */ 479 un->un_l_unit = ul; 480 481 /* 482 * add metatrans device to the log's list of mt devices 483 */ 484 ldl_utadd(un); 485 486 /* 487 * attached 488 */ 489 un->un_flags &= ~TRANS_ATTACHING; 490 491 } 492 493 int 494 trans_reset(mt_unit_t *un, minor_t mnum, int removing, int force) 495 { 496 sv_dev_t sv; 497 mddb_recid_t vtoc_id; 498 int error = 0; 499 500 /* 501 * reset log, maps, and ufs interface 502 */ 503 error = ldl_reset(un, removing, force); 504 if (error) 505 return (error); 506 507 /* 508 * done with underyling devices 509 */ 510 trans_close_all_devs(un); 511 512 md_destroy_unit_incore(mnum, &trans_md_ops); 513 514 MD_UNIT(mnum) = NULL; 515 516 if (!removing) 517 return (0); 518 519 md_reset_parent(un->un_m_dev); 520 MD_STATUS(un) |= MD_UN_BEING_RESET; 521 trans_commit(un, 1); 522 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, TAG_METADEVICE, MD_UN2SET(un), 523 MD_SID(un)); 524 525 /* Save the mstr key */ 526 sv.setno = MD_MIN2SET(mnum); 527 sv.key = un->un_m_key; 528 529 vtoc_id = un->c.un_vtoc_id; 530 531 mddb_deleterec_wrapper(un->c.un_record_id); 532 533 /* Remove the vtoc, if present */ 534 if (vtoc_id) 535 mddb_deleterec_wrapper(vtoc_id); 536 md_rem_names(&sv, 1); 537 return (0); 538 } 539 540 static void 541 trans_wait_panic(struct buf *cb) 542 { 543 while ((cb->b_flags & B_DONE) == 0) { 544 md_daemon(1, &md_done_daemon); 545 drv_usecwait(10); 546 } 547 } 548 549 static void 550 trans_error(md_tps_t *ps) 551 { 552 md_dev64_t md_dev; 553 md_dev64_t m_dev; 554 char *str; 555 struct buf *pb; 556 mdi_unit_t *ui; 557 558 pb = ps->ps_bp; 559 ui = ps->ps_ui; 560 561 /* 562 * gather up params for cmn_err 563 */ 564 if (pb->b_flags & B_READ) 565 str = "read"; 566 else 567 str = "write"; 568 md_dev = md_expldev(pb->b_edev); 569 m_dev = ps->ps_un->un_m_dev; 570 571 /* 572 * free up the resources for this request and done the errored buf 573 */ 574 md_kstat_done(ui, pb, 0); 575 kmem_cache_free(trans_parent_cache, ps); 576 md_unit_readerexit(ui); 577 md_biodone(pb); 578 579 /* 580 * print pretty error message 581 */ 582 cmn_err(CE_WARN, "md: %s: %s error on %s", 583 md_shortname(md_getminor(md_dev)), str, 584 md_devname(MD_DEV2SET(md_dev), m_dev, NULL, 0)); 585 } 586 587 int 588 trans_done(struct buf *cb) 589 { 590 struct buf *pb; 591 mdi_unit_t *ui; 592 md_tps_t *ps; 593 594 ps = (md_tps_t *)cb->b_chain; 595 pb = ps->ps_bp; 596 ui = ps->ps_ui; 597 598 if (cb->b_flags & B_ERROR) { 599 pb->b_flags |= B_ERROR; 600 pb->b_error = cb->b_error; 601 /* 602 * device not in hard error state; report error 603 */ 604 if (!ldl_isherror(ps->ps_un->un_l_unit)) { 605 daemon_request(&md_done_daemon, trans_error, 606 (daemon_queue_t *)ps, REQ_OLD); 607 608 if (cb->b_flags & B_REMAPPED) 609 bp_mapout(cb); 610 if (panicstr) 611 cb->b_flags |= B_DONE; 612 else 613 kmem_cache_free(trans_child_cache, cb); 614 615 return (1); 616 } 617 } 618 619 if (cb->b_flags & B_REMAPPED) 620 bp_mapout(cb); 621 622 if (panicstr) 623 cb->b_flags |= B_DONE; 624 else 625 kmem_cache_free(trans_child_cache, cb); 626 kmem_cache_free(trans_parent_cache, ps); 627 md_kstat_done(ui, pb, 0); 628 md_unit_readerexit(ui); 629 md_biodone(pb); 630 631 return (0); 632 } 633 634 static void 635 md_trans_strategy(buf_t *pb, int flag, void *private) 636 { 637 md_tps_t *ps; 638 buf_t *cb; /* child buf pointer */ 639 mt_unit_t *un; 640 mdi_unit_t *ui; 641 642 ui = MDI_UNIT(getminor(pb->b_edev)); 643 644 md_kstat_waitq_enter(ui); 645 646 un = (mt_unit_t *)md_unit_readerlock(ui); 647 648 if (md_inc_iocount(MD_MIN2SET(getminor(pb->b_edev))) != 0) { 649 pb->b_flags |= B_ERROR; 650 pb->b_error = ENXIO; 651 pb->b_resid = pb->b_bcount; 652 md_unit_readerexit(ui); 653 biodone(pb); 654 return; 655 } 656 657 ASSERT(!(flag & MD_STR_NOTTOP)); 658 659 /* check and map */ 660 if (md_checkbuf(ui, (md_unit_t *)un, pb) != 0) { 661 md_kstat_waitq_exit(ui); 662 return; 663 } 664 665 bp_mapin(pb); 666 667 ps = kmem_cache_alloc(trans_parent_cache, MD_ALLOCFLAGS); 668 trans_parent_init(ps); 669 670 /* 671 * Save essential information from the original buffhdr 672 * in the md_save structure. 673 */ 674 ps->ps_un = un; 675 ps->ps_ui = ui; 676 ps->ps_bp = pb; 677 678 cb = kmem_cache_alloc(trans_child_cache, MD_ALLOCFLAGS); 679 trans_child_init(cb); 680 681 cb = bioclone(pb, 0, pb->b_bcount, md_dev64_to_dev(un->un_m_dev), 682 pb->b_blkno, trans_done, cb, KM_NOSLEEP); 683 684 cb->b_chain = (void *)ps; 685 686 /* 687 * RELEASE DEBUG 688 * The following calls shadow debug for testing purposes if we are 689 * writing and if shadowing is turned on. 690 */ 691 if ((un->un_s_dev != NODEV64) && 692 ((pb->b_flags & B_READ) == 0)) 693 shadow_debug(un, pb, ps, cb, flag, private); 694 695 md_kstat_waitq_to_runq(ui); 696 697 (void) md_call_strategy(cb, flag | MD_STR_MAPPED | MD_NOBLOCK, private); 698 699 /* 700 * panic in progress; process daemon queues 701 */ 702 if (panicstr) { 703 trans_wait_panic(cb); 704 kmem_cache_free(trans_child_cache, cb); 705 } 706 } 707 708 /* ARGSUSED */ 709 static int 710 md_trans_read(dev_t dev, struct uio *uio, cred_t *credp) 711 { 712 int error; 713 714 if ((error = md_chk_uio(uio)) != 0) 715 return (error); 716 717 return (physio(mdstrategy, NULL, dev, B_READ, minphys, uio)); 718 } 719 720 /* ARGSUSED */ 721 static int 722 md_trans_aread(dev_t dev, struct aio_req *aio, cred_t *credp) 723 { 724 int error; 725 726 if ((error = md_chk_uio(aio->aio_uio)) != 0) 727 return (error); 728 729 return (aphysio(mdstrategy, anocancel, dev, B_READ, minphys, aio)); 730 } 731 732 /* ARGSUSED */ 733 static int 734 md_trans_write(dev_t dev, struct uio *uio, cred_t *credp) 735 { 736 int error; 737 738 if ((error = md_chk_uio(uio)) != 0) 739 return (error); 740 741 return (physio(mdstrategy, NULL, dev, B_WRITE, minphys, uio)); 742 } 743 744 /* ARGSUSED */ 745 static int 746 md_trans_awrite(dev_t dev, struct aio_req *aio, cred_t *credp) 747 { 748 int error; 749 750 if ((error = md_chk_uio(aio->aio_uio)) != 0) 751 return (error); 752 753 return (aphysio(mdstrategy, anocancel, dev, B_WRITE, minphys, aio)); 754 } 755 756 static void 757 trans_cleanup(mt_unit_t *un) 758 { 759 sv_dev_t sv; 760 761 MD_STATUS(un) |= MD_UN_LOG_DELETED; 762 trans_commit(un, 0); 763 764 /* Save the mstr key */ 765 sv.setno = MD_UN2SET(un); 766 sv.key = un->un_m_key; 767 768 mddb_deleterec_wrapper(un->c.un_record_id); 769 770 md_rem_names(&sv, 1); 771 } 772 773 static int 774 trans_snarf(md_snarfcmd_t cmd, set_t setno) 775 { 776 mt_unit_t *un; 777 ml_unit_t *ul; 778 mddb_recid_t recid; 779 int gotsomething; 780 mddb_type_t typ1; 781 int all_trans_gotten; 782 mddb_de_ic_t *dep; 783 mddb_rb32_t *rbp; 784 size_t newreqsize; 785 static int trans_found = 0; 786 787 788 789 if (cmd == MD_SNARF_CLEANUP) { 790 791 if (md_get_setstatus(setno) & MD_SET_STALE) 792 return (0); 793 794 /* 795 * clean up partially cleared trans devices 796 */ 797 typ1 = (mddb_type_t)md_getshared_key(setno, 798 trans_md_ops.md_driver.md_drivername); 799 recid = mddb_makerecid(setno, 0); 800 while ((recid = mddb_getnextrec(recid, typ1, TRANS_REC)) > 0) { 801 un = (mt_unit_t *)mddb_getrecaddr(recid); 802 (void) trans_detach(un, 1); 803 if (mddb_getrecprivate(recid) & MD_PRV_CLEANUP) { 804 trans_cleanup(un); 805 recid = mddb_makerecid(setno, 0); 806 } 807 } 808 /* 809 * clean up partially cleared log devices 810 */ 811 recid = mddb_makerecid(setno, 0); 812 while ((recid = mddb_getnextrec(recid, typ1, LOG_REC)) > 0) { 813 if (mddb_getrecprivate(recid) & MD_PRV_CLEANUP) { 814 ul = (ml_unit_t *)mddb_getrecaddr(recid); 815 ldl_cleanup(ul); 816 recid = mddb_makerecid(setno, 0); 817 } 818 } 819 820 return (0); 821 } 822 823 /* 824 * must snarf up the log devices first 825 */ 826 gotsomething = 0; 827 all_trans_gotten = 1; 828 typ1 = (mddb_type_t)md_getshared_key(setno, 829 trans_md_ops.md_driver.md_drivername); 830 recid = mddb_makerecid(setno, 0); 831 while ((recid = mddb_getnextrec(recid, typ1, LOG_REC)) > 0) { 832 ml_unit_t *big_ul; 833 ml_unit32_od_t *small_ul; 834 835 if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 836 continue; 837 838 small_ul = (ml_unit32_od_t *)mddb_getrecaddr(recid); 839 dep = mddb_getrecdep(recid); 840 dep->de_flags = MDDB_F_TRANS_LOG; 841 rbp = dep->de_rb; 842 /* 843 * As trans records are always old records, 844 * we have to check if this record already has been converted. 845 * We don't want to do that work twice. 846 */ 847 if ((rbp->rb_private & MD_PRV_CONVD) == 0) { 848 newreqsize = sizeof (ml_unit_t); 849 big_ul = (ml_unit_t *)kmem_zalloc(newreqsize, KM_SLEEP); 850 trans_log_convert((caddr_t)small_ul, (caddr_t)big_ul, 851 SMALL_2_BIG); 852 kmem_free(small_ul, dep->de_reqsize); 853 /* 854 * Update userdata and incore userdata 855 * incores are at the end of ul 856 */ 857 dep->de_rb_userdata_ic = big_ul; 858 dep->de_rb_userdata = big_ul; 859 dep->de_icreqsize = newreqsize; 860 rbp->rb_private |= MD_PRV_CONVD; 861 ul = big_ul; 862 } else { 863 /* already converted, just set the pointer */ 864 ul = dep->de_rb_userdata; 865 } 866 all_trans_gotten = 0; 867 if (ldl_build_incore(ul, 1) == 0) { 868 mddb_setrecprivate(recid, MD_PRV_GOTIT); 869 gotsomething = 1; 870 } 871 } 872 873 /* 874 * now snarf up metatrans devices 875 */ 876 gotsomething = 0; 877 recid = mddb_makerecid(setno, 0); 878 while ((recid = mddb_getnextrec(recid, typ1, TRANS_REC)) > 0) { 879 mt_unit_t *big_un; 880 mt_unit32_od_t *small_un; 881 882 if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 883 continue; 884 885 if ((trans_found == 0) && (!MD_UPGRADE)) { 886 cmn_err(CE_WARN, MD_EOF_TRANS_MSG MD_EOF_TRANS_WARNING); 887 trans_found = 1; 888 } 889 890 small_un = (mt_unit32_od_t *)mddb_getrecaddr(recid); 891 892 dep = mddb_getrecdep(recid); 893 dep->de_flags = MDDB_F_TRANS_MASTER; 894 rbp = dep->de_rb; 895 /* 896 * As trans records are always old records, 897 * we have to check if this record already has been converted. 898 * We don't want to do that work twice. 899 */ 900 if ((rbp->rb_private & MD_PRV_CONVD) == 0) { 901 newreqsize = sizeof (mt_unit_t); 902 big_un = (mt_unit_t *)kmem_zalloc(newreqsize, KM_SLEEP); 903 trans_master_convert((caddr_t)small_un, (caddr_t)big_un, 904 SMALL_2_BIG); 905 kmem_free(small_un, dep->de_reqsize); 906 /* 907 * Update userdata and incore userdata 908 * incores are at the end of ul 909 */ 910 dep->de_rb_userdata_ic = big_un; 911 dep->de_rb_userdata = big_un; 912 dep->de_icreqsize = newreqsize; 913 rbp->rb_private |= MD_PRV_CONVD; 914 un = big_un; 915 un->c.un_revision = MD_32BIT_META_DEV; 916 } else { 917 /* already converted, just set the pointer */ 918 un = dep->de_rb_userdata; 919 } 920 921 /* 922 * Create minor node for snarfed entry. 923 */ 924 (void) md_create_minor_node(MD_MIN2SET(MD_SID(un)), MD_SID(un)); 925 926 if (MD_UNIT(MD_SID(un)) != NULL) { 927 mddb_setrecprivate(recid, MD_PRV_PENDDEL); 928 continue; 929 } 930 931 all_trans_gotten = 0; 932 if (trans_build_incore(un, 1) == 0) { 933 mddb_setrecprivate(recid, MD_PRV_GOTIT); 934 md_create_unit_incore(MD_SID(un), &trans_md_ops, 0); 935 gotsomething = 1; 936 } 937 } 938 939 if (!all_trans_gotten) 940 return (gotsomething); 941 942 recid = mddb_makerecid(setno, 0); 943 while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) 944 if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT)) 945 mddb_setrecprivate(recid, MD_PRV_PENDDEL); 946 return (0); 947 } 948 949 static int 950 trans_halt(md_haltcmd_t cmd, set_t setno) 951 { 952 unit_t i; 953 mdi_unit_t *ui; 954 minor_t mnum; 955 mt_unit_t *un; 956 957 if (cmd == MD_HALT_CLOSE) { 958 for (i = 0; i < md_nunits; i++) { 959 mnum = MD_MKMIN(setno, i); 960 if ((ui = MDI_UNIT(mnum)) == NULL) 961 continue; 962 if (ui->ui_opsindex != trans_md_ops.md_selfindex) 963 continue; 964 if (md_unit_isopen(ui)) { 965 return (1); 966 } 967 } 968 for (i = 0; i < md_nunits; i++) { 969 mnum = MD_MKMIN(setno, i); 970 if ((ui = MDI_UNIT(mnum)) == NULL) 971 continue; 972 if (ui->ui_opsindex != trans_md_ops.md_selfindex) 973 continue; 974 un = (mt_unit_t *)MD_UNIT(mnum); 975 if ((un->un_flags & TRANS_NEED_OPEN) == 0) { 976 trans_close_all_devs(un); 977 } 978 } 979 return (0); 980 } 981 982 if (cmd == MD_HALT_OPEN) { 983 for (i = 0; i < md_nunits; i++) { 984 mnum = MD_MKMIN(setno, i); 985 if ((ui = MDI_UNIT(mnum)) == NULL) 986 continue; 987 if (ui->ui_opsindex != trans_md_ops.md_selfindex) 988 continue; 989 ldl_open_underlying((mt_unit_t *)MD_UNIT(mnum)); 990 } 991 return (0); 992 } 993 994 if (cmd == MD_HALT_CHECK) { 995 for (i = 0; i < md_nunits; i++) { 996 mnum = MD_MKMIN(setno, i); 997 if ((ui = MDI_UNIT(mnum)) == NULL) 998 continue; 999 if (ui->ui_opsindex != trans_md_ops.md_selfindex) 1000 continue; 1001 if (md_unit_isopen(ui)) { 1002 return (1); 1003 } 1004 } 1005 return (0); 1006 } 1007 if (cmd == MD_HALT_DOIT) { 1008 for (i = 0; i < md_nunits; i++) { 1009 mnum = MD_MKMIN(setno, i); 1010 if ((ui = MDI_UNIT(mnum)) == NULL) 1011 continue; 1012 if (ui->ui_opsindex != trans_md_ops.md_selfindex) 1013 continue; 1014 (void) trans_reset((mt_unit_t *)MD_UNIT(mnum), mnum, 1015 0, 1); 1016 } 1017 return (0); 1018 } 1019 if (cmd == MD_HALT_UNLOAD) 1020 return (0); 1021 1022 return (1); 1023 } 1024 1025 /*ARGSUSED3*/ 1026 static int 1027 trans_open( 1028 dev_t *dev, 1029 int flag, 1030 int otyp, 1031 cred_t *cred_p, 1032 int md_oflags 1033 ) 1034 { 1035 minor_t mnum = getminor(*dev); 1036 mdi_unit_t *ui = MDI_UNIT(mnum); 1037 mt_unit_t *un; 1038 int err; 1039 1040 /* disallow layered opens (e.g., PrestoServe) */ 1041 if (otyp == OTYP_LYR) 1042 return (EINVAL); 1043 1044 /* single thread */ 1045 un = (mt_unit_t *)md_unit_openclose_enter(ui); 1046 1047 /* if already open, count open, return success */ 1048 if (md_unit_isopen(ui)) { 1049 err = md_unit_incopen(mnum, flag, otyp); 1050 md_unit_openclose_exit(ui); 1051 if (err != 0) 1052 return (err); 1053 return (0); 1054 } 1055 1056 /* 1057 * For some reason, not all of the metatrans devices attached to 1058 * this log were openable at snarf; try again now. All of the 1059 * underlying devices have to be openable for the roll thread to work. 1060 */ 1061 if (un->un_flags & TRANS_NEED_OPEN) { 1062 md_unit_openclose_exit(ui); 1063 ldl_open_underlying(un); 1064 if (un->un_flags & TRANS_NEED_OPEN) 1065 return (EINVAL); 1066 un = (mt_unit_t *)md_unit_openclose_enter(ui); 1067 } 1068 1069 /* count open */ 1070 err = md_unit_incopen(mnum, flag, otyp); 1071 md_unit_openclose_exit(ui); 1072 if (err != 0) 1073 return (err); 1074 1075 /* return success */ 1076 return (0); 1077 } 1078 1079 /*ARGSUSED1*/ 1080 static int 1081 trans_close( 1082 dev_t dev, 1083 int flag, 1084 int otyp, 1085 cred_t *cred_p, 1086 int md_oflags 1087 ) 1088 { 1089 minor_t mnum = getminor(dev); 1090 mdi_unit_t *ui = MDI_UNIT(mnum); 1091 mt_unit_t *un; 1092 int err = 0; 1093 1094 /* single thread */ 1095 un = (mt_unit_t *)md_unit_openclose_enter(ui); 1096 1097 /* count closed */ 1098 if ((err = md_unit_decopen(mnum, otyp)) != 0) { 1099 md_unit_openclose_exit(ui); 1100 return (err); 1101 } 1102 1103 /* if still open */ 1104 if (md_unit_isopen(ui)) { 1105 md_unit_openclose_exit(ui); 1106 return (0); 1107 } 1108 md_unit_openclose_exit(ui); 1109 1110 if (un->un_flags & TRANS_DETACHING) { 1111 /* 1112 * prevent new opens and try to detach the log 1113 */ 1114 rw_enter(&md_unit_array_rw.lock, RW_WRITER); 1115 (void) trans_detach(un, 0); 1116 rw_exit(&md_unit_array_rw.lock); 1117 } 1118 if (un->un_flags & TRANS_ATTACHING) { 1119 /* 1120 * prevent new opens and try to attach the log 1121 */ 1122 rw_enter(&md_unit_array_rw.lock, RW_WRITER); 1123 trans_attach(un, 1); 1124 rw_exit(&md_unit_array_rw.lock); 1125 } 1126 1127 return (0); 1128 } 1129 1130 static int 1131 trans_imp_set( 1132 set_t setno 1133 ) 1134 { 1135 mt_unit32_od_t *un32; 1136 ml_unit32_od_t *ul32; 1137 mddb_recid_t recid; 1138 int gotsomething = 0; 1139 mddb_type_t typ1; 1140 minor_t *self_id; /* minor needs to be updated */ 1141 mddb_recid_t *record_id; /* record id needs to be updated */ 1142 1143 /* 1144 * Do log first if there is any 1145 * Note that trans record is always 32 bit 1146 */ 1147 typ1 = (mddb_type_t)md_getshared_key(setno, 1148 trans_md_ops.md_driver.md_drivername); 1149 recid = mddb_makerecid(setno, 0); 1150 1151 while ((recid = mddb_getnextrec(recid, typ1, LOG_REC)) > 0) { 1152 if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 1153 continue; 1154 1155 ul32 = (ml_unit32_od_t *)mddb_getrecaddr(recid); 1156 1157 /* 1158 * Trans log record always is old format 1159 * Go ahead update the record with the new set info 1160 */ 1161 record_id = &(ul32->un_recid); 1162 1163 /* 1164 * Mark the record and update it 1165 */ 1166 *record_id = MAKERECID(setno, DBID(*record_id)); 1167 if (!md_update_minor(setno, mddb_getsidenum 1168 (setno), ul32->un_key)) 1169 goto out; 1170 mddb_setrecprivate(recid, MD_PRV_GOTIT); 1171 } 1172 1173 1174 /* 1175 * Now do the master 1176 */ 1177 recid = mddb_makerecid(setno, 0); 1178 while ((recid = mddb_getnextrec(recid, typ1, TRANS_REC)) > 0) { 1179 if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 1180 continue; 1181 1182 un32 = (mt_unit32_od_t *)mddb_getrecaddr(recid); 1183 1184 /* 1185 * Trans master record always is old format 1186 */ 1187 self_id = &(un32->c.un_self_id); 1188 record_id = &(un32->c.un_record_id); 1189 1190 /* 1191 * Mark the record and update it 1192 */ 1193 *record_id = MAKERECID(setno, DBID(*record_id)); 1194 *self_id = MD_MKMIN(setno, MD_MIN2UNIT(*self_id)); 1195 if (!md_update_minor(setno, mddb_getsidenum 1196 (setno), un32->un_m_key)) 1197 goto out; 1198 mddb_setrecprivate(recid, MD_PRV_GOTIT); 1199 1200 gotsomething = 1; 1201 } 1202 1203 out: 1204 return (gotsomething); 1205 } 1206 1207 static md_named_services_t trans_named_services[] = { 1208 {(intptr_t (*)()) trans_rename_listkids, MDRNM_LIST_URKIDS }, 1209 {(intptr_t (*)()) trans_rename_check, MDRNM_CHECK }, 1210 {(intptr_t (*)()) trans_renexch_update_kids, MDRNM_UPDATE_KIDS }, 1211 {(intptr_t (*)()) trans_rename_update_self, MDRNM_UPDATE_SELF }, 1212 {(intptr_t (*)()) trans_exchange_self_update_from_down, 1213 MDRNM_SELF_UPDATE_FROM_DOWN }, 1214 {(intptr_t (*)()) trans_exchange_parent_update_to, 1215 MDRNM_PARENT_UPDATE_TO }, 1216 {NULL, 0 } 1217 }; 1218 1219 md_ops_t trans_md_ops = { 1220 trans_open, /* open */ 1221 trans_close, /* close */ 1222 md_trans_strategy, /* strategy */ 1223 NULL, /* print */ 1224 NULL, /* dump */ 1225 md_trans_read, /* read */ 1226 md_trans_write, /* write */ 1227 md_trans_ioctl, /* trans ioctl */ 1228 trans_snarf, /* trans_snarf */ 1229 trans_halt, /* halt */ 1230 md_trans_aread, /* aread */ 1231 md_trans_awrite, /* awrite */ 1232 trans_imp_set, /* import set */ 1233 trans_named_services 1234 }; 1235 1236 static void 1237 init_init(void) 1238 { 1239 _init_ldl(); 1240 ASSERT(_init_debug()); 1241 trans_parent_cache = kmem_cache_create("md_trans_parent", 1242 sizeof (md_tps_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 1243 trans_child_cache = kmem_cache_create("md_trans_child", biosize(), 0, 1244 trans_child_constructor, trans_child_destructor, 1245 NULL, NULL, NULL, 0); 1246 } 1247 1248 static void 1249 fini_uninit(void) 1250 { 1251 ASSERT(_fini_debug()); 1252 _fini_ldl(); 1253 kmem_cache_destroy(trans_parent_cache); 1254 kmem_cache_destroy(trans_child_cache); 1255 trans_parent_cache = trans_child_cache = NULL; 1256 } 1257 1258 /* define the module linkage */ 1259 MD_PLUGIN_MISC_MODULE("trans module %I%", init_init(), fini_uninit()) 1260