/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/debug.h>
#include <sys/file.h>
#include <sys/user.h>
#include <sys/uio.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/kmem.h>
#include <vm/page.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/mkdev.h>
#include <sys/stat.h>
#include <sys/open.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/disp.h>
#include <sys/buf.h>

#include <sys/lvm/mdvar.h>
#include <sys/lvm/md_trans.h>
#include <sys/lvm/md_notify.h>
#include <sys/lvm/md_convert.h>

#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/svm.h>

/* Ops vector for this (trans) metadevice type; filled in near end of file */
md_ops_t	trans_md_ops;
#ifndef	lint
static char	_depends_on[] = "drv/md fs/ufs";
md_ops_t	*md_interface_ops = &trans_md_ops;
#endif	/* lint */

extern unit_t	md_nunits;
extern set_t	md_nsets;
extern md_set_t	md_set[];
extern int	md_status;
extern major_t	md_major;

extern int	md_trans_ioctl();
extern md_krwlock_t	md_unit_array_rw;

extern mdq_anchor_t	md_done_daemon;

extern int	md_in_upgrade;

/* kmem caches for per-request parent/child state (see init_init/fini_uninit) */
static kmem_cache_t	*trans_parent_cache = NULL;
kmem_cache_t		*trans_child_cache = NULL;

#ifdef	DEBUG
/*
 * ROUTINES FOR TESTING:
 */
static int
_init_debug()
{
	extern int	_init_ioctl();

	return (_init_ioctl());
}
static int
_fini_debug()
{
	extern int	_fini_ioctl();
	int	err = 0;

	err = _fini_ioctl();
	return (err);
}

#endif	/* DEBUG */

/*
 * BEGIN RELEASE DEBUG
 * The following routines remain in the released product for testability
 */

/*
 * Iodone routine shared by the child (master) buf and the shadow buf.
 * The last of the two completions (ps_count drops to 0) tears down the
 * parent-save mutex and runs the normal trans_done() path; the earlier
 * completion only propagates any error to the parent buf and frees its
 * child buf.
 */
int
trans_done_shadow(buf_t *bp)
{
	buf_t		*pb;
	md_tps_t	*ps = (md_tps_t *)bp->b_chain;
	int		rv = 0;

	pb = ps->ps_bp;
	mutex_enter(&ps->ps_mx);
	ps->ps_count--;
	if (ps->ps_count > 0) {
		/* not the last completion; just record any error */
		if ((bp->b_flags & B_ERROR) != 0) {
			pb->b_flags |= B_ERROR;
			pb->b_error = bp->b_error;
		}
		mutex_exit(&ps->ps_mx);
		kmem_cache_free(trans_child_cache, bp);
	} else {
		/* last completion; finish as a normal trans request */
		mutex_exit(&ps->ps_mx);
		mutex_destroy(&ps->ps_mx);
		rv = trans_done(bp);
	}
	return (rv);
}

/*
 * Issue a second (shadow) write of the parent buf to the shadow device
 * un_s_dev, for testability.  The child buf's iodone is redirected to
 * trans_done_shadow and ps_count is set to 2 so completion waits for
 * both the master write and this shadow write.
 */
static void
shadow_debug(mt_unit_t	*un,		/* trans unit info */
		buf_t	*pb,		/* primary buffer */
		md_tps_t	*ps,	/* trans parent save */
		buf_t	*cb,		/* buffer for writing to master */
		int	flag,
		void	*private)
{
	buf_t		*sb;		/* Shadow buffer */

	mutex_init(&ps->ps_mx, NULL, MUTEX_DEFAULT, NULL);
	ps->ps_count = 2;		/* Write child buffer & shadow */
	cb->b_iodone = trans_done_shadow;
	sb = kmem_cache_alloc(trans_child_cache, MD_ALLOCFLAGS);
	trans_child_init(sb);
	sb = bioclone(pb, 0, pb->b_bcount, md_dev64_to_dev(un->un_s_dev),
	    pb->b_blkno, trans_done_shadow, sb, KM_NOSLEEP);

	sb->b_flags |= B_ASYNC;
	sb->b_chain = (void *)ps;
	md_call_strategy(sb, flag | MD_STR_MAPPED, private);
}
/*
 * END RELEASE DEBUG
 */

/*
 * COMMON MEMORY ALLOCATION ROUTINES (so that we can discover leaks)
 */
void *
md_trans_zalloc(size_t nb)
{
	TRANSSTATS(ts_trans_zalloc);
	TRANSSTATSADD(ts_trans_alloced, nb);
	return (kmem_zalloc(nb, KM_SLEEP));
}
void *
md_trans_alloc(size_t nb)
{
	TRANSSTATS(ts_trans_alloc);
	TRANSSTATSADD(ts_trans_alloced, nb);
	return (kmem_alloc(nb, KM_SLEEP));
}
void
md_trans_free(void *va, size_t nb)
{
	TRANSSTATS(ts_trans_free);
	TRANSSTATSADD(ts_trans_freed, nb);
	if (nb)
		kmem_free(va, nb);
}

/* Zero a freshly allocated parent-save structure */
static void
trans_parent_init(md_tps_t *ps)
{
	bzero(ps, sizeof (md_tps_t));
}

/* kmem cache constructor for child bufs: one-time buf initialization */
/*ARGSUSED1*/
int
trans_child_constructor(void *p, void *d1, int d2)
{
	bioinit(p);
	return (0);
}

/* Re-arm a cached child buf for reuse */
void
trans_child_init(struct buf *bp)
{
	md_bioreset(bp);
}

/* kmem cache destructor for child bufs */
/*ARGSUSED1*/
void
trans_child_destructor(void *p, void *d)
{
	biofini(p);
}

/*
 * Commit this unit's database record (and, if domstr is set and the
 * master is itself a metadevice, the master's record too).  No-op when
 * the set is stale.
 */
void
trans_commit(mt_unit_t *un, int domstr)
{
	mddb_recid_t	recids[4];
	md_unit_t	*su;
	int		ri = 0;

	if (md_get_setstatus(MD_UN2SET(un)) & MD_SET_STALE)
		return;

	recids[ri++] = un->c.un_record_id;

	if (domstr)
		if (md_getmajor(un->un_m_dev) == md_major) {
			su = MD_UNIT(md_getminor(un->un_m_dev));
			recids[ri++] = su->c.un_record_id;
		}

	if (ri == 0)
		return;
	recids[ri] = 0;	/* terminate the recid list */

	uniqtime32(&un->un_timestamp);
	mddb_commitrecs_wrapper(recids);
}

/*
 * Close the master (and log, if attached) devices and mark the unit as
 * needing a re-open.  No-op if the devices were never opened.
 */
void
trans_close_all_devs(mt_unit_t *un)
{
	if ((un->un_flags & TRANS_NEED_OPEN) == 0) {
		md_layered_close(un->un_m_dev, MD_OFLG_NULL);
		if (un->un_l_unit)
			ldl_close_dev(un->un_l_unit);
		un->un_flags |= TRANS_NEED_OPEN;
	}
}

/*
 * Open the master device (resolving by device id when possible) and the
 * log device if one is attached.  Returns 0 on success, ENXIO on any
 * open failure (master is closed again if the log open fails).
 */
int
trans_open_all_devs(mt_unit_t *un)
{
	int		err;
	minor_t		mnum = MD_SID(un);
	md_dev64_t	tmpdev = un->un_m_dev;
	set_t		setno = MD_MIN2SET(MD_SID(un));
	side_t		side = mddb_getsidenum(setno);

	/*
	 * Do the open by device id if it is regular device
	 */
	if ((md_getmajor(tmpdev) != md_major) &&
	    md_devid_found(setno, side, un->un_m_key) == 1) {
		tmpdev = md_resolve_bydevid(mnum, tmpdev, un->un_m_key);
	}
	err = md_layered_open(mnum, &tmpdev, MD_OFLG_NULL);
	un->un_m_dev = tmpdev;

	if (err)
		return (ENXIO);

	if (un->un_l_unit) {
		err = ldl_open_dev(un, un->un_l_unit);
		if (err) {
			md_layered_close(tmpdev, MD_OFLG_NULL);
			return (ENXIO);
		}
	}
	return (0);
}

uint_t	mt_debug	= 0;

/*
 * Build the incore state for a trans unit (called at snarf time and
 * when setting up a new unit).  Returns 0 on success, 1 when the unit
 * cannot be set up yet (pending delete, unresolvable master, or log
 * not yet available).
 */
int
trans_build_incore(void *p, int snarfing)
{
	mt_unit_t	*un = (mt_unit_t *)p;
	minor_t		mnum;
	set_t		setno;

	/*
	 * initialize debug mode and always start with no shadowing.
	 */
	if (!snarfing)
		un->un_debug = mt_debug;
	un->un_s_dev = NODEV64;

	mnum = MD_SID(un);

	if (MD_UNIT(mnum) != NULL)
		return (0);

	setno = MD_MIN2SET(mnum);

	/*
	 * If snarfing the metatrans device,
	 * then remake the device number
	 */
	if (snarfing) {
		un->un_m_dev = md_getdevnum(setno, mddb_getsidenum(setno),
		    un->un_m_key, MD_NOTRUST_DEVT);
	}

	/*
	 * db rec is partially deleted; finish the db delete later
	 */
	if (MD_STATUS(un) & MD_UN_BEING_RESET) {
		mddb_setrecprivate(un->c.un_record_id, MD_PRV_PENDCLEAN);
		return (1);
	}

	/*
	 * With the current device id implementation there is possibility
	 * that we may have NODEV if the underlying can't be resolved at
	 * snarf time.  If this is the case we want to be consistent with
	 * the normal behavior and continue to allow the snarf of unit
	 * and resolve the devt at the open time
	 */
	if ((md_getmajor(un->un_m_dev) == md_major) &&
	    (md_dev_exists(un->un_m_dev) == 0)) {
		return (1);
	}

	/*
	 * retain the detach status; reset open status
	 */
	un->un_flags &= (TRANS_DETACHING | TRANS_DETACHED);
	un->un_flags |= TRANS_NEED_OPEN;
	if ((un->un_flags & TRANS_DETACHED) == 0)
		un->un_flags |= TRANS_ATTACHING;

	/*
	 * log device not set up yet; try again later
	 */
	if ((un->un_flags & TRANS_DETACHED) == 0)
		if (ldl_findlog(un->un_l_recid) == NULL)
			return (1);

	/*
	 * initialize incore fields
	 */
	un->un_next = NULL;
	un->un_l_unit = NULL;
	un->un_deltamap = NULL;
	un->un_udmap = NULL;
	un->un_logmap = NULL;
	un->un_matamap = NULL;
	un->un_shadowmap = NULL;
	un->un_ut = NULL;
	un->un_logreset = 0;
	un->un_dev = md_makedevice(md_major, mnum);
	MD_STATUS(un) = 0;

	/* necessary because capability didn't exist pre-4.1 */
	MD_CAPAB(un) = (MD_CAN_META_CHILD & ~MD_CAN_PARENT);

	/*
	 * attach the log
	 */
	trans_attach(un, 0);

	/*
	 * check for master dev dynconcat
	 */
	if (md_getmajor(un->un_m_dev) == md_major) {
		struct mdc_unit	*c;

		c = MD_UNIT(md_getminor(un->un_m_dev));
		un->c.un_total_blocks = c->un_total_blocks;
	}

	MD_UNIT(mnum) = un;

	return (0);
}

/*
 * Detach the log from this trans unit.  With force set, detach even if
 * the unit is open; otherwise return EBUSY for an open unit.  On
 * success the TRANS_DETACHED state is committed to the database.
 */
int
trans_detach(mt_unit_t *un, int force)
{
	mdi_unit_t	*ui = MDI_UNIT(MD_SID(un));
	int		error = 0;

	/*
	 * The caller is responsible for single-threading this routine.
	 */

	if (ui == NULL)
		return (0);

	/*
	 * already detached or the log isn't attached yet; do nothing
	 */
	if (un->un_flags & (TRANS_DETACHED | TRANS_ATTACHING))
		return (0);

	/*
	 * set state to detaching
	 */
	if (force || !md_unit_isopen(ui)) {
		un->un_flags |= TRANS_DETACHING;
		if (!MD_UPGRADE) {
			trans_commit(un, 0);
		}
		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DETACHING, TAG_METADEVICE,
		    MD_UN2SET(un), MD_SID(un));
	}

	/*
	 * device is busy
	 */
	if (md_unit_isopen(ui))
		return (EBUSY);

	/*
	 * detach the log
	 *	if successful
	 *		flags committed to TRANS_DETACHED in database
	 *		un->un_l_unit set to NULL
	 *		no error returned
	 */
	error = ldl_reset(un, 1, force);
	if (error)
		return (error);

	/*
	 * commit to database
	 */
	if (!MD_UPGRADE) {
		trans_commit(un, 0);
	}
	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DETACH, TAG_METADEVICE, MD_UN2SET(un),
	    MD_SID(un));

	return (0);
}

/*
 * Attach the log (found by un_l_recid) to this trans unit.  Silently
 * returns without attaching if the unit is not in TRANS_ATTACHING
 * state, the log record cannot be found, or (when called from the
 * attach path) the unit is open.
 */
void
trans_attach(mt_unit_t *un, int attaching)
{
	mdi_unit_t	*ui = MDI_UNIT(MD_SID(un));
	ml_unit_t	*ul;

	/*
	 * called from snarf, set, and attach.  Hence, the attaching param
	 * The caller is responsible for single-threading this routine.
	 */

	/*
	 * not attaching; do nothing
	 */
	if ((un->un_flags & TRANS_ATTACHING) == 0)
		return;

	/*
	 * find log unit struct
	 */
	ul = ldl_findlog(un->un_l_recid);
	if (ul == NULL)
		return;
	un->un_l_dev = ul->un_dev;

	/*
	 * device is busy; do nothing
	 */
	if (attaching && md_unit_isopen(ui))
		return;
	/*
	 * other functions use non-NULL un_l_unit as detach/attach flag
	 */
	un->un_l_unit = ul;

	/*
	 * add metatrans device to the log's list of mt devices
	 */
	ldl_utadd(un);

	/*
	 * attached
	 */
	un->un_flags &= ~TRANS_ATTACHING;

}

/*
 * Reset (and optionally remove) a trans unit: reset the log, close the
 * underlying devices, and tear down incore state.  When removing,
 * also delete the unit's database records and name entries.
 */
int
trans_reset(mt_unit_t *un, minor_t mnum, int removing, int force)
{
	sv_dev_t	sv;
	mddb_recid_t	vtoc_id;
	int		error	= 0;

	/*
	 * reset log, maps, and ufs interface
	 */
	error = ldl_reset(un, removing, force);
	if (error)
		return (error);

	/*
	 * done with underyling devices
	 */
	trans_close_all_devs(un);

	md_destroy_unit_incore(mnum, &trans_md_ops);

	MD_UNIT(mnum) = NULL;

	if (!removing)
		return (0);

	md_reset_parent(un->un_m_dev);
	MD_STATUS(un) |= MD_UN_BEING_RESET;
	trans_commit(un, 1);
	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, TAG_METADEVICE, MD_UN2SET(un),
	    MD_SID(un));

	/* Save the mstr key */
	sv.setno = MD_MIN2SET(mnum);
	sv.key = un->un_m_key;

	vtoc_id = un->c.un_vtoc_id;

	mddb_deleterec_wrapper(un->c.un_record_id);

	/* Remove the vtoc, if present */
	if (vtoc_id)
		mddb_deleterec_wrapper(vtoc_id);
	md_rem_names(&sv, 1);
	return (0);
}

/*
 * During panic, spin processing the done daemon queue until the child
 * buf completes (no interrupts/threads to do it for us).
 */
static void
trans_wait_panic(struct buf *cb)
{
	while ((cb->b_flags & B_DONE) == 0) {
		md_daemon(1, &md_done_daemon);
		drv_usecwait(10);
	}
}

/*
 * Daemon-queue handler for an errored request: release the request's
 * resources, biodone the errored parent buf, then log a warning naming
 * the trans device and its master.
 */
static void
trans_error(md_tps_t *ps)
{
	md_dev64_t	md_dev;
	md_dev64_t	m_dev;
	char		*str;
	struct buf	*pb;
	mdi_unit_t	*ui;

	pb = ps->ps_bp;
	ui = ps->ps_ui;

	/*
	 * gather up params for cmn_err
	 */
	if (pb->b_flags & B_READ)
		str = "read";
	else
		str = "write";
	md_dev = md_expldev(pb->b_edev);
	m_dev = ps->ps_un->un_m_dev;

	/*
	 * free up the resources for this request and done the errored buf
	 */
	md_kstat_done(ui, pb, 0);
	kmem_cache_free(trans_parent_cache, ps);
	md_unit_readerexit(ui);
	md_biodone(pb);

	/*
	 * print pretty error message
	 */
	cmn_err(CE_WARN, "md: %s: %s error on %s",
	    md_shortname(md_getminor(md_dev)), str,
	    md_devname(MD_DEV2SET(md_dev), m_dev, NULL, 0));
}

/*
 * Iodone routine for the child buf written to the master device.
 * On error (unless the log is already in hard-error state) the error
 * report is pushed to the done daemon via trans_error(); otherwise the
 * request's resources are released and the parent buf is done here.
 * Returns 1 when error handling was deferred to the daemon, else 0.
 */
int
trans_done(struct buf *cb)
{
	struct buf	*pb;
	mdi_unit_t	*ui;
	md_tps_t	*ps;

	ps = (md_tps_t *)cb->b_chain;
	pb = ps->ps_bp;
	ui = ps->ps_ui;

	if (cb->b_flags & B_ERROR) {
		pb->b_flags |= B_ERROR;
		pb->b_error = cb->b_error;
		/*
		 * device not in hard error state; report error
		 */
		if (!ldl_isherror(ps->ps_un->un_l_unit)) {
			daemon_request(&md_done_daemon, trans_error,
			    (daemon_queue_t *)ps, REQ_OLD);

			if (cb->b_flags & B_REMAPPED)
				bp_mapout(cb);
			if (panicstr)
				cb->b_flags |= B_DONE;
			else
				kmem_cache_free(trans_child_cache, cb);

			return (1);
		}
	}

	if (cb->b_flags & B_REMAPPED)
		bp_mapout(cb);

	/* at panic time the child buf is freed by the strategy routine */
	if (panicstr)
		cb->b_flags |= B_DONE;
	else
		kmem_cache_free(trans_child_cache, cb);
	kmem_cache_free(trans_parent_cache, ps);
	md_kstat_done(ui, pb, 0);
	md_unit_readerexit(ui);
	md_biodone(pb);

	return (0);
}

/*
 * Strategy routine for the trans metadevice: clone the parent buf onto
 * the master device and issue it, with optional shadow write (RELEASE
 * DEBUG) when un_s_dev is configured.
 */
static void
md_trans_strategy(buf_t *pb, int flag, void *private)
{
	md_tps_t	*ps;
	buf_t		*cb;		/* child buf pointer */
	mt_unit_t	*un;
	mdi_unit_t	*ui;

	ui = MDI_UNIT(getminor(pb->b_edev));

	md_kstat_waitq_enter(ui);

	un = (mt_unit_t *)md_unit_readerlock(ui);

	if (md_inc_iocount(MD_MIN2SET(getminor(pb->b_edev))) != 0) {
		pb->b_flags |= B_ERROR;
		pb->b_error = ENXIO;
		pb->b_resid = pb->b_bcount;
		md_unit_readerexit(ui);
		biodone(pb);
		return;
	}

	ASSERT(!(flag & MD_STR_NOTTOP));

	/* check and map */
	if (md_checkbuf(ui, (md_unit_t *)un, pb) != 0) {
		md_kstat_waitq_exit(ui);
		return;
	}

	bp_mapin(pb);

	ps = kmem_cache_alloc(trans_parent_cache, MD_ALLOCFLAGS);
	trans_parent_init(ps);

	/*
	 * Save essential information from the original buffhdr
	 * in the md_save structure.
	 */
	ps->ps_un = un;
	ps->ps_ui = ui;
	ps->ps_bp = pb;

	cb = kmem_cache_alloc(trans_child_cache, MD_ALLOCFLAGS);
	trans_child_init(cb);

	cb = bioclone(pb, 0, pb->b_bcount, md_dev64_to_dev(un->un_m_dev),
	    pb->b_blkno, trans_done, cb, KM_NOSLEEP);

	cb->b_chain = (void *)ps;

	/*
	 * RELEASE DEBUG
	 * The following calls shadow debug for testing purposes if we are
	 * writing and if shadowing is turned on.
	 */
	if ((un->un_s_dev != NODEV64) &&
	    ((pb->b_flags & B_READ) == 0))
		shadow_debug(un, pb, ps, cb, flag, private);

	md_kstat_waitq_to_runq(ui);

	(void) md_call_strategy(cb, flag | MD_STR_MAPPED | MD_NOBLOCK, private);

	/*
	 * panic in progress; process daemon queues
	 */
	if (panicstr) {
		trans_wait_panic(cb);
		kmem_cache_free(trans_child_cache, cb);
	}
}

/* Character-device read entry: validate uio, then physio via mdstrategy */
/* ARGSUSED */
static int
md_trans_read(dev_t dev, struct uio *uio, cred_t *credp)
{
	int			error;

	if ((error = md_chk_uio(uio)) != 0)
		return (error);

	return (physio(mdstrategy, NULL, dev, B_READ, minphys, uio));
}

/* Async read entry */
/* ARGSUSED */
static int
md_trans_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	int			error;

	if ((error = md_chk_uio(aio->aio_uio)) != 0)
		return (error);

	return (aphysio(mdstrategy, anocancel, dev, B_READ, minphys, aio));
}

/* Character-device write entry */
/* ARGSUSED */
static int
md_trans_write(dev_t dev, struct uio *uio, cred_t *credp)
{
	int	error;

	if ((error = md_chk_uio(uio)) != 0)
		return (error);

	return (physio(mdstrategy, NULL, dev, B_WRITE, minphys, uio));
}

/* Async write entry */
/* ARGSUSED */
static int
md_trans_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
{
	int	error;

	if ((error = md_chk_uio(aio->aio_uio)) != 0)
		return (error);

	return (aphysio(mdstrategy, anocancel, dev, B_WRITE, minphys, aio));
}

/*
 * Finish deleting a partially cleared trans unit: mark the log deleted,
 * commit, delete the unit record, and remove the master's name entry.
 */
static void
trans_cleanup(mt_unit_t *un)
{
	sv_dev_t	sv;

	MD_STATUS(un) |= MD_UN_LOG_DELETED;
	trans_commit(un, 0);

	/* Save the mstr key */
	sv.setno = MD_UN2SET(un);
	sv.key = un->un_m_key;

	mddb_deleterec_wrapper(un->c.un_record_id);

	md_rem_names(&sv, 1);
}

/*
 * Snarf (load from the metadevice database) all trans log and master
 * records for a set, converting the on-disk 32-bit record format to the
 * incore format.  MD_SNARF_CLEANUP finishes partially cleared records
 * instead.  Returns nonzero when units remain to be picked up on a
 * later snarf pass.
 */
static int
trans_snarf(md_snarfcmd_t cmd, set_t setno)
{
	mt_unit_t	*un;
	ml_unit_t	*ul;
	mddb_recid_t	recid;
	int		gotsomething;
	mddb_type_t	typ1;
	int		all_trans_gotten;
	mddb_de_ic_t	*dep;
	mddb_rb32_t	*rbp;
	size_t		newreqsize;
	static int	trans_found = 0;



	if (cmd == MD_SNARF_CLEANUP) {

		if (md_get_setstatus(setno) & MD_SET_STALE)
			return (0);

		/*
		 * clean up partially cleared trans devices
		 */
		typ1 = (mddb_type_t)md_getshared_key(setno,
		    trans_md_ops.md_driver.md_drivername);
		recid = mddb_makerecid(setno, 0);
		while ((recid = mddb_getnextrec(recid, typ1, TRANS_REC)) > 0) {
			un = (mt_unit_t *)mddb_getrecaddr(recid);
			(void) trans_detach(un, 1);
			if (mddb_getrecprivate(recid) & MD_PRV_CLEANUP) {
				trans_cleanup(un);
				/* restart scan; record list has changed */
				recid = mddb_makerecid(setno, 0);
			}
		}
		/*
		 * clean up partially cleared log devices
		 */
		recid = mddb_makerecid(setno, 0);
		while ((recid = mddb_getnextrec(recid, typ1, LOG_REC)) > 0) {
			if (mddb_getrecprivate(recid) & MD_PRV_CLEANUP) {
				ul = (ml_unit_t *)mddb_getrecaddr(recid);
				ldl_cleanup(ul);
				recid = mddb_makerecid(setno, 0);
			}
		}

		return (0);
	}

	/*
	 * must snarf up the log devices first
	 */
	gotsomething = 0;
	all_trans_gotten = 1;
	typ1 = (mddb_type_t)md_getshared_key(setno,
	    trans_md_ops.md_driver.md_drivername);
	recid = mddb_makerecid(setno, 0);
	while ((recid = mddb_getnextrec(recid, typ1, LOG_REC)) > 0) {
		ml_unit_t	*big_ul;
		ml_unit32_od_t	*small_ul;

		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
			continue;

		small_ul = (ml_unit32_od_t *)mddb_getrecaddr(recid);
		dep = mddb_getrecdep(recid);
		dep->de_flags = MDDB_F_TRANS_LOG;
		rbp = dep->de_rb;
		/*
		 * As trans records are always old records,
		 * we have to check if this record already has been converted.
		 * We don't want to do that work twice.
		 */
		if ((rbp->rb_private & MD_PRV_CONVD) == 0) {
			newreqsize = sizeof (ml_unit_t);
			big_ul = (ml_unit_t *)kmem_zalloc(newreqsize, KM_SLEEP);
			trans_log_convert((caddr_t)small_ul, (caddr_t)big_ul,
			    SMALL_2_BIG);
			kmem_free(small_ul, dep->de_reqsize);
			/*
			 * Update userdata and incore userdata
			 * incores are at the end of ul
			 */
			dep->de_rb_userdata_ic = big_ul;
			dep->de_rb_userdata = big_ul;
			dep->de_icreqsize = newreqsize;
			rbp->rb_private |= MD_PRV_CONVD;
			ul = big_ul;
		} else {
			/* already converted, just set the pointer */
			ul = dep->de_rb_userdata;
		}
		all_trans_gotten = 0;
		if (ldl_build_incore(ul, 1) == 0) {
			mddb_setrecprivate(recid, MD_PRV_GOTIT);
			gotsomething = 1;
		}
	}

	/*
	 * now snarf up metatrans devices
	 */
	gotsomething = 0;
	recid = mddb_makerecid(setno, 0);
	while ((recid = mddb_getnextrec(recid, typ1, TRANS_REC)) > 0) {
		mt_unit_t	*big_un;
		mt_unit32_od_t	*small_un;

		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
			continue;

		/* warn once per boot that trans is EOF'd (unless upgrading) */
		if ((trans_found == 0) && (!MD_UPGRADE)) {
			cmn_err(CE_WARN, MD_EOF_TRANS_MSG MD_EOF_TRANS_WARNING);
			trans_found = 1;
		}

		small_un = (mt_unit32_od_t *)mddb_getrecaddr(recid);

		dep = mddb_getrecdep(recid);
		dep->de_flags = MDDB_F_TRANS_MASTER;
		rbp = dep->de_rb;
		/*
		 * As trans records are always old records,
		 * we have to check if this record already has been converted.
		 * We don't want to do that work twice.
		 */
		if ((rbp->rb_private & MD_PRV_CONVD) == 0) {
			newreqsize = sizeof (mt_unit_t);
			big_un = (mt_unit_t *)kmem_zalloc(newreqsize, KM_SLEEP);
			trans_master_convert((caddr_t)small_un, (caddr_t)big_un,
			    SMALL_2_BIG);
			kmem_free(small_un, dep->de_reqsize);
			/*
			 * Update userdata and incore userdata
			 * incores are at the end of ul
			 */
			dep->de_rb_userdata_ic = big_un;
			dep->de_rb_userdata = big_un;
			dep->de_icreqsize = newreqsize;
			rbp->rb_private |= MD_PRV_CONVD;
			un = big_un;
			un->c.un_revision = MD_32BIT_META_DEV;
		} else {
			/* already converted, just set the pointer */
			un = dep->de_rb_userdata;
		}

		/*
		 * Create minor node for snarfed entry.
		 */
		(void) md_create_minor_node(MD_MIN2SET(MD_SID(un)), MD_SID(un));

		if (MD_UNIT(MD_SID(un)) != NULL) {
			/* unit already incore; schedule record for delete */
			mddb_setrecprivate(recid, MD_PRV_PENDDEL);
			continue;
		}

		all_trans_gotten = 0;
		if (trans_build_incore(un, 1) == 0) {
			mddb_setrecprivate(recid, MD_PRV_GOTIT);
			md_create_unit_incore(MD_SID(un), &trans_md_ops, 0);
			gotsomething = 1;
		}
	}

	if (!all_trans_gotten)
		return (gotsomething);

	/* everything gotten; mark leftover records for deletion */
	recid = mddb_makerecid(setno, 0);
	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0)
		if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT))
			mddb_setrecprivate(recid, MD_PRV_PENDDEL);
	return (0);
}

/*
 * Halt entry point: handle the MD_HALT_* sub-commands for every trans
 * unit in the set.  Returns 1 when halting must be refused (a unit is
 * open, or the command is unknown), else 0.
 */
static int
trans_halt(md_haltcmd_t cmd, set_t setno)
{
	unit_t		i;
	mdi_unit_t	*ui;
	minor_t		mnum;
	mt_unit_t	*un;

	if (cmd == MD_HALT_CLOSE) {
		/* first pass: refuse if any trans unit is open */
		for (i = 0; i < md_nunits; i++) {
			mnum = MD_MKMIN(setno, i);
			if ((ui = MDI_UNIT(mnum)) == NULL)
				continue;
			if (ui->ui_opsindex != trans_md_ops.md_selfindex)
				continue;
			if (md_unit_isopen(ui)) {
				return (1);
			}
		}
		/* second pass: close underlying devices */
		for (i = 0; i < md_nunits; i++) {
			mnum = MD_MKMIN(setno, i);
			if ((ui = MDI_UNIT(mnum)) == NULL)
				continue;
			if (ui->ui_opsindex != trans_md_ops.md_selfindex)
				continue;
			un = (mt_unit_t *)MD_UNIT(mnum);
			if ((un->un_flags & TRANS_NEED_OPEN) == 0) {
				trans_close_all_devs(un);
			}
		}
		return (0);
	}

	if (cmd == MD_HALT_OPEN) {
		for (i = 0; i < md_nunits; i++) {
			mnum = MD_MKMIN(setno, i);
			if ((ui = MDI_UNIT(mnum)) == NULL)
				continue;
			if (ui->ui_opsindex != trans_md_ops.md_selfindex)
				continue;
			ldl_open_underlying((mt_unit_t *)MD_UNIT(mnum));
		}
		return (0);
	}

	if (cmd == MD_HALT_CHECK) {
		for (i = 0; i < md_nunits; i++) {
			mnum = MD_MKMIN(setno, i);
			if ((ui = MDI_UNIT(mnum)) == NULL)
				continue;
			if (ui->ui_opsindex != trans_md_ops.md_selfindex)
				continue;
			if (md_unit_isopen(ui)) {
				return (1);
			}
		}
		return (0);
	}
	if (cmd == MD_HALT_DOIT) {
		for (i = 0; i < md_nunits; i++) {
			mnum = MD_MKMIN(setno, i);
			if ((ui = MDI_UNIT(mnum)) == NULL)
				continue;
			if (ui->ui_opsindex != trans_md_ops.md_selfindex)
				continue;
			(void) trans_reset((mt_unit_t *)MD_UNIT(mnum), mnum,
			    0, 1);
		}
		return (0);
	}
	if (cmd == MD_HALT_UNLOAD)
		return (0);

	/* unknown halt sub-command */
	return (1);
}

/*
 * Open entry point for the trans metadevice.  Layered opens are
 * disallowed; underlying devices that failed to open at snarf time are
 * retried here (they must all be openable for the roll thread).
 */
/*ARGSUSED3*/
static int
trans_open(
	dev_t		*dev,
	int		flag,
	int		otyp,
	cred_t		*cred_p,
	int		md_oflags
)
{
	minor_t		mnum = getminor(*dev);
	mdi_unit_t	*ui = MDI_UNIT(mnum);
	mt_unit_t	*un;
	int		err;

	/* disallow layered opens (e.g., PrestoServe) */
	if (otyp == OTYP_LYR)
		return (EINVAL);

	/* single thread */
	un = (mt_unit_t *)md_unit_openclose_enter(ui);

	/* if already open, count open, return success */
	if (md_unit_isopen(ui)) {
		err = md_unit_incopen(mnum, flag, otyp);
		md_unit_openclose_exit(ui);
		if (err != 0)
			return (err);
		return (0);
	}

	/*
	 * For some reason, not all of the metatrans devices attached to
	 * this log were openable at snarf; try again now.  All of the
	 * underlying devices have to be openable for the roll thread to work.
	 */
	if (un->un_flags & TRANS_NEED_OPEN) {
		md_unit_openclose_exit(ui);
		ldl_open_underlying(un);
		if (un->un_flags & TRANS_NEED_OPEN)
			return (EINVAL);
		un = (mt_unit_t *)md_unit_openclose_enter(ui);
	}

	/* count open */
	err = md_unit_incopen(mnum, flag, otyp);
	md_unit_openclose_exit(ui);
	if (err != 0)
		return (err);

	/* return success */
	return (0);
}

/*
 * Close entry point.  On last close, complete any pending log
 * detach/attach under the unit array writer lock.
 */
/*ARGSUSED1*/
static int
trans_close(
	dev_t		dev,
	int		flag,
	int		otyp,
	cred_t		*cred_p,
	int		md_oflags
)
{
	minor_t		mnum = getminor(dev);
	mdi_unit_t	*ui = MDI_UNIT(mnum);
	mt_unit_t	*un;
	int		err = 0;

	/* single thread */
	un = (mt_unit_t *)md_unit_openclose_enter(ui);

	/* count closed */
	if ((err = md_unit_decopen(mnum, otyp)) != 0) {
		md_unit_openclose_exit(ui);
		return (err);
	}

	/* if still open */
	if (md_unit_isopen(ui)) {
		md_unit_openclose_exit(ui);
		return (0);
	}
	md_unit_openclose_exit(ui);

	if (un->un_flags & TRANS_DETACHING) {
		/*
		 * prevent new opens and try to detach the log
		 */
		rw_enter(&md_unit_array_rw.lock, RW_WRITER);
		(void) trans_detach(un, 0);
		rw_exit(&md_unit_array_rw.lock);
	}
	if (un->un_flags & TRANS_ATTACHING) {
		/*
		 * prevent new opens and try to attach the log
		 */
		rw_enter(&md_unit_array_rw.lock, RW_WRITER);
		trans_attach(un, 1);
		rw_exit(&md_unit_array_rw.lock);
	}

	return (0);
}

/*
 * Import-set entry point: rewrite the set/record ids and minor numbers
 * inside the (always old-format, 32-bit) trans log and master records
 * so they belong to the importing set.  Returns nonzero if any master
 * record was updated.
 */
static int
trans_imp_set(
	set_t	setno
)
{
	mt_unit32_od_t	*un32;
	ml_unit32_od_t	*ul32;
	mddb_recid_t	recid;
	int		gotsomething = 0;
	mddb_type_t	typ1;
	minor_t		*self_id;	/* minor needs to be updated */
	mddb_recid_t	*record_id;	/* record id needs to be updated */

	/*
	 * Do log first if there is any
	 * Note that trans record is always 32 bit
	 */
	typ1 = (mddb_type_t)md_getshared_key(setno,
	    trans_md_ops.md_driver.md_drivername);
	recid = mddb_makerecid(setno, 0);

	while ((recid = mddb_getnextrec(recid, typ1, LOG_REC)) > 0) {
		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
			continue;

		ul32 = (ml_unit32_od_t *)mddb_getrecaddr(recid);

		/*
		 * Trans log record always is old format
		 * Go ahead update the record with the new set info
		 */
		record_id = &(ul32->un_recid);

		/*
		 * Mark the record and update it
		 */
		*record_id = MAKERECID(setno, DBID(*record_id));
		if (!md_update_minor(setno, mddb_getsidenum
		    (setno), ul32->un_key))
			goto out;
		mddb_setrecprivate(recid, MD_PRV_GOTIT);
	}


	/*
	 * Now do the master
	 */
	recid = mddb_makerecid(setno, 0);
	while ((recid = mddb_getnextrec(recid, typ1, TRANS_REC)) > 0) {
		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
			continue;

		un32 = (mt_unit32_od_t *)mddb_getrecaddr(recid);

		/*
		 * Trans master record always is old format
		 */
		self_id = &(un32->c.un_self_id);
		record_id = &(un32->c.un_record_id);

		/*
		 * Mark the record and update it
		 */
		*record_id = MAKERECID(setno, DBID(*record_id));
		*self_id = MD_MKMIN(setno, MD_MIN2UNIT(*self_id));
		if (!md_update_minor(setno, mddb_getsidenum
		    (setno), un32->un_m_key))
			goto out;
		mddb_setrecprivate(recid, MD_PRV_GOTIT);

		gotsomething = 1;
	}

out:
	return (gotsomething);
}

/* Named services exported for the rename/exchange framework */
static md_named_services_t	trans_named_services[] = {
	{(intptr_t (*)()) trans_rename_listkids,	MDRNM_LIST_URKIDS   },
	{(intptr_t (*)()) trans_rename_check,		MDRNM_CHECK	    },
	{(intptr_t (*)()) trans_renexch_update_kids,	MDRNM_UPDATE_KIDS   },
	{(intptr_t (*)()) trans_rename_update_self,	MDRNM_UPDATE_SELF   },
	{(intptr_t (*)()) trans_exchange_self_update_from_down,
						MDRNM_SELF_UPDATE_FROM_DOWN },
	{(intptr_t (*)()) trans_exchange_parent_update_to,
						MDRNM_PARENT_UPDATE_TO	    },
	{NULL,						0		    }
};

md_ops_t trans_md_ops = {
	trans_open,		/* open */
	trans_close,		/* close */
	md_trans_strategy,	/* strategy */
	NULL,			/* print */
	NULL,			/* dump */
	md_trans_read,		/* read */
	md_trans_write,		/* write */
	md_trans_ioctl,		/* trans ioctl */
	trans_snarf,		/* trans_snarf */
	trans_halt,		/* halt */
	md_trans_aread,		/* aread */
	md_trans_awrite,	/* awrite */
	trans_imp_set,		/* import set */
	trans_named_services
};

/* Module init: set up the log layer and the parent/child kmem caches */
static void
init_init(void)
{
	_init_ldl();
	ASSERT(_init_debug());
	trans_parent_cache = kmem_cache_create("md_trans_parent",
	    sizeof (md_tps_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	trans_child_cache = kmem_cache_create("md_trans_child", biosize(), 0,
	    trans_child_constructor, trans_child_destructor,
	    NULL, NULL, NULL, 0);
}

/* Module fini: tear down caches and the log layer */
static void
fini_uninit(void)
{
	ASSERT(_fini_debug());
	_fini_ldl();
	kmem_cache_destroy(trans_parent_cache);
	kmem_cache_destroy(trans_child_cache);
	trans_parent_cache = trans_child_cache = NULL;
}

/* define the module linkage */
MD_PLUGIN_MISC_MODULE("trans module %I%", init_init(), fini_uninit())