1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * Copyright (c) 2011 Bayard G. Bell. All rights reserved. 26 * Copyright 2012 Milan Jurik. All rights reserved. 27 */ 28 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/conf.h> 32 #include <sys/debug.h> 33 #include <sys/file.h> 34 #include <sys/user.h> 35 #include <sys/uio.h> 36 #include <sys/dkio.h> 37 #include <sys/vtoc.h> 38 #include <sys/kmem.h> 39 #include <vm/page.h> 40 #include <sys/cmn_err.h> 41 #include <sys/sysmacros.h> 42 #include <sys/types.h> 43 #include <sys/mkdev.h> 44 #include <sys/stat.h> 45 #include <sys/open.h> 46 #include <sys/modctl.h> 47 #include <sys/ddi.h> 48 #include <sys/sunddi.h> 49 #include <sys/disp.h> 50 #include <sys/buf.h> 51 52 #include <sys/lvm/mdvar.h> 53 #include <sys/lvm/md_trans.h> 54 #include <sys/lvm/md_notify.h> 55 #include <sys/lvm/md_convert.h> 56 57 #include <sys/sysevent/eventdefs.h> 58 #include <sys/sysevent/svm.h> 59 60 md_ops_t trans_md_ops; 61 #ifndef lint 62 md_ops_t *md_interface_ops = &trans_md_ops; 63 #endif /* lint */ 64 65 extern unit_t md_nunits; 66 extern set_t md_nsets; 67 extern md_set_t md_set[]; 68 extern int md_status; 69 extern major_t md_major; 70 71 extern int md_trans_ioctl(dev_t, int, void *, int, IOLOCK *); 72 extern md_krwlock_t md_unit_array_rw; 73 74 extern mdq_anchor_t md_done_daemon; 75 76 extern int md_in_upgrade; 77 78 static kmem_cache_t *trans_parent_cache = NULL; 79 kmem_cache_t *trans_child_cache = NULL; 80 81 #ifdef DEBUG 82 /* 83 * ROUTINES FOR TESTING: 84 */ 85 static int 86 _init_debug() 87 { 88 extern int _init_ioctl(); 89 90 return (_init_ioctl()); 91 } 92 static int 93 _fini_debug() 94 { 95 extern int _fini_ioctl(); 96 int err = 0; 97 98 err = _fini_ioctl(); 99 return (err); 100 } 101 102 #endif /* DEBUG */ 103 104 /* 105 * BEGIN RELEASE DEBUG 106 * The following routines remain in the released product for testability 107 */ 108 int 109 trans_done_shadow(buf_t *bp) 110 { 111 buf_t *pb; 112 md_tps_t *ps = (md_tps_t *)bp->b_chain; 113 int rv = 0; 114 115 pb = ps->ps_bp; 116 mutex_enter(&ps->ps_mx); 117 ps->ps_count--; 118 if (ps->ps_count > 0) { 119 if ((bp->b_flags & B_ERROR) != 0) { 120 pb->b_flags |= B_ERROR; 121 pb->b_error = bp->b_error; 122 } 123 mutex_exit(&ps->ps_mx); 124 kmem_cache_free(trans_child_cache, bp); 125 } else { 126 mutex_exit(&ps->ps_mx); 127 mutex_destroy(&ps->ps_mx); 128 rv = trans_done(bp); 129 } 130 return (rv); 131 } 132 133 static void 134 shadow_debug(mt_unit_t *un, /* trans unit info */ 135 buf_t *pb, /* primary buffer */ 136 md_tps_t *ps, /* trans parent save */ 137 buf_t *cb, /* buffer for writing to master */ 138 int flag, 139 void *private) 140 { 141 buf_t *sb; /* Shadow buffer */ 142 143 mutex_init(&ps->ps_mx, NULL, MUTEX_DEFAULT, NULL); 144 ps->ps_count = 2; /* Write child buffer & shadow */ 145 cb->b_iodone = trans_done_shadow; 146 sb = kmem_cache_alloc(trans_child_cache, MD_ALLOCFLAGS); 147 trans_child_init(sb); 148 sb = bioclone(pb, 0, pb->b_bcount, md_dev64_to_dev(un->un_s_dev), 149 pb->b_blkno, trans_done_shadow, sb, KM_NOSLEEP); 150 151 sb->b_flags |= B_ASYNC; 152 sb->b_chain = (void *)ps; 153 md_call_strategy(sb, flag | MD_STR_MAPPED, private); 154 } 155 /* 156 * END RELEASE DEBUG 157 */ 158 159 /* 160 * COMMON MEMORY ALLOCATION ROUTINES (so that we can discover leaks) 161 */ 162 void * 163 md_trans_zalloc(size_t nb) 164 { 165 TRANSSTATS(ts_trans_zalloc); 166 TRANSSTATSADD(ts_trans_alloced, nb); 167 return (kmem_zalloc(nb, KM_SLEEP)); 168 } 169 void * 170 md_trans_alloc(size_t nb) 171 { 172 TRANSSTATS(ts_trans_alloc); 173 TRANSSTATSADD(ts_trans_alloced, nb); 174 return (kmem_alloc(nb, KM_SLEEP)); 175 } 176 void 177 md_trans_free(void *va, size_t nb) 178 { 179 TRANSSTATS(ts_trans_free); 180 TRANSSTATSADD(ts_trans_freed, nb); 181 if (nb) 182 kmem_free(va, nb); 183 } 184 185 static void 186 trans_parent_init(md_tps_t *ps) 187 { 188 bzero(ps, sizeof (md_tps_t)); 189 } 190 191 /*ARGSUSED1*/ 192 int 193 trans_child_constructor(void *p, void *d1, int d2) 194 { 195 bioinit(p); 196 return (0); 197 } 198 199 void 200 trans_child_init(struct buf *bp) 201 { 202 md_bioreset(bp); 203 } 204 205 /*ARGSUSED1*/ 206 void 207 trans_child_destructor(void *p, void *d) 208 { 209 biofini(p); 210 } 211 212 void 213 trans_commit(mt_unit_t *un, int domstr) 214 { 215 mddb_recid_t recids[4]; 216 md_unit_t *su; 217 int ri = 0; 218 219 if (md_get_setstatus(MD_UN2SET(un)) & MD_SET_STALE) 220 return; 221 222 recids[ri++] = un->c.un_record_id; 223 224 if (domstr) 225 if (md_getmajor(un->un_m_dev) == md_major) { 226 su = MD_UNIT(md_getminor(un->un_m_dev)); 227 recids[ri++] = su->c.un_record_id; 228 } 229 230 if (ri == 0) 231 return; 232 recids[ri] = 0; 233 234 uniqtime32(&un->un_timestamp); 235 mddb_commitrecs_wrapper(recids); 236 } 237 238 void 239 trans_close_all_devs(mt_unit_t *un) 240 { 241 if ((un->un_flags & TRANS_NEED_OPEN) == 0) { 242 md_layered_close(un->un_m_dev, MD_OFLG_NULL); 243 if (un->un_l_unit) 244 ldl_close_dev(un->un_l_unit); 245 un->un_flags |= TRANS_NEED_OPEN; 246 } 247 } 248 249 int 250 trans_open_all_devs(mt_unit_t *un) 251 { 252 int err; 253 minor_t mnum = MD_SID(un); 254 md_dev64_t tmpdev = un->un_m_dev; 255 set_t setno = MD_MIN2SET(MD_SID(un)); 256 side_t side = mddb_getsidenum(setno); 257 258 /* 259 * Do the open by device id if it is regular device 260 */ 261 if ((md_getmajor(tmpdev) != md_major) && 262 md_devid_found(setno, side, un->un_m_key) == 1) { 263 tmpdev = md_resolve_bydevid(mnum, tmpdev, un->un_m_key); 264 } 265 err = md_layered_open(mnum, &tmpdev, MD_OFLG_NULL); 266 un->un_m_dev = tmpdev; 267 268 if (err) 269 return (ENXIO); 270 271 if (un->un_l_unit) { 272 err = ldl_open_dev(un, un->un_l_unit); 273 if (err) { 274 md_layered_close(tmpdev, MD_OFLG_NULL); 275 return (ENXIO); 276 } 277 } 278 return (0); 279 } 280 281 uint_t mt_debug = 0; 282 283 int 284 trans_build_incore(void *p, int snarfing) 285 { 286 mt_unit_t *un = (mt_unit_t *)p; 287 minor_t mnum; 288 set_t setno; 289 290 /* 291 * initialize debug mode and always start with no shadowing. 292 */ 293 if (!snarfing) 294 un->un_debug = mt_debug; 295 un->un_s_dev = NODEV64; 296 297 mnum = MD_SID(un); 298 299 if (MD_UNIT(mnum) != NULL) 300 return (0); 301 302 setno = MD_MIN2SET(mnum); 303 304 /* 305 * If snarfing the metatrans device, 306 * then remake the device number 307 */ 308 if (snarfing) { 309 un->un_m_dev = md_getdevnum(setno, mddb_getsidenum(setno), 310 un->un_m_key, MD_NOTRUST_DEVT); 311 } 312 313 /* 314 * db rec is partially deleted; finish the db delete later 315 */ 316 if (MD_STATUS(un) & MD_UN_BEING_RESET) { 317 mddb_setrecprivate(un->c.un_record_id, MD_PRV_PENDCLEAN); 318 return (1); 319 } 320 321 /* 322 * With the current device id implementation there is possibility 323 * that we may have NODEV if the underlying can't be resolved at 324 * snarf time. If this is the case we want to be consistent with 325 * the normal behavior and continue to allow the snarf of unit 326 * and resolve the devt at the open time 327 */ 328 if ((md_getmajor(un->un_m_dev) == md_major) && 329 (md_dev_exists(un->un_m_dev) == 0)) { 330 return (1); 331 } 332 333 /* 334 * retain the detach status; reset open status 335 */ 336 un->un_flags &= (TRANS_DETACHING | TRANS_DETACHED); 337 un->un_flags |= TRANS_NEED_OPEN; 338 if ((un->un_flags & TRANS_DETACHED) == 0) 339 un->un_flags |= TRANS_ATTACHING; 340 341 /* 342 * log device not set up yet; try again later 343 */ 344 if ((un->un_flags & TRANS_DETACHED) == 0) 345 if (ldl_findlog(un->un_l_recid) == NULL) 346 return (1); 347 348 /* 349 * initialize incore fields 350 */ 351 un->un_next = NULL; 352 un->un_l_unit = NULL; 353 un->un_deltamap = NULL; 354 un->un_udmap = NULL; 355 un->un_logmap = NULL; 356 un->un_matamap = NULL; 357 un->un_shadowmap = NULL; 358 un->un_ut = NULL; 359 un->un_logreset = 0; 360 un->un_dev = md_makedevice(md_major, mnum); 361 MD_STATUS(un) = 0; 362 363 /* necessary because capability didn't exist pre-4.1 */ 364 MD_CAPAB(un) = (MD_CAN_META_CHILD & ~MD_CAN_PARENT); 365 366 /* 367 * attach the log 368 */ 369 trans_attach(un, 0); 370 371 /* 372 * check for master dev dynconcat 373 */ 374 if (md_getmajor(un->un_m_dev) == md_major) { 375 struct mdc_unit *c; 376 377 c = MD_UNIT(md_getminor(un->un_m_dev)); 378 un->c.un_total_blocks = c->un_total_blocks; 379 } 380 381 /* place various information in the in-core data structures */ 382 md_nblocks_set(mnum, un->c.un_total_blocks); 383 MD_UNIT(mnum) = un; 384 385 return (0); 386 } 387 388 int 389 trans_detach(mt_unit_t *un, int force) 390 { 391 mdi_unit_t *ui = MDI_UNIT(MD_SID(un)); 392 int error = 0; 393 394 /* 395 * The caller is responsible for single-threading this routine. 396 */ 397 398 if (ui == NULL) 399 return (0); 400 401 /* 402 * already detached or the log isn't attached yet; do nothing 403 */ 404 if (un->un_flags & (TRANS_DETACHED | TRANS_ATTACHING)) 405 return (0); 406 407 /* 408 * set state to detaching 409 */ 410 if (force || !md_unit_isopen(ui)) { 411 un->un_flags |= TRANS_DETACHING; 412 if (!MD_UPGRADE) { 413 trans_commit(un, 0); 414 } 415 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DETACHING, TAG_METADEVICE, 416 MD_UN2SET(un), MD_SID(un)); 417 } 418 419 /* 420 * device is busy 421 */ 422 if (md_unit_isopen(ui)) 423 return (EBUSY); 424 425 /* 426 * detach the log 427 * if successful 428 * flags committed to TRANS_DETACHED in database 429 * un->un_l_unit set to NULL 430 * no error returned 431 */ 432 error = ldl_reset(un, 1, force); 433 if (error) 434 return (error); 435 436 /* 437 * commit to database 438 */ 439 if (!MD_UPGRADE) { 440 trans_commit(un, 0); 441 } 442 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DETACH, TAG_METADEVICE, MD_UN2SET(un), 443 MD_SID(un)); 444 445 return (0); 446 } 447 448 void 449 trans_attach(mt_unit_t *un, int attaching) 450 { 451 mdi_unit_t *ui = MDI_UNIT(MD_SID(un)); 452 ml_unit_t *ul; 453 454 /* 455 * called from snarf, set, and attach. Hence, the attaching param 456 * The caller is responsible for single-threading this routine. 457 */ 458 459 /* 460 * not attaching; do nothing 461 */ 462 if ((un->un_flags & TRANS_ATTACHING) == 0) 463 return; 464 465 /* 466 * find log unit struct 467 */ 468 ul = ldl_findlog(un->un_l_recid); 469 if (ul == NULL) 470 return; 471 un->un_l_dev = ul->un_dev; 472 473 /* 474 * device is busy; do nothing 475 */ 476 if (attaching && md_unit_isopen(ui)) 477 return; 478 /* 479 * other functions use non-NULL un_l_unit as detach/attach flag 480 */ 481 un->un_l_unit = ul; 482 483 /* 484 * add metatrans device to the log's list of mt devices 485 */ 486 ldl_utadd(un); 487 488 /* 489 * attached 490 */ 491 un->un_flags &= ~TRANS_ATTACHING; 492 493 } 494 495 int 496 trans_reset(mt_unit_t *un, minor_t mnum, int removing, int force) 497 { 498 sv_dev_t sv; 499 mddb_recid_t vtoc_id; 500 int error = 0; 501 502 /* 503 * reset log, maps, and ufs interface 504 */ 505 error = ldl_reset(un, removing, force); 506 if (error) 507 return (error); 508 509 /* 510 * done with underyling devices 511 */ 512 trans_close_all_devs(un); 513 514 md_destroy_unit_incore(mnum, &trans_md_ops); 515 516 md_nblocks_set(mnum, -1ULL); 517 MD_UNIT(mnum) = NULL; 518 519 if (!removing) 520 return (0); 521 522 md_reset_parent(un->un_m_dev); 523 MD_STATUS(un) |= MD_UN_BEING_RESET; 524 trans_commit(un, 1); 525 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, TAG_METADEVICE, MD_UN2SET(un), 526 MD_SID(un)); 527 528 /* Save the mstr key */ 529 sv.setno = MD_MIN2SET(mnum); 530 sv.key = un->un_m_key; 531 532 vtoc_id = un->c.un_vtoc_id; 533 534 mddb_deleterec_wrapper(un->c.un_record_id); 535 536 /* Remove the vtoc, if present */ 537 if (vtoc_id) 538 mddb_deleterec_wrapper(vtoc_id); 539 md_rem_names(&sv, 1); 540 return (0); 541 } 542 543 static void 544 trans_wait_panic(struct buf *cb) 545 { 546 while ((cb->b_flags & B_DONE) == 0) { 547 md_daemon(1, &md_done_daemon); 548 drv_usecwait(10); 549 } 550 } 551 552 static void 553 trans_error(md_tps_t *ps) 554 { 555 md_dev64_t md_dev; 556 md_dev64_t m_dev; 557 char *str; 558 struct buf *pb; 559 mdi_unit_t *ui; 560 561 pb = ps->ps_bp; 562 ui = ps->ps_ui; 563 564 /* 565 * gather up params for cmn_err 566 */ 567 if (pb->b_flags & B_READ) 568 str = "read"; 569 else 570 str = "write"; 571 md_dev = md_expldev(pb->b_edev); 572 m_dev = ps->ps_un->un_m_dev; 573 574 /* 575 * free up the resources for this request and done the errored buf 576 */ 577 md_kstat_done(ui, pb, 0); 578 kmem_cache_free(trans_parent_cache, ps); 579 md_unit_readerexit(ui); 580 md_biodone(pb); 581 582 /* 583 * print pretty error message 584 */ 585 cmn_err(CE_WARN, "md: %s: %s error on %s", 586 md_shortname(md_getminor(md_dev)), str, 587 md_devname(MD_DEV2SET(md_dev), m_dev, NULL, 0)); 588 } 589 590 int 591 trans_done(struct buf *cb) 592 { 593 struct buf *pb; 594 mdi_unit_t *ui; 595 md_tps_t *ps; 596 597 ps = (md_tps_t *)cb->b_chain; 598 pb = ps->ps_bp; 599 ui = ps->ps_ui; 600 601 if (cb->b_flags & B_ERROR) { 602 pb->b_flags |= B_ERROR; 603 pb->b_error = cb->b_error; 604 /* 605 * device not in hard error state; report error 606 */ 607 if (!ldl_isherror(ps->ps_un->un_l_unit)) { 608 daemon_request(&md_done_daemon, trans_error, 609 (daemon_queue_t *)ps, REQ_OLD); 610 611 if (cb->b_flags & B_REMAPPED) 612 bp_mapout(cb); 613 if (panicstr) 614 cb->b_flags |= B_DONE; 615 else 616 kmem_cache_free(trans_child_cache, cb); 617 618 return (1); 619 } 620 } 621 622 if (cb->b_flags & B_REMAPPED) 623 bp_mapout(cb); 624 625 if (panicstr) 626 cb->b_flags |= B_DONE; 627 else 628 kmem_cache_free(trans_child_cache, cb); 629 kmem_cache_free(trans_parent_cache, ps); 630 md_kstat_done(ui, pb, 0); 631 md_unit_readerexit(ui); 632 md_biodone(pb); 633 634 return (0); 635 } 636 637 static void 638 md_trans_strategy(buf_t *pb, int flag, void *private) 639 { 640 md_tps_t *ps; 641 buf_t *cb; /* child buf pointer */ 642 mt_unit_t *un; 643 mdi_unit_t *ui; 644 645 ui = MDI_UNIT(getminor(pb->b_edev)); 646 647 md_kstat_waitq_enter(ui); 648 649 un = (mt_unit_t *)md_unit_readerlock(ui); 650 651 if (md_inc_iocount(MD_MIN2SET(getminor(pb->b_edev))) != 0) { 652 pb->b_flags |= B_ERROR; 653 pb->b_error = ENXIO; 654 pb->b_resid = pb->b_bcount; 655 md_kstat_waitq_exit(ui); 656 md_unit_readerexit(ui); 657 biodone(pb); 658 return; 659 } 660 661 ASSERT(!(flag & MD_STR_NOTTOP)); 662 663 /* check and map */ 664 if (md_checkbuf(ui, (md_unit_t *)un, pb) != 0) { 665 md_kstat_waitq_exit(ui); 666 return; 667 } 668 669 bp_mapin(pb); 670 671 ps = kmem_cache_alloc(trans_parent_cache, MD_ALLOCFLAGS); 672 trans_parent_init(ps); 673 674 /* 675 * Save essential information from the original buffhdr 676 * in the md_save structure. 677 */ 678 ps->ps_un = un; 679 ps->ps_ui = ui; 680 ps->ps_bp = pb; 681 682 cb = kmem_cache_alloc(trans_child_cache, MD_ALLOCFLAGS); 683 trans_child_init(cb); 684 685 cb = bioclone(pb, 0, pb->b_bcount, md_dev64_to_dev(un->un_m_dev), 686 pb->b_blkno, trans_done, cb, KM_NOSLEEP); 687 688 cb->b_chain = (void *)ps; 689 690 /* 691 * RELEASE DEBUG 692 * The following calls shadow debug for testing purposes if we are 693 * writing and if shadowing is turned on. 694 */ 695 if ((un->un_s_dev != NODEV64) && 696 ((pb->b_flags & B_READ) == 0)) 697 shadow_debug(un, pb, ps, cb, flag, private); 698 699 md_kstat_waitq_to_runq(ui); 700 701 (void) md_call_strategy(cb, flag | MD_STR_MAPPED | MD_NOBLOCK, private); 702 703 /* 704 * panic in progress; process daemon queues 705 */ 706 if (panicstr) { 707 trans_wait_panic(cb); 708 kmem_cache_free(trans_child_cache, cb); 709 } 710 } 711 712 /* ARGSUSED */ 713 static int 714 md_trans_read(dev_t dev, struct uio *uio, cred_t *credp) 715 { 716 int error; 717 718 if ((error = md_chk_uio(uio)) != 0) 719 return (error); 720 721 return (physio(mdstrategy, NULL, dev, B_READ, minphys, uio)); 722 } 723 724 /* ARGSUSED */ 725 static int 726 md_trans_aread(dev_t dev, struct aio_req *aio, cred_t *credp) 727 { 728 int error; 729 730 if ((error = md_chk_uio(aio->aio_uio)) != 0) 731 return (error); 732 733 return (aphysio(mdstrategy, anocancel, dev, B_READ, minphys, aio)); 734 } 735 736 /* ARGSUSED */ 737 static int 738 md_trans_write(dev_t dev, struct uio *uio, cred_t *credp) 739 { 740 int error; 741 742 if ((error = md_chk_uio(uio)) != 0) 743 return (error); 744 745 return (physio(mdstrategy, NULL, dev, B_WRITE, minphys, uio)); 746 } 747 748 /* ARGSUSED */ 749 static int 750 md_trans_awrite(dev_t dev, struct aio_req *aio, cred_t *credp) 751 { 752 int error; 753 754 if ((error = md_chk_uio(aio->aio_uio)) != 0) 755 return (error); 756 757 return (aphysio(mdstrategy, anocancel, dev, B_WRITE, minphys, aio)); 758 } 759 760 static void 761 trans_cleanup(mt_unit_t *un) 762 { 763 sv_dev_t sv; 764 765 MD_STATUS(un) |= MD_UN_LOG_DELETED; 766 trans_commit(un, 0); 767 768 /* Save the mstr key */ 769 sv.setno = MD_UN2SET(un); 770 sv.key = un->un_m_key; 771 772 mddb_deleterec_wrapper(un->c.un_record_id); 773 774 md_rem_names(&sv, 1); 775 } 776 777 static int 778 trans_snarf(md_snarfcmd_t cmd, set_t setno) 779 { 780 mt_unit_t *un; 781 ml_unit_t *ul; 782 mddb_recid_t recid; 783 int gotsomething; 784 mddb_type_t typ1; 785 int all_trans_gotten; 786 mddb_de_ic_t *dep; 787 mddb_rb32_t *rbp; 788 size_t newreqsize; 789 static int trans_found = 0; 790 791 792 793 if (cmd == MD_SNARF_CLEANUP) { 794 795 if (md_get_setstatus(setno) & MD_SET_STALE) 796 return (0); 797 798 /* 799 * clean up partially cleared trans devices 800 */ 801 typ1 = (mddb_type_t)md_getshared_key(setno, 802 trans_md_ops.md_driver.md_drivername); 803 recid = mddb_makerecid(setno, 0); 804 while ((recid = mddb_getnextrec(recid, typ1, TRANS_REC)) > 0) { 805 un = (mt_unit_t *)mddb_getrecaddr(recid); 806 (void) trans_detach(un, 1); 807 if (mddb_getrecprivate(recid) & MD_PRV_CLEANUP) { 808 trans_cleanup(un); 809 recid = mddb_makerecid(setno, 0); 810 } 811 } 812 /* 813 * clean up partially cleared log devices 814 */ 815 recid = mddb_makerecid(setno, 0); 816 while ((recid = mddb_getnextrec(recid, typ1, LOG_REC)) > 0) { 817 if (mddb_getrecprivate(recid) & MD_PRV_CLEANUP) { 818 ul = (ml_unit_t *)mddb_getrecaddr(recid); 819 ldl_cleanup(ul); 820 recid = mddb_makerecid(setno, 0); 821 } 822 } 823 824 return (0); 825 } 826 827 /* 828 * must snarf up the log devices first 829 */ 830 gotsomething = 0; 831 all_trans_gotten = 1; 832 typ1 = (mddb_type_t)md_getshared_key(setno, 833 trans_md_ops.md_driver.md_drivername); 834 recid = mddb_makerecid(setno, 0); 835 while ((recid = mddb_getnextrec(recid, typ1, LOG_REC)) > 0) { 836 ml_unit_t *big_ul; 837 ml_unit32_od_t *small_ul; 838 839 if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 840 continue; 841 842 small_ul = (ml_unit32_od_t *)mddb_getrecaddr(recid); 843 dep = mddb_getrecdep(recid); 844 dep->de_flags = MDDB_F_TRANS_LOG; 845 rbp = dep->de_rb; 846 /* 847 * As trans records are always old records, 848 * we have to check if this record already has been converted. 849 * We don't want to do that work twice. 850 */ 851 if ((rbp->rb_private & MD_PRV_CONVD) == 0) { 852 newreqsize = sizeof (ml_unit_t); 853 big_ul = (ml_unit_t *)kmem_zalloc(newreqsize, KM_SLEEP); 854 trans_log_convert((caddr_t)small_ul, (caddr_t)big_ul, 855 SMALL_2_BIG); 856 kmem_free(small_ul, dep->de_reqsize); 857 /* 858 * Update userdata and incore userdata 859 * incores are at the end of ul 860 */ 861 dep->de_rb_userdata_ic = big_ul; 862 dep->de_rb_userdata = big_ul; 863 dep->de_icreqsize = newreqsize; 864 rbp->rb_private |= MD_PRV_CONVD; 865 ul = big_ul; 866 } else { 867 /* already converted, just set the pointer */ 868 ul = dep->de_rb_userdata; 869 } 870 all_trans_gotten = 0; 871 if (ldl_build_incore(ul, 1) == 0) { 872 mddb_setrecprivate(recid, MD_PRV_GOTIT); 873 gotsomething = 1; 874 } 875 } 876 877 /* 878 * now snarf up metatrans devices 879 */ 880 gotsomething = 0; 881 recid = mddb_makerecid(setno, 0); 882 while ((recid = mddb_getnextrec(recid, typ1, TRANS_REC)) > 0) { 883 mt_unit_t *big_un; 884 mt_unit32_od_t *small_un; 885 886 if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 887 continue; 888 889 if ((trans_found == 0) && (!MD_UPGRADE)) { 890 cmn_err(CE_WARN, MD_EOF_TRANS_MSG MD_EOF_TRANS_WARNING); 891 trans_found = 1; 892 } 893 894 small_un = (mt_unit32_od_t *)mddb_getrecaddr(recid); 895 896 dep = mddb_getrecdep(recid); 897 dep->de_flags = MDDB_F_TRANS_MASTER; 898 rbp = dep->de_rb; 899 /* 900 * As trans records are always old records, 901 * we have to check if this record already has been converted. 902 * We don't want to do that work twice. 903 */ 904 if ((rbp->rb_private & MD_PRV_CONVD) == 0) { 905 newreqsize = sizeof (mt_unit_t); 906 big_un = (mt_unit_t *)kmem_zalloc(newreqsize, KM_SLEEP); 907 trans_master_convert((caddr_t)small_un, (caddr_t)big_un, 908 SMALL_2_BIG); 909 kmem_free(small_un, dep->de_reqsize); 910 /* 911 * Update userdata and incore userdata 912 * incores are at the end of ul 913 */ 914 dep->de_rb_userdata_ic = big_un; 915 dep->de_rb_userdata = big_un; 916 dep->de_icreqsize = newreqsize; 917 rbp->rb_private |= MD_PRV_CONVD; 918 un = big_un; 919 un->c.un_revision &= ~MD_64BIT_META_DEV; 920 } else { 921 /* already converted, just set the pointer */ 922 un = dep->de_rb_userdata; 923 } 924 925 /* 926 * Create minor node for snarfed entry. 927 */ 928 (void) md_create_minor_node(MD_MIN2SET(MD_SID(un)), MD_SID(un)); 929 930 if (MD_UNIT(MD_SID(un)) != NULL) { 931 mddb_setrecprivate(recid, MD_PRV_PENDDEL); 932 continue; 933 } 934 935 all_trans_gotten = 0; 936 if (trans_build_incore(un, 1) == 0) { 937 mddb_setrecprivate(recid, MD_PRV_GOTIT); 938 md_create_unit_incore(MD_SID(un), &trans_md_ops, 0); 939 gotsomething = 1; 940 } 941 } 942 943 if (!all_trans_gotten) 944 return (gotsomething); 945 946 recid = mddb_makerecid(setno, 0); 947 while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) 948 if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT)) 949 mddb_setrecprivate(recid, MD_PRV_PENDDEL); 950 return (0); 951 } 952 953 static int 954 trans_halt(md_haltcmd_t cmd, set_t setno) 955 { 956 unit_t i; 957 mdi_unit_t *ui; 958 minor_t mnum; 959 mt_unit_t *un; 960 961 if (cmd == MD_HALT_CLOSE) { 962 for (i = 0; i < md_nunits; i++) { 963 mnum = MD_MKMIN(setno, i); 964 if ((ui = MDI_UNIT(mnum)) == NULL) 965 continue; 966 if (ui->ui_opsindex != trans_md_ops.md_selfindex) 967 continue; 968 if (md_unit_isopen(ui)) { 969 return (1); 970 } 971 } 972 for (i = 0; i < md_nunits; i++) { 973 mnum = MD_MKMIN(setno, i); 974 if ((ui = MDI_UNIT(mnum)) == NULL) 975 continue; 976 if (ui->ui_opsindex != trans_md_ops.md_selfindex) 977 continue; 978 un = (mt_unit_t *)MD_UNIT(mnum); 979 if ((un->un_flags & TRANS_NEED_OPEN) == 0) { 980 trans_close_all_devs(un); 981 } 982 } 983 return (0); 984 } 985 986 if (cmd == MD_HALT_OPEN) { 987 for (i = 0; i < md_nunits; i++) { 988 mnum = MD_MKMIN(setno, i); 989 if ((ui = MDI_UNIT(mnum)) == NULL) 990 continue; 991 if (ui->ui_opsindex != trans_md_ops.md_selfindex) 992 continue; 993 ldl_open_underlying((mt_unit_t *)MD_UNIT(mnum)); 994 } 995 return (0); 996 } 997 998 if (cmd == MD_HALT_CHECK) { 999 for (i = 0; i < md_nunits; i++) { 1000 mnum = MD_MKMIN(setno, i); 1001 if ((ui = MDI_UNIT(mnum)) == NULL) 1002 continue; 1003 if (ui->ui_opsindex != trans_md_ops.md_selfindex) 1004 continue; 1005 if (md_unit_isopen(ui)) { 1006 return (1); 1007 } 1008 } 1009 return (0); 1010 } 1011 if (cmd == MD_HALT_DOIT) { 1012 for (i = 0; i < md_nunits; i++) { 1013 mnum = MD_MKMIN(setno, i); 1014 if ((ui = MDI_UNIT(mnum)) == NULL) 1015 continue; 1016 if (ui->ui_opsindex != trans_md_ops.md_selfindex) 1017 continue; 1018 (void) trans_reset((mt_unit_t *)MD_UNIT(mnum), mnum, 1019 0, 1); 1020 } 1021 return (0); 1022 } 1023 if (cmd == MD_HALT_UNLOAD) 1024 return (0); 1025 1026 return (1); 1027 } 1028 1029 /*ARGSUSED3*/ 1030 static int 1031 trans_open( 1032 dev_t *dev, 1033 int flag, 1034 int otyp, 1035 cred_t *cred_p, 1036 int md_oflags 1037 ) 1038 { 1039 minor_t mnum = getminor(*dev); 1040 mdi_unit_t *ui = MDI_UNIT(mnum); 1041 mt_unit_t *un; 1042 int err; 1043 1044 /* disallow layered opens (e.g., PrestoServe) */ 1045 if (otyp == OTYP_LYR) 1046 return (EINVAL); 1047 1048 /* single thread */ 1049 un = (mt_unit_t *)md_unit_openclose_enter(ui); 1050 1051 /* if already open, count open, return success */ 1052 if (md_unit_isopen(ui)) { 1053 err = md_unit_incopen(mnum, flag, otyp); 1054 md_unit_openclose_exit(ui); 1055 if (err != 0) 1056 return (err); 1057 return (0); 1058 } 1059 1060 /* 1061 * For some reason, not all of the metatrans devices attached to 1062 * this log were openable at snarf; try again now. All of the 1063 * underlying devices have to be openable for the roll thread to work. 1064 */ 1065 if (un->un_flags & TRANS_NEED_OPEN) { 1066 md_unit_openclose_exit(ui); 1067 ldl_open_underlying(un); 1068 if (un->un_flags & TRANS_NEED_OPEN) 1069 return (EINVAL); 1070 un = (mt_unit_t *)md_unit_openclose_enter(ui); 1071 } 1072 1073 /* count open */ 1074 err = md_unit_incopen(mnum, flag, otyp); 1075 md_unit_openclose_exit(ui); 1076 if (err != 0) 1077 return (err); 1078 1079 /* return success */ 1080 return (0); 1081 } 1082 1083 /*ARGSUSED1*/ 1084 static int 1085 trans_close( 1086 dev_t dev, 1087 int flag, 1088 int otyp, 1089 cred_t *cred_p, 1090 int md_oflags 1091 ) 1092 { 1093 minor_t mnum = getminor(dev); 1094 mdi_unit_t *ui = MDI_UNIT(mnum); 1095 mt_unit_t *un; 1096 int err = 0; 1097 1098 /* single thread */ 1099 un = (mt_unit_t *)md_unit_openclose_enter(ui); 1100 1101 /* count closed */ 1102 if ((err = md_unit_decopen(mnum, otyp)) != 0) { 1103 md_unit_openclose_exit(ui); 1104 return (err); 1105 } 1106 1107 /* if still open */ 1108 if (md_unit_isopen(ui)) { 1109 md_unit_openclose_exit(ui); 1110 return (0); 1111 } 1112 md_unit_openclose_exit(ui); 1113 1114 if (un->un_flags & TRANS_DETACHING) { 1115 /* 1116 * prevent new opens and try to detach the log 1117 */ 1118 rw_enter(&md_unit_array_rw.lock, RW_WRITER); 1119 (void) trans_detach(un, 0); 1120 rw_exit(&md_unit_array_rw.lock); 1121 } 1122 if (un->un_flags & TRANS_ATTACHING) { 1123 /* 1124 * prevent new opens and try to attach the log 1125 */ 1126 rw_enter(&md_unit_array_rw.lock, RW_WRITER); 1127 trans_attach(un, 1); 1128 rw_exit(&md_unit_array_rw.lock); 1129 } 1130 1131 return (0); 1132 } 1133 1134 static int 1135 trans_imp_set( 1136 set_t setno 1137 ) 1138 { 1139 mt_unit32_od_t *un32; 1140 ml_unit32_od_t *ul32; 1141 mddb_recid_t recid; 1142 int gotsomething = 0; 1143 mddb_type_t typ1; 1144 minor_t *self_id; /* minor needs to be updated */ 1145 mddb_recid_t *record_id; /* record id needs to be updated */ 1146 1147 /* 1148 * Do log first if there is any 1149 * Note that trans record is always 32 bit 1150 */ 1151 typ1 = (mddb_type_t)md_getshared_key(setno, 1152 trans_md_ops.md_driver.md_drivername); 1153 recid = mddb_makerecid(setno, 0); 1154 1155 while ((recid = mddb_getnextrec(recid, typ1, LOG_REC)) > 0) { 1156 if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 1157 continue; 1158 1159 ul32 = (ml_unit32_od_t *)mddb_getrecaddr(recid); 1160 1161 /* 1162 * Trans log record always is old format 1163 * Go ahead update the record with the new set info 1164 */ 1165 record_id = &(ul32->un_recid); 1166 1167 /* 1168 * Mark the record and update it 1169 */ 1170 *record_id = MAKERECID(setno, DBID(*record_id)); 1171 if (!md_update_minor(setno, mddb_getsidenum 1172 (setno), ul32->un_key)) 1173 goto out; 1174 mddb_setrecprivate(recid, MD_PRV_GOTIT); 1175 } 1176 1177 1178 /* 1179 * Now do the master 1180 */ 1181 recid = mddb_makerecid(setno, 0); 1182 while ((recid = mddb_getnextrec(recid, typ1, TRANS_REC)) > 0) { 1183 if (mddb_getrecprivate(recid) & MD_PRV_GOTIT) 1184 continue; 1185 1186 un32 = (mt_unit32_od_t *)mddb_getrecaddr(recid); 1187 1188 /* 1189 * Trans master record always is old format 1190 */ 1191 self_id = &(un32->c.un_self_id); 1192 record_id = &(un32->c.un_record_id); 1193 1194 /* 1195 * Mark the record and update it 1196 */ 1197 *record_id = MAKERECID(setno, DBID(*record_id)); 1198 *self_id = MD_MKMIN(setno, MD_MIN2UNIT(*self_id)); 1199 if (!md_update_minor(setno, mddb_getsidenum 1200 (setno), un32->un_m_key)) 1201 goto out; 1202 mddb_setrecprivate(recid, MD_PRV_GOTIT); 1203 1204 gotsomething = 1; 1205 } 1206 1207 out: 1208 return (gotsomething); 1209 } 1210 1211 static md_named_services_t trans_named_services[] = { 1212 {(intptr_t (*)()) trans_rename_listkids, MDRNM_LIST_URKIDS }, 1213 {(intptr_t (*)()) trans_rename_check, MDRNM_CHECK }, 1214 {(intptr_t (*)()) trans_renexch_update_kids, MDRNM_UPDATE_KIDS }, 1215 {(intptr_t (*)()) trans_rename_update_self, MDRNM_UPDATE_SELF }, 1216 {(intptr_t (*)()) trans_exchange_self_update_from_down, 1217 MDRNM_SELF_UPDATE_FROM_DOWN }, 1218 {(intptr_t (*)()) trans_exchange_parent_update_to, 1219 MDRNM_PARENT_UPDATE_TO }, 1220 {NULL, 0 } 1221 }; 1222 1223 md_ops_t trans_md_ops = { 1224 trans_open, /* open */ 1225 trans_close, /* close */ 1226 md_trans_strategy, /* strategy */ 1227 NULL, /* print */ 1228 NULL, /* dump */ 1229 md_trans_read, /* read */ 1230 md_trans_write, /* write */ 1231 md_trans_ioctl, /* trans ioctl */ 1232 trans_snarf, /* trans_snarf */ 1233 trans_halt, /* halt */ 1234 md_trans_aread, /* aread */ 1235 md_trans_awrite, /* awrite */ 1236 trans_imp_set, /* import set */ 1237 trans_named_services 1238 }; 1239 1240 static void 1241 init_init(void) 1242 { 1243 _init_ldl(); 1244 ASSERT(_init_debug()); 1245 trans_parent_cache = kmem_cache_create("md_trans_parent", 1246 sizeof (md_tps_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 1247 trans_child_cache = kmem_cache_create("md_trans_child", biosize(), 0, 1248 trans_child_constructor, trans_child_destructor, 1249 NULL, NULL, NULL, 0); 1250 } 1251 1252 static void 1253 fini_uninit(void) 1254 { 1255 ASSERT(_fini_debug()); 1256 _fini_ldl(); 1257 kmem_cache_destroy(trans_parent_cache); 1258 kmem_cache_destroy(trans_child_cache); 1259 trans_parent_cache = trans_child_cache = NULL; 1260 } 1261 1262 /* define the module linkage */ 1263 MD_PLUGIN_MISC_MODULE("trans module", init_init(), fini_uninit()) 1264