1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #ifndef _SYS_MD_TRANS_H 28 #define _SYS_MD_TRANS_H 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <sys/lvm/mdvar.h> 33 #include <sys/buf.h> 34 #include <sys/fs/ufs_trans.h> 35 #include <sys/lvm/md_rename.h> 36 37 #ifdef __cplusplus 38 extern "C" { 39 #endif 40 41 #define LDL_META_SBLK (16) 42 43 #define LDL_MINLOGSIZE (1024*1024) 44 #define LDL_MAXLOGSIZE (1024*1024*1024) 45 #define LDL_MINBUFSIZE (32*1024) 46 #define LDL_USABLE_BSIZE (DEV_BSIZE - sizeof (sect_trailer_t)) 47 #define NB_LEFT_IN_SECTOR(off) (LDL_USABLE_BSIZE - ((off) - dbtob(btodb(off)))) 48 49 typedef struct cirbuf32 { 50 caddr32_t xx_cb_bp; /* buf's with space in circular buf */ 51 caddr32_t xx_cb_dirty; /* filling this buffer for log write */ 52 caddr32_t xx_cb_free; /* free bufs list */ 53 caddr32_t xx_cb_va; /* address of circular buffer */ 54 uint_t xx_cb_nb; /* size of circular buffer */ 55 uint_t xx_cb_rwlock[3]; /* r/w lock to protect list mgmt. */ 56 } cirbuf32_t; 57 58 typedef struct cirbuf_ic { 59 buf_t *cb_bp; /* buf's with space in circular buf */ 60 buf_t *cb_dirty; /* filling this buffer for log write */ 61 buf_t *cb_free; /* free bufs list */ 62 caddr_t cb_va; /* address of circular buffer */ 63 size_t cb_nb; /* size of circular buffer */ 64 md_krwlock_t cb_rwlock; /* r/w lock to protect list mgmt. */ 65 } cirbuf_ic_t; 66 67 68 typedef struct ml_unit { 69 uint_t un_revision; /* revision number */ 70 /* 71 * mdd infrastructure stuff 72 */ 73 mddb_recid_t un_recid; /* db record id */ 74 mdkey_t un_key; /* namespace key */ 75 md_dev64_t un_dev; /* device number */ 76 uint_t un_opencnt; /* open count */ 77 78 /* 79 * metatrans infrastructure stuff 80 */ 81 uint_t un_transcnt; /* #open metatrans devices */ 82 83 /* 84 * log specific stuff 85 */ 86 off32_t un_head_lof; /* byte offset of head */ 87 uint_t un_head_ident; /* head sector id # */ 88 off32_t un_tail_lof; /* byte offset of tail */ 89 uint_t un_tail_ident; /* tail sector id # */ 90 off32_t un_bol_lof; /* byte offset of begin of log */ 91 off32_t un_eol_lof; /* byte offset of end of log */ 92 daddr32_t un_nblks; /* total blocks of log space */ 93 daddr32_t un_tblks; /* total blocks in log device */ 94 uint_t un_maxtransfer; /* max transfer in bytes */ 95 uint_t un_status; /* status bits */ 96 uint_t un_maxresv; /* maximum reservable space */ 97 daddr32_t un_pwsblk; /* block number of prewrite area */ 98 ulong_t un_devbsize; /* device bsize */ 99 uint_t un_resv; /* reserved byte count for this trans */ 100 uint_t un_resv_wantin; /* reserved byte count for next trans */ 101 mt_l_error_t un_error; /* error state */ 102 uint_t un_tid; /* used during logscan */ 103 uint_t un_head_tid; /* used for logscan; set at sethead */ 104 struct timeval32 un_timestamp; /* time of last state change */ 105 /* 106 * spares 107 */ 108 uint_t un_spare[16]; 109 /* 110 * following are incore only elements. 111 * Incore elements must always be at the end 112 * of this data struture. 113 */ 114 struct ml_unit *un_next; 115 struct mt_unit *un_utlist; 116 struct mt_map *un_logmap; 117 cirbuf_ic_t un_rdbuf; 118 cirbuf_ic_t un_wrbuf; 119 kmutex_t un_log_mutex; 120 } ml_unit_t; 121 122 123 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 124 #pragma pack(4) 125 #endif 126 typedef struct ml_unit32_od { 127 uint_t un_revision; /* revision number */ 128 /* 129 * mdd infrastructure stuff 130 */ 131 caddr32_t xx_un_next; /* next log unit struct */ 132 mddb_recid_t un_recid; /* db record id */ 133 mdkey_t un_key; /* namespace key */ 134 dev32_t un_dev; /* device number */ 135 uint_t un_opencnt; /* open count */ 136 137 /* 138 * metatrans infrastructure stuff 139 */ 140 uint_t un_transcnt; /* #open metatrans devices */ 141 caddr32_t xx_un_utlist; /* list of metatrans devices */ 142 caddr32_t xx_un_logmap; /* address of logmap */ 143 144 /* 145 * log specific stuff 146 */ 147 off32_t un_head_lof; /* byte offset of head */ 148 uint_t un_head_ident; /* head sector id # */ 149 off32_t un_tail_lof; /* byte offset of tail */ 150 uint_t un_tail_ident; /* tail sector id # */ 151 off32_t un_bol_lof; /* byte offset of begin of log */ 152 off32_t un_eol_lof; /* byte offset of end of log */ 153 daddr32_t un_nblks; /* total blocks of log space */ 154 daddr32_t un_tblks; /* total blocks in log device */ 155 uint_t un_maxtransfer; /* max transfer in bytes */ 156 uint_t un_status; /* status bits */ 157 uint_t un_maxresv; /* maximum reservable space */ 158 daddr32_t un_pwsblk; /* block number of prewrite area */ 159 uint_t un_devbsize; /* device bsize */ 160 uint_t un_resv; /* reserved byte count for this trans */ 161 uint_t un_resv_wantin; /* reserved byte count for next trans */ 162 mt_l_error_t un_error; /* error state */ 163 uint_t un_tid; /* used during logscan */ 164 uint_t un_head_tid; /* used for logscan; set at sethead */ 165 cirbuf32_t xx_un_rdbuf; /* read buffer space */ 166 cirbuf32_t xx_un_wrbuf; /* write buffer space */ 167 int xx_un_log_mutex[2]; /* allows one log write at a time */ 168 struct timeval32 un_timestamp; /* time of last state change */ 169 /* 170 * spares 171 */ 172 uint_t un_spare[16]; 173 } ml_unit32_od_t; 174 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 175 #pragma pack() 176 #endif 177 178 179 180 #define ML_UNIT_ONDSZ ((size_t)((caddr_t)&((ml_unit_t *)0)->un_spare[15] +\ 181 sizeof (uint_t))) 182 183 184 /* 185 * un_status 186 */ 187 #define LDL_BEING_RESET 0x0001 /* delete the log record at snarf */ 188 #define LDL_FIND_TAIL 0x0002 /* find tail of the log */ 189 #define LDL_SCAN_ACTIVE 0x0004 /* log scan in progress */ 190 #define LDL_METADEVICE 0x0008 /* underlying device is metadevice */ 191 #define LDL_PWVALID 0x0010 /* prewrite area is valid */ 192 #define LDL_INFO 0x0020 /* prewrite state is valid */ 193 194 typedef struct sect_trailer { 195 uint_t st_tid; /* transaction id */ 196 uint_t st_ident; /* unique sector id */ 197 } sect_trailer_t; 198 199 200 /* 201 * ioctls 202 */ 203 #define MD_IOCGET_LOG (MDIOC_MISC|0) 204 #define MD_IOC_DEBUG (MDIOC_MISC|4) 205 #define MD_IOCGET_TRANSSTATS (MDIOC_MISC|5) 206 #define MD_IOC_TSD (MDIOC_MISC|6) 207 #define MD_IOC_TRYGETBLK (MDIOC_MISC|7) 208 #define MD_IOC_TRYPAGE (MDIOC_MISC|8) 209 #define MD_IOC_SETSHADOW (MDIOC_MISC|11) 210 #define MD_IOC_INJECTERRORS (MDIOC_MISC|13) 211 #define MD_IOC_STOPERRORS (MDIOC_MISC|14) 212 #define MD_IOC_UFSERROR (MDIOC_MISC|15) 213 #define MD_IOC_ISDEBUG (MDIOC_MISC|17) 214 215 #define MD_IOC_TRANS_DETACH (MDIOC_MISC|32) 216 217 /* 218 * following bits are used in status word in the common section 219 * of unit structure 220 */ 221 #define MD_UN_LOG_DELETED (0x00010000) /* don't need to del @snarf */ 222 223 /* 224 * map block 225 */ 226 #define MAPBLOCKSIZE (8192) 227 #define MAPBLOCKSHIFT (13) 228 #define MAPBLOCKOFF (MAPBLOCKSIZE-1) 229 #define MAPBLOCKMASK (~MAPBLOCKOFF) 230 231 /* 232 * delta header 233 */ 234 struct delta { 235 offset_t d_mof; 236 off32_t d_nb; 237 dev32_t d_dev; 238 delta_t d_typ; 239 }; 240 241 /* 242 * common map entry 243 */ 244 typedef struct mapentry mapentry_t; 245 struct mapentry { 246 /* 247 * doubly linked list of all mapentries in map -- MUST BE FIRST 248 */ 249 mapentry_t *me_next; 250 mapentry_t *me_prev; 251 252 mapentry_t *me_hash; 253 mapentry_t *me_agenext; 254 mapentry_t *me_cancel; 255 int (*me_func)(); 256 uintptr_t me_arg; 257 off_t me_lof; 258 uint_t me_flags; 259 uint_t me_tid; 260 uint_t me_age; 261 struct delta me_delta; 262 }; 263 264 #define me_mof me_delta.d_mof 265 #define me_nb me_delta.d_nb 266 #define me_dt me_delta.d_typ 267 #define me_dev me_delta.d_dev 268 269 /* 270 * me_flags 271 */ 272 #define ME_FREE (0x0001) /* on free list */ 273 #define ME_HASH (0x0002) /* on hash list */ 274 #define ME_CANCEL (0x0004) /* on cancel list */ 275 #define ME_AGE (0x0008) /* on age list */ 276 #define ME_LIST (0x0010) /* on list list */ 277 #define ME_ROLL (0x0020) /* on pseudo-roll list */ 278 279 /* 280 * TRANSACTION OPS STATS 281 * mt_top_size_* should be 64bit but that would 282 * require test recompilations. It does not hurt the kernel 283 * so leave as 32 bit for now. 284 */ 285 struct topstats { 286 uint_t mtm_top_num[TOP_MAX]; 287 uint_t mtm_top_size_etot[TOP_MAX]; 288 uint_t mtm_top_size_rtot[TOP_MAX]; 289 uint_t mtm_top_size_max[TOP_MAX]; 290 uint_t mtm_top_size_min[TOP_MAX]; 291 uint_t mtm_delta_num[DT_MAX]; 292 }; 293 294 /* 295 * MAP STATS (global struct that is not updated if compiled w/o ASSERTs) 296 * some members of transstats need to be 64bit. See the comment above. 297 */ 298 struct transstats { 299 /* trans.c */ 300 uint_t ts_trans_zalloc; 301 uint_t ts_trans_zalloc_nosleep; 302 uint_t ts_trans_alloc; 303 uint_t ts_trans_alloc_nosleep; 304 uint_t ts_trans_free; 305 uint_t ts_trans_alloced; 306 uint_t ts_trans_freed; 307 uint_t ts_trans_write; 308 uint_t ts_trans_write_roll; 309 310 /* trans_delta.c */ 311 uint_t ts_mapentry_alloc; 312 uint_t ts_mapentry_alloc_list; 313 uint_t ts_mapentry_free; 314 315 uint_t ts_delta_add; 316 uint_t ts_delta_add_scan; 317 uint_t ts_delta_add_hit; 318 319 uint_t ts_delta_remove; 320 uint_t ts_delta_remove_scan; 321 uint_t ts_delta_remove_hit; 322 323 uint_t ts_delta_del; 324 uint_t ts_delta_del_scan; 325 326 uint_t ts_delta_push; 327 328 uint_t ts_overlap; 329 uint_t ts_overlap_scan; 330 uint_t ts_overlap_hit; 331 332 uint_t ts_remove_roll; 333 uint_t ts_remove_roll_scan; 334 uint_t ts_remove_roll_hit; 335 uint_t ts_remove_roll_dolock; 336 uint_t ts_remove_roll_sud; 337 338 uint_t ts_next_roll; 339 uint_t ts_next_roll_scan; 340 uint_t ts_next_roll_hit; 341 342 uint_t ts_list_age; 343 uint_t ts_list_age_scan; 344 345 uint_t ts_list_get; 346 uint_t ts_list_get_scan; 347 uint_t ts_list_get_hit; 348 uint_t ts_list_get_again; 349 350 uint_t ts_list_put; 351 uint_t ts_list_put_scan; 352 353 uint_t ts_read_mstr; 354 355 uint_t ts_logmap_secmap_roll; 356 357 uint_t ts_read_log; 358 359 uint_t ts_logmap_abort; 360 uint_t ts_logmap_abort_hit; 361 362 uint_t ts_list_add; 363 uint_t ts_list_add_scan; 364 uint_t ts_list_add_cancel; 365 uint_t ts_list_add_unhash; 366 367 uint_t ts_free_cancel; 368 uint_t ts_free_cancel_again; 369 uint_t ts_free_cancel_scan; 370 uint_t ts_free_cancel_hit; 371 372 uint_t ts_commit; 373 uint_t ts_commit_hit; 374 375 uint_t ts_logmap_roll_dev; 376 uint_t ts_logmap_roll_dev_scan; 377 uint_t ts_logmap_roll_dev_hit; 378 379 uint_t ts_logmap_roll_sud; 380 uint_t ts_logmap_roll_sud_hit; 381 382 uint_t ts_logmap_ud_done; 383 uint_t ts_logmap_ud_done_scan; 384 385 uint_t ts_logmap_ud_wait; 386 uint_t ts_logmap_ud_wait_hit; 387 388 uint_t ts_logmap_ud_commit; 389 uint_t ts_logmap_ud_commit_scan; 390 391 uint_t ts_logmap_cancel; 392 uint_t ts_logmap_cancel_scan; 393 uint_t ts_logmap_cancel_hit; 394 395 uint_t ts_logmap_iscancel; 396 uint_t ts_logmap_iscancel_scan; 397 uint_t ts_logmap_iscancel_hit; 398 399 uint_t ts_logscan; 400 uint_t ts_logscan_ud; 401 uint_t ts_logscan_delta; 402 uint_t ts_logscan_cancel; 403 uint_t ts_logscan_commit; 404 405 /* trans_thread.c */ 406 uint_t ts_prewrite; 407 uint_t ts_prewrite_read; 408 uint_t ts_prewrite_write; 409 uint_t ts_trans_roll; 410 uint_t ts_trans_roll_wait; 411 uint_t ts_trans_roll_wait_nada; 412 uint_t ts_trans_roll_wait_slow; 413 uint_t ts_trans_roll_force; 414 uint_t ts_trans_roll_nsud; 415 uint_t ts_trans_roll_ref; 416 uint_t ts_trans_roll_full; 417 uint_t ts_trans_roll_logmap; 418 uint_t ts_trans_roll_read; 419 uint_t ts_trans_roll_reread; 420 uint_t ts_trans_roll_wait_inuse; 421 uint_t ts_trans_roll_prewrite; 422 uint_t ts_trans_roll_write; 423 424 /* trans_top.c */ 425 uint_t ts_delta; 426 uint_t ts_ud_delta; 427 uint_t ts_ud_delta_log; 428 uint_t ts_cancel; 429 uint_t ts_iscancel; 430 uint_t ts_error; 431 uint_t ts_iserror; 432 uint_t ts_beginsync; 433 uint_t ts_active; 434 uint_t ts_activesync; 435 uint_t ts_beginasync; 436 uint_t ts_endsync; 437 uint_t ts_wantin; 438 uint_t ts_endasync; 439 uint_t ts_read; 440 uint_t ts_read_roll; 441 uint_t ts_readmt; 442 uint_t ts_write; 443 uint_t ts_writemt; 444 uint_t ts_writemt_done; 445 uint_t ts_log; 446 447 /* trans_log.c */ 448 uint_t ts_logcommitdb; 449 450 uint_t ts_push_dirty_bp; 451 uint_t ts_push_dirty_bp_extra; 452 uint_t ts_push_dirty_bp_fail; 453 454 uint_t ts_alloc_bp; 455 uint_t ts_alloc_bp_free; 456 457 uint_t ts_find_bp; 458 uint_t ts_find_bp_scan; 459 uint_t ts_find_bp_hit; 460 461 uint_t ts_find_read_lof; 462 uint_t ts_find_read_lof_scan; 463 uint_t ts_find_read_lof_hit; 464 465 uint_t ts_get_read_bp; 466 uint_t ts_get_read_bp_wr; 467 uint_t ts_get_read_bp_rd; 468 469 uint_t ts_extend_write_bp; 470 uint_t ts_extend_write_bp_hit; 471 472 uint_t ts_storebuf; 473 uint_t ts_fetchbuf; 474 uint_t ts_round_commit; 475 uint_t ts_push_commit; 476 477 uint_t ts_inval_range; 478 uint_t ts_inval_range_scan; 479 uint_t ts_inval_range_hit; 480 481 uint_t ts_writelog; 482 uint_t ts_writelog_max; 483 484 uint_t ts_readlog; 485 uint_t ts_readlog_max; 486 487 uint_t ts_get_write_bp; 488 uint_t ts_get_write_bp_steal; 489 490 uint_t ts_writesync; 491 uint_t ts_writesync_log; 492 uint_t ts_writesync_nolog; 493 494 uint_t ts_longmof_cnt; 495 496 } transstats; 497 498 #ifdef DEBUG 499 #define TRANSSTATS(f) (transstats.f++) 500 #define TRANSSTATSADD(f, n) (transstats.f += (n)) 501 #define TRANSSTATSMAX(m, v) \ 502 if ((v) > transstats.m)\ 503 transstats.m = (v); 504 #else 505 #define TRANSSTATS(f) 506 #define TRANSSTATSADD(f, n) 507 #define TRANSSTATSMAX(m, v) 508 #endif /* DEBUG */ 509 510 /* 511 * MAP TYPES 512 */ 513 enum maptypes { 514 deltamaptype, udmaptype, logmaptype, matamaptype, shadowmaptype 515 }; 516 517 /* 518 * MAP 519 */ 520 #define DELTAMAP_NHASH (512) 521 #define LOGMAP_NHASH (2048) 522 #define MAP_INDEX(dev, mof, mtm) \ 523 ((((mof) >> MAPBLOCKSHIFT) + (dev)) & ((mtm)->mtm_nhash-1)) 524 #define MAP_HASH(dev, mof, mtm) \ 525 (mtm->mtm_hash + MAP_INDEX(dev, mof, mtm)) 526 527 typedef struct mt_map { 528 /* 529 * anchor doubly linked list this map's entries -- MUST BE FIRST 530 */ 531 mapentry_t *mtm_next; 532 mapentry_t *mtm_prev; 533 534 int mtm_flags; /* generic flags */ 535 int mtm_ref; /* PTE like ref bit */ 536 uint_t mtm_debug; /* set at create time */ 537 uint_t mtm_age; /* mono-inc; tags mapentries */ 538 mapentry_t *mtm_cancel; /* to be canceled at commit */ 539 uint_t mtm_nhash; /* # of hash anchors */ 540 mapentry_t **mtm_hash; /* array of singly linked lists */ 541 struct topstats *mtm_tops; /* trans ops - enabled by an ioctl */ 542 int mtm_nme; /* # of mapentries */ 543 int mtm_nmet; /* # of mapentries this transaction */ 544 int mtm_nud; /* # of active userdata writes */ 545 int mtm_nsud; /* # of userdata scanned deltas */ 546 md_dev64_t mtm_dev; /* device identifying map */ 547 548 /* 549 * the following are protected by the global map_mutex 550 */ 551 struct mt_map *mtm_mapnext; /* singly linked list of all maps */ 552 uint_t mtm_refcnt; /* reference count to this map */ 553 enum maptypes mtm_type; /* type of map */ 554 555 /* 556 * used after logscan to set the log's tail 557 */ 558 off_t mtm_tail_lof; 559 size_t mtm_tail_nb; 560 561 /* 562 * debug field for Scan test 563 */ 564 off_t mtm_trimlof; /* log was trimmed to this lof */ 565 off_t mtm_trimtail; /* tail lof before trimming */ 566 off_t mtm_trimalof; /* lof of last allocation delta */ 567 off_t mtm_trimclof; /* lof of last commit delta */ 568 off_t mtm_trimrlof; /* lof of last rolled delta */ 569 struct ml_unit *mtm_ul; /* log unit for this map */ 570 571 /* 572 * moby trans stuff 573 */ 574 uint_t mtm_tid; 575 uint_t mtm_committid; 576 ushort_t mtm_closed; 577 ushort_t mtm_seq; 578 int mtm_wantin; 579 int mtm_active; 580 int mtm_activesync; 581 uint_t mtm_dirty; 582 kmutex_t mtm_lock; 583 kcondvar_t mtm_cv_commit; 584 kcondvar_t mtm_cv_next; 585 kcondvar_t mtm_cv_eot; 586 587 /* 588 * mutex that protects all the fields in mt_map except 589 * mtm_mapnext and mtm_refcnt 590 */ 591 kmutex_t mtm_mutex; 592 kcondvar_t mtm_cv; /* generic conditional */ 593 594 /* 595 * rw lock for the mapentry fields agenext and locnext 596 */ 597 md_krwlock_t mtm_rwlock; 598 /* 599 * DEBUG: runtestscan 600 */ 601 kmutex_t mtm_scan_mutex; 602 } mt_map_t; 603 604 /* 605 * mtm_flags 606 */ 607 #define MTM_ROLL_EXIT (0x00000001) 608 #define MTM_ROLL_RUNNING (0x00000002) 609 #define MTM_FORCE_ROLL (0x00000004) 610 611 /* 612 * Generic range checking macros 613 */ 614 #define OVERLAP(sof, snb, dof, dnb) \ 615 ((sof >= dof && sof < (dof + dnb)) || \ 616 (dof >= sof && dof < (sof + snb))) 617 618 #define WITHIN(sof, snb, dof, dnb) ((sof >= dof) && ((sof+snb) <= (dof+dnb))) 619 620 #define DATAoverlapME(mof, hnb, me) (OVERLAP(mof, hnb, me->me_mof, me->me_nb)) 621 #define MEwithinDATA(me, mof, hnb) (WITHIN(me->me_mof, me->me_nb, mof, hnb)) 622 #define DATAwithinME(mof, hnb, me) (WITHIN(mof, hnb, me->me_mof, me->me_nb)) 623 624 625 typedef struct mt_unit { 626 struct mdc_unit c; /* common stuff */ 627 /* 628 * infrastructure 629 */ 630 mt_flags_t un_flags; 631 /* 632 * log and master device 633 */ 634 mdkey_t un_m_key; 635 md_dev64_t un_m_dev; 636 mdkey_t un_l_key; 637 md_dev64_t un_l_dev; 638 daddr32_t un_l_sblk; /* start block */ 639 daddr32_t un_l_pwsblk; /* prewrite start block */ 640 daddr32_t un_l_nblks; /* # of usable log blocks */ 641 daddr32_t un_l_tblks; /* total log blocks */ 642 daddr32_t un_l_head; /* sector offset of log head */ 643 daddr32_t un_l_tail; /* sector offset of log tail */ 644 uint_t un_l_resv; /* current log reservations */ 645 uint_t un_l_maxresv; /* max log reservations */ 646 uint_t un_l_maxtransfer; /* maximum transfer at init */ 647 mddb_recid_t un_l_recid; /* database id */ 648 mt_l_error_t un_l_error; /* error state */ 649 struct timeval32 un_l_timestamp; /* time of last log state chg */ 650 md_dev64_t un_s_dev; /* shadow device for testing only */ 651 mt_debug_t un_debug; /* debug flags; set at create */ 652 md_dev64_t un_dev; /* this metatrans device */ 653 int un_logreset; /* part of _FIOLOGRESET ioctl stuff */ 654 struct timeval32 un_timestamp; /* time of last trans state change */ 655 /* 656 * spares 657 */ 658 ulong_t un_spare[16]; 659 /* 660 * following are incore only elements. 661 * Incore elements must always be at the end 662 * of this data struture. 663 */ 664 struct mt_unit *un_next; 665 struct ml_unit *un_l_unit; 666 struct ufstrans *un_ut; 667 mt_map_t *un_deltamap; 668 mt_map_t *un_udmap; 669 mt_map_t *un_logmap; 670 mt_map_t *un_matamap; 671 mt_map_t *un_shadowmap; 672 } mt_unit_t; 673 674 675 typedef struct mt_unit32_od { 676 mdc_unit32_od_t c; /* common stuff */ 677 /* 678 * infrastructure 679 */ 680 mt_flags_t un_flags; 681 caddr32_t xx_un_next; /* anchored in log unit */ 682 /* 683 * log and master device 684 */ 685 mdkey_t un_m_key; 686 dev32_t un_m_dev; 687 mdkey_t un_l_key; 688 dev32_t un_l_dev; 689 daddr32_t un_l_sblk; /* start block */ 690 daddr32_t un_l_pwsblk; /* prewrite start block */ 691 daddr32_t un_l_nblks; /* # of usable log blocks */ 692 daddr32_t un_l_tblks; /* total log blocks */ 693 daddr32_t un_l_head; /* sector offset of log head */ 694 daddr32_t un_l_tail; /* sector offset of log tail */ 695 uint_t un_l_resv; /* current log reservations */ 696 uint_t un_l_maxresv; /* max log reservations */ 697 uint_t un_l_maxtransfer; /* maximum transfer at init */ 698 mddb_recid_t un_l_recid; /* database id */ 699 caddr32_t xx_un_l_unit; /* log device unit struct */ 700 mt_l_error_t un_l_error; /* error state */ 701 struct timeval32 un_l_timestamp; /* time of last log state chg */ 702 dev32_t un_s_dev; /* shadow device for testing only */ 703 704 mt_debug_t un_debug; /* debug flags; set at create */ 705 caddr32_t xx_un_ut; /* ufstrans struct */ 706 dev32_t un_dev; /* this metatrans device */ 707 caddr32_t xx_un_deltamap; /* deltamap */ 708 caddr32_t xx_un_udmap; /* userdata map */ 709 caddr32_t xx_un_logmap; /* logmap includes moby trans stuff */ 710 caddr32_t xx_un_matamap; /* optional - matamap */ 711 caddr32_t xx_un_shadowmap; /* optional - shadowmap */ 712 int un_logreset; /* part of _FIOLOGRESET ioctl stuff */ 713 struct timeval32 un_timestamp; /* time of last trans state change */ 714 /* 715 * spares 716 */ 717 uint_t un_spare[16]; 718 } mt_unit32_od_t; 719 720 /* 721 * prewrite info (per buf); stored as array at beginning of prewrite area 722 */ 723 struct prewrite { 724 int pw_bufsize; /* every buffer is this size */ 725 daddr32_t pw_blkno; /* block number */ 726 dev32_t pw_dev; /* device to write to */ 727 ushort_t pw_secmap; /* bitmap */ 728 /* 1's write this sector in the buf */ 729 ushort_t pw_flags; 730 }; 731 /* 732 * pw_flags 733 */ 734 #define PW_INUSE 0x0001 /* this prewrite buf is in use */ 735 #define PW_WAIT 0x0002 /* write in progress; wait for completion */ 736 #define PW_REM 0x0004 /* remove deltas */ 737 738 /* 739 * log state 740 */ 741 struct logstate { 742 off32_t ls_head_lof; /* log head */ 743 uint_t ls_head_ident; /* log head ident */ 744 uint_t ls_head_tid; /* log head tid */ 745 uint_t ls_chksum; /* checksum of structure */ 746 off32_t ls_bol_lof; /* needed for TS_Tools/dumplog.c */ 747 off32_t ls_eol_lof; /* needed for TS_Tools/dumplog.c */ 748 uint_t ls_maxtransfer; /* needed for TS_Tools/dumplog.c */ 749 daddr32_t ls_pwsblk; /* needed for TS_Tools/dumplog.c */ 750 }; 751 752 /* 753 * log state defines 754 */ 755 #define LS_SECTORS (2) /* number of sectors used by state area */ 756 757 /* 758 * un_debug 759 * MT_TRANSACT - keep per thread accounting of tranactions 760 * MT_MATAMAP - double check deltas and ops against matamap 761 * MT_WRITE_CHECK - check master+deltas against metadata write 762 * MT_LOG_WRITE_CHECK - read after write for log writes 763 * MT_CHECK_MAP - check map after every insert/delete 764 * MT_TRACE - trace transactions (used with MT_TRANSACT) 765 * MT_SIZE - fail on size errors (used with MT_TRANSACT) 766 * MT_NOASYNC - force every op to be sync 767 * MT_FORCEROLL - forcibly roll the log after every commit 768 * MT_SCAN - running runtestscan; special case as needed 769 * MT_SHADOW - copy metatrans device writes to shadow dev. 770 * MT_PREWRITE - process prewrite area every roll 771 */ 772 #define MT_TRANSACT (0x00000001) 773 #define MT_MATAMAP (0x00000002) 774 #define MT_WRITE_CHECK (0x00000004) 775 #define MT_LOG_WRITE_CHECK (0x00000008) 776 #define MT_CHECK_MAP (0x00000010) 777 #define MT_TRACE (0x00000020) 778 #define MT_SIZE (0x00000040) 779 #define MT_NOASYNC (0x00000080) 780 #define MT_FORCEROLL (0x00000100) 781 #define MT_SCAN (0x00000200) 782 #define MT_SHADOW (0x00000400) 783 #define MT_PREWRITE (0x00000800) 784 785 /* Type 2 trans records */ 786 #define TRANS_REC 1 787 #define LOG_REC 2 788 789 #ifdef _KERNEL 790 791 typedef struct md_tps { /* trans parent save */ 792 DAEMON_QUEUE 793 struct mt_unit *ps_un; 794 mdi_unit_t *ps_ui; 795 buf_t *ps_bp; 796 size_t ps_count; /* Used for testing only. */ 797 kmutex_t ps_mx; /* protects ps_count. */ 798 } md_tps_t; 799 800 /* 801 * Log layer protos -- trans_log.c 802 */ 803 extern void _init_ldl(void); 804 extern void _fini_ldl(void); 805 extern void md_ldl_round_commit(mt_unit_t *); 806 extern void md_ldl_push_commit(mt_unit_t *); 807 extern int md_ldl_need_commit(ml_unit_t *); 808 extern int md_ldl_has_space(ml_unit_t *, mapentry_t *); 809 extern void md_ldl_write(mt_unit_t *, caddr_t, offset_t, 810 mapentry_t *); 811 extern void md_ldl_waito(ml_unit_t *); 812 extern int md_ldl_read(ml_unit_t *, caddr_t, offset_t, off_t, 813 mapentry_t *); 814 extern void md_ldl_sethead(ml_unit_t *, off_t, uint_t, 815 struct buf *); 816 extern void md_ldl_settail(ml_unit_t *, off_t, off_t, 817 struct buf *); 818 extern void ldl_setpwvalid(ml_unit_t *); 819 extern int ldl_build_incore(ml_unit_t *, int); 820 extern ml_unit_t *ldl_findlog(mddb_recid_t); 821 extern mddb_recid_t ldl_create(mdkey_t, mt_unit_t *); 822 extern void ldl_utadd(mt_unit_t *); 823 extern int ldl_open_dev(mt_unit_t *, ml_unit_t *); 824 extern void ldl_close_dev(ml_unit_t *); 825 extern int ldl_snarf(void); 826 extern void ldl_logscan_seterror(ml_unit_t *); 827 extern void ldl_logscan_saverror(ml_unit_t *); 828 extern size_t md_ldl_logscan_nbcommit(off_t); 829 extern int md_ldl_logscan_read(ml_unit_t *, off_t *, size_t, 830 caddr_t); 831 extern void md_ldl_logscan_begin(ml_unit_t *, daddr_t); 832 extern void md_ldl_logscan_end(ml_unit_t *); 833 extern int md_ldl_need_roll(ml_unit_t *); 834 extern int md_ldl_empty(ml_unit_t *); 835 extern int ldl_pwvalid(ml_unit_t *); 836 extern void ldl_waitscan(ml_unit_t *); 837 extern void ldl_errorbp(set_t, buf_t *, char *); 838 extern void md_ldl_seterror(ml_unit_t *); 839 extern int ldl_isherror(ml_unit_t *); 840 extern int ldl_iserror(ml_unit_t *); 841 extern int ldl_isanyerror(ml_unit_t *); 842 extern void ldl_start_scan(mt_unit_t *); 843 extern void ldl_opened_trans(mt_unit_t *, int); 844 extern void ldl_open_trans(mt_unit_t *, int); 845 extern int ldl_logreset(mt_unit_t *, buf_t *); 846 extern void ldl_close_trans(mt_unit_t *); 847 extern size_t md_ldl_bufsize(ml_unit_t *); 848 extern void ldl_open_underlying(mt_unit_t *); 849 extern void ldl_snarf_done(); 850 extern int ldl_reset(mt_unit_t *, int, int); 851 extern void ldl_cleanup(ml_unit_t *); 852 853 /* 854 * trans driver layer -- mdtrans.c 855 */ 856 extern kmem_cache_t *trans_child_cache; 857 extern void *md_trans_zalloc(size_t); 858 extern void *md_trans_zalloc_nosleep(size_t); 859 extern void *md_trans_alloc(size_t); 860 extern void *md_trans_alloc_nosleep(size_t); 861 extern void md_trans_free(void *, size_t); 862 extern int md_trans_not_wait(struct buf *cb); 863 extern int md_trans_not_done(struct buf *cb); 864 extern int md_trans_wait(struct buf *cb); 865 extern int trans_done(struct buf *cb); 866 extern int trans_done_shadow(struct buf *cb); 867 extern void trans_child_init(struct buf *bp); 868 extern void trans_close_all_devs(mt_unit_t *); 869 extern int trans_open_all_devs(mt_unit_t *); 870 extern int trans_build_incore(void *, int); 871 extern void trans_commit(mt_unit_t *, int); 872 extern int trans_detach(mt_unit_t *, int); 873 extern void trans_attach(mt_unit_t *, int); 874 extern int trans_reset(mt_unit_t *, minor_t, int, int); 875 876 /* 877 * transaction ioctl -- trans_ioctl.c 878 */ 879 880 /* rename named service functions */ 881 md_ren_list_svc_t trans_rename_listkids; 882 md_ren_svc_t trans_rename_check; 883 md_ren_roleswap_svc_t trans_renexch_update_kids; 884 md_ren_roleswap_svc_t trans_rename_update_self; 885 md_ren_roleswap_svc_t trans_exchange_parent_update_to; 886 md_ren_roleswap_svc_t trans_exchange_self_update_from_down; 887 888 /* 889 * transaction op layer -- trans_top.c 890 */ 891 extern void _init_md_top(void); 892 extern void _fini_top(void); 893 extern void top_read(struct buf *, char *, mt_unit_t *, int, void *); 894 extern void md_top_read_roll(struct buf *, mt_unit_t *, ushort_t *); 895 extern void top_build_incore(mt_unit_t *); 896 extern void top_reset(mt_unit_t *, int, int); 897 extern void top_write(struct buf *, char *, mt_unit_t *, int, void *); 898 899 /* 900 * map layer -- trans_delta.c 901 */ 902 extern void md_map_free_entries(mt_map_t *); 903 extern int md_matamap_overlap(mt_map_t *, offset_t, off_t); 904 extern int md_matamap_within(mt_map_t *, offset_t, off_t); 905 extern int md_deltamap_need_commit(mt_map_t *); 906 extern void md_deltamap_add(mt_map_t *, offset_t, off_t, delta_t, 907 int (*)(), uintptr_t); 908 extern mapentry_t *md_deltamap_remove(mt_map_t *, offset_t, off_t); 909 extern void md_deltamap_del(mt_map_t *, offset_t, off_t); 910 extern void md_deltamap_push(mt_unit_t *); 911 extern int md_logmap_need_commit(mt_map_t *); 912 extern int md_logmap_need_roll_async(mt_map_t *); 913 extern int md_logmap_need_roll_sync(mt_map_t *); 914 extern int md_logmap_need_roll(mt_map_t *); 915 extern void md_logmap_start_roll(mt_unit_t *); 916 extern void md_logmap_kill_roll(mt_map_t *); 917 extern void md_logmap_forceroll(mt_map_t *); 918 extern int md_logmap_overlap(mt_map_t *, md_dev64_t, offset_t, 919 off_t); 920 extern void md_logmap_remove_roll(mt_map_t *, md_dev64_t, offset_t, 921 off_t); 922 extern int md_logmap_next_roll(mt_map_t *, offset_t *, 923 md_dev64_t *); 924 extern void md_logmap_list_get(mt_map_t *, md_dev64_t, offset_t, 925 off_t, mapentry_t **); 926 extern void md_logmap_list_get_roll(mt_map_t *, md_dev64_t, 927 offset_t, off_t, mapentry_t **); 928 extern void md_logmap_list_put(mt_map_t *, mapentry_t *); 929 extern void md_logmap_read_mstr(ml_unit_t *, struct buf *, int, 930 void *); 931 extern void md_logmap_secmap_roll(mapentry_t *, offset_t, 932 ushort_t *); 933 extern int logmap_read_log(ml_unit_t *, char *, offset_t, off_t, 934 mapentry_t *); 935 extern void md_logmap_make_space(mt_map_t *, ml_unit_t *, 936 mapentry_t *); 937 extern void md_logmap_add(mt_unit_t *, md_dev64_t, char *, offset_t, 938 mapentry_t *); 939 extern void md_logmap_add_ud(mt_unit_t *, md_dev64_t, char *, 940 offset_t, mapentry_t *); 941 extern void md_logmap_commit(mt_unit_t *); 942 extern void md_logmap_sethead(mt_map_t *, ml_unit_t *, 943 struct buf *); 944 extern void md_logmap_roll_dev(mt_map_t *, ml_unit_t *ul, 945 md_dev64_t); 946 extern void md_logmap_roll_sud(mt_map_t *, ml_unit_t *ul, 947 md_dev64_t, offset_t, off_t); 948 extern int md_logmap_ud_done(struct buf *); 949 extern void md_logmap_ud_wait(); 950 extern void md_logmap_cancel(mt_unit_t *, md_dev64_t, offset_t, 951 off_t); 952 extern int md_logmap_iscancel(mt_map_t *, md_dev64_t, offset_t, 953 off_t); 954 extern void md_logmap_logscan(mt_unit_t *, daddr_t); 955 extern void map_build_incore(mt_unit_t *); 956 extern void map_reset(mt_unit_t *, int, int); 957 extern void _init_md_map(void); 958 extern void _fini_map(void); 959 960 /* 961 * scan and roll threads -- trans_thread.c 962 */ 963 extern void md_trans_roll(ml_unit_t *); 964 extern void trans_scan(mt_unit_t *); 965 extern void trans_roll_prewrite(ml_unit_t *); 966 967 #endif /* _KERNEL */ 968 969 #ifdef __cplusplus 970 } 971 #endif 972 973 #endif /* _SYS_MD_TRANS_H */ 974