1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #ifndef _SYS_MD_MDDB_H 28 #define _SYS_MD_MDDB_H 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <sys/types.h> 33 #include <sys/buf.h> 34 35 #ifdef __cplusplus 36 extern "C" { 37 #endif 38 39 #if 0 /* DRP FOR DEBUGGING */ 40 #define MDDB_FAKE 41 #endif 42 43 /* Private flags */ 44 #define MD_PRV_GOTIT 0x0001 /* Been snarfed */ 45 #define MD_PRV_DELETE 0x0002 /* Record pending to be deleted */ 46 #define MD_PRV_COMMIT 0x0004 /* Record pending to be commited */ 47 #define MD_PRV_CLEANUP 0x0008 /* Record pending to be cleaned up */ 48 #define MD_PRV_CONVD 0x0010 /* Record has been converted (32->64) */ 49 #define MD_PRV_PENDDEL (MD_PRV_GOTIT | MD_PRV_DELETE) 50 #define MD_PRV_PENDCOM (MD_PRV_GOTIT | MD_PRV_COMMIT) 51 #define MD_PRV_PENDCLEAN (MD_PRV_GOTIT | MD_PRV_CLEANUP) 52 53 54 #define MDDB_E_INVALID (-1) /* an invalid argument was passed */ 55 #define MDDB_E_EXISTS (-2) /* doing an operation a 2nd time which can */ 56 /* only be done once */ 57 #define MDDB_E_MASTER (-3) /* 
problem occurred accessing mastor block */ 58 /* returned from NEW_DEV */ 59 #define MDDB_E_TOOSMALL (-4) /* device is not large enough */ 60 #define MDDB_E_NORECORD (-5) /* record does not exits */ 61 /* 62 * returned from: mddb_getnextrec 63 * mddb_getrecsize 64 * mddb_commitrec 65 * mddb_commitrecs 66 * mddb_deleterec 67 */ 68 #define MDDB_E_NOSPACE (-6) /* no space to create record */ 69 #define MDDB_E_NOTNOW (-7) /* do not presently have enough resources */ 70 /* to perform requested operation */ 71 #define MDDB_E_NODB (-8) /* no database exist */ 72 #define MDDB_E_NOTOWNER (-9) /* have not been told to grab this set */ 73 #define MDDB_E_STALE (-10) /* database is stale */ 74 #define MDDB_E_TOOFEW (-11) /* not enough replicas available */ 75 #define MDDB_E_TAGDATA (-12) /* tagged data detected */ 76 #define MDDB_E_ACCOK (-13) /* 50/50 mode */ 77 #define MDDB_E_NTAGDATA (-14) /* tagop try, no tag data */ 78 #define MDDB_E_ACCNOTOK (-15) /* accop try, no accept possible */ 79 #define MDDB_E_NOLOCBLK (-16) /* No valid locators found */ 80 #define MDDB_E_NOLOCNMS (-17) /* No valid locator name information */ 81 #define MDDB_E_NODIRBLK (-18) /* No directory blocks found */ 82 #define MDDB_E_NOTAGREC (-19) /* No tag record blocks found */ 83 #define MDDB_E_NOTAG (-20) /* No matching tag record found */ 84 #define MDDB_E_NODEVID (-21) /* No device id found */ 85 86 #define MDDB_MINBLKS 16 /* enough for a few metadevices */ 87 #define MDDB_MAXBLKS 8192 /* size of free bit map (must be / 8) */ 88 #define MDDB_MN_MINBLKS 32768 /* Multinode metadb minimum size */ 89 /* 16MB */ 90 #define MDDB_MN_MAXBLKS 524288 /* size of free bit map (must be / 8) */ 91 /* 256MB */ 92 93 #define MDDB_C_STALE 0x0001 94 #define MDDB_C_TOOFEW 0x0002 95 #define MDDB_C_NOTOWNER 0x0004 96 #define MDDB_C_SET_MN_STALE 0x0008 /* Set MN set to stale */ 97 #define MDDB_C_IMPORT 0x0010 98 99 /* 100 * Defines used to set/reset new master flag in set structure. 
101 * Used during reconfig cycle to determine quickly if there is 102 * new master for the set. 103 */ 104 #define MDDB_NM_SET 0x0001 105 #define MDDB_NM_RESET 0x0002 106 #define MDDB_NM_GET 0x0004 107 108 /* Definitions of flag in Locator Block Device ID data area - mddb_did_info */ 109 #define MDDB_DID_EXISTS 0x0001 /* Device ID exists */ 110 #define MDDB_DID_VALID 0x0002 /* Device ID valid on current system */ 111 #define MDDB_DID_UPDATED 0x0004 /* locator/sidelocator info updated */ 112 113 /* Definitions of flag in Locator Block - mddb_lb */ 114 #define MDDB_DEVID_STYLE 0x0001 /* Locator Block in Device ID format */ 115 #define MDDB_MNSET 0x0002 /* MDDB is for a multi-node set */ 116 117 118 #define MDDB_MAX_PATCH 25 /* number of locations that */ 119 /* can be patched in etc/system */ 120 121 /* 122 * Set struct used by all parts of the driver, to store anchor pointers. 123 */ 124 typedef struct md_set { 125 uint_t s_status; /* set status */ 126 void **s_ui; /* set unit incore anchor */ 127 void **s_un; /* set unit anchor */ 128 void *s_hsp; /* set Hot Spare Pool anchor */ 129 void *s_hs; /* set Hot Spare anchor */ 130 void *s_db; /* set MDDB anchor */ 131 kmutex_t s_dbmx; /* set MDDB mutex */ 132 void *s_nm; /* set namespace anchor */ 133 mddb_recid_t s_nmid; /* set namespace anchor record */ 134 void *s_did_nm; /* set device id namespace anchor */ 135 mddb_recid_t s_did_nmid; /* set device id namespace anchor rec */ 136 void *s_dtp; /* set data tag rec */ 137 int s_am_i_master; /* incore master flag for this node */ 138 md_mn_nodeid_t s_nodeid; /* nodeid of this node - for MN sets */ 139 uint_t s_rcnt; /* incore resync count for set */ 140 } md_set_t; 141 142 143 #define MDDB_MAGIC_MB 0x6d646d62 /* magic number for master blocks */ 144 #define MDDB_MAGIC_DB 0x6d646462 /* magic number for directory blocks */ 145 #define MDDB_MAGIC_RB 0x6d647262 /* magic number for record blocks */ 146 #define MDDB_MAGIC_LB 0x6d646c62 /* magic number for locator blocks */ 147 
#define	MDDB_MAGIC_LN	0x6d646c6e	/* magic number for locator names */
#define	MDDB_MAGIC_DT	0x6d646474	/* magic number for data tag */
#define	MDDB_MAGIC_DI	0x6d646469	/* magic number for device ID block */
#define	MDDB_MAGIC_DU	0x6d646475	/* magic num for dummy mb */
#define	MDDB_MAGIC_DE	0x6d646465	/* magic num for mb devid */

#define	MDDB_GLOBAL_XOR	1234567890

/* Masks selecting the major (high byte) and minor (low byte) revision */
#define	MDDB_REV_MAJOR	(uint_t)0xff00
#define	MDDB_REV_MINOR	(uint_t)0x00ff

/*
 * MDDB_REV_MNMB:
 * If a MN diskset, master block revision is set to MDDB_REV_MNMB.
 * Even though the master block structure is no different
 * for a MN set, setting the revision field to a different
 * number keeps any pre-MN_diskset code from accessing
 * this diskset. It also allows for an early determination
 * of a MN diskset when reading in from disk so that the
 * proper size locator block and locator names structure
 * can be read in thus saving time on diskset startup.
 * Since no change in master block structure, the MDDB_REV_MINOR
 * portion of the revision was incremented.
 *
 * MDDB_REV_MNLB:
 * If a MN diskset, the locator block structure is a different size in
 * order to accommodate up to MD_MNMAXSIDES nodes in a diskset
 * with any nodeid (sideno) allowed.
 * The revision is set to MDDB_REV_MNLB which is a change of the
 * MDDB_REV_MAJOR portion of the revision.
 *
 * MDDB_REV_MNLN:
 * If a MN diskset, the locator names is a different size in
 * order to accommodate up to MD_MNMAXSIDES nodes in a diskset
 * with any nodeid (sideno) allowed.
 * The revision is set to MDDB_REV_MNLN which is a change of the
 * MDDB_REV_MAJOR portion of the revision.
184 */ 185 186 #define MDDB_REV_MB (uint_t)0x0201 187 #define MDDB_REV_MNMB (uint_t)0x0202 188 #define MDDB_REV_DB (uint_t)0x0201 189 #define MDDB_REV_LB (uint_t)0x0500 190 #define MDDB_REV_MNLB (uint_t)0x0600 191 #define MDDB_REV_LN (uint_t)0x0100 192 #define MDDB_REV_MNLN (uint_t)0x0300 193 #define MDDB_REV_RB (uint_t)0x0200 194 #define MDDB_REV_RB64 (uint_t)0x0201 195 #define MDDB_REV_DT (uint_t)0x0100 196 #define MDDB_REV_DI (uint_t)0x0100 197 198 #define MDDB_BSIZE (uint_t)DEV_BSIZE 199 #define MDDB_PREFIXCNT 10 200 #define MDDB_DRVNMCNT 10 201 202 typedef int mddb_block_t; 203 204 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 205 #pragma pack(4) 206 #endif 207 typedef struct md_mnname_suffix { 208 md_name_suffix mn_ln_suffix; 209 uint_t mn_ln_sideno; 210 } md_mnname_suffix_t; 211 212 typedef struct mddb_ln { 213 int ln_magic; 214 uint_t ln_revision; 215 uint_t ln_checksum; 216 struct timeval32 ln_timestamp; 217 md_name_prefix ln_prefixes[MDDB_PREFIXCNT]; 218 /* Don't change array sizes without changing RNDUP_BLKCNT */ 219 md_name_suffix ln_suffixes[MD_MAXSIDES][MDDB_NLB]; 220 } mddb_ln_t; 221 222 /* 223 * Locator name structure for MN diskset. Same as for traditional 224 * and local diskset except that more sides are supported and the 225 * side number can be any number since the side number is stored 226 * in the ln_mnsuffixes structure instead of being used as an index 227 * into that array. This means that the whole array may need to be 228 * searched in order to find the correct information given a side number. 
229 */ 230 typedef struct mddb_mnln { 231 int ln_magic; 232 uint_t ln_revision; 233 uint_t ln_checksum; 234 struct timeval32 ln_timestamp; 235 md_name_prefix ln_prefixes[MDDB_PREFIXCNT]; 236 /* Don't change array sizes without changing MDDB_MNLNCNT */ 237 md_mnname_suffix_t ln_mnsuffixes[MD_MNMAXSIDES][MDDB_NLB]; 238 } mddb_mnln_t; 239 240 #define RNDUP_BLKCNT(sz, delta) (((sz) - \ 241 ((delta) * \ 242 ((MD_MAXSIDES - 1) * MDDB_NLB)) + \ 243 MDDB_BSIZE - 1) / MDDB_BSIZE) 244 #define MDDB_LNCNT RNDUP_BLKCNT(sizeof (mddb_ln_t), 0) 245 #define MDDB_LOCAL_LNCNT RNDUP_BLKCNT(sizeof (mddb_ln_t), \ 246 sizeof (md_name_suffix)) 247 248 #define MDDB_MNLNCNT ((sizeof (mddb_mnln_t) + (MDDB_BSIZE - 1)) \ 249 / MDDB_BSIZE) 250 251 typedef struct mddb_dt { 252 uint_t dt_mag; 253 uint_t dt_rev; 254 uint_t dt_cks; 255 mddb_dtag_t dt_dtag; 256 } mddb_dt_t; 257 258 #define MDDB_DT_BYTES (roundup(sizeof (mddb_dt_t), MDDB_BSIZE)) 259 #define MDDB_DT_BLOCKS (btodb(MDDB_DT_BYTES)) 260 261 typedef union identifier { 262 char serial[MDDB_SN_LEN]; 263 struct timeval32 createtime; 264 } identifier_t; 265 266 typedef struct mddb_locator { 267 dev32_t l_dev; 268 daddr32_t l_blkno; 269 int l_flags; 270 } mddb_locator_t; 271 272 typedef struct mddb_sidelocator { 273 uchar_t l_drvnm_index; 274 minor_t l_mnum; 275 } mddb_sidelocator_t; 276 277 typedef struct mddb_mnsidelocator { 278 uchar_t mnl_drvnm_index; 279 minor_t mnl_mnum; 280 uint_t mnl_sideno; 281 } mddb_mnsidelocator_t; 282 283 typedef struct mddb_drvnm { 284 uchar_t dn_len; 285 char dn_data[MD_MAXDRVNM]; 286 } mddb_drvnm_t; 287 288 /* 289 * Locator Block Device ID Information 290 * Several device id's may share one disk block in an effort to 291 * conserve used replica space. 
292 */ 293 typedef struct mddb_did_info { 294 uint_t info_flags; /* MDDB Device ID flags */ 295 uint_t info_firstblk; /* Device ID Start Block */ 296 uint_t info_blkcnt; /* Device ID Block Count */ 297 uint_t info_offset; /* Device ID offset w/i Block */ 298 uint_t info_length; /* Device ID Length */ 299 uint_t info_checksum; /* Device ID Checksum */ 300 char info_minor_name[32]; /* Minor name of lb dev */ 301 } mddb_did_info_t; 302 303 typedef struct mddb_did_blk { 304 int blk_magic; /* used for verification */ 305 uint_t blk_revision; /* used for verification */ 306 int blk_checksum; /* used for verification */ 307 uint_t blk_commitcnt; /* matches LB's commitcnt */ 308 mddb_did_info_t blk_info[MDDB_NLB]; 309 } mddb_did_blk_t; 310 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 311 #pragma pack() 312 #endif 313 314 #define MDDB_DID_BYTES (roundup(sizeof (mddb_did_blk_t), MDDB_BSIZE)) 315 #define MDDB_DID_BLOCKS (btodb(MDDB_DID_BYTES)) 316 317 /* 318 * Device ID Disk Blocks. 319 * Incore linked list of disk blocks containing device IDs. 320 * The list is built when reading in the mddb_did_blk structure and 321 * when reading in the actual disk blocks containing device ids. 322 * This list is used to easily write out all disk blocks containing 323 * device ids. 324 */ 325 typedef struct mddb_did_db { 326 uint_t db_firstblk; /* Disk Block's logical addr */ 327 uint_t db_blkcnt; /* Contig Disk Block Count */ 328 caddr_t db_ptr; /* Ptr to incore Block(s) */ 329 struct mddb_did_db *db_next; /* Ptr to next in list */ 330 } mddb_did_db_t; 331 332 /* 333 * Device ID Free List. 334 * Incore linked list of free space in disk blocks containing device IDs. 335 * Used to manage placement of device IDs in disk blocks. 336 * All disk blocks on free list are also in linked list of disk block 337 * containing device IDs (mddb_did_db_t). 
338 */ 339 typedef struct mddb_did_free { 340 uint_t free_blk; /* Disk Block's logical addr */ 341 uint_t free_offset; /* offset of free space */ 342 uint_t free_length; /* length of free space */ 343 struct mddb_did_free *free_next; /* Ptr to next in list */ 344 } mddb_did_free_t; 345 346 /* 347 * Device ID Incore Area 348 * Contains pointer to Device ID Disk Block list and 349 * Device ID Free List. 350 * Also contains incore array of pointers to device IDs. Pointers 351 * point into the device ID Disk Block list and are used as a 352 * shortcut to find incore device IDs. 353 */ 354 typedef struct mddb_did_ic { 355 mddb_did_blk_t *did_ic_blkp; 356 mddb_did_db_t *did_ic_dbp; 357 mddb_did_free_t *did_ic_freep; 358 ddi_devid_t did_ic_devid[MDDB_NLB]; /* Ptr to device IDs */ 359 } mddb_did_ic_t; 360 361 /* 362 * Locator Block (LB): 363 * - Are fixed size, but the size is different 364 * for local/shared set db replicas. 365 * - All LB's start at logical block 0. 366 * - After a replica quorum is found, there is 367 * is only one incore copy of the LB. 368 * - LB's are only written when replicas are added, deleted, or errored. 369 * - LB's provide information about other replica's and their state. 
370 */ 371 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 372 #pragma pack(4) 373 #endif 374 typedef struct mddb_lb { 375 int lb_magic; /* used for verification */ 376 uint_t lb_revision; /* used for verification */ 377 int lb_checksum; /* used for verification */ 378 uint_t lb_commitcnt; /* IMPORTANT */ 379 struct timeval32 lb_timestamp; /* informative only */ 380 int lb_loccnt; /* used for verification */ 381 identifier_t lb_ident; /* used for verification */ 382 uint_t lb_flags; /* flags describing LB */ 383 uint_t lb_spare[8]; /* Spare/Pad */ 384 mddb_block_t lb_didfirstblk; /* Devid Array Start Block */ 385 mddb_block_t lb_didblkcnt; /* Devid Array Number Blocks */ 386 mddb_block_t lb_dtfirstblk; /* Data Tag Start Block */ 387 mddb_block_t lb_dtblkcnt; /* Data Tag Number Block(s) */ 388 struct timeval32 lb_inittime; /* creation of database */ 389 set_t lb_setno; /* used for verification */ 390 mddb_block_t lb_blkcnt; /* used for verification */ 391 mddb_block_t lb_lnfirstblk; 392 mddb_block_t lb_lnblkcnt; 393 mddb_block_t lb_dbfirstblk; 394 mddb_drvnm_t lb_drvnm[MDDB_DRVNMCNT]; 395 mddb_locator_t lb_locators[MDDB_NLB]; 396 /* Don't change array sizes without changing RNDUP_BLKCNT */ 397 mddb_sidelocator_t lb_sidelocators[MD_MAXSIDES][MDDB_NLB]; 398 } mddb_lb_t; 399 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 400 #pragma pack() 401 #endif 402 403 /* 404 * Locator block structure for MN diskset. Same as for traditional 405 * and local diskset except that more sides are supported and the 406 * side number can be any number since the side number is stored 407 * in the lb_mnsidelocators structure instead of being used as an index 408 * into that array. This means that the whole array may need to be 409 * searched in order to find the correct information given a side number. 
410 */ 411 typedef struct mddb_mnlb { 412 int lb_magic; /* used for verification */ 413 uint_t lb_revision; /* used for verification */ 414 int lb_checksum; /* used for verification */ 415 uint_t lb_commitcnt; /* IMPORTANT */ 416 struct timeval32 lb_timestamp; /* informative only */ 417 int lb_loccnt; /* used for verification */ 418 identifier_t lb_ident; /* used for verification */ 419 uint_t lb_flags; /* flags describing LB */ 420 uint_t lb_spare[8]; /* Spare/Pad */ 421 mddb_block_t lb_didfirstblk; /* Devid Array Start Block */ 422 mddb_block_t lb_didblkcnt; /* Devid Array Number Blocks */ 423 mddb_block_t lb_dtfirstblk; /* Data Tag Start Block */ 424 mddb_block_t lb_dtblkcnt; /* Data Tag Number Block(s) */ 425 struct timeval32 lb_inittime; /* creation of database */ 426 set_t lb_setno; /* used for verification */ 427 mddb_block_t lb_blkcnt; /* used for verification */ 428 mddb_block_t lb_lnfirstblk; 429 mddb_block_t lb_lnblkcnt; 430 mddb_block_t lb_dbfirstblk; 431 mddb_drvnm_t lb_drvnm[MDDB_DRVNMCNT]; 432 mddb_locator_t lb_locators[MDDB_NLB]; 433 /* Don't change array sizes without changing MDDB_MNLBCNT */ 434 mddb_mnsidelocator_t lb_mnsidelocators[MD_MNMAXSIDES][MDDB_NLB]; 435 } mddb_mnlb_t; 436 437 438 #define MDDB_LBCNT RNDUP_BLKCNT(sizeof (mddb_lb_t), 0) 439 #define MDDB_LOCAL_LBCNT RNDUP_BLKCNT(sizeof (mddb_lb_t), \ 440 sizeof (mddb_sidelocator_t)) 441 442 #define MDDB_MNLBCNT ((sizeof (mddb_mnlb_t) + (MDDB_BSIZE - 1)) \ 443 / MDDB_BSIZE) 444 445 typedef struct mddb_map { 446 daddr32_t m_consecutive; 447 daddr32_t m_firstblk; 448 } mddb_map_t; 449 450 /* 451 * Master block(s) (MB) 452 * - Are written by userland; Never by the driver! 453 * - Each replica has there own master blocks, 454 * the master block(s) are not shared. 455 * - MB's are not in the logical block address space of the database. 456 * - MB's are a fixed size record (MDDB_BSIZE) 457 * - MB's provide the logical to physical block translation, 458 * for their replica. 
459 */ 460 typedef struct mddb_mb { 461 int mb_magic; /* used for verification */ 462 uint_t mb_revision; /* used for verification */ 463 uint_t mb_checksum; /* used for verification */ 464 #ifdef _LP64 465 uint32_t mb_next; /* incore to next mb */ 466 #else 467 struct mddb_mb *mb_next; /* incore to next mb */ 468 #endif /* _LP64 */ 469 daddr32_t mb_nextblk; /* block # for next mb */ 470 md_timeval32_t mb_timestamp; /* timestamp */ 471 daddr32_t mb_blkcnt; /* size of blkmap */ 472 daddr32_t mb_blkno; /* physical loc. for this MB */ 473 set_t mb_setno; /* used for verification */ 474 struct timeval32 mb_setcreatetime; /* set creation timestamp */ 475 int spares[7]; 476 mddb_map_t mb_blkmap; /* logical->physical blk map */ 477 int mb_devid_magic; /* verify devid in mb */ 478 short mb_devid_len; /* len of following devid */ 479 char mb_devid[1]; /* devid byte array */ 480 } mddb_mb_t; 481 482 /* 483 * In-core version of mddb_mb. It is known that the mddb_mb is 512 bytes on 484 * disk, really, and so this structure is 512 + sizeof(struct mddb_mb_ic *) 485 */ 486 #define MDDB_IC_BSIZE (MDDB_BSIZE + sizeof (struct mddb_mb_ic *)) 487 typedef struct mddb_mb_ic { 488 struct mddb_mb_ic *mbi_next; 489 struct mddb_mb mbi_mddb_mb; 490 } mddb_mb_ic_t; 491 492 493 /* 494 * there can be no address in record block. The checksum must 495 * stay the same where ever the record is in memory. Many 496 * things depend on this. Also the timestamp is the time the the 497 * record was committed not the time it was written to a particular 498 * device. 
499 * 500 * Old definition of mddb_rb, for 32-bit apps and libraries 501 */ 502 typedef struct mddb_rb { 503 uint_t rb_magic; 504 uint_t rb_revision; 505 uint_t rb_checksum; 506 uint_t rb_checksum_fiddle; 507 uint_t rb_private; 508 void *rb_userdata; 509 uint_t rb_commitcnt; 510 uint_t rb_spare[1]; 511 struct timeval32 rb_timestamp; 512 int rb_data[1]; 513 } mddb_rb_t; 514 515 /* This is, and always will be, the on-disk version of mddb_rb */ 516 typedef struct mddb_rb32 { 517 uint_t rb_magic; 518 uint_t rb_revision; 519 uint_t rb_checksum; 520 uint_t rb_checksum_fiddle; 521 uint_t rb_private; 522 uint32_t rb_userdata; 523 uint_t rb_commitcnt; 524 uint_t rb_spare[1]; 525 struct timeval32 rb_timestamp; 526 int rb_data[1]; 527 } mddb_rb32_t; 528 529 /* 530 * directory entries 531 */ 532 typedef struct mddb_optinfo { 533 int o_li; 534 int o_flags; 535 } mddb_optinfo_t; 536 537 /* Old definition of mddb_de, for 32-bit apps and libraries */ 538 typedef struct mddb_de { 539 struct mddb_de *de_next; 540 mddb_rb_t *de_rb; 541 mddb_recid_t de_recid; 542 mddb_type_t de_type1; 543 uint_t de_type2; 544 uint_t de_reqsize; 545 uint_t de_recsize; 546 mddb_block_t de_blkcount; 547 uint_t de_flags; 548 mddb_optinfo_t de_optinfo[2]; 549 mddb_block_t de_blks[1]; 550 } mddb_de_t; 551 552 /* 553 * In core version of mddb_de, includes pointer for mddb_rb32_t user data 554 * mddb_rb32_t is used incore 555 */ 556 typedef struct mddb_de_ic { 557 void *de_rb_userdata; 558 void *de_rb_userdata_ic; 559 uint_t de_owner_nodeid; 560 struct mddb_de_ic *de_next; 561 mddb_rb32_t *de_rb; 562 mddb_recid_t de_recid; 563 mddb_type_t de_type1; 564 uint_t de_type2; 565 size_t de_reqsize; 566 size_t de_icreqsize; 567 size_t de_recsize; 568 uint_t de_blkcount; 569 uint_t de_flags; 570 mddb_optinfo_t de_optinfo[2]; 571 mddb_block_t de_blks[1]; 572 } mddb_de_ic_t; 573 574 typedef struct mddb_db { 575 uint_t db_magic; 576 uint_t db_revision; 577 uint_t db_checksum; 578 mddb_block_t db_blknum; 579 struct 
mddb_db *db_next; 580 mddb_block_t db_nextblk; 581 struct timeval32 db_timestamp; 582 uint_t db_recsum; 583 #ifdef _KERNEL 584 mddb_de_ic_t *db_firstentry; 585 #else 586 mddb_de_t *db_firstentry; 587 #endif 588 } mddb_db_t; 589 590 /* 591 * This is, and always will be, the on-disk version of mddb_de 592 * When mddb_de32 is read in it is converted into mddb_de_ic 593 */ 594 typedef struct mddb_de32 { 595 uint32_t de32_next; 596 uint32_t de32_rb; 597 mddb_recid_t de32_recid; 598 mddb_type_t de32_type1; 599 uint_t de32_type2; 600 uint_t de32_reqsize; 601 uint_t de32_recsize; 602 mddb_block_t de32_blkcount; 603 uint_t de32_flags; 604 mddb_optinfo_t de32_optinfo[2]; 605 mddb_block_t de32_blks[1]; 606 } mddb_de32_t; 607 608 /* 609 * This is, and always will be, the on-disk version of mddb_db 610 * When mddb_db32 is read in it is converted into mddb_db 611 * To minimize impact on mddb format mddb_db fileds remain intact 612 */ 613 typedef struct mddb_db32 { 614 uint_t db32_magic; 615 uint_t db32_revision; 616 uint_t db32_checksum; 617 mddb_block_t db32_blknum; 618 uint32_t db32_next; 619 mddb_block_t db32_nextblk; 620 struct timeval32 db32_timestamp; 621 uint_t db32_recsum; 622 uint32_t db32_firstentry; 623 } mddb_db32_t; 624 625 #define de32tode(from, to) \ 626 { \ 627 int i; \ 628 to->de_rb_userdata = NULL; \ 629 to->de_owner_nodeid = MD_MN_INVALID_NID; \ 630 to->de_next = (struct mddb_de_ic *)(uintptr_t)from->de32_next; \ 631 to->de_rb = (mddb_rb32_t *)(uintptr_t)from->de32_rb; \ 632 to->de_recid = from->de32_recid; \ 633 to->de_type1 = from->de32_type1; \ 634 to->de_type2 = from->de32_type2; \ 635 to->de_reqsize = from->de32_reqsize; \ 636 to->de_recsize = from->de32_recsize; \ 637 to->de_blkcount = from->de32_blkcount; \ 638 to->de_flags = from->de32_flags; \ 639 to->de_optinfo[0] = from->de32_optinfo[0]; \ 640 to->de_optinfo[1] = from->de32_optinfo[1]; \ 641 for (i = 0; i < from->de32_blkcount; i++) \ 642 to->de_blks[i] = from->de32_blks[i]; \ 643 } 644 645 #define 
detode32(from, to) \ 646 { \ 647 int i; \ 648 to->de32_next = (uint32_t)(uintptr_t)from->de_next; \ 649 to->de32_rb = (uint32_t)(uintptr_t)from->de_rb; \ 650 to->de32_recid = from->de_recid; \ 651 to->de32_type1 = from->de_type1; \ 652 to->de32_type2 = from->de_type2; \ 653 to->de32_reqsize = from->de_reqsize; \ 654 to->de32_recsize = from->de_recsize; \ 655 to->de32_blkcount = from->de_blkcount; \ 656 to->de32_flags = from->de_flags; \ 657 to->de32_optinfo[0] = from->de_optinfo[0]; \ 658 to->de32_optinfo[1] = from->de_optinfo[1]; \ 659 for (i = 0; i < from->de_blkcount; i++) \ 660 to->de32_blks[i] = from->de_blks[i]; \ 661 } 662 663 #define db32todb(from, to) \ 664 to->db_magic = from->db32_magic; \ 665 to->db_revision = from->db32_revision; \ 666 to->db_checksum = from->db32_checksum; \ 667 to->db_blknum = from->db32_blknum; \ 668 to->db_next = (struct mddb_db *)(uintptr_t)from->db32_next; \ 669 to->db_nextblk = from->db32_nextblk; \ 670 to->db_timestamp = from->db32_timestamp; \ 671 to->db_recsum = from->db32_recsum; \ 672 to->db_firstentry = (mddb_de_ic_t *)(uintptr_t)from->db32_firstentry; 673 674 #define dbtodb32(from, to) \ 675 to->db32_magic = from->db_magic; \ 676 to->db32_revision = from->db_revision; \ 677 to->db32_checksum = from->db_checksum; \ 678 to->db32_blknum = from->db_blknum; \ 679 to->db32_next = (uint32_t)(uintptr_t)from->db_next; \ 680 to->db32_nextblk = from->db_nextblk; \ 681 to->db32_timestamp = from->db_timestamp; \ 682 to->db32_recsum = from->db_recsum; \ 683 to->db32_firstentry = (uint32_t)(uintptr_t)from->db_firstentry; 684 685 /* 686 * information about a replica of the data base 687 */ 688 typedef struct mddb_ri { 689 struct mddb_ri *ri_next; 690 uint_t ri_flags; 691 uint_t ri_commitcnt; 692 int ri_transplant; 693 md_dev64_t ri_dev; 694 daddr32_t ri_blkno; 695 char ri_driver[16]; 696 mddb_mb_ic_t *ri_mbip; 697 mddb_lb_t *ri_lbp; 698 mddb_dt_t *ri_dtp; 699 mddb_did_ic_t *ri_did_icp; 700 ddi_devid_t ri_devid; 701 ddi_devid_t 
ri_old_devid; 702 char ri_minor_name[MDDB_MINOR_NAME_MAX]; 703 char ri_devname[MAXPATHLEN]; 704 } mddb_ri_t; 705 706 typedef struct mddb_bf { 707 struct mddb_bf *bf_next; 708 mddb_locator_t *bf_locator; 709 buf_t bf_buf; 710 } mddb_bf_t; 711 712 /* 713 * Information for sets of databases (which include replicas) 714 */ 715 #define MDDB_BITSRECID 31 716 #define MDDB_SETSHIFT (MDDB_BITSRECID - MD_BITSSET) 717 #define MDDB_SETMASK (MD_SETMASK << MDDB_SETSHIFT) 718 #define MDDB_RECIDMASK ((1 << MDDB_SETSHIFT) - 1) 719 720 #define DBSET(id) (((id) & MDDB_SETMASK) >> MDDB_SETSHIFT) 721 #define DBID(id) ((id) & MDDB_RECIDMASK) 722 #define MAKERECID(s, i) ((((s) << MDDB_SETSHIFT) & MDDB_SETMASK) | \ 723 ((i) & MDDB_RECIDMASK)) 724 725 #define MDDB_PARSE_LOCBLK 0x00000001 726 #define MDDB_PARSE_LOCNM 0x00000002 727 #define MDDB_PARSE_OPTRECS 0x00000004 728 #define MDDB_PARSE_MASK 0x0000000F 729 730 731 #define MDDB_BLOCK_PARSE 0x00000001 /* Block sending parse msgs */ 732 #define MDDB_UNBLOCK_PARSE 0x00000002 /* Unblock sending parse msgs */ 733 734 /* 735 * We need to keep s_ident and s_inittime 32 bit. 
They are used in mddb_lb 736 */ 737 typedef struct mddb_set { 738 uint_t s_setno; /* set number */ 739 uint_t s_sideno; /* side number */ 740 identifier_t s_ident; /* set identifier */ 741 char *s_setname; /* set name */ 742 mddb_mb_ic_t **s_mbiarray; /* master blocks array */ 743 mddb_db_t *s_dbp; /* directory block */ 744 mddb_lb_t *s_lbp; /* locator block */ 745 /* May be cast to mddb_mnlb_t */ 746 /* if accessing sidenames in */ 747 /* MN diskset */ 748 mddb_ln_t *s_lnp; /* locator names block */ 749 /* May be cast to mddb_mnln_t */ 750 /* if accessing sidenames in */ 751 /* MN diskset */ 752 mddb_dtag_lst_t *s_dtlp; /* List of data tags found */ 753 mddb_did_ic_t *s_did_icp; /* Device ID incore area */ 754 mddb_ri_t *s_rip; /* replicas incore list */ 755 int s_freeblkcnt; /* visable for test code */ 756 int s_totalblkcnt; /* visable for test code */ 757 int s_mn_parseflags; /* mddb parse flags for MNset */ 758 int s_mn_parseflags_sending; /* parse flgs sent to slaves */ 759 uchar_t *s_freebitmap; /* free blocks bitmap */ 760 uint_t s_freebitmapsize; /* size of bitmap */ 761 struct timeval32 s_inittime; /* timestamp set created */ 762 mddb_recid_t s_zombie; /* zombie record - createrec */ 763 int s_staledeletes; /* number of stale deleterec */ 764 int s_optcmtcnt; /* Following are opt. record */ 765 int s_opthavelck; /* bookkeeping records ... 
*/ 766 int s_optwantlck; 767 kcondvar_t s_optwantlck_cv; 768 int s_optwaiterr; 769 int s_opthungerr; 770 kcondvar_t s_opthungerr_cv; 771 int s_opthavequeuinglck; 772 int s_optwantqueuinglck; 773 kcondvar_t s_optqueuing_cv; 774 ulong_t s_bufmisses; 775 mddb_bf_t *s_freebufhead; 776 int s_bufwakeup; 777 kcondvar_t s_buf_cv; 778 size_t s_databuffer_size; 779 void *s_databuffer; 780 int s_singlelockgotten; 781 int s_singlelockwanted; 782 kcondvar_t s_single_thread_cv; 783 md_hi_arr_t s_med; 784 } mddb_set_t; 785 786 #ifndef MDDB_FAKE 787 #ifdef _KERNEL 788 /* md_mddb.c */ 789 extern uint_t mddb_lb_did_convert(mddb_set_t *, 790 uint_t, uint_t *); 791 extern void mddb_locatorblock2splitname(mddb_ln_t *, 792 int, side_t, md_splitname *); 793 extern int mddb_configure(mddb_cfgcmd_t, 794 struct mddb_config *); 795 extern mddb_recid_t mddb_getnextrec(mddb_recid_t, 796 mddb_type_t, uint_t); 797 extern int mddb_getoptloc(mddb_optloc_t *); 798 extern void *mddb_getrecaddr(mddb_recid_t); 799 extern void *mddb_getrecaddr_resize(mddb_recid_t, size_t, 800 off_t); 801 extern int mddb_getrecprivate(mddb_recid_t); 802 extern void mddb_setrecprivate(mddb_recid_t, uint_t); 803 extern mddb_de_ic_t *mddb_getrecdep(mddb_recid_t); 804 extern mddb_type_t mddb_getrectype1(mddb_recid_t); 805 extern int mddb_getrectype2(mddb_recid_t); 806 extern int mddb_getrecsize(mddb_recid_t); 807 extern int mddb_commitrec(mddb_recid_t); 808 extern int mddb_commitrecs(mddb_recid_t *); 809 extern int mddb_deleterec(mddb_recid_t); 810 extern mddb_recstatus_t mddb_getrecstatus(mddb_recid_t); 811 extern mddb_recid_t mddb_createrec(size_t usersize, 812 mddb_type_t type, uint_t type2, 813 md_create_rec_option_t option, set_t setno); 814 extern void mddb_init(void); 815 extern void mddb_unload(void); 816 extern void mddb_unload_set(set_t setno); 817 extern mddb_recid_t mddb_makerecid(set_t setno, mddb_recid_t id); 818 extern set_t mddb_getsetnum(mddb_recid_t id); 819 extern char *mddb_getsetname(set_t setno); 820 
extern side_t mddb_getsidenum(set_t setno); 821 extern int mddb_ownset(set_t setno); 822 extern int getmed_ioctl(mddb_med_parm_t *medpp, int mode); 823 extern int setmed_ioctl(mddb_med_parm_t *medpp, int mode); 824 extern int updmed_ioctl(mddb_med_upd_parm_t *medpp, 825 int mode); 826 extern int take_set(mddb_config_t *cp, int mode); 827 extern int release_set(mddb_config_t *cp, int mode); 828 extern int gettag_ioctl(mddb_dtag_get_parm_t *dtgpp, 829 int mode); 830 extern int usetag_ioctl(mddb_dtag_use_parm_t *dtupp, 831 int mode); 832 extern int accept_ioctl(mddb_accept_parm_t *medpp, 833 int mode); 834 extern int md_update_locator_namespace(set_t setno, 835 side_t side, char *dname, char *pname, 836 md_dev64_t devt); 837 extern int mddb_validate_lb(set_t setno, int *rmaxsz); 838 extern int mddb_getinvlb_devid(set_t setno, int count, 839 int size, char **ctdptr); 840 extern int md_update_minor(set_t, side_t, mdkey_t); 841 #ifdef DEBUG 842 extern void mddb_check(void); 843 #endif /* DEBUG */ 844 #endif /* _KERNEL */ 845 846 #else 847 848 caddr_t mddb_fakeit; 849 850 #define md_lb_did_convert(a, b, c) (0) 851 #define mddb_configure(a, b) (0) 852 #define mddb_getnextrec(a, b, c) ((mddb_recid_t)0) 853 #define mddb_getrecaddr(a) (mddb_fakeit) 854 #define mddb_getrecprivate(a) (0) 855 #define mddb_setrecprivate(a, b) (0) 856 #define mddb_getrectype1(a) (0) 857 #define mddb_getrectype2(a) (0) 858 #define mddb_getrecsize(a) (0) 859 #define mddb_commitrec(a) (0) 860 #define mddb_commitrecs(a) (0) 861 #define mddb_deleterec(a) (0) 862 #define mddb_getrecstatus(a) (MDDB_OK) 863 #define mddb_createrec(s, a, b) (0xffff & (int)(mddb_fakeit = \ 864 (caddr_t)kmem_zalloc(s, KM_SLEEP))) 865 #define mddb_unload() (0) 866 867 #endif 868 869 #define MDDB_NOSLEEP 1 870 #define MDDB_SLEEPOK 0 871 872 #define MDDB_NOOLDOK 0x1 873 #define MDDB_MUSTEXIST 0x2 874 #define MDDB_NOINIT 0x4 875 #define MDDB_MULTINODE 0x8 876 #define MDDB_MN_STALE 0x10 /* MN set is stale */ 877 878 /* Flags 
passed to selectreplicas - not a bit mask */ 879 #define MDDB_SCANALL 1 880 #define MDDB_RETRYSCAN 0 881 #define MDDB_SCANALLSYNC 2 /* During reconfig, sync up incore */ 882 /* and ondisk mddb by writing incore */ 883 /* values to disk. Don't write */ 884 /* change log records. */ 885 886 /* Flags passed to writestart and writecopy */ 887 #define MDDB_WRITECOPY_ALL 1 /* Write all incore mddb to disk */ 888 #define MDDB_WRITECOPY_SYNC 2 /* Write incore mddb to disk except */ 889 /* - change log records */ 890 /* - optimized resync records */ 891 892 893 #define MDDB_PROBE 1 894 #define MDDB_NOPROBE 0 895 896 897 /* 898 * MN diskset definitions used to determine if a slave can write 899 * directly to the mddb. ONLY_MASTER only allows the master node 900 * to write to the mddb. ANY_NODE allows any node to write 901 * to the mddb. 902 */ 903 #define MDDB_WR_ONLY_MASTER 0 904 #define MDDB_WR_ANY_NODE 1 905 906 #define MDDB_L_LOCKED 0x0001 /* this record is locked */ 907 #define MDDB_L_WANTED 0x0002 908 909 #ifdef __cplusplus 910 } 911 #endif 912 913 #endif /* _SYS_MD_MDDB_H */ 914