1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2012, 2018 by Delphix. All rights reserved. 24 */ 25 26 /* Portions Copyright 2010 Robert Milkowski */ 27 28 #ifndef _SYS_ZIL_H 29 #define _SYS_ZIL_H 30 31 #include <sys/types.h> 32 #include <sys/spa.h> 33 #include <sys/zio.h> 34 #include <sys/dmu.h> 35 #include <sys/zio_crypt.h> 36 #include <sys/wmsum.h> 37 38 #ifdef __cplusplus 39 extern "C" { 40 #endif 41 42 struct dsl_pool; 43 struct dsl_dataset; 44 struct lwb; 45 46 /* 47 * Intent log format: 48 * 49 * Each objset has its own intent log. The log header (zil_header_t) 50 * for objset N's intent log is kept in the Nth object of the SPA's 51 * intent_log objset. The log header points to a chain of log blocks, 52 * each of which contains log records (i.e., transactions) followed by 53 * a log block trailer (zil_trailer_t). The format of a log record 54 * depends on the record (or transaction) type, but all records begin 55 * with a common structure that defines the type, length, and txg. 56 */ 57 58 /* 59 * Intent log header - this on disk structure holds fields to manage 60 * the log. All fields are 64 bit to easily handle cross architectures. 61 */ 62 typedef struct zil_header { 63 uint64_t zh_claim_txg; /* txg in which log blocks were claimed */ 64 uint64_t zh_replay_seq; /* highest replayed sequence number */ 65 blkptr_t zh_log; /* log chain */ 66 uint64_t zh_claim_blk_seq; /* highest claimed block sequence number */ 67 uint64_t zh_flags; /* header flags */ 68 uint64_t zh_claim_lr_seq; /* highest claimed lr sequence number */ 69 uint64_t zh_pad[3]; 70 } zil_header_t; 71 72 /* 73 * zh_flags bit settings 74 */ 75 #define ZIL_REPLAY_NEEDED 0x1 /* replay needed - internal only */ 76 #define ZIL_CLAIM_LR_SEQ_VALID 0x2 /* zh_claim_lr_seq field is valid */ 77 78 /* 79 * Log block chaining. 80 * 81 * Log blocks are chained together. Originally they were chained at the 82 * end of the block. For performance reasons the chain was moved to the 83 * beginning of the block which allows writes for only the data being used. 84 * The older position is supported for backwards compatibility. 85 * 86 * The zio_eck_t contains a zec_cksum which for the intent log is 87 * the sequence number of this log block. A seq of 0 is invalid. 88 * The zec_cksum is checked by the SPA against the sequence 89 * number passed in the blk_cksum field of the blkptr_t 90 */ 91 typedef struct zil_chain { 92 uint64_t zc_pad; 93 blkptr_t zc_next_blk; /* next block in chain */ 94 uint64_t zc_nused; /* bytes in log block used */ 95 zio_eck_t zc_eck; /* block trailer */ 96 } zil_chain_t; 97 98 #define ZIL_MIN_BLKSZ 4096ULL 99 100 /* 101 * ziltest is by and large an ugly hack, but very useful in 102 * checking replay without tedious work. 103 * When running ziltest we want to keep all itx's and so maintain 104 * a single list in the zl_itxg[] that uses a high txg: ZILTEST_TXG 105 * We subtract TXG_CONCURRENT_STATES to allow for common code. 106 */ 107 #define ZILTEST_TXG (UINT64_MAX - TXG_CONCURRENT_STATES) 108 109 /* 110 * The words of a log block checksum. 111 */ 112 #define ZIL_ZC_GUID_0 0 113 #define ZIL_ZC_GUID_1 1 114 #define ZIL_ZC_OBJSET 2 115 #define ZIL_ZC_SEQ 3 116 117 typedef enum zil_create { 118 Z_FILE, 119 Z_DIR, 120 Z_XATTRDIR, 121 } zil_create_t; 122 123 /* 124 * size of xvattr log section. 125 * its composed of lr_attr_t + xvattr bitmap + 2 64 bit timestamps 126 * for create time and a single 64 bit integer for all of the attributes, 127 * and 4 64 bit integers (32 bytes) for the scanstamp. 128 * 129 */ 130 131 #define ZIL_XVAT_SIZE(mapsize) \ 132 sizeof (lr_attr_t) + (sizeof (uint32_t) * (mapsize - 1)) + \ 133 (sizeof (uint64_t) * 7) 134 135 /* 136 * Size of ACL in log. The ACE data is padded out to properly align 137 * on 8 byte boundary. 138 */ 139 140 #define ZIL_ACE_LENGTH(x) (roundup(x, sizeof (uint64_t))) 141 142 /* 143 * Intent log transaction types and record structures 144 */ 145 #define TX_COMMIT 0 /* Commit marker (no on-disk state) */ 146 #define TX_CREATE 1 /* Create file */ 147 #define TX_MKDIR 2 /* Make directory */ 148 #define TX_MKXATTR 3 /* Make XATTR directory */ 149 #define TX_SYMLINK 4 /* Create symbolic link to a file */ 150 #define TX_REMOVE 5 /* Remove file */ 151 #define TX_RMDIR 6 /* Remove directory */ 152 #define TX_LINK 7 /* Create hard link to a file */ 153 #define TX_RENAME 8 /* Rename a file */ 154 #define TX_WRITE 9 /* File write */ 155 #define TX_TRUNCATE 10 /* Truncate a file */ 156 #define TX_SETATTR 11 /* Set file attributes */ 157 #define TX_ACL_V0 12 /* Set old formatted ACL */ 158 #define TX_ACL 13 /* Set ACL */ 159 #define TX_CREATE_ACL 14 /* create with ACL */ 160 #define TX_CREATE_ATTR 15 /* create + attrs */ 161 #define TX_CREATE_ACL_ATTR 16 /* create with ACL + attrs */ 162 #define TX_MKDIR_ACL 17 /* mkdir with ACL */ 163 #define TX_MKDIR_ATTR 18 /* mkdir with attr */ 164 #define TX_MKDIR_ACL_ATTR 19 /* mkdir with ACL + attrs */ 165 #define TX_WRITE2 20 /* dmu_sync EALREADY write */ 166 #define TX_SETSAXATTR 21 /* Set sa xattrs on file */ 167 #define TX_RENAME_EXCHANGE 22 /* Atomic swap via renameat2 */ 168 #define TX_RENAME_WHITEOUT 23 /* Atomic whiteout via renameat2 */ 169 #define TX_CLONE_RANGE 24 /* Clone a file range */ 170 #define TX_MAX_TYPE 25 /* Max transaction type */ 171 172 /* 173 * The transactions for mkdir, symlink, remove, rmdir, link, and rename 174 * may have the following bit set, indicating the original request 175 * specified case-insensitive handling of names. 176 */ 177 #define TX_CI ((uint64_t)0x1 << 63) /* case-insensitive behavior requested */ 178 179 /* 180 * Transactions for operations below can be logged out of order. 181 * For convenience in the code, all such records must have lr_foid 182 * at the same offset. 183 */ 184 #define TX_OOO(txtype) \ 185 ((txtype) == TX_WRITE || \ 186 (txtype) == TX_TRUNCATE || \ 187 (txtype) == TX_SETATTR || \ 188 (txtype) == TX_ACL_V0 || \ 189 (txtype) == TX_ACL || \ 190 (txtype) == TX_WRITE2 || \ 191 (txtype) == TX_SETSAXATTR || \ 192 (txtype) == TX_CLONE_RANGE) 193 194 /* 195 * The number of dnode slots consumed by the object is stored in the 8 196 * unused upper bits of the object ID. We subtract 1 from the value 197 * stored on disk for compatibility with implementations that don't 198 * support large dnodes. The slot count for a single-slot dnode will 199 * contain 0 for those bits to preserve the log record format for 200 * "small" dnodes. 201 */ 202 #define LR_FOID_GET_SLOTS(oid) (BF64_GET((oid), 56, 8) + 1) 203 #define LR_FOID_SET_SLOTS(oid, x) BF64_SET((oid), 56, 8, (x) - 1) 204 #define LR_FOID_GET_OBJ(oid) BF64_GET((oid), 0, DN_MAX_OBJECT_SHIFT) 205 #define LR_FOID_SET_OBJ(oid, x) BF64_SET((oid), 0, DN_MAX_OBJECT_SHIFT, (x)) 206 207 /* 208 * Format of log records. 209 * The fields are carefully defined to allow them to be aligned 210 * and sized the same on sparc & intel architectures. 211 * Each log record has a common structure at the beginning. 212 * 213 * The log record on disk (lrc_seq) holds the sequence number of all log 214 * records which is used to ensure we don't replay the same record. 215 */ 216 typedef struct { /* common log record header */ 217 uint64_t lrc_txtype; /* intent log transaction type */ 218 uint64_t lrc_reclen; /* transaction record length */ 219 uint64_t lrc_txg; /* dmu transaction group number */ 220 uint64_t lrc_seq; /* see comment above */ 221 } lr_t; 222 223 /* 224 * Common start of all out-of-order record types (TX_OOO() above). 225 */ 226 typedef struct { 227 lr_t lr_common; /* common portion of log record */ 228 uint64_t lr_foid; /* object id */ 229 } lr_ooo_t; 230 231 /* 232 * Additional lr_attr_t fields. 233 */ 234 typedef struct { 235 uint64_t lr_attr_attrs; /* all of the attributes */ 236 uint64_t lr_attr_crtime[2]; /* create time */ 237 uint8_t lr_attr_scanstamp[32]; 238 } lr_attr_end_t; 239 240 /* 241 * Handle option extended vattr attributes. 242 * 243 * Whenever new attributes are added the version number 244 * will need to be updated as will code in 245 * zfs_log.c and zfs_replay.c 246 */ 247 typedef struct { 248 uint32_t lr_attr_masksize; /* number of elements in array */ 249 uint32_t lr_attr_bitmap; /* First entry of array */ 250 /* remainder of array and additional lr_attr_end_t fields */ 251 uint8_t lr_attr_data[]; 252 } lr_attr_t; 253 254 /* 255 * log record for creates without optional ACL. 256 * This log record does support optional xvattr_t attributes. 257 */ 258 typedef struct { 259 lr_t lr_common; /* common portion of log record */ 260 uint64_t lr_doid; /* object id of directory */ 261 uint64_t lr_foid; /* object id of created file object */ 262 uint64_t lr_mode; /* mode of object */ 263 uint64_t lr_uid; /* uid of object */ 264 uint64_t lr_gid; /* gid of object */ 265 uint64_t lr_gen; /* generation (txg of creation) */ 266 uint64_t lr_crtime[2]; /* creation time */ 267 uint64_t lr_rdev; /* rdev of object to create */ 268 } _lr_create_t; 269 270 typedef struct { 271 _lr_create_t lr_create; /* common create portion */ 272 /* name of object to create follows this */ 273 /* for symlinks, link content follows name */ 274 /* for creates with xvattr data, the name follows the xvattr info */ 275 uint8_t lr_data[]; 276 } lr_create_t; 277 278 /* 279 * FUID ACL record will be an array of ACEs from the original ACL. 280 * If this array includes ephemeral IDs, the record will also include 281 * an array of log-specific FUIDs to replace the ephemeral IDs. 282 * Only one copy of each unique domain will be present, so the log-specific 283 * FUIDs will use an index into a compressed domain table. On replay this 284 * information will be used to construct real FUIDs (and bypass idmap, 285 * since it may not be available). 286 */ 287 288 /* 289 * Log record for creates with optional ACL 290 * This log record is also used for recording any FUID 291 * information needed for replaying the create. If the 292 * file doesn't have any actual ACEs then the lr_aclcnt 293 * would be zero. 294 * 295 * After lr_acl_flags, there are a lr_acl_bytes number of variable sized ace's. 296 * If create is also setting xvattr's, then acl data follows xvattr. 297 * If ACE FUIDs are needed then they will follow the xvattr_t. Following 298 * the FUIDs will be the domain table information. The FUIDs for the owner 299 * and group will be in lr_create. Name follows ACL data. 300 */ 301 typedef struct { 302 _lr_create_t lr_create; /* common create portion */ 303 uint64_t lr_aclcnt; /* number of ACEs in ACL */ 304 uint64_t lr_domcnt; /* number of unique domains */ 305 uint64_t lr_fuidcnt; /* number of real fuids */ 306 uint64_t lr_acl_bytes; /* number of bytes in ACL */ 307 uint64_t lr_acl_flags; /* ACL flags */ 308 uint8_t lr_data[]; 309 } lr_acl_create_t; 310 311 typedef struct { 312 lr_t lr_common; /* common portion of log record */ 313 uint64_t lr_doid; /* obj id of directory */ 314 /* name of object to remove follows this */ 315 uint8_t lr_data[]; 316 } lr_remove_t; 317 318 typedef struct { 319 lr_t lr_common; /* common portion of log record */ 320 uint64_t lr_doid; /* obj id of directory */ 321 uint64_t lr_link_obj; /* obj id of link */ 322 /* name of object to link follows this */ 323 uint8_t lr_data[]; 324 } lr_link_t; 325 326 typedef struct { 327 lr_t lr_common; /* common portion of log record */ 328 uint64_t lr_sdoid; /* obj id of source directory */ 329 uint64_t lr_tdoid; /* obj id of target directory */ 330 } _lr_rename_t; 331 332 typedef struct { 333 _lr_rename_t lr_rename; /* common rename portion */ 334 /* 2 strings: names of source and destination follow this */ 335 uint8_t lr_data[]; 336 } lr_rename_t; 337 338 typedef struct { 339 _lr_rename_t lr_rename; /* common rename portion */ 340 /* members related to the whiteout file (based on _lr_create_t) */ 341 uint64_t lr_wfoid; /* obj id of the new whiteout file */ 342 uint64_t lr_wmode; /* mode of object */ 343 uint64_t lr_wuid; /* uid of whiteout */ 344 uint64_t lr_wgid; /* gid of whiteout */ 345 uint64_t lr_wgen; /* generation (txg of creation) */ 346 uint64_t lr_wcrtime[2]; /* creation time */ 347 uint64_t lr_wrdev; /* always makedev(0, 0) */ 348 /* 2 strings: names of source and destination follow this */ 349 uint8_t lr_data[]; 350 } lr_rename_whiteout_t; 351 352 typedef struct { 353 lr_t lr_common; /* common portion of log record */ 354 uint64_t lr_foid; /* file object to write */ 355 uint64_t lr_offset; /* offset to write to */ 356 uint64_t lr_length; /* user data length to write */ 357 uint64_t lr_blkoff; /* no longer used */ 358 blkptr_t lr_blkptr; /* spa block pointer for replay */ 359 /* write data will follow for small writes */ 360 uint8_t lr_data[]; 361 } lr_write_t; 362 363 typedef struct { 364 lr_t lr_common; /* common portion of log record */ 365 uint64_t lr_foid; /* object id of file to truncate */ 366 uint64_t lr_offset; /* offset to truncate from */ 367 uint64_t lr_length; /* length to truncate */ 368 } lr_truncate_t; 369 370 typedef struct { 371 lr_t lr_common; /* common portion of log record */ 372 uint64_t lr_foid; /* file object to change attributes */ 373 uint64_t lr_mask; /* mask of attributes to set */ 374 uint64_t lr_mode; /* mode to set */ 375 uint64_t lr_uid; /* uid to set */ 376 uint64_t lr_gid; /* gid to set */ 377 uint64_t lr_size; /* size to set */ 378 uint64_t lr_atime[2]; /* access time */ 379 uint64_t lr_mtime[2]; /* modification time */ 380 /* optional attribute lr_attr_t may be here */ 381 uint8_t lr_data[]; 382 } lr_setattr_t; 383 384 typedef struct { 385 lr_t lr_common; /* common portion of log record */ 386 uint64_t lr_foid; /* file object to change attributes */ 387 uint64_t lr_size; 388 /* xattr name and value follows */ 389 uint8_t lr_data[]; 390 } lr_setsaxattr_t; 391 392 typedef struct { 393 lr_t lr_common; /* common portion of log record */ 394 uint64_t lr_foid; /* obj id of file */ 395 uint64_t lr_aclcnt; /* number of acl entries */ 396 /* lr_aclcnt number of ace_t entries follow this */ 397 uint8_t lr_data[]; 398 } lr_acl_v0_t; 399 400 typedef struct { 401 lr_t lr_common; /* common portion of log record */ 402 uint64_t lr_foid; /* obj id of file */ 403 uint64_t lr_aclcnt; /* number of ACEs in ACL */ 404 uint64_t lr_domcnt; /* number of unique domains */ 405 uint64_t lr_fuidcnt; /* number of real fuids */ 406 uint64_t lr_acl_bytes; /* number of bytes in ACL */ 407 uint64_t lr_acl_flags; /* ACL flags */ 408 /* lr_acl_bytes number of variable sized ace's follows */ 409 uint8_t lr_data[]; 410 } lr_acl_t; 411 412 typedef struct { 413 lr_t lr_common; /* common portion of log record */ 414 uint64_t lr_foid; /* file object to clone into */ 415 uint64_t lr_offset; /* offset to clone to */ 416 uint64_t lr_length; /* length of the blocks to clone */ 417 uint64_t lr_blksz; /* file's block size */ 418 uint64_t lr_nbps; /* number of block pointers */ 419 /* block pointers of the blocks to clone follows */ 420 blkptr_t lr_bps[]; 421 } lr_clone_range_t; 422 423 /* 424 * ZIL structure definitions, interface function prototype and globals. 425 */ 426 427 /* 428 * Writes are handled in three different ways: 429 * 430 * WR_INDIRECT: 431 * In this mode, if we need to commit the write later, then the block 432 * is immediately written into the file system (using dmu_sync), 433 * and a pointer to the block is put into the log record. 434 * When the txg commits the block is linked in. 435 * This saves additionally writing the data into the log record. 436 * There are a few requirements for this to occur: 437 * - write is greater than zfs/zvol_immediate_write_sz 438 * - not using slogs (as slogs are assumed to always be faster 439 * than writing into the main pool) 440 * - the write occupies only one block 441 * WR_COPIED: 442 * If we know we'll immediately be committing the 443 * transaction (O_SYNC or O_DSYNC), then we allocate a larger 444 * log record here for the data and copy the data in. 445 * WR_NEED_COPY: 446 * Otherwise we don't allocate a buffer, and *if* we need to 447 * flush the write later then a buffer is allocated and 448 * we retrieve the data using the dmu. 449 */ 450 typedef enum { 451 WR_INDIRECT, /* indirect - a large write (dmu_sync() data */ 452 /* and put blkptr in log, rather than actual data) */ 453 WR_COPIED, /* immediate - data is copied into lr_write_t */ 454 WR_NEED_COPY, /* immediate - data needs to be copied if pushed */ 455 WR_NUM_STATES /* number of states */ 456 } itx_wr_state_t; 457 458 typedef void (*zil_callback_t)(void *data); 459 460 typedef struct itx { 461 list_node_t itx_node; /* linkage on zl_itx_list */ 462 void *itx_private; /* type-specific opaque data */ 463 itx_wr_state_t itx_wr_state; /* write state */ 464 uint8_t itx_sync; /* synchronous transaction */ 465 zil_callback_t itx_callback; /* Called when the itx is persistent */ 466 void *itx_callback_data; /* User data for the callback */ 467 size_t itx_size; /* allocated itx structure size */ 468 uint64_t itx_oid; /* object id */ 469 uint64_t itx_gen; /* gen number for zfs_get_data */ 470 lr_t itx_lr; /* common part of log record */ 471 uint8_t itx_lr_data[]; /* type-specific part of lr_xx_t */ 472 } itx_t; 473 474 /* 475 * Used for zil kstat. 476 */ 477 typedef struct zil_stats { 478 /* 479 * Number of times a ZIL commit (e.g. fsync) has been requested. 480 */ 481 kstat_named_t zil_commit_count; 482 483 /* 484 * Number of times the ZIL has been flushed to stable storage. 485 * This is less than zil_commit_count when commits are "merged" 486 * (see the documentation above zil_commit()). 487 */ 488 kstat_named_t zil_commit_writer_count; 489 490 /* 491 * Number of times a ZIL commit failed and the ZIL was forced to fall 492 * back to txg_wait_synced(). The separate counts are for different 493 * reasons: 494 * - error: ZIL IO (write/flush) returned an error 495 * (see zil_commit_impl()) 496 * - stall: LWB block allocation failed, ZIL chain abandoned 497 * (see zil_commit_writer_stall()) 498 * - suspend: ZIL suspended 499 * (see zil_commit(), zil_get_commit_list()) 500 */ 501 kstat_named_t zil_commit_error_count; 502 kstat_named_t zil_commit_stall_count; 503 kstat_named_t zil_commit_suspend_count; 504 505 /* 506 * Number of transactions (reads, writes, renames, etc.) 507 * that have been committed. 508 */ 509 kstat_named_t zil_itx_count; 510 511 /* 512 * See the documentation for itx_wr_state_t above. 513 * Note that "bytes" accumulates the length of the transactions 514 * (i.e. data), not the actual log record sizes. 515 */ 516 kstat_named_t zil_itx_indirect_count; 517 kstat_named_t zil_itx_indirect_bytes; 518 kstat_named_t zil_itx_copied_count; 519 kstat_named_t zil_itx_copied_bytes; 520 kstat_named_t zil_itx_needcopy_count; 521 kstat_named_t zil_itx_needcopy_bytes; 522 523 /* 524 * Transactions which have been allocated to the "normal" 525 * (i.e. not slog) storage pool. Note that "bytes" accumulate 526 * the actual log record sizes - which do not include the actual 527 * data in case of indirect writes. bytes <= write <= alloc. 528 */ 529 kstat_named_t zil_itx_metaslab_normal_count; 530 kstat_named_t zil_itx_metaslab_normal_bytes; 531 kstat_named_t zil_itx_metaslab_normal_write; 532 kstat_named_t zil_itx_metaslab_normal_alloc; 533 534 /* 535 * Transactions which have been allocated to the "slog" storage pool. 536 * If there are no separate log devices, this is the same as the 537 * "normal" pool. bytes <= write <= alloc. 538 */ 539 kstat_named_t zil_itx_metaslab_slog_count; 540 kstat_named_t zil_itx_metaslab_slog_bytes; 541 kstat_named_t zil_itx_metaslab_slog_write; 542 kstat_named_t zil_itx_metaslab_slog_alloc; 543 } zil_kstat_values_t; 544 545 typedef struct zil_sums { 546 wmsum_t zil_commit_count; 547 wmsum_t zil_commit_writer_count; 548 wmsum_t zil_commit_error_count; 549 wmsum_t zil_commit_stall_count; 550 wmsum_t zil_commit_suspend_count; 551 wmsum_t zil_itx_count; 552 wmsum_t zil_itx_indirect_count; 553 wmsum_t zil_itx_indirect_bytes; 554 wmsum_t zil_itx_copied_count; 555 wmsum_t zil_itx_copied_bytes; 556 wmsum_t zil_itx_needcopy_count; 557 wmsum_t zil_itx_needcopy_bytes; 558 wmsum_t zil_itx_metaslab_normal_count; 559 wmsum_t zil_itx_metaslab_normal_bytes; 560 wmsum_t zil_itx_metaslab_normal_write; 561 wmsum_t zil_itx_metaslab_normal_alloc; 562 wmsum_t zil_itx_metaslab_slog_count; 563 wmsum_t zil_itx_metaslab_slog_bytes; 564 wmsum_t zil_itx_metaslab_slog_write; 565 wmsum_t zil_itx_metaslab_slog_alloc; 566 } zil_sums_t; 567 568 #define ZIL_STAT_INCR(zil, stat, val) \ 569 do { \ 570 int64_t tmpval = (val); \ 571 wmsum_add(&(zil_sums_global.stat), tmpval); \ 572 if ((zil)->zl_sums) \ 573 wmsum_add(&((zil)->zl_sums->stat), tmpval); \ 574 } while (0) 575 576 #define ZIL_STAT_BUMP(zil, stat) \ 577 ZIL_STAT_INCR(zil, stat, 1); 578 579 typedef int zil_parse_blk_func_t(zilog_t *zilog, const blkptr_t *bp, void *arg, 580 uint64_t txg); 581 typedef int zil_parse_lr_func_t(zilog_t *zilog, const lr_t *lr, void *arg, 582 uint64_t txg); 583 typedef int zil_replay_func_t(void *arg1, void *arg2, boolean_t byteswap); 584 typedef int zil_get_data_t(void *arg, uint64_t arg2, lr_write_t *lr, char *dbuf, 585 struct lwb *lwb, zio_t *zio); 586 587 extern int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, 588 zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg, 589 boolean_t decrypt); 590 591 extern void zil_init(void); 592 extern void zil_fini(void); 593 594 extern zilog_t *zil_alloc(objset_t *os, zil_header_t *zh_phys); 595 extern void zil_free(zilog_t *zilog); 596 597 extern zilog_t *zil_open(objset_t *os, zil_get_data_t *get_data, 598 zil_sums_t *zil_sums); 599 extern void zil_close(zilog_t *zilog); 600 601 extern boolean_t zil_replay(objset_t *os, void *arg, 602 zil_replay_func_t *const replay_func[TX_MAX_TYPE]); 603 extern boolean_t zil_replaying(zilog_t *zilog, dmu_tx_t *tx); 604 extern boolean_t zil_destroy(zilog_t *zilog, boolean_t keep_first); 605 extern void zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx); 606 607 extern itx_t *zil_itx_create(uint64_t txtype, size_t lrsize); 608 extern void zil_itx_destroy(itx_t *itx); 609 extern void zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx); 610 611 extern void zil_async_to_sync(zilog_t *zilog, uint64_t oid); 612 extern void zil_commit(zilog_t *zilog, uint64_t oid); 613 extern void zil_commit_impl(zilog_t *zilog, uint64_t oid); 614 extern void zil_remove_async(zilog_t *zilog, uint64_t oid); 615 616 extern int zil_reset(const char *osname, void *txarg); 617 extern int zil_claim(struct dsl_pool *dp, 618 struct dsl_dataset *ds, void *txarg); 619 extern int zil_check_log_chain(struct dsl_pool *dp, 620 struct dsl_dataset *ds, void *tx); 621 extern void zil_sync(zilog_t *zilog, dmu_tx_t *tx); 622 extern void zil_clean(zilog_t *zilog, uint64_t synced_txg); 623 624 extern int zil_suspend(const char *osname, void **cookiep); 625 extern void zil_resume(void *cookie); 626 627 extern void zil_lwb_add_block(struct lwb *lwb, const blkptr_t *bp); 628 extern void zil_lwb_add_txg(struct lwb *lwb, uint64_t txg); 629 extern int zil_bp_tree_add(zilog_t *zilog, const blkptr_t *bp); 630 631 extern void zil_set_sync(zilog_t *zilog, uint64_t syncval); 632 633 extern void zil_set_logbias(zilog_t *zilog, uint64_t slogval); 634 635 extern uint64_t zil_max_copied_data(zilog_t *zilog); 636 extern uint64_t zil_max_log_data(zilog_t *zilog, size_t hdrsize); 637 638 extern void zil_sums_init(zil_sums_t *zs); 639 extern void zil_sums_fini(zil_sums_t *zs); 640 extern void zil_kstat_values_update(zil_kstat_values_t *zs, 641 zil_sums_t *zil_sums); 642 643 extern int zil_replay_disable; 644 645 #ifdef __cplusplus 646 } 647 #endif 648 649 #endif /* _SYS_ZIL_H */ 650