Lines Matching +full:scaled +full:- +full:sync
9 * or https://opensource.org/licenses/CDDL-1.0.
57 * 2. they are committed to the on-disk ZIL for the dataset being
62 * dataset's on-disk ZIL will be replayed when that dataset is first
66 * As hinted at above, there is one ZIL per dataset (both the in-memory
67 * representation, and the on-disk representation). The on-disk format
70 * - a single, per-dataset, ZIL header; which points to a chain of
71 * - zero or more ZIL blocks; each of which contains
72 * - zero or more ZIL records
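The layout above maps onto a handful of on-disk structures. Below is a minimal sketch with simplified stand-ins, not the real definitions from sys/zil.h (field sets are abridged and names suffixed _sketch to make that explicit):

    #include <stdint.h>

    typedef struct blkptr_sketch {
        uint64_t dva[2];            /* where the block lives on disk */
        uint64_t cksum[4];          /* embedded chain identity (see below) */
    } blkptr_sketch_t;

    /* The single, per-dataset header: points at the chain of blocks. */
    typedef struct zil_header_sketch {
        blkptr_sketch_t zh_log;     /* first block of the chain, or a hole */
        uint64_t zh_claim_txg;      /* txg in which the chain was claimed */
        uint64_t zh_replay_seq;     /* highest record seq already replayed */
    } zil_header_sketch_t;

    /* Common prefix of every ZIL record (cf. lr_t). */
    typedef struct zil_record_sketch {
        uint64_t lrc_txtype;        /* TX_WRITE, TX_COMMIT, ... */
        uint64_t lrc_reclen;        /* total length, 8-byte aligned */
        uint64_t lrc_txg;           /* txg of the covering transaction */
        uint64_t lrc_seq;           /* monotonically increasing sequence */
        /* type-specific payload follows, then the next record */
    } zil_record_sketch_t;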
133 * power loss if a volatile out-of-order write cache is enabled.
154 const dva_t *dva1 = &((zil_bp_node_t *)x1)->zn_dva; in zil_bp_compare()
155 const dva_t *dva2 = &((zil_bp_node_t *)x2)->zn_dva; in zil_bp_compare()
167 avl_create(&zilog->zl_bp_tree, zil_bp_compare, in zil_bp_tree_init()
174 avl_tree_t *t = &zilog->zl_bp_tree; in zil_bp_tree_fini()
187 avl_tree_t *t = &zilog->zl_bp_tree; in zil_bp_tree_add()
201 zn->zn_dva = *dva; in zil_bp_tree_add()
210 return ((zil_header_t *)zilog->zl_header); in zil_header_in_syncing_context()
216 zio_cksum_t *zc = &bp->blk_cksum; in zil_init_log_chain()
218 (void) random_get_pseudo_bytes((void *)&zc->zc_word[ZIL_ZC_GUID_0], in zil_init_log_chain()
219 sizeof (zc->zc_word[ZIL_ZC_GUID_0])); in zil_init_log_chain()
220 (void) random_get_pseudo_bytes((void *)&zc->zc_word[ZIL_ZC_GUID_1], in zil_init_log_chain()
221 sizeof (zc->zc_word[ZIL_ZC_GUID_1])); in zil_init_log_chain()
222 zc->zc_word[ZIL_ZC_OBJSET] = dmu_objset_id(zilog->zl_os); in zil_init_log_chain()
223 zc->zc_word[ZIL_ZC_SEQ] = 1ULL; in zil_init_log_chain()
229 zil_kstat_values_t *zs = ksp->ks_data; in zil_kstats_global_update()
253 if (zilog->zl_header->zh_claim_txg == 0) in zil_read_log_block()
256 if (!(zilog->zl_header->zh_flags & ZIL_CLAIM_LR_SEQ_VALID)) in zil_read_log_block()
262 SET_BOOKMARK(&zb, bp->blk_cksum.zc_word[ZIL_ZC_OBJSET], in zil_read_log_block()
263 ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]); in zil_read_log_block()
265 error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, in zil_read_log_block()
269 zio_cksum_t cksum = bp->blk_cksum; in zil_read_log_block()
283 zil_chain_t *zilc = (*abuf)->b_data; in zil_read_log_block()
286 if (memcmp(&cksum, &zilc->zc_next_blk.blk_cksum, in zil_read_log_block()
288 zilc->zc_nused < sizeof (*zilc) || in zil_read_log_block()
289 zilc->zc_nused > size) { in zil_read_log_block()
293 *end = lr + zilc->zc_nused - sizeof (*zilc); in zil_read_log_block()
294 *nbp = zilc->zc_next_blk; in zil_read_log_block()
297 char *lr = (*abuf)->b_data; in zil_read_log_block()
298 zil_chain_t *zilc = (zil_chain_t *)(lr + size) - 1; in zil_read_log_block()
300 if (memcmp(&cksum, &zilc->zc_next_blk.blk_cksum, in zil_read_log_block()
302 (zilc->zc_nused > (size - sizeof (*zilc)))) { in zil_read_log_block()
306 *end = lr + zilc->zc_nused; in zil_read_log_block()
307 *nbp = zilc->zc_next_blk; in zil_read_log_block()
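zil_read_log_block() above also decides where the chain ends, since there is no explicit length: each block stores in zc_next_blk the expected checksum of its successor (the same GUID words written by zil_init_log_chain() above, with ZIL_ZC_SEQ incremented; the write side is lines 1998-1999 below), so a block whose embedded checksum does not match cannot belong to this chain. A minimal sketch of that test, assuming a simplified checksum type:

    #include <stdint.h>
    #include <string.h>

    typedef struct { uint64_t zc_word[4]; } cksum_sketch_t; /* ~ zio_cksum_t */

    /* 'expect' is zc_next_blk.blk_cksum saved in the previous block. */
    static int
    chain_continues(const cksum_sketch_t *expect, const cksum_sketch_t *found,
        uint64_t nused, uint64_t size)
    {
        /* Different GUID/objset/seq words: stale or foreign block. */
        if (memcmp(expect, found, sizeof (cksum_sketch_t)) != 0)
            return (0);
        /* The claimed payload must also fit inside the block. */
        return (nused <= size);
    }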
322 const blkptr_t *bp = &lr->lr_blkptr; in zil_read_log_data()
330 memset(wbuf, 0, MAX(BP_GET_LSIZE(bp), lr->lr_length)); in zil_read_log_data()
334 if (zilog->zl_header->zh_claim_txg == 0) in zil_read_log_data()
346 SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os), lr->lr_foid, in zil_read_log_data()
347 ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp)); in zil_read_log_data()
349 error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func, &abuf, in zil_read_log_data()
354 memcpy(wbuf, abuf->b_data, arc_buf_size(abuf)); in zil_read_log_data()
364 wmsum_init(&zs->zil_commit_count, 0); in zil_sums_init()
365 wmsum_init(&zs->zil_commit_writer_count, 0); in zil_sums_init()
366 wmsum_init(&zs->zil_commit_error_count, 0); in zil_sums_init()
367 wmsum_init(&zs->zil_commit_stall_count, 0); in zil_sums_init()
368 wmsum_init(&zs->zil_commit_suspend_count, 0); in zil_sums_init()
369 wmsum_init(&zs->zil_itx_count, 0); in zil_sums_init()
370 wmsum_init(&zs->zil_itx_indirect_count, 0); in zil_sums_init()
371 wmsum_init(&zs->zil_itx_indirect_bytes, 0); in zil_sums_init()
372 wmsum_init(&zs->zil_itx_copied_count, 0); in zil_sums_init()
373 wmsum_init(&zs->zil_itx_copied_bytes, 0); in zil_sums_init()
374 wmsum_init(&zs->zil_itx_needcopy_count, 0); in zil_sums_init()
375 wmsum_init(&zs->zil_itx_needcopy_bytes, 0); in zil_sums_init()
376 wmsum_init(&zs->zil_itx_metaslab_normal_count, 0); in zil_sums_init()
377 wmsum_init(&zs->zil_itx_metaslab_normal_bytes, 0); in zil_sums_init()
378 wmsum_init(&zs->zil_itx_metaslab_normal_write, 0); in zil_sums_init()
379 wmsum_init(&zs->zil_itx_metaslab_normal_alloc, 0); in zil_sums_init()
380 wmsum_init(&zs->zil_itx_metaslab_slog_count, 0); in zil_sums_init()
381 wmsum_init(&zs->zil_itx_metaslab_slog_bytes, 0); in zil_sums_init()
382 wmsum_init(&zs->zil_itx_metaslab_slog_write, 0); in zil_sums_init()
383 wmsum_init(&zs->zil_itx_metaslab_slog_alloc, 0); in zil_sums_init()
389 wmsum_fini(&zs->zil_commit_count); in zil_sums_fini()
390 wmsum_fini(&zs->zil_commit_writer_count); in zil_sums_fini()
391 wmsum_fini(&zs->zil_commit_error_count); in zil_sums_fini()
392 wmsum_fini(&zs->zil_commit_stall_count); in zil_sums_fini()
393 wmsum_fini(&zs->zil_commit_suspend_count); in zil_sums_fini()
394 wmsum_fini(&zs->zil_itx_count); in zil_sums_fini()
395 wmsum_fini(&zs->zil_itx_indirect_count); in zil_sums_fini()
396 wmsum_fini(&zs->zil_itx_indirect_bytes); in zil_sums_fini()
397 wmsum_fini(&zs->zil_itx_copied_count); in zil_sums_fini()
398 wmsum_fini(&zs->zil_itx_copied_bytes); in zil_sums_fini()
399 wmsum_fini(&zs->zil_itx_needcopy_count); in zil_sums_fini()
400 wmsum_fini(&zs->zil_itx_needcopy_bytes); in zil_sums_fini()
401 wmsum_fini(&zs->zil_itx_metaslab_normal_count); in zil_sums_fini()
402 wmsum_fini(&zs->zil_itx_metaslab_normal_bytes); in zil_sums_fini()
403 wmsum_fini(&zs->zil_itx_metaslab_normal_write); in zil_sums_fini()
404 wmsum_fini(&zs->zil_itx_metaslab_normal_alloc); in zil_sums_fini()
405 wmsum_fini(&zs->zil_itx_metaslab_slog_count); in zil_sums_fini()
406 wmsum_fini(&zs->zil_itx_metaslab_slog_bytes); in zil_sums_fini()
407 wmsum_fini(&zs->zil_itx_metaslab_slog_write); in zil_sums_fini()
408 wmsum_fini(&zs->zil_itx_metaslab_slog_alloc); in zil_sums_fini()
414 zs->zil_commit_count.value.ui64 = in zil_kstat_values_update()
415 wmsum_value(&zil_sums->zil_commit_count); in zil_kstat_values_update()
416 zs->zil_commit_writer_count.value.ui64 = in zil_kstat_values_update()
417 wmsum_value(&zil_sums->zil_commit_writer_count); in zil_kstat_values_update()
418 zs->zil_commit_error_count.value.ui64 = in zil_kstat_values_update()
419 wmsum_value(&zil_sums->zil_commit_error_count); in zil_kstat_values_update()
420 zs->zil_commit_stall_count.value.ui64 = in zil_kstat_values_update()
421 wmsum_value(&zil_sums->zil_commit_stall_count); in zil_kstat_values_update()
422 zs->zil_commit_suspend_count.value.ui64 = in zil_kstat_values_update()
423 wmsum_value(&zil_sums->zil_commit_suspend_count); in zil_kstat_values_update()
424 zs->zil_itx_count.value.ui64 = in zil_kstat_values_update()
425 wmsum_value(&zil_sums->zil_itx_count); in zil_kstat_values_update()
426 zs->zil_itx_indirect_count.value.ui64 = in zil_kstat_values_update()
427 wmsum_value(&zil_sums->zil_itx_indirect_count); in zil_kstat_values_update()
428 zs->zil_itx_indirect_bytes.value.ui64 = in zil_kstat_values_update()
429 wmsum_value(&zil_sums->zil_itx_indirect_bytes); in zil_kstat_values_update()
430 zs->zil_itx_copied_count.value.ui64 = in zil_kstat_values_update()
431 wmsum_value(&zil_sums->zil_itx_copied_count); in zil_kstat_values_update()
432 zs->zil_itx_copied_bytes.value.ui64 = in zil_kstat_values_update()
433 wmsum_value(&zil_sums->zil_itx_copied_bytes); in zil_kstat_values_update()
434 zs->zil_itx_needcopy_count.value.ui64 = in zil_kstat_values_update()
435 wmsum_value(&zil_sums->zil_itx_needcopy_count); in zil_kstat_values_update()
436 zs->zil_itx_needcopy_bytes.value.ui64 = in zil_kstat_values_update()
437 wmsum_value(&zil_sums->zil_itx_needcopy_bytes); in zil_kstat_values_update()
438 zs->zil_itx_metaslab_normal_count.value.ui64 = in zil_kstat_values_update()
439 wmsum_value(&zil_sums->zil_itx_metaslab_normal_count); in zil_kstat_values_update()
440 zs->zil_itx_metaslab_normal_bytes.value.ui64 = in zil_kstat_values_update()
441 wmsum_value(&zil_sums->zil_itx_metaslab_normal_bytes); in zil_kstat_values_update()
442 zs->zil_itx_metaslab_normal_write.value.ui64 = in zil_kstat_values_update()
443 wmsum_value(&zil_sums->zil_itx_metaslab_normal_write); in zil_kstat_values_update()
444 zs->zil_itx_metaslab_normal_alloc.value.ui64 = in zil_kstat_values_update()
445 wmsum_value(&zil_sums->zil_itx_metaslab_normal_alloc); in zil_kstat_values_update()
446 zs->zil_itx_metaslab_slog_count.value.ui64 = in zil_kstat_values_update()
447 wmsum_value(&zil_sums->zil_itx_metaslab_slog_count); in zil_kstat_values_update()
448 zs->zil_itx_metaslab_slog_bytes.value.ui64 = in zil_kstat_values_update()
449 wmsum_value(&zil_sums->zil_itx_metaslab_slog_bytes); in zil_kstat_values_update()
450 zs->zil_itx_metaslab_slog_write.value.ui64 = in zil_kstat_values_update()
451 wmsum_value(&zil_sums->zil_itx_metaslab_slog_write); in zil_kstat_values_update()
452 zs->zil_itx_metaslab_slog_alloc.value.ui64 = in zil_kstat_values_update()
453 wmsum_value(&zil_sums->zil_itx_metaslab_slog_alloc); in zil_kstat_values_update()
464 const zil_header_t *zh = zilog->zl_header; in zil_parse()
465 boolean_t claimed = !!zh->zh_claim_txg; in zil_parse()
466 uint64_t claim_blk_seq = claimed ? zh->zh_claim_blk_seq : UINT64_MAX; in zil_parse()
467 uint64_t claim_lr_seq = claimed ? zh->zh_claim_lr_seq : UINT64_MAX; in zil_parse()
478 if (!(zh->zh_flags & ZIL_CLAIM_LR_SEQ_VALID)) in zil_parse()
492 for (blk = zh->zh_log; !BP_IS_HOLE(&blk); blk = next_blk) { in zil_parse()
519 dmu_objset_name(zilog->zl_os, name); in zil_parse()
541 reclen = lr->lrc_reclen; in zil_parse()
542 if (reclen < sizeof (lr_t) || reclen > end - lrp) { in zil_parse()
550 if (lr->lrc_seq > claim_lr_seq) { in zil_parse()
560 ASSERT3U(max_lr_seq, <, lr->lrc_seq); in zil_parse()
561 max_lr_seq = lr->lrc_seq; in zil_parse()
567 zilog->zl_parse_error = error; in zil_parse()
568 zilog->zl_parse_blk_seq = max_blk_seq; in zil_parse()
569 zilog->zl_parse_lr_seq = max_lr_seq; in zil_parse()
570 zilog->zl_parse_blk_count = blk_count; in zil_parse()
571 zilog->zl_parse_lr_count = lr_count; in zil_parse()
588 * that we rewind to is invalid. Thus, we return -1 so in zil_clear_log_block()
592 return (-1); in zil_clear_log_block()
597 zio_free(zilog->zl_spa, first_txg, bp); in zil_clear_log_block()
621 return (zio_wait(zio_claim(NULL, zilog->zl_spa, in zil_claim_log_block()
632 ASSERT3U(lrc->lrc_reclen, >=, sizeof (*lr)); in zil_claim_write()
642 if (BP_GET_LOGICAL_BIRTH(&lr->lr_blkptr) >= first_txg) { in zil_claim_write()
648 return (zil_claim_log_block(zilog, &lr->lr_blkptr, tx, first_txg)); in zil_claim_write()
657 spa_t *spa = zilog->zl_spa; in zil_claim_clone_range()
660 ASSERT3U(lrc->lrc_reclen, >=, sizeof (*lr)); in zil_claim_clone_range()
661 ASSERT3U(lrc->lrc_reclen, >=, offsetof(lr_clone_range_t, in zil_claim_clone_range()
662 lr_bps[lr->lr_nbps])); in zil_claim_clone_range()
672 for (ii = 0; ii < lr->lr_nbps; ii++) { in zil_claim_clone_range()
673 bp = &lr->lr_bps[ii]; in zil_claim_clone_range()
698 for (ii = 0; ii < lr->lr_nbps; ii++) { in zil_claim_clone_range()
699 bp = &lr->lr_bps[ii]; in zil_claim_clone_range()
712 switch (lrc->lrc_txtype) { in zil_claim_log_record()
728 zio_free(zilog->zl_spa, dmu_tx_get_txg(tx), bp); in zil_free_log_block()
737 blkptr_t *bp = &lr->lr_blkptr; in zil_free_write()
739 ASSERT3U(lrc->lrc_reclen, >=, sizeof (*lr)); in zil_free_write()
746 zio_free(zilog->zl_spa, dmu_tx_get_txg(tx), bp); in zil_free_write()
760 ASSERT3U(lrc->lrc_reclen, >=, sizeof (*lr)); in zil_free_clone_range()
761 ASSERT3U(lrc->lrc_reclen, >=, offsetof(lr_clone_range_t, in zil_free_clone_range()
762 lr_bps[lr->lr_nbps])); in zil_free_clone_range()
768 spa = zilog->zl_spa; in zil_free_clone_range()
770 for (ii = 0; ii < lr->lr_nbps; ii++) { in zil_free_clone_range()
771 bp = &lr->lr_bps[ii]; in zil_free_clone_range()
790 switch (lrc->lrc_txtype) { in zil_free_log_record()
803 const uint64_t v1 = ((zil_vdev_node_t *)x1)->zv_vdev; in zil_lwb_vdev_compare()
804 const uint64_t v2 = ((zil_vdev_node_t *)x2)->zv_vdev; in zil_lwb_vdev_compare()
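zil_bp_compare() and zil_lwb_vdev_compare() are AVL comparators used to collapse duplicates: the first tree remembers blocks already visited by DVA, the second remembers vdevs that still need a cache flush. Both reduce to the same three-way comparison idiom, sketched here (upstream wraps it in the TREE_CMP() macro):

    #include <stdint.h>

    static int
    u64_cmp(uint64_t a, uint64_t b)
    {
        if (a < b)
            return (-1);
        if (a > b)
            return (1);
        return (0);     /* equal keys: avl_find() reports the duplicate */
    }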
821 lwb->lwb_zilog = zilog; in zil_alloc_lwb()
823 lwb->lwb_blk = *bp; in zil_alloc_lwb()
824 lwb->lwb_slim = (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2); in zil_alloc_lwb()
827 BP_ZERO(&lwb->lwb_blk); in zil_alloc_lwb()
828 lwb->lwb_slim = (spa_version(zilog->zl_spa) >= in zil_alloc_lwb()
831 lwb->lwb_slog = slog; in zil_alloc_lwb()
832 lwb->lwb_error = 0; in zil_alloc_lwb()
833 if (lwb->lwb_slim) { in zil_alloc_lwb()
834 lwb->lwb_nmax = sz; in zil_alloc_lwb()
835 lwb->lwb_nused = lwb->lwb_nfilled = sizeof (zil_chain_t); in zil_alloc_lwb()
837 lwb->lwb_nmax = sz - sizeof (zil_chain_t); in zil_alloc_lwb()
838 lwb->lwb_nused = lwb->lwb_nfilled = 0; in zil_alloc_lwb()
840 lwb->lwb_sz = sz; in zil_alloc_lwb()
841 lwb->lwb_state = state; in zil_alloc_lwb()
842 lwb->lwb_buf = zio_buf_alloc(sz); in zil_alloc_lwb()
843 lwb->lwb_child_zio = NULL; in zil_alloc_lwb()
844 lwb->lwb_write_zio = NULL; in zil_alloc_lwb()
845 lwb->lwb_root_zio = NULL; in zil_alloc_lwb()
846 lwb->lwb_issued_timestamp = 0; in zil_alloc_lwb()
847 lwb->lwb_issued_txg = 0; in zil_alloc_lwb()
848 lwb->lwb_alloc_txg = txg; in zil_alloc_lwb()
849 lwb->lwb_max_txg = 0; in zil_alloc_lwb()
851 mutex_enter(&zilog->zl_lock); in zil_alloc_lwb()
852 list_insert_tail(&zilog->zl_lwb_list, lwb); in zil_alloc_lwb()
854 zilog->zl_last_lwb_opened = lwb; in zil_alloc_lwb()
855 mutex_exit(&zilog->zl_lock); in zil_alloc_lwb()
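The slim/legacy split in zil_alloc_lwb() only moves the zil_chain_t within the buffer: a slim (ZIO_CHECKSUM_ZILOG2) block keeps it at the front, so lwb_nused starts at sizeof (zil_chain_t) and lwb_nmax covers the whole buffer, while a legacy block keeps it at the tail, so lwb_nmax excludes it. Usable payload is sz minus the header either way; the front placement is what later lets zil_lwb_write_issue() shrink a slim write. A sketch, with a hypothetical lwb_sketch_t standing in for lwb_t:

    #include <stdint.h>

    /*
     * slim (ZILOG2): [ zil_chain_t | records ...            ]  nmax = sz
     * legacy:        [ records ...            | zil_chain_t ]  nmax = sz - hdr
     */
    typedef struct { char *buf; uint64_t sz; int slim; } lwb_sketch_t;

    static void *
    chain_header(lwb_sketch_t *lwb, uint64_t hdr_sz)
    {
        /* Front for slim, tail otherwise (cf. lines 1932-1935 below). */
        return (lwb->slim ? (void *)lwb->buf :
            (void *)(lwb->buf + lwb->sz - hdr_sz));
    }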
863 ASSERT(MUTEX_HELD(&zilog->zl_lock)); in zil_free_lwb()
864 ASSERT(lwb->lwb_state == LWB_STATE_NEW || in zil_free_lwb()
865 lwb->lwb_state == LWB_STATE_FLUSH_DONE); in zil_free_lwb()
866 ASSERT3P(lwb->lwb_child_zio, ==, NULL); in zil_free_lwb()
867 ASSERT3P(lwb->lwb_write_zio, ==, NULL); in zil_free_lwb()
868 ASSERT3P(lwb->lwb_root_zio, ==, NULL); in zil_free_lwb()
869 ASSERT3U(lwb->lwb_alloc_txg, <=, spa_syncing_txg(zilog->zl_spa)); in zil_free_lwb()
870 ASSERT3U(lwb->lwb_max_txg, <=, spa_syncing_txg(zilog->zl_spa)); in zil_free_lwb()
871 VERIFY(list_is_empty(&lwb->lwb_itxs)); in zil_free_lwb()
872 VERIFY(list_is_empty(&lwb->lwb_waiters)); in zil_free_lwb()
873 ASSERT(avl_is_empty(&lwb->lwb_vdev_tree)); in zil_free_lwb()
874 ASSERT(!MUTEX_HELD(&lwb->lwb_vdev_lock)); in zil_free_lwb()
878 * valid, and prevent use-after-free errors. in zil_free_lwb()
880 if (zilog->zl_last_lwb_opened == lwb) in zil_free_lwb()
881 zilog->zl_last_lwb_opened = NULL; in zil_free_lwb()
887 * Called when we create in-memory log transactions so that we know
893 dsl_pool_t *dp = zilog->zl_dmu_pool; in zilog_dirty()
894 dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os); in zilog_dirty()
896 ASSERT(spa_writeable(zilog->zl_spa)); in zilog_dirty()
898 if (ds->ds_is_snapshot) in zilog_dirty()
901 if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg)) { in zilog_dirty()
903 dmu_buf_add_ref(ds->ds_dbuf, zilog); in zilog_dirty()
905 zilog->zl_dirty_max_txg = MAX(txg, zilog->zl_dirty_max_txg); in zilog_dirty()
919 dsl_pool_t *dp = zilog->zl_dmu_pool; in zilog_is_dirty_in_txg()
921 if (txg_list_member(&dp->dp_dirty_zilogs, zilog, txg & TXG_MASK)) in zilog_is_dirty_in_txg()
933 dsl_pool_t *dp = zilog->zl_dmu_pool; in zilog_is_dirty()
936 if (txg_list_member(&dp->dp_dirty_zilogs, zilog, t)) in zilog_is_dirty()
951 dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os); in zil_commit_activate_saxattr_feature()
955 if (spa_feature_is_enabled(zilog->zl_spa, SPA_FEATURE_ZILSAXATTR) && in zil_commit_activate_saxattr_feature()
956 dmu_objset_type(zilog->zl_os) != DMU_OST_ZVOL && in zil_commit_activate_saxattr_feature()
958 tx = dmu_tx_create(zilog->zl_os); in zil_commit_activate_saxattr_feature()
963 mutex_enter(&ds->ds_lock); in zil_commit_activate_saxattr_feature()
964 ds->ds_feature_activation[SPA_FEATURE_ZILSAXATTR] = in zil_commit_activate_saxattr_feature()
966 mutex_exit(&ds->ds_lock); in zil_commit_activate_saxattr_feature()
968 txg_wait_synced(zilog->zl_dmu_pool, txg); in zil_commit_activate_saxattr_feature()
973 * Create an on-disk intent log.
978 const zil_header_t *zh = zilog->zl_header; in zil_create()
985 dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os); in zil_create()
991 txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); in zil_create()
993 ASSERT(zh->zh_claim_txg == 0); in zil_create()
994 ASSERT(zh->zh_replay_seq == 0); in zil_create()
996 blk = zh->zh_log; in zil_create()
1000 * - there isn't one already in zil_create()
1001 * - the existing block is the wrong endianness in zil_create()
1004 tx = dmu_tx_create(zilog->zl_os); in zil_create()
1006 dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); in zil_create()
1010 zio_free(zilog->zl_spa, txg, &blk); in zil_create()
1014 error = zio_alloc_zil(zilog->zl_spa, zilog->zl_os, txg, &blk, in zil_create()
1036 * need to wait for the feature activation to sync out. in zil_create()
1038 if (spa_feature_is_enabled(zilog->zl_spa, in zil_create()
1039 SPA_FEATURE_ZILSAXATTR) && dmu_objset_type(zilog->zl_os) != in zil_create()
1041 mutex_enter(&ds->ds_lock); in zil_create()
1042 ds->ds_feature_activation[SPA_FEATURE_ZILSAXATTR] = in zil_create()
1044 mutex_exit(&ds->ds_lock); in zil_create()
1048 txg_wait_synced(zilog->zl_dmu_pool, txg); in zil_create()
1056 IMPLY(spa_feature_is_enabled(zilog->zl_spa, SPA_FEATURE_ZILSAXATTR) && in zil_create()
1057 dmu_objset_type(zilog->zl_os) != DMU_OST_ZVOL, in zil_create()
1060 ASSERT(error != 0 || memcmp(&blk, &zh->zh_log, sizeof (blk)) == 0); in zil_create()
1072 * zil_create() and zil_destroy() will wait for any in-progress destroys
1079 const zil_header_t *zh = zilog->zl_header; in zil_destroy()
1087 txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); in zil_destroy()
1089 zilog->zl_old_header = *zh; /* debugging aid */ in zil_destroy()
1091 if (BP_IS_HOLE(&zh->zh_log)) in zil_destroy()
1094 tx = dmu_tx_create(zilog->zl_os); in zil_destroy()
1096 dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); in zil_destroy()
1099 mutex_enter(&zilog->zl_lock); in zil_destroy()
1101 ASSERT3U(zilog->zl_destroy_txg, <, txg); in zil_destroy()
1102 zilog->zl_destroy_txg = txg; in zil_destroy()
1103 zilog->zl_keep_first = keep_first; in zil_destroy()
1105 if (!list_is_empty(&zilog->zl_lwb_list)) { in zil_destroy()
1106 ASSERT(zh->zh_claim_txg == 0); in zil_destroy()
1108 while ((lwb = list_remove_head(&zilog->zl_lwb_list)) != NULL) { in zil_destroy()
1109 if (lwb->lwb_buf != NULL) in zil_destroy()
1110 zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); in zil_destroy()
1111 if (!BP_IS_HOLE(&lwb->lwb_blk)) in zil_destroy()
1112 zio_free(zilog->zl_spa, txg, &lwb->lwb_blk); in zil_destroy()
1118 mutex_exit(&zilog->zl_lock); in zil_destroy()
1128 ASSERT(list_is_empty(&zilog->zl_lwb_list)); in zil_destroy_sync()
1130 zil_free_log_record, tx, zilog->zl_header->zh_claim_txg, B_FALSE); in zil_destroy_sync()
1143 error = dmu_objset_own_obj(dp, ds->ds_object, in zil_claim()
1152 (unsigned long long)ds->ds_object, error); in zil_claim()
1160 ASSERT3U(tx->tx_txg, ==, spa_first_txg(zilog->zl_spa)); in zil_claim()
1161 first_txg = spa_min_claim_txg(zilog->zl_spa); in zil_claim()
1190 if (spa_get_log_state(zilog->zl_spa) == SPA_LOG_CLEAR || in zil_claim()
1191 (zilog->zl_spa->spa_uberblock.ub_checkpoint_txg != 0 && in zil_claim()
1192 zh->zh_claim_txg == 0)) { in zil_claim()
1193 if (!BP_IS_HOLE(&zh->zh_log)) { in zil_claim()
1197 BP_ZERO(&zh->zh_log); in zil_claim()
1198 if (os->os_encrypted) in zil_claim()
1199 os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE; in zil_claim()
1209 ASSERT3U(first_txg, ==, spa_first_txg(zilog->zl_spa)); in zil_claim()
1218 ASSERT3U(zh->zh_claim_txg, <=, first_txg); in zil_claim()
1219 if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) { in zil_claim()
1222 zh->zh_claim_txg = first_txg; in zil_claim()
1223 zh->zh_claim_blk_seq = zilog->zl_parse_blk_seq; in zil_claim()
1224 zh->zh_claim_lr_seq = zilog->zl_parse_lr_seq; in zil_claim()
1225 if (zilog->zl_parse_lr_count || zilog->zl_parse_blk_count > 1) in zil_claim()
1226 zh->zh_flags |= ZIL_REPLAY_NEEDED; in zil_claim()
1227 zh->zh_flags |= ZIL_CLAIM_LR_SEQ_VALID; in zil_claim()
1228 if (os->os_encrypted) in zil_claim()
1229 os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_TRUE; in zil_claim()
1233 ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1)); in zil_claim()
1257 (unsigned long long)ds->ds_object, error); in zil_check_log_chain()
1262 bp = (blkptr_t *)&zilog->zl_header->zh_log; in zil_check_log_chain()
1275 spa_config_enter(os->os_spa, SCL_STATE, FTAG, RW_READER); in zil_check_log_chain()
1276 vd = vdev_lookup_top(os->os_spa, DVA_GET_VDEV(&bp->blk_dva[0])); in zil_check_log_chain()
1277 if (vd->vdev_islog && vdev_is_dead(vd)) in zil_check_log_chain()
1279 spa_config_exit(os->os_spa, SCL_STATE, FTAG); in zil_check_log_chain()
1292 if (zilog->zl_spa->spa_uberblock.ub_checkpoint_txg != 0 && in zil_check_log_chain()
1293 zh->zh_claim_txg == 0) in zil_check_log_chain()
1305 zilog->zl_header->zh_claim_txg ? -1ULL : in zil_check_log_chain()
1306 spa_min_claim_txg(os->os_spa), B_FALSE); in zil_check_log_chain()
1322 mutex_enter(&zcw->zcw_lock); in zil_commit_waiter_skip()
1323 ASSERT3B(zcw->zcw_done, ==, B_FALSE); in zil_commit_waiter_skip()
1324 zcw->zcw_done = B_TRUE; in zil_commit_waiter_skip()
1325 cv_broadcast(&zcw->zcw_cv); in zil_commit_waiter_skip()
1326 mutex_exit(&zcw->zcw_lock); in zil_commit_waiter_skip()
1345 ASSERT(MUTEX_HELD(&lwb->lwb_zilog->zl_issuer_lock)); in zil_commit_waiter_link_lwb()
1346 IMPLY(lwb->lwb_state != LWB_STATE_OPENED, in zil_commit_waiter_link_lwb()
1347 MUTEX_HELD(&lwb->lwb_zilog->zl_lock)); in zil_commit_waiter_link_lwb()
1348 ASSERT3S(lwb->lwb_state, !=, LWB_STATE_NEW); in zil_commit_waiter_link_lwb()
1349 ASSERT3S(lwb->lwb_state, !=, LWB_STATE_FLUSH_DONE); in zil_commit_waiter_link_lwb()
1351 ASSERT(!list_link_active(&zcw->zcw_node)); in zil_commit_waiter_link_lwb()
1352 list_insert_tail(&lwb->lwb_waiters, zcw); in zil_commit_waiter_link_lwb()
1353 ASSERT3P(zcw->zcw_lwb, ==, NULL); in zil_commit_waiter_link_lwb()
1354 zcw->zcw_lwb = lwb; in zil_commit_waiter_link_lwb()
1365 ASSERT(!list_link_active(&zcw->zcw_node)); in zil_commit_waiter_link_nolwb()
1367 ASSERT3P(zcw->zcw_lwb, ==, NULL); in zil_commit_waiter_link_nolwb()
1373 avl_tree_t *t = &lwb->lwb_vdev_tree; in zil_lwb_add_block()
1379 ASSERT3S(lwb->lwb_state, !=, LWB_STATE_WRITE_DONE); in zil_lwb_add_block()
1380 ASSERT3S(lwb->lwb_state, !=, LWB_STATE_FLUSH_DONE); in zil_lwb_add_block()
1385 mutex_enter(&lwb->lwb_vdev_lock); in zil_lwb_add_block()
1387 zvsearch.zv_vdev = DVA_GET_VDEV(&bp->blk_dva[i]); in zil_lwb_add_block()
1390 zv->zv_vdev = zvsearch.zv_vdev; in zil_lwb_add_block()
1394 mutex_exit(&lwb->lwb_vdev_lock); in zil_lwb_add_block()
1400 avl_tree_t *src = &lwb->lwb_vdev_tree; in zil_lwb_flush_defer()
1401 avl_tree_t *dst = &nlwb->lwb_vdev_tree; in zil_lwb_flush_defer()
1405 ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE); in zil_lwb_flush_defer()
1406 ASSERT3S(nlwb->lwb_state, !=, LWB_STATE_WRITE_DONE); in zil_lwb_flush_defer()
1407 ASSERT3S(nlwb->lwb_state, !=, LWB_STATE_FLUSH_DONE); in zil_lwb_flush_defer()
1412 * while holding zilog->zl_lock) as its writes and those of its in zil_lwb_flush_defer()
1416 mutex_enter(&nlwb->lwb_vdev_lock); in zil_lwb_flush_defer()
1419 * exist in 'nlwb' are moved to it, freeing any would-be duplicates. in zil_lwb_flush_defer()
1430 mutex_exit(&nlwb->lwb_vdev_lock); in zil_lwb_flush_defer()
1436 lwb->lwb_max_txg = MAX(lwb->lwb_max_txg, txg); in zil_lwb_add_txg()
1455 lwb_t *lwb = zio->io_private; in zil_lwb_flush_vdevs_done()
1456 zilog_t *zilog = lwb->lwb_zilog; in zil_lwb_flush_vdevs_done()
1460 spa_config_exit(zilog->zl_spa, SCL_STATE, lwb); in zil_lwb_flush_vdevs_done()
1462 hrtime_t t = gethrtime() - lwb->lwb_issued_timestamp; in zil_lwb_flush_vdevs_done()
1464 mutex_enter(&zilog->zl_lock); in zil_lwb_flush_vdevs_done()
1466 zilog->zl_last_lwb_latency = (zilog->zl_last_lwb_latency * 7 + t) / 8; in zil_lwb_flush_vdevs_done()
1468 lwb->lwb_root_zio = NULL; in zil_lwb_flush_vdevs_done()
1470 ASSERT3S(lwb->lwb_state, ==, LWB_STATE_WRITE_DONE); in zil_lwb_flush_vdevs_done()
1471 lwb->lwb_state = LWB_STATE_FLUSH_DONE; in zil_lwb_flush_vdevs_done()
1473 if (zilog->zl_last_lwb_opened == lwb) { in zil_lwb_flush_vdevs_done()
1480 zilog->zl_commit_lr_seq = zilog->zl_lr_seq; in zil_lwb_flush_vdevs_done()
1483 while ((itx = list_remove_head(&lwb->lwb_itxs)) != NULL) in zil_lwb_flush_vdevs_done()
1486 while ((zcw = list_remove_head(&lwb->lwb_waiters)) != NULL) { in zil_lwb_flush_vdevs_done()
1487 mutex_enter(&zcw->zcw_lock); in zil_lwb_flush_vdevs_done()
1489 ASSERT3P(zcw->zcw_lwb, ==, lwb); in zil_lwb_flush_vdevs_done()
1490 zcw->zcw_lwb = NULL; in zil_lwb_flush_vdevs_done()
1506 zcw->zcw_zio_error = zio->io_error; in zil_lwb_flush_vdevs_done()
1508 ASSERT3B(zcw->zcw_done, ==, B_FALSE); in zil_lwb_flush_vdevs_done()
1509 zcw->zcw_done = B_TRUE; in zil_lwb_flush_vdevs_done()
1510 cv_broadcast(&zcw->zcw_cv); in zil_lwb_flush_vdevs_done()
1512 mutex_exit(&zcw->zcw_lock); in zil_lwb_flush_vdevs_done()
1515 uint64_t txg = lwb->lwb_issued_txg; in zil_lwb_flush_vdevs_done()
1518 mutex_exit(&zilog->zl_lock); in zil_lwb_flush_vdevs_done()
1520 mutex_enter(&zilog->zl_lwb_io_lock); in zil_lwb_flush_vdevs_done()
1521 ASSERT3U(zilog->zl_lwb_inflight[txg & TXG_MASK], >, 0); in zil_lwb_flush_vdevs_done()
1522 zilog->zl_lwb_inflight[txg & TXG_MASK]--; in zil_lwb_flush_vdevs_done()
1523 if (zilog->zl_lwb_inflight[txg & TXG_MASK] == 0) in zil_lwb_flush_vdevs_done()
1524 cv_broadcast(&zilog->zl_lwb_io_cv); in zil_lwb_flush_vdevs_done()
1525 mutex_exit(&zilog->zl_lwb_io_lock); in zil_lwb_flush_vdevs_done()
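Line 1466 above keeps an exponential moving average of lwb latency, weighting the newest sample by 1/8: with a running average of 800us and a new sample of 1600us it yields (800 * 7 + 1600) / 8 = 900us. As a two-line sketch:

    #include <stdint.h>

    /* EWMA with alpha = 1/8; e.g. ewma8(800, 1600) == 900. */
    static uint64_t
    ewma8(uint64_t avg, uint64_t sample)
    {
        return ((avg * 7 + sample) / 8);
    }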
1535 ASSERT3U(txg, ==, spa_syncing_txg(zilog->zl_spa)); in zil_lwb_flush_wait_all()
1537 mutex_enter(&zilog->zl_lwb_io_lock); in zil_lwb_flush_wait_all()
1538 while (zilog->zl_lwb_inflight[txg & TXG_MASK] > 0) in zil_lwb_flush_wait_all()
1539 cv_wait(&zilog->zl_lwb_io_cv, &zilog->zl_lwb_io_lock); in zil_lwb_flush_wait_all()
1540 mutex_exit(&zilog->zl_lwb_io_lock); in zil_lwb_flush_wait_all()
1543 mutex_enter(&zilog->zl_lock); in zil_lwb_flush_wait_all()
1544 mutex_enter(&zilog->zl_lwb_io_lock); in zil_lwb_flush_wait_all()
1545 lwb_t *lwb = list_head(&zilog->zl_lwb_list); in zil_lwb_flush_wait_all()
1547 if (lwb->lwb_issued_txg <= txg) { in zil_lwb_flush_wait_all()
1548 ASSERT(lwb->lwb_state != LWB_STATE_ISSUED); in zil_lwb_flush_wait_all()
1549 ASSERT(lwb->lwb_state != LWB_STATE_WRITE_DONE); in zil_lwb_flush_wait_all()
1550 IMPLY(lwb->lwb_issued_txg > 0, in zil_lwb_flush_wait_all()
1551 lwb->lwb_state == LWB_STATE_FLUSH_DONE); in zil_lwb_flush_wait_all()
1553 IMPLY(lwb->lwb_state == LWB_STATE_WRITE_DONE || in zil_lwb_flush_wait_all()
1554 lwb->lwb_state == LWB_STATE_FLUSH_DONE, in zil_lwb_flush_wait_all()
1555 lwb->lwb_buf == NULL); in zil_lwb_flush_wait_all()
1556 lwb = list_next(&zilog->zl_lwb_list, lwb); in zil_lwb_flush_wait_all()
1558 mutex_exit(&zilog->zl_lwb_io_lock); in zil_lwb_flush_wait_all()
1559 mutex_exit(&zilog->zl_lock); in zil_lwb_flush_wait_all()
1578 lwb_t *lwb = zio->io_private; in zil_lwb_write_done()
1579 spa_t *spa = zio->io_spa; in zil_lwb_write_done()
1580 zilog_t *zilog = lwb->lwb_zilog; in zil_lwb_write_done()
1581 avl_tree_t *t = &lwb->lwb_vdev_tree; in zil_lwb_write_done()
1588 abd_free(zio->io_abd); in zil_lwb_write_done()
1589 zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); in zil_lwb_write_done()
1590 lwb->lwb_buf = NULL; in zil_lwb_write_done()
1592 mutex_enter(&zilog->zl_lock); in zil_lwb_write_done()
1593 ASSERT3S(lwb->lwb_state, ==, LWB_STATE_ISSUED); in zil_lwb_write_done()
1594 lwb->lwb_state = LWB_STATE_WRITE_DONE; in zil_lwb_write_done()
1595 lwb->lwb_child_zio = NULL; in zil_lwb_write_done()
1596 lwb->lwb_write_zio = NULL; in zil_lwb_write_done()
1604 nlwb = list_next(&zilog->zl_lwb_list, lwb); in zil_lwb_write_done()
1605 if (nlwb && nlwb->lwb_state != LWB_STATE_ISSUED) in zil_lwb_write_done()
1607 mutex_exit(&zilog->zl_lock); in zil_lwb_write_done()
1625 if (zio->io_error != 0) { in zil_lwb_write_done()
1641 * with lots of async write activity and few sync write and/or fsync in zil_lwb_write_done()
1645 if (list_is_empty(&lwb->lwb_waiters) && nlwb != NULL) { in zil_lwb_write_done()
1647 ASSERT(avl_is_empty(&lwb->lwb_vdev_tree)); in zil_lwb_write_done()
1652 vdev_t *vd = vdev_lookup_top(spa, zv->zv_vdev); in zil_lwb_write_done()
1662 zio_flush(lwb->lwb_root_zio, vd); in zil_lwb_write_done()
1682 ASSERT(MUTEX_HELD(&zilog->zl_lock)); in zil_lwb_set_zio_dependency()
1684 lwb_t *prev_lwb = list_prev(&zilog->zl_lwb_list, lwb); in zil_lwb_set_zio_dependency()
1686 prev_lwb->lwb_state == LWB_STATE_FLUSH_DONE) in zil_lwb_set_zio_dependency()
1708 if (prev_lwb->lwb_state == LWB_STATE_ISSUED) { in zil_lwb_set_zio_dependency()
1709 ASSERT3P(prev_lwb->lwb_write_zio, !=, NULL); in zil_lwb_set_zio_dependency()
1710 zio_add_child(lwb->lwb_write_zio, prev_lwb->lwb_write_zio); in zil_lwb_set_zio_dependency()
1712 ASSERT3S(prev_lwb->lwb_state, ==, LWB_STATE_WRITE_DONE); in zil_lwb_set_zio_dependency()
1715 ASSERT3P(prev_lwb->lwb_root_zio, !=, NULL); in zil_lwb_set_zio_dependency()
1716 zio_add_child(lwb->lwb_root_zio, prev_lwb->lwb_root_zio); in zil_lwb_set_zio_dependency()
1723 * the passed in lwb has already been opened, it is essentially a no-op.
1728 ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock)); in zil_lwb_write_open()
1730 if (lwb->lwb_state != LWB_STATE_NEW) { in zil_lwb_write_open()
1731 ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED); in zil_lwb_write_open()
1735 mutex_enter(&zilog->zl_lock); in zil_lwb_write_open()
1736 lwb->lwb_state = LWB_STATE_OPENED; in zil_lwb_write_open()
1737 zilog->zl_last_lwb_opened = lwb; in zil_lwb_write_open()
1738 mutex_exit(&zilog->zl_lock); in zil_lwb_write_open()
1754 uint_t md = zilog->zl_max_block_size - sizeof (zil_chain_t); in zil_lwb_plan()
1758 * Small bursts are written as-is in one block. in zil_lwb_plan()
1778 uint_t n = DIV_ROUND_UP(s, md - sizeof (lr_write_t)); in zil_lwb_plan()
1781 waste = MAX(waste, zilog->zl_cur_max); in zil_lwb_plan()
1782 if (chunk <= md - waste) { in zil_lwb_plan()
1783 *minsize = MAX(s - (md - waste) * (n - 1), waste); in zil_lwb_plan()
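A worked example of the plan above, with illustrative constants only (the real md, per-record header, and waste values come from zilog state, and waste has already been raised to at least zl_cur_max on line 1781):

    #include <stdint.h>

    #define DIV_ROUND_UP(x, y)  (((x) + (y) - 1) / (y))

    int
    main(void)
    {
        uint32_t md = 1000;     /* usable bytes per block (illustrative) */
        uint32_t hdr = 100;     /* per-record header (illustrative) */
        uint32_t waste = 200;   /* tolerated tail waste (illustrative) */
        uint32_t s = 2500;      /* burst size being planned */

        uint32_t n = DIV_ROUND_UP(s, md - hdr);         /* 3 blocks */
        uint32_t chunk = s / n;                         /* 833 each */
        uint32_t minsize = s - (md - waste) * (n - 1);  /* 2500 - 1600 = 900 */

        return (!(n == 3 && chunk == 833 && minsize == 900));
    }

So a 2500-byte burst is planned as three blocks, with at least 900 bytes reserved for the last one.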
1803 if (zilog->zl_cur_size > 0) { in zil_lwb_predict()
1804 o = zil_lwb_plan(zilog, zilog->zl_cur_size, &m); in zil_lwb_predict()
1812 o = MIN(o, zilog->zl_prev_opt[i]); in zil_lwb_predict()
1817 m = zilog->zl_prev_min[i]; in zil_lwb_predict()
1827 * If the second minimum size gives a 50% saving -- use it. It may cost us in zil_lwb_predict()
1842 ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock)); in zil_lwb_write_close()
1843 ASSERT3S(lwb->lwb_state, ==, LWB_STATE_OPENED); in zil_lwb_write_close()
1844 lwb->lwb_state = LWB_STATE_CLOSED; in zil_lwb_write_close()
1851 if (lwb->lwb_error != 0) in zil_lwb_write_close()
1855 * Log blocks are pre-allocated. Here we select the size of the next in zil_lwb_write_close()
1861 if (zilog->zl_cur_left > 0) { in zil_lwb_write_close()
1864 * But if the workload is multi-threaded there may be more soon. in zil_lwb_write_close()
1868 plan = zil_lwb_plan(zilog, zilog->zl_cur_left, &m); in zil_lwb_write_close()
1869 if (zilog->zl_parallel) { in zil_lwb_write_close()
1870 plan2 = zil_lwb_plan(zilog, zilog->zl_cur_left + in zil_lwb_write_close()
1884 blksz = MIN(blksz, zilog->zl_max_block_size); in zil_lwb_write_close()
1897 spa_t *spa = zilog->zl_spa; in zil_lwb_write_issue()
1904 ASSERT3S(lwb->lwb_state, ==, LWB_STATE_CLOSED); in zil_lwb_write_issue()
1907 for (itx_t *itx = list_head(&lwb->lwb_itxs); itx; in zil_lwb_write_issue()
1908 itx = list_next(&lwb->lwb_itxs, itx)) in zil_lwb_write_issue()
1910 lwb->lwb_nused = lwb->lwb_nfilled; in zil_lwb_write_issue()
1911 ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_nmax); in zil_lwb_write_issue()
1913 lwb->lwb_root_zio = zio_root(spa, zil_lwb_flush_vdevs_done, lwb, in zil_lwb_write_issue()
1919 * Otherwise leave it as-is, relying on some other thread to issue it in zil_lwb_write_issue()
1923 mutex_enter(&zilog->zl_lock); in zil_lwb_write_issue()
1924 lwb->lwb_state = LWB_STATE_READY; in zil_lwb_write_issue()
1925 if (BP_IS_HOLE(&lwb->lwb_blk) && lwb->lwb_error == 0) { in zil_lwb_write_issue()
1926 mutex_exit(&zilog->zl_lock); in zil_lwb_write_issue()
1929 mutex_exit(&zilog->zl_lock); in zil_lwb_write_issue()
1932 if (lwb->lwb_slim) in zil_lwb_write_issue()
1933 zilc = (zil_chain_t *)lwb->lwb_buf; in zil_lwb_write_issue()
1935 zilc = (zil_chain_t *)(lwb->lwb_buf + lwb->lwb_nmax); in zil_lwb_write_issue()
1936 int wsz = lwb->lwb_sz; in zil_lwb_write_issue()
1937 if (lwb->lwb_error == 0) { in zil_lwb_write_issue()
1938 abd_t *lwb_abd = abd_get_from_buf(lwb->lwb_buf, lwb->lwb_sz); in zil_lwb_write_issue()
1939 if (!lwb->lwb_slog || zilog->zl_cur_size <= zil_slog_bulk) in zil_lwb_write_issue()
1943 SET_BOOKMARK(&zb, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_OBJSET], in zil_lwb_write_issue()
1945 lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]); in zil_lwb_write_issue()
1946 lwb->lwb_write_zio = zio_rewrite(lwb->lwb_root_zio, spa, 0, in zil_lwb_write_issue()
1947 &lwb->lwb_blk, lwb_abd, lwb->lwb_sz, zil_lwb_write_done, in zil_lwb_write_issue()
1949 zil_lwb_add_block(lwb, &lwb->lwb_blk); in zil_lwb_write_issue()
1951 if (lwb->lwb_slim) { in zil_lwb_write_issue()
1953 wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ, in zil_lwb_write_issue()
1955 ASSERT3S(wsz, <=, lwb->lwb_sz); in zil_lwb_write_issue()
1956 zio_shrink(lwb->lwb_write_zio, wsz); in zil_lwb_write_issue()
1957 wsz = lwb->lwb_write_zio->io_size; in zil_lwb_write_issue()
1959 memset(lwb->lwb_buf + lwb->lwb_nused, 0, wsz - lwb->lwb_nused); in zil_lwb_write_issue()
1960 zilc->zc_pad = 0; in zil_lwb_write_issue()
1961 zilc->zc_nused = lwb->lwb_nused; in zil_lwb_write_issue()
1962 zilc->zc_eck.zec_cksum = lwb->lwb_blk.blk_cksum; in zil_lwb_write_issue()
1968 lwb->lwb_write_zio = zio_null(lwb->lwb_root_zio, spa, NULL, in zil_lwb_write_issue()
1970 lwb->lwb_write_zio->io_error = lwb->lwb_error; in zil_lwb_write_issue()
1972 if (lwb->lwb_child_zio) in zil_lwb_write_issue()
1973 zio_add_child(lwb->lwb_write_zio, lwb->lwb_child_zio); in zil_lwb_write_issue()
1978 dmu_tx_t *tx = dmu_tx_create(zilog->zl_os); in zil_lwb_write_issue()
1980 dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); in zil_lwb_write_issue()
1986 lwb_t *nlwb = list_next(&zilog->zl_lwb_list, lwb); in zil_lwb_write_issue()
1987 blkptr_t *bp = &zilc->zc_next_blk; in zil_lwb_write_issue()
1989 error = lwb->lwb_error; in zil_lwb_write_issue()
1991 error = zio_alloc_zil(spa, zilog->zl_os, txg, bp, nlwb->lwb_sz, in zil_lwb_write_issue()
1996 BP_SET_CHECKSUM(bp, nlwb->lwb_slim ? ZIO_CHECKSUM_ZILOG2 : in zil_lwb_write_issue()
1998 bp->blk_cksum = lwb->lwb_blk.blk_cksum; in zil_lwb_write_issue()
1999 bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++; in zil_lwb_write_issue()
2006 mutex_enter(&zilog->zl_lwb_io_lock); in zil_lwb_write_issue()
2007 lwb->lwb_issued_txg = txg; in zil_lwb_write_issue()
2008 zilog->zl_lwb_inflight[txg & TXG_MASK]++; in zil_lwb_write_issue()
2009 zilog->zl_lwb_max_issued_txg = MAX(txg, zilog->zl_lwb_max_issued_txg); in zil_lwb_write_issue()
2010 mutex_exit(&zilog->zl_lwb_io_lock); in zil_lwb_write_issue()
2019 mutex_enter(&zilog->zl_lock); in zil_lwb_write_issue()
2021 lwb->lwb_state = LWB_STATE_ISSUED; in zil_lwb_write_issue()
2024 nlwb->lwb_blk = *bp; in zil_lwb_write_issue()
2025 nlwb->lwb_error = error; in zil_lwb_write_issue()
2026 nlwb->lwb_slog = slog; in zil_lwb_write_issue()
2027 nlwb->lwb_alloc_txg = txg; in zil_lwb_write_issue()
2028 if (nlwb->lwb_state != LWB_STATE_READY) in zil_lwb_write_issue()
2031 mutex_exit(&zilog->zl_lock); in zil_lwb_write_issue()
2033 if (lwb->lwb_slog) { in zil_lwb_write_issue()
2036 lwb->lwb_nused); in zil_lwb_write_issue()
2040 BP_GET_LSIZE(&lwb->lwb_blk)); in zil_lwb_write_issue()
2044 lwb->lwb_nused); in zil_lwb_write_issue()
2048 BP_GET_LSIZE(&lwb->lwb_blk)); in zil_lwb_write_issue()
2050 lwb->lwb_issued_timestamp = gethrtime(); in zil_lwb_write_issue()
2051 if (lwb->lwb_child_zio) in zil_lwb_write_issue()
2052 zio_nowait(lwb->lwb_child_zio); in zil_lwb_write_issue()
2053 zio_nowait(lwb->lwb_write_zio); in zil_lwb_write_issue()
2054 zio_nowait(lwb->lwb_root_zio); in zil_lwb_write_issue()
2071 return (zilog->zl_max_block_size - sizeof (zil_chain_t) - hdrsize); in zil_max_log_data()
2103 lr_t *lr = &itx->itx_lr; in zil_itx_record_size()
2105 if (lr->lrc_txtype == TX_COMMIT) in zil_itx_record_size()
2107 ASSERT3U(lr->lrc_reclen, >=, sizeof (lr_t)); in zil_itx_record_size()
2108 return (lr->lrc_reclen); in zil_itx_record_size()
2114 lr_t *lr = &itx->itx_lr; in zil_itx_data_size()
2117 if (lr->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) { in zil_itx_data_size()
2118 ASSERT3U(lr->lrc_reclen, ==, sizeof (lr_write_t)); in zil_itx_data_size()
2119 return (P2ROUNDUP_TYPED(lrw->lr_length, sizeof (uint64_t), in zil_itx_data_size()
2128 lr_t *lr = &itx->itx_lr; in zil_itx_full_size()
2130 if (lr->lrc_txtype == TX_COMMIT) in zil_itx_full_size()
2132 ASSERT3U(lr->lrc_reclen, >=, sizeof (lr_t)); in zil_itx_full_size()
2133 return (lr->lrc_reclen + zil_itx_data_size(itx)); in zil_itx_full_size()
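The sizing rules above reduce to: a TX_COMMIT itx occupies no log space, a WR_NEED_COPY write adds its payload rounded up to an 8-byte multiple, and everything else is just lrc_reclen. A sketch, with P2ROUNDUP as in the SPL headers:

    #include <stdint.h>

    #define P2ROUNDUP(x, align) ((((x) - 1) | ((align) - 1)) + 1)

    static uint64_t
    itx_full_size_sketch(uint64_t reclen, uint64_t wr_length, int is_commit,
        int need_copy)
    {
        if (is_commit)
            return (0);     /* TX_COMMIT occupies no log space */
        if (need_copy)      /* payload padded to an 8-byte multiple */
            return (reclen + P2ROUNDUP(wr_length, sizeof (uint64_t)));
        return (reclen);
    }
    /* e.g. reclen 192, WR_NEED_COPY length 4001 -> 192 + 4008 = 4200 */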
2150 ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock)); in zil_lwb_assign()
2152 ASSERT3P(lwb->lwb_buf, !=, NULL); in zil_lwb_assign()
2156 lr = &itx->itx_lr; in zil_lwb_assign()
2160 * A commit itx doesn't represent any on-disk state; instead in zil_lwb_assign()
2171 if (lr->lrc_txtype == TX_COMMIT) { in zil_lwb_assign()
2172 zil_commit_waiter_link_lwb(itx->itx_private, lwb); in zil_lwb_assign()
2173 list_insert_tail(&lwb->lwb_itxs, itx); in zil_lwb_assign()
2177 reclen = lr->lrc_reclen; in zil_lwb_assign()
2187 lwb_sp = lwb->lwb_nmax - lwb->lwb_nused; in zil_lwb_assign()
2197 lwb_sp = lwb->lwb_nmax - lwb->lwb_nused; in zil_lwb_assign()
2211 dnow = MIN(dlen, lwb_sp - reclen); in zil_lwb_assign()
2213 ASSERT3U(lr->lrc_txtype, ==, TX_WRITE); in zil_lwb_assign()
2214 ASSERT3U(itx->itx_wr_state, ==, WR_NEED_COPY); in zil_lwb_assign()
2216 clr = &citx->itx_lr; in zil_lwb_assign()
2218 clrw->lr_length = dnow; in zil_lwb_assign()
2219 lrw->lr_offset += dnow; in zil_lwb_assign()
2220 lrw->lr_length -= dnow; in zil_lwb_assign()
2221 zilog->zl_cur_left -= dnow; in zil_lwb_assign()
2233 clr->lrc_seq = ++zilog->zl_lr_seq; in zil_lwb_assign()
2235 lwb->lwb_nused += reclen + dnow; in zil_lwb_assign()
2236 ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_nmax); in zil_lwb_assign()
2237 ASSERT0(P2PHASE(lwb->lwb_nused, sizeof (uint64_t))); in zil_lwb_assign()
2239 zil_lwb_add_txg(lwb, lr->lrc_txg); in zil_lwb_assign()
2240 list_insert_tail(&lwb->lwb_itxs, citx); in zil_lwb_assign()
2242 dlen -= dnow; in zil_lwb_assign()
2246 if (lr->lrc_txtype == TX_WRITE && in zil_lwb_assign()
2247 lr->lrc_txg > spa_freeze_txg(zilog->zl_spa)) in zil_lwb_assign()
2248 txg_wait_synced(zilog->zl_dmu_pool, lr->lrc_txg); in zil_lwb_assign()
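When a WR_NEED_COPY record does not fit in the space left in the current lwb, lines 2211-2220 above clone the itx, trim the clone to the dnow bytes that fit, advance the original's offset and length, and carry on in the next lwb, so one logical write may span several log blocks. A standalone sketch of that carving loop, with illustrative sizes:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint64_t offset = 0, length = 10000;    /* illustrative write */
        uint64_t reclen = 200;                  /* record header size */
        uint64_t lwb_sp = 4096;                 /* space in current lwb */

        while (length > 0) {
            /* dnow = MIN(dlen, lwb_sp - reclen), as on line 2211 */
            uint64_t dnow = length < lwb_sp - reclen ?
                length : lwb_sp - reclen;
            printf("chunk @%llu len %llu\n", (unsigned long long)offset,
                (unsigned long long)dnow);
            offset += dnow;     /* lrw->lr_offset += dnow */
            length -= dnow;     /* lrw->lr_length  -= dnow */
            lwb_sp = 4096;      /* a fresh lwb replaces the full one */
        }
        return (0);
    }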
2265 lr = &itx->itx_lr; in zil_lwb_commit()
2268 if (lr->lrc_txtype == TX_COMMIT) in zil_lwb_commit()
2271 reclen = lr->lrc_reclen; in zil_lwb_commit()
2273 ASSERT3U(reclen + dlen, <=, lwb->lwb_nused - lwb->lwb_nfilled); in zil_lwb_commit()
2275 lr_buf = lwb->lwb_buf + lwb->lwb_nfilled; in zil_lwb_commit()
2285 if (lr->lrc_txtype == TX_WRITE) { in zil_lwb_commit()
2286 if (itx->itx_wr_state == WR_COPIED) { in zil_lwb_commit()
2289 lrw->lr_length); in zil_lwb_commit()
2294 if (itx->itx_wr_state == WR_NEED_COPY) { in zil_lwb_commit()
2296 lrb->lrc_reclen += dlen; in zil_lwb_commit()
2301 ASSERT3S(itx->itx_wr_state, ==, WR_INDIRECT); in zil_lwb_commit()
2305 lrw->lr_length); in zil_lwb_commit()
2306 if (lwb->lwb_child_zio == NULL) { in zil_lwb_commit()
2307 lwb->lwb_child_zio = zio_null(NULL, in zil_lwb_commit()
2308 zilog->zl_spa, NULL, NULL, NULL, in zil_lwb_commit()
2321 error = zilog->zl_get_data(itx->itx_private, in zil_lwb_commit()
2322 itx->itx_gen, lrwb, dbuf, lwb, in zil_lwb_commit()
2323 lwb->lwb_child_zio); in zil_lwb_commit()
2326 memset((char *)dbuf + lrwb->lr_length, 0, in zil_lwb_commit()
2327 dlen - lrwb->lr_length); in zil_lwb_commit()
2332 * ->zl_get_data() are 0, EIO, ENOENT, EEXIST or in zil_lwb_commit()
2335 * dmu_read() -> dnode_hold() -> dnode_hold_impl() or in zil_lwb_commit()
2337 * block layer through dmu_buf_hold() -> dbuf_read() in zil_lwb_commit()
2338 * -> zio_wait(), as well as through dmu_read() -> in zil_lwb_commit()
2339 * dnode_hold() -> dnode_hold_impl() -> dbuf_read() -> in zil_lwb_commit()
2351 "unexpected error %d from ->zl_get_data()" in zil_lwb_commit()
2356 txg_wait_synced(zilog->zl_dmu_pool, in zil_lwb_commit()
2357 lr->lrc_txg); in zil_lwb_commit()
2369 lwb->lwb_nfilled += reclen + dlen; in zil_lwb_commit()
2370 ASSERT3S(lwb->lwb_nfilled, <=, lwb->lwb_nused); in zil_lwb_commit()
2371 ASSERT0(P2PHASE(lwb->lwb_nfilled, sizeof (uint64_t))); in zil_lwb_commit()
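zil_lwb_commit() above dispatches on the three TX_WRITE states: WR_COPIED data already sits inside the record, WR_NEED_COPY data is copied into the lwb buffer at this point, and WR_INDIRECT leaves a block pointer in the record and fetches the data through ->zl_get_data() under the lwb's child zio. A sketch of the dispatch (hypothetical *_S names; the real constants live in sys/zil.h):

    /* Hypothetical names; cf. the WR_* itx write states. */
    typedef enum { WR_COPIED_S, WR_NEED_COPY_S, WR_INDIRECT_S } wr_state_sketch_t;

    static const char *
    wr_data_source(wr_state_sketch_t s)
    {
        switch (s) {
        case WR_COPIED_S:       /* copied into the itx when it was built */
            return ("inline in the log record");
        case WR_NEED_COPY_S:    /* copied into the lwb buffer here */
            return ("appended to the record in the lwb");
        default:                /* WR_INDIRECT: record carries a blkptr */
            return ("fetched via ->zl_get_data() on the child zio");
        }
    }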
2386 itx->itx_lr.lrc_txtype = txtype; in zil_itx_create()
2387 itx->itx_lr.lrc_reclen = lrsize; in zil_itx_create()
2388 itx->itx_lr.lrc_seq = 0; /* defensive */ in zil_itx_create()
2389 memset((char *)&itx->itx_lr + olrsize, 0, lrsize - olrsize); in zil_itx_create()
2390 itx->itx_sync = B_TRUE; /* default is synchronous */ in zil_itx_create()
2391 itx->itx_callback = NULL; in zil_itx_create()
2392 itx->itx_callback_data = NULL; in zil_itx_create()
2393 itx->itx_size = itxsize; in zil_itx_create()
2401 ASSERT3U(oitx->itx_size, >=, sizeof (itx_t)); in zil_itx_clone()
2402 ASSERT3U(oitx->itx_size, ==, in zil_itx_clone()
2403 offsetof(itx_t, itx_lr) + oitx->itx_lr.lrc_reclen); in zil_itx_clone()
2405 itx_t *itx = zio_data_buf_alloc(oitx->itx_size); in zil_itx_clone()
2406 memcpy(itx, oitx, oitx->itx_size); in zil_itx_clone()
2407 itx->itx_callback = NULL; in zil_itx_clone()
2408 itx->itx_callback_data = NULL; in zil_itx_clone()
2415 ASSERT3U(itx->itx_size, >=, sizeof (itx_t)); in zil_itx_destroy()
2416 ASSERT3U(itx->itx_lr.lrc_reclen, ==, in zil_itx_destroy()
2417 itx->itx_size - offsetof(itx_t, itx_lr)); in zil_itx_destroy()
2418 IMPLY(itx->itx_lr.lrc_txtype == TX_COMMIT, itx->itx_callback == NULL); in zil_itx_destroy()
2419 IMPLY(itx->itx_callback != NULL, itx->itx_lr.lrc_txtype != TX_COMMIT); in zil_itx_destroy()
2421 if (itx->itx_callback != NULL) in zil_itx_destroy()
2422 itx->itx_callback(itx->itx_callback_data); in zil_itx_destroy()
2424 zio_data_buf_free(itx, itx->itx_size); in zil_itx_destroy()
2428 * Free up the sync and async itxs. The itxs_t has already been detached
2441 list = &itxs->i_sync_list; in zil_itxg_clean()
2450 * - a thread calls zil_commit() which assigns the in zil_itxg_clean()
2451 * commit itx to a per-txg i_sync_list in zil_itxg_clean()
2452 * - zil_itxg_clean() is called (e.g. via spa_sync()) in zil_itxg_clean()
2462 if (itx->itx_lr.lrc_txtype == TX_COMMIT) in zil_itxg_clean()
2463 zil_commit_waiter_skip(itx->itx_private); in zil_itxg_clean()
2469 t = &itxs->i_async_tree; in zil_itxg_clean()
2471 list = &ian->ia_list; in zil_itxg_clean()
2474 ASSERT3U(itx->itx_lr.lrc_txtype, !=, TX_COMMIT); in zil_itxg_clean()
2488 const uint64_t o1 = ((itx_async_node_t *)x1)->ia_foid; in zil_aitx_compare()
2489 const uint64_t o2 = ((itx_async_node_t *)x2)->ia_foid; in zil_aitx_compare()
2510 if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) /* ziltest support */ in zil_remove_async()
2513 otxg = spa_last_synced_txg(zilog->zl_spa) + 1; in zil_remove_async()
2516 itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK]; in zil_remove_async()
2518 mutex_enter(&itxg->itxg_lock); in zil_remove_async()
2519 if (itxg->itxg_txg != txg) { in zil_remove_async()
2520 mutex_exit(&itxg->itxg_lock); in zil_remove_async()
2527 t = &itxg->itxg_itxs->i_async_tree; in zil_remove_async()
2531 list_move_tail(&clean_list, &ian->ia_list); in zil_remove_async()
2532 mutex_exit(&itxg->itxg_lock); in zil_remove_async()
2536 ASSERT3U(itx->itx_lr.lrc_txtype, !=, TX_COMMIT); in zil_remove_async()
2552 if ((itx->itx_lr.lrc_txtype & ~TX_CI) == TX_RENAME) in zil_itx_assign()
2553 zil_async_to_sync(zilog, itx->itx_oid); in zil_itx_assign()
2555 if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) in zil_itx_assign()
2560 itxg = &zilog->zl_itxg[txg & TXG_MASK]; in zil_itx_assign()
2561 mutex_enter(&itxg->itxg_lock); in zil_itx_assign()
2562 itxs = itxg->itxg_itxs; in zil_itx_assign()
2563 if (itxg->itxg_txg != txg) { in zil_itx_assign()
2571 "txg %llu", (u_longlong_t)itxg->itxg_txg); in zil_itx_assign()
2572 clean = itxg->itxg_itxs; in zil_itx_assign()
2574 itxg->itxg_txg = txg; in zil_itx_assign()
2575 itxs = itxg->itxg_itxs = kmem_zalloc(sizeof (itxs_t), in zil_itx_assign()
2578 list_create(&itxs->i_sync_list, sizeof (itx_t), in zil_itx_assign()
2580 avl_create(&itxs->i_async_tree, zil_aitx_compare, in zil_itx_assign()
2584 if (itx->itx_sync) { in zil_itx_assign()
2585 list_insert_tail(&itxs->i_sync_list, itx); in zil_itx_assign()
2587 avl_tree_t *t = &itxs->i_async_tree; in zil_itx_assign()
2589 LR_FOID_GET_OBJ(((lr_ooo_t *)&itx->itx_lr)->lr_foid); in zil_itx_assign()
2597 list_create(&ian->ia_list, sizeof (itx_t), in zil_itx_assign()
2599 ian->ia_foid = foid; in zil_itx_assign()
2602 list_insert_tail(&ian->ia_list, itx); in zil_itx_assign()
2605 itx->itx_lr.lrc_txg = dmu_tx_get_txg(tx); in zil_itx_assign()
2614 mutex_exit(&itxg->itxg_lock); in zil_itx_assign()
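zil_itx_assign() buckets itxs by txg & TXG_MASK, a ring of TXG_SIZE slots recycled once their txg syncs, which is why itxg_txg is checked against the wanted txg before a slot is trusted (lines 2519 and 2563 above). For example:

    #include <assert.h>

    #define TXG_SIZE    4               /* as in sys/txg.h */
    #define TXG_MASK    (TXG_SIZE - 1)

    int
    main(void)
    {
        /* txg 100 and txg 104 share slot 0 of the ring... */
        assert((100 & TXG_MASK) == 0 && (104 & TXG_MASK) == 0);
        /* ...so a slot's itxg_txg must match before it is used. */
        return (0);
    }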
2622 * If there are any in-memory intent log transactions which have now been
2625 * don't inadvertently clean out in-memory log records that would be required
2631 itxg_t *itxg = &zilog->zl_itxg[synced_txg & TXG_MASK]; in zil_clean()
2636 mutex_enter(&itxg->itxg_lock); in zil_clean()
2637 if (itxg->itxg_itxs == NULL || itxg->itxg_txg == ZILTEST_TXG) { in zil_clean()
2638 mutex_exit(&itxg->itxg_lock); in zil_clean()
2641 ASSERT3U(itxg->itxg_txg, <=, synced_txg); in zil_clean()
2642 ASSERT3U(itxg->itxg_txg, !=, 0); in zil_clean()
2643 clean_me = itxg->itxg_itxs; in zil_clean()
2644 itxg->itxg_itxs = NULL; in zil_clean()
2645 itxg->itxg_txg = 0; in zil_clean()
2646 mutex_exit(&itxg->itxg_lock); in zil_clean()
2650 * free it in-line. This should be rare. Note, using TQ_SLEEP in zil_clean()
2653 ASSERT3P(zilog->zl_dmu_pool, !=, NULL); in zil_clean()
2654 ASSERT3P(zilog->zl_dmu_pool->dp_zil_clean_taskq, !=, NULL); in zil_clean()
2655 taskqid_t id = taskq_dispatch(zilog->zl_dmu_pool->dp_zil_clean_taskq, in zil_clean()
2669 list_t *commit_list = &zilog->zl_itx_commit_list; in zil_get_commit_list()
2671 ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock)); in zil_get_commit_list()
2673 if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) /* ziltest support */ in zil_get_commit_list()
2676 otxg = spa_last_synced_txg(zilog->zl_spa) + 1; in zil_get_commit_list()
2684 itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK]; in zil_get_commit_list()
2686 mutex_enter(&itxg->itxg_lock); in zil_get_commit_list()
2687 if (itxg->itxg_txg != txg) { in zil_get_commit_list()
2688 mutex_exit(&itxg->itxg_lock); in zil_get_commit_list()
2701 spa_freeze_txg(zilog->zl_spa) != UINT64_MAX); in zil_get_commit_list()
2702 list_t *sync_list = &itxg->itxg_itxs->i_sync_list; in zil_get_commit_list()
2704 if (unlikely(zilog->zl_suspend > 0)) { in zil_get_commit_list()
2717 mutex_exit(&itxg->itxg_lock); in zil_get_commit_list()
2721 zilog->zl_cur_size += s; in zil_get_commit_list()
2722 zilog->zl_cur_left += s; in zil_get_commit_list()
2724 zilog->zl_cur_max = MAX(zilog->zl_cur_max, s); in zil_get_commit_list()
2732 * Move the async itxs for a specified object to commit into sync lists.
2742 if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) /* ziltest support */ in zil_async_to_sync()
2745 otxg = spa_last_synced_txg(zilog->zl_spa) + 1; in zil_async_to_sync()
2752 itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK]; in zil_async_to_sync()
2754 mutex_enter(&itxg->itxg_lock); in zil_async_to_sync()
2755 if (itxg->itxg_txg != txg) { in zil_async_to_sync()
2756 mutex_exit(&itxg->itxg_lock); in zil_async_to_sync()
2763 * to the sync list. We add to the end rather than the in zil_async_to_sync()
2766 t = &itxg->itxg_itxs->i_async_tree; in zil_async_to_sync()
2771 list_move_tail(&itxg->itxg_itxs->i_sync_list, in zil_async_to_sync()
2772 &ian->ia_list); in zil_async_to_sync()
2778 list_move_tail(&itxg->itxg_itxs->i_sync_list, in zil_async_to_sync()
2779 &ian->ia_list); in zil_async_to_sync()
2780 list_destroy(&ian->ia_list); in zil_async_to_sync()
2784 mutex_exit(&itxg->itxg_lock); in zil_async_to_sync()
2790 * commit list (it won't prune past the first non-commit itx), and
2802 ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock)); in zil_prune_commit_list()
2804 while ((itx = list_head(&zilog->zl_itx_commit_list)) != NULL) { in zil_prune_commit_list()
2805 lr_t *lrc = &itx->itx_lr; in zil_prune_commit_list()
2806 if (lrc->lrc_txtype != TX_COMMIT) in zil_prune_commit_list()
2809 mutex_enter(&zilog->zl_lock); in zil_prune_commit_list()
2811 lwb_t *last_lwb = zilog->zl_last_lwb_opened; in zil_prune_commit_list()
2813 last_lwb->lwb_state == LWB_STATE_FLUSH_DONE) { in zil_prune_commit_list()
2820 zil_commit_waiter_skip(itx->itx_private); in zil_prune_commit_list()
2822 zil_commit_waiter_link_lwb(itx->itx_private, last_lwb); in zil_prune_commit_list()
2825 mutex_exit(&zilog->zl_lock); in zil_prune_commit_list()
2827 list_remove(&zilog->zl_itx_commit_list, itx); in zil_prune_commit_list()
2831 IMPLY(itx != NULL, itx->itx_lr.lrc_txtype != TX_COMMIT); in zil_prune_commit_list()
2848 * on-disk lwb, that block could be leaked in the event of a in zil_commit_writer_stall()
2849 * crash (because the previous lwb on-disk would not point to in zil_commit_writer_stall()
2857 ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock)); in zil_commit_writer_stall()
2859 txg_wait_synced(zilog->zl_dmu_pool, 0); in zil_commit_writer_stall()
2860 ASSERT(list_is_empty(&zilog->zl_lwb_list)); in zil_commit_writer_stall()
2866 if (!list_is_empty(&zilog->zl_itx_commit_list) || in zil_burst_done()
2867 zilog->zl_cur_size == 0) in zil_burst_done()
2870 if (zilog->zl_parallel) in zil_burst_done()
2871 zilog->zl_parallel--; in zil_burst_done()
2873 uint_t r = (zilog->zl_prev_rotor + 1) & (ZIL_BURSTS - 1); in zil_burst_done()
2874 zilog->zl_prev_rotor = r; in zil_burst_done()
2875 zilog->zl_prev_opt[r] = zil_lwb_plan(zilog, zilog->zl_cur_size, in zil_burst_done()
2876 &zilog->zl_prev_min[r]); in zil_burst_done()
2878 zilog->zl_cur_size = 0; in zil_burst_done()
2879 zilog->zl_cur_max = 0; in zil_burst_done()
2880 zilog->zl_cur_left = 0; in zil_burst_done()
2892 spa_t *spa = zilog->zl_spa; in zil_process_commit_list()
2898 ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock)); in zil_process_commit_list()
2904 if (list_is_empty(&zilog->zl_itx_commit_list)) in zil_process_commit_list()
2911 lwb = list_tail(&zilog->zl_lwb_list); in zil_process_commit_list()
2920 ASSERT(lwb->lwb_state == LWB_STATE_NEW || in zil_process_commit_list()
2921 lwb->lwb_state == LWB_STATE_OPENED); in zil_process_commit_list()
2925 * multi-threaded and we won the chance of write aggregation. in zil_process_commit_list()
2927 * flushed, it still means the workload is multi-threaded, but in zil_process_commit_list()
2931 if (lwb->lwb_state == LWB_STATE_OPENED) { in zil_process_commit_list()
2932 zilog->zl_parallel = ZIL_BURSTS; in zil_process_commit_list()
2933 } else if ((plwb = list_prev(&zilog->zl_lwb_list, lwb)) in zil_process_commit_list()
2934 != NULL && plwb->lwb_state != LWB_STATE_FLUSH_DONE) { in zil_process_commit_list()
2935 zilog->zl_parallel = MAX(zilog->zl_parallel, in zil_process_commit_list()
2940 while ((itx = list_remove_head(&zilog->zl_itx_commit_list)) != NULL) { in zil_process_commit_list()
2941 lr_t *lrc = &itx->itx_lr; in zil_process_commit_list()
2942 uint64_t txg = lrc->lrc_txg; in zil_process_commit_list()
2946 if (lrc->lrc_txtype == TX_COMMIT) { in zil_process_commit_list()
2972 * As a counter-example, if we skipped TX_COMMIT itx's in zil_process_commit_list()
2977 * 1. We commit a non-TX_COMMIT itx to an lwb, where the in zil_process_commit_list()
3001 if (frozen || !synced || lrc->lrc_txtype == TX_COMMIT) { in zil_process_commit_list()
3006 } else if ((zcw->zcw_lwb != NULL && in zil_process_commit_list()
3007 zcw->zcw_lwb != lwb) || zcw->zcw_done) { in zil_process_commit_list()
3012 zilog->zl_parallel = ZIL_BURSTS; in zil_process_commit_list()
3013 zilog->zl_cur_left -= in zil_process_commit_list()
3018 if (lrc->lrc_txtype == TX_COMMIT) { in zil_process_commit_list()
3020 itx->itx_private, &nolwb_waiters); in zil_process_commit_list()
3024 zilog->zl_cur_left -= zil_itx_full_size(itx); in zil_process_commit_list()
3026 ASSERT3S(lrc->lrc_txtype, !=, TX_COMMIT); in zil_process_commit_list()
3027 zilog->zl_cur_left -= zil_itx_full_size(itx); in zil_process_commit_list()
3035 * "next" lwb on-disk. When this happens, we must stall in zil_process_commit_list()
3063 ASSERT(lwb->lwb_state == LWB_STATE_NEW || in zil_process_commit_list()
3064 lwb->lwb_state == LWB_STATE_OPENED); in zil_process_commit_list()
3109 if (lwb->lwb_state == LWB_STATE_OPENED && !zilog->zl_parallel) { in zil_process_commit_list()
3127 * commit itx will be found in the queue just like the other non-commit in zil_commit_writer()
3143 ASSERT(!MUTEX_HELD(&zilog->zl_lock)); in zil_commit_writer()
3144 ASSERT(spa_writeable(zilog->zl_spa)); in zil_commit_writer()
3147 mutex_enter(&zilog->zl_issuer_lock); in zil_commit_writer()
3149 if (zcw->zcw_lwb != NULL || zcw->zcw_done) { in zil_commit_writer()
3164 * and 1024 threads performing sync writes. in zil_commit_writer()
3176 mutex_exit(&zilog->zl_issuer_lock); in zil_commit_writer()
3186 ASSERT(!MUTEX_HELD(&zilog->zl_issuer_lock)); in zil_commit_waiter_timeout()
3187 ASSERT(MUTEX_HELD(&zcw->zcw_lock)); in zil_commit_waiter_timeout()
3188 ASSERT3B(zcw->zcw_done, ==, B_FALSE); in zil_commit_waiter_timeout()
3190 lwb_t *lwb = zcw->zcw_lwb; in zil_commit_waiter_timeout()
3192 ASSERT3S(lwb->lwb_state, !=, LWB_STATE_NEW); in zil_commit_waiter_timeout()
3201 if (lwb->lwb_state != LWB_STATE_OPENED) in zil_commit_waiter_timeout()
3211 mutex_exit(&zcw->zcw_lock); in zil_commit_waiter_timeout()
3212 mutex_enter(&zilog->zl_issuer_lock); in zil_commit_waiter_timeout()
3213 mutex_enter(&zcw->zcw_lock); in zil_commit_waiter_timeout()
3216 * Since we just dropped and re-acquired the commit waiter's in zil_commit_waiter_timeout()
3217 * lock, we have to re-check to see if the waiter was marked in zil_commit_waiter_timeout()
3221 * wind up with a use-after-free error below. in zil_commit_waiter_timeout()
3223 if (zcw->zcw_done) { in zil_commit_waiter_timeout()
3224 mutex_exit(&zilog->zl_issuer_lock); in zil_commit_waiter_timeout()
3228 ASSERT3P(lwb, ==, zcw->zcw_lwb); in zil_commit_waiter_timeout()
3250 if (lwb->lwb_state != LWB_STATE_OPENED) { in zil_commit_waiter_timeout()
3251 mutex_exit(&zilog->zl_issuer_lock); in zil_commit_waiter_timeout()
3262 mutex_exit(&zcw->zcw_lock); in zil_commit_waiter_timeout()
3273 ASSERT3S(lwb->lwb_state, ==, LWB_STATE_CLOSED); in zil_commit_waiter_timeout()
3279 * "next" lwb on-disk. When this occurs, the ZIL write in zil_commit_waiter_timeout()
3285 mutex_exit(&zilog->zl_issuer_lock); in zil_commit_waiter_timeout()
3287 mutex_exit(&zilog->zl_issuer_lock); in zil_commit_waiter_timeout()
3290 mutex_enter(&zcw->zcw_lock); in zil_commit_waiter_timeout()
3315 ASSERT(!MUTEX_HELD(&zilog->zl_lock)); in zil_commit_waiter()
3316 ASSERT(!MUTEX_HELD(&zilog->zl_issuer_lock)); in zil_commit_waiter()
3317 ASSERT(spa_writeable(zilog->zl_spa)); in zil_commit_waiter()
3319 mutex_enter(&zcw->zcw_lock); in zil_commit_waiter()
3322 * The timeout is scaled based on the lwb latency to avoid in zil_commit_waiter()
3328 hrtime_t sleep = (zilog->zl_last_lwb_latency * pct) / 100; in zil_commit_waiter()
3332 while (!zcw->zcw_done) { in zil_commit_waiter()
3333 ASSERT(MUTEX_HELD(&zcw->zcw_lock)); in zil_commit_waiter()
3335 lwb_t *lwb = zcw->zcw_lwb; in zil_commit_waiter()
3338 * Usually, the waiter will have a non-NULL lwb field here, in zil_commit_waiter()
3355 IMPLY(lwb != NULL, lwb->lwb_state != LWB_STATE_NEW); in zil_commit_waiter()
3357 if (lwb != NULL && lwb->lwb_state == LWB_STATE_OPENED) { in zil_commit_waiter()
3367 int rc = cv_timedwait_hires(&zcw->zcw_cv, in zil_commit_waiter()
3368 &zcw->zcw_lock, wakeup, USEC2NSEC(1), in zil_commit_waiter()
3371 if (rc != -1 || zcw->zcw_done) in zil_commit_waiter()
3377 if (!zcw->zcw_done) { in zil_commit_waiter()
3386 ASSERT3P(lwb, ==, zcw->zcw_lwb); in zil_commit_waiter()
3387 ASSERT3S(lwb->lwb_state, !=, LWB_STATE_OPENED); in zil_commit_waiter()
3403 lwb->lwb_state == LWB_STATE_CLOSED || in zil_commit_waiter()
3404 lwb->lwb_state == LWB_STATE_READY || in zil_commit_waiter()
3405 lwb->lwb_state == LWB_STATE_ISSUED || in zil_commit_waiter()
3406 lwb->lwb_state == LWB_STATE_WRITE_DONE || in zil_commit_waiter()
3407 lwb->lwb_state == LWB_STATE_FLUSH_DONE); in zil_commit_waiter()
3408 cv_wait(&zcw->zcw_cv, &zcw->zcw_lock); in zil_commit_waiter()
3412 mutex_exit(&zcw->zcw_lock); in zil_commit_waiter()
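
/*
 * Editor's note: sketch of the latency-scaled timed wait above, using
 * POSIX primitives. zil_commit_waiter() sleeps for a tunable
 * percentage of the last observed lwb latency before forcing the open
 * lwb to be issued; the helper below models only that wait, and its
 * names and pct parameter are illustrative assumptions. The caller
 * must hold mtx, just as zil_commit_waiter() holds zcw_lock.
 */
#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <time.h>

/* Returns true if the deadline expired before *done became true. */
static bool
scaled_timed_wait(pthread_cond_t *cv, pthread_mutex_t *mtx,
    volatile bool *done, uint64_t last_latency_ns, unsigned pct)
{
	uint64_t sleep_ns = last_latency_ns * pct / 100;
	struct timespec dl;

	clock_gettime(CLOCK_REALTIME, &dl);
	dl.tv_sec += (time_t)(sleep_ns / 1000000000ULL);
	dl.tv_nsec += (long)(sleep_ns % 1000000000ULL);
	if (dl.tv_nsec >= 1000000000L) {
		dl.tv_sec++;
		dl.tv_nsec -= 1000000000L;
	}

	while (!*done) {		/* tolerate spurious wakeups */
		if (pthread_cond_timedwait(cv, mtx, &dl) == ETIMEDOUT)
			return (!*done); /* timed out: caller issues lwb */
	}
	return (false);
}
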
3420 cv_init(&zcw->zcw_cv, NULL, CV_DEFAULT, NULL); in zil_alloc_commit_waiter()
3421 mutex_init(&zcw->zcw_lock, NULL, MUTEX_DEFAULT, NULL); in zil_alloc_commit_waiter()
3422 list_link_init(&zcw->zcw_node); in zil_alloc_commit_waiter()
3423 zcw->zcw_lwb = NULL; in zil_alloc_commit_waiter()
3424 zcw->zcw_done = B_FALSE; in zil_alloc_commit_waiter()
3425 zcw->zcw_zio_error = 0; in zil_alloc_commit_waiter()
3433 ASSERT(!list_link_active(&zcw->zcw_node)); in zil_free_commit_waiter()
3434 ASSERT3P(zcw->zcw_lwb, ==, NULL); in zil_free_commit_waiter()
3435 ASSERT3B(zcw->zcw_done, ==, B_TRUE); in zil_free_commit_waiter()
3436 mutex_destroy(&zcw->zcw_lock); in zil_free_commit_waiter()
3437 cv_destroy(&zcw->zcw_cv); in zil_free_commit_waiter()
3450 dmu_tx_t *tx = dmu_tx_create(zilog->zl_os); in zil_commit_itx_assign()
3461 itx->itx_sync = B_TRUE; in zil_commit_itx_assign()
3462 itx->itx_private = zcw; in zil_commit_itx_assign()
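
/*
 * Editor's note: hypothetical sketch of a "commit marker" queue entry,
 * mirroring the tail of zil_commit_itx_assign() above. Like the commit
 * itx, it carries no data payload -- only the waiter that must be
 * signalled once everything queued ahead of it reaches stable storage.
 */
#include <stdbool.h>
#include <stdlib.h>

typedef struct citx {
	struct citx *next;
	bool sync;	/* commit markers are always "sync" */
	void *private;	/* the commit waiter (zcw above) */
} citx_t;

static citx_t *
commit_itx_create(void *waiter)
{
	citx_t *itx = calloc(1, sizeof (*itx));

	itx->sync = true;	/* keeps its place w.r.t. other sync itxs */
	itx->private = waiter;
	return (itx);
}
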
3472 * When writing ZIL transactions to the on-disk representation of the
3481 * is written to disk, it becomes an on-disk ZIL block.
3490 * storage prior to zil_commit() returning. If "foid" is non-zero, all
3499 * fine-grained communication. A better interface would allow a consumer
3515 * The commit itx is special; it doesn't have any on-disk representation.
3519 * -- allowing the thread waiting on the waiter to return from zil_commit().
3522 * to make use of the commit itxs, commit waiters, per-lwb lists of
3536 * in-memory linked lists.
3546 * - the order of "sync" itxs is preserved w.r.t. other
3547 * "sync" itxs, regardless of the corresponding objects.
3548 * - the order of "async" itxs is preserved w.r.t. other
3550 * - the order of "async" itxs is *not* preserved w.r.t. other
3552 * - the order of "sync" itxs w.r.t. "async" itxs (or vice
3577 * (leveraging the zio parent-child dependency graph)
3579 * By relying on this parent-child zio relationship, we can have
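
/*
 * Editor's note: the four ordering rules listed above collapse into a
 * single predicate: two itxs may be reordered only if they are not
 * both "sync" and are not "async" itxs on the same object. A toy
 * encoding follows; itx_tag_t and may_reorder() are assumptions for
 * illustration, not ZFS code.
 */
#include <stdbool.h>
#include <stdint.h>

typedef struct {
	bool sync;	/* sync itxs keep their order w.r.t. each other */
	uint64_t foid;	/* object this itx modifies */
} itx_tag_t;

static bool
may_reorder(const itx_tag_t *a, const itx_tag_t *b)
{
	if (a->sync && b->sync)		/* sync vs. sync: never */
		return (false);
	if (!a->sync && !b->sync && a->foid == b->foid)
		return (false);		/* async, same object: never */
	return (true);	/* mixed sync/async, or async on different objects */
}
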
3592 * in-flight itxs that would have modified the dataset. in zil_commit()
3600 ASSERT3B(dmu_objset_is_snapshot(zilog->zl_os), ==, B_FALSE); in zil_commit()
3602 if (zilog->zl_sync == ZFS_SYNC_DISABLED) in zil_commit()
3605 if (!spa_writeable(zilog->zl_spa)) { in zil_commit()
3613 ASSERT(list_is_empty(&zilog->zl_lwb_list)); in zil_commit()
3614 ASSERT3P(zilog->zl_last_lwb_opened, ==, NULL); in zil_commit()
3616 ASSERT3P(zilog->zl_itxg[i].itxg_itxs, ==, NULL); in zil_commit()
3627 if (zilog->zl_suspend > 0) { in zil_commit()
3629 txg_wait_synced(zilog->zl_dmu_pool, 0); in zil_commit()
3642 * Move the "async" itxs for the specified foid to the "sync" in zil_commit_impl()
3655 * Since the commit itx doesn't represent any on-disk state, in zil_commit_impl()
3673 if (zcw->zcw_zio_error != 0) { in zil_commit_impl()
3685 txg_wait_synced(zilog->zl_dmu_pool, 0); in zil_commit_impl()
3688 txg_wait_synced(zilog->zl_dmu_pool, wtxg); in zil_commit_impl()
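
/*
 * Editor's note: sketch of the "async to sync" promotion mentioned
 * above (zil_async_to_sync() in the full source): when zil_commit() is
 * called with a non-zero foid, the async itxs for that one object are
 * unlinked, in order, and appended to the sync list so that they are
 * committed as well. The list layout and names below are assumptions.
 */
#include <stdint.h>

typedef struct sitx {
	struct sitx *next;
	uint64_t foid;
} sitx_t;

typedef struct slist {
	sitx_t *head;
	sitx_t **tail;	/* points at the terminating next pointer */
} slist_t;

static void
async_to_sync(sitx_t **async_head, slist_t *sync, uint64_t foid)
{
	sitx_t **pp = async_head;

	while (*pp != NULL) {
		sitx_t *i = *pp;

		if (i->foid != foid) {
			pp = &i->next;
			continue;
		}
		*pp = i->next;		/* unlink from the async list */
		i->next = NULL;
		*sync->tail = i;	/* append, preserving order */
		sync->tail = &i->next;
	}
}
/* Usage: slist_t s = { NULL, &s.head }; async_to_sync(&head, &s, foid); */
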
3702 spa_t *spa = zilog->zl_spa; in zil_sync()
3703 uint64_t *replayed_seq = &zilog->zl_replayed_seq[txg & TXG_MASK]; in zil_sync()
3715 mutex_enter(&zilog->zl_lock); in zil_sync()
3717 ASSERT(zilog->zl_stop_sync == 0); in zil_sync()
3720 ASSERT(zh->zh_replay_seq < *replayed_seq); in zil_sync()
3721 zh->zh_replay_seq = *replayed_seq; in zil_sync()
3725 if (zilog->zl_destroy_txg == txg) { in zil_sync()
3726 blkptr_t blk = zh->zh_log; in zil_sync()
3727 dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os); in zil_sync()
3729 ASSERT(list_is_empty(&zilog->zl_lwb_list)); in zil_sync()
3732 memset(zilog->zl_replayed_seq, 0, in zil_sync()
3733 sizeof (zilog->zl_replayed_seq)); in zil_sync()
3735 if (zilog->zl_keep_first) { in zil_sync()
3745 zh->zh_log = blk; in zil_sync()
3759 while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) { in zil_sync()
3760 zh->zh_log = lwb->lwb_blk; in zil_sync()
3761 if (lwb->lwb_state != LWB_STATE_FLUSH_DONE || in zil_sync()
3762 lwb->lwb_alloc_txg > txg || lwb->lwb_max_txg > txg) in zil_sync()
3764 list_remove(&zilog->zl_lwb_list, lwb); in zil_sync()
3765 if (!BP_IS_HOLE(&lwb->lwb_blk)) in zil_sync()
3766 zio_free(spa, txg, &lwb->lwb_blk); in zil_sync()
3775 if (list_is_empty(&zilog->zl_lwb_list)) in zil_sync()
3776 BP_ZERO(&zh->zh_log); in zil_sync()
3779 mutex_exit(&zilog->zl_lock); in zil_sync()
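
/*
 * Editor's note: toy model of the trim loop above. In syncing context,
 * lwbs are popped from the head of the list only while they are fully
 * flushed and cannot be referenced by this or a later txg; the first
 * still-live lwb stops the scan and remains the head of the on-disk
 * chain. The types below are hypothetical.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

typedef struct slwb {
	struct slwb *next;
	bool flush_done;	/* write and flush both completed */
	uint64_t alloc_txg;	/* txg the lwb's block was allocated in */
	uint64_t max_txg;	/* highest txg with records in this lwb */
} slwb_t;

static void
free_lwb(slwb_t *l)	/* stand-in for zio_free() plus lwb teardown */
{
	free(l);
}

static void
trim_lwbs(slwb_t **head, uint64_t txg)
{
	slwb_t *l;

	while ((l = *head) != NULL) {
		if (!l->flush_done || l->alloc_txg > txg || l->max_txg > txg)
			break;
		*head = l->next;
		free_lwb(l);
	}
}
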
3787 list_create(&lwb->lwb_itxs, sizeof (itx_t), offsetof(itx_t, itx_node)); in zil_lwb_cons()
3788 list_create(&lwb->lwb_waiters, sizeof (zil_commit_waiter_t), in zil_lwb_cons()
3790 avl_create(&lwb->lwb_vdev_tree, zil_lwb_vdev_compare, in zil_lwb_cons()
3792 mutex_init(&lwb->lwb_vdev_lock, NULL, MUTEX_DEFAULT, NULL); in zil_lwb_cons()
3801 mutex_destroy(&lwb->lwb_vdev_lock); in zil_lwb_dest()
3802 avl_destroy(&lwb->lwb_vdev_tree); in zil_lwb_dest()
3803 list_destroy(&lwb->lwb_waiters); in zil_lwb_dest()
3804 list_destroy(&lwb->lwb_itxs); in zil_lwb_dest()
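
/*
 * Editor's note: zil_lwb_cons()/zil_lwb_dest() above are object-cache
 * constructor/destructor hooks: the embedded lists, AVL tree, and lock
 * are set up once per cached object rather than once per allocation,
 * so recycled lwbs come back with those structures already
 * initialized. A minimal user-space analogue (hypothetical, and
 * single-threaded for brevity):
 */
#include <pthread.h>
#include <stdlib.h>

typedef struct cobj {
	pthread_mutex_t lock;	/* stays initialized while cached */
	struct cobj *free_next;
} cobj_t;

static cobj_t *free_list;

static cobj_t *
cache_alloc(void)
{
	cobj_t *o = free_list;

	if (o != NULL) {	/* recycled: constructor already ran */
		free_list = o->free_next;
		return (o);
	}
	o = malloc(sizeof (*o));
	pthread_mutex_init(&o->lock, NULL);	/* constructor work */
	return (o);
}

static void
cache_free(cobj_t *o)
{
	o->free_next = free_list;	/* destructor deferred until reap */
	free_list = o;
}
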
3822 zil_kstats_global->ks_data = &zil_stats; in zil_init()
3823 zil_kstats_global->ks_update = zil_kstats_global_update; in zil_init()
3824 zil_kstats_global->ks_private = NULL; in zil_init()
3844 zil_set_sync(zilog_t *zilog, uint64_t sync) in zil_set_sync() argument
3846 zilog->zl_sync = sync; in zil_set_sync()
3852 zilog->zl_logbias = logbias; in zil_set_logbias()
3862 zilog->zl_header = zh_phys; in zil_alloc()
3863 zilog->zl_os = os; in zil_alloc()
3864 zilog->zl_spa = dmu_objset_spa(os); in zil_alloc()
3865 zilog->zl_dmu_pool = dmu_objset_pool(os); in zil_alloc()
3866 zilog->zl_destroy_txg = TXG_INITIAL - 1; in zil_alloc()
3867 zilog->zl_logbias = dmu_objset_logbias(os); in zil_alloc()
3868 zilog->zl_sync = dmu_objset_syncprop(os); in zil_alloc()
3869 zilog->zl_dirty_max_txg = 0; in zil_alloc()
3870 zilog->zl_last_lwb_opened = NULL; in zil_alloc()
3871 zilog->zl_last_lwb_latency = 0; in zil_alloc()
3872 zilog->zl_max_block_size = MIN(MAX(P2ALIGN_TYPED(zil_maxblocksize, in zil_alloc()
3876 mutex_init(&zilog->zl_lock, NULL, MUTEX_DEFAULT, NULL); in zil_alloc()
3877 mutex_init(&zilog->zl_issuer_lock, NULL, MUTEX_DEFAULT, NULL); in zil_alloc()
3878 mutex_init(&zilog->zl_lwb_io_lock, NULL, MUTEX_DEFAULT, NULL); in zil_alloc()
3881 mutex_init(&zilog->zl_itxg[i].itxg_lock, NULL, in zil_alloc()
3885 list_create(&zilog->zl_lwb_list, sizeof (lwb_t), in zil_alloc()
3888 list_create(&zilog->zl_itx_commit_list, sizeof (itx_t), in zil_alloc()
3891 cv_init(&zilog->zl_cv_suspend, NULL, CV_DEFAULT, NULL); in zil_alloc()
3892 cv_init(&zilog->zl_lwb_io_cv, NULL, CV_DEFAULT, NULL); in zil_alloc()
3895 zilog->zl_prev_opt[i] = zilog->zl_max_block_size - in zil_alloc()
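
/*
 * Editor's note: the zl_max_block_size assignment above clamps the
 * zil_maxblocksize tunable into [ZIL_MIN_BLKSZ, ZIL_MAX_BLKSZ] after
 * aligning it down to a multiple of ZIL_MIN_BLKSZ. Spelled out, with
 * the constants below assumed for illustration only:
 */
#include <stdint.h>

#define	SK_MIN_BLKSZ	4096ULL		/* stand-in for ZIL_MIN_BLKSZ */
#define	SK_MAX_BLKSZ	(128 * 1024ULL)	/* stand-in for ZIL_MAX_BLKSZ */

static uint64_t
clamp_max_block_size(uint64_t tunable)
{
	/* align down; valid because SK_MIN_BLKSZ is a power of two */
	uint64_t v = tunable & ~(SK_MIN_BLKSZ - 1);

	if (v < SK_MIN_BLKSZ)
		v = SK_MIN_BLKSZ;
	if (v > SK_MAX_BLKSZ)
		v = SK_MAX_BLKSZ;
	return (v);
}
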
3907 zilog->zl_stop_sync = 1; in zil_free()
3909 ASSERT0(zilog->zl_suspend); in zil_free()
3910 ASSERT0(zilog->zl_suspending); in zil_free()
3912 ASSERT(list_is_empty(&zilog->zl_lwb_list)); in zil_free()
3913 list_destroy(&zilog->zl_lwb_list); in zil_free()
3915 ASSERT(list_is_empty(&zilog->zl_itx_commit_list)); in zil_free()
3916 list_destroy(&zilog->zl_itx_commit_list); in zil_free()
3926 if (zilog->zl_itxg[i].itxg_itxs) in zil_free()
3927 zil_itxg_clean(zilog->zl_itxg[i].itxg_itxs); in zil_free()
3928 mutex_destroy(&zilog->zl_itxg[i].itxg_lock); in zil_free()
3931 mutex_destroy(&zilog->zl_issuer_lock); in zil_free()
3932 mutex_destroy(&zilog->zl_lock); in zil_free()
3933 mutex_destroy(&zilog->zl_lwb_io_lock); in zil_free()
3935 cv_destroy(&zilog->zl_cv_suspend); in zil_free()
3936 cv_destroy(&zilog->zl_lwb_io_cv); in zil_free()
3949 ASSERT3P(zilog->zl_get_data, ==, NULL); in zil_open()
3950 ASSERT3P(zilog->zl_last_lwb_opened, ==, NULL); in zil_open()
3951 ASSERT(list_is_empty(&zilog->zl_lwb_list)); in zil_open()
3953 zilog->zl_get_data = get_data; in zil_open()
3954 zilog->zl_sums = zil_sums; in zil_open()
3968 if (!dmu_objset_is_snapshot(zilog->zl_os)) { in zil_close()
3971 ASSERT(list_is_empty(&zilog->zl_lwb_list)); in zil_close()
3972 ASSERT0(zilog->zl_dirty_max_txg); in zil_close()
3976 mutex_enter(&zilog->zl_lock); in zil_close()
3977 txg = zilog->zl_dirty_max_txg; in zil_close()
3978 lwb = list_tail(&zilog->zl_lwb_list); in zil_close()
3980 txg = MAX(txg, lwb->lwb_alloc_txg); in zil_close()
3981 txg = MAX(txg, lwb->lwb_max_txg); in zil_close()
3983 mutex_exit(&zilog->zl_lock); in zil_close()
3990 mutex_enter(&zilog->zl_lwb_io_lock); in zil_close()
3991 txg = MAX(zilog->zl_lwb_max_issued_txg, txg); in zil_close()
3992 mutex_exit(&zilog->zl_lwb_io_lock); in zil_close()
4000 txg_wait_synced(zilog->zl_dmu_pool, txg); in zil_close()
4005 if (txg < spa_freeze_txg(zilog->zl_spa)) in zil_close()
4008 zilog->zl_get_data = NULL; in zil_close()
4013 mutex_enter(&zilog->zl_lock); in zil_close()
4014 lwb = list_remove_head(&zilog->zl_lwb_list); in zil_close()
4016 ASSERT(list_is_empty(&zilog->zl_lwb_list)); in zil_close()
4017 ASSERT3S(lwb->lwb_state, ==, LWB_STATE_NEW); in zil_close()
4018 zio_buf_free(lwb->lwb_buf, lwb->lwb_sz); in zil_close()
4021 mutex_exit(&zilog->zl_lock); in zil_close()
4032 * Long holds are not really intended to be used the way we do here --
4039 * Ideally, we would locate the existing long-holder (i.e. the zfsvfs_t or
4061 mutex_enter(&zilog->zl_lock); in zil_suspend()
4062 zh = zilog->zl_header; in zil_suspend()
4064 if (zh->zh_flags & ZIL_REPLAY_NEEDED) { /* unplayed log */ in zil_suspend()
4065 mutex_exit(&zilog->zl_lock); in zil_suspend()
4076 if (cookiep == NULL && !zilog->zl_suspending && in zil_suspend()
4077 (zilog->zl_suspend > 0 || BP_IS_HOLE(&zh->zh_log))) { in zil_suspend()
4078 mutex_exit(&zilog->zl_lock); in zil_suspend()
4086 zilog->zl_suspend++; in zil_suspend()
4088 if (zilog->zl_suspend > 1) { in zil_suspend()
4094 while (zilog->zl_suspending) in zil_suspend()
4095 cv_wait(&zilog->zl_cv_suspend, &zilog->zl_lock); in zil_suspend()
4096 mutex_exit(&zilog->zl_lock); in zil_suspend()
4106 * If there is no pointer to an on-disk block, this ZIL must not in zil_suspend()
4110 if (BP_IS_HOLE(&zh->zh_log)) { in zil_suspend()
4114 mutex_exit(&zilog->zl_lock); in zil_suspend()
4124 if (os->os_encrypted && in zil_suspend()
4126 zilog->zl_suspend--; in zil_suspend()
4127 mutex_exit(&zilog->zl_lock); in zil_suspend()
4133 zilog->zl_suspending = B_TRUE; in zil_suspend()
4134 mutex_exit(&zilog->zl_lock); in zil_suspend()
4151 txg_wait_synced(zilog->zl_dmu_pool, 0); in zil_suspend()
4155 mutex_enter(&zilog->zl_lock); in zil_suspend()
4156 zilog->zl_suspending = B_FALSE; in zil_suspend()
4157 cv_broadcast(&zilog->zl_cv_suspend); in zil_suspend()
4158 mutex_exit(&zilog->zl_lock); in zil_suspend()
4160 if (os->os_encrypted) in zil_suspend()
4176 mutex_enter(&zilog->zl_lock); in zil_resume()
4177 ASSERT(zilog->zl_suspend != 0); in zil_resume()
4178 zilog->zl_suspend--; in zil_resume()
4179 mutex_exit(&zilog->zl_lock); in zil_resume()
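
/*
 * Editor's note: user-space sketch (hypothetical names) of the
 * suspend/resume protocol above: zl_suspend counts suspenders,
 * zl_suspending marks a suspend still in progress, and later callers
 * wait on the condvar until the first suspender has finished emptying
 * the log. resume() simply drops the reference.
 */
#include <assert.h>
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t zl_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv_suspend = PTHREAD_COND_INITIALIZER;
static int zl_suspend;		/* reference count of suspenders */
static bool zl_suspending;	/* first suspender is still working */

static void
suspend(void)
{
	pthread_mutex_lock(&zl_lock);
	zl_suspend++;
	if (zl_suspend > 1) {		/* someone is already on it */
		while (zl_suspending)
			pthread_cond_wait(&cv_suspend, &zl_lock);
		pthread_mutex_unlock(&zl_lock);
		return;
	}
	zl_suspending = true;
	pthread_mutex_unlock(&zl_lock);

	/* ... commit and empty the log without holding zl_lock ... */

	pthread_mutex_lock(&zl_lock);
	zl_suspending = false;
	pthread_cond_broadcast(&cv_suspend);
	pthread_mutex_unlock(&zl_lock);
}

static void
resume(void)
{
	pthread_mutex_lock(&zl_lock);
	assert(zl_suspend > 0);
	zl_suspend--;
	pthread_mutex_unlock(&zl_lock);
}
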
4196 zilog->zl_replaying_seq--; /* didn't actually replay this one */ in zil_replay_error()
4198 dmu_objset_name(zilog->zl_os, name); in zil_replay_error()
4202 (u_longlong_t)lr->lrc_seq, in zil_replay_error()
4203 (u_longlong_t)(lr->lrc_txtype & ~TX_CI), in zil_replay_error()
4204 (lr->lrc_txtype & TX_CI) ? "CI" : ""); in zil_replay_error()
4214 const zil_header_t *zh = zilog->zl_header; in zil_replay_log_record()
4215 uint64_t reclen = lr->lrc_reclen; in zil_replay_log_record()
4216 uint64_t txtype = lr->lrc_txtype; in zil_replay_log_record()
4219 zilog->zl_replaying_seq = lr->lrc_seq; in zil_replay_log_record()
4221 if (lr->lrc_seq <= zh->zh_replay_seq) /* already replayed */ in zil_replay_log_record()
4224 if (lr->lrc_txg < claim_txg) /* already committed */ in zil_replay_log_record()
4227 /* Strip case-insensitive bit, still present in log record */ in zil_replay_log_record()
4238 error = dmu_object_info(zilog->zl_os, in zil_replay_log_record()
4239 LR_FOID_GET_OBJ(((lr_ooo_t *)lr)->lr_foid), NULL); in zil_replay_log_record()
4247 memcpy(zr->zr_lr, lr, reclen); in zil_replay_log_record()
4254 zr->zr_lr + reclen); in zil_replay_log_record()
4266 if (zr->zr_byteswap) in zil_replay_log_record()
4267 byteswap_uint64_array(zr->zr_lr, reclen); in zil_replay_log_record()
4275 error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, zr->zr_byteswap); in zil_replay_log_record()
4284 txg_wait_synced(spa_get_dsl(zilog->zl_spa), 0); in zil_replay_log_record()
4285 error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, B_FALSE); in zil_replay_log_record()
4297 zilog->zl_replay_blks++; in zil_incr_blks()
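
/*
 * Editor's note: sketch of the table-driven dispatch above. Each log
 * record type (lrc_txtype) indexes an array of replay callbacks, and a
 * failing record is retried once after forcing a txg sync, because a
 * not-yet-synced remove can make the first attempt fail spuriously.
 * force_txg_sync() is a hypothetical stand-in for txg_wait_synced(),
 * and the retry passes false for byteswap, mirroring B_FALSE above,
 * since the record was already swapped in place.
 */
#include <stdbool.h>

typedef int (*replay_fn_t)(void *arg, void *lr, bool byteswap);

static void
force_txg_sync(void)	/* txg_wait_synced() stand-in */
{
}

static int
replay_record(replay_fn_t *table, int txtype, void *arg, void *lr,
    bool byteswap)
{
	int error = table[txtype](arg, lr, byteswap);

	if (error != 0) {
		force_txg_sync();
		error = table[txtype](arg, lr, false);
	}
	return (error);
}
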
4303 * If this dataset has a non-empty intent log, replay it and destroy it.
4311 const zil_header_t *zh = zilog->zl_header; in zil_replay()
4314 if ((zh->zh_flags & ZIL_REPLAY_NEEDED) == 0) { in zil_replay()
4320 zr.zr_byteswap = BP_SHOULD_BYTESWAP(&zh->zh_log); in zil_replay()
4324 * Wait for in-progress removes to sync before starting replay. in zil_replay()
4326 txg_wait_synced(zilog->zl_dmu_pool, 0); in zil_replay()
4328 zilog->zl_replay = B_TRUE; in zil_replay()
4329 zilog->zl_replay_time = ddi_get_lbolt(); in zil_replay()
4330 ASSERT(zilog->zl_replay_blks == 0); in zil_replay()
4332 zh->zh_claim_txg, B_TRUE); in zil_replay()
4336 txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg); in zil_replay()
4337 zilog->zl_replay = B_FALSE; in zil_replay()
4345 if (zilog->zl_sync == ZFS_SYNC_DISABLED) in zil_replaying()
4348 if (zilog->zl_replay) { in zil_replaying()
4349 dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); in zil_replaying()
4350 zilog->zl_replayed_seq[dmu_tx_get_txg(tx) & TXG_MASK] = in zil_replaying()
4351 zilog->zl_replaying_seq; in zil_replaying()
4408 "Limit in bytes slog sync writes per commit");