1*61145dc2SMartin Matuska // SPDX-License-Identifier: CDDL-1.0
2eda14cbcSMatt Macy /*
3eda14cbcSMatt Macy * CDDL HEADER START
4eda14cbcSMatt Macy *
5eda14cbcSMatt Macy * The contents of this file are subject to the terms of the
6eda14cbcSMatt Macy * Common Development and Distribution License (the "License").
7eda14cbcSMatt Macy * You may not use this file except in compliance with the License.
8eda14cbcSMatt Macy *
9eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10271171e0SMartin Matuska * or https://opensource.org/licenses/CDDL-1.0.
11eda14cbcSMatt Macy * See the License for the specific language governing permissions
12eda14cbcSMatt Macy * and limitations under the License.
13eda14cbcSMatt Macy *
14eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each
15eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the
17eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying
18eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner]
19eda14cbcSMatt Macy *
20eda14cbcSMatt Macy * CDDL HEADER END
21eda14cbcSMatt Macy */
22eda14cbcSMatt Macy /*
23eda14cbcSMatt Macy * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24eda14cbcSMatt Macy * Portions Copyright 2011 Martin Matuska
25eda14cbcSMatt Macy * Copyright (c) 2012, 2019 by Delphix. All rights reserved.
26eda14cbcSMatt Macy */
27eda14cbcSMatt Macy
28eda14cbcSMatt Macy #include <sys/zfs_context.h>
29eda14cbcSMatt Macy #include <sys/txg_impl.h>
30eda14cbcSMatt Macy #include <sys/dmu_impl.h>
31eda14cbcSMatt Macy #include <sys/spa_impl.h>
32eda14cbcSMatt Macy #include <sys/dmu_tx.h>
33eda14cbcSMatt Macy #include <sys/dsl_pool.h>
34eda14cbcSMatt Macy #include <sys/dsl_scan.h>
35eda14cbcSMatt Macy #include <sys/zil.h>
36eda14cbcSMatt Macy #include <sys/callb.h>
37eda14cbcSMatt Macy #include <sys/trace_zfs.h>
38eda14cbcSMatt Macy
39eda14cbcSMatt Macy /*
40eda14cbcSMatt Macy * ZFS Transaction Groups
41eda14cbcSMatt Macy * ----------------------
42eda14cbcSMatt Macy *
43eda14cbcSMatt Macy * ZFS transaction groups are, as the name implies, groups of transactions
44eda14cbcSMatt Macy * that act on persistent state. ZFS asserts consistency at the granularity of
45eda14cbcSMatt Macy * these transaction groups. Each successive transaction group (txg) is
46eda14cbcSMatt Macy * assigned a 64-bit consecutive identifier. There are three active
47eda14cbcSMatt Macy * transaction group states: open, quiescing, or syncing. At any given time,
48eda14cbcSMatt Macy * there may be an active txg associated with each state; each active txg may
49eda14cbcSMatt Macy * either be processing, or blocked waiting to enter the next state. There may
50eda14cbcSMatt Macy * be up to three active txgs, and there is always a txg in the open state
51eda14cbcSMatt Macy * (though it may be blocked waiting to enter the quiescing state). In broad
52eda14cbcSMatt Macy * strokes, transactions -- operations that change in-memory structures -- are
53eda14cbcSMatt Macy * accepted into the txg in the open state, and are completed while the txg is
54eda14cbcSMatt Macy * in the open or quiescing states. The accumulated changes are written to
55eda14cbcSMatt Macy * disk in the syncing state.
56eda14cbcSMatt Macy *
57eda14cbcSMatt Macy * Open
58eda14cbcSMatt Macy *
59eda14cbcSMatt Macy * When a new txg becomes active, it first enters the open state. New
60eda14cbcSMatt Macy * transactions -- updates to in-memory structures -- are assigned to the
61eda14cbcSMatt Macy * currently open txg. There is always a txg in the open state so that ZFS can
62eda14cbcSMatt Macy * accept new changes (though the txg may refuse new changes if it has hit
63eda14cbcSMatt Macy * some limit). ZFS advances the open txg to the next state for a variety of
64eda14cbcSMatt Macy * reasons such as it hitting a time or size threshold, or the execution of an
65eda14cbcSMatt Macy * administrative action that must be completed in the syncing state.
66eda14cbcSMatt Macy *
67eda14cbcSMatt Macy * Quiescing
68eda14cbcSMatt Macy *
69eda14cbcSMatt Macy * After a txg exits the open state, it enters the quiescing state. The
70eda14cbcSMatt Macy * quiescing state is intended to provide a buffer between accepting new
71eda14cbcSMatt Macy * transactions in the open state and writing them out to stable storage in
72eda14cbcSMatt Macy * the syncing state. While quiescing, transactions can continue their
73eda14cbcSMatt Macy * operation without delaying either of the other states. Typically, a txg is
74eda14cbcSMatt Macy * in the quiescing state very briefly since the operations are bounded by
75eda14cbcSMatt Macy * software latencies rather than, say, slower I/O latencies. After all
76eda14cbcSMatt Macy * transactions complete, the txg is ready to enter the next state.
77eda14cbcSMatt Macy *
78eda14cbcSMatt Macy * Syncing
79eda14cbcSMatt Macy *
80eda14cbcSMatt Macy * In the syncing state, the in-memory state built up during the open and (to
81eda14cbcSMatt Macy * a lesser degree) the quiescing states is written to stable storage. The
82eda14cbcSMatt Macy * process of writing out modified data can, in turn modify more data. For
83eda14cbcSMatt Macy * example when we write new blocks, we need to allocate space for them; those
84eda14cbcSMatt Macy * allocations modify metadata (space maps)... which themselves must be
85eda14cbcSMatt Macy * written to stable storage. During the sync state, ZFS iterates, writing out
86eda14cbcSMatt Macy * data until it converges and all in-memory changes have been written out.
87eda14cbcSMatt Macy * The first such pass is the largest as it encompasses all the modified user
88eda14cbcSMatt Macy * data (as opposed to filesystem metadata). Subsequent passes typically have
89eda14cbcSMatt Macy * far less data to write as they consist exclusively of filesystem metadata.
90eda14cbcSMatt Macy *
91eda14cbcSMatt Macy * To ensure convergence, after a certain number of passes ZFS begins
92eda14cbcSMatt Macy * overwriting locations on stable storage that had been allocated earlier in
93eda14cbcSMatt Macy * the syncing state (and subsequently freed). ZFS usually allocates new
94eda14cbcSMatt Macy * blocks to optimize for large, continuous, writes. For the syncing state to
95eda14cbcSMatt Macy * converge however it must complete a pass where no new blocks are allocated
96eda14cbcSMatt Macy * since each allocation requires a modification of persistent metadata.
97eda14cbcSMatt Macy * Further, to hasten convergence, after a prescribed number of passes, ZFS
98eda14cbcSMatt Macy * also defers frees, and stops compressing.
99eda14cbcSMatt Macy *
100eda14cbcSMatt Macy * In addition to writing out user data, we must also execute synctasks during
101eda14cbcSMatt Macy * the syncing context. A synctask is the mechanism by which some
102eda14cbcSMatt Macy * administrative activities work such as creating and destroying snapshots or
103eda14cbcSMatt Macy * datasets. Note that when a synctask is initiated it enters the open txg,
104eda14cbcSMatt Macy * and ZFS then pushes that txg as quickly as possible to completion of the
105eda14cbcSMatt Macy * syncing state in order to reduce the latency of the administrative
106eda14cbcSMatt Macy * activity. To complete the syncing state, ZFS writes out a new uberblock,
107eda14cbcSMatt Macy * the root of the tree of blocks that comprise all state stored on the ZFS
108eda14cbcSMatt Macy * pool. Finally, if there is a quiesced txg waiting, we signal that it can
109eda14cbcSMatt Macy * now transition to the syncing state.
110eda14cbcSMatt Macy */
111eda14cbcSMatt Macy
112da5137abSMartin Matuska static __attribute__((noreturn)) void txg_sync_thread(void *arg);
113da5137abSMartin Matuska static __attribute__((noreturn)) void txg_quiesce_thread(void *arg);
114eda14cbcSMatt Macy
115be181ee2SMartin Matuska uint_t zfs_txg_timeout = 5; /* max seconds worth of delta per txg */
116eda14cbcSMatt Macy
117eda14cbcSMatt Macy /*
118eda14cbcSMatt Macy * Prepare the txg subsystem.
119eda14cbcSMatt Macy */
120eda14cbcSMatt Macy void
txg_init(dsl_pool_t * dp,uint64_t txg)121eda14cbcSMatt Macy txg_init(dsl_pool_t *dp, uint64_t txg)
122eda14cbcSMatt Macy {
123eda14cbcSMatt Macy tx_state_t *tx = &dp->dp_tx;
124eda14cbcSMatt Macy int c;
125da5137abSMartin Matuska memset(tx, 0, sizeof (tx_state_t));
126eda14cbcSMatt Macy
127eda14cbcSMatt Macy tx->tx_cpu = vmem_zalloc(max_ncpus * sizeof (tx_cpu_t), KM_SLEEP);
128eda14cbcSMatt Macy
129eda14cbcSMatt Macy for (c = 0; c < max_ncpus; c++) {
130eda14cbcSMatt Macy int i;
131eda14cbcSMatt Macy
132eda14cbcSMatt Macy mutex_init(&tx->tx_cpu[c].tc_lock, NULL, MUTEX_DEFAULT, NULL);
133eda14cbcSMatt Macy mutex_init(&tx->tx_cpu[c].tc_open_lock, NULL, MUTEX_NOLOCKDEP,
134eda14cbcSMatt Macy NULL);
135eda14cbcSMatt Macy for (i = 0; i < TXG_SIZE; i++) {
136eda14cbcSMatt Macy cv_init(&tx->tx_cpu[c].tc_cv[i], NULL, CV_DEFAULT,
137eda14cbcSMatt Macy NULL);
138eda14cbcSMatt Macy list_create(&tx->tx_cpu[c].tc_callbacks[i],
139eda14cbcSMatt Macy sizeof (dmu_tx_callback_t),
140eda14cbcSMatt Macy offsetof(dmu_tx_callback_t, dcb_node));
141eda14cbcSMatt Macy }
142eda14cbcSMatt Macy }
143eda14cbcSMatt Macy
144eda14cbcSMatt Macy mutex_init(&tx->tx_sync_lock, NULL, MUTEX_DEFAULT, NULL);
145eda14cbcSMatt Macy
146eda14cbcSMatt Macy cv_init(&tx->tx_sync_more_cv, NULL, CV_DEFAULT, NULL);
147eda14cbcSMatt Macy cv_init(&tx->tx_sync_done_cv, NULL, CV_DEFAULT, NULL);
148eda14cbcSMatt Macy cv_init(&tx->tx_quiesce_more_cv, NULL, CV_DEFAULT, NULL);
149eda14cbcSMatt Macy cv_init(&tx->tx_quiesce_done_cv, NULL, CV_DEFAULT, NULL);
150eda14cbcSMatt Macy cv_init(&tx->tx_exit_cv, NULL, CV_DEFAULT, NULL);
151eda14cbcSMatt Macy
152eda14cbcSMatt Macy tx->tx_open_txg = txg;
153eda14cbcSMatt Macy }
154eda14cbcSMatt Macy
155eda14cbcSMatt Macy /*
156eda14cbcSMatt Macy * Close down the txg subsystem.
157eda14cbcSMatt Macy */
158eda14cbcSMatt Macy void
txg_fini(dsl_pool_t * dp)159eda14cbcSMatt Macy txg_fini(dsl_pool_t *dp)
160eda14cbcSMatt Macy {
161eda14cbcSMatt Macy tx_state_t *tx = &dp->dp_tx;
162eda14cbcSMatt Macy int c;
163eda14cbcSMatt Macy
164eda14cbcSMatt Macy ASSERT0(tx->tx_threads);
165eda14cbcSMatt Macy
166eda14cbcSMatt Macy mutex_destroy(&tx->tx_sync_lock);
167eda14cbcSMatt Macy
168eda14cbcSMatt Macy cv_destroy(&tx->tx_sync_more_cv);
169eda14cbcSMatt Macy cv_destroy(&tx->tx_sync_done_cv);
170eda14cbcSMatt Macy cv_destroy(&tx->tx_quiesce_more_cv);
171eda14cbcSMatt Macy cv_destroy(&tx->tx_quiesce_done_cv);
172eda14cbcSMatt Macy cv_destroy(&tx->tx_exit_cv);
173eda14cbcSMatt Macy
174eda14cbcSMatt Macy for (c = 0; c < max_ncpus; c++) {
175eda14cbcSMatt Macy int i;
176eda14cbcSMatt Macy
177eda14cbcSMatt Macy mutex_destroy(&tx->tx_cpu[c].tc_open_lock);
178eda14cbcSMatt Macy mutex_destroy(&tx->tx_cpu[c].tc_lock);
179eda14cbcSMatt Macy for (i = 0; i < TXG_SIZE; i++) {
180eda14cbcSMatt Macy cv_destroy(&tx->tx_cpu[c].tc_cv[i]);
181eda14cbcSMatt Macy list_destroy(&tx->tx_cpu[c].tc_callbacks[i]);
182eda14cbcSMatt Macy }
183eda14cbcSMatt Macy }
184eda14cbcSMatt Macy
185eda14cbcSMatt Macy if (tx->tx_commit_cb_taskq != NULL)
186eda14cbcSMatt Macy taskq_destroy(tx->tx_commit_cb_taskq);
187eda14cbcSMatt Macy
188eda14cbcSMatt Macy vmem_free(tx->tx_cpu, max_ncpus * sizeof (tx_cpu_t));
189eda14cbcSMatt Macy
190da5137abSMartin Matuska memset(tx, 0, sizeof (tx_state_t));
191eda14cbcSMatt Macy }
192eda14cbcSMatt Macy
193eda14cbcSMatt Macy /*
194eda14cbcSMatt Macy * Start syncing transaction groups.
195eda14cbcSMatt Macy */
196eda14cbcSMatt Macy void
txg_sync_start(dsl_pool_t * dp)197eda14cbcSMatt Macy txg_sync_start(dsl_pool_t *dp)
198eda14cbcSMatt Macy {
199eda14cbcSMatt Macy tx_state_t *tx = &dp->dp_tx;
200eda14cbcSMatt Macy
201eda14cbcSMatt Macy mutex_enter(&tx->tx_sync_lock);
202eda14cbcSMatt Macy
203eda14cbcSMatt Macy dprintf("pool %p\n", dp);
204eda14cbcSMatt Macy
205eda14cbcSMatt Macy ASSERT0(tx->tx_threads);
206eda14cbcSMatt Macy
207eda14cbcSMatt Macy tx->tx_threads = 2;
208eda14cbcSMatt Macy
209eda14cbcSMatt Macy tx->tx_quiesce_thread = thread_create(NULL, 0, txg_quiesce_thread,
210eda14cbcSMatt Macy dp, 0, &p0, TS_RUN, defclsyspri);
211eda14cbcSMatt Macy
212eda14cbcSMatt Macy /*
213eda14cbcSMatt Macy * The sync thread can need a larger-than-default stack size on
214eda14cbcSMatt Macy * 32-bit x86. This is due in part to nested pools and
215eda14cbcSMatt Macy * scrub_visitbp() recursion.
216eda14cbcSMatt Macy */
217eda14cbcSMatt Macy tx->tx_sync_thread = thread_create(NULL, 0, txg_sync_thread,
218eda14cbcSMatt Macy dp, 0, &p0, TS_RUN, defclsyspri);
219eda14cbcSMatt Macy
220eda14cbcSMatt Macy mutex_exit(&tx->tx_sync_lock);
221eda14cbcSMatt Macy }
222eda14cbcSMatt Macy
223eda14cbcSMatt Macy static void
txg_thread_enter(tx_state_t * tx,callb_cpr_t * cpr)224eda14cbcSMatt Macy txg_thread_enter(tx_state_t *tx, callb_cpr_t *cpr)
225eda14cbcSMatt Macy {
226eda14cbcSMatt Macy CALLB_CPR_INIT(cpr, &tx->tx_sync_lock, callb_generic_cpr, FTAG);
227eda14cbcSMatt Macy mutex_enter(&tx->tx_sync_lock);
228eda14cbcSMatt Macy }
229eda14cbcSMatt Macy
230eda14cbcSMatt Macy static void
txg_thread_exit(tx_state_t * tx,callb_cpr_t * cpr,kthread_t ** tpp)231eda14cbcSMatt Macy txg_thread_exit(tx_state_t *tx, callb_cpr_t *cpr, kthread_t **tpp)
232eda14cbcSMatt Macy {
233eda14cbcSMatt Macy ASSERT(*tpp != NULL);
234eda14cbcSMatt Macy *tpp = NULL;
235eda14cbcSMatt Macy tx->tx_threads--;
236eda14cbcSMatt Macy cv_broadcast(&tx->tx_exit_cv);
237eda14cbcSMatt Macy CALLB_CPR_EXIT(cpr); /* drops &tx->tx_sync_lock */
238eda14cbcSMatt Macy thread_exit();
239eda14cbcSMatt Macy }
240eda14cbcSMatt Macy
241eda14cbcSMatt Macy static void
txg_thread_wait(tx_state_t * tx,callb_cpr_t * cpr,kcondvar_t * cv,clock_t time)242eda14cbcSMatt Macy txg_thread_wait(tx_state_t *tx, callb_cpr_t *cpr, kcondvar_t *cv, clock_t time)
243eda14cbcSMatt Macy {
244eda14cbcSMatt Macy CALLB_CPR_SAFE_BEGIN(cpr);
245eda14cbcSMatt Macy
246eda14cbcSMatt Macy if (time) {
2472c48331dSMatt Macy (void) cv_timedwait_idle(cv, &tx->tx_sync_lock,
248eda14cbcSMatt Macy ddi_get_lbolt() + time);
249eda14cbcSMatt Macy } else {
2502c48331dSMatt Macy cv_wait_idle(cv, &tx->tx_sync_lock);
251eda14cbcSMatt Macy }
252eda14cbcSMatt Macy
253eda14cbcSMatt Macy CALLB_CPR_SAFE_END(cpr, &tx->tx_sync_lock);
254eda14cbcSMatt Macy }
255eda14cbcSMatt Macy
256eda14cbcSMatt Macy /*
257eda14cbcSMatt Macy * Stop syncing transaction groups.
258eda14cbcSMatt Macy */
259eda14cbcSMatt Macy void
txg_sync_stop(dsl_pool_t * dp)260eda14cbcSMatt Macy txg_sync_stop(dsl_pool_t *dp)
261eda14cbcSMatt Macy {
262eda14cbcSMatt Macy tx_state_t *tx = &dp->dp_tx;
263eda14cbcSMatt Macy
264eda14cbcSMatt Macy dprintf("pool %p\n", dp);
265eda14cbcSMatt Macy /*
266eda14cbcSMatt Macy * Finish off any work in progress.
267eda14cbcSMatt Macy */
268eda14cbcSMatt Macy ASSERT3U(tx->tx_threads, ==, 2);
269eda14cbcSMatt Macy
270eda14cbcSMatt Macy /*
271eda14cbcSMatt Macy * We need to ensure that we've vacated the deferred metaslab trees.
272eda14cbcSMatt Macy */
273eda14cbcSMatt Macy txg_wait_synced(dp, tx->tx_open_txg + TXG_DEFER_SIZE);
274eda14cbcSMatt Macy
275eda14cbcSMatt Macy /*
276eda14cbcSMatt Macy * Wake all sync threads and wait for them to die.
277eda14cbcSMatt Macy */
278eda14cbcSMatt Macy mutex_enter(&tx->tx_sync_lock);
279eda14cbcSMatt Macy
280eda14cbcSMatt Macy ASSERT3U(tx->tx_threads, ==, 2);
281eda14cbcSMatt Macy
282eda14cbcSMatt Macy tx->tx_exiting = 1;
283eda14cbcSMatt Macy
284eda14cbcSMatt Macy cv_broadcast(&tx->tx_quiesce_more_cv);
285eda14cbcSMatt Macy cv_broadcast(&tx->tx_quiesce_done_cv);
286eda14cbcSMatt Macy cv_broadcast(&tx->tx_sync_more_cv);
287eda14cbcSMatt Macy
288eda14cbcSMatt Macy while (tx->tx_threads != 0)
289eda14cbcSMatt Macy cv_wait(&tx->tx_exit_cv, &tx->tx_sync_lock);
290eda14cbcSMatt Macy
291eda14cbcSMatt Macy tx->tx_exiting = 0;
292eda14cbcSMatt Macy
293eda14cbcSMatt Macy mutex_exit(&tx->tx_sync_lock);
294eda14cbcSMatt Macy }
295eda14cbcSMatt Macy
296184c1b94SMartin Matuska /*
297184c1b94SMartin Matuska * Get a handle on the currently open txg and keep it open.
298184c1b94SMartin Matuska *
299184c1b94SMartin Matuska * The txg is guaranteed to stay open until txg_rele_to_quiesce() is called for
300184c1b94SMartin Matuska * the handle. Once txg_rele_to_quiesce() has been called, the txg stays
301184c1b94SMartin Matuska * in quiescing state until txg_rele_to_sync() is called for the handle.
302184c1b94SMartin Matuska *
303184c1b94SMartin Matuska * It is guaranteed that subsequent calls return monotonically increasing
304184c1b94SMartin Matuska * txgs for the same dsl_pool_t. Of course this is not strong monotonicity,
305184c1b94SMartin Matuska * because the same txg can be returned multiple times in a row. This
306184c1b94SMartin Matuska * guarantee holds both for subsequent calls from one thread and for multiple
307184c1b94SMartin Matuska * threads. For example, it is impossible to observe the following sequence
308184c1b94SMartin Matuska * of events:
309184c1b94SMartin Matuska *
310184c1b94SMartin Matuska * Thread 1 Thread 2
311184c1b94SMartin Matuska *
312184c1b94SMartin Matuska * 1 <- txg_hold_open(P, ...)
313184c1b94SMartin Matuska * 2 <- txg_hold_open(P, ...)
314184c1b94SMartin Matuska * 1 <- txg_hold_open(P, ...)
315184c1b94SMartin Matuska *
316184c1b94SMartin Matuska */
317eda14cbcSMatt Macy uint64_t
txg_hold_open(dsl_pool_t * dp,txg_handle_t * th)318eda14cbcSMatt Macy txg_hold_open(dsl_pool_t *dp, txg_handle_t *th)
319eda14cbcSMatt Macy {
320eda14cbcSMatt Macy tx_state_t *tx = &dp->dp_tx;
321eda14cbcSMatt Macy tx_cpu_t *tc;
322eda14cbcSMatt Macy uint64_t txg;
323eda14cbcSMatt Macy
324eda14cbcSMatt Macy /*
325eda14cbcSMatt Macy * It appears the processor id is simply used as a "random"
326eda14cbcSMatt Macy * number to index into the array, and there isn't any other
327eda14cbcSMatt Macy * significance to the chosen tx_cpu. Because.. Why not use
328eda14cbcSMatt Macy * the current cpu to index into the array?
329eda14cbcSMatt Macy */
3307877fdebSMatt Macy tc = &tx->tx_cpu[CPU_SEQID_UNSTABLE];
331eda14cbcSMatt Macy
332eda14cbcSMatt Macy mutex_enter(&tc->tc_open_lock);
333eda14cbcSMatt Macy txg = tx->tx_open_txg;
334eda14cbcSMatt Macy
335eda14cbcSMatt Macy mutex_enter(&tc->tc_lock);
336eda14cbcSMatt Macy tc->tc_count[txg & TXG_MASK]++;
337eda14cbcSMatt Macy mutex_exit(&tc->tc_lock);
338eda14cbcSMatt Macy
339eda14cbcSMatt Macy th->th_cpu = tc;
340eda14cbcSMatt Macy th->th_txg = txg;
341eda14cbcSMatt Macy
342eda14cbcSMatt Macy return (txg);
343eda14cbcSMatt Macy }
344eda14cbcSMatt Macy
345eda14cbcSMatt Macy void
txg_rele_to_quiesce(txg_handle_t * th)346eda14cbcSMatt Macy txg_rele_to_quiesce(txg_handle_t *th)
347eda14cbcSMatt Macy {
348eda14cbcSMatt Macy tx_cpu_t *tc = th->th_cpu;
349eda14cbcSMatt Macy
350eda14cbcSMatt Macy ASSERT(!MUTEX_HELD(&tc->tc_lock));
351eda14cbcSMatt Macy mutex_exit(&tc->tc_open_lock);
352eda14cbcSMatt Macy }
353eda14cbcSMatt Macy
354eda14cbcSMatt Macy void
txg_register_callbacks(txg_handle_t * th,list_t * tx_callbacks)355eda14cbcSMatt Macy txg_register_callbacks(txg_handle_t *th, list_t *tx_callbacks)
356eda14cbcSMatt Macy {
357eda14cbcSMatt Macy tx_cpu_t *tc = th->th_cpu;
358eda14cbcSMatt Macy int g = th->th_txg & TXG_MASK;
359eda14cbcSMatt Macy
360eda14cbcSMatt Macy mutex_enter(&tc->tc_lock);
361eda14cbcSMatt Macy list_move_tail(&tc->tc_callbacks[g], tx_callbacks);
362eda14cbcSMatt Macy mutex_exit(&tc->tc_lock);
363eda14cbcSMatt Macy }
364eda14cbcSMatt Macy
365eda14cbcSMatt Macy void
txg_rele_to_sync(txg_handle_t * th)366eda14cbcSMatt Macy txg_rele_to_sync(txg_handle_t *th)
367eda14cbcSMatt Macy {
368eda14cbcSMatt Macy tx_cpu_t *tc = th->th_cpu;
369eda14cbcSMatt Macy int g = th->th_txg & TXG_MASK;
370eda14cbcSMatt Macy
371eda14cbcSMatt Macy mutex_enter(&tc->tc_lock);
372eda14cbcSMatt Macy ASSERT(tc->tc_count[g] != 0);
373eda14cbcSMatt Macy if (--tc->tc_count[g] == 0)
374eda14cbcSMatt Macy cv_broadcast(&tc->tc_cv[g]);
375eda14cbcSMatt Macy mutex_exit(&tc->tc_lock);
376eda14cbcSMatt Macy
377eda14cbcSMatt Macy th->th_cpu = NULL; /* defensive */
378eda14cbcSMatt Macy }
379eda14cbcSMatt Macy
380eda14cbcSMatt Macy /*
381eda14cbcSMatt Macy * Blocks until all transactions in the group are committed.
382eda14cbcSMatt Macy *
383eda14cbcSMatt Macy * On return, the transaction group has reached a stable state in which it can
384eda14cbcSMatt Macy * then be passed off to the syncing context.
385eda14cbcSMatt Macy */
386eda14cbcSMatt Macy static void
txg_quiesce(dsl_pool_t * dp,uint64_t txg)387eda14cbcSMatt Macy txg_quiesce(dsl_pool_t *dp, uint64_t txg)
388eda14cbcSMatt Macy {
389eda14cbcSMatt Macy tx_state_t *tx = &dp->dp_tx;
390eda14cbcSMatt Macy uint64_t tx_open_time;
391eda14cbcSMatt Macy int g = txg & TXG_MASK;
392eda14cbcSMatt Macy int c;
393eda14cbcSMatt Macy
394eda14cbcSMatt Macy /*
395eda14cbcSMatt Macy * Grab all tc_open_locks so nobody else can get into this txg.
396eda14cbcSMatt Macy */
397eda14cbcSMatt Macy for (c = 0; c < max_ncpus; c++)
398eda14cbcSMatt Macy mutex_enter(&tx->tx_cpu[c].tc_open_lock);
399eda14cbcSMatt Macy
400eda14cbcSMatt Macy ASSERT(txg == tx->tx_open_txg);
401eda14cbcSMatt Macy tx->tx_open_txg++;
402eda14cbcSMatt Macy tx->tx_open_time = tx_open_time = gethrtime();
403eda14cbcSMatt Macy
404eda14cbcSMatt Macy DTRACE_PROBE2(txg__quiescing, dsl_pool_t *, dp, uint64_t, txg);
405eda14cbcSMatt Macy DTRACE_PROBE2(txg__opened, dsl_pool_t *, dp, uint64_t, tx->tx_open_txg);
406eda14cbcSMatt Macy
407eda14cbcSMatt Macy /*
408eda14cbcSMatt Macy * Now that we've incremented tx_open_txg, we can let threads
409eda14cbcSMatt Macy * enter the next transaction group.
410eda14cbcSMatt Macy */
411eda14cbcSMatt Macy for (c = 0; c < max_ncpus; c++)
412eda14cbcSMatt Macy mutex_exit(&tx->tx_cpu[c].tc_open_lock);
413eda14cbcSMatt Macy
414eda14cbcSMatt Macy spa_txg_history_set(dp->dp_spa, txg, TXG_STATE_OPEN, tx_open_time);
415eda14cbcSMatt Macy spa_txg_history_add(dp->dp_spa, txg + 1, tx_open_time);
416eda14cbcSMatt Macy
417eda14cbcSMatt Macy /*
418184c1b94SMartin Matuska * Quiesce the transaction group by waiting for everyone to
419184c1b94SMartin Matuska * call txg_rele_to_sync() for their open transaction handles.
420eda14cbcSMatt Macy */
421eda14cbcSMatt Macy for (c = 0; c < max_ncpus; c++) {
422eda14cbcSMatt Macy tx_cpu_t *tc = &tx->tx_cpu[c];
423eda14cbcSMatt Macy mutex_enter(&tc->tc_lock);
424eda14cbcSMatt Macy while (tc->tc_count[g] != 0)
425eda14cbcSMatt Macy cv_wait(&tc->tc_cv[g], &tc->tc_lock);
426eda14cbcSMatt Macy mutex_exit(&tc->tc_lock);
427eda14cbcSMatt Macy }
428eda14cbcSMatt Macy
429eda14cbcSMatt Macy spa_txg_history_set(dp->dp_spa, txg, TXG_STATE_QUIESCED, gethrtime());
430eda14cbcSMatt Macy }
431eda14cbcSMatt Macy
432eda14cbcSMatt Macy static void
txg_do_callbacks(void * cb_list)433bb2d13b6SMartin Matuska txg_do_callbacks(void *cb_list)
434eda14cbcSMatt Macy {
435eda14cbcSMatt Macy dmu_tx_do_callbacks(cb_list, 0);
436eda14cbcSMatt Macy
437eda14cbcSMatt Macy list_destroy(cb_list);
438eda14cbcSMatt Macy
439eda14cbcSMatt Macy kmem_free(cb_list, sizeof (list_t));
440eda14cbcSMatt Macy }
441eda14cbcSMatt Macy
442eda14cbcSMatt Macy /*
443eda14cbcSMatt Macy * Dispatch the commit callbacks registered on this txg to worker threads.
444eda14cbcSMatt Macy *
445eda14cbcSMatt Macy * If no callbacks are registered for a given TXG, nothing happens.
446eda14cbcSMatt Macy * This function creates a taskq for the associated pool, if needed.
447eda14cbcSMatt Macy */
448eda14cbcSMatt Macy static void
txg_dispatch_callbacks(dsl_pool_t * dp,uint64_t txg)449eda14cbcSMatt Macy txg_dispatch_callbacks(dsl_pool_t *dp, uint64_t txg)
450eda14cbcSMatt Macy {
451eda14cbcSMatt Macy int c;
452eda14cbcSMatt Macy tx_state_t *tx = &dp->dp_tx;
453eda14cbcSMatt Macy list_t *cb_list;
454eda14cbcSMatt Macy
455eda14cbcSMatt Macy for (c = 0; c < max_ncpus; c++) {
456eda14cbcSMatt Macy tx_cpu_t *tc = &tx->tx_cpu[c];
457eda14cbcSMatt Macy /*
458eda14cbcSMatt Macy * No need to lock tx_cpu_t at this point, since this can
459eda14cbcSMatt Macy * only be called once a txg has been synced.
460eda14cbcSMatt Macy */
461eda14cbcSMatt Macy
462eda14cbcSMatt Macy int g = txg & TXG_MASK;
463eda14cbcSMatt Macy
464eda14cbcSMatt Macy if (list_is_empty(&tc->tc_callbacks[g]))
465eda14cbcSMatt Macy continue;
466eda14cbcSMatt Macy
467eda14cbcSMatt Macy if (tx->tx_commit_cb_taskq == NULL) {
468eda14cbcSMatt Macy /*
469eda14cbcSMatt Macy * Commit callback taskq hasn't been created yet.
470eda14cbcSMatt Macy */
471eda14cbcSMatt Macy tx->tx_commit_cb_taskq = taskq_create("tx_commit_cb",
4727877fdebSMatt Macy 100, defclsyspri, boot_ncpus, boot_ncpus * 2,
4737877fdebSMatt Macy TASKQ_PREPOPULATE | TASKQ_DYNAMIC |
4747877fdebSMatt Macy TASKQ_THREADS_CPU_PCT);
475eda14cbcSMatt Macy }
476eda14cbcSMatt Macy
477eda14cbcSMatt Macy cb_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
478eda14cbcSMatt Macy list_create(cb_list, sizeof (dmu_tx_callback_t),
479eda14cbcSMatt Macy offsetof(dmu_tx_callback_t, dcb_node));
480eda14cbcSMatt Macy
481eda14cbcSMatt Macy list_move_tail(cb_list, &tc->tc_callbacks[g]);
482eda14cbcSMatt Macy
483bb2d13b6SMartin Matuska (void) taskq_dispatch(tx->tx_commit_cb_taskq,
484eda14cbcSMatt Macy txg_do_callbacks, cb_list, TQ_SLEEP);
485eda14cbcSMatt Macy }
486eda14cbcSMatt Macy }
487eda14cbcSMatt Macy
488eda14cbcSMatt Macy /*
489eda14cbcSMatt Macy * Wait for pending commit callbacks of already-synced transactions to finish
490eda14cbcSMatt Macy * processing.
491eda14cbcSMatt Macy * Calling this function from within a commit callback will deadlock.
492eda14cbcSMatt Macy */
493eda14cbcSMatt Macy void
txg_wait_callbacks(dsl_pool_t * dp)494eda14cbcSMatt Macy txg_wait_callbacks(dsl_pool_t *dp)
495eda14cbcSMatt Macy {
496eda14cbcSMatt Macy tx_state_t *tx = &dp->dp_tx;
497eda14cbcSMatt Macy
498eda14cbcSMatt Macy if (tx->tx_commit_cb_taskq != NULL)
499eda14cbcSMatt Macy taskq_wait_outstanding(tx->tx_commit_cb_taskq, 0);
500eda14cbcSMatt Macy }
501eda14cbcSMatt Macy
502eda14cbcSMatt Macy static boolean_t
txg_is_quiescing(dsl_pool_t * dp)503eda14cbcSMatt Macy txg_is_quiescing(dsl_pool_t *dp)
504eda14cbcSMatt Macy {
505eda14cbcSMatt Macy tx_state_t *tx = &dp->dp_tx;
506eda14cbcSMatt Macy ASSERT(MUTEX_HELD(&tx->tx_sync_lock));
507eda14cbcSMatt Macy return (tx->tx_quiescing_txg != 0);
508eda14cbcSMatt Macy }
509eda14cbcSMatt Macy
510eda14cbcSMatt Macy static boolean_t
txg_has_quiesced_to_sync(dsl_pool_t * dp)511eda14cbcSMatt Macy txg_has_quiesced_to_sync(dsl_pool_t *dp)
512eda14cbcSMatt Macy {
513eda14cbcSMatt Macy tx_state_t *tx = &dp->dp_tx;
514eda14cbcSMatt Macy ASSERT(MUTEX_HELD(&tx->tx_sync_lock));
515eda14cbcSMatt Macy return (tx->tx_quiesced_txg != 0);
516eda14cbcSMatt Macy }
517eda14cbcSMatt Macy
518da5137abSMartin Matuska static __attribute__((noreturn)) void
txg_sync_thread(void * arg)519eda14cbcSMatt Macy txg_sync_thread(void *arg)
520eda14cbcSMatt Macy {
521eda14cbcSMatt Macy dsl_pool_t *dp = arg;
522eda14cbcSMatt Macy spa_t *spa = dp->dp_spa;
523eda14cbcSMatt Macy tx_state_t *tx = &dp->dp_tx;
524eda14cbcSMatt Macy callb_cpr_t cpr;
525eda14cbcSMatt Macy clock_t start, delta;
526eda14cbcSMatt Macy
527eda14cbcSMatt Macy (void) spl_fstrans_mark();
528eda14cbcSMatt Macy txg_thread_enter(tx, &cpr);
529eda14cbcSMatt Macy
530eda14cbcSMatt Macy start = delta = 0;
531eda14cbcSMatt Macy for (;;) {
532eda14cbcSMatt Macy clock_t timeout = zfs_txg_timeout * hz;
533eda14cbcSMatt Macy clock_t timer;
534eda14cbcSMatt Macy uint64_t txg;
535eda14cbcSMatt Macy
536eda14cbcSMatt Macy /*
537eda14cbcSMatt Macy * We sync when we're scanning, there's someone waiting
538eda14cbcSMatt Macy * on us, or the quiesce thread has handed off a txg to
539eda14cbcSMatt Macy * us, or we have reached our timeout.
540eda14cbcSMatt Macy */
541eda14cbcSMatt Macy timer = (delta >= timeout ? 0 : timeout - delta);
542eda14cbcSMatt Macy while (!dsl_scan_active(dp->dp_scan) &&
543eda14cbcSMatt Macy !tx->tx_exiting && timer > 0 &&
544eda14cbcSMatt Macy tx->tx_synced_txg >= tx->tx_sync_txg_waiting &&
5457cd22ac4SMartin Matuska !txg_has_quiesced_to_sync(dp)) {
546eda14cbcSMatt Macy dprintf("waiting; tx_synced=%llu waiting=%llu dp=%p\n",
54733b8c039SMartin Matuska (u_longlong_t)tx->tx_synced_txg,
54833b8c039SMartin Matuska (u_longlong_t)tx->tx_sync_txg_waiting, dp);
549eda14cbcSMatt Macy txg_thread_wait(tx, &cpr, &tx->tx_sync_more_cv, timer);
550eda14cbcSMatt Macy delta = ddi_get_lbolt() - start;
551eda14cbcSMatt Macy timer = (delta > timeout ? 0 : timeout - delta);
552eda14cbcSMatt Macy }
553eda14cbcSMatt Macy
554eda14cbcSMatt Macy /*
555b985c9caSMartin Matuska * When we're suspended, nothing should be changing and for
556b985c9caSMartin Matuska * MMP we don't want to bump anything that would make it
557b985c9caSMartin Matuska * harder to detect if another host is changing it when
558b985c9caSMartin Matuska * resuming after a MMP suspend.
559b985c9caSMartin Matuska */
560b985c9caSMartin Matuska if (spa_suspended(spa))
561b985c9caSMartin Matuska continue;
562b985c9caSMartin Matuska
563b985c9caSMartin Matuska /*
564eda14cbcSMatt Macy * Wait until the quiesce thread hands off a txg to us,
565eda14cbcSMatt Macy * prompting it to do so if necessary.
566eda14cbcSMatt Macy */
567eda14cbcSMatt Macy while (!tx->tx_exiting && !txg_has_quiesced_to_sync(dp)) {
5687cd22ac4SMartin Matuska if (txg_is_quiescing(dp)) {
5697cd22ac4SMartin Matuska txg_thread_wait(tx, &cpr,
5707cd22ac4SMartin Matuska &tx->tx_quiesce_done_cv, 0);
5717cd22ac4SMartin Matuska continue;
5727cd22ac4SMartin Matuska }
573eda14cbcSMatt Macy if (tx->tx_quiesce_txg_waiting < tx->tx_open_txg+1)
574eda14cbcSMatt Macy tx->tx_quiesce_txg_waiting = tx->tx_open_txg+1;
575eda14cbcSMatt Macy cv_broadcast(&tx->tx_quiesce_more_cv);
576eda14cbcSMatt Macy txg_thread_wait(tx, &cpr, &tx->tx_quiesce_done_cv, 0);
577eda14cbcSMatt Macy }
578eda14cbcSMatt Macy
579eda14cbcSMatt Macy if (tx->tx_exiting)
580eda14cbcSMatt Macy txg_thread_exit(tx, &cpr, &tx->tx_sync_thread);
581eda14cbcSMatt Macy
582eda14cbcSMatt Macy /*
583eda14cbcSMatt Macy * Consume the quiesced txg which has been handed off to
584eda14cbcSMatt Macy * us. This may cause the quiescing thread to now be
585eda14cbcSMatt Macy * able to quiesce another txg, so we must signal it.
586eda14cbcSMatt Macy */
587eda14cbcSMatt Macy ASSERT(tx->tx_quiesced_txg != 0);
588eda14cbcSMatt Macy txg = tx->tx_quiesced_txg;
589eda14cbcSMatt Macy tx->tx_quiesced_txg = 0;
590eda14cbcSMatt Macy tx->tx_syncing_txg = txg;
591eda14cbcSMatt Macy DTRACE_PROBE2(txg__syncing, dsl_pool_t *, dp, uint64_t, txg);
592eda14cbcSMatt Macy cv_broadcast(&tx->tx_quiesce_more_cv);
593eda14cbcSMatt Macy
594eda14cbcSMatt Macy dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
59533b8c039SMartin Matuska (u_longlong_t)txg, (u_longlong_t)tx->tx_quiesce_txg_waiting,
59633b8c039SMartin Matuska (u_longlong_t)tx->tx_sync_txg_waiting);
597eda14cbcSMatt Macy mutex_exit(&tx->tx_sync_lock);
598eda14cbcSMatt Macy
599eda14cbcSMatt Macy txg_stat_t *ts = spa_txg_history_init_io(spa, txg, dp);
600eda14cbcSMatt Macy start = ddi_get_lbolt();
601eda14cbcSMatt Macy spa_sync(spa, txg);
602eda14cbcSMatt Macy delta = ddi_get_lbolt() - start;
603eda14cbcSMatt Macy spa_txg_history_fini_io(spa, ts);
604eda14cbcSMatt Macy
605eda14cbcSMatt Macy mutex_enter(&tx->tx_sync_lock);
606eda14cbcSMatt Macy tx->tx_synced_txg = txg;
607eda14cbcSMatt Macy tx->tx_syncing_txg = 0;
608eda14cbcSMatt Macy DTRACE_PROBE2(txg__synced, dsl_pool_t *, dp, uint64_t, txg);
609eda14cbcSMatt Macy cv_broadcast(&tx->tx_sync_done_cv);
610eda14cbcSMatt Macy
611eda14cbcSMatt Macy /*
612eda14cbcSMatt Macy * Dispatch commit callbacks to worker threads.
613eda14cbcSMatt Macy */
614eda14cbcSMatt Macy txg_dispatch_callbacks(dp, txg);
615eda14cbcSMatt Macy }
616eda14cbcSMatt Macy }
617eda14cbcSMatt Macy
/*
 * Pool thread that moves the open txg through the quiescing state and
 * hands the quiesced txg off to the sync thread.  It exits only via
 * txg_thread_exit() once tx->tx_exiting is set (hence noreturn).
 */
static __attribute__((noreturn)) void
txg_quiesce_thread(void *arg)
{
	dsl_pool_t *dp = arg;
	tx_state_t *tx = &dp->dp_tx;
	callb_cpr_t cpr;

	txg_thread_enter(tx, &cpr);

	for (;;) {
		uint64_t txg;

		/*
		 * We quiesce when there's someone waiting on us.
		 * However, we can only have one txg in "quiescing" or
		 * "quiesced, waiting to sync" state. So we wait until
		 * the "quiesced, waiting to sync" txg has been consumed
		 * by the sync thread.
		 */
		while (!tx->tx_exiting &&
		    (tx->tx_open_txg >= tx->tx_quiesce_txg_waiting ||
		    txg_has_quiesced_to_sync(dp)))
			txg_thread_wait(tx, &cpr, &tx->tx_quiesce_more_cv, 0);

		if (tx->tx_exiting)
			txg_thread_exit(tx, &cpr, &tx->tx_quiesce_thread);

		txg = tx->tx_open_txg;
		dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
		    (u_longlong_t)txg,
		    (u_longlong_t)tx->tx_quiesce_txg_waiting,
		    (u_longlong_t)tx->tx_sync_txg_waiting);
		tx->tx_quiescing_txg = txg;

		/*
		 * Quiesce outside the lock, then retake it before
		 * publishing the handoff below.
		 */
		mutex_exit(&tx->tx_sync_lock);
		txg_quiesce(dp, txg);
		mutex_enter(&tx->tx_sync_lock);

		/*
		 * Hand this txg off to the sync thread.
		 */
		dprintf("quiesce done, handing off txg %llu\n",
		    (u_longlong_t)txg);
		tx->tx_quiescing_txg = 0;
		tx->tx_quiesced_txg = txg;
		DTRACE_PROBE2(txg__quiesced, dsl_pool_t *, dp, uint64_t, txg);
		/*
		 * Wake the sync thread (waiting on tx_sync_more_cv) and any
		 * callers blocked in txg_wait_open() (tx_quiesce_done_cv).
		 */
		cv_broadcast(&tx->tx_sync_more_cv);
		cv_broadcast(&tx->tx_quiesce_done_cv);
	}
}
668eda14cbcSMatt Macy
669eda14cbcSMatt Macy /*
670eda14cbcSMatt Macy * Delay this thread by delay nanoseconds if we are still in the open
671eda14cbcSMatt Macy * transaction group and there is already a waiting txg quiescing or quiesced.
672eda14cbcSMatt Macy * Abort the delay if this txg stalls or enters the quiescing state.
673eda14cbcSMatt Macy */
void
txg_delay(dsl_pool_t *dp, uint64_t txg, hrtime_t delay, hrtime_t resolution)
{
	tx_state_t *tx = &dp->dp_tx;
	hrtime_t start = gethrtime();

	/* don't delay if this txg could transition to quiescing immediately */
	if (tx->tx_open_txg > txg ||
	    tx->tx_syncing_txg == txg-1 || tx->tx_synced_txg == txg-1)
		return;

	/* Recheck under the lock; the lockless test above may have raced. */
	mutex_enter(&tx->tx_sync_lock);
	if (tx->tx_open_txg > txg || tx->tx_synced_txg == txg-1) {
		mutex_exit(&tx->tx_sync_lock);
		return;
	}

	/*
	 * Sleep until `delay` has elapsed since `start`, waking early if
	 * the previous txg starts syncing or the pipeline stalls.
	 */
	while (gethrtime() - start < delay &&
	    tx->tx_syncing_txg < txg-1 && !txg_stalled(dp)) {
		(void) cv_timedwait_hires(&tx->tx_quiesce_more_cv,
		    &tx->tx_sync_lock, delay, resolution, 0);
	}

	DMU_TX_STAT_BUMP(dmu_tx_delay);

	mutex_exit(&tx->tx_sync_lock);
}
701eda14cbcSMatt Macy
/*
 * Wait until tx_synced_txg reaches at least `txg`.  A txg of 0 means
 * "the currently open txg plus TXG_DEFER_SIZE".  When wait_sig is set
 * the wait may be interrupted by a signal, in which case B_TRUE is
 * returned; otherwise B_FALSE is returned once the txg has synced.
 */
static boolean_t
txg_wait_synced_impl(dsl_pool_t *dp, uint64_t txg, boolean_t wait_sig)
{
	tx_state_t *tx = &dp->dp_tx;

	ASSERT(!dsl_pool_config_held(dp));

	mutex_enter(&tx->tx_sync_lock);
	/* Both the sync and quiesce threads must be running. */
	ASSERT3U(tx->tx_threads, ==, 2);
	if (txg == 0)
		txg = tx->tx_open_txg + TXG_DEFER_SIZE;
	if (tx->tx_sync_txg_waiting < txg)
		tx->tx_sync_txg_waiting = txg;
	dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
	    (u_longlong_t)txg, (u_longlong_t)tx->tx_quiesce_txg_waiting,
	    (u_longlong_t)tx->tx_sync_txg_waiting);
	while (tx->tx_synced_txg < txg) {
		dprintf("broadcasting sync more "
		    "tx_synced=%llu waiting=%llu dp=%px\n",
		    (u_longlong_t)tx->tx_synced_txg,
		    (u_longlong_t)tx->tx_sync_txg_waiting, dp);
		/* Nudge the sync thread, which waits on tx_sync_more_cv. */
		cv_broadcast(&tx->tx_sync_more_cv);
		if (wait_sig) {
			/*
			 * Condition wait here but stop if the thread receives a
			 * signal. The caller may call txg_wait_synced*() again
			 * to resume waiting for this txg.
			 */
			if (cv_wait_io_sig(&tx->tx_sync_done_cv,
			    &tx->tx_sync_lock) == 0) {
				mutex_exit(&tx->tx_sync_lock);
				return (B_TRUE);
			}
		} else {
			cv_wait_io(&tx->tx_sync_done_cv, &tx->tx_sync_lock);
		}
	}
	mutex_exit(&tx->tx_sync_lock);
	return (B_FALSE);
}
742eda14cbcSMatt Macy
743eda14cbcSMatt Macy void
txg_wait_synced(dsl_pool_t * dp,uint64_t txg)744eda14cbcSMatt Macy txg_wait_synced(dsl_pool_t *dp, uint64_t txg)
745eda14cbcSMatt Macy {
746eda14cbcSMatt Macy VERIFY0(txg_wait_synced_impl(dp, txg, B_FALSE));
747eda14cbcSMatt Macy }
748eda14cbcSMatt Macy
749eda14cbcSMatt Macy /*
750eda14cbcSMatt Macy * Similar to a txg_wait_synced but it can be interrupted from a signal.
751eda14cbcSMatt Macy * Returns B_TRUE if the thread was signaled while waiting.
752eda14cbcSMatt Macy */
753eda14cbcSMatt Macy boolean_t
txg_wait_synced_sig(dsl_pool_t * dp,uint64_t txg)754eda14cbcSMatt Macy txg_wait_synced_sig(dsl_pool_t *dp, uint64_t txg)
755eda14cbcSMatt Macy {
756eda14cbcSMatt Macy return (txg_wait_synced_impl(dp, txg, B_TRUE));
757eda14cbcSMatt Macy }
758eda14cbcSMatt Macy
759eda14cbcSMatt Macy /*
760eda14cbcSMatt Macy * Wait for the specified open transaction group. Set should_quiesce
761eda14cbcSMatt Macy * when the current open txg should be quiesced immediately.
762eda14cbcSMatt Macy */
void
txg_wait_open(dsl_pool_t *dp, uint64_t txg, boolean_t should_quiesce)
{
	tx_state_t *tx = &dp->dp_tx;

	ASSERT(!dsl_pool_config_held(dp));

	mutex_enter(&tx->tx_sync_lock);
	/* Both the sync and quiesce threads must be running. */
	ASSERT3U(tx->tx_threads, ==, 2);
	/* A txg of 0 means "the txg after the currently open one". */
	if (txg == 0)
		txg = tx->tx_open_txg + 1;
	if (tx->tx_quiesce_txg_waiting < txg && should_quiesce)
		tx->tx_quiesce_txg_waiting = txg;
	dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
	    (u_longlong_t)txg, (u_longlong_t)tx->tx_quiesce_txg_waiting,
	    (u_longlong_t)tx->tx_sync_txg_waiting);
	while (tx->tx_open_txg < txg) {
		/* Wake the quiesce thread, which waits on this cv. */
		cv_broadcast(&tx->tx_quiesce_more_cv);
		/*
		 * Callers setting should_quiesce will use cv_wait_io() and
		 * be accounted for as iowait time. Otherwise, the caller is
		 * understood to be idle and cv_wait_idle() is used to prevent
		 * incorrectly inflating the system load average.
		 */
		if (should_quiesce == B_TRUE) {
			cv_wait_io(&tx->tx_quiesce_done_cv, &tx->tx_sync_lock);
		} else {
			cv_wait_idle(&tx->tx_quiesce_done_cv,
			    &tx->tx_sync_lock);
		}
	}
	mutex_exit(&tx->tx_sync_lock);
}
796eda14cbcSMatt Macy
797eda14cbcSMatt Macy /*
7987cd22ac4SMartin Matuska * Pass in the txg number that should be synced.
799eda14cbcSMatt Macy */
void
txg_kick(dsl_pool_t *dp, uint64_t txg)
{
	tx_state_t *tx = &dp->dp_tx;

	ASSERT(!dsl_pool_config_held(dp));

	/*
	 * Cheap lockless check; if it races with a concurrent update we
	 * recheck under the lock below before making any change.
	 */
	if (tx->tx_sync_txg_waiting >= txg)
		return;

	mutex_enter(&tx->tx_sync_lock);
	if (tx->tx_sync_txg_waiting < txg) {
		tx->tx_sync_txg_waiting = txg;
		/* Wake the sync thread, which waits on tx_sync_more_cv. */
		cv_broadcast(&tx->tx_sync_more_cv);
	}
	mutex_exit(&tx->tx_sync_lock);
}
817eda14cbcSMatt Macy
818eda14cbcSMatt Macy boolean_t
txg_stalled(dsl_pool_t * dp)819eda14cbcSMatt Macy txg_stalled(dsl_pool_t *dp)
820eda14cbcSMatt Macy {
821eda14cbcSMatt Macy tx_state_t *tx = &dp->dp_tx;
822eda14cbcSMatt Macy return (tx->tx_quiesce_txg_waiting > tx->tx_open_txg);
823eda14cbcSMatt Macy }
824eda14cbcSMatt Macy
825eda14cbcSMatt Macy boolean_t
txg_sync_waiting(dsl_pool_t * dp)826eda14cbcSMatt Macy txg_sync_waiting(dsl_pool_t *dp)
827eda14cbcSMatt Macy {
828eda14cbcSMatt Macy tx_state_t *tx = &dp->dp_tx;
829eda14cbcSMatt Macy
830eda14cbcSMatt Macy return (tx->tx_syncing_txg <= tx->tx_sync_txg_waiting ||
831eda14cbcSMatt Macy tx->tx_quiesced_txg != 0);
832eda14cbcSMatt Macy }
833eda14cbcSMatt Macy
834eda14cbcSMatt Macy /*
835eda14cbcSMatt Macy * Verify that this txg is active (open, quiescing, syncing). Non-active
836eda14cbcSMatt Macy * txg's should not be manipulated.
837eda14cbcSMatt Macy */
#ifdef ZFS_DEBUG
/*
 * Assert that txg is currently active (open, quiescing, or syncing).
 * TXG_INITIAL and earlier, as well as ZILTEST_TXG, are exempt.
 */
void
txg_verify(spa_t *spa, uint64_t txg)
{
	dsl_pool_t *dp __maybe_unused = spa_get_dsl(spa);

	if (txg > TXG_INITIAL && txg != ZILTEST_TXG) {
		ASSERT3U(txg, <=, dp->dp_tx.tx_open_txg);
		ASSERT3U(txg, >=, dp->dp_tx.tx_synced_txg);
		ASSERT3U(txg, >=,
		    dp->dp_tx.tx_open_txg - TXG_CONCURRENT_STATES);
	}
}
#endif
850eda14cbcSMatt Macy
851eda14cbcSMatt Macy /*
852eda14cbcSMatt Macy * Per-txg object lists.
853eda14cbcSMatt Macy */
854eda14cbcSMatt Macy void
txg_list_create(txg_list_t * tl,spa_t * spa,size_t offset)855eda14cbcSMatt Macy txg_list_create(txg_list_t *tl, spa_t *spa, size_t offset)
856eda14cbcSMatt Macy {
857eda14cbcSMatt Macy int t;
858eda14cbcSMatt Macy
859eda14cbcSMatt Macy mutex_init(&tl->tl_lock, NULL, MUTEX_DEFAULT, NULL);
860eda14cbcSMatt Macy
861eda14cbcSMatt Macy tl->tl_offset = offset;
862eda14cbcSMatt Macy tl->tl_spa = spa;
863eda14cbcSMatt Macy
864eda14cbcSMatt Macy for (t = 0; t < TXG_SIZE; t++)
865eda14cbcSMatt Macy tl->tl_head[t] = NULL;
866eda14cbcSMatt Macy }
867eda14cbcSMatt Macy
868eda14cbcSMatt Macy static boolean_t
txg_list_empty_impl(txg_list_t * tl,uint64_t txg)869eda14cbcSMatt Macy txg_list_empty_impl(txg_list_t *tl, uint64_t txg)
870eda14cbcSMatt Macy {
871eda14cbcSMatt Macy ASSERT(MUTEX_HELD(&tl->tl_lock));
872eda14cbcSMatt Macy TXG_VERIFY(tl->tl_spa, txg);
873eda14cbcSMatt Macy return (tl->tl_head[txg & TXG_MASK] == NULL);
874eda14cbcSMatt Macy }
875eda14cbcSMatt Macy
876eda14cbcSMatt Macy boolean_t
txg_list_empty(txg_list_t * tl,uint64_t txg)877eda14cbcSMatt Macy txg_list_empty(txg_list_t *tl, uint64_t txg)
878eda14cbcSMatt Macy {
879eda14cbcSMatt Macy mutex_enter(&tl->tl_lock);
880eda14cbcSMatt Macy boolean_t ret = txg_list_empty_impl(tl, txg);
881eda14cbcSMatt Macy mutex_exit(&tl->tl_lock);
882eda14cbcSMatt Macy
883eda14cbcSMatt Macy return (ret);
884eda14cbcSMatt Macy }
885eda14cbcSMatt Macy
886eda14cbcSMatt Macy void
txg_list_destroy(txg_list_t * tl)887eda14cbcSMatt Macy txg_list_destroy(txg_list_t *tl)
888eda14cbcSMatt Macy {
889eda14cbcSMatt Macy int t;
890eda14cbcSMatt Macy
891eda14cbcSMatt Macy mutex_enter(&tl->tl_lock);
892eda14cbcSMatt Macy for (t = 0; t < TXG_SIZE; t++)
893eda14cbcSMatt Macy ASSERT(txg_list_empty_impl(tl, t));
894eda14cbcSMatt Macy mutex_exit(&tl->tl_lock);
895eda14cbcSMatt Macy
896eda14cbcSMatt Macy mutex_destroy(&tl->tl_lock);
897eda14cbcSMatt Macy }
898eda14cbcSMatt Macy
899eda14cbcSMatt Macy /*
900eda14cbcSMatt Macy * Returns true if all txg lists are empty.
901eda14cbcSMatt Macy *
902eda14cbcSMatt Macy * Warning: this is inherently racy (an item could be added immediately
903eda14cbcSMatt Macy * after this function returns).
904eda14cbcSMatt Macy */
905eda14cbcSMatt Macy boolean_t
txg_all_lists_empty(txg_list_t * tl)906eda14cbcSMatt Macy txg_all_lists_empty(txg_list_t *tl)
907eda14cbcSMatt Macy {
9087b5e6873SMartin Matuska boolean_t res = B_TRUE;
9097b5e6873SMartin Matuska for (int i = 0; i < TXG_SIZE; i++)
9107b5e6873SMartin Matuska res &= (tl->tl_head[i] == NULL);
9117b5e6873SMartin Matuska return (res);
912eda14cbcSMatt Macy }
913eda14cbcSMatt Macy
914eda14cbcSMatt Macy /*
915eda14cbcSMatt Macy * Add an entry to the list (unless it's already on the list).
916eda14cbcSMatt Macy * Returns B_TRUE if it was actually added.
917eda14cbcSMatt Macy */
918eda14cbcSMatt Macy boolean_t
txg_list_add(txg_list_t * tl,void * p,uint64_t txg)919eda14cbcSMatt Macy txg_list_add(txg_list_t *tl, void *p, uint64_t txg)
920eda14cbcSMatt Macy {
921eda14cbcSMatt Macy int t = txg & TXG_MASK;
922eda14cbcSMatt Macy txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
923eda14cbcSMatt Macy boolean_t add;
924eda14cbcSMatt Macy
925eda14cbcSMatt Macy TXG_VERIFY(tl->tl_spa, txg);
926eda14cbcSMatt Macy mutex_enter(&tl->tl_lock);
927eda14cbcSMatt Macy add = (tn->tn_member[t] == 0);
928eda14cbcSMatt Macy if (add) {
929eda14cbcSMatt Macy tn->tn_member[t] = 1;
930eda14cbcSMatt Macy tn->tn_next[t] = tl->tl_head[t];
931eda14cbcSMatt Macy tl->tl_head[t] = tn;
932eda14cbcSMatt Macy }
933eda14cbcSMatt Macy mutex_exit(&tl->tl_lock);
934eda14cbcSMatt Macy
935eda14cbcSMatt Macy return (add);
936eda14cbcSMatt Macy }
937eda14cbcSMatt Macy
938eda14cbcSMatt Macy /*
939eda14cbcSMatt Macy * Add an entry to the end of the list, unless it's already on the list.
940eda14cbcSMatt Macy * (walks list to find end)
941eda14cbcSMatt Macy * Returns B_TRUE if it was actually added.
942eda14cbcSMatt Macy */
943eda14cbcSMatt Macy boolean_t
txg_list_add_tail(txg_list_t * tl,void * p,uint64_t txg)944eda14cbcSMatt Macy txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg)
945eda14cbcSMatt Macy {
946eda14cbcSMatt Macy int t = txg & TXG_MASK;
947eda14cbcSMatt Macy txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
948eda14cbcSMatt Macy boolean_t add;
949eda14cbcSMatt Macy
950eda14cbcSMatt Macy TXG_VERIFY(tl->tl_spa, txg);
951eda14cbcSMatt Macy mutex_enter(&tl->tl_lock);
952eda14cbcSMatt Macy add = (tn->tn_member[t] == 0);
953eda14cbcSMatt Macy if (add) {
954eda14cbcSMatt Macy txg_node_t **tp;
955eda14cbcSMatt Macy
956eda14cbcSMatt Macy for (tp = &tl->tl_head[t]; *tp != NULL; tp = &(*tp)->tn_next[t])
957eda14cbcSMatt Macy continue;
958eda14cbcSMatt Macy
959eda14cbcSMatt Macy tn->tn_member[t] = 1;
960eda14cbcSMatt Macy tn->tn_next[t] = NULL;
961eda14cbcSMatt Macy *tp = tn;
962eda14cbcSMatt Macy }
963eda14cbcSMatt Macy mutex_exit(&tl->tl_lock);
964eda14cbcSMatt Macy
965eda14cbcSMatt Macy return (add);
966eda14cbcSMatt Macy }
967eda14cbcSMatt Macy
968eda14cbcSMatt Macy /*
969eda14cbcSMatt Macy * Remove the head of the list and return it.
970eda14cbcSMatt Macy */
971eda14cbcSMatt Macy void *
txg_list_remove(txg_list_t * tl,uint64_t txg)972eda14cbcSMatt Macy txg_list_remove(txg_list_t *tl, uint64_t txg)
973eda14cbcSMatt Macy {
974eda14cbcSMatt Macy int t = txg & TXG_MASK;
975eda14cbcSMatt Macy txg_node_t *tn;
976eda14cbcSMatt Macy void *p = NULL;
977eda14cbcSMatt Macy
978eda14cbcSMatt Macy TXG_VERIFY(tl->tl_spa, txg);
979eda14cbcSMatt Macy mutex_enter(&tl->tl_lock);
980eda14cbcSMatt Macy if ((tn = tl->tl_head[t]) != NULL) {
981eda14cbcSMatt Macy ASSERT(tn->tn_member[t]);
982eda14cbcSMatt Macy ASSERT(tn->tn_next[t] == NULL || tn->tn_next[t]->tn_member[t]);
983eda14cbcSMatt Macy p = (char *)tn - tl->tl_offset;
984eda14cbcSMatt Macy tl->tl_head[t] = tn->tn_next[t];
985eda14cbcSMatt Macy tn->tn_next[t] = NULL;
986eda14cbcSMatt Macy tn->tn_member[t] = 0;
987eda14cbcSMatt Macy }
988eda14cbcSMatt Macy mutex_exit(&tl->tl_lock);
989eda14cbcSMatt Macy
990eda14cbcSMatt Macy return (p);
991eda14cbcSMatt Macy }
992eda14cbcSMatt Macy
993eda14cbcSMatt Macy /*
994eda14cbcSMatt Macy * Remove a specific item from the list and return it.
995eda14cbcSMatt Macy */
996eda14cbcSMatt Macy void *
txg_list_remove_this(txg_list_t * tl,void * p,uint64_t txg)997eda14cbcSMatt Macy txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg)
998eda14cbcSMatt Macy {
999eda14cbcSMatt Macy int t = txg & TXG_MASK;
1000eda14cbcSMatt Macy txg_node_t *tn, **tp;
1001eda14cbcSMatt Macy
1002eda14cbcSMatt Macy TXG_VERIFY(tl->tl_spa, txg);
1003eda14cbcSMatt Macy mutex_enter(&tl->tl_lock);
1004eda14cbcSMatt Macy
1005eda14cbcSMatt Macy for (tp = &tl->tl_head[t]; (tn = *tp) != NULL; tp = &tn->tn_next[t]) {
1006eda14cbcSMatt Macy if ((char *)tn - tl->tl_offset == p) {
1007eda14cbcSMatt Macy *tp = tn->tn_next[t];
1008eda14cbcSMatt Macy tn->tn_next[t] = NULL;
1009eda14cbcSMatt Macy tn->tn_member[t] = 0;
1010eda14cbcSMatt Macy mutex_exit(&tl->tl_lock);
1011eda14cbcSMatt Macy return (p);
1012eda14cbcSMatt Macy }
1013eda14cbcSMatt Macy }
1014eda14cbcSMatt Macy
1015eda14cbcSMatt Macy mutex_exit(&tl->tl_lock);
1016eda14cbcSMatt Macy
1017eda14cbcSMatt Macy return (NULL);
1018eda14cbcSMatt Macy }
1019eda14cbcSMatt Macy
1020eda14cbcSMatt Macy boolean_t
txg_list_member(txg_list_t * tl,void * p,uint64_t txg)1021eda14cbcSMatt Macy txg_list_member(txg_list_t *tl, void *p, uint64_t txg)
1022eda14cbcSMatt Macy {
1023eda14cbcSMatt Macy int t = txg & TXG_MASK;
1024eda14cbcSMatt Macy txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
1025eda14cbcSMatt Macy
1026eda14cbcSMatt Macy TXG_VERIFY(tl->tl_spa, txg);
1027eda14cbcSMatt Macy return (tn->tn_member[t] != 0);
1028eda14cbcSMatt Macy }
1029eda14cbcSMatt Macy
1030eda14cbcSMatt Macy /*
1031eda14cbcSMatt Macy * Walk a txg list
1032eda14cbcSMatt Macy */
1033eda14cbcSMatt Macy void *
txg_list_head(txg_list_t * tl,uint64_t txg)1034eda14cbcSMatt Macy txg_list_head(txg_list_t *tl, uint64_t txg)
1035eda14cbcSMatt Macy {
1036eda14cbcSMatt Macy int t = txg & TXG_MASK;
1037eda14cbcSMatt Macy txg_node_t *tn;
1038eda14cbcSMatt Macy
1039eda14cbcSMatt Macy mutex_enter(&tl->tl_lock);
1040eda14cbcSMatt Macy tn = tl->tl_head[t];
1041eda14cbcSMatt Macy mutex_exit(&tl->tl_lock);
1042eda14cbcSMatt Macy
1043eda14cbcSMatt Macy TXG_VERIFY(tl->tl_spa, txg);
1044eda14cbcSMatt Macy return (tn == NULL ? NULL : (char *)tn - tl->tl_offset);
1045eda14cbcSMatt Macy }
1046eda14cbcSMatt Macy
1047eda14cbcSMatt Macy void *
txg_list_next(txg_list_t * tl,void * p,uint64_t txg)1048eda14cbcSMatt Macy txg_list_next(txg_list_t *tl, void *p, uint64_t txg)
1049eda14cbcSMatt Macy {
1050eda14cbcSMatt Macy int t = txg & TXG_MASK;
1051eda14cbcSMatt Macy txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
1052eda14cbcSMatt Macy
1053eda14cbcSMatt Macy TXG_VERIFY(tl->tl_spa, txg);
1054eda14cbcSMatt Macy
1055eda14cbcSMatt Macy mutex_enter(&tl->tl_lock);
1056eda14cbcSMatt Macy tn = tn->tn_next[t];
1057eda14cbcSMatt Macy mutex_exit(&tl->tl_lock);
1058eda14cbcSMatt Macy
1059eda14cbcSMatt Macy return (tn == NULL ? NULL : (char *)tn - tl->tl_offset);
1060eda14cbcSMatt Macy }
1061eda14cbcSMatt Macy
1062eda14cbcSMatt Macy EXPORT_SYMBOL(txg_init);
1063eda14cbcSMatt Macy EXPORT_SYMBOL(txg_fini);
1064eda14cbcSMatt Macy EXPORT_SYMBOL(txg_sync_start);
1065eda14cbcSMatt Macy EXPORT_SYMBOL(txg_sync_stop);
1066eda14cbcSMatt Macy EXPORT_SYMBOL(txg_hold_open);
1067eda14cbcSMatt Macy EXPORT_SYMBOL(txg_rele_to_quiesce);
1068eda14cbcSMatt Macy EXPORT_SYMBOL(txg_rele_to_sync);
1069eda14cbcSMatt Macy EXPORT_SYMBOL(txg_register_callbacks);
1070eda14cbcSMatt Macy EXPORT_SYMBOL(txg_delay);
1071eda14cbcSMatt Macy EXPORT_SYMBOL(txg_wait_synced);
1072eda14cbcSMatt Macy EXPORT_SYMBOL(txg_wait_open);
1073eda14cbcSMatt Macy EXPORT_SYMBOL(txg_wait_callbacks);
1074eda14cbcSMatt Macy EXPORT_SYMBOL(txg_stalled);
1075eda14cbcSMatt Macy EXPORT_SYMBOL(txg_sync_waiting);
1076eda14cbcSMatt Macy
1077be181ee2SMartin Matuska ZFS_MODULE_PARAM(zfs_txg, zfs_txg_, timeout, UINT, ZMOD_RW,
1078eda14cbcSMatt Macy "Max seconds worth of delta per txg");
1079