1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or https://opensource.org/licenses/CDDL-1.0.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
25 */
26
27 #include <sys/dmu.h>
28 #include <sys/dmu_tx.h>
29 #include <sys/dsl_pool.h>
30 #include <sys/dsl_dir.h>
31 #include <sys/dsl_synctask.h>
32 #include <sys/metaslab.h>
33
/*
 * Shift applied to a caller's blocks_modified count to turn it into the
 * dst_space estimate for a sync task (1 << 14 == 16 KiB per block).
 */
#define DST_AVG_BLKSHIFT 14
35
36 static int
dsl_null_checkfunc(void * arg,dmu_tx_t * tx)37 dsl_null_checkfunc(void *arg, dmu_tx_t *tx)
38 {
39 (void) arg, (void) tx;
40 return (0);
41 }
42
43 static int
dsl_sync_task_common(const char * pool,dsl_checkfunc_t * checkfunc,dsl_syncfunc_t * syncfunc,dsl_sigfunc_t * sigfunc,void * arg,int blocks_modified,zfs_space_check_t space_check,boolean_t early)44 dsl_sync_task_common(const char *pool, dsl_checkfunc_t *checkfunc,
45 dsl_syncfunc_t *syncfunc, dsl_sigfunc_t *sigfunc, void *arg,
46 int blocks_modified, zfs_space_check_t space_check, boolean_t early)
47 {
48 spa_t *spa;
49 dmu_tx_t *tx;
50 int err;
51 dsl_sync_task_t dst = { { { NULL } } };
52 dsl_pool_t *dp;
53
54 err = spa_open(pool, &spa, FTAG);
55 if (err != 0)
56 return (err);
57 dp = spa_get_dsl(spa);
58
59 top:
60 tx = dmu_tx_create_dd(dp->dp_mos_dir);
61 VERIFY0(dmu_tx_assign(tx, DMU_TX_WAIT | DMU_TX_SUSPEND));
62
63 dst.dst_pool = dp;
64 dst.dst_txg = dmu_tx_get_txg(tx);
65 dst.dst_space = blocks_modified << DST_AVG_BLKSHIFT;
66 dst.dst_space_check = space_check;
67 dst.dst_checkfunc = checkfunc != NULL ? checkfunc : dsl_null_checkfunc;
68 dst.dst_syncfunc = syncfunc;
69 dst.dst_arg = arg;
70 dst.dst_error = 0;
71 dst.dst_nowaiter = B_FALSE;
72
73 dsl_pool_config_enter(dp, FTAG);
74 err = dst.dst_checkfunc(arg, tx);
75 dsl_pool_config_exit(dp, FTAG);
76
77 if (err != 0) {
78 dmu_tx_commit(tx);
79 spa_close(spa, FTAG);
80 return (err);
81 }
82
83 txg_list_t *task_list = (early) ?
84 &dp->dp_early_sync_tasks : &dp->dp_sync_tasks;
85 VERIFY(txg_list_add_tail(task_list, &dst, dst.dst_txg));
86
87 dmu_tx_commit(tx);
88
89 if (sigfunc != NULL) {
90 err = txg_wait_synced_flags(dp, dst.dst_txg, TXG_WAIT_SIGNAL);
91 if (err != 0) {
92 VERIFY3U(err, ==, EINTR);
93 /* current contract is to call func once */
94 sigfunc(arg, tx);
95 /* in case we're performing an EAGAIN retry */
96 sigfunc = NULL;
97
98 txg_wait_synced(dp, dst.dst_txg);
99 }
100 } else
101 txg_wait_synced(dp, dst.dst_txg);
102
103 if (dst.dst_error == EAGAIN) {
104 txg_wait_synced(dp, dst.dst_txg + TXG_DEFER_SIZE);
105 goto top;
106 }
107
108 spa_close(spa, FTAG);
109 return (dst.dst_error);
110 }
111
112 /*
113 * Called from open context to perform a callback in syncing context. Waits
114 * for the operation to complete.
115 *
116 * The checkfunc will be called from open context as a preliminary check
117 * which can quickly fail. If it succeeds, it will be called again from
118 * syncing context. The checkfunc should generally be designed to work
119 * properly in either context, but if necessary it can check
120 * dmu_tx_is_syncing(tx).
121 *
122 * The synctask infrastructure enforces proper locking strategy with respect
123 * to the dp_config_rwlock -- the lock will always be held when the callbacks
124 * are called. It will be held for read during the open-context (preliminary)
125 * call to the checkfunc, and then held for write from syncing context during
126 * the calls to the check and sync funcs.
127 *
128 * A dataset or pool name can be passed as the first argument. Typically,
129 * the check func will hold, check the return value of the hold, and then
130 * release the dataset. The sync func will VERIFYO(hold()) the dataset.
131 * This is safe because no changes can be made between the check and sync funcs,
132 * and the sync func will only be called if the check func successfully opened
133 * the dataset.
134 */
135 int
dsl_sync_task(const char * pool,dsl_checkfunc_t * checkfunc,dsl_syncfunc_t * syncfunc,void * arg,int blocks_modified,zfs_space_check_t space_check)136 dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc,
137 dsl_syncfunc_t *syncfunc, void *arg,
138 int blocks_modified, zfs_space_check_t space_check)
139 {
140 return (dsl_sync_task_common(pool, checkfunc, syncfunc, NULL, arg,
141 blocks_modified, space_check, B_FALSE));
142 }
143
144 /*
145 * An early synctask works exactly as a standard synctask with one important
146 * difference on the way it is handled during syncing context. Standard
147 * synctasks run after we've written out all the dirty blocks of dirty
148 * datasets. Early synctasks are executed before writing out any dirty data,
149 * and thus before standard synctasks.
150 *
151 * For that reason, early synctasks can affect the process of writing dirty
152 * changes to disk for the txg that they run and should be used with caution.
153 * In addition, early synctasks should not dirty any metaslabs as this would
154 * invalidate the precondition/invariant for subsequent early synctasks.
155 * [see dsl_pool_sync() and dsl_early_sync_task_verify()]
156 */
157 int
dsl_early_sync_task(const char * pool,dsl_checkfunc_t * checkfunc,dsl_syncfunc_t * syncfunc,void * arg,int blocks_modified,zfs_space_check_t space_check)158 dsl_early_sync_task(const char *pool, dsl_checkfunc_t *checkfunc,
159 dsl_syncfunc_t *syncfunc, void *arg,
160 int blocks_modified, zfs_space_check_t space_check)
161 {
162 return (dsl_sync_task_common(pool, checkfunc, syncfunc, NULL, arg,
163 blocks_modified, space_check, B_TRUE));
164 }
165
166 /*
167 * A standard synctask that can be interrupted from a signal. The sigfunc
168 * is called once if a signal occurred while waiting for the task to sync.
169 */
170 int
dsl_sync_task_sig(const char * pool,dsl_checkfunc_t * checkfunc,dsl_syncfunc_t * syncfunc,dsl_sigfunc_t * sigfunc,void * arg,int blocks_modified,zfs_space_check_t space_check)171 dsl_sync_task_sig(const char *pool, dsl_checkfunc_t *checkfunc,
172 dsl_syncfunc_t *syncfunc, dsl_sigfunc_t *sigfunc, void *arg,
173 int blocks_modified, zfs_space_check_t space_check)
174 {
175 return (dsl_sync_task_common(pool, checkfunc, syncfunc, sigfunc, arg,
176 blocks_modified, space_check, B_FALSE));
177 }
178
179 static void
dsl_sync_task_nowait_common(dsl_pool_t * dp,dsl_syncfunc_t * syncfunc,void * arg,dmu_tx_t * tx,boolean_t early)180 dsl_sync_task_nowait_common(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
181 dmu_tx_t *tx, boolean_t early)
182 {
183 dsl_sync_task_t *dst = kmem_zalloc(sizeof (*dst), KM_SLEEP);
184
185 dst->dst_pool = dp;
186 dst->dst_txg = dmu_tx_get_txg(tx);
187 dst->dst_space_check = ZFS_SPACE_CHECK_NONE;
188 dst->dst_checkfunc = dsl_null_checkfunc;
189 dst->dst_syncfunc = syncfunc;
190 dst->dst_arg = arg;
191 dst->dst_error = 0;
192 dst->dst_nowaiter = B_TRUE;
193
194 txg_list_t *task_list = (early) ?
195 &dp->dp_early_sync_tasks : &dp->dp_sync_tasks;
196 VERIFY(txg_list_add_tail(task_list, dst, dst->dst_txg));
197 }
198
199 void
dsl_sync_task_nowait(dsl_pool_t * dp,dsl_syncfunc_t * syncfunc,void * arg,dmu_tx_t * tx)200 dsl_sync_task_nowait(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
201 dmu_tx_t *tx)
202 {
203 dsl_sync_task_nowait_common(dp, syncfunc, arg, tx, B_FALSE);
204 }
205
206 void
dsl_early_sync_task_nowait(dsl_pool_t * dp,dsl_syncfunc_t * syncfunc,void * arg,dmu_tx_t * tx)207 dsl_early_sync_task_nowait(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
208 dmu_tx_t *tx)
209 {
210 dsl_sync_task_nowait_common(dp, syncfunc, arg, tx, B_TRUE);
211 }
212
213 /*
214 * Called in syncing context to execute the synctask.
215 */
216 void
dsl_sync_task_sync(dsl_sync_task_t * dst,dmu_tx_t * tx)217 dsl_sync_task_sync(dsl_sync_task_t *dst, dmu_tx_t *tx)
218 {
219 dsl_pool_t *dp = dst->dst_pool;
220
221 ASSERT0(dst->dst_error);
222
223 /*
224 * Check for sufficient space.
225 *
226 * When the sync task was created, the caller specified the
227 * type of space checking required. See the comment in
228 * zfs_space_check_t for details on the semantics of each
229 * type of space checking.
230 *
231 * We just check against what's on-disk; we don't want any
232 * in-flight accounting to get in our way, because open context
233 * may have already used up various in-core limits
234 * (arc_tempreserve, dsl_pool_tempreserve).
235 */
236 if (dst->dst_space_check != ZFS_SPACE_CHECK_NONE) {
237 uint64_t quota = dsl_pool_unreserved_space(dp,
238 dst->dst_space_check);
239 uint64_t used = dsl_dir_phys(dp->dp_root_dir)->dd_used_bytes;
240
241 /* MOS space is triple-dittoed, so we multiply by 3. */
242 if (used + dst->dst_space * 3 > quota) {
243 dst->dst_error = SET_ERROR(ENOSPC);
244 if (dst->dst_nowaiter)
245 kmem_free(dst, sizeof (*dst));
246 return;
247 }
248 }
249
250 /*
251 * Check for errors by calling checkfunc.
252 */
253 rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
254 dst->dst_error = dst->dst_checkfunc(dst->dst_arg, tx);
255 if (dst->dst_error == 0)
256 dst->dst_syncfunc(dst->dst_arg, tx);
257 rrw_exit(&dp->dp_config_rwlock, FTAG);
258 if (dst->dst_nowaiter)
259 kmem_free(dst, sizeof (*dst));
260 }
261
/*
 * When _KERNEL is defined, export the blocking and no-wait sync task entry
 * points via EXPORT_SYMBOL.
 */
#if defined(_KERNEL)
EXPORT_SYMBOL(dsl_sync_task);
EXPORT_SYMBOL(dsl_sync_task_nowait);
#endif
266