xref: /freebsd/sys/contrib/openzfs/module/zfs/abd.c (revision eda14cbc264d6969b02f2b1994cef11148e914f1)
1*eda14cbcSMatt Macy /*
2*eda14cbcSMatt Macy  * CDDL HEADER START
3*eda14cbcSMatt Macy  *
4*eda14cbcSMatt Macy  * The contents of this file are subject to the terms of the
5*eda14cbcSMatt Macy  * Common Development and Distribution License (the "License").
6*eda14cbcSMatt Macy  * You may not use this file except in compliance with the License.
7*eda14cbcSMatt Macy  *
8*eda14cbcSMatt Macy  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*eda14cbcSMatt Macy  * or http://www.opensolaris.org/os/licensing.
10*eda14cbcSMatt Macy  * See the License for the specific language governing permissions
11*eda14cbcSMatt Macy  * and limitations under the License.
12*eda14cbcSMatt Macy  *
13*eda14cbcSMatt Macy  * When distributing Covered Code, include this CDDL HEADER in each
14*eda14cbcSMatt Macy  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*eda14cbcSMatt Macy  * If applicable, add the following below this CDDL HEADER, with the
16*eda14cbcSMatt Macy  * fields enclosed by brackets "[]" replaced with your own identifying
17*eda14cbcSMatt Macy  * information: Portions Copyright [yyyy] [name of copyright owner]
18*eda14cbcSMatt Macy  *
19*eda14cbcSMatt Macy  * CDDL HEADER END
20*eda14cbcSMatt Macy  */
21*eda14cbcSMatt Macy /*
22*eda14cbcSMatt Macy  * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
23*eda14cbcSMatt Macy  * Copyright (c) 2019 by Delphix. All rights reserved.
24*eda14cbcSMatt Macy  */
25*eda14cbcSMatt Macy 
26*eda14cbcSMatt Macy /*
27*eda14cbcSMatt Macy  * ARC buffer data (ABD).
28*eda14cbcSMatt Macy  *
29*eda14cbcSMatt Macy  * ABDs are an abstract data structure for the ARC which can use two
30*eda14cbcSMatt Macy  * different ways of storing the underlying data:
31*eda14cbcSMatt Macy  *
32*eda14cbcSMatt Macy  * (a) Linear buffer. In this case, all the data in the ABD is stored in one
33*eda14cbcSMatt Macy  *     contiguous buffer in memory (from a zio_[data_]buf_* kmem cache).
34*eda14cbcSMatt Macy  *
35*eda14cbcSMatt Macy  *         +-------------------+
36*eda14cbcSMatt Macy  *         | ABD (linear)      |
37*eda14cbcSMatt Macy  *         |   abd_flags = ... |
38*eda14cbcSMatt Macy  *         |   abd_size = ...  |     +--------------------------------+
39*eda14cbcSMatt Macy  *         |   abd_buf ------------->| raw buffer of size abd_size    |
40*eda14cbcSMatt Macy  *         +-------------------+     +--------------------------------+
41*eda14cbcSMatt Macy  *              no abd_chunks
42*eda14cbcSMatt Macy  *
43*eda14cbcSMatt Macy  * (b) Scattered buffer. In this case, the data in the ABD is split into
44*eda14cbcSMatt Macy  *     equal-sized chunks (from the abd_chunk_cache kmem_cache), with pointers
45*eda14cbcSMatt Macy  *     to the chunks recorded in an array at the end of the ABD structure.
46*eda14cbcSMatt Macy  *
47*eda14cbcSMatt Macy  *         +-------------------+
48*eda14cbcSMatt Macy  *         | ABD (scattered)   |
49*eda14cbcSMatt Macy  *         |   abd_flags = ... |
50*eda14cbcSMatt Macy  *         |   abd_size = ...  |
51*eda14cbcSMatt Macy  *         |   abd_offset = 0  |                           +-----------+
52*eda14cbcSMatt Macy  *         |   abd_chunks[0] ----------------------------->| chunk 0   |
53*eda14cbcSMatt Macy  *         |   abd_chunks[1] ---------------------+        +-----------+
54*eda14cbcSMatt Macy  *         |   ...             |                  |        +-----------+
55*eda14cbcSMatt Macy  *         |   abd_chunks[N-1] ---------+         +------->| chunk 1   |
56*eda14cbcSMatt Macy  *         +-------------------+        |                  +-----------+
57*eda14cbcSMatt Macy  *                                      |                      ...
58*eda14cbcSMatt Macy  *                                      |                  +-----------+
59*eda14cbcSMatt Macy  *                                      +----------------->| chunk N-1 |
60*eda14cbcSMatt Macy  *                                                         +-----------+
61*eda14cbcSMatt Macy  *
62*eda14cbcSMatt Macy  * In addition to directly allocating a linear or scattered ABD, it is also
63*eda14cbcSMatt Macy  * possible to create an ABD by requesting the "sub-ABD" starting at an offset
64*eda14cbcSMatt Macy  * within an existing ABD. In linear buffers this is simple (set abd_buf of
65*eda14cbcSMatt Macy  * the new ABD to the starting point within the original raw buffer), but
66*eda14cbcSMatt Macy  * scattered ABDs are a little more complex. The new ABD makes a copy of the
67*eda14cbcSMatt Macy  * relevant abd_chunks pointers (but not the underlying data). However, to
68*eda14cbcSMatt Macy  * provide arbitrary rather than only chunk-aligned starting offsets, it also
69*eda14cbcSMatt Macy  * tracks an abd_offset field which represents the starting point of the data
70*eda14cbcSMatt Macy  * within the first chunk in abd_chunks. For both linear and scattered ABDs,
71*eda14cbcSMatt Macy  * creating an offset ABD marks the original ABD as the offset's parent, and the
72*eda14cbcSMatt Macy  * original ABD's abd_children refcount is incremented. This data allows us to
73*eda14cbcSMatt Macy  * ensure the root ABD isn't deleted before its children.
74*eda14cbcSMatt Macy  *
75*eda14cbcSMatt Macy  * Most consumers should never need to know what type of ABD they're using --
76*eda14cbcSMatt Macy  * the ABD public API ensures that it's possible to transparently switch from
77*eda14cbcSMatt Macy  * using a linear ABD to a scattered one when doing so would be beneficial.
78*eda14cbcSMatt Macy  *
79*eda14cbcSMatt Macy  * If you need to use the data within an ABD directly, if you know it's linear
80*eda14cbcSMatt Macy  * (because you allocated it) you can use abd_to_buf() to access the underlying
81*eda14cbcSMatt Macy  * raw buffer. Otherwise, you should use one of the abd_borrow_buf* functions
82*eda14cbcSMatt Macy  * which will allocate a raw buffer if necessary. Use the abd_return_buf*
83*eda14cbcSMatt Macy  * functions to return any raw buffers that are no longer necessary when you're
84*eda14cbcSMatt Macy  * done using them.
85*eda14cbcSMatt Macy  *
86*eda14cbcSMatt Macy  * There are a variety of ABD APIs that implement basic buffer operations:
87*eda14cbcSMatt Macy  * compare, copy, read, write, and fill with zeroes. If you need a custom
88*eda14cbcSMatt Macy  * function which progressively accesses the whole ABD, use the abd_iterate_*
89*eda14cbcSMatt Macy  * functions.
90*eda14cbcSMatt Macy  *
91*eda14cbcSMatt Macy  * As an additional feature, linear and scatter ABD's can be stitched together
92*eda14cbcSMatt Macy  * by using the gang ABD type (abd_alloc_gang_abd()). This allows for
93*eda14cbcSMatt Macy  * multiple ABDs to be viewed as a singular ABD.
94*eda14cbcSMatt Macy  *
95*eda14cbcSMatt Macy  * It is possible to make all ABDs linear by setting zfs_abd_scatter_enabled to
96*eda14cbcSMatt Macy  * B_FALSE.
97*eda14cbcSMatt Macy  */
98*eda14cbcSMatt Macy 
99*eda14cbcSMatt Macy #include <sys/abd_impl.h>
100*eda14cbcSMatt Macy #include <sys/param.h>
101*eda14cbcSMatt Macy #include <sys/zio.h>
102*eda14cbcSMatt Macy #include <sys/zfs_context.h>
103*eda14cbcSMatt Macy #include <sys/zfs_znode.h>
104*eda14cbcSMatt Macy 
105*eda14cbcSMatt Macy /* see block comment above for description */
106*eda14cbcSMatt Macy int zfs_abd_scatter_enabled = B_TRUE;
107*eda14cbcSMatt Macy 
108*eda14cbcSMatt Macy boolean_t
109*eda14cbcSMatt Macy abd_is_linear(abd_t *abd)
110*eda14cbcSMatt Macy {
111*eda14cbcSMatt Macy 	return ((abd->abd_flags & ABD_FLAG_LINEAR) != 0 ? B_TRUE : B_FALSE);
112*eda14cbcSMatt Macy }
113*eda14cbcSMatt Macy 
114*eda14cbcSMatt Macy boolean_t
115*eda14cbcSMatt Macy abd_is_linear_page(abd_t *abd)
116*eda14cbcSMatt Macy {
117*eda14cbcSMatt Macy 	return ((abd->abd_flags & ABD_FLAG_LINEAR_PAGE) != 0 ?
118*eda14cbcSMatt Macy 	    B_TRUE : B_FALSE);
119*eda14cbcSMatt Macy }
120*eda14cbcSMatt Macy 
121*eda14cbcSMatt Macy boolean_t
122*eda14cbcSMatt Macy abd_is_gang(abd_t *abd)
123*eda14cbcSMatt Macy {
124*eda14cbcSMatt Macy 	return ((abd->abd_flags & ABD_FLAG_GANG) != 0 ? B_TRUE :
125*eda14cbcSMatt Macy 	    B_FALSE);
126*eda14cbcSMatt Macy }
127*eda14cbcSMatt Macy 
128*eda14cbcSMatt Macy void
129*eda14cbcSMatt Macy abd_verify(abd_t *abd)
130*eda14cbcSMatt Macy {
131*eda14cbcSMatt Macy 	ASSERT3U(abd->abd_size, >, 0);
132*eda14cbcSMatt Macy 	ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE);
133*eda14cbcSMatt Macy 	ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR |
134*eda14cbcSMatt Macy 	    ABD_FLAG_OWNER | ABD_FLAG_META | ABD_FLAG_MULTI_ZONE |
135*eda14cbcSMatt Macy 	    ABD_FLAG_MULTI_CHUNK | ABD_FLAG_LINEAR_PAGE | ABD_FLAG_GANG |
136*eda14cbcSMatt Macy 	    ABD_FLAG_GANG_FREE | ABD_FLAG_ZEROS));
137*eda14cbcSMatt Macy 	IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER));
138*eda14cbcSMatt Macy 	IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER);
139*eda14cbcSMatt Macy 	if (abd_is_linear(abd)) {
140*eda14cbcSMatt Macy 		ASSERT3P(ABD_LINEAR_BUF(abd), !=, NULL);
141*eda14cbcSMatt Macy 	} else if (abd_is_gang(abd)) {
142*eda14cbcSMatt Macy 		uint_t child_sizes = 0;
143*eda14cbcSMatt Macy 		for (abd_t *cabd = list_head(&ABD_GANG(abd).abd_gang_chain);
144*eda14cbcSMatt Macy 		    cabd != NULL;
145*eda14cbcSMatt Macy 		    cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) {
146*eda14cbcSMatt Macy 			ASSERT(list_link_active(&cabd->abd_gang_link));
147*eda14cbcSMatt Macy 			child_sizes += cabd->abd_size;
148*eda14cbcSMatt Macy 			abd_verify(cabd);
149*eda14cbcSMatt Macy 		}
150*eda14cbcSMatt Macy 		ASSERT3U(abd->abd_size, ==, child_sizes);
151*eda14cbcSMatt Macy 	} else {
152*eda14cbcSMatt Macy 		abd_verify_scatter(abd);
153*eda14cbcSMatt Macy 	}
154*eda14cbcSMatt Macy }
155*eda14cbcSMatt Macy 
156*eda14cbcSMatt Macy uint_t
157*eda14cbcSMatt Macy abd_get_size(abd_t *abd)
158*eda14cbcSMatt Macy {
159*eda14cbcSMatt Macy 	abd_verify(abd);
160*eda14cbcSMatt Macy 	return (abd->abd_size);
161*eda14cbcSMatt Macy }
162*eda14cbcSMatt Macy 
163*eda14cbcSMatt Macy /*
164*eda14cbcSMatt Macy  * Allocate an ABD, along with its own underlying data buffers. Use this if you
165*eda14cbcSMatt Macy  * don't care whether the ABD is linear or not.
166*eda14cbcSMatt Macy  */
167*eda14cbcSMatt Macy abd_t *
168*eda14cbcSMatt Macy abd_alloc(size_t size, boolean_t is_metadata)
169*eda14cbcSMatt Macy {
170*eda14cbcSMatt Macy 	if (!zfs_abd_scatter_enabled || abd_size_alloc_linear(size))
171*eda14cbcSMatt Macy 		return (abd_alloc_linear(size, is_metadata));
172*eda14cbcSMatt Macy 
173*eda14cbcSMatt Macy 	VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);
174*eda14cbcSMatt Macy 
175*eda14cbcSMatt Macy 	abd_t *abd = abd_alloc_struct(size);
176*eda14cbcSMatt Macy 	abd->abd_flags = ABD_FLAG_OWNER;
177*eda14cbcSMatt Macy 	abd->abd_u.abd_scatter.abd_offset = 0;
178*eda14cbcSMatt Macy 	abd_alloc_chunks(abd, size);
179*eda14cbcSMatt Macy 
180*eda14cbcSMatt Macy 	if (is_metadata) {
181*eda14cbcSMatt Macy 		abd->abd_flags |= ABD_FLAG_META;
182*eda14cbcSMatt Macy 	}
183*eda14cbcSMatt Macy 	abd->abd_size = size;
184*eda14cbcSMatt Macy 	abd->abd_parent = NULL;
185*eda14cbcSMatt Macy 	zfs_refcount_create(&abd->abd_children);
186*eda14cbcSMatt Macy 
187*eda14cbcSMatt Macy 	abd_update_scatter_stats(abd, ABDSTAT_INCR);
188*eda14cbcSMatt Macy 
189*eda14cbcSMatt Macy 	return (abd);
190*eda14cbcSMatt Macy }
191*eda14cbcSMatt Macy 
192*eda14cbcSMatt Macy static void
193*eda14cbcSMatt Macy abd_free_scatter(abd_t *abd)
194*eda14cbcSMatt Macy {
195*eda14cbcSMatt Macy 	abd_free_chunks(abd);
196*eda14cbcSMatt Macy 
197*eda14cbcSMatt Macy 	zfs_refcount_destroy(&abd->abd_children);
198*eda14cbcSMatt Macy 	abd_update_scatter_stats(abd, ABDSTAT_DECR);
199*eda14cbcSMatt Macy 	abd_free_struct(abd);
200*eda14cbcSMatt Macy }
201*eda14cbcSMatt Macy 
202*eda14cbcSMatt Macy static void
203*eda14cbcSMatt Macy abd_put_gang_abd(abd_t *abd)
204*eda14cbcSMatt Macy {
205*eda14cbcSMatt Macy 	ASSERT(abd_is_gang(abd));
206*eda14cbcSMatt Macy 	abd_t *cabd;
207*eda14cbcSMatt Macy 
208*eda14cbcSMatt Macy 	while ((cabd = list_remove_head(&ABD_GANG(abd).abd_gang_chain))
209*eda14cbcSMatt Macy 	    != NULL) {
210*eda14cbcSMatt Macy 		ASSERT0(cabd->abd_flags & ABD_FLAG_GANG_FREE);
211*eda14cbcSMatt Macy 		abd->abd_size -= cabd->abd_size;
212*eda14cbcSMatt Macy 		abd_put(cabd);
213*eda14cbcSMatt Macy 	}
214*eda14cbcSMatt Macy 	ASSERT0(abd->abd_size);
215*eda14cbcSMatt Macy 	list_destroy(&ABD_GANG(abd).abd_gang_chain);
216*eda14cbcSMatt Macy }
217*eda14cbcSMatt Macy 
218*eda14cbcSMatt Macy /*
219*eda14cbcSMatt Macy  * Free an ABD allocated from abd_get_offset() or abd_get_from_buf(). Will not
220*eda14cbcSMatt Macy  * free the underlying scatterlist or buffer.
221*eda14cbcSMatt Macy  */
222*eda14cbcSMatt Macy void
223*eda14cbcSMatt Macy abd_put(abd_t *abd)
224*eda14cbcSMatt Macy {
225*eda14cbcSMatt Macy 	if (abd == NULL)
226*eda14cbcSMatt Macy 		return;
227*eda14cbcSMatt Macy 
228*eda14cbcSMatt Macy 	abd_verify(abd);
229*eda14cbcSMatt Macy 	ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER));
230*eda14cbcSMatt Macy 
231*eda14cbcSMatt Macy 	if (abd->abd_parent != NULL) {
232*eda14cbcSMatt Macy 		(void) zfs_refcount_remove_many(&abd->abd_parent->abd_children,
233*eda14cbcSMatt Macy 		    abd->abd_size, abd);
234*eda14cbcSMatt Macy 	}
235*eda14cbcSMatt Macy 
236*eda14cbcSMatt Macy 	if (abd_is_gang(abd))
237*eda14cbcSMatt Macy 		abd_put_gang_abd(abd);
238*eda14cbcSMatt Macy 
239*eda14cbcSMatt Macy 	zfs_refcount_destroy(&abd->abd_children);
240*eda14cbcSMatt Macy 	abd_free_struct(abd);
241*eda14cbcSMatt Macy }
242*eda14cbcSMatt Macy 
243*eda14cbcSMatt Macy /*
244*eda14cbcSMatt Macy  * Allocate an ABD that must be linear, along with its own underlying data
245*eda14cbcSMatt Macy  * buffer. Only use this when it would be very annoying to write your ABD
246*eda14cbcSMatt Macy  * consumer with a scattered ABD.
247*eda14cbcSMatt Macy  */
248*eda14cbcSMatt Macy abd_t *
249*eda14cbcSMatt Macy abd_alloc_linear(size_t size, boolean_t is_metadata)
250*eda14cbcSMatt Macy {
251*eda14cbcSMatt Macy 	abd_t *abd = abd_alloc_struct(0);
252*eda14cbcSMatt Macy 
253*eda14cbcSMatt Macy 	VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);
254*eda14cbcSMatt Macy 
255*eda14cbcSMatt Macy 	abd->abd_flags = ABD_FLAG_LINEAR | ABD_FLAG_OWNER;
256*eda14cbcSMatt Macy 	if (is_metadata) {
257*eda14cbcSMatt Macy 		abd->abd_flags |= ABD_FLAG_META;
258*eda14cbcSMatt Macy 	}
259*eda14cbcSMatt Macy 	abd->abd_size = size;
260*eda14cbcSMatt Macy 	abd->abd_parent = NULL;
261*eda14cbcSMatt Macy 	zfs_refcount_create(&abd->abd_children);
262*eda14cbcSMatt Macy 
263*eda14cbcSMatt Macy 	if (is_metadata) {
264*eda14cbcSMatt Macy 		ABD_LINEAR_BUF(abd) = zio_buf_alloc(size);
265*eda14cbcSMatt Macy 	} else {
266*eda14cbcSMatt Macy 		ABD_LINEAR_BUF(abd) = zio_data_buf_alloc(size);
267*eda14cbcSMatt Macy 	}
268*eda14cbcSMatt Macy 
269*eda14cbcSMatt Macy 	abd_update_linear_stats(abd, ABDSTAT_INCR);
270*eda14cbcSMatt Macy 
271*eda14cbcSMatt Macy 	return (abd);
272*eda14cbcSMatt Macy }
273*eda14cbcSMatt Macy 
274*eda14cbcSMatt Macy static void
275*eda14cbcSMatt Macy abd_free_linear(abd_t *abd)
276*eda14cbcSMatt Macy {
277*eda14cbcSMatt Macy 	if (abd_is_linear_page(abd)) {
278*eda14cbcSMatt Macy 		abd_free_linear_page(abd);
279*eda14cbcSMatt Macy 		return;
280*eda14cbcSMatt Macy 	}
281*eda14cbcSMatt Macy 	if (abd->abd_flags & ABD_FLAG_META) {
282*eda14cbcSMatt Macy 		zio_buf_free(ABD_LINEAR_BUF(abd), abd->abd_size);
283*eda14cbcSMatt Macy 	} else {
284*eda14cbcSMatt Macy 		zio_data_buf_free(ABD_LINEAR_BUF(abd), abd->abd_size);
285*eda14cbcSMatt Macy 	}
286*eda14cbcSMatt Macy 
287*eda14cbcSMatt Macy 	zfs_refcount_destroy(&abd->abd_children);
288*eda14cbcSMatt Macy 	abd_update_linear_stats(abd, ABDSTAT_DECR);
289*eda14cbcSMatt Macy 
290*eda14cbcSMatt Macy 	abd_free_struct(abd);
291*eda14cbcSMatt Macy }
292*eda14cbcSMatt Macy 
293*eda14cbcSMatt Macy static void
294*eda14cbcSMatt Macy abd_free_gang_abd(abd_t *abd)
295*eda14cbcSMatt Macy {
296*eda14cbcSMatt Macy 	ASSERT(abd_is_gang(abd));
297*eda14cbcSMatt Macy 	abd_t *cabd = list_head(&ABD_GANG(abd).abd_gang_chain);
298*eda14cbcSMatt Macy 
299*eda14cbcSMatt Macy 	while (cabd != NULL) {
300*eda14cbcSMatt Macy 		/*
301*eda14cbcSMatt Macy 		 * We must acquire the child ABDs mutex to ensure that if it
302*eda14cbcSMatt Macy 		 * is being added to another gang ABD we will set the link
303*eda14cbcSMatt Macy 		 * as inactive when removing it from this gang ABD and before
304*eda14cbcSMatt Macy 		 * adding it to the other gang ABD.
305*eda14cbcSMatt Macy 		 */
306*eda14cbcSMatt Macy 		mutex_enter(&cabd->abd_mtx);
307*eda14cbcSMatt Macy 		ASSERT(list_link_active(&cabd->abd_gang_link));
308*eda14cbcSMatt Macy 		list_remove(&ABD_GANG(abd).abd_gang_chain, cabd);
309*eda14cbcSMatt Macy 		mutex_exit(&cabd->abd_mtx);
310*eda14cbcSMatt Macy 		abd->abd_size -= cabd->abd_size;
311*eda14cbcSMatt Macy 		if (cabd->abd_flags & ABD_FLAG_GANG_FREE) {
312*eda14cbcSMatt Macy 			if (cabd->abd_flags & ABD_FLAG_OWNER)
313*eda14cbcSMatt Macy 				abd_free(cabd);
314*eda14cbcSMatt Macy 			else
315*eda14cbcSMatt Macy 				abd_put(cabd);
316*eda14cbcSMatt Macy 		}
317*eda14cbcSMatt Macy 		cabd = list_head(&ABD_GANG(abd).abd_gang_chain);
318*eda14cbcSMatt Macy 	}
319*eda14cbcSMatt Macy 	ASSERT0(abd->abd_size);
320*eda14cbcSMatt Macy 	list_destroy(&ABD_GANG(abd).abd_gang_chain);
321*eda14cbcSMatt Macy 	zfs_refcount_destroy(&abd->abd_children);
322*eda14cbcSMatt Macy 	abd_free_struct(abd);
323*eda14cbcSMatt Macy }
324*eda14cbcSMatt Macy 
325*eda14cbcSMatt Macy /*
326*eda14cbcSMatt Macy  * Free an ABD. Only use this on ABDs allocated with abd_alloc(),
327*eda14cbcSMatt Macy  * abd_alloc_linear(), or abd_alloc_gang_abd().
328*eda14cbcSMatt Macy  */
329*eda14cbcSMatt Macy void
330*eda14cbcSMatt Macy abd_free(abd_t *abd)
331*eda14cbcSMatt Macy {
332*eda14cbcSMatt Macy 	if (abd == NULL)
333*eda14cbcSMatt Macy 		return;
334*eda14cbcSMatt Macy 
335*eda14cbcSMatt Macy 	abd_verify(abd);
336*eda14cbcSMatt Macy 	ASSERT3P(abd->abd_parent, ==, NULL);
337*eda14cbcSMatt Macy 	ASSERT(abd->abd_flags & ABD_FLAG_OWNER);
338*eda14cbcSMatt Macy 	if (abd_is_linear(abd))
339*eda14cbcSMatt Macy 		abd_free_linear(abd);
340*eda14cbcSMatt Macy 	else if (abd_is_gang(abd))
341*eda14cbcSMatt Macy 		abd_free_gang_abd(abd);
342*eda14cbcSMatt Macy 	else
343*eda14cbcSMatt Macy 		abd_free_scatter(abd);
344*eda14cbcSMatt Macy }
345*eda14cbcSMatt Macy 
346*eda14cbcSMatt Macy /*
347*eda14cbcSMatt Macy  * Allocate an ABD of the same format (same metadata flag, same scatterize
348*eda14cbcSMatt Macy  * setting) as another ABD.
349*eda14cbcSMatt Macy  */
350*eda14cbcSMatt Macy abd_t *
351*eda14cbcSMatt Macy abd_alloc_sametype(abd_t *sabd, size_t size)
352*eda14cbcSMatt Macy {
353*eda14cbcSMatt Macy 	boolean_t is_metadata = (sabd->abd_flags & ABD_FLAG_META) != 0;
354*eda14cbcSMatt Macy 	if (abd_is_linear(sabd) &&
355*eda14cbcSMatt Macy 	    !abd_is_linear_page(sabd)) {
356*eda14cbcSMatt Macy 		return (abd_alloc_linear(size, is_metadata));
357*eda14cbcSMatt Macy 	} else {
358*eda14cbcSMatt Macy 		return (abd_alloc(size, is_metadata));
359*eda14cbcSMatt Macy 	}
360*eda14cbcSMatt Macy }
361*eda14cbcSMatt Macy 
362*eda14cbcSMatt Macy 
363*eda14cbcSMatt Macy /*
364*eda14cbcSMatt Macy  * Create gang ABD that will be the head of a list of ABD's. This is used
365*eda14cbcSMatt Macy  * to "chain" scatter/gather lists together when constructing aggregated
366*eda14cbcSMatt Macy  * IO's. To free this abd, abd_free() must be called.
367*eda14cbcSMatt Macy  */
368*eda14cbcSMatt Macy abd_t *
369*eda14cbcSMatt Macy abd_alloc_gang_abd(void)
370*eda14cbcSMatt Macy {
371*eda14cbcSMatt Macy 	abd_t *abd;
372*eda14cbcSMatt Macy 
373*eda14cbcSMatt Macy 	abd = abd_alloc_struct(0);
374*eda14cbcSMatt Macy 	abd->abd_flags = ABD_FLAG_GANG | ABD_FLAG_OWNER;
375*eda14cbcSMatt Macy 	abd->abd_size = 0;
376*eda14cbcSMatt Macy 	abd->abd_parent = NULL;
377*eda14cbcSMatt Macy 	list_create(&ABD_GANG(abd).abd_gang_chain,
378*eda14cbcSMatt Macy 	    sizeof (abd_t), offsetof(abd_t, abd_gang_link));
379*eda14cbcSMatt Macy 	zfs_refcount_create(&abd->abd_children);
380*eda14cbcSMatt Macy 	return (abd);
381*eda14cbcSMatt Macy }
382*eda14cbcSMatt Macy 
383*eda14cbcSMatt Macy /*
384*eda14cbcSMatt Macy  * Add a child gang ABD to a parent gang ABDs chained list.
385*eda14cbcSMatt Macy  */
386*eda14cbcSMatt Macy static void
387*eda14cbcSMatt Macy abd_gang_add_gang(abd_t *pabd, abd_t *cabd, boolean_t free_on_free)
388*eda14cbcSMatt Macy {
389*eda14cbcSMatt Macy 	ASSERT(abd_is_gang(pabd));
390*eda14cbcSMatt Macy 	ASSERT(abd_is_gang(cabd));
391*eda14cbcSMatt Macy 
392*eda14cbcSMatt Macy 	if (free_on_free) {
393*eda14cbcSMatt Macy 		/*
394*eda14cbcSMatt Macy 		 * If the parent is responsible for freeing the child gang
395*eda14cbcSMatt Macy 		 * ABD we will just splice the childs children ABD list to
396*eda14cbcSMatt Macy 		 * the parents list and immediately free the child gang ABD
397*eda14cbcSMatt Macy 		 * struct. The parent gang ABDs children from the child gang
398*eda14cbcSMatt Macy 		 * will retain all the free_on_free settings after being
399*eda14cbcSMatt Macy 		 * added to the parents list.
400*eda14cbcSMatt Macy 		 */
401*eda14cbcSMatt Macy 		pabd->abd_size += cabd->abd_size;
402*eda14cbcSMatt Macy 		list_move_tail(&ABD_GANG(pabd).abd_gang_chain,
403*eda14cbcSMatt Macy 		    &ABD_GANG(cabd).abd_gang_chain);
404*eda14cbcSMatt Macy 		ASSERT(list_is_empty(&ABD_GANG(cabd).abd_gang_chain));
405*eda14cbcSMatt Macy 		abd_verify(pabd);
406*eda14cbcSMatt Macy 		abd_free_struct(cabd);
407*eda14cbcSMatt Macy 	} else {
408*eda14cbcSMatt Macy 		for (abd_t *child = list_head(&ABD_GANG(cabd).abd_gang_chain);
409*eda14cbcSMatt Macy 		    child != NULL;
410*eda14cbcSMatt Macy 		    child = list_next(&ABD_GANG(cabd).abd_gang_chain, child)) {
411*eda14cbcSMatt Macy 			/*
412*eda14cbcSMatt Macy 			 * We always pass B_FALSE for free_on_free as it is the
413*eda14cbcSMatt Macy 			 * original child gang ABDs responsibilty to determine
414*eda14cbcSMatt Macy 			 * if any of its child ABDs should be free'd on the call
415*eda14cbcSMatt Macy 			 * to abd_free().
416*eda14cbcSMatt Macy 			 */
417*eda14cbcSMatt Macy 			abd_gang_add(pabd, child, B_FALSE);
418*eda14cbcSMatt Macy 		}
419*eda14cbcSMatt Macy 		abd_verify(pabd);
420*eda14cbcSMatt Macy 	}
421*eda14cbcSMatt Macy }
422*eda14cbcSMatt Macy 
423*eda14cbcSMatt Macy /*
424*eda14cbcSMatt Macy  * Add a child ABD to a gang ABD's chained list.
425*eda14cbcSMatt Macy  */
426*eda14cbcSMatt Macy void
427*eda14cbcSMatt Macy abd_gang_add(abd_t *pabd, abd_t *cabd, boolean_t free_on_free)
428*eda14cbcSMatt Macy {
429*eda14cbcSMatt Macy 	ASSERT(abd_is_gang(pabd));
430*eda14cbcSMatt Macy 	abd_t *child_abd = NULL;
431*eda14cbcSMatt Macy 
432*eda14cbcSMatt Macy 	/*
433*eda14cbcSMatt Macy 	 * If the child being added is a gang ABD, we will add the
434*eda14cbcSMatt Macy 	 * childs ABDs to the parent gang ABD. This alllows us to account
435*eda14cbcSMatt Macy 	 * for the offset correctly in the parent gang ABD.
436*eda14cbcSMatt Macy 	 */
437*eda14cbcSMatt Macy 	if (abd_is_gang(cabd)) {
438*eda14cbcSMatt Macy 		ASSERT(!list_link_active(&cabd->abd_gang_link));
439*eda14cbcSMatt Macy 		ASSERT(!list_is_empty(&ABD_GANG(cabd).abd_gang_chain));
440*eda14cbcSMatt Macy 		return (abd_gang_add_gang(pabd, cabd, free_on_free));
441*eda14cbcSMatt Macy 	}
442*eda14cbcSMatt Macy 	ASSERT(!abd_is_gang(cabd));
443*eda14cbcSMatt Macy 
444*eda14cbcSMatt Macy 	/*
445*eda14cbcSMatt Macy 	 * In order to verify that an ABD is not already part of
446*eda14cbcSMatt Macy 	 * another gang ABD, we must lock the child ABD's abd_mtx
447*eda14cbcSMatt Macy 	 * to check its abd_gang_link status. We unlock the abd_mtx
448*eda14cbcSMatt Macy 	 * only after it is has been added to a gang ABD, which
449*eda14cbcSMatt Macy 	 * will update the abd_gang_link's status. See comment below
450*eda14cbcSMatt Macy 	 * for how an ABD can be in multiple gang ABD's simultaneously.
451*eda14cbcSMatt Macy 	 */
452*eda14cbcSMatt Macy 	mutex_enter(&cabd->abd_mtx);
453*eda14cbcSMatt Macy 	if (list_link_active(&cabd->abd_gang_link)) {
454*eda14cbcSMatt Macy 		/*
455*eda14cbcSMatt Macy 		 * If the child ABD is already part of another
456*eda14cbcSMatt Macy 		 * gang ABD then we must allocate a new
457*eda14cbcSMatt Macy 		 * ABD to use a separate link. We mark the newly
458*eda14cbcSMatt Macy 		 * allocated ABD with ABD_FLAG_GANG_FREE, before
459*eda14cbcSMatt Macy 		 * adding it to the gang ABD's list, to make the
460*eda14cbcSMatt Macy 		 * gang ABD aware that it is responsible to call
461*eda14cbcSMatt Macy 		 * abd_put(). We use abd_get_offset() in order
462*eda14cbcSMatt Macy 		 * to just allocate a new ABD but avoid copying the
463*eda14cbcSMatt Macy 		 * data over into the newly allocated ABD.
464*eda14cbcSMatt Macy 		 *
465*eda14cbcSMatt Macy 		 * An ABD may become part of multiple gang ABD's. For
466*eda14cbcSMatt Macy 		 * example, when writing ditto bocks, the same ABD
467*eda14cbcSMatt Macy 		 * is used to write 2 or 3 locations with 2 or 3
468*eda14cbcSMatt Macy 		 * zio_t's. Each of the zio's may be aggregated with
469*eda14cbcSMatt Macy 		 * different adjacent zio's. zio aggregation uses gang
470*eda14cbcSMatt Macy 		 * zio's, so the single ABD can become part of multiple
471*eda14cbcSMatt Macy 		 * gang zio's.
472*eda14cbcSMatt Macy 		 *
473*eda14cbcSMatt Macy 		 * The ASSERT below is to make sure that if
474*eda14cbcSMatt Macy 		 * free_on_free is passed as B_TRUE, the ABD can
475*eda14cbcSMatt Macy 		 * not be in multiple gang ABD's. The gang ABD
476*eda14cbcSMatt Macy 		 * can not be responsible for cleaning up the child
477*eda14cbcSMatt Macy 		 * ABD memory allocation if the ABD can be in
478*eda14cbcSMatt Macy 		 * multiple gang ABD's at one time.
479*eda14cbcSMatt Macy 		 */
480*eda14cbcSMatt Macy 		ASSERT3B(free_on_free, ==, B_FALSE);
481*eda14cbcSMatt Macy 		child_abd = abd_get_offset(cabd, 0);
482*eda14cbcSMatt Macy 		child_abd->abd_flags |= ABD_FLAG_GANG_FREE;
483*eda14cbcSMatt Macy 	} else {
484*eda14cbcSMatt Macy 		child_abd = cabd;
485*eda14cbcSMatt Macy 		if (free_on_free)
486*eda14cbcSMatt Macy 			child_abd->abd_flags |= ABD_FLAG_GANG_FREE;
487*eda14cbcSMatt Macy 	}
488*eda14cbcSMatt Macy 	ASSERT3P(child_abd, !=, NULL);
489*eda14cbcSMatt Macy 
490*eda14cbcSMatt Macy 	list_insert_tail(&ABD_GANG(pabd).abd_gang_chain, child_abd);
491*eda14cbcSMatt Macy 	mutex_exit(&cabd->abd_mtx);
492*eda14cbcSMatt Macy 	pabd->abd_size += child_abd->abd_size;
493*eda14cbcSMatt Macy }
494*eda14cbcSMatt Macy 
495*eda14cbcSMatt Macy /*
496*eda14cbcSMatt Macy  * Locate the ABD for the supplied offset in the gang ABD.
497*eda14cbcSMatt Macy  * Return a new offset relative to the returned ABD.
498*eda14cbcSMatt Macy  */
499*eda14cbcSMatt Macy abd_t *
500*eda14cbcSMatt Macy abd_gang_get_offset(abd_t *abd, size_t *off)
501*eda14cbcSMatt Macy {
502*eda14cbcSMatt Macy 	abd_t *cabd;
503*eda14cbcSMatt Macy 
504*eda14cbcSMatt Macy 	ASSERT(abd_is_gang(abd));
505*eda14cbcSMatt Macy 	ASSERT3U(*off, <, abd->abd_size);
506*eda14cbcSMatt Macy 	for (cabd = list_head(&ABD_GANG(abd).abd_gang_chain); cabd != NULL;
507*eda14cbcSMatt Macy 	    cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) {
508*eda14cbcSMatt Macy 		if (*off >= cabd->abd_size)
509*eda14cbcSMatt Macy 			*off -= cabd->abd_size;
510*eda14cbcSMatt Macy 		else
511*eda14cbcSMatt Macy 			return (cabd);
512*eda14cbcSMatt Macy 	}
513*eda14cbcSMatt Macy 	VERIFY3P(cabd, !=, NULL);
514*eda14cbcSMatt Macy 	return (cabd);
515*eda14cbcSMatt Macy }
516*eda14cbcSMatt Macy 
517*eda14cbcSMatt Macy /*
518*eda14cbcSMatt Macy  * Allocate a new ABD to point to offset off of sabd. It shares the underlying
519*eda14cbcSMatt Macy  * buffer data with sabd. Use abd_put() to free. sabd must not be freed while
520*eda14cbcSMatt Macy  * any derived ABDs exist.
521*eda14cbcSMatt Macy  */
522*eda14cbcSMatt Macy static abd_t *
523*eda14cbcSMatt Macy abd_get_offset_impl(abd_t *sabd, size_t off, size_t size)
524*eda14cbcSMatt Macy {
525*eda14cbcSMatt Macy 	abd_t *abd = NULL;
526*eda14cbcSMatt Macy 
527*eda14cbcSMatt Macy 	abd_verify(sabd);
528*eda14cbcSMatt Macy 	ASSERT3U(off, <=, sabd->abd_size);
529*eda14cbcSMatt Macy 
530*eda14cbcSMatt Macy 	if (abd_is_linear(sabd)) {
531*eda14cbcSMatt Macy 		abd = abd_alloc_struct(0);
532*eda14cbcSMatt Macy 
533*eda14cbcSMatt Macy 		/*
534*eda14cbcSMatt Macy 		 * Even if this buf is filesystem metadata, we only track that
535*eda14cbcSMatt Macy 		 * if we own the underlying data buffer, which is not true in
536*eda14cbcSMatt Macy 		 * this case. Therefore, we don't ever use ABD_FLAG_META here.
537*eda14cbcSMatt Macy 		 */
538*eda14cbcSMatt Macy 		abd->abd_flags = ABD_FLAG_LINEAR;
539*eda14cbcSMatt Macy 
540*eda14cbcSMatt Macy 		ABD_LINEAR_BUF(abd) = (char *)ABD_LINEAR_BUF(sabd) + off;
541*eda14cbcSMatt Macy 	} else if (abd_is_gang(sabd)) {
542*eda14cbcSMatt Macy 		size_t left = size;
543*eda14cbcSMatt Macy 		abd = abd_alloc_gang_abd();
544*eda14cbcSMatt Macy 		abd->abd_flags &= ~ABD_FLAG_OWNER;
545*eda14cbcSMatt Macy 		for (abd_t *cabd = abd_gang_get_offset(sabd, &off);
546*eda14cbcSMatt Macy 		    cabd != NULL && left > 0;
547*eda14cbcSMatt Macy 		    cabd = list_next(&ABD_GANG(sabd).abd_gang_chain, cabd)) {
548*eda14cbcSMatt Macy 			int csize = MIN(left, cabd->abd_size - off);
549*eda14cbcSMatt Macy 
550*eda14cbcSMatt Macy 			abd_t *nabd = abd_get_offset_impl(cabd, off, csize);
551*eda14cbcSMatt Macy 			abd_gang_add(abd, nabd, B_FALSE);
552*eda14cbcSMatt Macy 			left -= csize;
553*eda14cbcSMatt Macy 			off = 0;
554*eda14cbcSMatt Macy 		}
555*eda14cbcSMatt Macy 		ASSERT3U(left, ==, 0);
556*eda14cbcSMatt Macy 	} else {
557*eda14cbcSMatt Macy 		abd = abd_get_offset_scatter(sabd, off);
558*eda14cbcSMatt Macy 	}
559*eda14cbcSMatt Macy 
560*eda14cbcSMatt Macy 	abd->abd_size = size;
561*eda14cbcSMatt Macy 	abd->abd_parent = sabd;
562*eda14cbcSMatt Macy 	zfs_refcount_create(&abd->abd_children);
563*eda14cbcSMatt Macy 	(void) zfs_refcount_add_many(&sabd->abd_children, abd->abd_size, abd);
564*eda14cbcSMatt Macy 	return (abd);
565*eda14cbcSMatt Macy }
566*eda14cbcSMatt Macy 
567*eda14cbcSMatt Macy abd_t *
568*eda14cbcSMatt Macy abd_get_offset(abd_t *sabd, size_t off)
569*eda14cbcSMatt Macy {
570*eda14cbcSMatt Macy 	size_t size = sabd->abd_size > off ? sabd->abd_size - off : 0;
571*eda14cbcSMatt Macy 	VERIFY3U(size, >, 0);
572*eda14cbcSMatt Macy 	return (abd_get_offset_impl(sabd, off, size));
573*eda14cbcSMatt Macy }
574*eda14cbcSMatt Macy 
575*eda14cbcSMatt Macy abd_t *
576*eda14cbcSMatt Macy abd_get_offset_size(abd_t *sabd, size_t off, size_t size)
577*eda14cbcSMatt Macy {
578*eda14cbcSMatt Macy 	ASSERT3U(off + size, <=, sabd->abd_size);
579*eda14cbcSMatt Macy 	return (abd_get_offset_impl(sabd, off, size));
580*eda14cbcSMatt Macy }
581*eda14cbcSMatt Macy 
582*eda14cbcSMatt Macy /*
583*eda14cbcSMatt Macy  * Return a size scatter ABD. In order to free the returned
584*eda14cbcSMatt Macy  * ABD abd_put() must be called.
585*eda14cbcSMatt Macy  */
586*eda14cbcSMatt Macy abd_t *
587*eda14cbcSMatt Macy abd_get_zeros(size_t size)
588*eda14cbcSMatt Macy {
589*eda14cbcSMatt Macy 	ASSERT3P(abd_zero_scatter, !=, NULL);
590*eda14cbcSMatt Macy 	ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
591*eda14cbcSMatt Macy 	return (abd_get_offset_size(abd_zero_scatter, 0, size));
592*eda14cbcSMatt Macy }
593*eda14cbcSMatt Macy 
594*eda14cbcSMatt Macy /*
595*eda14cbcSMatt Macy  * Allocate a linear ABD structure for buf. You must free this with abd_put()
596*eda14cbcSMatt Macy  * since the resulting ABD doesn't own its own buffer.
597*eda14cbcSMatt Macy  */
598*eda14cbcSMatt Macy abd_t *
599*eda14cbcSMatt Macy abd_get_from_buf(void *buf, size_t size)
600*eda14cbcSMatt Macy {
601*eda14cbcSMatt Macy 	abd_t *abd = abd_alloc_struct(0);
602*eda14cbcSMatt Macy 
603*eda14cbcSMatt Macy 	VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);
604*eda14cbcSMatt Macy 
605*eda14cbcSMatt Macy 	/*
606*eda14cbcSMatt Macy 	 * Even if this buf is filesystem metadata, we only track that if we
607*eda14cbcSMatt Macy 	 * own the underlying data buffer, which is not true in this case.
608*eda14cbcSMatt Macy 	 * Therefore, we don't ever use ABD_FLAG_META here.
609*eda14cbcSMatt Macy 	 */
610*eda14cbcSMatt Macy 	abd->abd_flags = ABD_FLAG_LINEAR;
611*eda14cbcSMatt Macy 	abd->abd_size = size;
612*eda14cbcSMatt Macy 	abd->abd_parent = NULL;
613*eda14cbcSMatt Macy 	zfs_refcount_create(&abd->abd_children);
614*eda14cbcSMatt Macy 
615*eda14cbcSMatt Macy 	ABD_LINEAR_BUF(abd) = buf;
616*eda14cbcSMatt Macy 
617*eda14cbcSMatt Macy 	return (abd);
618*eda14cbcSMatt Macy }
619*eda14cbcSMatt Macy 
620*eda14cbcSMatt Macy /*
621*eda14cbcSMatt Macy  * Get the raw buffer associated with a linear ABD.
622*eda14cbcSMatt Macy  */
623*eda14cbcSMatt Macy void *
624*eda14cbcSMatt Macy abd_to_buf(abd_t *abd)
625*eda14cbcSMatt Macy {
626*eda14cbcSMatt Macy 	ASSERT(abd_is_linear(abd));
627*eda14cbcSMatt Macy 	abd_verify(abd);
628*eda14cbcSMatt Macy 	return (ABD_LINEAR_BUF(abd));
629*eda14cbcSMatt Macy }
630*eda14cbcSMatt Macy 
631*eda14cbcSMatt Macy /*
632*eda14cbcSMatt Macy  * Borrow a raw buffer from an ABD without copying the contents of the ABD
633*eda14cbcSMatt Macy  * into the buffer. If the ABD is scattered, this will allocate a raw buffer
634*eda14cbcSMatt Macy  * whose contents are undefined. To copy over the existing data in the ABD, use
635*eda14cbcSMatt Macy  * abd_borrow_buf_copy() instead.
636*eda14cbcSMatt Macy  */
637*eda14cbcSMatt Macy void *
638*eda14cbcSMatt Macy abd_borrow_buf(abd_t *abd, size_t n)
639*eda14cbcSMatt Macy {
640*eda14cbcSMatt Macy 	void *buf;
641*eda14cbcSMatt Macy 	abd_verify(abd);
642*eda14cbcSMatt Macy 	ASSERT3U(abd->abd_size, >=, n);
643*eda14cbcSMatt Macy 	if (abd_is_linear(abd)) {
644*eda14cbcSMatt Macy 		buf = abd_to_buf(abd);
645*eda14cbcSMatt Macy 	} else {
646*eda14cbcSMatt Macy 		buf = zio_buf_alloc(n);
647*eda14cbcSMatt Macy 	}
648*eda14cbcSMatt Macy 	(void) zfs_refcount_add_many(&abd->abd_children, n, buf);
649*eda14cbcSMatt Macy 	return (buf);
650*eda14cbcSMatt Macy }
651*eda14cbcSMatt Macy 
652*eda14cbcSMatt Macy void *
653*eda14cbcSMatt Macy abd_borrow_buf_copy(abd_t *abd, size_t n)
654*eda14cbcSMatt Macy {
655*eda14cbcSMatt Macy 	void *buf = abd_borrow_buf(abd, n);
656*eda14cbcSMatt Macy 	if (!abd_is_linear(abd)) {
657*eda14cbcSMatt Macy 		abd_copy_to_buf(buf, abd, n);
658*eda14cbcSMatt Macy 	}
659*eda14cbcSMatt Macy 	return (buf);
660*eda14cbcSMatt Macy }
661*eda14cbcSMatt Macy 
662*eda14cbcSMatt Macy /*
663*eda14cbcSMatt Macy  * Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will
664*eda14cbcSMatt Macy  * not change the contents of the ABD and will ASSERT that you didn't modify
665*eda14cbcSMatt Macy  * the buffer since it was borrowed. If you want any changes you made to buf to
666*eda14cbcSMatt Macy  * be copied back to abd, use abd_return_buf_copy() instead.
667*eda14cbcSMatt Macy  */
668*eda14cbcSMatt Macy void
669*eda14cbcSMatt Macy abd_return_buf(abd_t *abd, void *buf, size_t n)
670*eda14cbcSMatt Macy {
671*eda14cbcSMatt Macy 	abd_verify(abd);
672*eda14cbcSMatt Macy 	ASSERT3U(abd->abd_size, >=, n);
673*eda14cbcSMatt Macy 	if (abd_is_linear(abd)) {
674*eda14cbcSMatt Macy 		ASSERT3P(buf, ==, abd_to_buf(abd));
675*eda14cbcSMatt Macy 	} else {
676*eda14cbcSMatt Macy 		ASSERT0(abd_cmp_buf(abd, buf, n));
677*eda14cbcSMatt Macy 		zio_buf_free(buf, n);
678*eda14cbcSMatt Macy 	}
679*eda14cbcSMatt Macy 	(void) zfs_refcount_remove_many(&abd->abd_children, n, buf);
680*eda14cbcSMatt Macy }
681*eda14cbcSMatt Macy 
682*eda14cbcSMatt Macy void
683*eda14cbcSMatt Macy abd_return_buf_copy(abd_t *abd, void *buf, size_t n)
684*eda14cbcSMatt Macy {
685*eda14cbcSMatt Macy 	if (!abd_is_linear(abd)) {
686*eda14cbcSMatt Macy 		abd_copy_from_buf(abd, buf, n);
687*eda14cbcSMatt Macy 	}
688*eda14cbcSMatt Macy 	abd_return_buf(abd, buf, n);
689*eda14cbcSMatt Macy }
690*eda14cbcSMatt Macy 
691*eda14cbcSMatt Macy void
692*eda14cbcSMatt Macy abd_release_ownership_of_buf(abd_t *abd)
693*eda14cbcSMatt Macy {
694*eda14cbcSMatt Macy 	ASSERT(abd_is_linear(abd));
695*eda14cbcSMatt Macy 	ASSERT(abd->abd_flags & ABD_FLAG_OWNER);
696*eda14cbcSMatt Macy 
697*eda14cbcSMatt Macy 	/*
698*eda14cbcSMatt Macy 	 * abd_free() needs to handle LINEAR_PAGE ABD's specially.
699*eda14cbcSMatt Macy 	 * Since that flag does not survive the
700*eda14cbcSMatt Macy 	 * abd_release_ownership_of_buf() -> abd_get_from_buf() ->
701*eda14cbcSMatt Macy 	 * abd_take_ownership_of_buf() sequence, we don't allow releasing
702*eda14cbcSMatt Macy 	 * these "linear but not zio_[data_]buf_alloc()'ed" ABD's.
703*eda14cbcSMatt Macy 	 */
704*eda14cbcSMatt Macy 	ASSERT(!abd_is_linear_page(abd));
705*eda14cbcSMatt Macy 
706*eda14cbcSMatt Macy 	abd_verify(abd);
707*eda14cbcSMatt Macy 
708*eda14cbcSMatt Macy 	abd->abd_flags &= ~ABD_FLAG_OWNER;
709*eda14cbcSMatt Macy 	/* Disable this flag since we no longer own the data buffer */
710*eda14cbcSMatt Macy 	abd->abd_flags &= ~ABD_FLAG_META;
711*eda14cbcSMatt Macy 
712*eda14cbcSMatt Macy 	abd_update_linear_stats(abd, ABDSTAT_DECR);
713*eda14cbcSMatt Macy }
714*eda14cbcSMatt Macy 
715*eda14cbcSMatt Macy 
716*eda14cbcSMatt Macy /*
717*eda14cbcSMatt Macy  * Give this ABD ownership of the buffer that it's storing. Can only be used on
718*eda14cbcSMatt Macy  * linear ABDs which were allocated via abd_get_from_buf(), or ones allocated
719*eda14cbcSMatt Macy  * with abd_alloc_linear() which subsequently released ownership of their buf
720*eda14cbcSMatt Macy  * with abd_release_ownership_of_buf().
721*eda14cbcSMatt Macy  */
722*eda14cbcSMatt Macy void
723*eda14cbcSMatt Macy abd_take_ownership_of_buf(abd_t *abd, boolean_t is_metadata)
724*eda14cbcSMatt Macy {
725*eda14cbcSMatt Macy 	ASSERT(abd_is_linear(abd));
726*eda14cbcSMatt Macy 	ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER));
727*eda14cbcSMatt Macy 	abd_verify(abd);
728*eda14cbcSMatt Macy 
729*eda14cbcSMatt Macy 	abd->abd_flags |= ABD_FLAG_OWNER;
730*eda14cbcSMatt Macy 	if (is_metadata) {
731*eda14cbcSMatt Macy 		abd->abd_flags |= ABD_FLAG_META;
732*eda14cbcSMatt Macy 	}
733*eda14cbcSMatt Macy 
734*eda14cbcSMatt Macy 	abd_update_linear_stats(abd, ABDSTAT_INCR);
735*eda14cbcSMatt Macy }
736*eda14cbcSMatt Macy 
737*eda14cbcSMatt Macy /*
738*eda14cbcSMatt Macy  * Initializes an abd_iter based on whether the abd is a gang ABD
739*eda14cbcSMatt Macy  * or just a single ABD.
740*eda14cbcSMatt Macy  */
741*eda14cbcSMatt Macy static inline abd_t *
742*eda14cbcSMatt Macy abd_init_abd_iter(abd_t *abd, struct abd_iter *aiter, size_t off)
743*eda14cbcSMatt Macy {
744*eda14cbcSMatt Macy 	abd_t *cabd = NULL;
745*eda14cbcSMatt Macy 
746*eda14cbcSMatt Macy 	if (abd_is_gang(abd)) {
747*eda14cbcSMatt Macy 		cabd = abd_gang_get_offset(abd, &off);
748*eda14cbcSMatt Macy 		if (cabd) {
749*eda14cbcSMatt Macy 			abd_iter_init(aiter, cabd);
750*eda14cbcSMatt Macy 			abd_iter_advance(aiter, off);
751*eda14cbcSMatt Macy 		}
752*eda14cbcSMatt Macy 	} else {
753*eda14cbcSMatt Macy 		abd_iter_init(aiter, abd);
754*eda14cbcSMatt Macy 		abd_iter_advance(aiter, off);
755*eda14cbcSMatt Macy 	}
756*eda14cbcSMatt Macy 	return (cabd);
757*eda14cbcSMatt Macy }
758*eda14cbcSMatt Macy 
759*eda14cbcSMatt Macy /*
760*eda14cbcSMatt Macy  * Advances an abd_iter. We have to be careful with gang ABD as
761*eda14cbcSMatt Macy  * advancing could mean that we are at the end of a particular ABD and
762*eda14cbcSMatt Macy  * must grab the ABD in the gang ABD's list.
763*eda14cbcSMatt Macy  */
764*eda14cbcSMatt Macy static inline abd_t *
765*eda14cbcSMatt Macy abd_advance_abd_iter(abd_t *abd, abd_t *cabd, struct abd_iter *aiter,
766*eda14cbcSMatt Macy     size_t len)
767*eda14cbcSMatt Macy {
768*eda14cbcSMatt Macy 	abd_iter_advance(aiter, len);
769*eda14cbcSMatt Macy 	if (abd_is_gang(abd) && abd_iter_at_end(aiter)) {
770*eda14cbcSMatt Macy 		ASSERT3P(cabd, !=, NULL);
771*eda14cbcSMatt Macy 		cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd);
772*eda14cbcSMatt Macy 		if (cabd) {
773*eda14cbcSMatt Macy 			abd_iter_init(aiter, cabd);
774*eda14cbcSMatt Macy 			abd_iter_advance(aiter, 0);
775*eda14cbcSMatt Macy 		}
776*eda14cbcSMatt Macy 	}
777*eda14cbcSMatt Macy 	return (cabd);
778*eda14cbcSMatt Macy }
779*eda14cbcSMatt Macy 
780*eda14cbcSMatt Macy int
781*eda14cbcSMatt Macy abd_iterate_func(abd_t *abd, size_t off, size_t size,
782*eda14cbcSMatt Macy     abd_iter_func_t *func, void *private)
783*eda14cbcSMatt Macy {
784*eda14cbcSMatt Macy 	int ret = 0;
785*eda14cbcSMatt Macy 	struct abd_iter aiter;
786*eda14cbcSMatt Macy 	boolean_t abd_multi;
787*eda14cbcSMatt Macy 	abd_t *c_abd;
788*eda14cbcSMatt Macy 
789*eda14cbcSMatt Macy 	abd_verify(abd);
790*eda14cbcSMatt Macy 	ASSERT3U(off + size, <=, abd->abd_size);
791*eda14cbcSMatt Macy 
792*eda14cbcSMatt Macy 	abd_multi = abd_is_gang(abd);
793*eda14cbcSMatt Macy 	c_abd = abd_init_abd_iter(abd, &aiter, off);
794*eda14cbcSMatt Macy 
795*eda14cbcSMatt Macy 	while (size > 0) {
796*eda14cbcSMatt Macy 		/* If we are at the end of the gang ABD we are done */
797*eda14cbcSMatt Macy 		if (abd_multi && !c_abd)
798*eda14cbcSMatt Macy 			break;
799*eda14cbcSMatt Macy 
800*eda14cbcSMatt Macy 		abd_iter_map(&aiter);
801*eda14cbcSMatt Macy 
802*eda14cbcSMatt Macy 		size_t len = MIN(aiter.iter_mapsize, size);
803*eda14cbcSMatt Macy 		ASSERT3U(len, >, 0);
804*eda14cbcSMatt Macy 
805*eda14cbcSMatt Macy 		ret = func(aiter.iter_mapaddr, len, private);
806*eda14cbcSMatt Macy 
807*eda14cbcSMatt Macy 		abd_iter_unmap(&aiter);
808*eda14cbcSMatt Macy 
809*eda14cbcSMatt Macy 		if (ret != 0)
810*eda14cbcSMatt Macy 			break;
811*eda14cbcSMatt Macy 
812*eda14cbcSMatt Macy 		size -= len;
813*eda14cbcSMatt Macy 		c_abd = abd_advance_abd_iter(abd, c_abd, &aiter, len);
814*eda14cbcSMatt Macy 	}
815*eda14cbcSMatt Macy 
816*eda14cbcSMatt Macy 	return (ret);
817*eda14cbcSMatt Macy }
818*eda14cbcSMatt Macy 
/*
 * Private state for the abd_*_buf_off callbacks: arg_buf is the current
 * position in the caller's flat buffer. Each callback advances it by the
 * number of bytes it handled.
 */
struct buf_arg {
	void *arg_buf;
};
822*eda14cbcSMatt Macy 
823*eda14cbcSMatt Macy static int
824*eda14cbcSMatt Macy abd_copy_to_buf_off_cb(void *buf, size_t size, void *private)
825*eda14cbcSMatt Macy {
826*eda14cbcSMatt Macy 	struct buf_arg *ba_ptr = private;
827*eda14cbcSMatt Macy 
828*eda14cbcSMatt Macy 	(void) memcpy(ba_ptr->arg_buf, buf, size);
829*eda14cbcSMatt Macy 	ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size;
830*eda14cbcSMatt Macy 
831*eda14cbcSMatt Macy 	return (0);
832*eda14cbcSMatt Macy }
833*eda14cbcSMatt Macy 
834*eda14cbcSMatt Macy /*
835*eda14cbcSMatt Macy  * Copy abd to buf. (off is the offset in abd.)
836*eda14cbcSMatt Macy  */
837*eda14cbcSMatt Macy void
838*eda14cbcSMatt Macy abd_copy_to_buf_off(void *buf, abd_t *abd, size_t off, size_t size)
839*eda14cbcSMatt Macy {
840*eda14cbcSMatt Macy 	struct buf_arg ba_ptr = { buf };
841*eda14cbcSMatt Macy 
842*eda14cbcSMatt Macy 	(void) abd_iterate_func(abd, off, size, abd_copy_to_buf_off_cb,
843*eda14cbcSMatt Macy 	    &ba_ptr);
844*eda14cbcSMatt Macy }
845*eda14cbcSMatt Macy 
846*eda14cbcSMatt Macy static int
847*eda14cbcSMatt Macy abd_cmp_buf_off_cb(void *buf, size_t size, void *private)
848*eda14cbcSMatt Macy {
849*eda14cbcSMatt Macy 	int ret;
850*eda14cbcSMatt Macy 	struct buf_arg *ba_ptr = private;
851*eda14cbcSMatt Macy 
852*eda14cbcSMatt Macy 	ret = memcmp(buf, ba_ptr->arg_buf, size);
853*eda14cbcSMatt Macy 	ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size;
854*eda14cbcSMatt Macy 
855*eda14cbcSMatt Macy 	return (ret);
856*eda14cbcSMatt Macy }
857*eda14cbcSMatt Macy 
858*eda14cbcSMatt Macy /*
859*eda14cbcSMatt Macy  * Compare the contents of abd to buf. (off is the offset in abd.)
860*eda14cbcSMatt Macy  */
861*eda14cbcSMatt Macy int
862*eda14cbcSMatt Macy abd_cmp_buf_off(abd_t *abd, const void *buf, size_t off, size_t size)
863*eda14cbcSMatt Macy {
864*eda14cbcSMatt Macy 	struct buf_arg ba_ptr = { (void *) buf };
865*eda14cbcSMatt Macy 
866*eda14cbcSMatt Macy 	return (abd_iterate_func(abd, off, size, abd_cmp_buf_off_cb, &ba_ptr));
867*eda14cbcSMatt Macy }
868*eda14cbcSMatt Macy 
869*eda14cbcSMatt Macy static int
870*eda14cbcSMatt Macy abd_copy_from_buf_off_cb(void *buf, size_t size, void *private)
871*eda14cbcSMatt Macy {
872*eda14cbcSMatt Macy 	struct buf_arg *ba_ptr = private;
873*eda14cbcSMatt Macy 
874*eda14cbcSMatt Macy 	(void) memcpy(buf, ba_ptr->arg_buf, size);
875*eda14cbcSMatt Macy 	ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size;
876*eda14cbcSMatt Macy 
877*eda14cbcSMatt Macy 	return (0);
878*eda14cbcSMatt Macy }
879*eda14cbcSMatt Macy 
880*eda14cbcSMatt Macy /*
881*eda14cbcSMatt Macy  * Copy from buf to abd. (off is the offset in abd.)
882*eda14cbcSMatt Macy  */
883*eda14cbcSMatt Macy void
884*eda14cbcSMatt Macy abd_copy_from_buf_off(abd_t *abd, const void *buf, size_t off, size_t size)
885*eda14cbcSMatt Macy {
886*eda14cbcSMatt Macy 	struct buf_arg ba_ptr = { (void *) buf };
887*eda14cbcSMatt Macy 
888*eda14cbcSMatt Macy 	(void) abd_iterate_func(abd, off, size, abd_copy_from_buf_off_cb,
889*eda14cbcSMatt Macy 	    &ba_ptr);
890*eda14cbcSMatt Macy }
891*eda14cbcSMatt Macy 
/*
 * Iteration callback: fill the size bytes at buf with zeros. private is
 * not used. Always returns 0.
 */
/*ARGSUSED*/
static int
abd_zero_off_cb(void *buf, size_t size, void *private)
{
	(void) memset(buf, '\0', size);

	return (0);
}
899*eda14cbcSMatt Macy 
900*eda14cbcSMatt Macy /*
901*eda14cbcSMatt Macy  * Zero out the abd from a particular offset to the end.
902*eda14cbcSMatt Macy  */
903*eda14cbcSMatt Macy void
904*eda14cbcSMatt Macy abd_zero_off(abd_t *abd, size_t off, size_t size)
905*eda14cbcSMatt Macy {
906*eda14cbcSMatt Macy 	(void) abd_iterate_func(abd, off, size, abd_zero_off_cb, NULL);
907*eda14cbcSMatt Macy }
908*eda14cbcSMatt Macy 
909*eda14cbcSMatt Macy /*
910*eda14cbcSMatt Macy  * Iterate over two ABDs and call func incrementally on the two ABDs' data in
911*eda14cbcSMatt Macy  * equal-sized chunks (passed to func as raw buffers). func could be called many
912*eda14cbcSMatt Macy  * times during this iteration.
913*eda14cbcSMatt Macy  */
914*eda14cbcSMatt Macy int
915*eda14cbcSMatt Macy abd_iterate_func2(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff,
916*eda14cbcSMatt Macy     size_t size, abd_iter_func2_t *func, void *private)
917*eda14cbcSMatt Macy {
918*eda14cbcSMatt Macy 	int ret = 0;
919*eda14cbcSMatt Macy 	struct abd_iter daiter, saiter;
920*eda14cbcSMatt Macy 	boolean_t dabd_is_gang_abd, sabd_is_gang_abd;
921*eda14cbcSMatt Macy 	abd_t *c_dabd, *c_sabd;
922*eda14cbcSMatt Macy 
923*eda14cbcSMatt Macy 	abd_verify(dabd);
924*eda14cbcSMatt Macy 	abd_verify(sabd);
925*eda14cbcSMatt Macy 
926*eda14cbcSMatt Macy 	ASSERT3U(doff + size, <=, dabd->abd_size);
927*eda14cbcSMatt Macy 	ASSERT3U(soff + size, <=, sabd->abd_size);
928*eda14cbcSMatt Macy 
929*eda14cbcSMatt Macy 	dabd_is_gang_abd = abd_is_gang(dabd);
930*eda14cbcSMatt Macy 	sabd_is_gang_abd = abd_is_gang(sabd);
931*eda14cbcSMatt Macy 	c_dabd = abd_init_abd_iter(dabd, &daiter, doff);
932*eda14cbcSMatt Macy 	c_sabd = abd_init_abd_iter(sabd, &saiter, soff);
933*eda14cbcSMatt Macy 
934*eda14cbcSMatt Macy 	while (size > 0) {
935*eda14cbcSMatt Macy 		/* if we are at the end of the gang ABD we are done */
936*eda14cbcSMatt Macy 		if ((dabd_is_gang_abd && !c_dabd) ||
937*eda14cbcSMatt Macy 		    (sabd_is_gang_abd && !c_sabd))
938*eda14cbcSMatt Macy 			break;
939*eda14cbcSMatt Macy 
940*eda14cbcSMatt Macy 		abd_iter_map(&daiter);
941*eda14cbcSMatt Macy 		abd_iter_map(&saiter);
942*eda14cbcSMatt Macy 
943*eda14cbcSMatt Macy 		size_t dlen = MIN(daiter.iter_mapsize, size);
944*eda14cbcSMatt Macy 		size_t slen = MIN(saiter.iter_mapsize, size);
945*eda14cbcSMatt Macy 		size_t len = MIN(dlen, slen);
946*eda14cbcSMatt Macy 		ASSERT(dlen > 0 || slen > 0);
947*eda14cbcSMatt Macy 
948*eda14cbcSMatt Macy 		ret = func(daiter.iter_mapaddr, saiter.iter_mapaddr, len,
949*eda14cbcSMatt Macy 		    private);
950*eda14cbcSMatt Macy 
951*eda14cbcSMatt Macy 		abd_iter_unmap(&saiter);
952*eda14cbcSMatt Macy 		abd_iter_unmap(&daiter);
953*eda14cbcSMatt Macy 
954*eda14cbcSMatt Macy 		if (ret != 0)
955*eda14cbcSMatt Macy 			break;
956*eda14cbcSMatt Macy 
957*eda14cbcSMatt Macy 		size -= len;
958*eda14cbcSMatt Macy 		c_dabd =
959*eda14cbcSMatt Macy 		    abd_advance_abd_iter(dabd, c_dabd, &daiter, len);
960*eda14cbcSMatt Macy 		c_sabd =
961*eda14cbcSMatt Macy 		    abd_advance_abd_iter(sabd, c_sabd, &saiter, len);
962*eda14cbcSMatt Macy 	}
963*eda14cbcSMatt Macy 
964*eda14cbcSMatt Macy 	return (ret);
965*eda14cbcSMatt Macy }
966*eda14cbcSMatt Macy 
/*
 * Two-ABD iteration callback: copy size bytes from the source chunk (sbuf)
 * to the destination chunk (dbuf). private is not used. Always returns 0.
 */
/*ARGSUSED*/
static int
abd_copy_off_cb(void *dbuf, void *sbuf, size_t size, void *private)
{
	const void *from = sbuf;

	(void) memcpy(dbuf, from, size);

	return (0);
}
974*eda14cbcSMatt Macy 
975*eda14cbcSMatt Macy /*
976*eda14cbcSMatt Macy  * Copy from sabd to dabd starting from soff and doff.
977*eda14cbcSMatt Macy  */
978*eda14cbcSMatt Macy void
979*eda14cbcSMatt Macy abd_copy_off(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff, size_t size)
980*eda14cbcSMatt Macy {
981*eda14cbcSMatt Macy 	(void) abd_iterate_func2(dabd, sabd, doff, soff, size,
982*eda14cbcSMatt Macy 	    abd_copy_off_cb, NULL);
983*eda14cbcSMatt Macy }
984*eda14cbcSMatt Macy 
/*
 * Two-ABD iteration callback: compare size bytes of the two chunks with
 * memcmp() and return its result. private is not used.
 */
/*ARGSUSED*/
static int
abd_cmp_cb(void *bufa, void *bufb, size_t size, void *private)
{
	int diff = memcmp(bufa, bufb, size);

	return (diff);
}
991*eda14cbcSMatt Macy 
992*eda14cbcSMatt Macy /*
993*eda14cbcSMatt Macy  * Compares the contents of two ABDs.
994*eda14cbcSMatt Macy  */
995*eda14cbcSMatt Macy int
996*eda14cbcSMatt Macy abd_cmp(abd_t *dabd, abd_t *sabd)
997*eda14cbcSMatt Macy {
998*eda14cbcSMatt Macy 	ASSERT3U(dabd->abd_size, ==, sabd->abd_size);
999*eda14cbcSMatt Macy 	return (abd_iterate_func2(dabd, sabd, 0, 0, dabd->abd_size,
1000*eda14cbcSMatt Macy 	    abd_cmp_cb, NULL));
1001*eda14cbcSMatt Macy }
1002*eda14cbcSMatt Macy 
1003*eda14cbcSMatt Macy /*
1004*eda14cbcSMatt Macy  * Iterate over code ABDs and a data ABD and call @func_raidz_gen.
1005*eda14cbcSMatt Macy  *
1006*eda14cbcSMatt Macy  * @cabds          parity ABDs, must have equal size
1007*eda14cbcSMatt Macy  * @dabd           data ABD. Can be NULL (in this case @dsize = 0)
1008*eda14cbcSMatt Macy  * @func_raidz_gen should be implemented so that its behaviour
1009*eda14cbcSMatt Macy  *                 is the same when taking linear and when taking scatter
1010*eda14cbcSMatt Macy  */
1011*eda14cbcSMatt Macy void
1012*eda14cbcSMatt Macy abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
1013*eda14cbcSMatt Macy     ssize_t csize, ssize_t dsize, const unsigned parity,
1014*eda14cbcSMatt Macy     void (*func_raidz_gen)(void **, const void *, size_t, size_t))
1015*eda14cbcSMatt Macy {
1016*eda14cbcSMatt Macy 	int i;
1017*eda14cbcSMatt Macy 	ssize_t len, dlen;
1018*eda14cbcSMatt Macy 	struct abd_iter caiters[3];
1019*eda14cbcSMatt Macy 	struct abd_iter daiter = {0};
1020*eda14cbcSMatt Macy 	void *caddrs[3];
1021*eda14cbcSMatt Macy 	unsigned long flags __maybe_unused = 0;
1022*eda14cbcSMatt Macy 	abd_t *c_cabds[3];
1023*eda14cbcSMatt Macy 	abd_t *c_dabd = NULL;
1024*eda14cbcSMatt Macy 	boolean_t cabds_is_gang_abd[3];
1025*eda14cbcSMatt Macy 	boolean_t dabd_is_gang_abd = B_FALSE;
1026*eda14cbcSMatt Macy 
1027*eda14cbcSMatt Macy 	ASSERT3U(parity, <=, 3);
1028*eda14cbcSMatt Macy 
1029*eda14cbcSMatt Macy 	for (i = 0; i < parity; i++) {
1030*eda14cbcSMatt Macy 		cabds_is_gang_abd[i] = abd_is_gang(cabds[i]);
1031*eda14cbcSMatt Macy 		c_cabds[i] = abd_init_abd_iter(cabds[i], &caiters[i], 0);
1032*eda14cbcSMatt Macy 	}
1033*eda14cbcSMatt Macy 
1034*eda14cbcSMatt Macy 	if (dabd) {
1035*eda14cbcSMatt Macy 		dabd_is_gang_abd = abd_is_gang(dabd);
1036*eda14cbcSMatt Macy 		c_dabd = abd_init_abd_iter(dabd, &daiter, 0);
1037*eda14cbcSMatt Macy 	}
1038*eda14cbcSMatt Macy 
1039*eda14cbcSMatt Macy 	ASSERT3S(dsize, >=, 0);
1040*eda14cbcSMatt Macy 
1041*eda14cbcSMatt Macy 	abd_enter_critical(flags);
1042*eda14cbcSMatt Macy 	while (csize > 0) {
1043*eda14cbcSMatt Macy 		/* if we are at the end of the gang ABD we are done */
1044*eda14cbcSMatt Macy 		if (dabd_is_gang_abd && !c_dabd)
1045*eda14cbcSMatt Macy 			break;
1046*eda14cbcSMatt Macy 
1047*eda14cbcSMatt Macy 		for (i = 0; i < parity; i++) {
1048*eda14cbcSMatt Macy 			/*
1049*eda14cbcSMatt Macy 			 * If we are at the end of the gang ABD we are
1050*eda14cbcSMatt Macy 			 * done.
1051*eda14cbcSMatt Macy 			 */
1052*eda14cbcSMatt Macy 			if (cabds_is_gang_abd[i] && !c_cabds[i])
1053*eda14cbcSMatt Macy 				break;
1054*eda14cbcSMatt Macy 			abd_iter_map(&caiters[i]);
1055*eda14cbcSMatt Macy 			caddrs[i] = caiters[i].iter_mapaddr;
1056*eda14cbcSMatt Macy 		}
1057*eda14cbcSMatt Macy 
1058*eda14cbcSMatt Macy 		len = csize;
1059*eda14cbcSMatt Macy 
1060*eda14cbcSMatt Macy 		if (dabd && dsize > 0)
1061*eda14cbcSMatt Macy 			abd_iter_map(&daiter);
1062*eda14cbcSMatt Macy 
1063*eda14cbcSMatt Macy 		switch (parity) {
1064*eda14cbcSMatt Macy 			case 3:
1065*eda14cbcSMatt Macy 				len = MIN(caiters[2].iter_mapsize, len);
1066*eda14cbcSMatt Macy 				/* falls through */
1067*eda14cbcSMatt Macy 			case 2:
1068*eda14cbcSMatt Macy 				len = MIN(caiters[1].iter_mapsize, len);
1069*eda14cbcSMatt Macy 				/* falls through */
1070*eda14cbcSMatt Macy 			case 1:
1071*eda14cbcSMatt Macy 				len = MIN(caiters[0].iter_mapsize, len);
1072*eda14cbcSMatt Macy 		}
1073*eda14cbcSMatt Macy 
1074*eda14cbcSMatt Macy 		/* must be progressive */
1075*eda14cbcSMatt Macy 		ASSERT3S(len, >, 0);
1076*eda14cbcSMatt Macy 
1077*eda14cbcSMatt Macy 		if (dabd && dsize > 0) {
1078*eda14cbcSMatt Macy 			/* this needs precise iter.length */
1079*eda14cbcSMatt Macy 			len = MIN(daiter.iter_mapsize, len);
1080*eda14cbcSMatt Macy 			dlen = len;
1081*eda14cbcSMatt Macy 		} else
1082*eda14cbcSMatt Macy 			dlen = 0;
1083*eda14cbcSMatt Macy 
1084*eda14cbcSMatt Macy 		/* must be progressive */
1085*eda14cbcSMatt Macy 		ASSERT3S(len, >, 0);
1086*eda14cbcSMatt Macy 		/*
1087*eda14cbcSMatt Macy 		 * The iterated function likely will not do well if each
1088*eda14cbcSMatt Macy 		 * segment except the last one is not multiple of 512 (raidz).
1089*eda14cbcSMatt Macy 		 */
1090*eda14cbcSMatt Macy 		ASSERT3U(((uint64_t)len & 511ULL), ==, 0);
1091*eda14cbcSMatt Macy 
1092*eda14cbcSMatt Macy 		func_raidz_gen(caddrs, daiter.iter_mapaddr, len, dlen);
1093*eda14cbcSMatt Macy 
1094*eda14cbcSMatt Macy 		for (i = parity-1; i >= 0; i--) {
1095*eda14cbcSMatt Macy 			abd_iter_unmap(&caiters[i]);
1096*eda14cbcSMatt Macy 			c_cabds[i] =
1097*eda14cbcSMatt Macy 			    abd_advance_abd_iter(cabds[i], c_cabds[i],
1098*eda14cbcSMatt Macy 			    &caiters[i], len);
1099*eda14cbcSMatt Macy 		}
1100*eda14cbcSMatt Macy 
1101*eda14cbcSMatt Macy 		if (dabd && dsize > 0) {
1102*eda14cbcSMatt Macy 			abd_iter_unmap(&daiter);
1103*eda14cbcSMatt Macy 			c_dabd =
1104*eda14cbcSMatt Macy 			    abd_advance_abd_iter(dabd, c_dabd, &daiter,
1105*eda14cbcSMatt Macy 			    dlen);
1106*eda14cbcSMatt Macy 			dsize -= dlen;
1107*eda14cbcSMatt Macy 		}
1108*eda14cbcSMatt Macy 
1109*eda14cbcSMatt Macy 		csize -= len;
1110*eda14cbcSMatt Macy 
1111*eda14cbcSMatt Macy 		ASSERT3S(dsize, >=, 0);
1112*eda14cbcSMatt Macy 		ASSERT3S(csize, >=, 0);
1113*eda14cbcSMatt Macy 	}
1114*eda14cbcSMatt Macy 	abd_exit_critical(flags);
1115*eda14cbcSMatt Macy }
1116*eda14cbcSMatt Macy 
1117*eda14cbcSMatt Macy /*
1118*eda14cbcSMatt Macy  * Iterate over code ABDs and data reconstruction target ABDs and call
1119*eda14cbcSMatt Macy  * @func_raidz_rec. Function maps at most 6 pages atomically.
1120*eda14cbcSMatt Macy  *
1121*eda14cbcSMatt Macy  * @cabds           parity ABDs, must have equal size
1122*eda14cbcSMatt Macy  * @tabds           rec target ABDs, at most 3
1123*eda14cbcSMatt Macy  * @tsize           size of data target columns
1124*eda14cbcSMatt Macy  * @func_raidz_rec  expects syndrome data in target columns. Function
1125*eda14cbcSMatt Macy  *                  reconstructs data and overwrites target columns.
1126*eda14cbcSMatt Macy  */
1127*eda14cbcSMatt Macy void
1128*eda14cbcSMatt Macy abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
1129*eda14cbcSMatt Macy     ssize_t tsize, const unsigned parity,
1130*eda14cbcSMatt Macy     void (*func_raidz_rec)(void **t, const size_t tsize, void **c,
1131*eda14cbcSMatt Macy     const unsigned *mul),
1132*eda14cbcSMatt Macy     const unsigned *mul)
1133*eda14cbcSMatt Macy {
1134*eda14cbcSMatt Macy 	int i;
1135*eda14cbcSMatt Macy 	ssize_t len;
1136*eda14cbcSMatt Macy 	struct abd_iter citers[3];
1137*eda14cbcSMatt Macy 	struct abd_iter xiters[3];
1138*eda14cbcSMatt Macy 	void *caddrs[3], *xaddrs[3];
1139*eda14cbcSMatt Macy 	unsigned long flags __maybe_unused = 0;
1140*eda14cbcSMatt Macy 	boolean_t cabds_is_gang_abd[3];
1141*eda14cbcSMatt Macy 	boolean_t tabds_is_gang_abd[3];
1142*eda14cbcSMatt Macy 	abd_t *c_cabds[3];
1143*eda14cbcSMatt Macy 	abd_t *c_tabds[3];
1144*eda14cbcSMatt Macy 
1145*eda14cbcSMatt Macy 	ASSERT3U(parity, <=, 3);
1146*eda14cbcSMatt Macy 
1147*eda14cbcSMatt Macy 	for (i = 0; i < parity; i++) {
1148*eda14cbcSMatt Macy 		cabds_is_gang_abd[i] = abd_is_gang(cabds[i]);
1149*eda14cbcSMatt Macy 		tabds_is_gang_abd[i] = abd_is_gang(tabds[i]);
1150*eda14cbcSMatt Macy 		c_cabds[i] =
1151*eda14cbcSMatt Macy 		    abd_init_abd_iter(cabds[i], &citers[i], 0);
1152*eda14cbcSMatt Macy 		c_tabds[i] =
1153*eda14cbcSMatt Macy 		    abd_init_abd_iter(tabds[i], &xiters[i], 0);
1154*eda14cbcSMatt Macy 	}
1155*eda14cbcSMatt Macy 
1156*eda14cbcSMatt Macy 	abd_enter_critical(flags);
1157*eda14cbcSMatt Macy 	while (tsize > 0) {
1158*eda14cbcSMatt Macy 
1159*eda14cbcSMatt Macy 		for (i = 0; i < parity; i++) {
1160*eda14cbcSMatt Macy 			/*
1161*eda14cbcSMatt Macy 			 * If we are at the end of the gang ABD we
1162*eda14cbcSMatt Macy 			 * are done.
1163*eda14cbcSMatt Macy 			 */
1164*eda14cbcSMatt Macy 			if (cabds_is_gang_abd[i] && !c_cabds[i])
1165*eda14cbcSMatt Macy 				break;
1166*eda14cbcSMatt Macy 			if (tabds_is_gang_abd[i] && !c_tabds[i])
1167*eda14cbcSMatt Macy 				break;
1168*eda14cbcSMatt Macy 			abd_iter_map(&citers[i]);
1169*eda14cbcSMatt Macy 			abd_iter_map(&xiters[i]);
1170*eda14cbcSMatt Macy 			caddrs[i] = citers[i].iter_mapaddr;
1171*eda14cbcSMatt Macy 			xaddrs[i] = xiters[i].iter_mapaddr;
1172*eda14cbcSMatt Macy 		}
1173*eda14cbcSMatt Macy 
1174*eda14cbcSMatt Macy 		len = tsize;
1175*eda14cbcSMatt Macy 		switch (parity) {
1176*eda14cbcSMatt Macy 			case 3:
1177*eda14cbcSMatt Macy 				len = MIN(xiters[2].iter_mapsize, len);
1178*eda14cbcSMatt Macy 				len = MIN(citers[2].iter_mapsize, len);
1179*eda14cbcSMatt Macy 				/* falls through */
1180*eda14cbcSMatt Macy 			case 2:
1181*eda14cbcSMatt Macy 				len = MIN(xiters[1].iter_mapsize, len);
1182*eda14cbcSMatt Macy 				len = MIN(citers[1].iter_mapsize, len);
1183*eda14cbcSMatt Macy 				/* falls through */
1184*eda14cbcSMatt Macy 			case 1:
1185*eda14cbcSMatt Macy 				len = MIN(xiters[0].iter_mapsize, len);
1186*eda14cbcSMatt Macy 				len = MIN(citers[0].iter_mapsize, len);
1187*eda14cbcSMatt Macy 		}
1188*eda14cbcSMatt Macy 		/* must be progressive */
1189*eda14cbcSMatt Macy 		ASSERT3S(len, >, 0);
1190*eda14cbcSMatt Macy 		/*
1191*eda14cbcSMatt Macy 		 * The iterated function likely will not do well if each
1192*eda14cbcSMatt Macy 		 * segment except the last one is not multiple of 512 (raidz).
1193*eda14cbcSMatt Macy 		 */
1194*eda14cbcSMatt Macy 		ASSERT3U(((uint64_t)len & 511ULL), ==, 0);
1195*eda14cbcSMatt Macy 
1196*eda14cbcSMatt Macy 		func_raidz_rec(xaddrs, len, caddrs, mul);
1197*eda14cbcSMatt Macy 
1198*eda14cbcSMatt Macy 		for (i = parity-1; i >= 0; i--) {
1199*eda14cbcSMatt Macy 			abd_iter_unmap(&xiters[i]);
1200*eda14cbcSMatt Macy 			abd_iter_unmap(&citers[i]);
1201*eda14cbcSMatt Macy 			c_tabds[i] =
1202*eda14cbcSMatt Macy 			    abd_advance_abd_iter(tabds[i], c_tabds[i],
1203*eda14cbcSMatt Macy 			    &xiters[i], len);
1204*eda14cbcSMatt Macy 			c_cabds[i] =
1205*eda14cbcSMatt Macy 			    abd_advance_abd_iter(cabds[i], c_cabds[i],
1206*eda14cbcSMatt Macy 			    &citers[i], len);
1207*eda14cbcSMatt Macy 		}
1208*eda14cbcSMatt Macy 
1209*eda14cbcSMatt Macy 		tsize -= len;
1210*eda14cbcSMatt Macy 		ASSERT3S(tsize, >=, 0);
1211*eda14cbcSMatt Macy 	}
1212*eda14cbcSMatt Macy 	abd_exit_critical(flags);
1213*eda14cbcSMatt Macy }
1214