1*61145dc2SMartin Matuska // SPDX-License-Identifier: CDDL-1.0
2eda14cbcSMatt Macy /*
3eda14cbcSMatt Macy * CDDL HEADER START
4eda14cbcSMatt Macy *
5eda14cbcSMatt Macy * The contents of this file are subject to the terms of the
6eda14cbcSMatt Macy * Common Development and Distribution License (the "License").
7eda14cbcSMatt Macy * You may not use this file except in compliance with the License.
8eda14cbcSMatt Macy *
9eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10271171e0SMartin Matuska * or https://opensource.org/licenses/CDDL-1.0.
11eda14cbcSMatt Macy * See the License for the specific language governing permissions
12eda14cbcSMatt Macy * and limitations under the License.
13eda14cbcSMatt Macy *
14eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each
15eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the
17eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying
18eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner]
19eda14cbcSMatt Macy *
20eda14cbcSMatt Macy * CDDL HEADER END
21eda14cbcSMatt Macy */
22eda14cbcSMatt Macy /*
23eda14cbcSMatt Macy * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
24eda14cbcSMatt Macy * Copyright (c) 2019 by Delphix. All rights reserved.
25eda14cbcSMatt Macy */
26eda14cbcSMatt Macy
27eda14cbcSMatt Macy /*
28eda14cbcSMatt Macy * ARC buffer data (ABD).
29eda14cbcSMatt Macy *
30eda14cbcSMatt Macy * ABDs are an abstract data structure for the ARC which can use two
31eda14cbcSMatt Macy * different ways of storing the underlying data:
32eda14cbcSMatt Macy *
33eda14cbcSMatt Macy * (a) Linear buffer. In this case, all the data in the ABD is stored in one
34eda14cbcSMatt Macy * contiguous buffer in memory (from a zio_[data_]buf_* kmem cache).
35eda14cbcSMatt Macy *
36eda14cbcSMatt Macy * +-------------------+
37eda14cbcSMatt Macy * | ABD (linear) |
38eda14cbcSMatt Macy * | abd_flags = ... |
39eda14cbcSMatt Macy * | abd_size = ... | +--------------------------------+
40eda14cbcSMatt Macy * | abd_buf ------------->| raw buffer of size abd_size |
41eda14cbcSMatt Macy * +-------------------+ +--------------------------------+
42eda14cbcSMatt Macy * no abd_chunks
43eda14cbcSMatt Macy *
44eda14cbcSMatt Macy * (b) Scattered buffer. In this case, the data in the ABD is split into
45eda14cbcSMatt Macy * equal-sized chunks (from the abd_chunk_cache kmem_cache), with pointers
46eda14cbcSMatt Macy * to the chunks recorded in an array at the end of the ABD structure.
47eda14cbcSMatt Macy *
48eda14cbcSMatt Macy * +-------------------+
49eda14cbcSMatt Macy * | ABD (scattered) |
50eda14cbcSMatt Macy * | abd_flags = ... |
51eda14cbcSMatt Macy * | abd_size = ... |
52eda14cbcSMatt Macy * | abd_offset = 0 | +-----------+
53eda14cbcSMatt Macy * | abd_chunks[0] ----------------------------->| chunk 0 |
54eda14cbcSMatt Macy * | abd_chunks[1] ---------------------+ +-----------+
55eda14cbcSMatt Macy * | ... | | +-----------+
56eda14cbcSMatt Macy * | abd_chunks[N-1] ---------+ +------->| chunk 1 |
57eda14cbcSMatt Macy * +-------------------+ | +-----------+
58eda14cbcSMatt Macy * | ...
59eda14cbcSMatt Macy * | +-----------+
60eda14cbcSMatt Macy * +----------------->| chunk N-1 |
61eda14cbcSMatt Macy * +-----------+
62eda14cbcSMatt Macy *
63eda14cbcSMatt Macy * In addition to directly allocating a linear or scattered ABD, it is also
64eda14cbcSMatt Macy * possible to create an ABD by requesting the "sub-ABD" starting at an offset
65eda14cbcSMatt Macy * within an existing ABD. In linear buffers this is simple (set abd_buf of
66eda14cbcSMatt Macy * the new ABD to the starting point within the original raw buffer), but
67eda14cbcSMatt Macy * scattered ABDs are a little more complex. The new ABD makes a copy of the
68eda14cbcSMatt Macy * relevant abd_chunks pointers (but not the underlying data). However, to
69eda14cbcSMatt Macy * provide arbitrary rather than only chunk-aligned starting offsets, it also
70eda14cbcSMatt Macy * tracks an abd_offset field which represents the starting point of the data
71eda14cbcSMatt Macy * within the first chunk in abd_chunks. For both linear and scattered ABDs,
72eda14cbcSMatt Macy * creating an offset ABD marks the original ABD as the offset's parent, and the
73eda14cbcSMatt Macy * original ABD's abd_children refcount is incremented. This data allows us to
74eda14cbcSMatt Macy * ensure the root ABD isn't deleted before its children.
75eda14cbcSMatt Macy *
76eda14cbcSMatt Macy * Most consumers should never need to know what type of ABD they're using --
77eda14cbcSMatt Macy * the ABD public API ensures that it's possible to transparently switch from
78eda14cbcSMatt Macy * using a linear ABD to a scattered one when doing so would be beneficial.
79eda14cbcSMatt Macy *
80eda14cbcSMatt Macy * If you need to use the data within an ABD directly, if you know it's linear
81eda14cbcSMatt Macy * (because you allocated it) you can use abd_to_buf() to access the underlying
82eda14cbcSMatt Macy * raw buffer. Otherwise, you should use one of the abd_borrow_buf* functions
83eda14cbcSMatt Macy * which will allocate a raw buffer if necessary. Use the abd_return_buf*
84eda14cbcSMatt Macy * functions to return any raw buffers that are no longer necessary when you're
85eda14cbcSMatt Macy * done using them.
86eda14cbcSMatt Macy *
87eda14cbcSMatt Macy * There are a variety of ABD APIs that implement basic buffer operations:
88eda14cbcSMatt Macy * compare, copy, read, write, and fill with zeroes. If you need a custom
89eda14cbcSMatt Macy * function which progressively accesses the whole ABD, use the abd_iterate_*
90eda14cbcSMatt Macy * functions.
91eda14cbcSMatt Macy *
92eda14cbcSMatt Macy * As an additional feature, linear and scatter ABD's can be stitched together
937a7741afSMartin Matuska * by using the gang ABD type (abd_alloc_gang()). This allows for multiple ABDs
947a7741afSMartin Matuska * to be viewed as a singular ABD.
95eda14cbcSMatt Macy *
96eda14cbcSMatt Macy * It is possible to make all ABDs linear by setting zfs_abd_scatter_enabled to
97eda14cbcSMatt Macy * B_FALSE.
98eda14cbcSMatt Macy */
99eda14cbcSMatt Macy
100eda14cbcSMatt Macy #include <sys/abd_impl.h>
101eda14cbcSMatt Macy #include <sys/param.h>
102eda14cbcSMatt Macy #include <sys/zio.h>
103eda14cbcSMatt Macy #include <sys/zfs_context.h>
104eda14cbcSMatt Macy #include <sys/zfs_znode.h>
105eda14cbcSMatt Macy
106eda14cbcSMatt Macy /* see block comment above for description */
107eda14cbcSMatt Macy int zfs_abd_scatter_enabled = B_TRUE;
108eda14cbcSMatt Macy
109eda14cbcSMatt Macy void
abd_verify(abd_t * abd)110eda14cbcSMatt Macy abd_verify(abd_t *abd)
111eda14cbcSMatt Macy {
11233b8c039SMartin Matuska #ifdef ZFS_DEBUG
1137a7741afSMartin Matuska if (abd_is_from_pages(abd)) {
1147a7741afSMartin Matuska ASSERT3U(abd->abd_size, <=, DMU_MAX_ACCESS);
1157a7741afSMartin Matuska } else {
116eda14cbcSMatt Macy ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE);
1177a7741afSMartin Matuska }
118eda14cbcSMatt Macy ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR |
119eda14cbcSMatt Macy ABD_FLAG_OWNER | ABD_FLAG_META | ABD_FLAG_MULTI_ZONE |
120eda14cbcSMatt Macy ABD_FLAG_MULTI_CHUNK | ABD_FLAG_LINEAR_PAGE | ABD_FLAG_GANG |
1217a7741afSMartin Matuska ABD_FLAG_GANG_FREE | ABD_FLAG_ALLOCD | ABD_FLAG_FROM_PAGES));
122eda14cbcSMatt Macy IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER));
123eda14cbcSMatt Macy IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER);
124eda14cbcSMatt Macy if (abd_is_linear(abd)) {
125d09a955aSMateusz Guzik ASSERT3U(abd->abd_size, >, 0);
126eda14cbcSMatt Macy ASSERT3P(ABD_LINEAR_BUF(abd), !=, NULL);
127eda14cbcSMatt Macy } else if (abd_is_gang(abd)) {
128eda14cbcSMatt Macy uint_t child_sizes = 0;
129eda14cbcSMatt Macy for (abd_t *cabd = list_head(&ABD_GANG(abd).abd_gang_chain);
130eda14cbcSMatt Macy cabd != NULL;
131eda14cbcSMatt Macy cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) {
132eda14cbcSMatt Macy ASSERT(list_link_active(&cabd->abd_gang_link));
133eda14cbcSMatt Macy child_sizes += cabd->abd_size;
134eda14cbcSMatt Macy abd_verify(cabd);
135eda14cbcSMatt Macy }
136eda14cbcSMatt Macy ASSERT3U(abd->abd_size, ==, child_sizes);
137eda14cbcSMatt Macy } else {
138d09a955aSMateusz Guzik ASSERT3U(abd->abd_size, >, 0);
139eda14cbcSMatt Macy abd_verify_scatter(abd);
140eda14cbcSMatt Macy }
14133b8c039SMartin Matuska #endif
142eda14cbcSMatt Macy }
143eda14cbcSMatt Macy
1447a7741afSMartin Matuska void
abd_init_struct(abd_t * abd)145184c1b94SMartin Matuska abd_init_struct(abd_t *abd)
146eda14cbcSMatt Macy {
147184c1b94SMartin Matuska list_link_init(&abd->abd_gang_link);
148184c1b94SMartin Matuska mutex_init(&abd->abd_mtx, NULL, MUTEX_DEFAULT, NULL);
149184c1b94SMartin Matuska abd->abd_flags = 0;
150184c1b94SMartin Matuska #ifdef ZFS_DEBUG
151184c1b94SMartin Matuska zfs_refcount_create(&abd->abd_children);
152184c1b94SMartin Matuska abd->abd_parent = NULL;
153184c1b94SMartin Matuska #endif
154184c1b94SMartin Matuska abd->abd_size = 0;
155184c1b94SMartin Matuska }
156184c1b94SMartin Matuska
157184c1b94SMartin Matuska static void
abd_fini_struct(abd_t * abd)158184c1b94SMartin Matuska abd_fini_struct(abd_t *abd)
159184c1b94SMartin Matuska {
160184c1b94SMartin Matuska mutex_destroy(&abd->abd_mtx);
161184c1b94SMartin Matuska ASSERT(!list_link_active(&abd->abd_gang_link));
162184c1b94SMartin Matuska #ifdef ZFS_DEBUG
163184c1b94SMartin Matuska zfs_refcount_destroy(&abd->abd_children);
164184c1b94SMartin Matuska #endif
165184c1b94SMartin Matuska }
166184c1b94SMartin Matuska
167184c1b94SMartin Matuska abd_t *
abd_alloc_struct(size_t size)168184c1b94SMartin Matuska abd_alloc_struct(size_t size)
169184c1b94SMartin Matuska {
170184c1b94SMartin Matuska abd_t *abd = abd_alloc_struct_impl(size);
171184c1b94SMartin Matuska abd_init_struct(abd);
172184c1b94SMartin Matuska abd->abd_flags |= ABD_FLAG_ALLOCD;
173184c1b94SMartin Matuska return (abd);
174184c1b94SMartin Matuska }
175184c1b94SMartin Matuska
176184c1b94SMartin Matuska void
abd_free_struct(abd_t * abd)177184c1b94SMartin Matuska abd_free_struct(abd_t *abd)
178184c1b94SMartin Matuska {
179184c1b94SMartin Matuska abd_fini_struct(abd);
180184c1b94SMartin Matuska abd_free_struct_impl(abd);
181eda14cbcSMatt Macy }
182eda14cbcSMatt Macy
183eda14cbcSMatt Macy /*
184eda14cbcSMatt Macy * Allocate an ABD, along with its own underlying data buffers. Use this if you
185eda14cbcSMatt Macy * don't care whether the ABD is linear or not.
186eda14cbcSMatt Macy */
187eda14cbcSMatt Macy abd_t *
abd_alloc(size_t size,boolean_t is_metadata)188eda14cbcSMatt Macy abd_alloc(size_t size, boolean_t is_metadata)
189eda14cbcSMatt Macy {
1901f88aa09SMartin Matuska if (abd_size_alloc_linear(size))
191eda14cbcSMatt Macy return (abd_alloc_linear(size, is_metadata));
192eda14cbcSMatt Macy
193eda14cbcSMatt Macy VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);
194eda14cbcSMatt Macy
195eda14cbcSMatt Macy abd_t *abd = abd_alloc_struct(size);
196184c1b94SMartin Matuska abd->abd_flags |= ABD_FLAG_OWNER;
197eda14cbcSMatt Macy abd->abd_u.abd_scatter.abd_offset = 0;
198eda14cbcSMatt Macy abd_alloc_chunks(abd, size);
199eda14cbcSMatt Macy
200eda14cbcSMatt Macy if (is_metadata) {
201eda14cbcSMatt Macy abd->abd_flags |= ABD_FLAG_META;
202eda14cbcSMatt Macy }
203eda14cbcSMatt Macy abd->abd_size = size;
204eda14cbcSMatt Macy
205eda14cbcSMatt Macy abd_update_scatter_stats(abd, ABDSTAT_INCR);
206eda14cbcSMatt Macy
207eda14cbcSMatt Macy return (abd);
208eda14cbcSMatt Macy }
209eda14cbcSMatt Macy
210eda14cbcSMatt Macy /*
211eda14cbcSMatt Macy * Allocate an ABD that must be linear, along with its own underlying data
212eda14cbcSMatt Macy * buffer. Only use this when it would be very annoying to write your ABD
213eda14cbcSMatt Macy * consumer with a scattered ABD.
214eda14cbcSMatt Macy */
215eda14cbcSMatt Macy abd_t *
abd_alloc_linear(size_t size,boolean_t is_metadata)216eda14cbcSMatt Macy abd_alloc_linear(size_t size, boolean_t is_metadata)
217eda14cbcSMatt Macy {
218eda14cbcSMatt Macy abd_t *abd = abd_alloc_struct(0);
219eda14cbcSMatt Macy
220eda14cbcSMatt Macy VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);
221eda14cbcSMatt Macy
222184c1b94SMartin Matuska abd->abd_flags |= ABD_FLAG_LINEAR | ABD_FLAG_OWNER;
223eda14cbcSMatt Macy if (is_metadata) {
224eda14cbcSMatt Macy abd->abd_flags |= ABD_FLAG_META;
225eda14cbcSMatt Macy }
226eda14cbcSMatt Macy abd->abd_size = size;
227eda14cbcSMatt Macy
228eda14cbcSMatt Macy if (is_metadata) {
229eda14cbcSMatt Macy ABD_LINEAR_BUF(abd) = zio_buf_alloc(size);
230eda14cbcSMatt Macy } else {
231eda14cbcSMatt Macy ABD_LINEAR_BUF(abd) = zio_data_buf_alloc(size);
232eda14cbcSMatt Macy }
233eda14cbcSMatt Macy
234eda14cbcSMatt Macy abd_update_linear_stats(abd, ABDSTAT_INCR);
235eda14cbcSMatt Macy
236eda14cbcSMatt Macy return (abd);
237eda14cbcSMatt Macy }
238eda14cbcSMatt Macy
239eda14cbcSMatt Macy static void
abd_free_linear(abd_t * abd)240eda14cbcSMatt Macy abd_free_linear(abd_t *abd)
241eda14cbcSMatt Macy {
242eda14cbcSMatt Macy if (abd_is_linear_page(abd)) {
243eda14cbcSMatt Macy abd_free_linear_page(abd);
244eda14cbcSMatt Macy return;
245eda14cbcSMatt Macy }
2467a7741afSMartin Matuska
247eda14cbcSMatt Macy if (abd->abd_flags & ABD_FLAG_META) {
248eda14cbcSMatt Macy zio_buf_free(ABD_LINEAR_BUF(abd), abd->abd_size);
249eda14cbcSMatt Macy } else {
250eda14cbcSMatt Macy zio_data_buf_free(ABD_LINEAR_BUF(abd), abd->abd_size);
251eda14cbcSMatt Macy }
252eda14cbcSMatt Macy
253eda14cbcSMatt Macy abd_update_linear_stats(abd, ABDSTAT_DECR);
254eda14cbcSMatt Macy }
255eda14cbcSMatt Macy
256eda14cbcSMatt Macy static void
abd_free_gang(abd_t * abd)257184c1b94SMartin Matuska abd_free_gang(abd_t *abd)
258eda14cbcSMatt Macy {
259eda14cbcSMatt Macy ASSERT(abd_is_gang(abd));
260184c1b94SMartin Matuska abd_t *cabd;
261eda14cbcSMatt Macy
262184c1b94SMartin Matuska while ((cabd = list_head(&ABD_GANG(abd).abd_gang_chain)) != NULL) {
263eda14cbcSMatt Macy /*
264eda14cbcSMatt Macy * We must acquire the child ABDs mutex to ensure that if it
265eda14cbcSMatt Macy * is being added to another gang ABD we will set the link
266eda14cbcSMatt Macy * as inactive when removing it from this gang ABD and before
267eda14cbcSMatt Macy * adding it to the other gang ABD.
268eda14cbcSMatt Macy */
269eda14cbcSMatt Macy mutex_enter(&cabd->abd_mtx);
270eda14cbcSMatt Macy ASSERT(list_link_active(&cabd->abd_gang_link));
271eda14cbcSMatt Macy list_remove(&ABD_GANG(abd).abd_gang_chain, cabd);
272eda14cbcSMatt Macy mutex_exit(&cabd->abd_mtx);
273184c1b94SMartin Matuska if (cabd->abd_flags & ABD_FLAG_GANG_FREE)
274eda14cbcSMatt Macy abd_free(cabd);
275eda14cbcSMatt Macy }
276eda14cbcSMatt Macy list_destroy(&ABD_GANG(abd).abd_gang_chain);
277184c1b94SMartin Matuska }
278184c1b94SMartin Matuska
279184c1b94SMartin Matuska static void
abd_free_scatter(abd_t * abd)280184c1b94SMartin Matuska abd_free_scatter(abd_t *abd)
281184c1b94SMartin Matuska {
282184c1b94SMartin Matuska abd_free_chunks(abd);
283184c1b94SMartin Matuska abd_update_scatter_stats(abd, ABDSTAT_DECR);
284eda14cbcSMatt Macy }
285eda14cbcSMatt Macy
286eda14cbcSMatt Macy /*
287184c1b94SMartin Matuska * Free an ABD. Use with any kind of abd: those created with abd_alloc_*()
288184c1b94SMartin Matuska * and abd_get_*(), including abd_get_offset_struct().
289184c1b94SMartin Matuska *
290184c1b94SMartin Matuska * If the ABD was created with abd_alloc_*(), the underlying data
291184c1b94SMartin Matuska * (scatterlist or linear buffer) will also be freed. (Subject to ownership
292184c1b94SMartin Matuska * changes via abd_*_ownership_of_buf().)
293184c1b94SMartin Matuska *
294184c1b94SMartin Matuska * Unless the ABD was created with abd_get_offset_struct(), the abd_t will
295184c1b94SMartin Matuska * also be freed.
296eda14cbcSMatt Macy */
297eda14cbcSMatt Macy void
abd_free(abd_t * abd)298eda14cbcSMatt Macy abd_free(abd_t *abd)
299eda14cbcSMatt Macy {
300eda14cbcSMatt Macy if (abd == NULL)
301eda14cbcSMatt Macy return;
302eda14cbcSMatt Macy
303eda14cbcSMatt Macy abd_verify(abd);
304184c1b94SMartin Matuska #ifdef ZFS_DEBUG
305184c1b94SMartin Matuska IMPLY(abd->abd_flags & ABD_FLAG_OWNER, abd->abd_parent == NULL);
306184c1b94SMartin Matuska #endif
307184c1b94SMartin Matuska
308184c1b94SMartin Matuska if (abd_is_gang(abd)) {
309184c1b94SMartin Matuska abd_free_gang(abd);
310184c1b94SMartin Matuska } else if (abd_is_linear(abd)) {
311184c1b94SMartin Matuska if (abd->abd_flags & ABD_FLAG_OWNER)
312eda14cbcSMatt Macy abd_free_linear(abd);
313184c1b94SMartin Matuska } else {
314184c1b94SMartin Matuska if (abd->abd_flags & ABD_FLAG_OWNER)
315eda14cbcSMatt Macy abd_free_scatter(abd);
316eda14cbcSMatt Macy }
317eda14cbcSMatt Macy
318184c1b94SMartin Matuska #ifdef ZFS_DEBUG
319184c1b94SMartin Matuska if (abd->abd_parent != NULL) {
320184c1b94SMartin Matuska (void) zfs_refcount_remove_many(&abd->abd_parent->abd_children,
321184c1b94SMartin Matuska abd->abd_size, abd);
322184c1b94SMartin Matuska }
323184c1b94SMartin Matuska #endif
324184c1b94SMartin Matuska
325184c1b94SMartin Matuska abd_fini_struct(abd);
326184c1b94SMartin Matuska if (abd->abd_flags & ABD_FLAG_ALLOCD)
327184c1b94SMartin Matuska abd_free_struct_impl(abd);
328184c1b94SMartin Matuska }
329184c1b94SMartin Matuska
330eda14cbcSMatt Macy /*
331eda14cbcSMatt Macy * Allocate an ABD of the same format (same metadata flag, same scatterize
332eda14cbcSMatt Macy * setting) as another ABD.
333eda14cbcSMatt Macy */
334eda14cbcSMatt Macy abd_t *
abd_alloc_sametype(abd_t * sabd,size_t size)335eda14cbcSMatt Macy abd_alloc_sametype(abd_t *sabd, size_t size)
336eda14cbcSMatt Macy {
337eda14cbcSMatt Macy boolean_t is_metadata = (sabd->abd_flags & ABD_FLAG_META) != 0;
338eda14cbcSMatt Macy if (abd_is_linear(sabd) &&
339eda14cbcSMatt Macy !abd_is_linear_page(sabd)) {
340eda14cbcSMatt Macy return (abd_alloc_linear(size, is_metadata));
341eda14cbcSMatt Macy } else {
342eda14cbcSMatt Macy return (abd_alloc(size, is_metadata));
343eda14cbcSMatt Macy }
344eda14cbcSMatt Macy }
345eda14cbcSMatt Macy
346eda14cbcSMatt Macy /*
347eda14cbcSMatt Macy * Create gang ABD that will be the head of a list of ABD's. This is used
348eda14cbcSMatt Macy * to "chain" scatter/gather lists together when constructing aggregated
349eda14cbcSMatt Macy * IO's. To free this abd, abd_free() must be called.
350eda14cbcSMatt Macy */
351eda14cbcSMatt Macy abd_t *
abd_alloc_gang(void)352184c1b94SMartin Matuska abd_alloc_gang(void)
353eda14cbcSMatt Macy {
354184c1b94SMartin Matuska abd_t *abd = abd_alloc_struct(0);
355184c1b94SMartin Matuska abd->abd_flags |= ABD_FLAG_GANG | ABD_FLAG_OWNER;
356eda14cbcSMatt Macy list_create(&ABD_GANG(abd).abd_gang_chain,
357eda14cbcSMatt Macy sizeof (abd_t), offsetof(abd_t, abd_gang_link));
358eda14cbcSMatt Macy return (abd);
359eda14cbcSMatt Macy }
360eda14cbcSMatt Macy
361eda14cbcSMatt Macy /*
362eda14cbcSMatt Macy * Add a child gang ABD to a parent gang ABDs chained list.
363eda14cbcSMatt Macy */
364eda14cbcSMatt Macy static void
abd_gang_add_gang(abd_t * pabd,abd_t * cabd,boolean_t free_on_free)365eda14cbcSMatt Macy abd_gang_add_gang(abd_t *pabd, abd_t *cabd, boolean_t free_on_free)
366eda14cbcSMatt Macy {
367eda14cbcSMatt Macy ASSERT(abd_is_gang(pabd));
368eda14cbcSMatt Macy ASSERT(abd_is_gang(cabd));
369eda14cbcSMatt Macy
370eda14cbcSMatt Macy if (free_on_free) {
371eda14cbcSMatt Macy /*
372eda14cbcSMatt Macy * If the parent is responsible for freeing the child gang
373184c1b94SMartin Matuska * ABD we will just splice the child's children ABD list to
374184c1b94SMartin Matuska * the parent's list and immediately free the child gang ABD
375eda14cbcSMatt Macy * struct. The parent gang ABDs children from the child gang
376eda14cbcSMatt Macy * will retain all the free_on_free settings after being
377eda14cbcSMatt Macy * added to the parents list.
378eda14cbcSMatt Macy */
379e639e0d2SMartin Matuska #ifdef ZFS_DEBUG
380e639e0d2SMartin Matuska /*
381e639e0d2SMartin Matuska * If cabd had abd_parent, we have to drop it here. We can't
382e639e0d2SMartin Matuska * transfer it to pabd, nor we can clear abd_size leaving it.
383e639e0d2SMartin Matuska */
384e639e0d2SMartin Matuska if (cabd->abd_parent != NULL) {
385e639e0d2SMartin Matuska (void) zfs_refcount_remove_many(
386e639e0d2SMartin Matuska &cabd->abd_parent->abd_children,
387e639e0d2SMartin Matuska cabd->abd_size, cabd);
388e639e0d2SMartin Matuska cabd->abd_parent = NULL;
389e639e0d2SMartin Matuska }
390e639e0d2SMartin Matuska #endif
391eda14cbcSMatt Macy pabd->abd_size += cabd->abd_size;
392e639e0d2SMartin Matuska cabd->abd_size = 0;
393eda14cbcSMatt Macy list_move_tail(&ABD_GANG(pabd).abd_gang_chain,
394eda14cbcSMatt Macy &ABD_GANG(cabd).abd_gang_chain);
395eda14cbcSMatt Macy ASSERT(list_is_empty(&ABD_GANG(cabd).abd_gang_chain));
396eda14cbcSMatt Macy abd_verify(pabd);
397184c1b94SMartin Matuska abd_free(cabd);
398eda14cbcSMatt Macy } else {
399eda14cbcSMatt Macy for (abd_t *child = list_head(&ABD_GANG(cabd).abd_gang_chain);
400eda14cbcSMatt Macy child != NULL;
401eda14cbcSMatt Macy child = list_next(&ABD_GANG(cabd).abd_gang_chain, child)) {
402eda14cbcSMatt Macy /*
403eda14cbcSMatt Macy * We always pass B_FALSE for free_on_free as it is the
40416038816SMartin Matuska * original child gang ABDs responsibility to determine
405eda14cbcSMatt Macy * if any of its child ABDs should be free'd on the call
406eda14cbcSMatt Macy * to abd_free().
407eda14cbcSMatt Macy */
408eda14cbcSMatt Macy abd_gang_add(pabd, child, B_FALSE);
409eda14cbcSMatt Macy }
410eda14cbcSMatt Macy abd_verify(pabd);
411eda14cbcSMatt Macy }
412eda14cbcSMatt Macy }
413eda14cbcSMatt Macy
414eda14cbcSMatt Macy /*
415eda14cbcSMatt Macy * Add a child ABD to a gang ABD's chained list.
416eda14cbcSMatt Macy */
417eda14cbcSMatt Macy void
abd_gang_add(abd_t * pabd,abd_t * cabd,boolean_t free_on_free)418eda14cbcSMatt Macy abd_gang_add(abd_t *pabd, abd_t *cabd, boolean_t free_on_free)
419eda14cbcSMatt Macy {
420eda14cbcSMatt Macy ASSERT(abd_is_gang(pabd));
421eda14cbcSMatt Macy abd_t *child_abd = NULL;
422eda14cbcSMatt Macy
423eda14cbcSMatt Macy /*
424eda14cbcSMatt Macy * If the child being added is a gang ABD, we will add the
425184c1b94SMartin Matuska * child's ABDs to the parent gang ABD. This allows us to account
426eda14cbcSMatt Macy * for the offset correctly in the parent gang ABD.
427eda14cbcSMatt Macy */
428eda14cbcSMatt Macy if (abd_is_gang(cabd)) {
429eda14cbcSMatt Macy ASSERT(!list_link_active(&cabd->abd_gang_link));
430eda14cbcSMatt Macy return (abd_gang_add_gang(pabd, cabd, free_on_free));
431eda14cbcSMatt Macy }
432eda14cbcSMatt Macy ASSERT(!abd_is_gang(cabd));
433eda14cbcSMatt Macy
434eda14cbcSMatt Macy /*
435eda14cbcSMatt Macy * In order to verify that an ABD is not already part of
436eda14cbcSMatt Macy * another gang ABD, we must lock the child ABD's abd_mtx
437eda14cbcSMatt Macy * to check its abd_gang_link status. We unlock the abd_mtx
438eda14cbcSMatt Macy * only after it is has been added to a gang ABD, which
439eda14cbcSMatt Macy * will update the abd_gang_link's status. See comment below
440eda14cbcSMatt Macy * for how an ABD can be in multiple gang ABD's simultaneously.
441eda14cbcSMatt Macy */
442eda14cbcSMatt Macy mutex_enter(&cabd->abd_mtx);
443eda14cbcSMatt Macy if (list_link_active(&cabd->abd_gang_link)) {
444eda14cbcSMatt Macy /*
445eda14cbcSMatt Macy * If the child ABD is already part of another
446eda14cbcSMatt Macy * gang ABD then we must allocate a new
447eda14cbcSMatt Macy * ABD to use a separate link. We mark the newly
448eda14cbcSMatt Macy * allocated ABD with ABD_FLAG_GANG_FREE, before
449eda14cbcSMatt Macy * adding it to the gang ABD's list, to make the
450eda14cbcSMatt Macy * gang ABD aware that it is responsible to call
451184c1b94SMartin Matuska * abd_free(). We use abd_get_offset() in order
452eda14cbcSMatt Macy * to just allocate a new ABD but avoid copying the
453eda14cbcSMatt Macy * data over into the newly allocated ABD.
454eda14cbcSMatt Macy *
455eda14cbcSMatt Macy * An ABD may become part of multiple gang ABD's. For
456eda14cbcSMatt Macy * example, when writing ditto bocks, the same ABD
457eda14cbcSMatt Macy * is used to write 2 or 3 locations with 2 or 3
458eda14cbcSMatt Macy * zio_t's. Each of the zio's may be aggregated with
459eda14cbcSMatt Macy * different adjacent zio's. zio aggregation uses gang
460eda14cbcSMatt Macy * zio's, so the single ABD can become part of multiple
461eda14cbcSMatt Macy * gang zio's.
462eda14cbcSMatt Macy *
463eda14cbcSMatt Macy * The ASSERT below is to make sure that if
464eda14cbcSMatt Macy * free_on_free is passed as B_TRUE, the ABD can
465eda14cbcSMatt Macy * not be in multiple gang ABD's. The gang ABD
466eda14cbcSMatt Macy * can not be responsible for cleaning up the child
467eda14cbcSMatt Macy * ABD memory allocation if the ABD can be in
468eda14cbcSMatt Macy * multiple gang ABD's at one time.
469eda14cbcSMatt Macy */
470eda14cbcSMatt Macy ASSERT3B(free_on_free, ==, B_FALSE);
471eda14cbcSMatt Macy child_abd = abd_get_offset(cabd, 0);
472eda14cbcSMatt Macy child_abd->abd_flags |= ABD_FLAG_GANG_FREE;
473eda14cbcSMatt Macy } else {
474eda14cbcSMatt Macy child_abd = cabd;
475eda14cbcSMatt Macy if (free_on_free)
476eda14cbcSMatt Macy child_abd->abd_flags |= ABD_FLAG_GANG_FREE;
477eda14cbcSMatt Macy }
478eda14cbcSMatt Macy ASSERT3P(child_abd, !=, NULL);
479eda14cbcSMatt Macy
480eda14cbcSMatt Macy list_insert_tail(&ABD_GANG(pabd).abd_gang_chain, child_abd);
481eda14cbcSMatt Macy mutex_exit(&cabd->abd_mtx);
482eda14cbcSMatt Macy pabd->abd_size += child_abd->abd_size;
483eda14cbcSMatt Macy }
484eda14cbcSMatt Macy
485eda14cbcSMatt Macy /*
486eda14cbcSMatt Macy * Locate the ABD for the supplied offset in the gang ABD.
487eda14cbcSMatt Macy * Return a new offset relative to the returned ABD.
488eda14cbcSMatt Macy */
489eda14cbcSMatt Macy abd_t *
abd_gang_get_offset(abd_t * abd,size_t * off)490eda14cbcSMatt Macy abd_gang_get_offset(abd_t *abd, size_t *off)
491eda14cbcSMatt Macy {
492eda14cbcSMatt Macy abd_t *cabd;
493eda14cbcSMatt Macy
494eda14cbcSMatt Macy ASSERT(abd_is_gang(abd));
495eda14cbcSMatt Macy ASSERT3U(*off, <, abd->abd_size);
496eda14cbcSMatt Macy for (cabd = list_head(&ABD_GANG(abd).abd_gang_chain); cabd != NULL;
497eda14cbcSMatt Macy cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) {
498eda14cbcSMatt Macy if (*off >= cabd->abd_size)
499eda14cbcSMatt Macy *off -= cabd->abd_size;
500eda14cbcSMatt Macy else
501eda14cbcSMatt Macy return (cabd);
502eda14cbcSMatt Macy }
503eda14cbcSMatt Macy VERIFY3P(cabd, !=, NULL);
504eda14cbcSMatt Macy return (cabd);
505eda14cbcSMatt Macy }
506eda14cbcSMatt Macy
507eda14cbcSMatt Macy /*
508184c1b94SMartin Matuska * Allocate a new ABD, using the provided struct (if non-NULL, and if
509184c1b94SMartin Matuska * circumstances allow - otherwise allocate the struct). The returned ABD will
510184c1b94SMartin Matuska * point to offset off of sabd. It shares the underlying buffer data with sabd.
511184c1b94SMartin Matuska * Use abd_free() to free. sabd must not be freed while any derived ABDs exist.
512eda14cbcSMatt Macy */
513eda14cbcSMatt Macy static abd_t *
abd_get_offset_impl(abd_t * abd,abd_t * sabd,size_t off,size_t size)514184c1b94SMartin Matuska abd_get_offset_impl(abd_t *abd, abd_t *sabd, size_t off, size_t size)
515eda14cbcSMatt Macy {
516eda14cbcSMatt Macy abd_verify(sabd);
517184c1b94SMartin Matuska ASSERT3U(off + size, <=, sabd->abd_size);
518eda14cbcSMatt Macy
519eda14cbcSMatt Macy if (abd_is_linear(sabd)) {
520184c1b94SMartin Matuska if (abd == NULL)
521eda14cbcSMatt Macy abd = abd_alloc_struct(0);
522eda14cbcSMatt Macy /*
523eda14cbcSMatt Macy * Even if this buf is filesystem metadata, we only track that
524eda14cbcSMatt Macy * if we own the underlying data buffer, which is not true in
525eda14cbcSMatt Macy * this case. Therefore, we don't ever use ABD_FLAG_META here.
526eda14cbcSMatt Macy */
527184c1b94SMartin Matuska abd->abd_flags |= ABD_FLAG_LINEAR;
528eda14cbcSMatt Macy
5297a7741afSMartin Matuska /*
5307a7741afSMartin Matuska * User pages from Direct I/O requests may be in a single page
5317a7741afSMartin Matuska * (ABD_FLAG_LINEAR_PAGE), and we must make sure to still flag
5327a7741afSMartin Matuska * that here for abd. This is required because we have to be
5337a7741afSMartin Matuska * careful when borrowing the buffer from the ABD because we
5347a7741afSMartin Matuska * can not place user pages under write protection on Linux.
5357a7741afSMartin Matuska * See the comments in abd_os.c for abd_borrow_buf(),
5367a7741afSMartin Matuska * abd_borrow_buf_copy(), abd_return_buf() and
5377a7741afSMartin Matuska * abd_return_buf_copy().
5387a7741afSMartin Matuska */
5397a7741afSMartin Matuska if (abd_is_from_pages(sabd)) {
5407a7741afSMartin Matuska abd->abd_flags |= ABD_FLAG_FROM_PAGES |
5417a7741afSMartin Matuska ABD_FLAG_LINEAR_PAGE;
5427a7741afSMartin Matuska }
5437a7741afSMartin Matuska
544eda14cbcSMatt Macy ABD_LINEAR_BUF(abd) = (char *)ABD_LINEAR_BUF(sabd) + off;
545eda14cbcSMatt Macy } else if (abd_is_gang(sabd)) {
546eda14cbcSMatt Macy size_t left = size;
547184c1b94SMartin Matuska if (abd == NULL) {
548184c1b94SMartin Matuska abd = abd_alloc_gang();
549184c1b94SMartin Matuska } else {
550184c1b94SMartin Matuska abd->abd_flags |= ABD_FLAG_GANG;
551184c1b94SMartin Matuska list_create(&ABD_GANG(abd).abd_gang_chain,
552184c1b94SMartin Matuska sizeof (abd_t), offsetof(abd_t, abd_gang_link));
553184c1b94SMartin Matuska }
554184c1b94SMartin Matuska
555eda14cbcSMatt Macy abd->abd_flags &= ~ABD_FLAG_OWNER;
556eda14cbcSMatt Macy for (abd_t *cabd = abd_gang_get_offset(sabd, &off);
557eda14cbcSMatt Macy cabd != NULL && left > 0;
558eda14cbcSMatt Macy cabd = list_next(&ABD_GANG(sabd).abd_gang_chain, cabd)) {
559eda14cbcSMatt Macy int csize = MIN(left, cabd->abd_size - off);
560eda14cbcSMatt Macy
561184c1b94SMartin Matuska abd_t *nabd = abd_get_offset_size(cabd, off, csize);
562184c1b94SMartin Matuska abd_gang_add(abd, nabd, B_TRUE);
563eda14cbcSMatt Macy left -= csize;
564eda14cbcSMatt Macy off = 0;
565eda14cbcSMatt Macy }
566eda14cbcSMatt Macy ASSERT3U(left, ==, 0);
567eda14cbcSMatt Macy } else {
5687cd22ac4SMartin Matuska abd = abd_get_offset_scatter(abd, sabd, off, size);
569eda14cbcSMatt Macy }
570eda14cbcSMatt Macy
571184c1b94SMartin Matuska ASSERT3P(abd, !=, NULL);
572eda14cbcSMatt Macy abd->abd_size = size;
573184c1b94SMartin Matuska #ifdef ZFS_DEBUG
574eda14cbcSMatt Macy abd->abd_parent = sabd;
575eda14cbcSMatt Macy (void) zfs_refcount_add_many(&sabd->abd_children, abd->abd_size, abd);
576184c1b94SMartin Matuska #endif
577eda14cbcSMatt Macy return (abd);
578eda14cbcSMatt Macy }
579eda14cbcSMatt Macy
580184c1b94SMartin Matuska /*
581184c1b94SMartin Matuska * Like abd_get_offset_size(), but memory for the abd_t is provided by the
582184c1b94SMartin Matuska * caller. Using this routine can improve performance by avoiding the cost
583184c1b94SMartin Matuska * of allocating memory for the abd_t struct, and updating the abd stats.
584184c1b94SMartin Matuska * Usually, the provided abd is returned, but in some circumstances (FreeBSD,
585184c1b94SMartin Matuska * if sabd is scatter and size is more than 2 pages) a new abd_t may need to
586184c1b94SMartin Matuska * be allocated. Therefore callers should be careful to use the returned
587184c1b94SMartin Matuska * abd_t*.
588184c1b94SMartin Matuska */
589184c1b94SMartin Matuska abd_t *
abd_get_offset_struct(abd_t * abd,abd_t * sabd,size_t off,size_t size)590184c1b94SMartin Matuska abd_get_offset_struct(abd_t *abd, abd_t *sabd, size_t off, size_t size)
591184c1b94SMartin Matuska {
5929db44a8eSMartin Matuska abd_t *result;
593184c1b94SMartin Matuska abd_init_struct(abd);
5949db44a8eSMartin Matuska result = abd_get_offset_impl(abd, sabd, off, size);
5959db44a8eSMartin Matuska if (result != abd)
5969db44a8eSMartin Matuska abd_fini_struct(abd);
5979db44a8eSMartin Matuska return (result);
598184c1b94SMartin Matuska }
599184c1b94SMartin Matuska
600eda14cbcSMatt Macy abd_t *
abd_get_offset(abd_t * sabd,size_t off)601eda14cbcSMatt Macy abd_get_offset(abd_t *sabd, size_t off)
602eda14cbcSMatt Macy {
603eda14cbcSMatt Macy size_t size = sabd->abd_size > off ? sabd->abd_size - off : 0;
604eda14cbcSMatt Macy VERIFY3U(size, >, 0);
605184c1b94SMartin Matuska return (abd_get_offset_impl(NULL, sabd, off, size));
606eda14cbcSMatt Macy }
607eda14cbcSMatt Macy
608eda14cbcSMatt Macy abd_t *
abd_get_offset_size(abd_t * sabd,size_t off,size_t size)609eda14cbcSMatt Macy abd_get_offset_size(abd_t *sabd, size_t off, size_t size)
610eda14cbcSMatt Macy {
611eda14cbcSMatt Macy ASSERT3U(off + size, <=, sabd->abd_size);
612184c1b94SMartin Matuska return (abd_get_offset_impl(NULL, sabd, off, size));
613eda14cbcSMatt Macy }
614eda14cbcSMatt Macy
615eda14cbcSMatt Macy /*
616184c1b94SMartin Matuska * Return a size scatter ABD containing only zeros.
617eda14cbcSMatt Macy */
618eda14cbcSMatt Macy abd_t *
abd_get_zeros(size_t size)619eda14cbcSMatt Macy abd_get_zeros(size_t size)
620eda14cbcSMatt Macy {
621eda14cbcSMatt Macy ASSERT3P(abd_zero_scatter, !=, NULL);
622eda14cbcSMatt Macy ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
623eda14cbcSMatt Macy return (abd_get_offset_size(abd_zero_scatter, 0, size));
624eda14cbcSMatt Macy }
625eda14cbcSMatt Macy
626eda14cbcSMatt Macy /*
627e2df9bb4SMartin Matuska * Create a linear ABD for an existing buf.
628eda14cbcSMatt Macy */
629e2df9bb4SMartin Matuska static abd_t *
abd_get_from_buf_impl(abd_t * abd,void * buf,size_t size)630e2df9bb4SMartin Matuska abd_get_from_buf_impl(abd_t *abd, void *buf, size_t size)
631eda14cbcSMatt Macy {
632eda14cbcSMatt Macy VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);
633eda14cbcSMatt Macy
634eda14cbcSMatt Macy /*
635eda14cbcSMatt Macy * Even if this buf is filesystem metadata, we only track that if we
636eda14cbcSMatt Macy * own the underlying data buffer, which is not true in this case.
637eda14cbcSMatt Macy * Therefore, we don't ever use ABD_FLAG_META here.
638eda14cbcSMatt Macy */
639184c1b94SMartin Matuska abd->abd_flags |= ABD_FLAG_LINEAR;
640eda14cbcSMatt Macy abd->abd_size = size;
641eda14cbcSMatt Macy
642eda14cbcSMatt Macy ABD_LINEAR_BUF(abd) = buf;
643eda14cbcSMatt Macy
644eda14cbcSMatt Macy return (abd);
645eda14cbcSMatt Macy }
646eda14cbcSMatt Macy
647e2df9bb4SMartin Matuska abd_t *
abd_get_from_buf(void * buf,size_t size)648e2df9bb4SMartin Matuska abd_get_from_buf(void *buf, size_t size)
649e2df9bb4SMartin Matuska {
650e2df9bb4SMartin Matuska abd_t *abd = abd_alloc_struct(0);
651e2df9bb4SMartin Matuska return (abd_get_from_buf_impl(abd, buf, size));
652e2df9bb4SMartin Matuska }
653e2df9bb4SMartin Matuska
654e2df9bb4SMartin Matuska abd_t *
abd_get_from_buf_struct(abd_t * abd,void * buf,size_t size)655e2df9bb4SMartin Matuska abd_get_from_buf_struct(abd_t *abd, void *buf, size_t size)
656e2df9bb4SMartin Matuska {
657e2df9bb4SMartin Matuska abd_init_struct(abd);
658e2df9bb4SMartin Matuska return (abd_get_from_buf_impl(abd, buf, size));
659e2df9bb4SMartin Matuska }
660e2df9bb4SMartin Matuska
661eda14cbcSMatt Macy /*
662eda14cbcSMatt Macy * Get the raw buffer associated with a linear ABD.
663eda14cbcSMatt Macy */
664eda14cbcSMatt Macy void *
abd_to_buf(abd_t * abd)665eda14cbcSMatt Macy abd_to_buf(abd_t *abd)
666eda14cbcSMatt Macy {
667eda14cbcSMatt Macy ASSERT(abd_is_linear(abd));
668eda14cbcSMatt Macy abd_verify(abd);
669eda14cbcSMatt Macy return (ABD_LINEAR_BUF(abd));
670eda14cbcSMatt Macy }
671eda14cbcSMatt Macy
672eda14cbcSMatt Macy void
abd_release_ownership_of_buf(abd_t * abd)673eda14cbcSMatt Macy abd_release_ownership_of_buf(abd_t *abd)
674eda14cbcSMatt Macy {
675eda14cbcSMatt Macy ASSERT(abd_is_linear(abd));
676eda14cbcSMatt Macy ASSERT(abd->abd_flags & ABD_FLAG_OWNER);
677eda14cbcSMatt Macy
678eda14cbcSMatt Macy /*
679eda14cbcSMatt Macy * abd_free() needs to handle LINEAR_PAGE ABD's specially.
680eda14cbcSMatt Macy * Since that flag does not survive the
681eda14cbcSMatt Macy * abd_release_ownership_of_buf() -> abd_get_from_buf() ->
682eda14cbcSMatt Macy * abd_take_ownership_of_buf() sequence, we don't allow releasing
683eda14cbcSMatt Macy * these "linear but not zio_[data_]buf_alloc()'ed" ABD's.
684eda14cbcSMatt Macy */
685eda14cbcSMatt Macy ASSERT(!abd_is_linear_page(abd));
686eda14cbcSMatt Macy
687eda14cbcSMatt Macy abd_verify(abd);
688eda14cbcSMatt Macy
689eda14cbcSMatt Macy abd->abd_flags &= ~ABD_FLAG_OWNER;
690eda14cbcSMatt Macy /* Disable this flag since we no longer own the data buffer */
691eda14cbcSMatt Macy abd->abd_flags &= ~ABD_FLAG_META;
692eda14cbcSMatt Macy
693eda14cbcSMatt Macy abd_update_linear_stats(abd, ABDSTAT_DECR);
694eda14cbcSMatt Macy }
695eda14cbcSMatt Macy
696eda14cbcSMatt Macy
697eda14cbcSMatt Macy /*
698eda14cbcSMatt Macy * Give this ABD ownership of the buffer that it's storing. Can only be used on
699eda14cbcSMatt Macy * linear ABDs which were allocated via abd_get_from_buf(), or ones allocated
700eda14cbcSMatt Macy * with abd_alloc_linear() which subsequently released ownership of their buf
701eda14cbcSMatt Macy * with abd_release_ownership_of_buf().
702eda14cbcSMatt Macy */
703eda14cbcSMatt Macy void
abd_take_ownership_of_buf(abd_t * abd,boolean_t is_metadata)704eda14cbcSMatt Macy abd_take_ownership_of_buf(abd_t *abd, boolean_t is_metadata)
705eda14cbcSMatt Macy {
706eda14cbcSMatt Macy ASSERT(abd_is_linear(abd));
707eda14cbcSMatt Macy ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER));
708eda14cbcSMatt Macy abd_verify(abd);
709eda14cbcSMatt Macy
710eda14cbcSMatt Macy abd->abd_flags |= ABD_FLAG_OWNER;
711eda14cbcSMatt Macy if (is_metadata) {
712eda14cbcSMatt Macy abd->abd_flags |= ABD_FLAG_META;
713eda14cbcSMatt Macy }
714eda14cbcSMatt Macy
715eda14cbcSMatt Macy abd_update_linear_stats(abd, ABDSTAT_INCR);
716eda14cbcSMatt Macy }
717eda14cbcSMatt Macy
718eda14cbcSMatt Macy /*
719eda14cbcSMatt Macy * Initializes an abd_iter based on whether the abd is a gang ABD
720eda14cbcSMatt Macy * or just a single ABD.
721eda14cbcSMatt Macy */
722eda14cbcSMatt Macy static inline abd_t *
abd_init_abd_iter(abd_t * abd,struct abd_iter * aiter,size_t off)723eda14cbcSMatt Macy abd_init_abd_iter(abd_t *abd, struct abd_iter *aiter, size_t off)
724eda14cbcSMatt Macy {
725eda14cbcSMatt Macy abd_t *cabd = NULL;
726eda14cbcSMatt Macy
727eda14cbcSMatt Macy if (abd_is_gang(abd)) {
728eda14cbcSMatt Macy cabd = abd_gang_get_offset(abd, &off);
729eda14cbcSMatt Macy if (cabd) {
730eda14cbcSMatt Macy abd_iter_init(aiter, cabd);
731eda14cbcSMatt Macy abd_iter_advance(aiter, off);
732eda14cbcSMatt Macy }
733eda14cbcSMatt Macy } else {
734eda14cbcSMatt Macy abd_iter_init(aiter, abd);
735eda14cbcSMatt Macy abd_iter_advance(aiter, off);
736eda14cbcSMatt Macy }
737eda14cbcSMatt Macy return (cabd);
738eda14cbcSMatt Macy }
739eda14cbcSMatt Macy
740eda14cbcSMatt Macy /*
741eda14cbcSMatt Macy * Advances an abd_iter. We have to be careful with gang ABD as
742eda14cbcSMatt Macy * advancing could mean that we are at the end of a particular ABD and
743eda14cbcSMatt Macy * must grab the ABD in the gang ABD's list.
744eda14cbcSMatt Macy */
745eda14cbcSMatt Macy static inline abd_t *
abd_advance_abd_iter(abd_t * abd,abd_t * cabd,struct abd_iter * aiter,size_t len)746eda14cbcSMatt Macy abd_advance_abd_iter(abd_t *abd, abd_t *cabd, struct abd_iter *aiter,
747eda14cbcSMatt Macy size_t len)
748eda14cbcSMatt Macy {
749eda14cbcSMatt Macy abd_iter_advance(aiter, len);
750eda14cbcSMatt Macy if (abd_is_gang(abd) && abd_iter_at_end(aiter)) {
751eda14cbcSMatt Macy ASSERT3P(cabd, !=, NULL);
752eda14cbcSMatt Macy cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd);
753eda14cbcSMatt Macy if (cabd) {
754eda14cbcSMatt Macy abd_iter_init(aiter, cabd);
755eda14cbcSMatt Macy abd_iter_advance(aiter, 0);
756eda14cbcSMatt Macy }
757eda14cbcSMatt Macy }
758eda14cbcSMatt Macy return (cabd);
759eda14cbcSMatt Macy }
760eda14cbcSMatt Macy
761eda14cbcSMatt Macy int
abd_iterate_func(abd_t * abd,size_t off,size_t size,abd_iter_func_t * func,void * private)762eda14cbcSMatt Macy abd_iterate_func(abd_t *abd, size_t off, size_t size,
763eda14cbcSMatt Macy abd_iter_func_t *func, void *private)
764eda14cbcSMatt Macy {
765eda14cbcSMatt Macy struct abd_iter aiter;
7667877fdebSMatt Macy int ret = 0;
7677877fdebSMatt Macy
7687877fdebSMatt Macy if (size == 0)
7697877fdebSMatt Macy return (0);
770eda14cbcSMatt Macy
771eda14cbcSMatt Macy abd_verify(abd);
772eda14cbcSMatt Macy ASSERT3U(off + size, <=, abd->abd_size);
773eda14cbcSMatt Macy
7747877fdebSMatt Macy abd_t *c_abd = abd_init_abd_iter(abd, &aiter, off);
775eda14cbcSMatt Macy
776eda14cbcSMatt Macy while (size > 0) {
7776c1e79dfSMartin Matuska IMPLY(abd_is_gang(abd), c_abd != NULL);
778eda14cbcSMatt Macy
779eda14cbcSMatt Macy abd_iter_map(&aiter);
780eda14cbcSMatt Macy
781eda14cbcSMatt Macy size_t len = MIN(aiter.iter_mapsize, size);
782eda14cbcSMatt Macy ASSERT3U(len, >, 0);
783eda14cbcSMatt Macy
784eda14cbcSMatt Macy ret = func(aiter.iter_mapaddr, len, private);
785eda14cbcSMatt Macy
786eda14cbcSMatt Macy abd_iter_unmap(&aiter);
787eda14cbcSMatt Macy
788eda14cbcSMatt Macy if (ret != 0)
789eda14cbcSMatt Macy break;
790eda14cbcSMatt Macy
791eda14cbcSMatt Macy size -= len;
792eda14cbcSMatt Macy c_abd = abd_advance_abd_iter(abd, c_abd, &aiter, len);
793eda14cbcSMatt Macy }
794eda14cbcSMatt Macy
795eda14cbcSMatt Macy return (ret);
796eda14cbcSMatt Macy }
797eda14cbcSMatt Macy
#if defined(__linux__) && defined(_KERNEL)
/*
 * Like abd_iterate_func(), but func receives the iterator's current page,
 * the data offset within it and a length instead of a mapped address.
 * Only built for the Linux kernel.
 *
 * Iteration stops at the first non-zero return from func and that value is
 * returned; otherwise 0 is returned.  A size of 0 returns 0 immediately.
 */
int
abd_iterate_page_func(abd_t *abd, size_t off, size_t size,
    abd_iter_page_func_t *func, void *private)
{
	struct abd_iter aiter;
	int ret = 0;

	if (size == 0)
		return (0);

	abd_verify(abd);
	ASSERT3U(off + size, <=, abd->abd_size);

	abd_t *c_abd = abd_init_abd_iter(abd, &aiter, off);

	while (size > 0) {
		IMPLY(abd_is_gang(abd), c_abd != NULL);

		abd_iter_page(&aiter);

		size_t len = MIN(aiter.iter_page_dsize, size);
		ASSERT3U(len, >, 0);

		ret = func(aiter.iter_page, aiter.iter_page_doff,
		    len, private);

		/* Reset the per-page iterator state before moving on. */
		aiter.iter_page = NULL;
		aiter.iter_page_doff = 0;
		aiter.iter_page_dsize = 0;

		if (ret != 0)
			break;

		size -= len;
		c_abd = abd_advance_abd_iter(abd, c_abd, &aiter, len);
	}

	return (ret);
}
#endif
839783d3ff6SMartin Matuska
/*
 * Cursor into a flat buffer, passed as the private argument to the
 * buffer copy/compare callbacks below; each callback advances arg_buf by
 * the number of bytes it processed.
 */
struct buf_arg {
	void *arg_buf;
};
843eda14cbcSMatt Macy
844eda14cbcSMatt Macy static int
abd_copy_to_buf_off_cb(void * buf,size_t size,void * private)845eda14cbcSMatt Macy abd_copy_to_buf_off_cb(void *buf, size_t size, void *private)
846eda14cbcSMatt Macy {
847eda14cbcSMatt Macy struct buf_arg *ba_ptr = private;
848eda14cbcSMatt Macy
849eda14cbcSMatt Macy (void) memcpy(ba_ptr->arg_buf, buf, size);
850eda14cbcSMatt Macy ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size;
851eda14cbcSMatt Macy
852eda14cbcSMatt Macy return (0);
853eda14cbcSMatt Macy }
854eda14cbcSMatt Macy
855eda14cbcSMatt Macy /*
856eda14cbcSMatt Macy * Copy abd to buf. (off is the offset in abd.)
857eda14cbcSMatt Macy */
858eda14cbcSMatt Macy void
abd_copy_to_buf_off(void * buf,abd_t * abd,size_t off,size_t size)859eda14cbcSMatt Macy abd_copy_to_buf_off(void *buf, abd_t *abd, size_t off, size_t size)
860eda14cbcSMatt Macy {
861eda14cbcSMatt Macy struct buf_arg ba_ptr = { buf };
862eda14cbcSMatt Macy
863eda14cbcSMatt Macy (void) abd_iterate_func(abd, off, size, abd_copy_to_buf_off_cb,
864eda14cbcSMatt Macy &ba_ptr);
865eda14cbcSMatt Macy }
866eda14cbcSMatt Macy
867eda14cbcSMatt Macy static int
abd_cmp_buf_off_cb(void * buf,size_t size,void * private)868eda14cbcSMatt Macy abd_cmp_buf_off_cb(void *buf, size_t size, void *private)
869eda14cbcSMatt Macy {
870eda14cbcSMatt Macy int ret;
871eda14cbcSMatt Macy struct buf_arg *ba_ptr = private;
872eda14cbcSMatt Macy
873eda14cbcSMatt Macy ret = memcmp(buf, ba_ptr->arg_buf, size);
874eda14cbcSMatt Macy ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size;
875eda14cbcSMatt Macy
876eda14cbcSMatt Macy return (ret);
877eda14cbcSMatt Macy }
878eda14cbcSMatt Macy
879eda14cbcSMatt Macy /*
880eda14cbcSMatt Macy * Compare the contents of abd to buf. (off is the offset in abd.)
881eda14cbcSMatt Macy */
882eda14cbcSMatt Macy int
abd_cmp_buf_off(abd_t * abd,const void * buf,size_t off,size_t size)883eda14cbcSMatt Macy abd_cmp_buf_off(abd_t *abd, const void *buf, size_t off, size_t size)
884eda14cbcSMatt Macy {
885eda14cbcSMatt Macy struct buf_arg ba_ptr = { (void *) buf };
886eda14cbcSMatt Macy
887eda14cbcSMatt Macy return (abd_iterate_func(abd, off, size, abd_cmp_buf_off_cb, &ba_ptr));
888eda14cbcSMatt Macy }
889eda14cbcSMatt Macy
890eda14cbcSMatt Macy static int
abd_copy_from_buf_off_cb(void * buf,size_t size,void * private)891eda14cbcSMatt Macy abd_copy_from_buf_off_cb(void *buf, size_t size, void *private)
892eda14cbcSMatt Macy {
893eda14cbcSMatt Macy struct buf_arg *ba_ptr = private;
894eda14cbcSMatt Macy
895eda14cbcSMatt Macy (void) memcpy(buf, ba_ptr->arg_buf, size);
896eda14cbcSMatt Macy ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size;
897eda14cbcSMatt Macy
898eda14cbcSMatt Macy return (0);
899eda14cbcSMatt Macy }
900eda14cbcSMatt Macy
901eda14cbcSMatt Macy /*
902eda14cbcSMatt Macy * Copy from buf to abd. (off is the offset in abd.)
903eda14cbcSMatt Macy */
904eda14cbcSMatt Macy void
abd_copy_from_buf_off(abd_t * abd,const void * buf,size_t off,size_t size)905eda14cbcSMatt Macy abd_copy_from_buf_off(abd_t *abd, const void *buf, size_t off, size_t size)
906eda14cbcSMatt Macy {
907eda14cbcSMatt Macy struct buf_arg ba_ptr = { (void *) buf };
908eda14cbcSMatt Macy
909eda14cbcSMatt Macy (void) abd_iterate_func(abd, off, size, abd_copy_from_buf_off_cb,
910eda14cbcSMatt Macy &ba_ptr);
911eda14cbcSMatt Macy }
912eda14cbcSMatt Macy
/*
 * abd_iterate_func() callback: fill size bytes at buf with zeros.  private
 * is unused.  Always returns 0 so iteration continues.
 */
static int
abd_zero_off_cb(void *buf, size_t size, void *private)
{
	(void) private;
	(void) memset(buf, 0, size);
	return (0);
}
920eda14cbcSMatt Macy
921eda14cbcSMatt Macy /*
922eda14cbcSMatt Macy * Zero out the abd from a particular offset to the end.
923eda14cbcSMatt Macy */
924eda14cbcSMatt Macy void
abd_zero_off(abd_t * abd,size_t off,size_t size)925eda14cbcSMatt Macy abd_zero_off(abd_t *abd, size_t off, size_t size)
926eda14cbcSMatt Macy {
927eda14cbcSMatt Macy (void) abd_iterate_func(abd, off, size, abd_zero_off_cb, NULL);
928eda14cbcSMatt Macy }
929eda14cbcSMatt Macy
930eda14cbcSMatt Macy /*
931eda14cbcSMatt Macy * Iterate over two ABDs and call func incrementally on the two ABDs' data in
932eda14cbcSMatt Macy * equal-sized chunks (passed to func as raw buffers). func could be called many
933eda14cbcSMatt Macy * times during this iteration.
934eda14cbcSMatt Macy */
935eda14cbcSMatt Macy int
abd_iterate_func2(abd_t * dabd,abd_t * sabd,size_t doff,size_t soff,size_t size,abd_iter_func2_t * func,void * private)936eda14cbcSMatt Macy abd_iterate_func2(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff,
937eda14cbcSMatt Macy size_t size, abd_iter_func2_t *func, void *private)
938eda14cbcSMatt Macy {
939eda14cbcSMatt Macy int ret = 0;
940eda14cbcSMatt Macy struct abd_iter daiter, saiter;
941eda14cbcSMatt Macy abd_t *c_dabd, *c_sabd;
942eda14cbcSMatt Macy
9437877fdebSMatt Macy if (size == 0)
9447877fdebSMatt Macy return (0);
9457877fdebSMatt Macy
946eda14cbcSMatt Macy abd_verify(dabd);
947eda14cbcSMatt Macy abd_verify(sabd);
948eda14cbcSMatt Macy
949eda14cbcSMatt Macy ASSERT3U(doff + size, <=, dabd->abd_size);
950eda14cbcSMatt Macy ASSERT3U(soff + size, <=, sabd->abd_size);
951eda14cbcSMatt Macy
952eda14cbcSMatt Macy c_dabd = abd_init_abd_iter(dabd, &daiter, doff);
953eda14cbcSMatt Macy c_sabd = abd_init_abd_iter(sabd, &saiter, soff);
954eda14cbcSMatt Macy
955eda14cbcSMatt Macy while (size > 0) {
9566c1e79dfSMartin Matuska IMPLY(abd_is_gang(dabd), c_dabd != NULL);
9576c1e79dfSMartin Matuska IMPLY(abd_is_gang(sabd), c_sabd != NULL);
958eda14cbcSMatt Macy
959eda14cbcSMatt Macy abd_iter_map(&daiter);
960eda14cbcSMatt Macy abd_iter_map(&saiter);
961eda14cbcSMatt Macy
962eda14cbcSMatt Macy size_t dlen = MIN(daiter.iter_mapsize, size);
963eda14cbcSMatt Macy size_t slen = MIN(saiter.iter_mapsize, size);
964eda14cbcSMatt Macy size_t len = MIN(dlen, slen);
965eda14cbcSMatt Macy ASSERT(dlen > 0 || slen > 0);
966eda14cbcSMatt Macy
967eda14cbcSMatt Macy ret = func(daiter.iter_mapaddr, saiter.iter_mapaddr, len,
968eda14cbcSMatt Macy private);
969eda14cbcSMatt Macy
970eda14cbcSMatt Macy abd_iter_unmap(&saiter);
971eda14cbcSMatt Macy abd_iter_unmap(&daiter);
972eda14cbcSMatt Macy
973eda14cbcSMatt Macy if (ret != 0)
974eda14cbcSMatt Macy break;
975eda14cbcSMatt Macy
976eda14cbcSMatt Macy size -= len;
977eda14cbcSMatt Macy c_dabd =
978eda14cbcSMatt Macy abd_advance_abd_iter(dabd, c_dabd, &daiter, len);
979eda14cbcSMatt Macy c_sabd =
980eda14cbcSMatt Macy abd_advance_abd_iter(sabd, c_sabd, &saiter, len);
981eda14cbcSMatt Macy }
982eda14cbcSMatt Macy
983eda14cbcSMatt Macy return (ret);
984eda14cbcSMatt Macy }
985eda14cbcSMatt Macy
/*
 * abd_iterate_func2() callback: copy size bytes from sbuf to dbuf.  private
 * is unused.  Always returns 0 so iteration continues.
 */
static int
abd_copy_off_cb(void *dbuf, void *sbuf, size_t size, void *private)
{
	(void) private;
	(void) memcpy(dbuf, sbuf, size);
	return (0);
}
993eda14cbcSMatt Macy
994eda14cbcSMatt Macy /*
995eda14cbcSMatt Macy * Copy from sabd to dabd starting from soff and doff.
996eda14cbcSMatt Macy */
997eda14cbcSMatt Macy void
abd_copy_off(abd_t * dabd,abd_t * sabd,size_t doff,size_t soff,size_t size)998eda14cbcSMatt Macy abd_copy_off(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff, size_t size)
999eda14cbcSMatt Macy {
1000eda14cbcSMatt Macy (void) abd_iterate_func2(dabd, sabd, doff, soff, size,
1001eda14cbcSMatt Macy abd_copy_off_cb, NULL);
1002eda14cbcSMatt Macy }
1003eda14cbcSMatt Macy
/*
 * abd_iterate_func2() callback: memcmp() size bytes of bufa against bufb.
 * private is unused.  Returns the memcmp() result, so a mismatch (non-zero)
 * stops the iteration.
 */
static int
abd_cmp_cb(void *bufa, void *bufb, size_t size, void *private)
{
	(void) private;
	return (memcmp(bufa, bufb, size));
}
1010eda14cbcSMatt Macy
1011eda14cbcSMatt Macy /*
1012eda14cbcSMatt Macy * Compares the contents of two ABDs.
1013eda14cbcSMatt Macy */
1014eda14cbcSMatt Macy int
abd_cmp(abd_t * dabd,abd_t * sabd)1015eda14cbcSMatt Macy abd_cmp(abd_t *dabd, abd_t *sabd)
1016eda14cbcSMatt Macy {
1017eda14cbcSMatt Macy ASSERT3U(dabd->abd_size, ==, sabd->abd_size);
1018eda14cbcSMatt Macy return (abd_iterate_func2(dabd, sabd, 0, 0, dabd->abd_size,
1019eda14cbcSMatt Macy abd_cmp_cb, NULL));
1020eda14cbcSMatt Macy }
1021eda14cbcSMatt Macy
/*
 * Check if ABD content is all-zeroes.
 *
 * abd_iterate_func() callback: scans len bytes at data as 64-bit words.
 * Returns 1 as soon as a non-zero word is found (which stops the iteration)
 * and 0 if every word is zero.  private is unused.
 */
static int
abd_cmp_zero_off_cb(void *data, size_t len, void *private)
{
	(void) private;

	/* This function can only check whole uint64s. Enforce that. */
	ASSERT0(P2PHASE(len, 8));

	uint64_t *end = (uint64_t *)((char *)data + len);
	for (uint64_t *word = (uint64_t *)data; word < end; word++)
		if (*word != 0)
			return (1);

	return (0);
}
1040ce4dcb97SMartin Matuska
1041ce4dcb97SMartin Matuska int
abd_cmp_zero_off(abd_t * abd,size_t off,size_t size)1042ce4dcb97SMartin Matuska abd_cmp_zero_off(abd_t *abd, size_t off, size_t size)
1043ce4dcb97SMartin Matuska {
1044ce4dcb97SMartin Matuska return (abd_iterate_func(abd, off, size, abd_cmp_zero_off_cb, NULL));
1045ce4dcb97SMartin Matuska }
1046ce4dcb97SMartin Matuska
1047ce4dcb97SMartin Matuska /*
1048eda14cbcSMatt Macy * Iterate over code ABDs and a data ABD and call @func_raidz_gen.
1049eda14cbcSMatt Macy *
1050eda14cbcSMatt Macy * @cabds parity ABDs, must have equal size
1051eda14cbcSMatt Macy * @dabd data ABD. Can be NULL (in this case @dsize = 0)
1052eda14cbcSMatt Macy * @func_raidz_gen should be implemented so that its behaviour
1053eda14cbcSMatt Macy * is the same when taking linear and when taking scatter
1054eda14cbcSMatt Macy */
1055eda14cbcSMatt Macy void
abd_raidz_gen_iterate(abd_t ** cabds,abd_t * dabd,size_t off,size_t csize,size_t dsize,const unsigned parity,void (* func_raidz_gen)(void **,const void *,size_t,size_t))1056f8b1db88SMartin Matuska abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off,
1057f8b1db88SMartin Matuska size_t csize, size_t dsize, const unsigned parity,
1058eda14cbcSMatt Macy void (*func_raidz_gen)(void **, const void *, size_t, size_t))
1059eda14cbcSMatt Macy {
1060eda14cbcSMatt Macy int i;
1061f8b1db88SMartin Matuska size_t len, dlen;
1062eda14cbcSMatt Macy struct abd_iter caiters[3];
10636c1e79dfSMartin Matuska struct abd_iter daiter;
106414c2e0a0SMartin Matuska void *caddrs[3], *daddr;
1065eda14cbcSMatt Macy unsigned long flags __maybe_unused = 0;
1066eda14cbcSMatt Macy abd_t *c_cabds[3];
1067eda14cbcSMatt Macy abd_t *c_dabd = NULL;
1068eda14cbcSMatt Macy
1069eda14cbcSMatt Macy ASSERT3U(parity, <=, 3);
1070eda14cbcSMatt Macy for (i = 0; i < parity; i++) {
10716c1e79dfSMartin Matuska abd_verify(cabds[i]);
1072f8b1db88SMartin Matuska ASSERT3U(off + csize, <=, cabds[i]->abd_size);
1073f8b1db88SMartin Matuska c_cabds[i] = abd_init_abd_iter(cabds[i], &caiters[i], off);
1074eda14cbcSMatt Macy }
1075eda14cbcSMatt Macy
10766c1e79dfSMartin Matuska if (dsize > 0) {
10776c1e79dfSMartin Matuska ASSERT(dabd);
10786c1e79dfSMartin Matuska abd_verify(dabd);
1079f8b1db88SMartin Matuska ASSERT3U(off + dsize, <=, dabd->abd_size);
1080f8b1db88SMartin Matuska c_dabd = abd_init_abd_iter(dabd, &daiter, off);
1081eda14cbcSMatt Macy }
1082eda14cbcSMatt Macy
1083eda14cbcSMatt Macy abd_enter_critical(flags);
1084eda14cbcSMatt Macy while (csize > 0) {
10856c1e79dfSMartin Matuska len = csize;
1086eda14cbcSMatt Macy for (i = 0; i < parity; i++) {
10876c1e79dfSMartin Matuska IMPLY(abd_is_gang(cabds[i]), c_cabds[i] != NULL);
1088eda14cbcSMatt Macy abd_iter_map(&caiters[i]);
1089eda14cbcSMatt Macy caddrs[i] = caiters[i].iter_mapaddr;
10906c1e79dfSMartin Matuska len = MIN(caiters[i].iter_mapsize, len);
1091eda14cbcSMatt Macy }
1092eda14cbcSMatt Macy
10936c1e79dfSMartin Matuska if (dsize > 0) {
10946c1e79dfSMartin Matuska IMPLY(abd_is_gang(dabd), c_dabd != NULL);
1095eda14cbcSMatt Macy abd_iter_map(&daiter);
109614c2e0a0SMartin Matuska daddr = daiter.iter_mapaddr;
1097eda14cbcSMatt Macy len = MIN(daiter.iter_mapsize, len);
1098eda14cbcSMatt Macy dlen = len;
109914c2e0a0SMartin Matuska } else {
110014c2e0a0SMartin Matuska daddr = NULL;
1101eda14cbcSMatt Macy dlen = 0;
110214c2e0a0SMartin Matuska }
1103eda14cbcSMatt Macy
1104eda14cbcSMatt Macy /* must be progressive */
1105f8b1db88SMartin Matuska ASSERT3U(len, >, 0);
1106eda14cbcSMatt Macy /*
1107eda14cbcSMatt Macy * The iterated function likely will not do well if each
1108eda14cbcSMatt Macy * segment except the last one is not multiple of 512 (raidz).
1109eda14cbcSMatt Macy */
1110eda14cbcSMatt Macy ASSERT3U(((uint64_t)len & 511ULL), ==, 0);
1111eda14cbcSMatt Macy
111214c2e0a0SMartin Matuska func_raidz_gen(caddrs, daddr, len, dlen);
1113eda14cbcSMatt Macy
1114eda14cbcSMatt Macy for (i = parity-1; i >= 0; i--) {
1115eda14cbcSMatt Macy abd_iter_unmap(&caiters[i]);
1116eda14cbcSMatt Macy c_cabds[i] =
1117eda14cbcSMatt Macy abd_advance_abd_iter(cabds[i], c_cabds[i],
1118eda14cbcSMatt Macy &caiters[i], len);
1119eda14cbcSMatt Macy }
1120eda14cbcSMatt Macy
11216c1e79dfSMartin Matuska if (dsize > 0) {
1122eda14cbcSMatt Macy abd_iter_unmap(&daiter);
1123eda14cbcSMatt Macy c_dabd =
1124eda14cbcSMatt Macy abd_advance_abd_iter(dabd, c_dabd, &daiter,
1125eda14cbcSMatt Macy dlen);
1126eda14cbcSMatt Macy dsize -= dlen;
1127eda14cbcSMatt Macy }
1128eda14cbcSMatt Macy
1129eda14cbcSMatt Macy csize -= len;
1130eda14cbcSMatt Macy }
1131eda14cbcSMatt Macy abd_exit_critical(flags);
1132eda14cbcSMatt Macy }
1133eda14cbcSMatt Macy
1134eda14cbcSMatt Macy /*
1135eda14cbcSMatt Macy * Iterate over code ABDs and data reconstruction target ABDs and call
1136eda14cbcSMatt Macy * @func_raidz_rec. Function maps at most 6 pages atomically.
1137eda14cbcSMatt Macy *
1138eda14cbcSMatt Macy * @cabds parity ABDs, must have equal size
1139eda14cbcSMatt Macy * @tabds rec target ABDs, at most 3
1140eda14cbcSMatt Macy * @tsize size of data target columns
1141eda14cbcSMatt Macy * @func_raidz_rec expects syndrome data in target columns. Function
1142eda14cbcSMatt Macy * reconstructs data and overwrites target columns.
1143eda14cbcSMatt Macy */
1144eda14cbcSMatt Macy void
abd_raidz_rec_iterate(abd_t ** cabds,abd_t ** tabds,size_t tsize,const unsigned parity,void (* func_raidz_rec)(void ** t,const size_t tsize,void ** c,const unsigned * mul),const unsigned * mul)1145eda14cbcSMatt Macy abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
1146f8b1db88SMartin Matuska size_t tsize, const unsigned parity,
1147eda14cbcSMatt Macy void (*func_raidz_rec)(void **t, const size_t tsize, void **c,
1148eda14cbcSMatt Macy const unsigned *mul),
1149eda14cbcSMatt Macy const unsigned *mul)
1150eda14cbcSMatt Macy {
1151eda14cbcSMatt Macy int i;
1152f8b1db88SMartin Matuska size_t len;
1153eda14cbcSMatt Macy struct abd_iter citers[3];
1154eda14cbcSMatt Macy struct abd_iter xiters[3];
1155eda14cbcSMatt Macy void *caddrs[3], *xaddrs[3];
1156eda14cbcSMatt Macy unsigned long flags __maybe_unused = 0;
1157eda14cbcSMatt Macy abd_t *c_cabds[3];
1158eda14cbcSMatt Macy abd_t *c_tabds[3];
1159eda14cbcSMatt Macy
1160eda14cbcSMatt Macy ASSERT3U(parity, <=, 3);
1161eda14cbcSMatt Macy
1162eda14cbcSMatt Macy for (i = 0; i < parity; i++) {
11636c1e79dfSMartin Matuska abd_verify(cabds[i]);
11646c1e79dfSMartin Matuska abd_verify(tabds[i]);
11656c1e79dfSMartin Matuska ASSERT3U(tsize, <=, cabds[i]->abd_size);
11666c1e79dfSMartin Matuska ASSERT3U(tsize, <=, tabds[i]->abd_size);
1167eda14cbcSMatt Macy c_cabds[i] =
1168eda14cbcSMatt Macy abd_init_abd_iter(cabds[i], &citers[i], 0);
1169eda14cbcSMatt Macy c_tabds[i] =
1170eda14cbcSMatt Macy abd_init_abd_iter(tabds[i], &xiters[i], 0);
1171eda14cbcSMatt Macy }
1172eda14cbcSMatt Macy
1173eda14cbcSMatt Macy abd_enter_critical(flags);
1174eda14cbcSMatt Macy while (tsize > 0) {
11756c1e79dfSMartin Matuska len = tsize;
1176eda14cbcSMatt Macy for (i = 0; i < parity; i++) {
11776c1e79dfSMartin Matuska IMPLY(abd_is_gang(cabds[i]), c_cabds[i] != NULL);
11786c1e79dfSMartin Matuska IMPLY(abd_is_gang(tabds[i]), c_tabds[i] != NULL);
1179eda14cbcSMatt Macy abd_iter_map(&citers[i]);
1180eda14cbcSMatt Macy abd_iter_map(&xiters[i]);
1181eda14cbcSMatt Macy caddrs[i] = citers[i].iter_mapaddr;
1182eda14cbcSMatt Macy xaddrs[i] = xiters[i].iter_mapaddr;
11836c1e79dfSMartin Matuska len = MIN(citers[i].iter_mapsize, len);
11846c1e79dfSMartin Matuska len = MIN(xiters[i].iter_mapsize, len);
1185eda14cbcSMatt Macy }
1186eda14cbcSMatt Macy
1187eda14cbcSMatt Macy /* must be progressive */
1188eda14cbcSMatt Macy ASSERT3S(len, >, 0);
1189eda14cbcSMatt Macy /*
1190eda14cbcSMatt Macy * The iterated function likely will not do well if each
1191eda14cbcSMatt Macy * segment except the last one is not multiple of 512 (raidz).
1192eda14cbcSMatt Macy */
1193eda14cbcSMatt Macy ASSERT3U(((uint64_t)len & 511ULL), ==, 0);
1194eda14cbcSMatt Macy
1195eda14cbcSMatt Macy func_raidz_rec(xaddrs, len, caddrs, mul);
1196eda14cbcSMatt Macy
1197eda14cbcSMatt Macy for (i = parity-1; i >= 0; i--) {
1198eda14cbcSMatt Macy abd_iter_unmap(&xiters[i]);
1199eda14cbcSMatt Macy abd_iter_unmap(&citers[i]);
1200eda14cbcSMatt Macy c_tabds[i] =
1201eda14cbcSMatt Macy abd_advance_abd_iter(tabds[i], c_tabds[i],
1202eda14cbcSMatt Macy &xiters[i], len);
1203eda14cbcSMatt Macy c_cabds[i] =
1204eda14cbcSMatt Macy abd_advance_abd_iter(cabds[i], c_cabds[i],
1205eda14cbcSMatt Macy &citers[i], len);
1206eda14cbcSMatt Macy }
1207eda14cbcSMatt Macy
1208eda14cbcSMatt Macy tsize -= len;
1209eda14cbcSMatt Macy ASSERT3S(tsize, >=, 0);
1210eda14cbcSMatt Macy }
1211eda14cbcSMatt Macy abd_exit_critical(flags);
1212eda14cbcSMatt Macy }
1213