xref: /freebsd/sys/contrib/openzfs/module/zfs/dmu_zfetch.c (revision 180f822596ecc49d3074dcc9dfea9628aae1d48d)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
25eda14cbcSMatt Macy 
/*
 * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
 */
29eda14cbcSMatt Macy 
30eda14cbcSMatt Macy #include <sys/zfs_context.h>
31eda14cbcSMatt Macy #include <sys/dnode.h>
32eda14cbcSMatt Macy #include <sys/dmu_objset.h>
33eda14cbcSMatt Macy #include <sys/dmu_zfetch.h>
34eda14cbcSMatt Macy #include <sys/dmu.h>
35eda14cbcSMatt Macy #include <sys/dbuf.h>
36eda14cbcSMatt Macy #include <sys/kstat.h>
37eda14cbcSMatt Macy 
38eda14cbcSMatt Macy /*
39eda14cbcSMatt Macy  * This tunable disables predictive prefetch.  Note that it leaves "prescient"
40eda14cbcSMatt Macy  * prefetch (e.g. prefetch for zfs send) intact.  Unlike predictive prefetch,
41eda14cbcSMatt Macy  * prescient prefetch never issues i/os that end up not being needed,
42eda14cbcSMatt Macy  * so it can't hurt performance.
43eda14cbcSMatt Macy  */
44eda14cbcSMatt Macy 
45eda14cbcSMatt Macy int zfs_prefetch_disable = B_FALSE;
46eda14cbcSMatt Macy 
47eda14cbcSMatt Macy /* max # of streams per zfetch */
48eda14cbcSMatt Macy unsigned int	zfetch_max_streams = 8;
49eda14cbcSMatt Macy /* min time before stream reclaim */
50eda14cbcSMatt Macy unsigned int	zfetch_min_sec_reap = 2;
51eda14cbcSMatt Macy /* max bytes to prefetch per stream (default 8MB) */
52eda14cbcSMatt Macy unsigned int	zfetch_max_distance = 8 * 1024 * 1024;
53eda14cbcSMatt Macy /* max bytes to prefetch indirects for per stream (default 64MB) */
54eda14cbcSMatt Macy unsigned int	zfetch_max_idistance = 64 * 1024 * 1024;
55eda14cbcSMatt Macy /* max number of bytes in an array_read in which we allow prefetching (1MB) */
56eda14cbcSMatt Macy unsigned long	zfetch_array_rd_sz = 1024 * 1024;
57eda14cbcSMatt Macy 
58eda14cbcSMatt Macy typedef struct zfetch_stats {
59eda14cbcSMatt Macy 	kstat_named_t zfetchstat_hits;
60eda14cbcSMatt Macy 	kstat_named_t zfetchstat_misses;
61eda14cbcSMatt Macy 	kstat_named_t zfetchstat_max_streams;
62eda14cbcSMatt Macy } zfetch_stats_t;
63eda14cbcSMatt Macy 
64eda14cbcSMatt Macy static zfetch_stats_t zfetch_stats = {
65eda14cbcSMatt Macy 	{ "hits",			KSTAT_DATA_UINT64 },
66eda14cbcSMatt Macy 	{ "misses",			KSTAT_DATA_UINT64 },
67eda14cbcSMatt Macy 	{ "max_streams",		KSTAT_DATA_UINT64 },
68eda14cbcSMatt Macy };
69eda14cbcSMatt Macy 
/*
 * Atomically increment the counter named by "stat" in zfetch_stats.
 *
 * The expansion intentionally has no trailing semicolon: the caller
 * supplies it, so an invocation is exactly one statement and can be used
 * safely as the body of an unbraced if/else.
 */
#define	ZFETCHSTAT_BUMP(stat) \
	atomic_inc_64(&zfetch_stats.stat.value.ui64)
72eda14cbcSMatt Macy 
73eda14cbcSMatt Macy kstat_t		*zfetch_ksp;
74eda14cbcSMatt Macy 
75eda14cbcSMatt Macy void
76eda14cbcSMatt Macy zfetch_init(void)
77eda14cbcSMatt Macy {
78eda14cbcSMatt Macy 	zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc",
79eda14cbcSMatt Macy 	    KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t),
80eda14cbcSMatt Macy 	    KSTAT_FLAG_VIRTUAL);
81eda14cbcSMatt Macy 
82eda14cbcSMatt Macy 	if (zfetch_ksp != NULL) {
83eda14cbcSMatt Macy 		zfetch_ksp->ks_data = &zfetch_stats;
84eda14cbcSMatt Macy 		kstat_install(zfetch_ksp);
85eda14cbcSMatt Macy 	}
86eda14cbcSMatt Macy }
87eda14cbcSMatt Macy 
88eda14cbcSMatt Macy void
89eda14cbcSMatt Macy zfetch_fini(void)
90eda14cbcSMatt Macy {
91eda14cbcSMatt Macy 	if (zfetch_ksp != NULL) {
92eda14cbcSMatt Macy 		kstat_delete(zfetch_ksp);
93eda14cbcSMatt Macy 		zfetch_ksp = NULL;
94eda14cbcSMatt Macy 	}
95eda14cbcSMatt Macy }
96eda14cbcSMatt Macy 
97eda14cbcSMatt Macy /*
98eda14cbcSMatt Macy  * This takes a pointer to a zfetch structure and a dnode.  It performs the
99eda14cbcSMatt Macy  * necessary setup for the zfetch structure, grokking data from the
100eda14cbcSMatt Macy  * associated dnode.
101eda14cbcSMatt Macy  */
102eda14cbcSMatt Macy void
103eda14cbcSMatt Macy dmu_zfetch_init(zfetch_t *zf, dnode_t *dno)
104eda14cbcSMatt Macy {
105eda14cbcSMatt Macy 	if (zf == NULL)
106eda14cbcSMatt Macy 		return;
107eda14cbcSMatt Macy 
108eda14cbcSMatt Macy 	zf->zf_dnode = dno;
109eda14cbcSMatt Macy 
110eda14cbcSMatt Macy 	list_create(&zf->zf_stream, sizeof (zstream_t),
111eda14cbcSMatt Macy 	    offsetof(zstream_t, zs_node));
112eda14cbcSMatt Macy 
113eda14cbcSMatt Macy 	mutex_init(&zf->zf_lock, NULL, MUTEX_DEFAULT, NULL);
114eda14cbcSMatt Macy }
115eda14cbcSMatt Macy 
116eda14cbcSMatt Macy static void
117eda14cbcSMatt Macy dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs)
118eda14cbcSMatt Macy {
119eda14cbcSMatt Macy 	ASSERT(MUTEX_HELD(&zf->zf_lock));
120eda14cbcSMatt Macy 	list_remove(&zf->zf_stream, zs);
121eda14cbcSMatt Macy 	mutex_destroy(&zs->zs_lock);
122eda14cbcSMatt Macy 	kmem_free(zs, sizeof (*zs));
123eda14cbcSMatt Macy }
124eda14cbcSMatt Macy 
125eda14cbcSMatt Macy /*
126eda14cbcSMatt Macy  * Clean-up state associated with a zfetch structure (e.g. destroy the
127eda14cbcSMatt Macy  * streams).  This doesn't free the zfetch_t itself, that's left to the caller.
128eda14cbcSMatt Macy  */
129eda14cbcSMatt Macy void
130eda14cbcSMatt Macy dmu_zfetch_fini(zfetch_t *zf)
131eda14cbcSMatt Macy {
132eda14cbcSMatt Macy 	zstream_t *zs;
133eda14cbcSMatt Macy 
134eda14cbcSMatt Macy 	mutex_enter(&zf->zf_lock);
135eda14cbcSMatt Macy 	while ((zs = list_head(&zf->zf_stream)) != NULL)
136eda14cbcSMatt Macy 		dmu_zfetch_stream_remove(zf, zs);
137eda14cbcSMatt Macy 	mutex_exit(&zf->zf_lock);
138eda14cbcSMatt Macy 	list_destroy(&zf->zf_stream);
139eda14cbcSMatt Macy 	mutex_destroy(&zf->zf_lock);
140eda14cbcSMatt Macy 
141eda14cbcSMatt Macy 	zf->zf_dnode = NULL;
142eda14cbcSMatt Macy }
143eda14cbcSMatt Macy 
144eda14cbcSMatt Macy /*
145eda14cbcSMatt Macy  * If there aren't too many streams already, create a new stream.
146eda14cbcSMatt Macy  * The "blkid" argument is the next block that we expect this stream to access.
147eda14cbcSMatt Macy  * While we're here, clean up old streams (which haven't been
148eda14cbcSMatt Macy  * accessed for at least zfetch_min_sec_reap seconds).
149eda14cbcSMatt Macy  */
150eda14cbcSMatt Macy static void
151eda14cbcSMatt Macy dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
152eda14cbcSMatt Macy {
153eda14cbcSMatt Macy 	zstream_t *zs_next;
154eda14cbcSMatt Macy 	int numstreams = 0;
155eda14cbcSMatt Macy 
156eda14cbcSMatt Macy 	ASSERT(MUTEX_HELD(&zf->zf_lock));
157eda14cbcSMatt Macy 
158eda14cbcSMatt Macy 	/*
159eda14cbcSMatt Macy 	 * Clean up old streams.
160eda14cbcSMatt Macy 	 */
161eda14cbcSMatt Macy 	for (zstream_t *zs = list_head(&zf->zf_stream);
162eda14cbcSMatt Macy 	    zs != NULL; zs = zs_next) {
163eda14cbcSMatt Macy 		zs_next = list_next(&zf->zf_stream, zs);
164eda14cbcSMatt Macy 		if (((gethrtime() - zs->zs_atime) / NANOSEC) >
165eda14cbcSMatt Macy 		    zfetch_min_sec_reap)
166eda14cbcSMatt Macy 			dmu_zfetch_stream_remove(zf, zs);
167eda14cbcSMatt Macy 		else
168eda14cbcSMatt Macy 			numstreams++;
169eda14cbcSMatt Macy 	}
170eda14cbcSMatt Macy 
171eda14cbcSMatt Macy 	/*
172eda14cbcSMatt Macy 	 * The maximum number of streams is normally zfetch_max_streams,
173eda14cbcSMatt Macy 	 * but for small files we lower it such that it's at least possible
174eda14cbcSMatt Macy 	 * for all the streams to be non-overlapping.
175eda14cbcSMatt Macy 	 *
176eda14cbcSMatt Macy 	 * If we are already at the maximum number of streams for this file,
177eda14cbcSMatt Macy 	 * even after removing old streams, then don't create this stream.
178eda14cbcSMatt Macy 	 */
179eda14cbcSMatt Macy 	uint32_t max_streams = MAX(1, MIN(zfetch_max_streams,
180eda14cbcSMatt Macy 	    zf->zf_dnode->dn_maxblkid * zf->zf_dnode->dn_datablksz /
181eda14cbcSMatt Macy 	    zfetch_max_distance));
182eda14cbcSMatt Macy 	if (numstreams >= max_streams) {
183eda14cbcSMatt Macy 		ZFETCHSTAT_BUMP(zfetchstat_max_streams);
184eda14cbcSMatt Macy 		return;
185eda14cbcSMatt Macy 	}
186eda14cbcSMatt Macy 
187eda14cbcSMatt Macy 	zstream_t *zs = kmem_zalloc(sizeof (*zs), KM_SLEEP);
188eda14cbcSMatt Macy 	zs->zs_blkid = blkid;
189eda14cbcSMatt Macy 	zs->zs_pf_blkid = blkid;
190eda14cbcSMatt Macy 	zs->zs_ipf_blkid = blkid;
191eda14cbcSMatt Macy 	zs->zs_atime = gethrtime();
192eda14cbcSMatt Macy 	mutex_init(&zs->zs_lock, NULL, MUTEX_DEFAULT, NULL);
193eda14cbcSMatt Macy 
194eda14cbcSMatt Macy 	list_insert_head(&zf->zf_stream, zs);
195eda14cbcSMatt Macy }
196eda14cbcSMatt Macy 
197eda14cbcSMatt Macy /*
198eda14cbcSMatt Macy  * This is the predictive prefetch entry point.  It associates dnode access
199eda14cbcSMatt Macy  * specified with blkid and nblks arguments with prefetch stream, predicts
200eda14cbcSMatt Macy  * further accesses based on that stats and initiates speculative prefetch.
201eda14cbcSMatt Macy  * fetch_data argument specifies whether actual data blocks should be fetched:
202eda14cbcSMatt Macy  *   FALSE -- prefetch only indirect blocks for predicted data blocks;
203eda14cbcSMatt Macy  *   TRUE -- prefetch predicted data blocks plus following indirect blocks.
204eda14cbcSMatt Macy  */
205eda14cbcSMatt Macy void
206eda14cbcSMatt Macy dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
207eda14cbcSMatt Macy     boolean_t have_lock)
208eda14cbcSMatt Macy {
209eda14cbcSMatt Macy 	zstream_t *zs;
210eda14cbcSMatt Macy 	int64_t pf_start, ipf_start, ipf_istart, ipf_iend;
211eda14cbcSMatt Macy 	int64_t pf_ahead_blks, max_blks;
212eda14cbcSMatt Macy 	int epbs, max_dist_blks, pf_nblks, ipf_nblks;
213eda14cbcSMatt Macy 	uint64_t end_of_access_blkid;
214eda14cbcSMatt Macy 	end_of_access_blkid = blkid + nblks;
215eda14cbcSMatt Macy 	spa_t *spa = zf->zf_dnode->dn_objset->os_spa;
216eda14cbcSMatt Macy 
217eda14cbcSMatt Macy 	if (zfs_prefetch_disable)
218eda14cbcSMatt Macy 		return;
219eda14cbcSMatt Macy 	/*
220eda14cbcSMatt Macy 	 * If we haven't yet loaded the indirect vdevs' mappings, we
221eda14cbcSMatt Macy 	 * can only read from blocks that we carefully ensure are on
222eda14cbcSMatt Macy 	 * concrete vdevs (or previously-loaded indirect vdevs).  So we
223eda14cbcSMatt Macy 	 * can't allow the predictive prefetcher to attempt reads of other
224eda14cbcSMatt Macy 	 * blocks (e.g. of the MOS's dnode object).
225eda14cbcSMatt Macy 	 */
226eda14cbcSMatt Macy 	if (!spa_indirect_vdevs_loaded(spa))
227eda14cbcSMatt Macy 		return;
228eda14cbcSMatt Macy 
229eda14cbcSMatt Macy 	/*
230eda14cbcSMatt Macy 	 * As a fast path for small (single-block) files, ignore access
231eda14cbcSMatt Macy 	 * to the first block.
232eda14cbcSMatt Macy 	 */
233eda14cbcSMatt Macy 	if (blkid == 0)
234eda14cbcSMatt Macy 		return;
235eda14cbcSMatt Macy 
236eda14cbcSMatt Macy 	if (!have_lock)
237eda14cbcSMatt Macy 		rw_enter(&zf->zf_dnode->dn_struct_rwlock, RW_READER);
238eda14cbcSMatt Macy 	mutex_enter(&zf->zf_lock);
239eda14cbcSMatt Macy 
240eda14cbcSMatt Macy 	/*
241eda14cbcSMatt Macy 	 * Find matching prefetch stream.  Depending on whether the accesses
242eda14cbcSMatt Macy 	 * are block-aligned, first block of the new access may either follow
243eda14cbcSMatt Macy 	 * the last block of the previous access, or be equal to it.
244eda14cbcSMatt Macy 	 */
245eda14cbcSMatt Macy 	for (zs = list_head(&zf->zf_stream); zs != NULL;
246eda14cbcSMatt Macy 	    zs = list_next(&zf->zf_stream, zs)) {
247eda14cbcSMatt Macy 		if (blkid == zs->zs_blkid || blkid + 1 == zs->zs_blkid) {
248eda14cbcSMatt Macy 			mutex_enter(&zs->zs_lock);
249eda14cbcSMatt Macy 			/*
250eda14cbcSMatt Macy 			 * zs_blkid could have changed before we
251eda14cbcSMatt Macy 			 * acquired zs_lock; re-check them here.
252eda14cbcSMatt Macy 			 */
253eda14cbcSMatt Macy 			if (blkid == zs->zs_blkid) {
254eda14cbcSMatt Macy 				break;
255eda14cbcSMatt Macy 			} else if (blkid + 1 == zs->zs_blkid) {
256eda14cbcSMatt Macy 				blkid++;
257eda14cbcSMatt Macy 				nblks--;
258eda14cbcSMatt Macy 				if (nblks == 0) {
259eda14cbcSMatt Macy 					/* Already prefetched this before. */
260eda14cbcSMatt Macy 					mutex_exit(&zs->zs_lock);
261eda14cbcSMatt Macy 					mutex_exit(&zf->zf_lock);
262eda14cbcSMatt Macy 					if (!have_lock) {
263eda14cbcSMatt Macy 						rw_exit(&zf->zf_dnode->
264eda14cbcSMatt Macy 						    dn_struct_rwlock);
265eda14cbcSMatt Macy 					}
266eda14cbcSMatt Macy 					return;
267eda14cbcSMatt Macy 				}
268eda14cbcSMatt Macy 				break;
269eda14cbcSMatt Macy 			}
270eda14cbcSMatt Macy 			mutex_exit(&zs->zs_lock);
271eda14cbcSMatt Macy 		}
272eda14cbcSMatt Macy 	}
273eda14cbcSMatt Macy 
274eda14cbcSMatt Macy 	if (zs == NULL) {
275eda14cbcSMatt Macy 		/*
276eda14cbcSMatt Macy 		 * This access is not part of any existing stream.  Create
277eda14cbcSMatt Macy 		 * a new stream for it.
278eda14cbcSMatt Macy 		 */
279eda14cbcSMatt Macy 		ZFETCHSTAT_BUMP(zfetchstat_misses);
280eda14cbcSMatt Macy 
281eda14cbcSMatt Macy 		dmu_zfetch_stream_create(zf, end_of_access_blkid);
282eda14cbcSMatt Macy 		mutex_exit(&zf->zf_lock);
283eda14cbcSMatt Macy 		if (!have_lock)
284eda14cbcSMatt Macy 			rw_exit(&zf->zf_dnode->dn_struct_rwlock);
285eda14cbcSMatt Macy 		return;
286eda14cbcSMatt Macy 	}
287eda14cbcSMatt Macy 
288eda14cbcSMatt Macy 	/*
289eda14cbcSMatt Macy 	 * This access was to a block that we issued a prefetch for on
290eda14cbcSMatt Macy 	 * behalf of this stream. Issue further prefetches for this stream.
291eda14cbcSMatt Macy 	 *
292eda14cbcSMatt Macy 	 * Normally, we start prefetching where we stopped
293eda14cbcSMatt Macy 	 * prefetching last (zs_pf_blkid).  But when we get our first
294eda14cbcSMatt Macy 	 * hit on this stream, zs_pf_blkid == zs_blkid, we don't
295eda14cbcSMatt Macy 	 * want to prefetch the block we just accessed.  In this case,
296eda14cbcSMatt Macy 	 * start just after the block we just accessed.
297eda14cbcSMatt Macy 	 */
298eda14cbcSMatt Macy 	pf_start = MAX(zs->zs_pf_blkid, end_of_access_blkid);
299eda14cbcSMatt Macy 
300eda14cbcSMatt Macy 	/*
301eda14cbcSMatt Macy 	 * Double our amount of prefetched data, but don't let the
302eda14cbcSMatt Macy 	 * prefetch get further ahead than zfetch_max_distance.
303eda14cbcSMatt Macy 	 */
304eda14cbcSMatt Macy 	if (fetch_data) {
305eda14cbcSMatt Macy 		max_dist_blks =
306eda14cbcSMatt Macy 		    zfetch_max_distance >> zf->zf_dnode->dn_datablkshift;
307eda14cbcSMatt Macy 		/*
308eda14cbcSMatt Macy 		 * Previously, we were (zs_pf_blkid - blkid) ahead.  We
309eda14cbcSMatt Macy 		 * want to now be double that, so read that amount again,
310eda14cbcSMatt Macy 		 * plus the amount we are catching up by (i.e. the amount
311eda14cbcSMatt Macy 		 * read just now).
312eda14cbcSMatt Macy 		 */
313eda14cbcSMatt Macy 		pf_ahead_blks = zs->zs_pf_blkid - blkid + nblks;
314eda14cbcSMatt Macy 		max_blks = max_dist_blks - (pf_start - end_of_access_blkid);
315eda14cbcSMatt Macy 		pf_nblks = MIN(pf_ahead_blks, max_blks);
316eda14cbcSMatt Macy 	} else {
317eda14cbcSMatt Macy 		pf_nblks = 0;
318eda14cbcSMatt Macy 	}
319eda14cbcSMatt Macy 
320eda14cbcSMatt Macy 	zs->zs_pf_blkid = pf_start + pf_nblks;
321eda14cbcSMatt Macy 
322eda14cbcSMatt Macy 	/*
323eda14cbcSMatt Macy 	 * Do the same for indirects, starting from where we stopped last,
324eda14cbcSMatt Macy 	 * or where we will stop reading data blocks (and the indirects
325eda14cbcSMatt Macy 	 * that point to them).
326eda14cbcSMatt Macy 	 */
327eda14cbcSMatt Macy 	ipf_start = MAX(zs->zs_ipf_blkid, zs->zs_pf_blkid);
328eda14cbcSMatt Macy 	max_dist_blks = zfetch_max_idistance >> zf->zf_dnode->dn_datablkshift;
329eda14cbcSMatt Macy 	/*
330eda14cbcSMatt Macy 	 * We want to double our distance ahead of the data prefetch
331eda14cbcSMatt Macy 	 * (or reader, if we are not prefetching data).  Previously, we
332eda14cbcSMatt Macy 	 * were (zs_ipf_blkid - blkid) ahead.  To double that, we read
333eda14cbcSMatt Macy 	 * that amount again, plus the amount we are catching up by
334eda14cbcSMatt Macy 	 * (i.e. the amount read now + the amount of data prefetched now).
335eda14cbcSMatt Macy 	 */
336eda14cbcSMatt Macy 	pf_ahead_blks = zs->zs_ipf_blkid - blkid + nblks + pf_nblks;
337eda14cbcSMatt Macy 	max_blks = max_dist_blks - (ipf_start - end_of_access_blkid);
338eda14cbcSMatt Macy 	ipf_nblks = MIN(pf_ahead_blks, max_blks);
339eda14cbcSMatt Macy 	zs->zs_ipf_blkid = ipf_start + ipf_nblks;
340eda14cbcSMatt Macy 
341eda14cbcSMatt Macy 	epbs = zf->zf_dnode->dn_indblkshift - SPA_BLKPTRSHIFT;
342eda14cbcSMatt Macy 	ipf_istart = P2ROUNDUP(ipf_start, 1 << epbs) >> epbs;
343eda14cbcSMatt Macy 	ipf_iend = P2ROUNDUP(zs->zs_ipf_blkid, 1 << epbs) >> epbs;
344eda14cbcSMatt Macy 
345eda14cbcSMatt Macy 	zs->zs_atime = gethrtime();
346eda14cbcSMatt Macy 	zs->zs_blkid = end_of_access_blkid;
347eda14cbcSMatt Macy 	mutex_exit(&zs->zs_lock);
348eda14cbcSMatt Macy 	mutex_exit(&zf->zf_lock);
349eda14cbcSMatt Macy 
350eda14cbcSMatt Macy 	/*
351eda14cbcSMatt Macy 	 * dbuf_prefetch() is asynchronous (even when it needs to read
352eda14cbcSMatt Macy 	 * indirect blocks), but we still prefer to drop our locks before
353eda14cbcSMatt Macy 	 * calling it to reduce the time we hold them.
354eda14cbcSMatt Macy 	 */
355eda14cbcSMatt Macy 
356eda14cbcSMatt Macy 	for (int i = 0; i < pf_nblks; i++) {
357eda14cbcSMatt Macy 		dbuf_prefetch(zf->zf_dnode, 0, pf_start + i,
358eda14cbcSMatt Macy 		    ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH);
359eda14cbcSMatt Macy 	}
360eda14cbcSMatt Macy 	for (int64_t iblk = ipf_istart; iblk < ipf_iend; iblk++) {
361eda14cbcSMatt Macy 		dbuf_prefetch(zf->zf_dnode, 1, iblk,
362eda14cbcSMatt Macy 		    ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH);
363eda14cbcSMatt Macy 	}
364eda14cbcSMatt Macy 	if (!have_lock)
365eda14cbcSMatt Macy 		rw_exit(&zf->zf_dnode->dn_struct_rwlock);
366eda14cbcSMatt Macy 	ZFETCHSTAT_BUMP(zfetchstat_hits);
367eda14cbcSMatt Macy }
368eda14cbcSMatt Macy 
369eda14cbcSMatt Macy /* BEGIN CSTYLED */
370eda14cbcSMatt Macy ZFS_MODULE_PARAM(zfs_prefetch, zfs_prefetch_, disable, INT, ZMOD_RW,
371eda14cbcSMatt Macy 	"Disable all ZFS prefetching");
372eda14cbcSMatt Macy 
373eda14cbcSMatt Macy ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_streams, UINT, ZMOD_RW,
374eda14cbcSMatt Macy 	"Max number of streams per zfetch");
375eda14cbcSMatt Macy 
376eda14cbcSMatt Macy ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, min_sec_reap, UINT, ZMOD_RW,
377eda14cbcSMatt Macy 	"Min time before stream reclaim");
378eda14cbcSMatt Macy 
379eda14cbcSMatt Macy ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_distance, UINT, ZMOD_RW,
380*180f8225SMatt Macy 	"Max bytes to prefetch per stream");
381*180f8225SMatt Macy 
382*180f8225SMatt Macy ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_idistance, UINT, ZMOD_RW,
383*180f8225SMatt Macy 	"Max bytes to prefetch indirects for per stream");
384eda14cbcSMatt Macy 
385eda14cbcSMatt Macy ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, array_rd_sz, ULONG, ZMOD_RW,
386eda14cbcSMatt Macy 	"Number of bytes in a array_read");
387eda14cbcSMatt Macy /* END CSTYLED */
388