xref: /freebsd/sys/contrib/openzfs/module/zfs/dmu_zfetch.c (revision eda14cbc264d6969b02f2b1994cef11148e914f1)
1*eda14cbcSMatt Macy /*
2*eda14cbcSMatt Macy  * CDDL HEADER START
3*eda14cbcSMatt Macy  *
4*eda14cbcSMatt Macy  * The contents of this file are subject to the terms of the
5*eda14cbcSMatt Macy  * Common Development and Distribution License (the "License").
6*eda14cbcSMatt Macy  * You may not use this file except in compliance with the License.
7*eda14cbcSMatt Macy  *
8*eda14cbcSMatt Macy  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*eda14cbcSMatt Macy  * or http://www.opensolaris.org/os/licensing.
10*eda14cbcSMatt Macy  * See the License for the specific language governing permissions
11*eda14cbcSMatt Macy  * and limitations under the License.
12*eda14cbcSMatt Macy  *
13*eda14cbcSMatt Macy  * When distributing Covered Code, include this CDDL HEADER in each
14*eda14cbcSMatt Macy  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*eda14cbcSMatt Macy  * If applicable, add the following below this CDDL HEADER, with the
16*eda14cbcSMatt Macy  * fields enclosed by brackets "[]" replaced with your own identifying
17*eda14cbcSMatt Macy  * information: Portions Copyright [yyyy] [name of copyright owner]
18*eda14cbcSMatt Macy  *
19*eda14cbcSMatt Macy  * CDDL HEADER END
20*eda14cbcSMatt Macy  */
21*eda14cbcSMatt Macy /*
22*eda14cbcSMatt Macy  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23*eda14cbcSMatt Macy  * Use is subject to license terms.
24*eda14cbcSMatt Macy  */
25*eda14cbcSMatt Macy 
26*eda14cbcSMatt Macy /*
27*eda14cbcSMatt Macy  * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
28*eda14cbcSMatt Macy  */
29*eda14cbcSMatt Macy 
30*eda14cbcSMatt Macy #include <sys/zfs_context.h>
31*eda14cbcSMatt Macy #include <sys/dnode.h>
32*eda14cbcSMatt Macy #include <sys/dmu_objset.h>
33*eda14cbcSMatt Macy #include <sys/dmu_zfetch.h>
34*eda14cbcSMatt Macy #include <sys/dmu.h>
35*eda14cbcSMatt Macy #include <sys/dbuf.h>
36*eda14cbcSMatt Macy #include <sys/kstat.h>
37*eda14cbcSMatt Macy 
/*
 * This tunable disables predictive prefetch.  Note that it leaves "prescient"
 * prefetch (e.g. prefetch for zfs send) intact.  Unlike predictive prefetch,
 * prescient prefetch never issues i/os that end up not being needed,
 * so it can't hurt performance.
 */

int zfs_prefetch_disable = B_FALSE;

/* max # of streams per zfetch (upper bound; see dmu_zfetch_stream_create) */
unsigned int	zfetch_max_streams = 8;
/* min time (seconds) a stream must be idle before stream reclaim */
unsigned int	zfetch_min_sec_reap = 2;
/* max bytes to prefetch per stream (default 8MB) */
unsigned int	zfetch_max_distance = 8 * 1024 * 1024;
/* max bytes to prefetch indirects for per stream (default 64MB) */
unsigned int	zfetch_max_idistance = 64 * 1024 * 1024;
/* max number of bytes in an array_read in which we allow prefetching (1MB) */
unsigned long	zfetch_array_rd_sz = 1024 * 1024;
57*eda14cbcSMatt Macy 
/*
 * Counters published through the "zfetchstats" kstat (see zfetch_init()).
 * Field order here must stay in sync with the zfetch_stats initializer
 * below, since the initializer is positional.
 */
typedef struct zfetch_stats {
	kstat_named_t zfetchstat_hits;		/* access matched a stream */
	kstat_named_t zfetchstat_misses;	/* access matched no stream */
	kstat_named_t zfetchstat_max_streams;	/* stream not created: cap hit */
} zfetch_stats_t;

static zfetch_stats_t zfetch_stats = {
	{ "hits",			KSTAT_DATA_UINT64 },
	{ "misses",			KSTAT_DATA_UINT64 },
	{ "max_streams",		KSTAT_DATA_UINT64 },
};
69*eda14cbcSMatt Macy 
70*eda14cbcSMatt Macy #define	ZFETCHSTAT_BUMP(stat) \
71*eda14cbcSMatt Macy 	atomic_inc_64(&zfetch_stats.stat.value.ui64);
72*eda14cbcSMatt Macy 
73*eda14cbcSMatt Macy kstat_t		*zfetch_ksp;
74*eda14cbcSMatt Macy 
75*eda14cbcSMatt Macy void
76*eda14cbcSMatt Macy zfetch_init(void)
77*eda14cbcSMatt Macy {
78*eda14cbcSMatt Macy 	zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc",
79*eda14cbcSMatt Macy 	    KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t),
80*eda14cbcSMatt Macy 	    KSTAT_FLAG_VIRTUAL);
81*eda14cbcSMatt Macy 
82*eda14cbcSMatt Macy 	if (zfetch_ksp != NULL) {
83*eda14cbcSMatt Macy 		zfetch_ksp->ks_data = &zfetch_stats;
84*eda14cbcSMatt Macy 		kstat_install(zfetch_ksp);
85*eda14cbcSMatt Macy 	}
86*eda14cbcSMatt Macy }
87*eda14cbcSMatt Macy 
88*eda14cbcSMatt Macy void
89*eda14cbcSMatt Macy zfetch_fini(void)
90*eda14cbcSMatt Macy {
91*eda14cbcSMatt Macy 	if (zfetch_ksp != NULL) {
92*eda14cbcSMatt Macy 		kstat_delete(zfetch_ksp);
93*eda14cbcSMatt Macy 		zfetch_ksp = NULL;
94*eda14cbcSMatt Macy 	}
95*eda14cbcSMatt Macy }
96*eda14cbcSMatt Macy 
97*eda14cbcSMatt Macy /*
98*eda14cbcSMatt Macy  * This takes a pointer to a zfetch structure and a dnode.  It performs the
99*eda14cbcSMatt Macy  * necessary setup for the zfetch structure, grokking data from the
100*eda14cbcSMatt Macy  * associated dnode.
101*eda14cbcSMatt Macy  */
102*eda14cbcSMatt Macy void
103*eda14cbcSMatt Macy dmu_zfetch_init(zfetch_t *zf, dnode_t *dno)
104*eda14cbcSMatt Macy {
105*eda14cbcSMatt Macy 	if (zf == NULL)
106*eda14cbcSMatt Macy 		return;
107*eda14cbcSMatt Macy 
108*eda14cbcSMatt Macy 	zf->zf_dnode = dno;
109*eda14cbcSMatt Macy 
110*eda14cbcSMatt Macy 	list_create(&zf->zf_stream, sizeof (zstream_t),
111*eda14cbcSMatt Macy 	    offsetof(zstream_t, zs_node));
112*eda14cbcSMatt Macy 
113*eda14cbcSMatt Macy 	mutex_init(&zf->zf_lock, NULL, MUTEX_DEFAULT, NULL);
114*eda14cbcSMatt Macy }
115*eda14cbcSMatt Macy 
/*
 * Unlink stream zs from zf's stream list and free it.  The caller must
 * hold zf_lock and must NOT hold zs->zs_lock, since that lock is
 * destroyed here along with the stream.
 */
static void
dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs)
{
	ASSERT(MUTEX_HELD(&zf->zf_lock));
	list_remove(&zf->zf_stream, zs);
	mutex_destroy(&zs->zs_lock);
	kmem_free(zs, sizeof (*zs));
}
124*eda14cbcSMatt Macy 
/*
 * Clean-up state associated with a zfetch structure (e.g. destroy the
 * streams).  This doesn't free the zfetch_t itself, that's left to the caller.
 */
void
dmu_zfetch_fini(zfetch_t *zf)
{
	zstream_t *zs;

	/* Drain all streams under zf_lock before tearing down the list. */
	mutex_enter(&zf->zf_lock);
	while ((zs = list_head(&zf->zf_stream)) != NULL)
		dmu_zfetch_stream_remove(zf, zs);
	mutex_exit(&zf->zf_lock);
	/* List is now empty and no stream can be added; safe to destroy. */
	list_destroy(&zf->zf_stream);
	mutex_destroy(&zf->zf_lock);

	zf->zf_dnode = NULL;
}
143*eda14cbcSMatt Macy 
144*eda14cbcSMatt Macy /*
145*eda14cbcSMatt Macy  * If there aren't too many streams already, create a new stream.
146*eda14cbcSMatt Macy  * The "blkid" argument is the next block that we expect this stream to access.
147*eda14cbcSMatt Macy  * While we're here, clean up old streams (which haven't been
148*eda14cbcSMatt Macy  * accessed for at least zfetch_min_sec_reap seconds).
149*eda14cbcSMatt Macy  */
150*eda14cbcSMatt Macy static void
151*eda14cbcSMatt Macy dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
152*eda14cbcSMatt Macy {
153*eda14cbcSMatt Macy 	zstream_t *zs_next;
154*eda14cbcSMatt Macy 	int numstreams = 0;
155*eda14cbcSMatt Macy 
156*eda14cbcSMatt Macy 	ASSERT(MUTEX_HELD(&zf->zf_lock));
157*eda14cbcSMatt Macy 
158*eda14cbcSMatt Macy 	/*
159*eda14cbcSMatt Macy 	 * Clean up old streams.
160*eda14cbcSMatt Macy 	 */
161*eda14cbcSMatt Macy 	for (zstream_t *zs = list_head(&zf->zf_stream);
162*eda14cbcSMatt Macy 	    zs != NULL; zs = zs_next) {
163*eda14cbcSMatt Macy 		zs_next = list_next(&zf->zf_stream, zs);
164*eda14cbcSMatt Macy 		if (((gethrtime() - zs->zs_atime) / NANOSEC) >
165*eda14cbcSMatt Macy 		    zfetch_min_sec_reap)
166*eda14cbcSMatt Macy 			dmu_zfetch_stream_remove(zf, zs);
167*eda14cbcSMatt Macy 		else
168*eda14cbcSMatt Macy 			numstreams++;
169*eda14cbcSMatt Macy 	}
170*eda14cbcSMatt Macy 
171*eda14cbcSMatt Macy 	/*
172*eda14cbcSMatt Macy 	 * The maximum number of streams is normally zfetch_max_streams,
173*eda14cbcSMatt Macy 	 * but for small files we lower it such that it's at least possible
174*eda14cbcSMatt Macy 	 * for all the streams to be non-overlapping.
175*eda14cbcSMatt Macy 	 *
176*eda14cbcSMatt Macy 	 * If we are already at the maximum number of streams for this file,
177*eda14cbcSMatt Macy 	 * even after removing old streams, then don't create this stream.
178*eda14cbcSMatt Macy 	 */
179*eda14cbcSMatt Macy 	uint32_t max_streams = MAX(1, MIN(zfetch_max_streams,
180*eda14cbcSMatt Macy 	    zf->zf_dnode->dn_maxblkid * zf->zf_dnode->dn_datablksz /
181*eda14cbcSMatt Macy 	    zfetch_max_distance));
182*eda14cbcSMatt Macy 	if (numstreams >= max_streams) {
183*eda14cbcSMatt Macy 		ZFETCHSTAT_BUMP(zfetchstat_max_streams);
184*eda14cbcSMatt Macy 		return;
185*eda14cbcSMatt Macy 	}
186*eda14cbcSMatt Macy 
187*eda14cbcSMatt Macy 	zstream_t *zs = kmem_zalloc(sizeof (*zs), KM_SLEEP);
188*eda14cbcSMatt Macy 	zs->zs_blkid = blkid;
189*eda14cbcSMatt Macy 	zs->zs_pf_blkid = blkid;
190*eda14cbcSMatt Macy 	zs->zs_ipf_blkid = blkid;
191*eda14cbcSMatt Macy 	zs->zs_atime = gethrtime();
192*eda14cbcSMatt Macy 	mutex_init(&zs->zs_lock, NULL, MUTEX_DEFAULT, NULL);
193*eda14cbcSMatt Macy 
194*eda14cbcSMatt Macy 	list_insert_head(&zf->zf_stream, zs);
195*eda14cbcSMatt Macy }
196*eda14cbcSMatt Macy 
/*
 * This is the predictive prefetch entry point.  It associates dnode access
 * specified with blkid and nblks arguments with prefetch stream, predicts
 * further accesses based on that stats and initiates speculative prefetch.
 * fetch_data argument specifies whether actual data blocks should be fetched:
 *   FALSE -- prefetch only indirect blocks for predicted data blocks;
 *   TRUE -- prefetch predicted data blocks plus following indirect blocks.
 * have_lock indicates whether the caller already holds dn_struct_rwlock;
 * if not, it is taken (and dropped) here as reader.
 */
void
dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
    boolean_t have_lock)
{
	zstream_t *zs;
	int64_t pf_start, ipf_start, ipf_istart, ipf_iend;
	int64_t pf_ahead_blks, max_blks;
	int epbs, max_dist_blks, pf_nblks, ipf_nblks;
	uint64_t end_of_access_blkid;
	end_of_access_blkid = blkid + nblks;	/* first block past this access */
	spa_t *spa = zf->zf_dnode->dn_objset->os_spa;

	if (zfs_prefetch_disable)
		return;
	/*
	 * If we haven't yet loaded the indirect vdevs' mappings, we
	 * can only read from blocks that we carefully ensure are on
	 * concrete vdevs (or previously-loaded indirect vdevs).  So we
	 * can't allow the predictive prefetcher to attempt reads of other
	 * blocks (e.g. of the MOS's dnode object).
	 */
	if (!spa_indirect_vdevs_loaded(spa))
		return;

	/*
	 * As a fast path for small (single-block) files, ignore access
	 * to the first block.
	 */
	if (blkid == 0)
		return;

	if (!have_lock)
		rw_enter(&zf->zf_dnode->dn_struct_rwlock, RW_READER);
	mutex_enter(&zf->zf_lock);

	/*
	 * Find matching prefetch stream.  Depending on whether the accesses
	 * are block-aligned, first block of the new access may either follow
	 * the last block of the previous access, or be equal to it.
	 *
	 * NOTE: on a match the loop breaks out with zs->zs_lock still held;
	 * it is released on the early-return path below or at the end of
	 * the function.
	 */
	for (zs = list_head(&zf->zf_stream); zs != NULL;
	    zs = list_next(&zf->zf_stream, zs)) {
		if (blkid == zs->zs_blkid || blkid + 1 == zs->zs_blkid) {
			mutex_enter(&zs->zs_lock);
			/*
			 * zs_blkid could have changed before we
			 * acquired zs_lock; re-check them here.
			 */
			if (blkid == zs->zs_blkid) {
				break;
			} else if (blkid + 1 == zs->zs_blkid) {
				/* Unaligned access: skip the overlap block. */
				blkid++;
				nblks--;
				if (nblks == 0) {
					/* Already prefetched this before. */
					mutex_exit(&zs->zs_lock);
					mutex_exit(&zf->zf_lock);
					if (!have_lock) {
						rw_exit(&zf->zf_dnode->
						    dn_struct_rwlock);
					}
					return;
				}
				break;
			}
			mutex_exit(&zs->zs_lock);
		}
	}

	if (zs == NULL) {
		/*
		 * This access is not part of any existing stream.  Create
		 * a new stream for it.
		 */
		ZFETCHSTAT_BUMP(zfetchstat_misses);

		dmu_zfetch_stream_create(zf, end_of_access_blkid);
		mutex_exit(&zf->zf_lock);
		if (!have_lock)
			rw_exit(&zf->zf_dnode->dn_struct_rwlock);
		return;
	}

	/*
	 * This access was to a block that we issued a prefetch for on
	 * behalf of this stream. Issue further prefetches for this stream.
	 *
	 * Normally, we start prefetching where we stopped
	 * prefetching last (zs_pf_blkid).  But when we get our first
	 * hit on this stream, zs_pf_blkid == zs_blkid, we don't
	 * want to prefetch the block we just accessed.  In this case,
	 * start just after the block we just accessed.
	 */
	pf_start = MAX(zs->zs_pf_blkid, end_of_access_blkid);

	/*
	 * Double our amount of prefetched data, but don't let the
	 * prefetch get further ahead than zfetch_max_distance.
	 */
	if (fetch_data) {
		max_dist_blks =
		    zfetch_max_distance >> zf->zf_dnode->dn_datablkshift;
		/*
		 * Previously, we were (zs_pf_blkid - blkid) ahead.  We
		 * want to now be double that, so read that amount again,
		 * plus the amount we are catching up by (i.e. the amount
		 * read just now).
		 */
		pf_ahead_blks = zs->zs_pf_blkid - blkid + nblks;
		max_blks = max_dist_blks - (pf_start - end_of_access_blkid);
		pf_nblks = MIN(pf_ahead_blks, max_blks);
	} else {
		pf_nblks = 0;
	}

	zs->zs_pf_blkid = pf_start + pf_nblks;

	/*
	 * Do the same for indirects, starting from where we stopped last,
	 * or where we will stop reading data blocks (and the indirects
	 * that point to them).
	 */
	ipf_start = MAX(zs->zs_ipf_blkid, zs->zs_pf_blkid);
	max_dist_blks = zfetch_max_idistance >> zf->zf_dnode->dn_datablkshift;
	/*
	 * We want to double our distance ahead of the data prefetch
	 * (or reader, if we are not prefetching data).  Previously, we
	 * were (zs_ipf_blkid - blkid) ahead.  To double that, we read
	 * that amount again, plus the amount we are catching up by
	 * (i.e. the amount read now + the amount of data prefetched now).
	 */
	pf_ahead_blks = zs->zs_ipf_blkid - blkid + nblks + pf_nblks;
	max_blks = max_dist_blks - (ipf_start - end_of_access_blkid);
	ipf_nblks = MIN(pf_ahead_blks, max_blks);
	zs->zs_ipf_blkid = ipf_start + ipf_nblks;

	/* Convert the level-0 indirect range to level-1 block numbers. */
	epbs = zf->zf_dnode->dn_indblkshift - SPA_BLKPTRSHIFT;
	ipf_istart = P2ROUNDUP(ipf_start, 1 << epbs) >> epbs;
	ipf_iend = P2ROUNDUP(zs->zs_ipf_blkid, 1 << epbs) >> epbs;

	zs->zs_atime = gethrtime();
	zs->zs_blkid = end_of_access_blkid;
	mutex_exit(&zs->zs_lock);
	mutex_exit(&zf->zf_lock);

	/*
	 * dbuf_prefetch() is asynchronous (even when it needs to read
	 * indirect blocks), but we still prefer to drop our locks before
	 * calling it to reduce the time we hold them.
	 */

	for (int i = 0; i < pf_nblks; i++) {
		dbuf_prefetch(zf->zf_dnode, 0, pf_start + i,
		    ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH);
	}
	for (int64_t iblk = ipf_istart; iblk < ipf_iend; iblk++) {
		dbuf_prefetch(zf->zf_dnode, 1, iblk,
		    ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH);
	}
	if (!have_lock)
		rw_exit(&zf->zf_dnode->dn_struct_rwlock);
	ZFETCHSTAT_BUMP(zfetchstat_hits);
}
368*eda14cbcSMatt Macy 
369*eda14cbcSMatt Macy /* BEGIN CSTYLED */
370*eda14cbcSMatt Macy ZFS_MODULE_PARAM(zfs_prefetch, zfs_prefetch_, disable, INT, ZMOD_RW,
371*eda14cbcSMatt Macy 	"Disable all ZFS prefetching");
372*eda14cbcSMatt Macy 
373*eda14cbcSMatt Macy ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_streams, UINT, ZMOD_RW,
374*eda14cbcSMatt Macy 	"Max number of streams per zfetch");
375*eda14cbcSMatt Macy 
376*eda14cbcSMatt Macy ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, min_sec_reap, UINT, ZMOD_RW,
377*eda14cbcSMatt Macy 	"Min time before stream reclaim");
378*eda14cbcSMatt Macy 
379*eda14cbcSMatt Macy ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_distance, UINT, ZMOD_RW,
380*eda14cbcSMatt Macy 	"Max bytes to prefetch per stream (default 8MB)");
381*eda14cbcSMatt Macy 
382*eda14cbcSMatt Macy ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, array_rd_sz, ULONG, ZMOD_RW,
383*eda14cbcSMatt Macy 	"Number of bytes in a array_read");
384*eda14cbcSMatt Macy /* END CSTYLED */
385