/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
25fa9e4066Sahrens
/*
 * Copyright (c) 2013 by Delphix. All rights reserved.
 */
2969962b56SMatthew Ahrens
30fa9e4066Sahrens #include <sys/zfs_context.h>
31fa9e4066Sahrens #include <sys/dnode.h>
32fa9e4066Sahrens #include <sys/dmu_objset.h>
33fa9e4066Sahrens #include <sys/dmu_zfetch.h>
34fa9e4066Sahrens #include <sys/dmu.h>
35fa9e4066Sahrens #include <sys/dbuf.h>
367cbf8b43SRich Morris #include <sys/kstat.h>
37fa9e4066Sahrens
/*
 * Prefetch tunables.  Tunables are best avoided, but these remain adjustable
 * globals until the prefetcher behaves the way we want it to.
 */

/* When non-zero, dmu_zfetch() returns immediately without prefetching. */
int zfs_prefetch_disable = 0;

/* Maximum number of streams per zfetch. */
uint32_t	zfetch_max_streams = 8;
/* A stream must be idle for more than this many seconds to be reclaimed. */
uint32_t	zfetch_min_sec_reap = 2;
/* Maximum number of blocks to fetch at a time. */
uint32_t	zfetch_block_cap = 256;
/* Number of bytes in an array_read at which we stop prefetching (1MB). */
uint64_t	zfetch_array_rd_sz = 1024 * 1024;
53fa9e4066Sahrens
54fa9e4066Sahrens /* forward decls for static routines */
553e30c24aSWill Andrews static boolean_t dmu_zfetch_colinear(zfetch_t *, zstream_t *);
56fa9e4066Sahrens static void dmu_zfetch_dofetch(zfetch_t *, zstream_t *);
57fa9e4066Sahrens static uint64_t dmu_zfetch_fetch(dnode_t *, uint64_t, uint64_t);
58fa9e4066Sahrens static uint64_t dmu_zfetch_fetchsz(dnode_t *, uint64_t, uint64_t);
593e30c24aSWill Andrews static boolean_t dmu_zfetch_find(zfetch_t *, zstream_t *, int);
60fa9e4066Sahrens static int dmu_zfetch_stream_insert(zfetch_t *, zstream_t *);
61fa9e4066Sahrens static zstream_t *dmu_zfetch_stream_reclaim(zfetch_t *);
62fa9e4066Sahrens static void dmu_zfetch_stream_remove(zfetch_t *, zstream_t *);
63fa9e4066Sahrens static int dmu_zfetch_streams_equal(zstream_t *, zstream_t *);
64fa9e4066Sahrens
657cbf8b43SRich Morris typedef struct zfetch_stats {
667cbf8b43SRich Morris kstat_named_t zfetchstat_hits;
677cbf8b43SRich Morris kstat_named_t zfetchstat_misses;
687cbf8b43SRich Morris kstat_named_t zfetchstat_colinear_hits;
697cbf8b43SRich Morris kstat_named_t zfetchstat_colinear_misses;
707cbf8b43SRich Morris kstat_named_t zfetchstat_stride_hits;
717cbf8b43SRich Morris kstat_named_t zfetchstat_stride_misses;
727cbf8b43SRich Morris kstat_named_t zfetchstat_reclaim_successes;
737cbf8b43SRich Morris kstat_named_t zfetchstat_reclaim_failures;
747cbf8b43SRich Morris kstat_named_t zfetchstat_stream_resets;
757cbf8b43SRich Morris kstat_named_t zfetchstat_stream_noresets;
767cbf8b43SRich Morris kstat_named_t zfetchstat_bogus_streams;
777cbf8b43SRich Morris } zfetch_stats_t;
787cbf8b43SRich Morris
797cbf8b43SRich Morris static zfetch_stats_t zfetch_stats = {
807cbf8b43SRich Morris { "hits", KSTAT_DATA_UINT64 },
817cbf8b43SRich Morris { "misses", KSTAT_DATA_UINT64 },
827cbf8b43SRich Morris { "colinear_hits", KSTAT_DATA_UINT64 },
837cbf8b43SRich Morris { "colinear_misses", KSTAT_DATA_UINT64 },
847cbf8b43SRich Morris { "stride_hits", KSTAT_DATA_UINT64 },
857cbf8b43SRich Morris { "stride_misses", KSTAT_DATA_UINT64 },
867cbf8b43SRich Morris { "reclaim_successes", KSTAT_DATA_UINT64 },
877cbf8b43SRich Morris { "reclaim_failures", KSTAT_DATA_UINT64 },
887cbf8b43SRich Morris { "streams_resets", KSTAT_DATA_UINT64 },
897cbf8b43SRich Morris { "streams_noresets", KSTAT_DATA_UINT64 },
907cbf8b43SRich Morris { "bogus_streams", KSTAT_DATA_UINT64 },
917cbf8b43SRich Morris };
927cbf8b43SRich Morris
/*
 * Atomically add val to the named counter in zfetch_stats.
 *
 * The expansion is a single expression with no trailing semicolon, so an
 * invocation followed by ';' is exactly one statement and is safe to use as
 * the body of an unbraced if/else.
 */
#define	ZFETCHSTAT_INCR(stat, val) \
	atomic_add_64(&zfetch_stats.stat.value.ui64, (val))

/* Atomically add one to the named counter in zfetch_stats. */
#define	ZFETCHSTAT_BUMP(stat)	ZFETCHSTAT_INCR(stat, 1)
977cbf8b43SRich Morris
987cbf8b43SRich Morris kstat_t *zfetch_ksp;
997cbf8b43SRich Morris
100fa9e4066Sahrens /*
101fa9e4066Sahrens * Given a zfetch structure and a zstream structure, determine whether the
10213506d1eSmaybee * blocks to be read are part of a co-linear pair of existing prefetch
103fa9e4066Sahrens * streams. If a set is found, coalesce the streams, removing one, and
104fa9e4066Sahrens * configure the prefetch so it looks for a strided access pattern.
105fa9e4066Sahrens *
10613506d1eSmaybee * In other words: if we find two sequential access streams that are
10713506d1eSmaybee * the same length and distance N appart, and this read is N from the
10813506d1eSmaybee * last stream, then we are probably in a strided access pattern. So
10913506d1eSmaybee * combine the two sequential streams into a single strided stream.
11013506d1eSmaybee *
1113e30c24aSWill Andrews * Returns whether co-linear streams were found.
112fa9e4066Sahrens */
1133e30c24aSWill Andrews static boolean_t
dmu_zfetch_colinear(zfetch_t * zf,zstream_t * zh)114fa9e4066Sahrens dmu_zfetch_colinear(zfetch_t *zf, zstream_t *zh)
115fa9e4066Sahrens {
116fa9e4066Sahrens zstream_t *z_walk;
117fa9e4066Sahrens zstream_t *z_comp;
118fa9e4066Sahrens
119404ba9d7Srbourbon if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER))
120404ba9d7Srbourbon return (0);
121fa9e4066Sahrens
122fa9e4066Sahrens if (zh == NULL) {
123fa9e4066Sahrens rw_exit(&zf->zf_rwlock);
124fa9e4066Sahrens return (0);
125fa9e4066Sahrens }
126fa9e4066Sahrens
127fa9e4066Sahrens for (z_walk = list_head(&zf->zf_stream); z_walk;
128fa9e4066Sahrens z_walk = list_next(&zf->zf_stream, z_walk)) {
129fa9e4066Sahrens for (z_comp = list_next(&zf->zf_stream, z_walk); z_comp;
130fa9e4066Sahrens z_comp = list_next(&zf->zf_stream, z_comp)) {
131fa9e4066Sahrens int64_t diff;
132fa9e4066Sahrens
133fa9e4066Sahrens if (z_walk->zst_len != z_walk->zst_stride ||
134fa9e4066Sahrens z_comp->zst_len != z_comp->zst_stride) {
135fa9e4066Sahrens continue;
136fa9e4066Sahrens }
137fa9e4066Sahrens
138fa9e4066Sahrens diff = z_comp->zst_offset - z_walk->zst_offset;
139fa9e4066Sahrens if (z_comp->zst_offset + diff == zh->zst_offset) {
140fa9e4066Sahrens z_walk->zst_offset = zh->zst_offset;
141fa9e4066Sahrens z_walk->zst_direction = diff < 0 ? -1 : 1;
142fa9e4066Sahrens z_walk->zst_stride =
143fa9e4066Sahrens diff * z_walk->zst_direction;
144fa9e4066Sahrens z_walk->zst_ph_offset =
145fa9e4066Sahrens zh->zst_offset + z_walk->zst_stride;
146fa9e4066Sahrens dmu_zfetch_stream_remove(zf, z_comp);
147fa9e4066Sahrens mutex_destroy(&z_comp->zst_lock);
148fa9e4066Sahrens kmem_free(z_comp, sizeof (zstream_t));
149fa9e4066Sahrens
150fa9e4066Sahrens dmu_zfetch_dofetch(zf, z_walk);
151fa9e4066Sahrens
152fa9e4066Sahrens rw_exit(&zf->zf_rwlock);
153fa9e4066Sahrens return (1);
154fa9e4066Sahrens }
155fa9e4066Sahrens
156fa9e4066Sahrens diff = z_walk->zst_offset - z_comp->zst_offset;
157fa9e4066Sahrens if (z_walk->zst_offset + diff == zh->zst_offset) {
158fa9e4066Sahrens z_walk->zst_offset = zh->zst_offset;
159fa9e4066Sahrens z_walk->zst_direction = diff < 0 ? -1 : 1;
160fa9e4066Sahrens z_walk->zst_stride =
161fa9e4066Sahrens diff * z_walk->zst_direction;
162fa9e4066Sahrens z_walk->zst_ph_offset =
163fa9e4066Sahrens zh->zst_offset + z_walk->zst_stride;
164fa9e4066Sahrens dmu_zfetch_stream_remove(zf, z_comp);
165fa9e4066Sahrens mutex_destroy(&z_comp->zst_lock);
166fa9e4066Sahrens kmem_free(z_comp, sizeof (zstream_t));
167fa9e4066Sahrens
168fa9e4066Sahrens dmu_zfetch_dofetch(zf, z_walk);
169fa9e4066Sahrens
170fa9e4066Sahrens rw_exit(&zf->zf_rwlock);
171fa9e4066Sahrens return (1);
172fa9e4066Sahrens }
173fa9e4066Sahrens }
174fa9e4066Sahrens }
175fa9e4066Sahrens
176fa9e4066Sahrens rw_exit(&zf->zf_rwlock);
177fa9e4066Sahrens return (0);
178fa9e4066Sahrens }
179fa9e4066Sahrens
180fa9e4066Sahrens /*
181fa9e4066Sahrens * Given a zstream_t, determine the bounds of the prefetch. Then call the
182fa9e4066Sahrens * routine that actually prefetches the individual blocks.
183fa9e4066Sahrens */
184fa9e4066Sahrens static void
dmu_zfetch_dofetch(zfetch_t * zf,zstream_t * zs)185fa9e4066Sahrens dmu_zfetch_dofetch(zfetch_t *zf, zstream_t *zs)
186fa9e4066Sahrens {
187fa9e4066Sahrens uint64_t prefetch_tail;
188fa9e4066Sahrens uint64_t prefetch_limit;
189fa9e4066Sahrens uint64_t prefetch_ofst;
190fa9e4066Sahrens uint64_t prefetch_len;
191fa9e4066Sahrens uint64_t blocks_fetched;
192fa9e4066Sahrens
193fa9e4066Sahrens zs->zst_stride = MAX((int64_t)zs->zst_stride, zs->zst_len);
194fa9e4066Sahrens zs->zst_cap = MIN(zfetch_block_cap, 2 * zs->zst_cap);
195fa9e4066Sahrens
196fa9e4066Sahrens prefetch_tail = MAX((int64_t)zs->zst_ph_offset,
197fa9e4066Sahrens (int64_t)(zs->zst_offset + zs->zst_stride));
198fa9e4066Sahrens /*
199fa9e4066Sahrens * XXX: use a faster division method?
200fa9e4066Sahrens */
201fa9e4066Sahrens prefetch_limit = zs->zst_offset + zs->zst_len +
202fa9e4066Sahrens (zs->zst_cap * zs->zst_stride) / zs->zst_len;
203fa9e4066Sahrens
204fa9e4066Sahrens while (prefetch_tail < prefetch_limit) {
205fa9e4066Sahrens prefetch_ofst = zs->zst_offset + zs->zst_direction *
206fa9e4066Sahrens (prefetch_tail - zs->zst_offset);
207fa9e4066Sahrens
208fa9e4066Sahrens prefetch_len = zs->zst_len;
209fa9e4066Sahrens
210fa9e4066Sahrens /*
211fa9e4066Sahrens * Don't prefetch beyond the end of the file, if working
212fa9e4066Sahrens * backwards.
213fa9e4066Sahrens */
214fa9e4066Sahrens if ((zs->zst_direction == ZFETCH_BACKWARD) &&
215fa9e4066Sahrens (prefetch_ofst > prefetch_tail)) {
216fa9e4066Sahrens prefetch_len += prefetch_ofst;
217fa9e4066Sahrens prefetch_ofst = 0;
218fa9e4066Sahrens }
219fa9e4066Sahrens
220fa9e4066Sahrens /* don't prefetch more than we're supposed to */
221fa9e4066Sahrens if (prefetch_len > zs->zst_len)
222fa9e4066Sahrens break;
223fa9e4066Sahrens
224fa9e4066Sahrens blocks_fetched = dmu_zfetch_fetch(zf->zf_dnode,
225fa9e4066Sahrens prefetch_ofst, zs->zst_len);
226fa9e4066Sahrens
227fa9e4066Sahrens prefetch_tail += zs->zst_stride;
228fa9e4066Sahrens /* stop if we've run out of stuff to prefetch */
229fa9e4066Sahrens if (blocks_fetched < zs->zst_len)
230fa9e4066Sahrens break;
231fa9e4066Sahrens }
232fa9e4066Sahrens zs->zst_ph_offset = prefetch_tail;
233d3d50737SRafael Vanoni zs->zst_last = ddi_get_lbolt();
234fa9e4066Sahrens }
235fa9e4066Sahrens
2367cbf8b43SRich Morris void
zfetch_init(void)2377cbf8b43SRich Morris zfetch_init(void)
2387cbf8b43SRich Morris {
2397cbf8b43SRich Morris
2407cbf8b43SRich Morris zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc",
2417cbf8b43SRich Morris KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t),
2427cbf8b43SRich Morris KSTAT_FLAG_VIRTUAL);
2437cbf8b43SRich Morris
2447cbf8b43SRich Morris if (zfetch_ksp != NULL) {
2457cbf8b43SRich Morris zfetch_ksp->ks_data = &zfetch_stats;
2467cbf8b43SRich Morris kstat_install(zfetch_ksp);
2477cbf8b43SRich Morris }
2487cbf8b43SRich Morris }
2497cbf8b43SRich Morris
2507cbf8b43SRich Morris void
zfetch_fini(void)2517cbf8b43SRich Morris zfetch_fini(void)
2527cbf8b43SRich Morris {
2537cbf8b43SRich Morris if (zfetch_ksp != NULL) {
2547cbf8b43SRich Morris kstat_delete(zfetch_ksp);
2557cbf8b43SRich Morris zfetch_ksp = NULL;
2567cbf8b43SRich Morris }
2577cbf8b43SRich Morris }
2587cbf8b43SRich Morris
259fa9e4066Sahrens /*
260fa9e4066Sahrens * This takes a pointer to a zfetch structure and a dnode. It performs the
261fa9e4066Sahrens * necessary setup for the zfetch structure, grokking data from the
262fa9e4066Sahrens * associated dnode.
263fa9e4066Sahrens */
264fa9e4066Sahrens void
dmu_zfetch_init(zfetch_t * zf,dnode_t * dno)265fa9e4066Sahrens dmu_zfetch_init(zfetch_t *zf, dnode_t *dno)
266fa9e4066Sahrens {
267fa9e4066Sahrens if (zf == NULL) {
268fa9e4066Sahrens return;
269fa9e4066Sahrens }
270fa9e4066Sahrens
271fa9e4066Sahrens zf->zf_dnode = dno;
272fa9e4066Sahrens zf->zf_stream_cnt = 0;
273fa9e4066Sahrens zf->zf_alloc_fail = 0;
274fa9e4066Sahrens
275fa9e4066Sahrens list_create(&zf->zf_stream, sizeof (zstream_t),
276fa9e4066Sahrens offsetof(zstream_t, zst_node));
277fa9e4066Sahrens
278fa9e4066Sahrens rw_init(&zf->zf_rwlock, NULL, RW_DEFAULT, NULL);
279fa9e4066Sahrens }
280fa9e4066Sahrens
281fa9e4066Sahrens /*
282fa9e4066Sahrens * This function computes the actual size, in blocks, that can be prefetched,
283fa9e4066Sahrens * and fetches it.
284fa9e4066Sahrens */
285fa9e4066Sahrens static uint64_t
dmu_zfetch_fetch(dnode_t * dn,uint64_t blkid,uint64_t nblks)286fa9e4066Sahrens dmu_zfetch_fetch(dnode_t *dn, uint64_t blkid, uint64_t nblks)
287fa9e4066Sahrens {
288fa9e4066Sahrens uint64_t fetchsz;
289fa9e4066Sahrens uint64_t i;
290fa9e4066Sahrens
291fa9e4066Sahrens fetchsz = dmu_zfetch_fetchsz(dn, blkid, nblks);
292fa9e4066Sahrens
293fa9e4066Sahrens for (i = 0; i < fetchsz; i++) {
294*a2cdcdd2SPaul Dagnelie dbuf_prefetch(dn, 0, blkid + i, ZIO_PRIORITY_ASYNC_READ,
295*a2cdcdd2SPaul Dagnelie ARC_FLAG_PREFETCH);
296fa9e4066Sahrens }
297fa9e4066Sahrens
298fa9e4066Sahrens return (fetchsz);
299fa9e4066Sahrens }
300fa9e4066Sahrens
301fa9e4066Sahrens /*
302fa9e4066Sahrens * this function returns the number of blocks that would be prefetched, based
303fa9e4066Sahrens * upon the supplied dnode, blockid, and nblks. This is used so that we can
304fa9e4066Sahrens * update streams in place, and then prefetch with their old value after the
305fa9e4066Sahrens * fact. This way, we can delay the prefetch, but subsequent accesses to the
306fa9e4066Sahrens * stream won't result in the same data being prefetched multiple times.
307fa9e4066Sahrens */
308fa9e4066Sahrens static uint64_t
dmu_zfetch_fetchsz(dnode_t * dn,uint64_t blkid,uint64_t nblks)309fa9e4066Sahrens dmu_zfetch_fetchsz(dnode_t *dn, uint64_t blkid, uint64_t nblks)
310fa9e4066Sahrens {
311fa9e4066Sahrens uint64_t fetchsz;
312fa9e4066Sahrens
313fa9e4066Sahrens if (blkid > dn->dn_maxblkid) {
314fa9e4066Sahrens return (0);
315fa9e4066Sahrens }
316fa9e4066Sahrens
317fa9e4066Sahrens /* compute fetch size */
31813506d1eSmaybee if (blkid + nblks + 1 > dn->dn_maxblkid) {
31913506d1eSmaybee fetchsz = (dn->dn_maxblkid - blkid) + 1;
32013506d1eSmaybee ASSERT(blkid + fetchsz - 1 <= dn->dn_maxblkid);
321fa9e4066Sahrens } else {
322fa9e4066Sahrens fetchsz = nblks;
323fa9e4066Sahrens }
324fa9e4066Sahrens
325fa9e4066Sahrens
326fa9e4066Sahrens return (fetchsz);
327fa9e4066Sahrens }
328fa9e4066Sahrens
329fa9e4066Sahrens /*
3307cbf8b43SRich Morris * given a zfetch and a zstream structure, see if there is an associated zstream
331fa9e4066Sahrens * for this block read. If so, it starts a prefetch for the stream it
332fa9e4066Sahrens * located and returns true, otherwise it returns false
333fa9e4066Sahrens */
3343e30c24aSWill Andrews static boolean_t
dmu_zfetch_find(zfetch_t * zf,zstream_t * zh,int prefetched)33513506d1eSmaybee dmu_zfetch_find(zfetch_t *zf, zstream_t *zh, int prefetched)
336fa9e4066Sahrens {
337fa9e4066Sahrens zstream_t *zs;
338fa9e4066Sahrens int64_t diff;
33913506d1eSmaybee int reset = !prefetched;
340fa9e4066Sahrens int rc = 0;
341fa9e4066Sahrens
342fa9e4066Sahrens if (zh == NULL)
343fa9e4066Sahrens return (0);
344fa9e4066Sahrens
345fa9e4066Sahrens /*
346fa9e4066Sahrens * XXX: This locking strategy is a bit coarse; however, it's impact has
347fa9e4066Sahrens * yet to be tested. If this turns out to be an issue, it can be
348fa9e4066Sahrens * modified in a number of different ways.
349fa9e4066Sahrens */
350fa9e4066Sahrens
351fa9e4066Sahrens rw_enter(&zf->zf_rwlock, RW_READER);
352fa9e4066Sahrens top:
353fa9e4066Sahrens
354fa9e4066Sahrens for (zs = list_head(&zf->zf_stream); zs;
355fa9e4066Sahrens zs = list_next(&zf->zf_stream, zs)) {
356fa9e4066Sahrens
35713506d1eSmaybee /*
35813506d1eSmaybee * XXX - should this be an assert?
35913506d1eSmaybee */
360fa9e4066Sahrens if (zs->zst_len == 0) {
361fa9e4066Sahrens /* bogus stream */
3627cbf8b43SRich Morris ZFETCHSTAT_BUMP(zfetchstat_bogus_streams);
363fa9e4066Sahrens continue;
364fa9e4066Sahrens }
365fa9e4066Sahrens
36613506d1eSmaybee /*
36713506d1eSmaybee * We hit this case when we are in a strided prefetch stream:
36813506d1eSmaybee * we will read "len" blocks before "striding".
36913506d1eSmaybee */
37013506d1eSmaybee if (zh->zst_offset >= zs->zst_offset &&
37113506d1eSmaybee zh->zst_offset < zs->zst_offset + zs->zst_len) {
3727cbf8b43SRich Morris if (prefetched) {
373fa9e4066Sahrens /* already fetched */
3747cbf8b43SRich Morris ZFETCHSTAT_BUMP(zfetchstat_stride_hits);
37513506d1eSmaybee rc = 1;
37613506d1eSmaybee goto out;
3777cbf8b43SRich Morris } else {
3787cbf8b43SRich Morris ZFETCHSTAT_BUMP(zfetchstat_stride_misses);
3797cbf8b43SRich Morris }
380fa9e4066Sahrens }
381fa9e4066Sahrens
38213506d1eSmaybee /*
38313506d1eSmaybee * This is the forward sequential read case: we increment
38413506d1eSmaybee * len by one each time we hit here, so we will enter this
38513506d1eSmaybee * case on every read.
38613506d1eSmaybee */
387fa9e4066Sahrens if (zh->zst_offset == zs->zst_offset + zs->zst_len) {
38813506d1eSmaybee
38913506d1eSmaybee reset = !prefetched && zs->zst_len > 1;
390fa9e4066Sahrens
391fa9e4066Sahrens mutex_enter(&zs->zst_lock);
392fa9e4066Sahrens
393fa9e4066Sahrens if (zh->zst_offset != zs->zst_offset + zs->zst_len) {
394fa9e4066Sahrens mutex_exit(&zs->zst_lock);
395fa9e4066Sahrens goto top;
396fa9e4066Sahrens }
397fa9e4066Sahrens zs->zst_len += zh->zst_len;
398fa9e4066Sahrens diff = zs->zst_len - zfetch_block_cap;
399fa9e4066Sahrens if (diff > 0) {
400fa9e4066Sahrens zs->zst_offset += diff;
401fa9e4066Sahrens zs->zst_len = zs->zst_len > diff ?
402fa9e4066Sahrens zs->zst_len - diff : 0;
403fa9e4066Sahrens }
404fa9e4066Sahrens zs->zst_direction = ZFETCH_FORWARD;
405fa9e4066Sahrens
406fa9e4066Sahrens break;
407fa9e4066Sahrens
40813506d1eSmaybee /*
40913506d1eSmaybee * Same as above, but reading backwards through the file.
41013506d1eSmaybee */
411fa9e4066Sahrens } else if (zh->zst_offset == zs->zst_offset - zh->zst_len) {
412fa9e4066Sahrens /* backwards sequential access */
413fa9e4066Sahrens
41413506d1eSmaybee reset = !prefetched && zs->zst_len > 1;
41513506d1eSmaybee
416fa9e4066Sahrens mutex_enter(&zs->zst_lock);
417fa9e4066Sahrens
418fa9e4066Sahrens if (zh->zst_offset != zs->zst_offset - zh->zst_len) {
419fa9e4066Sahrens mutex_exit(&zs->zst_lock);
420fa9e4066Sahrens goto top;
421fa9e4066Sahrens }
422fa9e4066Sahrens
423fa9e4066Sahrens zs->zst_offset = zs->zst_offset > zh->zst_len ?
424fa9e4066Sahrens zs->zst_offset - zh->zst_len : 0;
425fa9e4066Sahrens zs->zst_ph_offset = zs->zst_ph_offset > zh->zst_len ?
426fa9e4066Sahrens zs->zst_ph_offset - zh->zst_len : 0;
427fa9e4066Sahrens zs->zst_len += zh->zst_len;
428fa9e4066Sahrens
429fa9e4066Sahrens diff = zs->zst_len - zfetch_block_cap;
430fa9e4066Sahrens if (diff > 0) {
431fa9e4066Sahrens zs->zst_ph_offset = zs->zst_ph_offset > diff ?
432fa9e4066Sahrens zs->zst_ph_offset - diff : 0;
433fa9e4066Sahrens zs->zst_len = zs->zst_len > diff ?
434fa9e4066Sahrens zs->zst_len - diff : zs->zst_len;
435fa9e4066Sahrens }
436fa9e4066Sahrens zs->zst_direction = ZFETCH_BACKWARD;
437fa9e4066Sahrens
438fa9e4066Sahrens break;
439fa9e4066Sahrens
440fa9e4066Sahrens } else if ((zh->zst_offset - zs->zst_offset - zs->zst_stride <
441fa9e4066Sahrens zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
442fa9e4066Sahrens /* strided forward access */
443fa9e4066Sahrens
444fa9e4066Sahrens mutex_enter(&zs->zst_lock);
445fa9e4066Sahrens
446fa9e4066Sahrens if ((zh->zst_offset - zs->zst_offset - zs->zst_stride >=
447fa9e4066Sahrens zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
448fa9e4066Sahrens mutex_exit(&zs->zst_lock);
449fa9e4066Sahrens goto top;
450fa9e4066Sahrens }
451fa9e4066Sahrens
452fa9e4066Sahrens zs->zst_offset += zs->zst_stride;
453fa9e4066Sahrens zs->zst_direction = ZFETCH_FORWARD;
454fa9e4066Sahrens
455fa9e4066Sahrens break;
456fa9e4066Sahrens
457fa9e4066Sahrens } else if ((zh->zst_offset - zs->zst_offset + zs->zst_stride <
458fa9e4066Sahrens zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
459fa9e4066Sahrens /* strided reverse access */
460fa9e4066Sahrens
461fa9e4066Sahrens mutex_enter(&zs->zst_lock);
462fa9e4066Sahrens
463fa9e4066Sahrens if ((zh->zst_offset - zs->zst_offset + zs->zst_stride >=
464fa9e4066Sahrens zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
465fa9e4066Sahrens mutex_exit(&zs->zst_lock);
466fa9e4066Sahrens goto top;
467fa9e4066Sahrens }
468fa9e4066Sahrens
469fa9e4066Sahrens zs->zst_offset = zs->zst_offset > zs->zst_stride ?
470fa9e4066Sahrens zs->zst_offset - zs->zst_stride : 0;
471fa9e4066Sahrens zs->zst_ph_offset = (zs->zst_ph_offset >
472fa9e4066Sahrens (2 * zs->zst_stride)) ?
473fa9e4066Sahrens (zs->zst_ph_offset - (2 * zs->zst_stride)) : 0;
474fa9e4066Sahrens zs->zst_direction = ZFETCH_BACKWARD;
475fa9e4066Sahrens
476fa9e4066Sahrens break;
477fa9e4066Sahrens }
478fa9e4066Sahrens }
479fa9e4066Sahrens
480fa9e4066Sahrens if (zs) {
48113506d1eSmaybee if (reset) {
48213506d1eSmaybee zstream_t *remove = zs;
48313506d1eSmaybee
4847cbf8b43SRich Morris ZFETCHSTAT_BUMP(zfetchstat_stream_resets);
48513506d1eSmaybee rc = 0;
48613506d1eSmaybee mutex_exit(&zs->zst_lock);
48713506d1eSmaybee rw_exit(&zf->zf_rwlock);
48813506d1eSmaybee rw_enter(&zf->zf_rwlock, RW_WRITER);
48913506d1eSmaybee /*
49013506d1eSmaybee * Relocate the stream, in case someone removes
49113506d1eSmaybee * it while we were acquiring the WRITER lock.
49213506d1eSmaybee */
49313506d1eSmaybee for (zs = list_head(&zf->zf_stream); zs;
49413506d1eSmaybee zs = list_next(&zf->zf_stream, zs)) {
49513506d1eSmaybee if (zs == remove) {
49613506d1eSmaybee dmu_zfetch_stream_remove(zf, zs);
49713506d1eSmaybee mutex_destroy(&zs->zst_lock);
49813506d1eSmaybee kmem_free(zs, sizeof (zstream_t));
49913506d1eSmaybee break;
50013506d1eSmaybee }
50113506d1eSmaybee }
50213506d1eSmaybee } else {
5037cbf8b43SRich Morris ZFETCHSTAT_BUMP(zfetchstat_stream_noresets);
504fa9e4066Sahrens rc = 1;
505fa9e4066Sahrens dmu_zfetch_dofetch(zf, zs);
506fa9e4066Sahrens mutex_exit(&zs->zst_lock);
507fa9e4066Sahrens }
50813506d1eSmaybee }
50913506d1eSmaybee out:
510fa9e4066Sahrens rw_exit(&zf->zf_rwlock);
511fa9e4066Sahrens return (rc);
512fa9e4066Sahrens }
513fa9e4066Sahrens
514fa9e4066Sahrens /*
515fa9e4066Sahrens * Clean-up state associated with a zfetch structure. This frees allocated
516fa9e4066Sahrens * structure members, empties the zf_stream tree, and generally makes things
517fa9e4066Sahrens * nice. This doesn't free the zfetch_t itself, that's left to the caller.
518fa9e4066Sahrens */
519fa9e4066Sahrens void
dmu_zfetch_rele(zfetch_t * zf)520fa9e4066Sahrens dmu_zfetch_rele(zfetch_t *zf)
521fa9e4066Sahrens {
522fa9e4066Sahrens zstream_t *zs;
523fa9e4066Sahrens zstream_t *zs_next;
524fa9e4066Sahrens
525fa9e4066Sahrens ASSERT(!RW_LOCK_HELD(&zf->zf_rwlock));
526fa9e4066Sahrens
527fa9e4066Sahrens for (zs = list_head(&zf->zf_stream); zs; zs = zs_next) {
528fa9e4066Sahrens zs_next = list_next(&zf->zf_stream, zs);
529fa9e4066Sahrens
530fa9e4066Sahrens list_remove(&zf->zf_stream, zs);
531fa9e4066Sahrens mutex_destroy(&zs->zst_lock);
532fa9e4066Sahrens kmem_free(zs, sizeof (zstream_t));
533fa9e4066Sahrens }
534fa9e4066Sahrens list_destroy(&zf->zf_stream);
535fa9e4066Sahrens rw_destroy(&zf->zf_rwlock);
536fa9e4066Sahrens
537fa9e4066Sahrens zf->zf_dnode = NULL;
538fa9e4066Sahrens }
539fa9e4066Sahrens
540fa9e4066Sahrens /*
541fa9e4066Sahrens * Given a zfetch and zstream structure, insert the zstream structure into the
542fa9e4066Sahrens * AVL tree contained within the zfetch structure. Peform the appropriate
543fa9e4066Sahrens * book-keeping. It is possible that another thread has inserted a stream which
544fa9e4066Sahrens * matches one that we are about to insert, so we must be sure to check for this
545fa9e4066Sahrens * case. If one is found, return failure, and let the caller cleanup the
546fa9e4066Sahrens * duplicates.
547fa9e4066Sahrens */
548fa9e4066Sahrens static int
dmu_zfetch_stream_insert(zfetch_t * zf,zstream_t * zs)549fa9e4066Sahrens dmu_zfetch_stream_insert(zfetch_t *zf, zstream_t *zs)
550fa9e4066Sahrens {
551fa9e4066Sahrens zstream_t *zs_walk;
552fa9e4066Sahrens zstream_t *zs_next;
553fa9e4066Sahrens
554fa9e4066Sahrens ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
555fa9e4066Sahrens
556fa9e4066Sahrens for (zs_walk = list_head(&zf->zf_stream); zs_walk; zs_walk = zs_next) {
557fa9e4066Sahrens zs_next = list_next(&zf->zf_stream, zs_walk);
558fa9e4066Sahrens
559fa9e4066Sahrens if (dmu_zfetch_streams_equal(zs_walk, zs)) {
560fa9e4066Sahrens return (0);
561fa9e4066Sahrens }
562fa9e4066Sahrens }
563fa9e4066Sahrens
564fa9e4066Sahrens list_insert_head(&zf->zf_stream, zs);
565fa9e4066Sahrens zf->zf_stream_cnt++;
566fa9e4066Sahrens return (1);
567fa9e4066Sahrens }
568fa9e4066Sahrens
569fa9e4066Sahrens
570fa9e4066Sahrens /*
571fa9e4066Sahrens * Walk the list of zstreams in the given zfetch, find an old one (by time), and
572fa9e4066Sahrens * reclaim it for use by the caller.
573fa9e4066Sahrens */
574fa9e4066Sahrens static zstream_t *
dmu_zfetch_stream_reclaim(zfetch_t * zf)575fa9e4066Sahrens dmu_zfetch_stream_reclaim(zfetch_t *zf)
576fa9e4066Sahrens {
577fa9e4066Sahrens zstream_t *zs;
578fa9e4066Sahrens
579404ba9d7Srbourbon if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER))
580404ba9d7Srbourbon return (0);
581fa9e4066Sahrens
582fa9e4066Sahrens for (zs = list_head(&zf->zf_stream); zs;
583fa9e4066Sahrens zs = list_next(&zf->zf_stream, zs)) {
584fa9e4066Sahrens
585d3d50737SRafael Vanoni if (((ddi_get_lbolt() - zs->zst_last)/hz) > zfetch_min_sec_reap)
586fa9e4066Sahrens break;
587fa9e4066Sahrens }
588fa9e4066Sahrens
589fa9e4066Sahrens if (zs) {
590fa9e4066Sahrens dmu_zfetch_stream_remove(zf, zs);
591fa9e4066Sahrens mutex_destroy(&zs->zst_lock);
592fa9e4066Sahrens bzero(zs, sizeof (zstream_t));
593fa9e4066Sahrens } else {
594fa9e4066Sahrens zf->zf_alloc_fail++;
595fa9e4066Sahrens }
596fa9e4066Sahrens rw_exit(&zf->zf_rwlock);
597fa9e4066Sahrens
598fa9e4066Sahrens return (zs);
599fa9e4066Sahrens }
600fa9e4066Sahrens
601fa9e4066Sahrens /*
602fa9e4066Sahrens * Given a zfetch and zstream structure, remove the zstream structure from its
603fa9e4066Sahrens * container in the zfetch structure. Perform the appropriate book-keeping.
604fa9e4066Sahrens */
605fa9e4066Sahrens static void
dmu_zfetch_stream_remove(zfetch_t * zf,zstream_t * zs)606fa9e4066Sahrens dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs)
607fa9e4066Sahrens {
608fa9e4066Sahrens ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
609fa9e4066Sahrens
610fa9e4066Sahrens list_remove(&zf->zf_stream, zs);
611fa9e4066Sahrens zf->zf_stream_cnt--;
612fa9e4066Sahrens }
613fa9e4066Sahrens
614fa9e4066Sahrens static int
dmu_zfetch_streams_equal(zstream_t * zs1,zstream_t * zs2)615fa9e4066Sahrens dmu_zfetch_streams_equal(zstream_t *zs1, zstream_t *zs2)
616fa9e4066Sahrens {
617fa9e4066Sahrens if (zs1->zst_offset != zs2->zst_offset)
618fa9e4066Sahrens return (0);
619fa9e4066Sahrens
620fa9e4066Sahrens if (zs1->zst_len != zs2->zst_len)
621fa9e4066Sahrens return (0);
622fa9e4066Sahrens
623fa9e4066Sahrens if (zs1->zst_stride != zs2->zst_stride)
624fa9e4066Sahrens return (0);
625fa9e4066Sahrens
626fa9e4066Sahrens if (zs1->zst_ph_offset != zs2->zst_ph_offset)
627fa9e4066Sahrens return (0);
628fa9e4066Sahrens
629fa9e4066Sahrens if (zs1->zst_cap != zs2->zst_cap)
630fa9e4066Sahrens return (0);
631fa9e4066Sahrens
632fa9e4066Sahrens if (zs1->zst_direction != zs2->zst_direction)
633fa9e4066Sahrens return (0);
634fa9e4066Sahrens
635fa9e4066Sahrens return (1);
636fa9e4066Sahrens }
637fa9e4066Sahrens
638fa9e4066Sahrens /*
639fa9e4066Sahrens * This is the prefetch entry point. It calls all of the other dmu_zfetch
640fa9e4066Sahrens * routines to create, delete, find, or operate upon prefetch streams.
641fa9e4066Sahrens */
642fa9e4066Sahrens void
dmu_zfetch(zfetch_t * zf,uint64_t offset,uint64_t size,int prefetched)64313506d1eSmaybee dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched)
644fa9e4066Sahrens {
645fa9e4066Sahrens zstream_t zst;
646fa9e4066Sahrens zstream_t *newstream;
6473e30c24aSWill Andrews boolean_t fetched;
648fa9e4066Sahrens int inserted;
649fa9e4066Sahrens unsigned int blkshft;
650fa9e4066Sahrens uint64_t blksz;
651fa9e4066Sahrens
652a2eea2e1Sahrens if (zfs_prefetch_disable)
653fa9e4066Sahrens return;
654a2eea2e1Sahrens
655a2eea2e1Sahrens /* files that aren't ln2 blocksz are only one block -- nothing to do */
656a2eea2e1Sahrens if (!zf->zf_dnode->dn_datablkshift)
657a2eea2e1Sahrens return;
658fa9e4066Sahrens
659fa9e4066Sahrens /* convert offset and size, into blockid and nblocks */
660fa9e4066Sahrens blkshft = zf->zf_dnode->dn_datablkshift;
661fa9e4066Sahrens blksz = (1 << blkshft);
662fa9e4066Sahrens
663fa9e4066Sahrens bzero(&zst, sizeof (zstream_t));
664fa9e4066Sahrens zst.zst_offset = offset >> blkshft;
665fa9e4066Sahrens zst.zst_len = (P2ROUNDUP(offset + size, blksz) -
666fa9e4066Sahrens P2ALIGN(offset, blksz)) >> blkshft;
667fa9e4066Sahrens
66813506d1eSmaybee fetched = dmu_zfetch_find(zf, &zst, prefetched);
6697cbf8b43SRich Morris if (fetched) {
6707cbf8b43SRich Morris ZFETCHSTAT_BUMP(zfetchstat_hits);
6717cbf8b43SRich Morris } else {
6727cbf8b43SRich Morris ZFETCHSTAT_BUMP(zfetchstat_misses);
6733e30c24aSWill Andrews fetched = dmu_zfetch_colinear(zf, &zst);
6743e30c24aSWill Andrews if (fetched) {
6757cbf8b43SRich Morris ZFETCHSTAT_BUMP(zfetchstat_colinear_hits);
6767cbf8b43SRich Morris } else {
6777cbf8b43SRich Morris ZFETCHSTAT_BUMP(zfetchstat_colinear_misses);
6787cbf8b43SRich Morris }
679fa9e4066Sahrens }
680fa9e4066Sahrens
681fa9e4066Sahrens if (!fetched) {
682fa9e4066Sahrens newstream = dmu_zfetch_stream_reclaim(zf);
683fa9e4066Sahrens
684fa9e4066Sahrens /*
685fa9e4066Sahrens * we still couldn't find a stream, drop the lock, and allocate
686fa9e4066Sahrens * one if possible. Otherwise, give up and go home.
687fa9e4066Sahrens */
6887cbf8b43SRich Morris if (newstream) {
6897cbf8b43SRich Morris ZFETCHSTAT_BUMP(zfetchstat_reclaim_successes);
6907cbf8b43SRich Morris } else {
691fa9e4066Sahrens uint64_t maxblocks;
692fa9e4066Sahrens uint32_t max_streams;
693fa9e4066Sahrens uint32_t cur_streams;
694fa9e4066Sahrens
6957cbf8b43SRich Morris ZFETCHSTAT_BUMP(zfetchstat_reclaim_failures);
696fa9e4066Sahrens cur_streams = zf->zf_stream_cnt;
697fa9e4066Sahrens maxblocks = zf->zf_dnode->dn_maxblkid;
698fa9e4066Sahrens
699fa9e4066Sahrens max_streams = MIN(zfetch_max_streams,
700fa9e4066Sahrens (maxblocks / zfetch_block_cap));
701fa9e4066Sahrens if (max_streams == 0) {
702fa9e4066Sahrens max_streams++;
703fa9e4066Sahrens }
704fa9e4066Sahrens
705fa9e4066Sahrens if (cur_streams >= max_streams) {
706fa9e4066Sahrens return;
707fa9e4066Sahrens }
708fa9e4066Sahrens newstream = kmem_zalloc(sizeof (zstream_t), KM_SLEEP);
709fa9e4066Sahrens }
710fa9e4066Sahrens
711fa9e4066Sahrens newstream->zst_offset = zst.zst_offset;
712fa9e4066Sahrens newstream->zst_len = zst.zst_len;
713fa9e4066Sahrens newstream->zst_stride = zst.zst_len;
714fa9e4066Sahrens newstream->zst_ph_offset = zst.zst_len + zst.zst_offset;
715fa9e4066Sahrens newstream->zst_cap = zst.zst_len;
716fa9e4066Sahrens newstream->zst_direction = ZFETCH_FORWARD;
717d3d50737SRafael Vanoni newstream->zst_last = ddi_get_lbolt();
718fa9e4066Sahrens
719fa9e4066Sahrens mutex_init(&newstream->zst_lock, NULL, MUTEX_DEFAULT, NULL);
720fa9e4066Sahrens
721fa9e4066Sahrens rw_enter(&zf->zf_rwlock, RW_WRITER);
722fa9e4066Sahrens inserted = dmu_zfetch_stream_insert(zf, newstream);
723fa9e4066Sahrens rw_exit(&zf->zf_rwlock);
724fa9e4066Sahrens
725fa9e4066Sahrens if (!inserted) {
726fa9e4066Sahrens mutex_destroy(&newstream->zst_lock);
727fa9e4066Sahrens kmem_free(newstream, sizeof (zstream_t));
728fa9e4066Sahrens }
729fa9e4066Sahrens }
730fa9e4066Sahrens }
731