1*eda14cbcSMatt Macy /* 2*eda14cbcSMatt Macy * CDDL HEADER START 3*eda14cbcSMatt Macy * 4*eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5*eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6*eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7*eda14cbcSMatt Macy * 8*eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*eda14cbcSMatt Macy * or http://www.opensolaris.org/os/licensing. 10*eda14cbcSMatt Macy * See the License for the specific language governing permissions 11*eda14cbcSMatt Macy * and limitations under the License. 12*eda14cbcSMatt Macy * 13*eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14*eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16*eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17*eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18*eda14cbcSMatt Macy * 19*eda14cbcSMatt Macy * CDDL HEADER END 20*eda14cbcSMatt Macy */ 21*eda14cbcSMatt Macy /* 22*eda14cbcSMatt Macy * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23*eda14cbcSMatt Macy * Use is subject to license terms. 24*eda14cbcSMatt Macy */ 25*eda14cbcSMatt Macy 26*eda14cbcSMatt Macy /* 27*eda14cbcSMatt Macy * Copyright (c) 2013, 2017 by Delphix. All rights reserved. 
28*eda14cbcSMatt Macy */ 29*eda14cbcSMatt Macy 30*eda14cbcSMatt Macy #include <sys/zfs_context.h> 31*eda14cbcSMatt Macy #include <sys/dnode.h> 32*eda14cbcSMatt Macy #include <sys/dmu_objset.h> 33*eda14cbcSMatt Macy #include <sys/dmu_zfetch.h> 34*eda14cbcSMatt Macy #include <sys/dmu.h> 35*eda14cbcSMatt Macy #include <sys/dbuf.h> 36*eda14cbcSMatt Macy #include <sys/kstat.h> 37*eda14cbcSMatt Macy 38*eda14cbcSMatt Macy /* 39*eda14cbcSMatt Macy * This tunable disables predictive prefetch. Note that it leaves "prescient" 40*eda14cbcSMatt Macy * prefetch (e.g. prefetch for zfs send) intact. Unlike predictive prefetch, 41*eda14cbcSMatt Macy * prescient prefetch never issues i/os that end up not being needed, 42*eda14cbcSMatt Macy * so it can't hurt performance. 43*eda14cbcSMatt Macy */ 44*eda14cbcSMatt Macy 45*eda14cbcSMatt Macy int zfs_prefetch_disable = B_FALSE; 46*eda14cbcSMatt Macy 47*eda14cbcSMatt Macy /* max # of streams per zfetch */ 48*eda14cbcSMatt Macy unsigned int zfetch_max_streams = 8; 49*eda14cbcSMatt Macy /* min time before stream reclaim */ 50*eda14cbcSMatt Macy unsigned int zfetch_min_sec_reap = 2; 51*eda14cbcSMatt Macy /* max bytes to prefetch per stream (default 8MB) */ 52*eda14cbcSMatt Macy unsigned int zfetch_max_distance = 8 * 1024 * 1024; 53*eda14cbcSMatt Macy /* max bytes to prefetch indirects for per stream (default 64MB) */ 54*eda14cbcSMatt Macy unsigned int zfetch_max_idistance = 64 * 1024 * 1024; 55*eda14cbcSMatt Macy /* max number of bytes in an array_read in which we allow prefetching (1MB) */ 56*eda14cbcSMatt Macy unsigned long zfetch_array_rd_sz = 1024 * 1024; 57*eda14cbcSMatt Macy 58*eda14cbcSMatt Macy typedef struct zfetch_stats { 59*eda14cbcSMatt Macy kstat_named_t zfetchstat_hits; 60*eda14cbcSMatt Macy kstat_named_t zfetchstat_misses; 61*eda14cbcSMatt Macy kstat_named_t zfetchstat_max_streams; 62*eda14cbcSMatt Macy } zfetch_stats_t; 63*eda14cbcSMatt Macy 64*eda14cbcSMatt Macy static zfetch_stats_t zfetch_stats = { 65*eda14cbcSMatt Macy { 
"hits", KSTAT_DATA_UINT64 }, 66*eda14cbcSMatt Macy { "misses", KSTAT_DATA_UINT64 }, 67*eda14cbcSMatt Macy { "max_streams", KSTAT_DATA_UINT64 }, 68*eda14cbcSMatt Macy }; 69*eda14cbcSMatt Macy 70*eda14cbcSMatt Macy #define ZFETCHSTAT_BUMP(stat) \ 71*eda14cbcSMatt Macy atomic_inc_64(&zfetch_stats.stat.value.ui64); 72*eda14cbcSMatt Macy 73*eda14cbcSMatt Macy kstat_t *zfetch_ksp; 74*eda14cbcSMatt Macy 75*eda14cbcSMatt Macy void 76*eda14cbcSMatt Macy zfetch_init(void) 77*eda14cbcSMatt Macy { 78*eda14cbcSMatt Macy zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc", 79*eda14cbcSMatt Macy KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t), 80*eda14cbcSMatt Macy KSTAT_FLAG_VIRTUAL); 81*eda14cbcSMatt Macy 82*eda14cbcSMatt Macy if (zfetch_ksp != NULL) { 83*eda14cbcSMatt Macy zfetch_ksp->ks_data = &zfetch_stats; 84*eda14cbcSMatt Macy kstat_install(zfetch_ksp); 85*eda14cbcSMatt Macy } 86*eda14cbcSMatt Macy } 87*eda14cbcSMatt Macy 88*eda14cbcSMatt Macy void 89*eda14cbcSMatt Macy zfetch_fini(void) 90*eda14cbcSMatt Macy { 91*eda14cbcSMatt Macy if (zfetch_ksp != NULL) { 92*eda14cbcSMatt Macy kstat_delete(zfetch_ksp); 93*eda14cbcSMatt Macy zfetch_ksp = NULL; 94*eda14cbcSMatt Macy } 95*eda14cbcSMatt Macy } 96*eda14cbcSMatt Macy 97*eda14cbcSMatt Macy /* 98*eda14cbcSMatt Macy * This takes a pointer to a zfetch structure and a dnode. It performs the 99*eda14cbcSMatt Macy * necessary setup for the zfetch structure, grokking data from the 100*eda14cbcSMatt Macy * associated dnode. 
101*eda14cbcSMatt Macy */ 102*eda14cbcSMatt Macy void 103*eda14cbcSMatt Macy dmu_zfetch_init(zfetch_t *zf, dnode_t *dno) 104*eda14cbcSMatt Macy { 105*eda14cbcSMatt Macy if (zf == NULL) 106*eda14cbcSMatt Macy return; 107*eda14cbcSMatt Macy 108*eda14cbcSMatt Macy zf->zf_dnode = dno; 109*eda14cbcSMatt Macy 110*eda14cbcSMatt Macy list_create(&zf->zf_stream, sizeof (zstream_t), 111*eda14cbcSMatt Macy offsetof(zstream_t, zs_node)); 112*eda14cbcSMatt Macy 113*eda14cbcSMatt Macy mutex_init(&zf->zf_lock, NULL, MUTEX_DEFAULT, NULL); 114*eda14cbcSMatt Macy } 115*eda14cbcSMatt Macy 116*eda14cbcSMatt Macy static void 117*eda14cbcSMatt Macy dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs) 118*eda14cbcSMatt Macy { 119*eda14cbcSMatt Macy ASSERT(MUTEX_HELD(&zf->zf_lock)); 120*eda14cbcSMatt Macy list_remove(&zf->zf_stream, zs); 121*eda14cbcSMatt Macy mutex_destroy(&zs->zs_lock); 122*eda14cbcSMatt Macy kmem_free(zs, sizeof (*zs)); 123*eda14cbcSMatt Macy } 124*eda14cbcSMatt Macy 125*eda14cbcSMatt Macy /* 126*eda14cbcSMatt Macy * Clean-up state associated with a zfetch structure (e.g. destroy the 127*eda14cbcSMatt Macy * streams). This doesn't free the zfetch_t itself, that's left to the caller. 128*eda14cbcSMatt Macy */ 129*eda14cbcSMatt Macy void 130*eda14cbcSMatt Macy dmu_zfetch_fini(zfetch_t *zf) 131*eda14cbcSMatt Macy { 132*eda14cbcSMatt Macy zstream_t *zs; 133*eda14cbcSMatt Macy 134*eda14cbcSMatt Macy mutex_enter(&zf->zf_lock); 135*eda14cbcSMatt Macy while ((zs = list_head(&zf->zf_stream)) != NULL) 136*eda14cbcSMatt Macy dmu_zfetch_stream_remove(zf, zs); 137*eda14cbcSMatt Macy mutex_exit(&zf->zf_lock); 138*eda14cbcSMatt Macy list_destroy(&zf->zf_stream); 139*eda14cbcSMatt Macy mutex_destroy(&zf->zf_lock); 140*eda14cbcSMatt Macy 141*eda14cbcSMatt Macy zf->zf_dnode = NULL; 142*eda14cbcSMatt Macy } 143*eda14cbcSMatt Macy 144*eda14cbcSMatt Macy /* 145*eda14cbcSMatt Macy * If there aren't too many streams already, create a new stream. 
 * The "blkid" argument is the next block that we expect this stream to access.
 * While we're here, clean up old streams (which haven't been
 * accessed for at least zfetch_min_sec_reap seconds).
 */
static void
dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
{
	zstream_t *zs_next;
	int numstreams = 0;

	/* Caller holds zf_lock, which protects the stream list. */
	ASSERT(MUTEX_HELD(&zf->zf_lock));

	/*
	 * Clean up old streams.  Streams untouched for more than
	 * zfetch_min_sec_reap seconds are reaped; survivors are counted
	 * so we can enforce the per-file stream limit below.
	 */
	for (zstream_t *zs = list_head(&zf->zf_stream);
	    zs != NULL; zs = zs_next) {
		/* Fetch the successor first: removal invalidates zs. */
		zs_next = list_next(&zf->zf_stream, zs);
		if (((gethrtime() - zs->zs_atime) / NANOSEC) >
		    zfetch_min_sec_reap)
			dmu_zfetch_stream_remove(zf, zs);
		else
			numstreams++;
	}

	/*
	 * The maximum number of streams is normally zfetch_max_streams,
	 * but for small files we lower it such that it's at least possible
	 * for all the streams to be non-overlapping.
	 *
	 * If we are already at the maximum number of streams for this file,
	 * even after removing old streams, then don't create this stream.
	 *
	 * NOTE(review): this division assumes zfetch_max_distance != 0;
	 * the tunable's default is 8MB, but nothing here guards against an
	 * administrator setting it to 0 -- confirm upstream expectations.
	 */
	uint32_t max_streams = MAX(1, MIN(zfetch_max_streams,
	    zf->zf_dnode->dn_maxblkid * zf->zf_dnode->dn_datablksz /
	    zfetch_max_distance));
	if (numstreams >= max_streams) {
		ZFETCHSTAT_BUMP(zfetchstat_max_streams);
		return;
	}

	/*
	 * New stream: both prefetch frontiers (data and indirect) start at
	 * the expected next access, so the first hit begins prefetching
	 * just past the accessed block.
	 */
	zstream_t *zs = kmem_zalloc(sizeof (*zs), KM_SLEEP);
	zs->zs_blkid = blkid;
	zs->zs_pf_blkid = blkid;
	zs->zs_ipf_blkid = blkid;
	zs->zs_atime = gethrtime();
	mutex_init(&zs->zs_lock, NULL, MUTEX_DEFAULT, NULL);

	list_insert_head(&zf->zf_stream, zs);
}

/*
 * This is the predictive prefetch entry point.  It associates dnode access
 * specified with blkid and nblks arguments with prefetch stream, predicts
 * further accesses based on that stats and initiates speculative prefetch.
 * fetch_data argument specifies whether actual data blocks should be fetched:
 *   FALSE -- prefetch only indirect blocks for predicted data blocks;
 *   TRUE -- prefetch predicted data blocks plus following indirect blocks.
204*eda14cbcSMatt Macy */ 205*eda14cbcSMatt Macy void 206*eda14cbcSMatt Macy dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data, 207*eda14cbcSMatt Macy boolean_t have_lock) 208*eda14cbcSMatt Macy { 209*eda14cbcSMatt Macy zstream_t *zs; 210*eda14cbcSMatt Macy int64_t pf_start, ipf_start, ipf_istart, ipf_iend; 211*eda14cbcSMatt Macy int64_t pf_ahead_blks, max_blks; 212*eda14cbcSMatt Macy int epbs, max_dist_blks, pf_nblks, ipf_nblks; 213*eda14cbcSMatt Macy uint64_t end_of_access_blkid; 214*eda14cbcSMatt Macy end_of_access_blkid = blkid + nblks; 215*eda14cbcSMatt Macy spa_t *spa = zf->zf_dnode->dn_objset->os_spa; 216*eda14cbcSMatt Macy 217*eda14cbcSMatt Macy if (zfs_prefetch_disable) 218*eda14cbcSMatt Macy return; 219*eda14cbcSMatt Macy /* 220*eda14cbcSMatt Macy * If we haven't yet loaded the indirect vdevs' mappings, we 221*eda14cbcSMatt Macy * can only read from blocks that we carefully ensure are on 222*eda14cbcSMatt Macy * concrete vdevs (or previously-loaded indirect vdevs). So we 223*eda14cbcSMatt Macy * can't allow the predictive prefetcher to attempt reads of other 224*eda14cbcSMatt Macy * blocks (e.g. of the MOS's dnode object). 225*eda14cbcSMatt Macy */ 226*eda14cbcSMatt Macy if (!spa_indirect_vdevs_loaded(spa)) 227*eda14cbcSMatt Macy return; 228*eda14cbcSMatt Macy 229*eda14cbcSMatt Macy /* 230*eda14cbcSMatt Macy * As a fast path for small (single-block) files, ignore access 231*eda14cbcSMatt Macy * to the first block. 232*eda14cbcSMatt Macy */ 233*eda14cbcSMatt Macy if (blkid == 0) 234*eda14cbcSMatt Macy return; 235*eda14cbcSMatt Macy 236*eda14cbcSMatt Macy if (!have_lock) 237*eda14cbcSMatt Macy rw_enter(&zf->zf_dnode->dn_struct_rwlock, RW_READER); 238*eda14cbcSMatt Macy mutex_enter(&zf->zf_lock); 239*eda14cbcSMatt Macy 240*eda14cbcSMatt Macy /* 241*eda14cbcSMatt Macy * Find matching prefetch stream. 
Depending on whether the accesses 242*eda14cbcSMatt Macy * are block-aligned, first block of the new access may either follow 243*eda14cbcSMatt Macy * the last block of the previous access, or be equal to it. 244*eda14cbcSMatt Macy */ 245*eda14cbcSMatt Macy for (zs = list_head(&zf->zf_stream); zs != NULL; 246*eda14cbcSMatt Macy zs = list_next(&zf->zf_stream, zs)) { 247*eda14cbcSMatt Macy if (blkid == zs->zs_blkid || blkid + 1 == zs->zs_blkid) { 248*eda14cbcSMatt Macy mutex_enter(&zs->zs_lock); 249*eda14cbcSMatt Macy /* 250*eda14cbcSMatt Macy * zs_blkid could have changed before we 251*eda14cbcSMatt Macy * acquired zs_lock; re-check them here. 252*eda14cbcSMatt Macy */ 253*eda14cbcSMatt Macy if (blkid == zs->zs_blkid) { 254*eda14cbcSMatt Macy break; 255*eda14cbcSMatt Macy } else if (blkid + 1 == zs->zs_blkid) { 256*eda14cbcSMatt Macy blkid++; 257*eda14cbcSMatt Macy nblks--; 258*eda14cbcSMatt Macy if (nblks == 0) { 259*eda14cbcSMatt Macy /* Already prefetched this before. */ 260*eda14cbcSMatt Macy mutex_exit(&zs->zs_lock); 261*eda14cbcSMatt Macy mutex_exit(&zf->zf_lock); 262*eda14cbcSMatt Macy if (!have_lock) { 263*eda14cbcSMatt Macy rw_exit(&zf->zf_dnode-> 264*eda14cbcSMatt Macy dn_struct_rwlock); 265*eda14cbcSMatt Macy } 266*eda14cbcSMatt Macy return; 267*eda14cbcSMatt Macy } 268*eda14cbcSMatt Macy break; 269*eda14cbcSMatt Macy } 270*eda14cbcSMatt Macy mutex_exit(&zs->zs_lock); 271*eda14cbcSMatt Macy } 272*eda14cbcSMatt Macy } 273*eda14cbcSMatt Macy 274*eda14cbcSMatt Macy if (zs == NULL) { 275*eda14cbcSMatt Macy /* 276*eda14cbcSMatt Macy * This access is not part of any existing stream. Create 277*eda14cbcSMatt Macy * a new stream for it. 
278*eda14cbcSMatt Macy */ 279*eda14cbcSMatt Macy ZFETCHSTAT_BUMP(zfetchstat_misses); 280*eda14cbcSMatt Macy 281*eda14cbcSMatt Macy dmu_zfetch_stream_create(zf, end_of_access_blkid); 282*eda14cbcSMatt Macy mutex_exit(&zf->zf_lock); 283*eda14cbcSMatt Macy if (!have_lock) 284*eda14cbcSMatt Macy rw_exit(&zf->zf_dnode->dn_struct_rwlock); 285*eda14cbcSMatt Macy return; 286*eda14cbcSMatt Macy } 287*eda14cbcSMatt Macy 288*eda14cbcSMatt Macy /* 289*eda14cbcSMatt Macy * This access was to a block that we issued a prefetch for on 290*eda14cbcSMatt Macy * behalf of this stream. Issue further prefetches for this stream. 291*eda14cbcSMatt Macy * 292*eda14cbcSMatt Macy * Normally, we start prefetching where we stopped 293*eda14cbcSMatt Macy * prefetching last (zs_pf_blkid). But when we get our first 294*eda14cbcSMatt Macy * hit on this stream, zs_pf_blkid == zs_blkid, we don't 295*eda14cbcSMatt Macy * want to prefetch the block we just accessed. In this case, 296*eda14cbcSMatt Macy * start just after the block we just accessed. 297*eda14cbcSMatt Macy */ 298*eda14cbcSMatt Macy pf_start = MAX(zs->zs_pf_blkid, end_of_access_blkid); 299*eda14cbcSMatt Macy 300*eda14cbcSMatt Macy /* 301*eda14cbcSMatt Macy * Double our amount of prefetched data, but don't let the 302*eda14cbcSMatt Macy * prefetch get further ahead than zfetch_max_distance. 303*eda14cbcSMatt Macy */ 304*eda14cbcSMatt Macy if (fetch_data) { 305*eda14cbcSMatt Macy max_dist_blks = 306*eda14cbcSMatt Macy zfetch_max_distance >> zf->zf_dnode->dn_datablkshift; 307*eda14cbcSMatt Macy /* 308*eda14cbcSMatt Macy * Previously, we were (zs_pf_blkid - blkid) ahead. We 309*eda14cbcSMatt Macy * want to now be double that, so read that amount again, 310*eda14cbcSMatt Macy * plus the amount we are catching up by (i.e. the amount 311*eda14cbcSMatt Macy * read just now). 
312*eda14cbcSMatt Macy */ 313*eda14cbcSMatt Macy pf_ahead_blks = zs->zs_pf_blkid - blkid + nblks; 314*eda14cbcSMatt Macy max_blks = max_dist_blks - (pf_start - end_of_access_blkid); 315*eda14cbcSMatt Macy pf_nblks = MIN(pf_ahead_blks, max_blks); 316*eda14cbcSMatt Macy } else { 317*eda14cbcSMatt Macy pf_nblks = 0; 318*eda14cbcSMatt Macy } 319*eda14cbcSMatt Macy 320*eda14cbcSMatt Macy zs->zs_pf_blkid = pf_start + pf_nblks; 321*eda14cbcSMatt Macy 322*eda14cbcSMatt Macy /* 323*eda14cbcSMatt Macy * Do the same for indirects, starting from where we stopped last, 324*eda14cbcSMatt Macy * or where we will stop reading data blocks (and the indirects 325*eda14cbcSMatt Macy * that point to them). 326*eda14cbcSMatt Macy */ 327*eda14cbcSMatt Macy ipf_start = MAX(zs->zs_ipf_blkid, zs->zs_pf_blkid); 328*eda14cbcSMatt Macy max_dist_blks = zfetch_max_idistance >> zf->zf_dnode->dn_datablkshift; 329*eda14cbcSMatt Macy /* 330*eda14cbcSMatt Macy * We want to double our distance ahead of the data prefetch 331*eda14cbcSMatt Macy * (or reader, if we are not prefetching data). Previously, we 332*eda14cbcSMatt Macy * were (zs_ipf_blkid - blkid) ahead. To double that, we read 333*eda14cbcSMatt Macy * that amount again, plus the amount we are catching up by 334*eda14cbcSMatt Macy * (i.e. the amount read now + the amount of data prefetched now). 
335*eda14cbcSMatt Macy */ 336*eda14cbcSMatt Macy pf_ahead_blks = zs->zs_ipf_blkid - blkid + nblks + pf_nblks; 337*eda14cbcSMatt Macy max_blks = max_dist_blks - (ipf_start - end_of_access_blkid); 338*eda14cbcSMatt Macy ipf_nblks = MIN(pf_ahead_blks, max_blks); 339*eda14cbcSMatt Macy zs->zs_ipf_blkid = ipf_start + ipf_nblks; 340*eda14cbcSMatt Macy 341*eda14cbcSMatt Macy epbs = zf->zf_dnode->dn_indblkshift - SPA_BLKPTRSHIFT; 342*eda14cbcSMatt Macy ipf_istart = P2ROUNDUP(ipf_start, 1 << epbs) >> epbs; 343*eda14cbcSMatt Macy ipf_iend = P2ROUNDUP(zs->zs_ipf_blkid, 1 << epbs) >> epbs; 344*eda14cbcSMatt Macy 345*eda14cbcSMatt Macy zs->zs_atime = gethrtime(); 346*eda14cbcSMatt Macy zs->zs_blkid = end_of_access_blkid; 347*eda14cbcSMatt Macy mutex_exit(&zs->zs_lock); 348*eda14cbcSMatt Macy mutex_exit(&zf->zf_lock); 349*eda14cbcSMatt Macy 350*eda14cbcSMatt Macy /* 351*eda14cbcSMatt Macy * dbuf_prefetch() is asynchronous (even when it needs to read 352*eda14cbcSMatt Macy * indirect blocks), but we still prefer to drop our locks before 353*eda14cbcSMatt Macy * calling it to reduce the time we hold them. 
354*eda14cbcSMatt Macy */ 355*eda14cbcSMatt Macy 356*eda14cbcSMatt Macy for (int i = 0; i < pf_nblks; i++) { 357*eda14cbcSMatt Macy dbuf_prefetch(zf->zf_dnode, 0, pf_start + i, 358*eda14cbcSMatt Macy ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH); 359*eda14cbcSMatt Macy } 360*eda14cbcSMatt Macy for (int64_t iblk = ipf_istart; iblk < ipf_iend; iblk++) { 361*eda14cbcSMatt Macy dbuf_prefetch(zf->zf_dnode, 1, iblk, 362*eda14cbcSMatt Macy ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH); 363*eda14cbcSMatt Macy } 364*eda14cbcSMatt Macy if (!have_lock) 365*eda14cbcSMatt Macy rw_exit(&zf->zf_dnode->dn_struct_rwlock); 366*eda14cbcSMatt Macy ZFETCHSTAT_BUMP(zfetchstat_hits); 367*eda14cbcSMatt Macy } 368*eda14cbcSMatt Macy 369*eda14cbcSMatt Macy /* BEGIN CSTYLED */ 370*eda14cbcSMatt Macy ZFS_MODULE_PARAM(zfs_prefetch, zfs_prefetch_, disable, INT, ZMOD_RW, 371*eda14cbcSMatt Macy "Disable all ZFS prefetching"); 372*eda14cbcSMatt Macy 373*eda14cbcSMatt Macy ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_streams, UINT, ZMOD_RW, 374*eda14cbcSMatt Macy "Max number of streams per zfetch"); 375*eda14cbcSMatt Macy 376*eda14cbcSMatt Macy ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, min_sec_reap, UINT, ZMOD_RW, 377*eda14cbcSMatt Macy "Min time before stream reclaim"); 378*eda14cbcSMatt Macy 379*eda14cbcSMatt Macy ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, max_distance, UINT, ZMOD_RW, 380*eda14cbcSMatt Macy "Max bytes to prefetch per stream (default 8MB)"); 381*eda14cbcSMatt Macy 382*eda14cbcSMatt Macy ZFS_MODULE_PARAM(zfs_prefetch, zfetch_, array_rd_sz, ULONG, ZMOD_RW, 383*eda14cbcSMatt Macy "Number of bytes in a array_read"); 384*eda14cbcSMatt Macy /* END CSTYLED */ 385