/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_pool.h>
#include <sys/dnode.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/dmu_impl.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>
#include <sys/callb.h>
#include <sys/zfeature.h>

int32_t zfs_pd_bytes_max = 50 * 1024 * 1024;	/* 50MB */
int32_t send_holes_without_birth_time = 1;

typedef struct prefetch_data {
	kmutex_t pd_mtx;
	kcondvar_t pd_cv;
	int32_t pd_bytes_fetched;
	int pd_flags;
	boolean_t pd_cancel;
	boolean_t pd_exited;
	zbookmark_phys_t pd_resume;
} prefetch_data_t;

typedef struct traverse_data {
	spa_t *td_spa;
	uint64_t td_objset;
	blkptr_t *td_rootbp;
	uint64_t td_min_txg;
	zbookmark_phys_t *td_resume;
	int td_flags;
	prefetch_data_t *td_pfd;
	boolean_t td_paused;
	uint64_t td_hole_birth_enabled_txg;
	blkptr_cb_t *td_func;
	void *td_arg;
	boolean_t td_realloc_possible;
} traverse_data_t;

static int traverse_dnode(traverse_data_t *td, const blkptr_t *bp,
    const dnode_phys_t *dnp, uint64_t objset, uint64_t object);
static void prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *,
    uint64_t objset, uint64_t object);
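
/*
 * Overview: a traversal may run with a helper prefetch thread (see
 * traverse_prefetch_thread() below).  The two threads rendezvous through
 * prefetch_data_t: the prefetcher issues speculative arc_read()s and
 * credits each block's size to pd_bytes_fetched, while the visiting
 * thread waits for a block's bytes to be credited before debiting them
 * again.  zfs_pd_bytes_max bounds how far ahead of the consumer the
 * prefetcher may run.
 */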

static int
traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
{
	traverse_data_t *td = arg;
	zbookmark_phys_t zb;

	if (BP_IS_HOLE(bp))
		return (0);

	if (claim_txg == 0 && bp->blk_birth >= spa_min_claim_txg(td->td_spa))
		return (-1);

	SET_BOOKMARK(&zb, td->td_objset, ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
	    bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);

	(void) td->td_func(td->td_spa, zilog, bp, &zb, NULL, td->td_arg);

	return (0);
}

static int
traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
{
	traverse_data_t *td = arg;

	if (lrc->lrc_txtype == TX_WRITE) {
		lr_write_t *lr = (lr_write_t *)lrc;
		blkptr_t *bp = &lr->lr_blkptr;
		zbookmark_phys_t zb;

		if (BP_IS_HOLE(bp))
			return (0);

		if (claim_txg == 0 || bp->blk_birth < claim_txg)
			return (0);

		SET_BOOKMARK(&zb, td->td_objset, lr->lr_foid,
		    ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp));

		(void) td->td_func(td->td_spa, zilog, bp, &zb, NULL,
		    td->td_arg);
	}
	return (0);
}

static void
traverse_zil(traverse_data_t *td, zil_header_t *zh)
{
	uint64_t claim_txg = zh->zh_claim_txg;

	/*
	 * We only want to visit blocks that have been claimed but not yet
	 * replayed; plus blocks that are already stable in read-only mode.
	 */
	if (claim_txg == 0 && spa_writeable(td->td_spa))
		return;

	zilog_t *zilog = zil_alloc(spa_get_dsl(td->td_spa)->dp_meta_objset, zh);
	(void) zil_parse(zilog, traverse_zil_block, traverse_zil_record, td,
	    claim_txg, !(td->td_flags & TRAVERSE_NO_DECRYPT));
	zil_free(zilog);
}

typedef enum resume_skip {
	RESUME_SKIP_ALL,
	RESUME_SKIP_NONE,
	RESUME_SKIP_CHILDREN
} resume_skip_t;

/*
 * Returns RESUME_SKIP_ALL if td indicates that we are resuming a traversal and
 * the block indicated by zb does not need to be visited at all. Returns
 * RESUME_SKIP_CHILDREN if we are resuming a post traversal and we reach the
 * resume point. This indicates that this block should be visited but not its
 * children (since they must have been visited in a previous traversal).
 * Otherwise returns RESUME_SKIP_NONE.
 */
static resume_skip_t
resume_skip_check(traverse_data_t *td, const dnode_phys_t *dnp,
    const zbookmark_phys_t *zb)
{
	if (td->td_resume != NULL && !ZB_IS_ZERO(td->td_resume)) {
		/*
		 * If we already visited this bp & everything below,
		 * don't bother doing it again.
		 */
		if (zbookmark_subtree_completed(dnp, zb, td->td_resume))
			return (RESUME_SKIP_ALL);

		/*
		 * If we found the block we're trying to resume from, zero
		 * the bookmark out to indicate that we have resumed.
		 */
		if (bcmp(zb, td->td_resume, sizeof (*zb)) == 0) {
			bzero(td->td_resume, sizeof (*zb));
			if (td->td_flags & TRAVERSE_POST)
				return (RESUME_SKIP_CHILDREN);
		}
	}
	return (RESUME_SKIP_NONE);
}

static void
traverse_prefetch_metadata(traverse_data_t *td,
    const blkptr_t *bp, const zbookmark_phys_t *zb)
{
	arc_flags_t flags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
	int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;

	if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA))
		return;
	/*
	 * If we are in the process of resuming, don't prefetch, because
	 * some children will not be needed (and in fact may have already
	 * been freed).
	 */
	if (td->td_resume != NULL && !ZB_IS_ZERO(td->td_resume))
		return;
	if (BP_IS_HOLE(bp) || bp->blk_birth <= td->td_min_txg)
		return;
	if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE)
		return;
	ASSERT(!BP_IS_REDACTED(bp));

	if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp))
		zio_flags |= ZIO_FLAG_RAW;

	(void) arc_read(NULL, td->td_spa, bp, NULL, NULL,
	    ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb);
}
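
/*
 * Holes, embedded blocks, redacted blocks, and intent-log blocks are not
 * fetched through the ARC by the visiting thread, so the prefetcher skips
 * them (and the visitor does not wait on them).
 */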
static boolean_t
prefetch_needed(prefetch_data_t *pfd, const blkptr_t *bp)
{
	ASSERT(pfd->pd_flags & TRAVERSE_PREFETCH_DATA);
	if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp) ||
	    BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG || BP_IS_REDACTED(bp))
		return (B_FALSE);
	return (B_TRUE);
}

static int
traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
    const blkptr_t *bp, const zbookmark_phys_t *zb)
{
	int err = 0;
	arc_buf_t *buf = NULL;
	prefetch_data_t *pd = td->td_pfd;

	switch (resume_skip_check(td, dnp, zb)) {
	case RESUME_SKIP_ALL:
		return (0);
	case RESUME_SKIP_CHILDREN:
		goto post;
	case RESUME_SKIP_NONE:
		break;
	default:
		ASSERT(0);
	}

	if (bp->blk_birth == 0) {
		/*
		 * Since this block has a birth time of 0 it must be one of
		 * two things: a hole created before the
		 * SPA_FEATURE_HOLE_BIRTH feature was enabled, or a hole
		 * which has always been a hole in an object.
		 *
		 * If a file is written sparsely, then the unwritten parts of
		 * the file were "always holes" -- that is, they have been
		 * holes since this object was allocated. However, we (and
		 * our callers) can not necessarily tell when an object was
		 * allocated. Therefore, if it's possible that this object
		 * was freed and then its object number reused, we need to
		 * visit all the holes with birth==0.
		 *
		 * If it isn't possible that the object number was reused,
		 * then if SPA_FEATURE_HOLE_BIRTH was enabled before we wrote
		 * all the blocks we will visit as part of this traversal,
		 * then this hole must have always existed, so we can skip
		 * it. We visit blocks born after (exclusive) td_min_txg.
		 *
		 * Note that the meta-dnode cannot be reallocated.
		 */
		if (!send_holes_without_birth_time &&
		    (!td->td_realloc_possible ||
		    zb->zb_object == DMU_META_DNODE_OBJECT) &&
		    td->td_hole_birth_enabled_txg <= td->td_min_txg)
			return (0);
	} else if (bp->blk_birth <= td->td_min_txg) {
		return (0);
	}
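
	/*
	 * If a data prefetcher is running, wait until it has credited at
	 * least this block's worth of bytes before debiting them from the
	 * shared budget; this keeps the consumer from racing ahead of the
	 * prefetch stream.
	 */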
	if (pd != NULL && !pd->pd_exited && prefetch_needed(pd, bp)) {
		uint64_t size = BP_GET_LSIZE(bp);
		mutex_enter(&pd->pd_mtx);
		ASSERT(pd->pd_bytes_fetched >= 0);
		while (pd->pd_bytes_fetched < size && !pd->pd_exited)
			cv_wait_sig(&pd->pd_cv, &pd->pd_mtx);
		pd->pd_bytes_fetched -= size;
		cv_broadcast(&pd->pd_cv);
		mutex_exit(&pd->pd_mtx);
	}

	if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp)) {
		err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg);
		if (err != 0)
			goto post;
		return (0);
	}

	if (td->td_flags & TRAVERSE_PRE) {
		err = td->td_func(td->td_spa, NULL, bp, zb, dnp,
		    td->td_arg);
		if (err == TRAVERSE_VISIT_NO_CHILDREN)
			return (0);
		if (err != 0)
			goto post;
	}

	if (BP_GET_LEVEL(bp) > 0) {
		uint32_t flags = ARC_FLAG_WAIT;
		int32_t i;
		int32_t epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
		zbookmark_phys_t *czb;

		ASSERT(!BP_IS_PROTECTED(bp));

		err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
		if (err != 0)
			goto post;

		czb = kmem_alloc(sizeof (zbookmark_phys_t), KM_SLEEP);

		for (i = 0; i < epb; i++) {
			SET_BOOKMARK(czb, zb->zb_objset, zb->zb_object,
			    zb->zb_level - 1,
			    zb->zb_blkid * epb + i);
			traverse_prefetch_metadata(td,
			    &((blkptr_t *)buf->b_data)[i], czb);
		}

		/* recursively visitbp() blocks below this */
		for (i = 0; i < epb; i++) {
			SET_BOOKMARK(czb, zb->zb_objset, zb->zb_object,
			    zb->zb_level - 1,
			    zb->zb_blkid * epb + i);
			err = traverse_visitbp(td, dnp,
			    &((blkptr_t *)buf->b_data)[i], czb);
			if (err != 0)
				break;
		}

		kmem_free(czb, sizeof (zbookmark_phys_t));

	} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
		uint32_t flags = ARC_FLAG_WAIT;
		uint32_t zio_flags = ZIO_FLAG_CANFAIL;
		int32_t i;
		int32_t epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
		dnode_phys_t *child_dnp;

		/*
		 * dnode blocks might have their bonus buffers encrypted, so
		 * we must be careful to honor TRAVERSE_NO_DECRYPT
		 */
		if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp))
			zio_flags |= ZIO_FLAG_RAW;

		err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
		    ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb);
		if (err != 0)
			goto post;

		child_dnp = buf->b_data;

		for (i = 0; i < epb; i += child_dnp[i].dn_extra_slots + 1) {
			prefetch_dnode_metadata(td, &child_dnp[i],
			    zb->zb_objset, zb->zb_blkid * epb + i);
		}

		/* recursively visitbp() blocks below this */
		for (i = 0; i < epb; i += child_dnp[i].dn_extra_slots + 1) {
			err = traverse_dnode(td, bp, &child_dnp[i],
			    zb->zb_objset, zb->zb_blkid * epb + i);
			if (err != 0)
				break;
		}
	} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
		uint32_t zio_flags = ZIO_FLAG_CANFAIL;
		arc_flags_t flags = ARC_FLAG_WAIT;
		objset_phys_t *osp;

		if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp))
			zio_flags |= ZIO_FLAG_RAW;

		err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
		    ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, zb);
		if (err != 0)
			goto post;

		osp = buf->b_data;
		prefetch_dnode_metadata(td, &osp->os_meta_dnode, zb->zb_objset,
		    DMU_META_DNODE_OBJECT);
		/*
		 * See the block comment above for the goal of this variable.
		 * If the maxblkid of the meta-dnode is 0, then we know that
		 * we've never had more than DNODES_PER_BLOCK objects in the
		 * dataset, which means we can't have reused any object ids.
		 */
		if (osp->os_meta_dnode.dn_maxblkid == 0)
			td->td_realloc_possible = B_FALSE;
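
		/*
		 * In addition to the meta-dnode, an objset may carry
		 * user/group/project accounting dnodes; when the objset
		 * buffer indicates they are present, prefetch and then
		 * traverse them just like the meta-dnode.
		 */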
		if (OBJSET_BUF_HAS_USERUSED(buf)) {
			if (OBJSET_BUF_HAS_PROJECTUSED(buf))
				prefetch_dnode_metadata(td,
				    &osp->os_projectused_dnode,
				    zb->zb_objset, DMU_PROJECTUSED_OBJECT);
			prefetch_dnode_metadata(td, &osp->os_groupused_dnode,
			    zb->zb_objset, DMU_GROUPUSED_OBJECT);
			prefetch_dnode_metadata(td, &osp->os_userused_dnode,
			    zb->zb_objset, DMU_USERUSED_OBJECT);
		}

		err = traverse_dnode(td, bp, &osp->os_meta_dnode, zb->zb_objset,
		    DMU_META_DNODE_OBJECT);
		if (err == 0 && OBJSET_BUF_HAS_USERUSED(buf)) {
			if (OBJSET_BUF_HAS_PROJECTUSED(buf))
				err = traverse_dnode(td, bp,
				    &osp->os_projectused_dnode, zb->zb_objset,
				    DMU_PROJECTUSED_OBJECT);
			if (err == 0)
				err = traverse_dnode(td, bp,
				    &osp->os_groupused_dnode, zb->zb_objset,
				    DMU_GROUPUSED_OBJECT);
			if (err == 0)
				err = traverse_dnode(td, bp,
				    &osp->os_userused_dnode, zb->zb_objset,
				    DMU_USERUSED_OBJECT);
		}
	}

	if (buf)
		arc_buf_destroy(buf, &buf);

post:
	if (err == 0 && (td->td_flags & TRAVERSE_POST))
		err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg);

	if ((td->td_flags & TRAVERSE_HARD) && (err == EIO || err == ECKSUM)) {
		/*
		 * Ignore this disk error as requested by the HARD flag,
		 * and continue traversal.
		 */
		err = 0;
	}

	/*
	 * If we are stopping here, set td_resume.
	 */
	if (td->td_resume != NULL && err != 0 && !td->td_paused) {
		td->td_resume->zb_objset = zb->zb_objset;
		td->td_resume->zb_object = zb->zb_object;
		td->td_resume->zb_level = 0;
		/*
		 * If we have stopped on an indirect block (e.g. due to
		 * i/o error), we have not visited anything below it.
		 * Set the bookmark to the first level-0 block that we need
		 * to visit. This way, the resuming code does not need to
		 * deal with resuming from indirect blocks.
		 *
		 * Note, if zb_level <= 0, dnp may be NULL, so we don't want
		 * to dereference it.
		 */
		td->td_resume->zb_blkid = zb->zb_blkid;
		if (zb->zb_level > 0) {
			td->td_resume->zb_blkid <<= zb->zb_level *
			    (dnp->dn_indblkshift - SPA_BLKPTRSHIFT);
		}
		td->td_paused = B_TRUE;
	}

	return (err);
}

static void
prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *dnp,
    uint64_t objset, uint64_t object)
{
	int j;
	zbookmark_phys_t czb;

	for (j = 0; j < dnp->dn_nblkptr; j++) {
		SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
		traverse_prefetch_metadata(td, &dnp->dn_blkptr[j], &czb);
	}

	if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
		SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID);
		traverse_prefetch_metadata(td, DN_SPILL_BLKPTR(dnp), &czb);
	}
}

static int
traverse_dnode(traverse_data_t *td, const blkptr_t *bp, const dnode_phys_t *dnp,
    uint64_t objset, uint64_t object)
{
	int j, err = 0;
	zbookmark_phys_t czb;

	if (object != DMU_META_DNODE_OBJECT && td->td_resume != NULL &&
	    object < td->td_resume->zb_object)
		return (0);

	if (td->td_flags & TRAVERSE_PRE) {
		SET_BOOKMARK(&czb, objset, object, ZB_DNODE_LEVEL,
		    ZB_DNODE_BLKID);
		err = td->td_func(td->td_spa, NULL, bp, &czb, dnp,
		    td->td_arg);
		if (err == TRAVERSE_VISIT_NO_CHILDREN)
			return (0);
		if (err != 0)
			return (err);
	}

	for (j = 0; j < dnp->dn_nblkptr; j++) {
		SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
		err = traverse_visitbp(td, dnp, &dnp->dn_blkptr[j], &czb);
		if (err != 0)
			break;
	}

	if (err == 0 && (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) {
		SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID);
		err = traverse_visitbp(td, dnp, DN_SPILL_BLKPTR(dnp), &czb);
	}

	if (err == 0 && (td->td_flags & TRAVERSE_POST)) {
		SET_BOOKMARK(&czb, objset, object, ZB_DNODE_LEVEL,
		    ZB_DNODE_BLKID);
		err = td->td_func(td->td_spa, NULL, bp, &czb, dnp,
		    td->td_arg);
		if (err == TRAVERSE_VISIT_NO_CHILDREN)
			return (0);
		if (err != 0)
			return (err);
	}
	return (err);
}
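
/*
 * blkptr callback run by the prefetch thread: issue a speculative,
 * non-blocking arc_read() for each block the visiting thread is about to
 * read, throttled so that at most zfs_pd_bytes_max bytes are in flight
 * ahead of the consumer.
 */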
/* ARGSUSED */
static int
traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
    const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{
	prefetch_data_t *pfd = arg;
	int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
	arc_flags_t aflags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH |
	    ARC_FLAG_PRESCIENT_PREFETCH;

	ASSERT(pfd->pd_bytes_fetched >= 0);
	if (zb->zb_level == ZB_DNODE_LEVEL)
		return (0);
	if (pfd->pd_cancel)
		return (SET_ERROR(EINTR));

	if (!prefetch_needed(pfd, bp))
		return (0);

	mutex_enter(&pfd->pd_mtx);
	while (!pfd->pd_cancel && pfd->pd_bytes_fetched >= zfs_pd_bytes_max)
		cv_wait_sig(&pfd->pd_cv, &pfd->pd_mtx);
	pfd->pd_bytes_fetched += BP_GET_LSIZE(bp);
	cv_broadcast(&pfd->pd_cv);
	mutex_exit(&pfd->pd_mtx);

	if ((pfd->pd_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp))
		zio_flags |= ZIO_FLAG_RAW;

	(void) arc_read(NULL, spa, bp, NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
	    zio_flags, &aflags, zb);

	return (0);
}

static void
traverse_prefetch_thread(void *arg)
{
	traverse_data_t *td_main = arg;
	traverse_data_t td = *td_main;
	zbookmark_phys_t czb;
	fstrans_cookie_t cookie = spl_fstrans_mark();

	td.td_func = traverse_prefetcher;
	td.td_arg = td_main->td_pfd;
	td.td_pfd = NULL;
	td.td_resume = &td_main->td_pfd->pd_resume;

	SET_BOOKMARK(&czb, td.td_objset,
	    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
	(void) traverse_visitbp(&td, NULL, td.td_rootbp, &czb);

	mutex_enter(&td_main->td_pfd->pd_mtx);
	td_main->td_pfd->pd_exited = B_TRUE;
	cv_broadcast(&td_main->td_pfd->pd_cv);
	mutex_exit(&td_main->td_pfd->pd_mtx);
	spl_fstrans_unmark(cookie);
}

/*
 * NB: dataset must not be changing on-disk (eg, is a snapshot or we are
 * in syncing context).
 */
static int
traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
    uint64_t txg_start, zbookmark_phys_t *resume, int flags,
    blkptr_cb_t func, void *arg)
{
	traverse_data_t *td;
	prefetch_data_t *pd;
	zbookmark_phys_t *czb;
	int err;

	ASSERT(ds == NULL || objset == ds->ds_object);
	ASSERT(!(flags & TRAVERSE_PRE) || !(flags & TRAVERSE_POST));

	td = kmem_alloc(sizeof (traverse_data_t), KM_SLEEP);
	pd = kmem_zalloc(sizeof (prefetch_data_t), KM_SLEEP);
	czb = kmem_alloc(sizeof (zbookmark_phys_t), KM_SLEEP);

	td->td_spa = spa;
	td->td_objset = objset;
	td->td_rootbp = rootbp;
	td->td_min_txg = txg_start;
	td->td_resume = resume;
	td->td_func = func;
	td->td_arg = arg;
	td->td_pfd = pd;
	td->td_flags = flags;
	td->td_paused = B_FALSE;
	td->td_realloc_possible = (txg_start == 0 ? B_FALSE : B_TRUE);

	if (spa_feature_is_active(spa, SPA_FEATURE_HOLE_BIRTH)) {
		VERIFY(spa_feature_enabled_txg(spa,
		    SPA_FEATURE_HOLE_BIRTH, &td->td_hole_birth_enabled_txg));
	} else {
		td->td_hole_birth_enabled_txg = UINT64_MAX;
	}

	pd->pd_flags = flags;
	if (resume != NULL)
		pd->pd_resume = *resume;
	mutex_init(&pd->pd_mtx, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&pd->pd_cv, NULL, CV_DEFAULT, NULL);

	SET_BOOKMARK(czb, td->td_objset,
	    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);

	/* See comment on ZIL traversal in dsl_scan_visitds. */
	if (ds != NULL && !ds->ds_is_snapshot && !BP_IS_HOLE(rootbp)) {
		enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
		uint32_t flags = ARC_FLAG_WAIT;
		objset_phys_t *osp;
		arc_buf_t *buf;
		ASSERT(!BP_IS_REDACTED(rootbp));

		if ((td->td_flags & TRAVERSE_NO_DECRYPT) &&
		    BP_IS_PROTECTED(rootbp))
			zio_flags |= ZIO_FLAG_RAW;

		err = arc_read(NULL, td->td_spa, rootbp, arc_getbuf_func,
		    &buf, ZIO_PRIORITY_ASYNC_READ, zio_flags, &flags, czb);
		if (err != 0) {
			/*
			 * If both TRAVERSE_HARD and TRAVERSE_PRE are set,
			 * continue to visitbp so that td_func can be called
			 * in pre stage, and err will reset to zero.
			 */
			if (!(td->td_flags & TRAVERSE_HARD) ||
			    !(td->td_flags & TRAVERSE_PRE))
				goto out;
		} else {
			osp = buf->b_data;
			traverse_zil(td, &osp->os_zil_header);
			arc_buf_destroy(buf, &buf);
		}
	}
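
	/*
	 * Start the data prefetch thread if requested; if the prefetch
	 * taskq has no room (TQ_NOQUEUE), fall back to traversing without
	 * prefetch by marking the prefetcher as already exited.
	 */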
	if (!(flags & TRAVERSE_PREFETCH_DATA) ||
	    taskq_dispatch(spa->spa_prefetch_taskq, traverse_prefetch_thread,
	    td, TQ_NOQUEUE) == TASKQID_INVALID)
		pd->pd_exited = B_TRUE;

	err = traverse_visitbp(td, NULL, rootbp, czb);

	mutex_enter(&pd->pd_mtx);
	pd->pd_cancel = B_TRUE;
	cv_broadcast(&pd->pd_cv);
	while (!pd->pd_exited)
		cv_wait_sig(&pd->pd_cv, &pd->pd_mtx);
	mutex_exit(&pd->pd_mtx);
out:
	mutex_destroy(&pd->pd_mtx);
	cv_destroy(&pd->pd_cv);

	kmem_free(czb, sizeof (zbookmark_phys_t));
	kmem_free(pd, sizeof (struct prefetch_data));
	kmem_free(td, sizeof (struct traverse_data));

	return (err);
}

/*
 * NB: dataset must not be changing on-disk (eg, is a snapshot or we are
 * in syncing context).
 */
int
traverse_dataset_resume(dsl_dataset_t *ds, uint64_t txg_start,
    zbookmark_phys_t *resume,
    int flags, blkptr_cb_t func, void *arg)
{
	return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, ds->ds_object,
	    &dsl_dataset_phys(ds)->ds_bp, txg_start, resume, flags, func, arg));
}

int
traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start,
    int flags, blkptr_cb_t func, void *arg)
{
	return (traverse_dataset_resume(ds, txg_start, NULL, flags, func, arg));
}

int
traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr,
    uint64_t txg_start, zbookmark_phys_t *resume, int flags,
    blkptr_cb_t func, void *arg)
{
	return (traverse_impl(spa, NULL, ZB_DESTROYED_OBJSET,
	    blkptr, txg_start, resume, flags, func, arg));
}
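
/*
 * Illustrative sketch (not part of this file's API): a minimal blkptr_cb_t
 * that tallies the logical size of every non-hole block in a dataset.  The
 * callback name "count_cb" and the "bytes" accumulator are hypothetical.
 * With TRAVERSE_PRE, a callback may return TRAVERSE_VISIT_NO_CHILDREN to
 * prune a subtree, or a nonzero error to stop the traversal (recording a
 * resume bookmark if one was supplied).
 *
 *	static int
 *	count_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
 *	    const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
 *	{
 *		uint64_t *bytes = arg;
 *
 *		if (bp == NULL || BP_IS_HOLE(bp))
 *			return (0);
 *		*bytes += BP_GET_LSIZE(bp);
 *		return (0);
 *	}
 *
 *	uint64_t bytes = 0;
 *	int err = traverse_dataset(ds, 0,
 *	    TRAVERSE_PRE | TRAVERSE_PREFETCH_DATA, count_cb, &bytes);
 */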

/*
 * NB: pool must not be changing on-disk (eg, from zdb or sync context).
 */
int
traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
    blkptr_cb_t func, void *arg)
{
	int err;
	dsl_pool_t *dp = spa_get_dsl(spa);
	objset_t *mos = dp->dp_meta_objset;
	boolean_t hard = (flags & TRAVERSE_HARD);

	/* visit the MOS */
	err = traverse_impl(spa, NULL, 0, spa_get_rootblkptr(spa),
	    txg_start, NULL, flags, func, arg);
	if (err != 0)
		return (err);

	/* visit each dataset */
	for (uint64_t obj = 1; err == 0;
	    err = dmu_object_next(mos, &obj, B_FALSE, txg_start)) {
		dmu_object_info_t doi;

		err = dmu_object_info(mos, obj, &doi);
		if (err != 0) {
			if (hard)
				continue;
			break;
		}

		if (doi.doi_bonus_type == DMU_OT_DSL_DATASET) {
			dsl_dataset_t *ds;
			uint64_t txg = txg_start;

			dsl_pool_config_enter(dp, FTAG);
			err = dsl_dataset_hold_obj(dp, obj, FTAG, &ds);
			dsl_pool_config_exit(dp, FTAG);
			if (err != 0) {
				if (hard)
					continue;
				break;
			}
			if (dsl_dataset_phys(ds)->ds_prev_snap_txg > txg)
				txg = dsl_dataset_phys(ds)->ds_prev_snap_txg;
			err = traverse_dataset(ds, txg, flags, func, arg);
			dsl_dataset_rele(ds, FTAG);
			if (err != 0)
				break;
		}
	}
	if (err == ESRCH)
		err = 0;
	return (err);
}

EXPORT_SYMBOL(traverse_dataset);
EXPORT_SYMBOL(traverse_pool);

/* BEGIN CSTYLED */
ZFS_MODULE_PARAM(zfs, zfs_, pd_bytes_max, INT, ZMOD_RW,
	"Max number of bytes to prefetch");

#if defined(_KERNEL)
module_param_named(ignore_hole_birth, send_holes_without_birth_time, int, 0644);
MODULE_PARM_DESC(ignore_hole_birth,
	"Alias for send_holes_without_birth_time");
#endif

ZFS_MODULE_PARAM(zfs, , send_holes_without_birth_time, INT, ZMOD_RW,
	"Ignore hole_birth txg for zfs send");
/* END CSTYLED */