1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22ea8dc4b6Seschrock * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23fa9e4066Sahrens * Use is subject to license terms. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 26fa9e4066Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27fa9e4066Sahrens 28fa9e4066Sahrens #include <sys/dmu.h> 29fa9e4066Sahrens #include <sys/dmu_impl.h> 30fa9e4066Sahrens #include <sys/dmu_tx.h> 31fa9e4066Sahrens #include <sys/dbuf.h> 32fa9e4066Sahrens #include <sys/dnode.h> 33fa9e4066Sahrens #include <sys/zfs_context.h> 34fa9e4066Sahrens #include <sys/dmu_objset.h> 35fa9e4066Sahrens #include <sys/dmu_traverse.h> 36fa9e4066Sahrens #include <sys/dsl_dataset.h> 37fa9e4066Sahrens #include <sys/dsl_dir.h> 38fa9e4066Sahrens #include <sys/dsl_pool.h> 39fa9e4066Sahrens #include <sys/dmu_zfetch.h> 40fa9e4066Sahrens #include <sys/zfs_ioctl.h> 41fa9e4066Sahrens #include <sys/zap.h> 42ea8dc4b6Seschrock #include <sys/zio_checksum.h> 43fa9e4066Sahrens 44fa9e4066Sahrens const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { 45fa9e4066Sahrens { byteswap_uint8_array, TRUE, "unallocated" }, 46fa9e4066Sahrens { zap_byteswap, TRUE, "object directory" }, 47fa9e4066Sahrens { byteswap_uint64_array, TRUE, "object array" }, 48fa9e4066Sahrens { byteswap_uint8_array, TRUE, "packed nvlist" }, 49fa9e4066Sahrens { byteswap_uint64_array, TRUE, "packed nvlist size" }, 50fa9e4066Sahrens { byteswap_uint64_array, TRUE, "bplist" }, 51fa9e4066Sahrens { byteswap_uint64_array, TRUE, "bplist header" }, 52fa9e4066Sahrens { byteswap_uint64_array, TRUE, "SPA space map header" }, 53fa9e4066Sahrens { byteswap_uint64_array, TRUE, "SPA space map" }, 54fa9e4066Sahrens { byteswap_uint64_array, TRUE, "ZIL intent log" }, 55fa9e4066Sahrens { dnode_buf_byteswap, TRUE, "DMU dnode" }, 56fa9e4066Sahrens { dmu_objset_byteswap, TRUE, "DMU objset" }, 57fa9e4066Sahrens { byteswap_uint64_array, TRUE, "DSL directory" }, 58fa9e4066Sahrens { zap_byteswap, TRUE, "DSL directory child map"}, 59fa9e4066Sahrens { zap_byteswap, TRUE, "DSL dataset snap map" }, 60fa9e4066Sahrens { zap_byteswap, TRUE, "DSL props" }, 61fa9e4066Sahrens { byteswap_uint64_array, TRUE, "DSL dataset" }, 62fa9e4066Sahrens { zfs_znode_byteswap, TRUE, "ZFS znode" }, 63fa9e4066Sahrens { zfs_acl_byteswap, TRUE, "ZFS ACL" }, 64fa9e4066Sahrens { byteswap_uint8_array, FALSE, "ZFS plain file" }, 65fa9e4066Sahrens { zap_byteswap, TRUE, "ZFS directory" }, 66fa9e4066Sahrens { zap_byteswap, TRUE, "ZFS master node" }, 67fa9e4066Sahrens { zap_byteswap, TRUE, "ZFS delete queue" }, 68fa9e4066Sahrens { byteswap_uint8_array, FALSE, "zvol object" }, 69fa9e4066Sahrens { zap_byteswap, TRUE, "zvol prop" }, 70fa9e4066Sahrens { byteswap_uint8_array, FALSE, "other uint8[]" }, 71fa9e4066Sahrens { byteswap_uint64_array, FALSE, "other uint64[]" }, 72fa9e4066Sahrens { zap_byteswap, TRUE, "other ZAP" }, 73ea8dc4b6Seschrock { zap_byteswap, TRUE, "persistent error log" }, 74fa9e4066Sahrens }; 75fa9e4066Sahrens 76fa9e4066Sahrens int 77ea8dc4b6Seschrock dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset, 78ea8dc4b6Seschrock void *tag, dmu_buf_t **dbp) 79fa9e4066Sahrens { 80fa9e4066Sahrens dnode_t *dn; 81fa9e4066Sahrens uint64_t blkid; 82fa9e4066Sahrens dmu_buf_impl_t *db; 83ea8dc4b6Seschrock int err; 84fa9e4066Sahrens 85ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 86ea8dc4b6Seschrock if (err) 87ea8dc4b6Seschrock return (err); 88fa9e4066Sahrens blkid = dbuf_whichblock(dn, offset); 89fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 90ea8dc4b6Seschrock db = dbuf_hold(dn, blkid, tag); 91fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 92ea8dc4b6Seschrock if (db == NULL) { 93ea8dc4b6Seschrock err = EIO; 94ea8dc4b6Seschrock } else { 95ea8dc4b6Seschrock err = dbuf_read(db, NULL, DB_RF_CANFAIL); 96ea8dc4b6Seschrock if (err) { 97ea8dc4b6Seschrock dbuf_rele(db, tag); 98ea8dc4b6Seschrock db = NULL; 99ea8dc4b6Seschrock } 100fa9e4066Sahrens } 101fa9e4066Sahrens 102ea8dc4b6Seschrock dnode_rele(dn, FTAG); 103ea8dc4b6Seschrock *dbp = &db->db; 104ea8dc4b6Seschrock return (err); 105fa9e4066Sahrens } 106fa9e4066Sahrens 107fa9e4066Sahrens int 108fa9e4066Sahrens dmu_bonus_max(void) 109fa9e4066Sahrens { 110fa9e4066Sahrens return (DN_MAX_BONUSLEN); 111fa9e4066Sahrens } 112fa9e4066Sahrens 113fa9e4066Sahrens /* 114ea8dc4b6Seschrock * returns ENOENT, EIO, or 0. 115fa9e4066Sahrens */ 116ea8dc4b6Seschrock int 117ea8dc4b6Seschrock dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp) 118fa9e4066Sahrens { 119ea8dc4b6Seschrock dnode_t *dn; 120ea8dc4b6Seschrock int err, count; 121fa9e4066Sahrens dmu_buf_impl_t *db; 122fa9e4066Sahrens 123ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 124ea8dc4b6Seschrock if (err) 125ea8dc4b6Seschrock return (err); 126fa9e4066Sahrens 127fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 128ea8dc4b6Seschrock if (dn->dn_bonus == NULL) { 129fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 130ea8dc4b6Seschrock rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 131ea8dc4b6Seschrock if (dn->dn_bonus == NULL) 132ea8dc4b6Seschrock dn->dn_bonus = dbuf_create_bonus(dn); 133fa9e4066Sahrens } 134ea8dc4b6Seschrock db = dn->dn_bonus; 135ea8dc4b6Seschrock rw_exit(&dn->dn_struct_rwlock); 136ea8dc4b6Seschrock mutex_enter(&db->db_mtx); 137ea8dc4b6Seschrock count = refcount_add(&db->db_holds, tag); 138ea8dc4b6Seschrock mutex_exit(&db->db_mtx); 139ea8dc4b6Seschrock if (count == 1) 140ea8dc4b6Seschrock dnode_add_ref(dn, db); 141fa9e4066Sahrens dnode_rele(dn, FTAG); 142ea8dc4b6Seschrock 143ea8dc4b6Seschrock VERIFY(0 == dbuf_read(db, NULL, DB_RF_MUST_SUCCEED)); 144ea8dc4b6Seschrock 145ea8dc4b6Seschrock *dbp = &db->db; 146ea8dc4b6Seschrock return (0); 147fa9e4066Sahrens } 148fa9e4066Sahrens 149ea8dc4b6Seschrock int 150ea8dc4b6Seschrock dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset, 151ea8dc4b6Seschrock uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) 152fa9e4066Sahrens { 153ea8dc4b6Seschrock dnode_t *dn; 154fa9e4066Sahrens dmu_buf_t **dbp; 155fa9e4066Sahrens uint64_t blkid, nblks, i; 156ea8dc4b6Seschrock uint32_t flags; 157ea8dc4b6Seschrock int err; 158ea8dc4b6Seschrock zio_t *zio; 159ea8dc4b6Seschrock 160ea8dc4b6Seschrock ASSERT(length <= DMU_MAX_ACCESS); 161fa9e4066Sahrens 162fa9e4066Sahrens if (length == 0) { 163fa9e4066Sahrens if (numbufsp) 164fa9e4066Sahrens *numbufsp = 0; 165ea8dc4b6Seschrock *dbpp = NULL; 166ea8dc4b6Seschrock return (0); 167fa9e4066Sahrens } 168fa9e4066Sahrens 169ea8dc4b6Seschrock flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT; 170e1930233Sbonwick if (length > zfetch_array_rd_sz) 171ea8dc4b6Seschrock flags |= DB_RF_NOPREFETCH; 172ea8dc4b6Seschrock 173ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 174ea8dc4b6Seschrock if (err) 175ea8dc4b6Seschrock return (err); 176ea8dc4b6Seschrock 177fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 178fa9e4066Sahrens if (dn->dn_datablkshift) { 179fa9e4066Sahrens int blkshift = dn->dn_datablkshift; 180fa9e4066Sahrens nblks = (P2ROUNDUP(offset+length, 1ULL<<blkshift) - 181fa9e4066Sahrens P2ALIGN(offset, 1ULL<<blkshift)) >> blkshift; 182fa9e4066Sahrens } else { 183fa9e4066Sahrens ASSERT3U(offset + length, <=, dn->dn_datablksz); 184fa9e4066Sahrens nblks = 1; 185fa9e4066Sahrens } 186ea8dc4b6Seschrock dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP); 187fa9e4066Sahrens 188ea8dc4b6Seschrock zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, TRUE); 189fa9e4066Sahrens blkid = dbuf_whichblock(dn, offset); 190fa9e4066Sahrens for (i = 0; i < nblks; i++) { 191ea8dc4b6Seschrock dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag); 192ea8dc4b6Seschrock if (db == NULL) { 193ea8dc4b6Seschrock rw_exit(&dn->dn_struct_rwlock); 194ea8dc4b6Seschrock dmu_buf_rele_array(dbp, nblks, tag); 195ea8dc4b6Seschrock dnode_rele(dn, FTAG); 196ea8dc4b6Seschrock zio_nowait(zio); 197ea8dc4b6Seschrock return (EIO); 198ea8dc4b6Seschrock } 199ea8dc4b6Seschrock /* initiate async i/o */ 200ea8dc4b6Seschrock if (read && db->db_state == DB_UNCACHED) { 201ea8dc4b6Seschrock rw_exit(&dn->dn_struct_rwlock); 202ea8dc4b6Seschrock (void) dbuf_read(db, zio, flags); 203ea8dc4b6Seschrock rw_enter(&dn->dn_struct_rwlock, RW_READER); 204ea8dc4b6Seschrock } 205ea8dc4b6Seschrock dbp[i] = &db->db; 206fa9e4066Sahrens } 207fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 208fa9e4066Sahrens dnode_rele(dn, FTAG); 209fa9e4066Sahrens 210ea8dc4b6Seschrock /* wait for async i/o */ 211ea8dc4b6Seschrock err = zio_wait(zio); 212ea8dc4b6Seschrock if (err) { 213ea8dc4b6Seschrock dmu_buf_rele_array(dbp, nblks, tag); 214ea8dc4b6Seschrock return (err); 215ea8dc4b6Seschrock } 216ea8dc4b6Seschrock 217ea8dc4b6Seschrock /* wait for other io to complete */ 218ea8dc4b6Seschrock if (read) { 219ea8dc4b6Seschrock for (i = 0; i < nblks; i++) { 220ea8dc4b6Seschrock dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i]; 221ea8dc4b6Seschrock mutex_enter(&db->db_mtx); 222ea8dc4b6Seschrock while (db->db_state == DB_READ || 223ea8dc4b6Seschrock db->db_state == DB_FILL) 224ea8dc4b6Seschrock cv_wait(&db->db_changed, &db->db_mtx); 225ea8dc4b6Seschrock if (db->db_state == DB_UNCACHED) 226ea8dc4b6Seschrock err = EIO; 227ea8dc4b6Seschrock mutex_exit(&db->db_mtx); 228ea8dc4b6Seschrock if (err) { 229ea8dc4b6Seschrock dmu_buf_rele_array(dbp, nblks, tag); 230ea8dc4b6Seschrock return (err); 231ea8dc4b6Seschrock } 232ea8dc4b6Seschrock } 233ea8dc4b6Seschrock } 234ea8dc4b6Seschrock 235ea8dc4b6Seschrock *numbufsp = nblks; 236ea8dc4b6Seschrock *dbpp = dbp; 237ea8dc4b6Seschrock return (0); 238fa9e4066Sahrens } 239fa9e4066Sahrens 240fa9e4066Sahrens void 241ea8dc4b6Seschrock dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, void *tag) 242fa9e4066Sahrens { 243fa9e4066Sahrens int i; 244fa9e4066Sahrens dmu_buf_impl_t **dbp = (dmu_buf_impl_t **)dbp_fake; 245fa9e4066Sahrens 246fa9e4066Sahrens if (numbufs == 0) 247fa9e4066Sahrens return; 248fa9e4066Sahrens 249ea8dc4b6Seschrock for (i = 0; i < numbufs; i++) { 250ea8dc4b6Seschrock if (dbp[i]) 251ea8dc4b6Seschrock dbuf_rele(dbp[i], tag); 252ea8dc4b6Seschrock } 253fa9e4066Sahrens 254fa9e4066Sahrens kmem_free(dbp, sizeof (dmu_buf_t *) * numbufs); 255fa9e4066Sahrens } 256fa9e4066Sahrens 257fa9e4066Sahrens void 258fa9e4066Sahrens dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len) 259fa9e4066Sahrens { 260fa9e4066Sahrens dnode_t *dn; 261fa9e4066Sahrens uint64_t blkid; 262ea8dc4b6Seschrock int nblks, i, err; 263fa9e4066Sahrens 264fa9e4066Sahrens if (len == 0) { /* they're interested in the bonus buffer */ 265fa9e4066Sahrens dn = os->os->os_meta_dnode; 266fa9e4066Sahrens 267fa9e4066Sahrens if (object == 0 || object >= DN_MAX_OBJECT) 268fa9e4066Sahrens return; 269fa9e4066Sahrens 270fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 271fa9e4066Sahrens blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t)); 272fa9e4066Sahrens dbuf_prefetch(dn, blkid); 273fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 274fa9e4066Sahrens return; 275fa9e4066Sahrens } 276fa9e4066Sahrens 277fa9e4066Sahrens /* 278fa9e4066Sahrens * XXX - Note, if the dnode for the requested object is not 279fa9e4066Sahrens * already cached, we will do a *synchronous* read in the 280fa9e4066Sahrens * dnode_hold() call. The same is true for any indirects. 281fa9e4066Sahrens */ 282ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 283ea8dc4b6Seschrock if (err != 0) 284fa9e4066Sahrens return; 285fa9e4066Sahrens 286fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 287fa9e4066Sahrens if (dn->dn_datablkshift) { 288fa9e4066Sahrens int blkshift = dn->dn_datablkshift; 289fa9e4066Sahrens nblks = (P2ROUNDUP(offset+len, 1<<blkshift) - 290fa9e4066Sahrens P2ALIGN(offset, 1<<blkshift)) >> blkshift; 291fa9e4066Sahrens } else { 292fa9e4066Sahrens nblks = (offset < dn->dn_datablksz); 293fa9e4066Sahrens } 294fa9e4066Sahrens 295fa9e4066Sahrens if (nblks != 0) { 296fa9e4066Sahrens blkid = dbuf_whichblock(dn, offset); 297fa9e4066Sahrens for (i = 0; i < nblks; i++) 298fa9e4066Sahrens dbuf_prefetch(dn, blkid+i); 299fa9e4066Sahrens } 300fa9e4066Sahrens 301fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 302fa9e4066Sahrens 303fa9e4066Sahrens dnode_rele(dn, FTAG); 304fa9e4066Sahrens } 305fa9e4066Sahrens 306ea8dc4b6Seschrock int 307fa9e4066Sahrens dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, 308fa9e4066Sahrens uint64_t size, dmu_tx_t *tx) 309fa9e4066Sahrens { 310ea8dc4b6Seschrock dnode_t *dn; 311ea8dc4b6Seschrock int err = dnode_hold(os->os, object, FTAG, &dn); 312ea8dc4b6Seschrock if (err) 313ea8dc4b6Seschrock return (err); 314fa9e4066Sahrens ASSERT(offset < UINT64_MAX); 315fa9e4066Sahrens ASSERT(size == -1ULL || size <= UINT64_MAX - offset); 316fa9e4066Sahrens dnode_free_range(dn, offset, size, tx); 317fa9e4066Sahrens dnode_rele(dn, FTAG); 318ea8dc4b6Seschrock return (0); 319fa9e4066Sahrens } 320fa9e4066Sahrens 321ea8dc4b6Seschrock int 322ea8dc4b6Seschrock dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 323ea8dc4b6Seschrock void *buf) 324fa9e4066Sahrens { 325fa9e4066Sahrens dnode_t *dn; 326fa9e4066Sahrens dmu_buf_t **dbp; 327ea8dc4b6Seschrock int numbufs, i, err; 328fa9e4066Sahrens 329ea8dc4b6Seschrock /* 330ea8dc4b6Seschrock * Deal with odd block sizes, where there can't be data past the 331ea8dc4b6Seschrock * first block. 332ea8dc4b6Seschrock */ 333ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 334ea8dc4b6Seschrock if (err) 335ea8dc4b6Seschrock return (err); 336fa9e4066Sahrens if (dn->dn_datablkshift == 0) { 337fa9e4066Sahrens int newsz = offset > dn->dn_datablksz ? 0 : 338fa9e4066Sahrens MIN(size, dn->dn_datablksz - offset); 339fa9e4066Sahrens bzero((char *)buf + newsz, size - newsz); 340fa9e4066Sahrens size = newsz; 341fa9e4066Sahrens } 342fa9e4066Sahrens dnode_rele(dn, FTAG); 343fa9e4066Sahrens 344fa9e4066Sahrens while (size > 0) { 345fa9e4066Sahrens uint64_t mylen = MIN(size, DMU_MAX_ACCESS / 2); 346fa9e4066Sahrens int err; 347fa9e4066Sahrens 348fa9e4066Sahrens /* 349fa9e4066Sahrens * NB: we could do this block-at-a-time, but it's nice 350fa9e4066Sahrens * to be reading in parallel. 351fa9e4066Sahrens */ 352ea8dc4b6Seschrock err = dmu_buf_hold_array(os, object, offset, mylen, 353ea8dc4b6Seschrock TRUE, FTAG, &numbufs, &dbp); 354ea8dc4b6Seschrock if (err) 355fa9e4066Sahrens return (err); 356fa9e4066Sahrens 357fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 358fa9e4066Sahrens int tocpy; 359fa9e4066Sahrens int bufoff; 360fa9e4066Sahrens dmu_buf_t *db = dbp[i]; 361fa9e4066Sahrens 362fa9e4066Sahrens ASSERT(size > 0); 363fa9e4066Sahrens 364fa9e4066Sahrens bufoff = offset - db->db_offset; 365fa9e4066Sahrens tocpy = (int)MIN(db->db_size - bufoff, size); 366fa9e4066Sahrens 367fa9e4066Sahrens bcopy((char *)db->db_data + bufoff, buf, tocpy); 368fa9e4066Sahrens 369fa9e4066Sahrens offset += tocpy; 370fa9e4066Sahrens size -= tocpy; 371fa9e4066Sahrens buf = (char *)buf + tocpy; 372fa9e4066Sahrens } 373ea8dc4b6Seschrock dmu_buf_rele_array(dbp, numbufs, FTAG); 374fa9e4066Sahrens } 375fa9e4066Sahrens return (0); 376fa9e4066Sahrens } 377fa9e4066Sahrens 378fa9e4066Sahrens void 379fa9e4066Sahrens dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 380fa9e4066Sahrens const void *buf, dmu_tx_t *tx) 381fa9e4066Sahrens { 382fa9e4066Sahrens dmu_buf_t **dbp; 383fa9e4066Sahrens int numbufs, i; 384fa9e4066Sahrens 385ea8dc4b6Seschrock VERIFY(0 == dmu_buf_hold_array(os, object, offset, size, 386ea8dc4b6Seschrock FALSE, FTAG, &numbufs, &dbp)); 387fa9e4066Sahrens 388fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 389fa9e4066Sahrens int tocpy; 390fa9e4066Sahrens int bufoff; 391fa9e4066Sahrens dmu_buf_t *db = dbp[i]; 392fa9e4066Sahrens 393fa9e4066Sahrens ASSERT(size > 0); 394fa9e4066Sahrens 395fa9e4066Sahrens bufoff = offset - db->db_offset; 396fa9e4066Sahrens tocpy = (int)MIN(db->db_size - bufoff, size); 397fa9e4066Sahrens 398fa9e4066Sahrens ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 399fa9e4066Sahrens 400fa9e4066Sahrens if (tocpy == db->db_size) 401fa9e4066Sahrens dmu_buf_will_fill(db, tx); 402fa9e4066Sahrens else 403fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 404fa9e4066Sahrens 405fa9e4066Sahrens bcopy(buf, (char *)db->db_data + bufoff, tocpy); 406fa9e4066Sahrens 407fa9e4066Sahrens if (tocpy == db->db_size) 408fa9e4066Sahrens dmu_buf_fill_done(db, tx); 409fa9e4066Sahrens 410fa9e4066Sahrens offset += tocpy; 411fa9e4066Sahrens size -= tocpy; 412fa9e4066Sahrens buf = (char *)buf + tocpy; 413fa9e4066Sahrens } 414ea8dc4b6Seschrock dmu_buf_rele_array(dbp, numbufs, FTAG); 415fa9e4066Sahrens } 416fa9e4066Sahrens 417fa9e4066Sahrens #ifdef _KERNEL 418fa9e4066Sahrens int 419fa9e4066Sahrens dmu_write_uio(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 420fa9e4066Sahrens uio_t *uio, dmu_tx_t *tx) 421fa9e4066Sahrens { 422fa9e4066Sahrens dmu_buf_t **dbp; 423fa9e4066Sahrens int numbufs, i; 424fa9e4066Sahrens int err = 0; 425fa9e4066Sahrens 426ea8dc4b6Seschrock err = dmu_buf_hold_array(os, object, offset, size, 427ea8dc4b6Seschrock FALSE, FTAG, &numbufs, &dbp); 428ea8dc4b6Seschrock if (err) 429ea8dc4b6Seschrock return (err); 430fa9e4066Sahrens 431fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 432fa9e4066Sahrens int tocpy; 433fa9e4066Sahrens int bufoff; 434fa9e4066Sahrens dmu_buf_t *db = dbp[i]; 435fa9e4066Sahrens 436fa9e4066Sahrens ASSERT(size > 0); 437fa9e4066Sahrens 438fa9e4066Sahrens bufoff = offset - db->db_offset; 439fa9e4066Sahrens tocpy = (int)MIN(db->db_size - bufoff, size); 440fa9e4066Sahrens 441fa9e4066Sahrens ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 442fa9e4066Sahrens 443fa9e4066Sahrens if (tocpy == db->db_size) 444fa9e4066Sahrens dmu_buf_will_fill(db, tx); 445fa9e4066Sahrens else 446fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 447fa9e4066Sahrens 448fa9e4066Sahrens /* 449fa9e4066Sahrens * XXX uiomove could block forever (eg. nfs-backed 450fa9e4066Sahrens * pages). There needs to be a uiolockdown() function 451fa9e4066Sahrens * to lock the pages in memory, so that uiomove won't 452fa9e4066Sahrens * block. 453fa9e4066Sahrens */ 454fa9e4066Sahrens err = uiomove((char *)db->db_data + bufoff, tocpy, 455fa9e4066Sahrens UIO_WRITE, uio); 456fa9e4066Sahrens 457fa9e4066Sahrens if (tocpy == db->db_size) 458fa9e4066Sahrens dmu_buf_fill_done(db, tx); 459fa9e4066Sahrens 460fa9e4066Sahrens if (err) 461fa9e4066Sahrens break; 462fa9e4066Sahrens 463fa9e4066Sahrens offset += tocpy; 464fa9e4066Sahrens size -= tocpy; 465fa9e4066Sahrens } 466ea8dc4b6Seschrock dmu_buf_rele_array(dbp, numbufs, FTAG); 467fa9e4066Sahrens return (err); 468fa9e4066Sahrens } 469fa9e4066Sahrens #endif 470fa9e4066Sahrens 471fa9e4066Sahrens struct backuparg { 472fa9e4066Sahrens dmu_replay_record_t *drr; 473fa9e4066Sahrens vnode_t *vp; 474fa9e4066Sahrens objset_t *os; 475ea8dc4b6Seschrock zio_cksum_t zc; 476fa9e4066Sahrens int err; 477fa9e4066Sahrens }; 478fa9e4066Sahrens 479fa9e4066Sahrens static int 480fa9e4066Sahrens dump_bytes(struct backuparg *ba, void *buf, int len) 481fa9e4066Sahrens { 482fa9e4066Sahrens ssize_t resid; /* have to get resid to get detailed errno */ 483fa9e4066Sahrens ASSERT3U(len % 8, ==, 0); 484ea8dc4b6Seschrock 485ea8dc4b6Seschrock fletcher_4_incremental_native(buf, len, &ba->zc); 486fa9e4066Sahrens ba->err = vn_rdwr(UIO_WRITE, ba->vp, 487fa9e4066Sahrens (caddr_t)buf, len, 48893fcfe85Sahrens 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); 489fa9e4066Sahrens return (ba->err); 490fa9e4066Sahrens } 491fa9e4066Sahrens 492fa9e4066Sahrens static int 493fa9e4066Sahrens dump_free(struct backuparg *ba, uint64_t object, uint64_t offset, 494fa9e4066Sahrens uint64_t length) 495fa9e4066Sahrens { 496fa9e4066Sahrens /* write a FREE record */ 497fa9e4066Sahrens bzero(ba->drr, sizeof (dmu_replay_record_t)); 498fa9e4066Sahrens ba->drr->drr_type = DRR_FREE; 499fa9e4066Sahrens ba->drr->drr_u.drr_free.drr_object = object; 500fa9e4066Sahrens ba->drr->drr_u.drr_free.drr_offset = offset; 501fa9e4066Sahrens ba->drr->drr_u.drr_free.drr_length = length; 502fa9e4066Sahrens 503fa9e4066Sahrens if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) 504fa9e4066Sahrens return (EINTR); 505fa9e4066Sahrens return (0); 506fa9e4066Sahrens } 507fa9e4066Sahrens 508fa9e4066Sahrens static int 509fa9e4066Sahrens dump_data(struct backuparg *ba, dmu_object_type_t type, 510fa9e4066Sahrens uint64_t object, uint64_t offset, int blksz, void *data) 511fa9e4066Sahrens { 512fa9e4066Sahrens /* write a DATA record */ 513fa9e4066Sahrens bzero(ba->drr, sizeof (dmu_replay_record_t)); 514fa9e4066Sahrens ba->drr->drr_type = DRR_WRITE; 515fa9e4066Sahrens ba->drr->drr_u.drr_write.drr_object = object; 516fa9e4066Sahrens ba->drr->drr_u.drr_write.drr_type = type; 517fa9e4066Sahrens ba->drr->drr_u.drr_write.drr_offset = offset; 518fa9e4066Sahrens ba->drr->drr_u.drr_write.drr_length = blksz; 519fa9e4066Sahrens 520fa9e4066Sahrens if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) 521fa9e4066Sahrens return (EINTR); 522fa9e4066Sahrens if (dump_bytes(ba, data, blksz)) 523fa9e4066Sahrens return (EINTR); 524fa9e4066Sahrens return (0); 525fa9e4066Sahrens } 526fa9e4066Sahrens 527fa9e4066Sahrens static int 528fa9e4066Sahrens dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs) 529fa9e4066Sahrens { 530fa9e4066Sahrens /* write a FREEOBJECTS record */ 531fa9e4066Sahrens bzero(ba->drr, sizeof (dmu_replay_record_t)); 532fa9e4066Sahrens ba->drr->drr_type = DRR_FREEOBJECTS; 533fa9e4066Sahrens ba->drr->drr_u.drr_freeobjects.drr_firstobj = firstobj; 534fa9e4066Sahrens ba->drr->drr_u.drr_freeobjects.drr_numobjs = numobjs; 535fa9e4066Sahrens 536fa9e4066Sahrens if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) 537fa9e4066Sahrens return (EINTR); 538fa9e4066Sahrens return (0); 539fa9e4066Sahrens } 540fa9e4066Sahrens 541fa9e4066Sahrens static int 542fa9e4066Sahrens dump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp) 543fa9e4066Sahrens { 544fa9e4066Sahrens if (dnp == NULL || dnp->dn_type == DMU_OT_NONE) 545fa9e4066Sahrens return (dump_freeobjects(ba, object, 1)); 546fa9e4066Sahrens 547fa9e4066Sahrens /* write an OBJECT record */ 548fa9e4066Sahrens bzero(ba->drr, sizeof (dmu_replay_record_t)); 549fa9e4066Sahrens ba->drr->drr_type = DRR_OBJECT; 550fa9e4066Sahrens ba->drr->drr_u.drr_object.drr_object = object; 551fa9e4066Sahrens ba->drr->drr_u.drr_object.drr_type = dnp->dn_type; 552fa9e4066Sahrens ba->drr->drr_u.drr_object.drr_bonustype = dnp->dn_bonustype; 553fa9e4066Sahrens ba->drr->drr_u.drr_object.drr_blksz = 554fa9e4066Sahrens dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT; 555fa9e4066Sahrens ba->drr->drr_u.drr_object.drr_bonuslen = dnp->dn_bonuslen; 556fa9e4066Sahrens ba->drr->drr_u.drr_object.drr_checksum = dnp->dn_checksum; 557fa9e4066Sahrens ba->drr->drr_u.drr_object.drr_compress = dnp->dn_compress; 558fa9e4066Sahrens 559fa9e4066Sahrens if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) 560fa9e4066Sahrens return (EINTR); 561fa9e4066Sahrens 562fa9e4066Sahrens if (dump_bytes(ba, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8))) 563fa9e4066Sahrens return (EINTR); 564fa9e4066Sahrens 565fa9e4066Sahrens /* free anything past the end of the file */ 566fa9e4066Sahrens if (dump_free(ba, object, (dnp->dn_maxblkid + 1) * 567fa9e4066Sahrens (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL)) 568fa9e4066Sahrens return (EINTR); 569fa9e4066Sahrens if (ba->err) 570fa9e4066Sahrens return (EINTR); 571fa9e4066Sahrens return (0); 572fa9e4066Sahrens } 573fa9e4066Sahrens 574fa9e4066Sahrens #define BP_SPAN(dnp, level) \ 575fa9e4066Sahrens (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \ 576fa9e4066Sahrens (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) 577fa9e4066Sahrens 578fa9e4066Sahrens static int 579fa9e4066Sahrens backup_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg) 580fa9e4066Sahrens { 581fa9e4066Sahrens struct backuparg *ba = arg; 582fa9e4066Sahrens uint64_t object = bc->bc_bookmark.zb_object; 583fa9e4066Sahrens int level = bc->bc_bookmark.zb_level; 584fa9e4066Sahrens uint64_t blkid = bc->bc_bookmark.zb_blkid; 585fa9e4066Sahrens blkptr_t *bp = bc->bc_blkptr.blk_birth ? &bc->bc_blkptr : NULL; 586fa9e4066Sahrens dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE; 587fa9e4066Sahrens void *data = bc->bc_data; 588fa9e4066Sahrens int err = 0; 589fa9e4066Sahrens 590ea8dc4b6Seschrock if (issig(JUSTLOOKING) && issig(FORREAL)) 591fa9e4066Sahrens return (EINTR); 592fa9e4066Sahrens 593fa9e4066Sahrens ASSERT(data || bp == NULL); 594fa9e4066Sahrens 595fa9e4066Sahrens if (bp == NULL && object == 0) { 596fa9e4066Sahrens uint64_t span = BP_SPAN(bc->bc_dnode, level); 597fa9e4066Sahrens uint64_t dnobj = (blkid * span) >> DNODE_SHIFT; 598fa9e4066Sahrens err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT); 599fa9e4066Sahrens } else if (bp == NULL) { 600fa9e4066Sahrens uint64_t span = BP_SPAN(bc->bc_dnode, level); 601fa9e4066Sahrens err = dump_free(ba, object, blkid * span, span); 602fa9e4066Sahrens } else if (data && level == 0 && type == DMU_OT_DNODE) { 603fa9e4066Sahrens dnode_phys_t *blk = data; 604fa9e4066Sahrens int i; 605fa9e4066Sahrens int blksz = BP_GET_LSIZE(bp); 606fa9e4066Sahrens 607fa9e4066Sahrens for (i = 0; i < blksz >> DNODE_SHIFT; i++) { 608fa9e4066Sahrens uint64_t dnobj = 609fa9e4066Sahrens (blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; 610fa9e4066Sahrens err = dump_dnode(ba, dnobj, blk+i); 611fa9e4066Sahrens if (err) 612fa9e4066Sahrens break; 613fa9e4066Sahrens } 614fa9e4066Sahrens } else if (level == 0 && 615fa9e4066Sahrens type != DMU_OT_DNODE && type != DMU_OT_OBJSET) { 616fa9e4066Sahrens int blksz = BP_GET_LSIZE(bp); 617fa9e4066Sahrens if (data == NULL) { 618fa9e4066Sahrens arc_buf_t *abuf; 619ea8dc4b6Seschrock zbookmark_t zb; 620fa9e4066Sahrens 621ea8dc4b6Seschrock zb.zb_objset = ba->os->os->os_dsl_dataset->ds_object; 622ea8dc4b6Seschrock zb.zb_object = object; 623ea8dc4b6Seschrock zb.zb_level = level; 624ea8dc4b6Seschrock zb.zb_blkid = blkid; 625fa9e4066Sahrens (void) arc_read(NULL, spa, bp, 626fa9e4066Sahrens dmu_ot[type].ot_byteswap, arc_getbuf_func, &abuf, 627fa9e4066Sahrens ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_MUSTSUCCEED, 628ea8dc4b6Seschrock ARC_WAIT, &zb); 629fa9e4066Sahrens 630fa9e4066Sahrens if (abuf) { 631fa9e4066Sahrens err = dump_data(ba, type, object, blkid * blksz, 632fa9e4066Sahrens blksz, abuf->b_data); 633ea8dc4b6Seschrock (void) arc_buf_remove_ref(abuf, &abuf); 634fa9e4066Sahrens } 635fa9e4066Sahrens } else { 636fa9e4066Sahrens err = dump_data(ba, type, object, blkid * blksz, 637fa9e4066Sahrens blksz, data); 638fa9e4066Sahrens } 639fa9e4066Sahrens } 640fa9e4066Sahrens 641fa9e4066Sahrens ASSERT(err == 0 || err == EINTR); 642fa9e4066Sahrens return (err); 643fa9e4066Sahrens } 644fa9e4066Sahrens 645fa9e4066Sahrens int 646fa9e4066Sahrens dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, vnode_t *vp) 647fa9e4066Sahrens { 648fa9e4066Sahrens dsl_dataset_t *ds = tosnap->os->os_dsl_dataset; 649fa9e4066Sahrens dsl_dataset_t *fromds = fromsnap ? fromsnap->os->os_dsl_dataset : NULL; 650fa9e4066Sahrens dmu_replay_record_t *drr; 651fa9e4066Sahrens struct backuparg ba; 652fa9e4066Sahrens int err; 653fa9e4066Sahrens 654fa9e4066Sahrens /* tosnap must be a snapshot */ 655fa9e4066Sahrens if (ds->ds_phys->ds_next_snap_obj == 0) 656fa9e4066Sahrens return (EINVAL); 657fa9e4066Sahrens 658fa9e4066Sahrens /* fromsnap must be an earlier snapshot from the same fs as tosnap */ 659fa9e4066Sahrens if (fromds && (ds->ds_dir != fromds->ds_dir || 660fa9e4066Sahrens fromds->ds_phys->ds_creation_txg >= 661fa9e4066Sahrens ds->ds_phys->ds_creation_txg)) 662fa9e4066Sahrens return (EXDEV); 663fa9e4066Sahrens 664fa9e4066Sahrens drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 665fa9e4066Sahrens drr->drr_type = DRR_BEGIN; 666fa9e4066Sahrens drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; 667fa9e4066Sahrens drr->drr_u.drr_begin.drr_version = DMU_BACKUP_VERSION; 668fa9e4066Sahrens drr->drr_u.drr_begin.drr_creation_time = 669fa9e4066Sahrens ds->ds_phys->ds_creation_time; 670fa9e4066Sahrens drr->drr_u.drr_begin.drr_type = tosnap->os->os_phys->os_type; 671fa9e4066Sahrens drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; 672fa9e4066Sahrens if (fromds) 673fa9e4066Sahrens drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid; 674fa9e4066Sahrens dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); 675fa9e4066Sahrens 676fa9e4066Sahrens ba.drr = drr; 677fa9e4066Sahrens ba.vp = vp; 678fa9e4066Sahrens ba.os = tosnap; 679ea8dc4b6Seschrock ZIO_SET_CHECKSUM(&ba.zc, 0, 0, 0, 0); 680fa9e4066Sahrens 681fa9e4066Sahrens if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t))) { 682fa9e4066Sahrens kmem_free(drr, sizeof (dmu_replay_record_t)); 683fa9e4066Sahrens return (ba.err); 684fa9e4066Sahrens } 685fa9e4066Sahrens 686fa9e4066Sahrens err = traverse_dsl_dataset(ds, 687fa9e4066Sahrens fromds ? fromds->ds_phys->ds_creation_txg : 0, 688fa9e4066Sahrens ADVANCE_PRE | ADVANCE_HOLES | ADVANCE_DATA | ADVANCE_NOLOCK, 689fa9e4066Sahrens backup_cb, &ba); 690fa9e4066Sahrens 691fa9e4066Sahrens if (err) { 692fa9e4066Sahrens if (err == EINTR && ba.err) 693fa9e4066Sahrens err = ba.err; 694fa9e4066Sahrens return (err); 695fa9e4066Sahrens } 696fa9e4066Sahrens 697fa9e4066Sahrens bzero(drr, sizeof (dmu_replay_record_t)); 698fa9e4066Sahrens drr->drr_type = DRR_END; 699ea8dc4b6Seschrock drr->drr_u.drr_end.drr_checksum = ba.zc; 700fa9e4066Sahrens 701fa9e4066Sahrens if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t))) 702fa9e4066Sahrens return (ba.err); 703fa9e4066Sahrens 704fa9e4066Sahrens kmem_free(drr, sizeof (dmu_replay_record_t)); 705fa9e4066Sahrens 706fa9e4066Sahrens return (0); 707fa9e4066Sahrens } 708fa9e4066Sahrens 709fa9e4066Sahrens struct restorearg { 710fa9e4066Sahrens int err; 711fa9e4066Sahrens int byteswap; 712fa9e4066Sahrens vnode_t *vp; 713fa9e4066Sahrens char *buf; 714fa9e4066Sahrens uint64_t voff; 715fa9e4066Sahrens int buflen; /* number of valid bytes in buf */ 716fa9e4066Sahrens int bufoff; /* next offset to read */ 717fa9e4066Sahrens int bufsize; /* amount of memory allocated for buf */ 718ea8dc4b6Seschrock zio_cksum_t zc; 719fa9e4066Sahrens }; 720fa9e4066Sahrens 721fa9e4066Sahrens static int 722fa9e4066Sahrens replay_incremental_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 723fa9e4066Sahrens { 724fa9e4066Sahrens struct drr_begin *drrb = arg; 725fa9e4066Sahrens dsl_dataset_t *ds = NULL; 726fa9e4066Sahrens dsl_dataset_t *ds_prev = NULL; 727fa9e4066Sahrens const char *snapname; 728fa9e4066Sahrens int err = EINVAL; 729fa9e4066Sahrens uint64_t val; 730fa9e4066Sahrens 731fa9e4066Sahrens /* this must be a filesytem */ 732fa9e4066Sahrens if (dd->dd_phys->dd_head_dataset_obj == 0) 733fa9e4066Sahrens goto die; 734fa9e4066Sahrens 735ea8dc4b6Seschrock err = dsl_dataset_open_obj(dd->dd_pool, 736ea8dc4b6Seschrock dd->dd_phys->dd_head_dataset_obj, 737ea8dc4b6Seschrock NULL, DS_MODE_EXCLUSIVE, FTAG, &ds); 738ea8dc4b6Seschrock if (err) 739ea8dc4b6Seschrock goto die; 740fa9e4066Sahrens 741fa9e4066Sahrens if (ds == NULL) { 742fa9e4066Sahrens err = EBUSY; 743fa9e4066Sahrens goto die; 744fa9e4066Sahrens } 745fa9e4066Sahrens 746fa9e4066Sahrens /* must already be a snapshot of this fs */ 747fa9e4066Sahrens if (ds->ds_phys->ds_prev_snap_obj == 0) { 748fa9e4066Sahrens err = ENODEV; 749fa9e4066Sahrens goto die; 750fa9e4066Sahrens } 751fa9e4066Sahrens 752fa9e4066Sahrens /* most recent snapshot must match fromguid */ 753ea8dc4b6Seschrock err = dsl_dataset_open_obj(dd->dd_pool, 754fa9e4066Sahrens ds->ds_phys->ds_prev_snap_obj, NULL, 755ea8dc4b6Seschrock DS_MODE_STANDARD | DS_MODE_READONLY, FTAG, &ds_prev); 756ea8dc4b6Seschrock if (err) 757ea8dc4b6Seschrock goto die; 758fa9e4066Sahrens if (ds_prev->ds_phys->ds_guid != drrb->drr_fromguid) { 759fa9e4066Sahrens err = ENODEV; 760fa9e4066Sahrens goto die; 761fa9e4066Sahrens } 762fa9e4066Sahrens 763fa9e4066Sahrens /* must not have any changes since most recent snapshot */ 764fa9e4066Sahrens if (ds->ds_phys->ds_bp.blk_birth > 765fa9e4066Sahrens ds_prev->ds_phys->ds_creation_txg) { 766fa9e4066Sahrens err = ETXTBSY; 767fa9e4066Sahrens goto die; 768fa9e4066Sahrens } 769fa9e4066Sahrens 770fa9e4066Sahrens /* new snapshot name must not exist */ 771fa9e4066Sahrens snapname = strrchr(drrb->drr_toname, '@'); 772fa9e4066Sahrens if (snapname == NULL) { 773fa9e4066Sahrens err = EEXIST; 774fa9e4066Sahrens goto die; 775fa9e4066Sahrens } 776fa9e4066Sahrens snapname++; 777fa9e4066Sahrens err = zap_lookup(dd->dd_pool->dp_meta_objset, 778fa9e4066Sahrens ds->ds_phys->ds_snapnames_zapobj, snapname, 8, 1, &val); 779fa9e4066Sahrens if (err != ENOENT) { 780fa9e4066Sahrens if (err == 0) 781fa9e4066Sahrens err = EEXIST; 782fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 783fa9e4066Sahrens dsl_dataset_close(ds_prev, DS_MODE_STANDARD, FTAG); 784fa9e4066Sahrens return (err); 785fa9e4066Sahrens } 786fa9e4066Sahrens 787fa9e4066Sahrens dsl_dataset_close(ds_prev, DS_MODE_STANDARD, FTAG); 788fa9e4066Sahrens 789fa9e4066Sahrens /* The point of no (unsuccessful) return. */ 790fa9e4066Sahrens 791fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 792*99653d4eSeschrock ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 793fa9e4066Sahrens 794fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 795fa9e4066Sahrens return (0); 796fa9e4066Sahrens 797fa9e4066Sahrens die: 798fa9e4066Sahrens if (ds_prev) 799fa9e4066Sahrens dsl_dataset_close(ds_prev, DS_MODE_STANDARD, FTAG); 800fa9e4066Sahrens if (ds) 801fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 802fa9e4066Sahrens return (err); 803fa9e4066Sahrens } 804fa9e4066Sahrens 805fa9e4066Sahrens static int 806fa9e4066Sahrens replay_full_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 807fa9e4066Sahrens { 808fa9e4066Sahrens struct drr_begin *drrb = arg; 809fa9e4066Sahrens int err; 810fa9e4066Sahrens char *fsfullname, *fslastname, *cp; 811fa9e4066Sahrens dsl_dataset_t *ds; 812fa9e4066Sahrens 813fa9e4066Sahrens fsfullname = kmem_alloc(MAXNAMELEN, KM_SLEEP); 814fa9e4066Sahrens (void) strncpy(fsfullname, drrb->drr_toname, MAXNAMELEN); 815fa9e4066Sahrens cp = strchr(fsfullname, '@'); 816fa9e4066Sahrens if (cp == NULL) { 817fa9e4066Sahrens kmem_free(fsfullname, MAXNAMELEN); 818fa9e4066Sahrens return (EINVAL); 819fa9e4066Sahrens } 820fa9e4066Sahrens *cp = '\0'; 821fa9e4066Sahrens fslastname = strrchr(fsfullname, '/'); 822fa9e4066Sahrens if (fslastname == NULL) { 823fa9e4066Sahrens kmem_free(fsfullname, MAXNAMELEN); 824fa9e4066Sahrens return (EINVAL); 825fa9e4066Sahrens } 826fa9e4066Sahrens fslastname++; 827fa9e4066Sahrens 828fa9e4066Sahrens err = dsl_dataset_create_sync(dd, fsfullname, fslastname, NULL, tx); 829fa9e4066Sahrens if (err) { 830fa9e4066Sahrens kmem_free(fsfullname, MAXNAMELEN); 831fa9e4066Sahrens return (err); 832fa9e4066Sahrens } 833fa9e4066Sahrens 834fa9e4066Sahrens /* the point of no (unsuccessful) return */ 835fa9e4066Sahrens 836ea8dc4b6Seschrock VERIFY(0 == dsl_dataset_open_spa(dd->dd_pool->dp_spa, fsfullname, 837ea8dc4b6Seschrock DS_MODE_EXCLUSIVE, FTAG, &ds)); 838fa9e4066Sahrens kmem_free(fsfullname, MAXNAMELEN); 839fa9e4066Sahrens 840fa9e4066Sahrens (void) dmu_objset_create_impl(dsl_dataset_get_spa(ds), 841fa9e4066Sahrens ds, drrb->drr_type, tx); 842fa9e4066Sahrens 843fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 844*99653d4eSeschrock ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 845fa9e4066Sahrens 846fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 847fa9e4066Sahrens return (0); 848fa9e4066Sahrens } 849fa9e4066Sahrens 850fa9e4066Sahrens static int 851fa9e4066Sahrens replay_end_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 852fa9e4066Sahrens { 853fa9e4066Sahrens struct drr_begin *drrb = arg; 854fa9e4066Sahrens int err; 855fa9e4066Sahrens char *snapname; 856fa9e4066Sahrens dsl_dataset_t *ds; 857fa9e4066Sahrens 858fa9e4066Sahrens /* XXX verify that drr_toname is in dd */ 859fa9e4066Sahrens 860fa9e4066Sahrens snapname = strchr(drrb->drr_toname, '@'); 861fa9e4066Sahrens if (snapname == NULL) 862fa9e4066Sahrens return (EINVAL); 863fa9e4066Sahrens snapname++; 864fa9e4066Sahrens 865fa9e4066Sahrens /* create snapshot */ 866fa9e4066Sahrens err = dsl_dataset_snapshot_sync(dd, snapname, tx); 867fa9e4066Sahrens if (err) 868fa9e4066Sahrens return (err); 869fa9e4066Sahrens 870fa9e4066Sahrens /* set snapshot's creation time and guid */ 871ea8dc4b6Seschrock VERIFY(0 == dsl_dataset_open_spa(dd->dd_pool->dp_spa, drrb->drr_toname, 872e1930233Sbonwick DS_MODE_PRIMARY | DS_MODE_READONLY | DS_MODE_INCONSISTENT, 873e1930233Sbonwick FTAG, &ds)); 874fa9e4066Sahrens 875fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 876fa9e4066Sahrens ds->ds_phys->ds_creation_time = drrb->drr_creation_time; 877fa9e4066Sahrens ds->ds_phys->ds_guid = drrb->drr_toguid; 878*99653d4eSeschrock ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 879fa9e4066Sahrens 880fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_PRIMARY, FTAG); 881fa9e4066Sahrens 882ea8dc4b6Seschrock VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, 883ea8dc4b6Seschrock dd->dd_phys->dd_head_dataset_obj, 884e1930233Sbonwick NULL, DS_MODE_STANDARD | DS_MODE_INCONSISTENT, FTAG, &ds)); 885fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 886*99653d4eSeschrock ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 887fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); 888fa9e4066Sahrens 889fa9e4066Sahrens return (0); 890fa9e4066Sahrens } 891fa9e4066Sahrens 892fa9e4066Sahrens void * 893fa9e4066Sahrens restore_read(struct restorearg *ra, int len) 894fa9e4066Sahrens { 895fa9e4066Sahrens void *rv; 896fa9e4066Sahrens 897fa9e4066Sahrens /* some things will require 8-byte alignment, so everything must */ 898fa9e4066Sahrens ASSERT3U(len % 8, ==, 0); 899fa9e4066Sahrens 900fa9e4066Sahrens while (ra->buflen - ra->bufoff < len) { 901fa9e4066Sahrens ssize_t resid; 902fa9e4066Sahrens int leftover = ra->buflen - ra->bufoff; 903fa9e4066Sahrens 904fa9e4066Sahrens (void) memmove(ra->buf, ra->buf + ra->bufoff, leftover); 905fa9e4066Sahrens ra->err = vn_rdwr(UIO_READ, ra->vp, 906fa9e4066Sahrens (caddr_t)ra->buf + leftover, ra->bufsize - leftover, 907fa9e4066Sahrens ra->voff, UIO_SYSSPACE, FAPPEND, 90893fcfe85Sahrens RLIM64_INFINITY, CRED(), &resid); 909fa9e4066Sahrens 910fa9e4066Sahrens ra->voff += ra->bufsize - leftover - resid; 911fa9e4066Sahrens ra->buflen = ra->bufsize - resid; 912fa9e4066Sahrens ra->bufoff = 0; 913fa9e4066Sahrens if (resid == ra->bufsize - leftover) 914fa9e4066Sahrens ra->err = EINVAL; 915fa9e4066Sahrens if (ra->err) 916fa9e4066Sahrens return (NULL); 917ea8dc4b6Seschrock /* Could compute checksum here? */ 918fa9e4066Sahrens } 919fa9e4066Sahrens 920fa9e4066Sahrens ASSERT3U(ra->bufoff % 8, ==, 0); 921fa9e4066Sahrens ASSERT3U(ra->buflen - ra->bufoff, >=, len); 922fa9e4066Sahrens rv = ra->buf + ra->bufoff; 923fa9e4066Sahrens ra->bufoff += len; 924ea8dc4b6Seschrock if (ra->byteswap) 925ea8dc4b6Seschrock fletcher_4_incremental_byteswap(rv, len, &ra->zc); 926ea8dc4b6Seschrock else 927ea8dc4b6Seschrock fletcher_4_incremental_native(rv, len, &ra->zc); 928fa9e4066Sahrens return (rv); 929fa9e4066Sahrens } 930fa9e4066Sahrens 931fa9e4066Sahrens static void 932fa9e4066Sahrens backup_byteswap(dmu_replay_record_t *drr) 933fa9e4066Sahrens { 934fa9e4066Sahrens #define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X)) 935fa9e4066Sahrens #define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X)) 936fa9e4066Sahrens drr->drr_type = BSWAP_32(drr->drr_type); 937fa9e4066Sahrens switch (drr->drr_type) { 938fa9e4066Sahrens case DRR_BEGIN: 939fa9e4066Sahrens DO64(drr_begin.drr_magic); 940fa9e4066Sahrens DO64(drr_begin.drr_version); 941fa9e4066Sahrens DO64(drr_begin.drr_creation_time); 942fa9e4066Sahrens DO32(drr_begin.drr_type); 943fa9e4066Sahrens DO64(drr_begin.drr_toguid); 944fa9e4066Sahrens DO64(drr_begin.drr_fromguid); 945fa9e4066Sahrens break; 946fa9e4066Sahrens case DRR_OBJECT: 947fa9e4066Sahrens DO64(drr_object.drr_object); 948fa9e4066Sahrens /* DO64(drr_object.drr_allocation_txg); */ 949fa9e4066Sahrens DO32(drr_object.drr_type); 950fa9e4066Sahrens DO32(drr_object.drr_bonustype); 951fa9e4066Sahrens DO32(drr_object.drr_blksz); 952fa9e4066Sahrens DO32(drr_object.drr_bonuslen); 953fa9e4066Sahrens break; 954fa9e4066Sahrens case DRR_FREEOBJECTS: 955fa9e4066Sahrens DO64(drr_freeobjects.drr_firstobj); 956fa9e4066Sahrens DO64(drr_freeobjects.drr_numobjs); 957fa9e4066Sahrens break; 958fa9e4066Sahrens case DRR_WRITE: 959fa9e4066Sahrens DO64(drr_write.drr_object); 960fa9e4066Sahrens DO32(drr_write.drr_type); 961fa9e4066Sahrens DO64(drr_write.drr_offset); 962fa9e4066Sahrens DO64(drr_write.drr_length); 963fa9e4066Sahrens break; 964fa9e4066Sahrens case DRR_FREE: 965fa9e4066Sahrens DO64(drr_free.drr_object); 966fa9e4066Sahrens DO64(drr_free.drr_offset); 967fa9e4066Sahrens DO64(drr_free.drr_length); 968fa9e4066Sahrens break; 969fa9e4066Sahrens case DRR_END: 970ea8dc4b6Seschrock DO64(drr_end.drr_checksum.zc_word[0]); 971ea8dc4b6Seschrock DO64(drr_end.drr_checksum.zc_word[1]); 972ea8dc4b6Seschrock DO64(drr_end.drr_checksum.zc_word[2]); 973ea8dc4b6Seschrock DO64(drr_end.drr_checksum.zc_word[3]); 974fa9e4066Sahrens break; 975fa9e4066Sahrens } 976fa9e4066Sahrens #undef DO64 977fa9e4066Sahrens #undef DO32 978fa9e4066Sahrens } 979fa9e4066Sahrens 980fa9e4066Sahrens static int 981fa9e4066Sahrens restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) 982fa9e4066Sahrens { 983fa9e4066Sahrens int err; 984fa9e4066Sahrens dmu_tx_t *tx; 985fa9e4066Sahrens 986fa9e4066Sahrens err = dmu_object_info(os, drro->drr_object, NULL); 987fa9e4066Sahrens 988fa9e4066Sahrens if (err != 0 && err != ENOENT) 989fa9e4066Sahrens return (EINVAL); 990fa9e4066Sahrens 991fa9e4066Sahrens if (drro->drr_type == DMU_OT_NONE || 992fa9e4066Sahrens drro->drr_type >= DMU_OT_NUMTYPES || 993fa9e4066Sahrens drro->drr_bonustype >= DMU_OT_NUMTYPES || 994fa9e4066Sahrens drro->drr_checksum >= ZIO_CHECKSUM_FUNCTIONS || 995fa9e4066Sahrens drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || 996fa9e4066Sahrens P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || 997fa9e4066Sahrens drro->drr_blksz < SPA_MINBLOCKSIZE || 998fa9e4066Sahrens drro->drr_blksz > SPA_MAXBLOCKSIZE || 999fa9e4066Sahrens drro->drr_bonuslen > DN_MAX_BONUSLEN) { 1000fa9e4066Sahrens return (EINVAL); 1001fa9e4066Sahrens } 1002fa9e4066Sahrens 1003fa9e4066Sahrens tx = dmu_tx_create(os); 1004fa9e4066Sahrens 1005fa9e4066Sahrens if (err == ENOENT) { 1006fa9e4066Sahrens /* currently free, want to be allocated */ 1007fa9e4066Sahrens dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1008fa9e4066Sahrens dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 1); 1009fa9e4066Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 1010fa9e4066Sahrens if (err) { 1011fa9e4066Sahrens dmu_tx_abort(tx); 1012fa9e4066Sahrens return (err); 1013fa9e4066Sahrens } 1014fa9e4066Sahrens err = dmu_object_claim(os, drro->drr_object, 1015fa9e4066Sahrens drro->drr_type, drro->drr_blksz, 1016fa9e4066Sahrens drro->drr_bonustype, drro->drr_bonuslen, tx); 1017fa9e4066Sahrens } else { 1018fa9e4066Sahrens /* currently allocated, want to be allocated */ 1019fa9e4066Sahrens dmu_tx_hold_bonus(tx, drro->drr_object); 1020fa9e4066Sahrens /* 1021fa9e4066Sahrens * We may change blocksize, so need to 1022fa9e4066Sahrens * hold_write 1023fa9e4066Sahrens */ 1024fa9e4066Sahrens dmu_tx_hold_write(tx, drro->drr_object, 0, 1); 1025fa9e4066Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 1026fa9e4066Sahrens if (err) { 1027fa9e4066Sahrens dmu_tx_abort(tx); 1028fa9e4066Sahrens return (err); 1029fa9e4066Sahrens } 1030fa9e4066Sahrens 1031fa9e4066Sahrens err = dmu_object_reclaim(os, drro->drr_object, 1032fa9e4066Sahrens drro->drr_type, drro->drr_blksz, 1033fa9e4066Sahrens drro->drr_bonustype, drro->drr_bonuslen, tx); 1034fa9e4066Sahrens } 1035fa9e4066Sahrens if (err) { 1036fa9e4066Sahrens dmu_tx_commit(tx); 1037fa9e4066Sahrens return (EINVAL); 1038fa9e4066Sahrens } 1039fa9e4066Sahrens 1040fa9e4066Sahrens dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksum, tx); 1041fa9e4066Sahrens dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); 1042fa9e4066Sahrens 1043fa9e4066Sahrens if (drro->drr_bonuslen) { 1044fa9e4066Sahrens dmu_buf_t *db; 1045fa9e4066Sahrens void *data; 1046ea8dc4b6Seschrock VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db)); 1047fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 1048fa9e4066Sahrens 1049fa9e4066Sahrens ASSERT3U(db->db_size, ==, drro->drr_bonuslen); 1050fa9e4066Sahrens data = restore_read(ra, P2ROUNDUP(db->db_size, 8)); 1051fa9e4066Sahrens if (data == NULL) { 1052fa9e4066Sahrens dmu_tx_commit(tx); 1053fa9e4066Sahrens return (ra->err); 1054fa9e4066Sahrens } 1055fa9e4066Sahrens bcopy(data, db->db_data, db->db_size); 1056fa9e4066Sahrens if (ra->byteswap) { 1057fa9e4066Sahrens dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data, 1058fa9e4066Sahrens drro->drr_bonuslen); 1059fa9e4066Sahrens } 1060ea8dc4b6Seschrock dmu_buf_rele(db, FTAG); 1061fa9e4066Sahrens } 1062fa9e4066Sahrens dmu_tx_commit(tx); 1063fa9e4066Sahrens return (0); 1064fa9e4066Sahrens } 1065fa9e4066Sahrens 1066fa9e4066Sahrens /* ARGSUSED */ 1067fa9e4066Sahrens static int 1068fa9e4066Sahrens restore_freeobjects(struct restorearg *ra, objset_t *os, 1069fa9e4066Sahrens struct drr_freeobjects *drrfo) 1070fa9e4066Sahrens { 1071fa9e4066Sahrens uint64_t obj; 1072fa9e4066Sahrens 1073fa9e4066Sahrens if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj) 1074fa9e4066Sahrens return (EINVAL); 1075fa9e4066Sahrens 1076fa9e4066Sahrens for (obj = drrfo->drr_firstobj; 1077fa9e4066Sahrens obj < drrfo->drr_firstobj + drrfo->drr_numobjs; obj++) { 1078fa9e4066Sahrens dmu_tx_t *tx; 1079fa9e4066Sahrens int err; 1080fa9e4066Sahrens 1081fa9e4066Sahrens if (dmu_object_info(os, obj, NULL) != 0) 1082fa9e4066Sahrens continue; 1083fa9e4066Sahrens 1084fa9e4066Sahrens tx = dmu_tx_create(os); 1085fa9e4066Sahrens dmu_tx_hold_bonus(tx, obj); 1086fa9e4066Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 1087fa9e4066Sahrens if (err) { 1088fa9e4066Sahrens dmu_tx_abort(tx); 1089fa9e4066Sahrens return (err); 1090fa9e4066Sahrens } 1091fa9e4066Sahrens err = dmu_object_free(os, obj, tx); 1092fa9e4066Sahrens dmu_tx_commit(tx); 1093fa9e4066Sahrens if (err && err != ENOENT) 1094fa9e4066Sahrens return (EINVAL); 1095fa9e4066Sahrens } 1096fa9e4066Sahrens return (0); 1097fa9e4066Sahrens } 1098fa9e4066Sahrens 1099fa9e4066Sahrens static int 1100fa9e4066Sahrens restore_write(struct restorearg *ra, objset_t *os, 1101fa9e4066Sahrens struct drr_write *drrw) 1102fa9e4066Sahrens { 1103fa9e4066Sahrens dmu_tx_t *tx; 1104fa9e4066Sahrens void *data; 1105fa9e4066Sahrens int err; 1106fa9e4066Sahrens 1107fa9e4066Sahrens if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset || 1108fa9e4066Sahrens drrw->drr_type >= DMU_OT_NUMTYPES) 1109fa9e4066Sahrens return (EINVAL); 1110fa9e4066Sahrens 1111fa9e4066Sahrens data = restore_read(ra, drrw->drr_length); 1112fa9e4066Sahrens if (data == NULL) 1113fa9e4066Sahrens return (ra->err); 1114fa9e4066Sahrens 1115fa9e4066Sahrens if (dmu_object_info(os, drrw->drr_object, NULL) != 0) 1116fa9e4066Sahrens return (EINVAL); 1117fa9e4066Sahrens 1118fa9e4066Sahrens tx = dmu_tx_create(os); 1119fa9e4066Sahrens 1120fa9e4066Sahrens dmu_tx_hold_write(tx, drrw->drr_object, 1121fa9e4066Sahrens drrw->drr_offset, drrw->drr_length); 1122fa9e4066Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 1123fa9e4066Sahrens if (err) { 1124fa9e4066Sahrens dmu_tx_abort(tx); 1125fa9e4066Sahrens return (err); 1126fa9e4066Sahrens } 1127fa9e4066Sahrens if (ra->byteswap) 1128fa9e4066Sahrens dmu_ot[drrw->drr_type].ot_byteswap(data, drrw->drr_length); 1129fa9e4066Sahrens dmu_write(os, drrw->drr_object, 1130fa9e4066Sahrens drrw->drr_offset, drrw->drr_length, data, tx); 1131fa9e4066Sahrens dmu_tx_commit(tx); 1132fa9e4066Sahrens return (0); 1133fa9e4066Sahrens } 1134fa9e4066Sahrens 1135fa9e4066Sahrens /* ARGSUSED */ 1136fa9e4066Sahrens static int 1137fa9e4066Sahrens restore_free(struct restorearg *ra, objset_t *os, 1138fa9e4066Sahrens struct drr_free *drrf) 1139fa9e4066Sahrens { 1140fa9e4066Sahrens dmu_tx_t *tx; 1141fa9e4066Sahrens int err; 1142fa9e4066Sahrens 1143fa9e4066Sahrens if (drrf->drr_length != -1ULL && 1144fa9e4066Sahrens drrf->drr_offset + drrf->drr_length < drrf->drr_offset) 1145fa9e4066Sahrens return (EINVAL); 1146fa9e4066Sahrens 1147fa9e4066Sahrens if (dmu_object_info(os, drrf->drr_object, NULL) != 0) 1148fa9e4066Sahrens return (EINVAL); 1149fa9e4066Sahrens 1150fa9e4066Sahrens tx = dmu_tx_create(os); 1151fa9e4066Sahrens 1152fa9e4066Sahrens dmu_tx_hold_free(tx, drrf->drr_object, 1153fa9e4066Sahrens drrf->drr_offset, drrf->drr_length); 1154fa9e4066Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 1155fa9e4066Sahrens if (err) { 1156fa9e4066Sahrens dmu_tx_abort(tx); 1157fa9e4066Sahrens return (err); 1158fa9e4066Sahrens } 1159ea8dc4b6Seschrock err = dmu_free_range(os, drrf->drr_object, 1160fa9e4066Sahrens drrf->drr_offset, drrf->drr_length, tx); 1161fa9e4066Sahrens dmu_tx_commit(tx); 1162ea8dc4b6Seschrock return (err); 1163fa9e4066Sahrens } 1164fa9e4066Sahrens 1165fa9e4066Sahrens int 1166ea8dc4b6Seschrock dmu_recvbackup(char *tosnap, struct drr_begin *drrb, uint64_t *sizep, 1167fa9e4066Sahrens vnode_t *vp, uint64_t voffset) 1168fa9e4066Sahrens { 1169fa9e4066Sahrens struct restorearg ra; 1170fa9e4066Sahrens dmu_replay_record_t *drr; 1171ea8dc4b6Seschrock char *cp; 1172fa9e4066Sahrens dsl_dir_t *dd = NULL; 1173fa9e4066Sahrens objset_t *os = NULL; 1174ea8dc4b6Seschrock zio_cksum_t pzc; 1175fa9e4066Sahrens 1176fa9e4066Sahrens bzero(&ra, sizeof (ra)); 1177fa9e4066Sahrens ra.vp = vp; 1178fa9e4066Sahrens ra.voff = voffset; 1179fa9e4066Sahrens ra.bufsize = 1<<20; 1180fa9e4066Sahrens ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP); 1181fa9e4066Sahrens 1182fa9e4066Sahrens if (drrb->drr_magic == DMU_BACKUP_MAGIC) { 1183fa9e4066Sahrens ra.byteswap = FALSE; 1184fa9e4066Sahrens } else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { 1185fa9e4066Sahrens ra.byteswap = TRUE; 1186fa9e4066Sahrens } else { 1187fa9e4066Sahrens ra.err = EINVAL; 1188fa9e4066Sahrens goto out; 1189fa9e4066Sahrens } 1190fa9e4066Sahrens 1191ea8dc4b6Seschrock /* 1192ea8dc4b6Seschrock * NB: this assumes that struct drr_begin will be the largest in 1193ea8dc4b6Seschrock * dmu_replay_record_t's drr_u, and thus we don't need to pad it 1194ea8dc4b6Seschrock * with zeros to make it the same length as we wrote out. 1195ea8dc4b6Seschrock */ 1196ea8dc4b6Seschrock ((dmu_replay_record_t *)ra.buf)->drr_type = DRR_BEGIN; 1197ea8dc4b6Seschrock ((dmu_replay_record_t *)ra.buf)->drr_pad = 0; 1198ea8dc4b6Seschrock ((dmu_replay_record_t *)ra.buf)->drr_u.drr_begin = *drrb; 1199ea8dc4b6Seschrock if (ra.byteswap) { 1200ea8dc4b6Seschrock fletcher_4_incremental_byteswap(ra.buf, 1201ea8dc4b6Seschrock sizeof (dmu_replay_record_t), &ra.zc); 1202ea8dc4b6Seschrock } else { 1203ea8dc4b6Seschrock fletcher_4_incremental_native(ra.buf, 1204ea8dc4b6Seschrock sizeof (dmu_replay_record_t), &ra.zc); 1205ea8dc4b6Seschrock } 1206ea8dc4b6Seschrock (void) strcpy(drrb->drr_toname, tosnap); /* for the sync funcs */ 1207ea8dc4b6Seschrock 1208fa9e4066Sahrens if (ra.byteswap) { 1209fa9e4066Sahrens drrb->drr_magic = BSWAP_64(drrb->drr_magic); 1210fa9e4066Sahrens drrb->drr_version = BSWAP_64(drrb->drr_version); 1211fa9e4066Sahrens drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); 1212fa9e4066Sahrens drrb->drr_type = BSWAP_32(drrb->drr_type); 1213fa9e4066Sahrens drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); 1214fa9e4066Sahrens drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); 1215fa9e4066Sahrens } 1216fa9e4066Sahrens 1217fa9e4066Sahrens ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC); 1218fa9e4066Sahrens 1219fa9e4066Sahrens if (drrb->drr_version != DMU_BACKUP_VERSION || 1220fa9e4066Sahrens drrb->drr_type >= DMU_OST_NUMTYPES || 1221fa9e4066Sahrens strchr(drrb->drr_toname, '@') == NULL) { 1222fa9e4066Sahrens ra.err = EINVAL; 1223fa9e4066Sahrens goto out; 1224fa9e4066Sahrens } 1225fa9e4066Sahrens 1226fa9e4066Sahrens /* 1227fa9e4066Sahrens * Process the begin in syncing context. 1228fa9e4066Sahrens */ 1229fa9e4066Sahrens if (drrb->drr_fromguid) { 1230fa9e4066Sahrens /* incremental backup */ 1231fa9e4066Sahrens 1232fa9e4066Sahrens cp = strchr(tosnap, '@'); 1233fa9e4066Sahrens *cp = '\0'; 1234ea8dc4b6Seschrock ra.err = dsl_dir_open(tosnap, FTAG, &dd, NULL); 1235fa9e4066Sahrens *cp = '@'; 1236ea8dc4b6Seschrock if (ra.err) 1237fa9e4066Sahrens goto out; 1238fa9e4066Sahrens 1239fa9e4066Sahrens ra.err = dsl_dir_sync_task(dd, replay_incremental_sync, 1240fa9e4066Sahrens drrb, 1<<20); 1241fa9e4066Sahrens } else { 1242fa9e4066Sahrens /* full backup */ 1243fa9e4066Sahrens const char *tail; 1244fa9e4066Sahrens 1245fa9e4066Sahrens cp = strchr(tosnap, '@'); 1246fa9e4066Sahrens *cp = '\0'; 1247ea8dc4b6Seschrock ra.err = dsl_dir_open(tosnap, FTAG, &dd, &tail); 1248fa9e4066Sahrens *cp = '@'; 1249ea8dc4b6Seschrock if (ra.err) 1250fa9e4066Sahrens goto out; 1251fa9e4066Sahrens if (tail == NULL) { 1252fa9e4066Sahrens ra.err = EEXIST; 1253fa9e4066Sahrens goto out; 1254fa9e4066Sahrens } 1255fa9e4066Sahrens 1256fa9e4066Sahrens ra.err = dsl_dir_sync_task(dd, replay_full_sync, 1257fa9e4066Sahrens drrb, 1<<20); 1258fa9e4066Sahrens } 1259fa9e4066Sahrens if (ra.err) 1260fa9e4066Sahrens goto out; 1261fa9e4066Sahrens 1262fa9e4066Sahrens /* 1263fa9e4066Sahrens * Open the objset we are modifying. 1264fa9e4066Sahrens */ 1265fa9e4066Sahrens 1266fa9e4066Sahrens cp = strchr(tosnap, '@'); 1267fa9e4066Sahrens *cp = '\0'; 1268fa9e4066Sahrens ra.err = dmu_objset_open(tosnap, DMU_OST_ANY, 1269e1930233Sbonwick DS_MODE_PRIMARY | DS_MODE_INCONSISTENT, &os); 1270fa9e4066Sahrens *cp = '@'; 1271fa9e4066Sahrens ASSERT3U(ra.err, ==, 0); 1272fa9e4066Sahrens 1273fa9e4066Sahrens /* 1274fa9e4066Sahrens * Read records and process them. 1275fa9e4066Sahrens */ 1276ea8dc4b6Seschrock pzc = ra.zc; 1277fa9e4066Sahrens while (ra.err == 0 && 1278fa9e4066Sahrens NULL != (drr = restore_read(&ra, sizeof (*drr)))) { 1279ea8dc4b6Seschrock if (issig(JUSTLOOKING) && issig(FORREAL)) { 1280fa9e4066Sahrens ra.err = EINTR; 1281fa9e4066Sahrens goto out; 1282fa9e4066Sahrens } 1283fa9e4066Sahrens 1284fa9e4066Sahrens if (ra.byteswap) 1285fa9e4066Sahrens backup_byteswap(drr); 1286fa9e4066Sahrens 1287fa9e4066Sahrens switch (drr->drr_type) { 1288fa9e4066Sahrens case DRR_OBJECT: 1289fa9e4066Sahrens { 1290fa9e4066Sahrens /* 1291fa9e4066Sahrens * We need to make a copy of the record header, 1292fa9e4066Sahrens * because restore_{object,write} may need to 1293fa9e4066Sahrens * restore_read(), which will invalidate drr. 1294fa9e4066Sahrens */ 1295fa9e4066Sahrens struct drr_object drro = drr->drr_u.drr_object; 1296fa9e4066Sahrens ra.err = restore_object(&ra, os, &drro); 1297fa9e4066Sahrens break; 1298fa9e4066Sahrens } 1299fa9e4066Sahrens case DRR_FREEOBJECTS: 1300fa9e4066Sahrens { 1301fa9e4066Sahrens struct drr_freeobjects drrfo = 1302fa9e4066Sahrens drr->drr_u.drr_freeobjects; 1303fa9e4066Sahrens ra.err = restore_freeobjects(&ra, os, &drrfo); 1304fa9e4066Sahrens break; 1305fa9e4066Sahrens } 1306fa9e4066Sahrens case DRR_WRITE: 1307fa9e4066Sahrens { 1308fa9e4066Sahrens struct drr_write drrw = drr->drr_u.drr_write; 1309fa9e4066Sahrens ra.err = restore_write(&ra, os, &drrw); 1310fa9e4066Sahrens break; 1311fa9e4066Sahrens } 1312fa9e4066Sahrens case DRR_FREE: 1313fa9e4066Sahrens { 1314fa9e4066Sahrens struct drr_free drrf = drr->drr_u.drr_free; 1315fa9e4066Sahrens ra.err = restore_free(&ra, os, &drrf); 1316fa9e4066Sahrens break; 1317fa9e4066Sahrens } 1318fa9e4066Sahrens case DRR_END: 1319ea8dc4b6Seschrock { 1320ea8dc4b6Seschrock struct drr_end drre = drr->drr_u.drr_end; 1321ea8dc4b6Seschrock /* 1322ea8dc4b6Seschrock * We compare against the *previous* checksum 1323ea8dc4b6Seschrock * value, because the stored checksum is of 1324ea8dc4b6Seschrock * everything before the DRR_END record. 1325ea8dc4b6Seschrock */ 1326ea8dc4b6Seschrock if (drre.drr_checksum.zc_word[0] != 0 && 1327ea8dc4b6Seschrock ((drre.drr_checksum.zc_word[0] - pzc.zc_word[0]) | 1328ea8dc4b6Seschrock (drre.drr_checksum.zc_word[1] - pzc.zc_word[1]) | 1329ea8dc4b6Seschrock (drre.drr_checksum.zc_word[2] - pzc.zc_word[2]) | 1330ea8dc4b6Seschrock (drre.drr_checksum.zc_word[3] - pzc.zc_word[3]))) { 1331ea8dc4b6Seschrock ra.err = ECKSUM; 1332ea8dc4b6Seschrock goto out; 1333ea8dc4b6Seschrock } 1334ea8dc4b6Seschrock 1335fa9e4066Sahrens /* 1336fa9e4066Sahrens * dd may be the parent of the dd we are 1337fa9e4066Sahrens * restoring into (eg. if it's a full backup). 1338fa9e4066Sahrens */ 1339fa9e4066Sahrens ra.err = dsl_dir_sync_task(dmu_objset_ds(os)-> 1340fa9e4066Sahrens ds_dir, replay_end_sync, drrb, 1<<20); 1341fa9e4066Sahrens goto out; 1342ea8dc4b6Seschrock } 1343fa9e4066Sahrens default: 1344fa9e4066Sahrens ra.err = EINVAL; 1345fa9e4066Sahrens goto out; 1346fa9e4066Sahrens } 1347ea8dc4b6Seschrock pzc = ra.zc; 1348fa9e4066Sahrens } 1349fa9e4066Sahrens 1350fa9e4066Sahrens out: 1351fa9e4066Sahrens if (os) 1352fa9e4066Sahrens dmu_objset_close(os); 1353fa9e4066Sahrens 1354fa9e4066Sahrens /* 1355fa9e4066Sahrens * Make sure we don't rollback/destroy unless we actually 1356fa9e4066Sahrens * processed the begin properly. 'os' will only be set if this 1357fa9e4066Sahrens * is the case. 1358fa9e4066Sahrens */ 1359fa9e4066Sahrens if (ra.err && os && dd && tosnap && strchr(tosnap, '@')) { 1360fa9e4066Sahrens /* 1361fa9e4066Sahrens * rollback or destroy what we created, so we don't 1362fa9e4066Sahrens * leave it in the restoring state. 1363fa9e4066Sahrens */ 1364fa9e4066Sahrens txg_wait_synced(dd->dd_pool, 0); 1365fa9e4066Sahrens if (drrb->drr_fromguid) { 1366fa9e4066Sahrens /* incremental: rollback to most recent snapshot */ 1367fa9e4066Sahrens (void) dsl_dir_sync_task(dd, 1368fa9e4066Sahrens dsl_dataset_rollback_sync, NULL, 0); 1369fa9e4066Sahrens } else { 1370fa9e4066Sahrens /* full: destroy whole fs */ 1371fa9e4066Sahrens cp = strchr(tosnap, '@'); 1372fa9e4066Sahrens *cp = '\0'; 1373fa9e4066Sahrens cp = strchr(tosnap, '/'); 1374fa9e4066Sahrens if (cp) { 1375fa9e4066Sahrens (void) dsl_dir_sync_task(dd, 1376fa9e4066Sahrens dsl_dir_destroy_sync, cp+1, 0); 1377fa9e4066Sahrens } 1378fa9e4066Sahrens cp = strchr(tosnap, '\0'); 1379fa9e4066Sahrens *cp = '@'; 1380fa9e4066Sahrens } 1381fa9e4066Sahrens 1382fa9e4066Sahrens } 1383fa9e4066Sahrens 1384fa9e4066Sahrens if (dd) 1385fa9e4066Sahrens dsl_dir_close(dd, FTAG); 1386fa9e4066Sahrens kmem_free(ra.buf, ra.bufsize); 1387fa9e4066Sahrens if (sizep) 1388fa9e4066Sahrens *sizep = ra.voff; 1389fa9e4066Sahrens return (ra.err); 1390fa9e4066Sahrens } 1391fa9e4066Sahrens 1392fa9e4066Sahrens /* 1393fa9e4066Sahrens * Intent log support: sync the block at <os, object, offset> to disk. 1394fa9e4066Sahrens * N.B. and XXX: the caller is responsible for serializing dmu_sync()s 1395fa9e4066Sahrens * of the same block, and for making sure that the data isn't changing 1396fa9e4066Sahrens * while dmu_sync() is writing it. 1397fa9e4066Sahrens * 1398fa9e4066Sahrens * Return values: 1399fa9e4066Sahrens * 1400fa9e4066Sahrens * EALREADY: this txg has already been synced, so there's nothing to to. 1401fa9e4066Sahrens * The caller should not log the write. 1402fa9e4066Sahrens * 1403fa9e4066Sahrens * ENOENT: the block was dbuf_free_range()'d, so there's nothing to do. 1404fa9e4066Sahrens * The caller should not log the write. 1405fa9e4066Sahrens * 1406fa9e4066Sahrens * EINPROGRESS: the block is in the process of being synced by the 1407fa9e4066Sahrens * usual mechanism (spa_sync()), so we can't sync it here. 1408fa9e4066Sahrens * The caller should txg_wait_synced() and not log the write. 1409fa9e4066Sahrens * 1410fa9e4066Sahrens * EBUSY: another thread is trying to dmu_sync() the same dbuf. 1411fa9e4066Sahrens * (This case cannot arise under the current locking rules.) 1412fa9e4066Sahrens * The caller should txg_wait_synced() and not log the write. 1413fa9e4066Sahrens * 1414fa9e4066Sahrens * ESTALE: the block was dirtied or freed while we were writing it, 1415fa9e4066Sahrens * so the data is no longer valid. 1416fa9e4066Sahrens * The caller should txg_wait_synced() and not log the write. 1417fa9e4066Sahrens * 1418fa9e4066Sahrens * 0: success. Sets *bp to the blkptr just written, and sets 1419fa9e4066Sahrens * *blkoff to the data's offset within that block. 1420fa9e4066Sahrens * The caller should log this blkptr/blkoff in its lr_write_t. 1421fa9e4066Sahrens */ 1422fa9e4066Sahrens int 1423fa9e4066Sahrens dmu_sync(objset_t *os, uint64_t object, uint64_t offset, uint64_t *blkoff, 1424fa9e4066Sahrens blkptr_t *bp, uint64_t txg) 1425fa9e4066Sahrens { 142644cd46caSbillm objset_impl_t *osi = os->os; 142744cd46caSbillm dsl_pool_t *dp = osi->os_dsl_dataset->ds_dir->dd_pool; 1428fa9e4066Sahrens tx_state_t *tx = &dp->dp_tx; 1429fa9e4066Sahrens dmu_buf_impl_t *db; 1430fa9e4066Sahrens blkptr_t *blk; 1431fa9e4066Sahrens int err; 1432ea8dc4b6Seschrock zbookmark_t zb; 1433fa9e4066Sahrens 1434fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&tx->tx_suspend)); 1435fa9e4066Sahrens ASSERT(BP_IS_HOLE(bp)); 1436fa9e4066Sahrens ASSERT(txg != 0); 1437fa9e4066Sahrens 1438fa9e4066Sahrens dprintf("dmu_sync txg=%llu, s,o,q %llu %llu %llu\n", 1439fa9e4066Sahrens txg, tx->tx_synced_txg, tx->tx_open_txg, tx->tx_quiesced_txg); 1440fa9e4066Sahrens 1441fa9e4066Sahrens /* 1442ea8dc4b6Seschrock * XXX why is this routine using dmu_buf_*() and casting between 1443ea8dc4b6Seschrock * dmu_buf_impl_t and dmu_buf_t? 1444ea8dc4b6Seschrock */ 1445ea8dc4b6Seschrock 1446ea8dc4b6Seschrock /* 1447fa9e4066Sahrens * If this txg already synced, there's nothing to do. 1448fa9e4066Sahrens */ 1449fa9e4066Sahrens if (txg <= tx->tx_synced_txg) { 1450fa9e4066Sahrens /* 1451fa9e4066Sahrens * If we're running ziltest, we need the blkptr regardless. 1452fa9e4066Sahrens */ 1453fa9e4066Sahrens if (txg > spa_freeze_txg(dp->dp_spa)) { 1454ea8dc4b6Seschrock err = dmu_buf_hold(os, object, offset, 1455ea8dc4b6Seschrock FTAG, (dmu_buf_t **)&db); 1456ea8dc4b6Seschrock if (err) 1457ea8dc4b6Seschrock return (err); 1458fa9e4066Sahrens /* if db_blkptr == NULL, this was an empty write */ 1459fa9e4066Sahrens if (db->db_blkptr) 1460fa9e4066Sahrens *bp = *db->db_blkptr; /* structure assignment */ 1461fa9e4066Sahrens else 1462fa9e4066Sahrens bzero(bp, sizeof (blkptr_t)); 1463fa9e4066Sahrens *blkoff = offset - db->db.db_offset; 1464fa9e4066Sahrens ASSERT3U(*blkoff, <, db->db.db_size); 1465ea8dc4b6Seschrock dmu_buf_rele((dmu_buf_t *)db, FTAG); 1466fa9e4066Sahrens return (0); 1467fa9e4066Sahrens } 1468fa9e4066Sahrens return (EALREADY); 1469fa9e4066Sahrens } 1470fa9e4066Sahrens 1471fa9e4066Sahrens /* 1472fa9e4066Sahrens * If this txg is in the middle of syncing, just wait for it. 1473fa9e4066Sahrens */ 1474fa9e4066Sahrens if (txg == tx->tx_syncing_txg) { 1475fa9e4066Sahrens ASSERT(txg != tx->tx_open_txg); 1476fa9e4066Sahrens return (EINPROGRESS); 1477fa9e4066Sahrens } 1478fa9e4066Sahrens 1479ea8dc4b6Seschrock err = dmu_buf_hold(os, object, offset, FTAG, (dmu_buf_t **)&db); 1480ea8dc4b6Seschrock if (err) 1481ea8dc4b6Seschrock return (err); 1482fa9e4066Sahrens 1483fa9e4066Sahrens mutex_enter(&db->db_mtx); 1484fa9e4066Sahrens 1485fa9e4066Sahrens /* 1486fa9e4066Sahrens * If this dbuf isn't dirty, must have been free_range'd. 1487fa9e4066Sahrens * There's no need to log writes to freed blocks, so we're done. 1488fa9e4066Sahrens */ 1489fa9e4066Sahrens if (!list_link_active(&db->db_dirty_node[txg&TXG_MASK])) { 1490fa9e4066Sahrens mutex_exit(&db->db_mtx); 1491ea8dc4b6Seschrock dmu_buf_rele((dmu_buf_t *)db, FTAG); 1492fa9e4066Sahrens return (ENOENT); 1493fa9e4066Sahrens } 1494fa9e4066Sahrens 1495fa9e4066Sahrens blk = db->db_d.db_overridden_by[txg&TXG_MASK]; 1496fa9e4066Sahrens 1497fa9e4066Sahrens /* 1498fa9e4066Sahrens * If we already did a dmu_sync() of this dbuf in this txg, 1499fa9e4066Sahrens * free the old block before writing the new one. 1500fa9e4066Sahrens */ 1501fa9e4066Sahrens if (blk != NULL) { 1502fa9e4066Sahrens ASSERT(blk != IN_DMU_SYNC); 1503fa9e4066Sahrens if (blk == IN_DMU_SYNC) { 1504fa9e4066Sahrens mutex_exit(&db->db_mtx); 1505ea8dc4b6Seschrock dmu_buf_rele((dmu_buf_t *)db, FTAG); 1506fa9e4066Sahrens return (EBUSY); 1507fa9e4066Sahrens } 1508fa9e4066Sahrens arc_release(db->db_d.db_data_old[txg&TXG_MASK], db); 1509fa9e4066Sahrens if (!BP_IS_HOLE(blk)) { 151044cd46caSbillm (void) arc_free(NULL, osi->os_spa, txg, blk, 1511fa9e4066Sahrens NULL, NULL, ARC_WAIT); 1512fa9e4066Sahrens } 1513fa9e4066Sahrens kmem_free(blk, sizeof (blkptr_t)); 1514fa9e4066Sahrens } 1515fa9e4066Sahrens 1516fa9e4066Sahrens db->db_d.db_overridden_by[txg&TXG_MASK] = IN_DMU_SYNC; 1517fa9e4066Sahrens mutex_exit(&db->db_mtx); 1518fa9e4066Sahrens 1519fa9e4066Sahrens blk = kmem_alloc(sizeof (blkptr_t), KM_SLEEP); 1520fa9e4066Sahrens blk->blk_birth = 0; /* mark as invalid */ 1521fa9e4066Sahrens 152244cd46caSbillm zb.zb_objset = osi->os_dsl_dataset->ds_object; 1523ea8dc4b6Seschrock zb.zb_object = db->db.db_object; 1524ea8dc4b6Seschrock zb.zb_level = db->db_level; 1525ea8dc4b6Seschrock zb.zb_blkid = db->db_blkid; 152644cd46caSbillm err = arc_write(NULL, osi->os_spa, 152744cd46caSbillm zio_checksum_select(db->db_dnode->dn_checksum, osi->os_checksum), 152844cd46caSbillm zio_compress_select(db->db_dnode->dn_compress, osi->os_compress), 152944cd46caSbillm dmu_get_replication_level(osi->os_spa, &zb, db->db_dnode->dn_type), 1530fa9e4066Sahrens txg, blk, db->db_d.db_data_old[txg&TXG_MASK], NULL, NULL, 1531ea8dc4b6Seschrock ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, ARC_WAIT, &zb); 1532fa9e4066Sahrens ASSERT(err == 0); 1533fa9e4066Sahrens 1534fa9e4066Sahrens if (!BP_IS_HOLE(blk)) { 1535fa9e4066Sahrens blk->blk_fill = 1; 1536fa9e4066Sahrens BP_SET_TYPE(blk, db->db_dnode->dn_type); 1537fa9e4066Sahrens BP_SET_LEVEL(blk, 0); 1538fa9e4066Sahrens } 1539fa9e4066Sahrens 1540fa9e4066Sahrens /* copy the block pointer back to caller */ 1541fa9e4066Sahrens *bp = *blk; /* structure assignment */ 1542fa9e4066Sahrens *blkoff = offset - db->db.db_offset; 1543fa9e4066Sahrens ASSERT3U(*blkoff, <, db->db.db_size); 1544fa9e4066Sahrens 1545fa9e4066Sahrens mutex_enter(&db->db_mtx); 1546fa9e4066Sahrens if (db->db_d.db_overridden_by[txg&TXG_MASK] != IN_DMU_SYNC) { 1547fa9e4066Sahrens /* we were dirtied/freed during the sync */ 1548fa9e4066Sahrens ASSERT3P(db->db_d.db_overridden_by[txg&TXG_MASK], ==, NULL); 1549fa9e4066Sahrens arc_release(db->db_d.db_data_old[txg&TXG_MASK], db); 1550fa9e4066Sahrens mutex_exit(&db->db_mtx); 1551ea8dc4b6Seschrock dmu_buf_rele((dmu_buf_t *)db, FTAG); 1552fa9e4066Sahrens /* Note that this block does not free on disk until txg syncs */ 1553fa9e4066Sahrens 1554fa9e4066Sahrens /* 1555fa9e4066Sahrens * XXX can we use ARC_NOWAIT here? 1556fa9e4066Sahrens * XXX should we be ignoring the return code? 1557fa9e4066Sahrens */ 1558fa9e4066Sahrens if (!BP_IS_HOLE(blk)) { 155944cd46caSbillm (void) arc_free(NULL, osi->os_spa, txg, blk, 1560fa9e4066Sahrens NULL, NULL, ARC_WAIT); 1561fa9e4066Sahrens } 1562fa9e4066Sahrens kmem_free(blk, sizeof (blkptr_t)); 1563fa9e4066Sahrens return (ESTALE); 1564fa9e4066Sahrens } 1565fa9e4066Sahrens 1566fa9e4066Sahrens db->db_d.db_overridden_by[txg&TXG_MASK] = blk; 1567fa9e4066Sahrens mutex_exit(&db->db_mtx); 1568ea8dc4b6Seschrock dmu_buf_rele((dmu_buf_t *)db, FTAG); 1569fa9e4066Sahrens ASSERT3U(txg, >, tx->tx_syncing_txg); 1570fa9e4066Sahrens return (0); 1571fa9e4066Sahrens } 1572fa9e4066Sahrens 1573fa9e4066Sahrens uint64_t 1574fa9e4066Sahrens dmu_object_max_nonzero_offset(objset_t *os, uint64_t object) 1575fa9e4066Sahrens { 1576ea8dc4b6Seschrock dnode_t *dn; 1577ea8dc4b6Seschrock 1578ea8dc4b6Seschrock /* XXX assumes dnode_hold will not get an i/o error */ 1579ea8dc4b6Seschrock (void) dnode_hold(os->os, object, FTAG, &dn); 1580fa9e4066Sahrens uint64_t rv = dnode_max_nonzero_offset(dn); 1581fa9e4066Sahrens dnode_rele(dn, FTAG); 1582fa9e4066Sahrens return (rv); 1583fa9e4066Sahrens } 1584fa9e4066Sahrens 1585fa9e4066Sahrens int 1586fa9e4066Sahrens dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, 1587fa9e4066Sahrens dmu_tx_t *tx) 1588fa9e4066Sahrens { 1589ea8dc4b6Seschrock dnode_t *dn; 1590ea8dc4b6Seschrock int err; 1591ea8dc4b6Seschrock 1592ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 1593ea8dc4b6Seschrock if (err) 1594ea8dc4b6Seschrock return (err); 1595ea8dc4b6Seschrock err = dnode_set_blksz(dn, size, ibs, tx); 1596fa9e4066Sahrens dnode_rele(dn, FTAG); 1597fa9e4066Sahrens return (err); 1598fa9e4066Sahrens } 1599fa9e4066Sahrens 1600fa9e4066Sahrens void 1601fa9e4066Sahrens dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, 1602fa9e4066Sahrens dmu_tx_t *tx) 1603fa9e4066Sahrens { 1604ea8dc4b6Seschrock dnode_t *dn; 1605ea8dc4b6Seschrock 1606ea8dc4b6Seschrock /* XXX assumes dnode_hold will not get an i/o error */ 1607ea8dc4b6Seschrock (void) dnode_hold(os->os, object, FTAG, &dn); 1608fa9e4066Sahrens ASSERT(checksum < ZIO_CHECKSUM_FUNCTIONS); 1609fa9e4066Sahrens dn->dn_checksum = checksum; 1610fa9e4066Sahrens dnode_setdirty(dn, tx); 1611fa9e4066Sahrens dnode_rele(dn, FTAG); 1612fa9e4066Sahrens } 1613fa9e4066Sahrens 1614fa9e4066Sahrens void 1615fa9e4066Sahrens dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress, 1616fa9e4066Sahrens dmu_tx_t *tx) 1617fa9e4066Sahrens { 1618ea8dc4b6Seschrock dnode_t *dn; 1619ea8dc4b6Seschrock 1620ea8dc4b6Seschrock /* XXX assumes dnode_hold will not get an i/o error */ 1621ea8dc4b6Seschrock (void) dnode_hold(os->os, object, FTAG, &dn); 1622fa9e4066Sahrens ASSERT(compress < ZIO_COMPRESS_FUNCTIONS); 1623fa9e4066Sahrens dn->dn_compress = compress; 1624fa9e4066Sahrens dnode_setdirty(dn, tx); 1625fa9e4066Sahrens dnode_rele(dn, FTAG); 1626fa9e4066Sahrens } 1627fa9e4066Sahrens 162844cd46caSbillm /* 162944cd46caSbillm * XXX - eventually, this should take into account per-dataset (or 163044cd46caSbillm * even per-object?) user requests for higher levels of replication. 163144cd46caSbillm */ 163244cd46caSbillm int 163344cd46caSbillm dmu_get_replication_level(spa_t *spa, zbookmark_t *zb, dmu_object_type_t ot) 163444cd46caSbillm { 163544cd46caSbillm int ncopies = 1; 163644cd46caSbillm 163744cd46caSbillm if (dmu_ot[ot].ot_metadata) 163844cd46caSbillm ncopies++; 163944cd46caSbillm if (zb->zb_level != 0) 164044cd46caSbillm ncopies++; 164144cd46caSbillm if (zb->zb_objset == 0 && zb->zb_object == 0) 164244cd46caSbillm ncopies++; 164344cd46caSbillm return (MIN(ncopies, spa_max_replication(spa))); 164444cd46caSbillm } 164544cd46caSbillm 1646fa9e4066Sahrens int 1647fa9e4066Sahrens dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) 1648fa9e4066Sahrens { 1649fa9e4066Sahrens dnode_t *dn; 1650fa9e4066Sahrens int i, err; 1651fa9e4066Sahrens 1652ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 1653ea8dc4b6Seschrock if (err) 1654ea8dc4b6Seschrock return (err); 1655fa9e4066Sahrens /* 1656fa9e4066Sahrens * Sync any current changes before 1657fa9e4066Sahrens * we go trundling through the block pointers. 1658fa9e4066Sahrens */ 1659fa9e4066Sahrens for (i = 0; i < TXG_SIZE; i++) { 1660c543ec06Sahrens if (list_link_active(&dn->dn_dirty_link[i])) 1661fa9e4066Sahrens break; 1662fa9e4066Sahrens } 1663fa9e4066Sahrens if (i != TXG_SIZE) { 1664fa9e4066Sahrens dnode_rele(dn, FTAG); 1665fa9e4066Sahrens txg_wait_synced(dmu_objset_pool(os), 0); 1666ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 1667ea8dc4b6Seschrock if (err) 1668ea8dc4b6Seschrock return (err); 1669fa9e4066Sahrens } 1670fa9e4066Sahrens 1671fa9e4066Sahrens err = dnode_next_offset(dn, hole, off, 1, 1); 1672fa9e4066Sahrens dnode_rele(dn, FTAG); 1673fa9e4066Sahrens 1674fa9e4066Sahrens return (err); 1675fa9e4066Sahrens } 1676fa9e4066Sahrens 1677fa9e4066Sahrens void 1678fa9e4066Sahrens dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi) 1679fa9e4066Sahrens { 1680fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 1681fa9e4066Sahrens mutex_enter(&dn->dn_mtx); 1682fa9e4066Sahrens 1683fa9e4066Sahrens doi->doi_data_block_size = dn->dn_datablksz; 1684fa9e4066Sahrens doi->doi_metadata_block_size = dn->dn_indblkshift ? 1685fa9e4066Sahrens 1ULL << dn->dn_indblkshift : 0; 1686fa9e4066Sahrens doi->doi_indirection = dn->dn_nlevels; 1687fa9e4066Sahrens doi->doi_checksum = dn->dn_checksum; 1688fa9e4066Sahrens doi->doi_compress = dn->dn_compress; 1689*99653d4eSeschrock doi->doi_physical_blks = (DN_USED_BYTES(dn->dn_phys) + 1690*99653d4eSeschrock SPA_MINBLOCKSIZE/2) >> SPA_MINBLOCKSHIFT; 1691fa9e4066Sahrens doi->doi_max_block_offset = dn->dn_phys->dn_maxblkid; 1692fa9e4066Sahrens doi->doi_type = dn->dn_type; 1693fa9e4066Sahrens doi->doi_bonus_size = dn->dn_bonuslen; 1694fa9e4066Sahrens doi->doi_bonus_type = dn->dn_bonustype; 1695fa9e4066Sahrens 1696fa9e4066Sahrens mutex_exit(&dn->dn_mtx); 1697fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 1698fa9e4066Sahrens } 1699fa9e4066Sahrens 1700fa9e4066Sahrens /* 1701fa9e4066Sahrens * Get information on a DMU object. 1702fa9e4066Sahrens * If doi is NULL, just indicates whether the object exists. 1703fa9e4066Sahrens */ 1704fa9e4066Sahrens int 1705fa9e4066Sahrens dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi) 1706fa9e4066Sahrens { 1707ea8dc4b6Seschrock dnode_t *dn; 1708ea8dc4b6Seschrock int err = dnode_hold(os->os, object, FTAG, &dn); 1709fa9e4066Sahrens 1710ea8dc4b6Seschrock if (err) 1711ea8dc4b6Seschrock return (err); 1712fa9e4066Sahrens 1713fa9e4066Sahrens if (doi != NULL) 1714fa9e4066Sahrens dmu_object_info_from_dnode(dn, doi); 1715fa9e4066Sahrens 1716fa9e4066Sahrens dnode_rele(dn, FTAG); 1717fa9e4066Sahrens return (0); 1718fa9e4066Sahrens } 1719fa9e4066Sahrens 1720fa9e4066Sahrens /* 1721fa9e4066Sahrens * As above, but faster; can be used when you have a held dbuf in hand. 1722fa9e4066Sahrens */ 1723fa9e4066Sahrens void 1724fa9e4066Sahrens dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi) 1725fa9e4066Sahrens { 1726fa9e4066Sahrens dmu_object_info_from_dnode(((dmu_buf_impl_t *)db)->db_dnode, doi); 1727fa9e4066Sahrens } 1728fa9e4066Sahrens 1729fa9e4066Sahrens /* 1730fa9e4066Sahrens * Faster still when you only care about the size. 1731fa9e4066Sahrens * This is specifically optimized for zfs_getattr(). 1732fa9e4066Sahrens */ 1733fa9e4066Sahrens void 1734fa9e4066Sahrens dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize, u_longlong_t *nblk512) 1735fa9e4066Sahrens { 1736fa9e4066Sahrens dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode; 1737fa9e4066Sahrens 1738fa9e4066Sahrens *blksize = dn->dn_datablksz; 1739*99653d4eSeschrock /* add 1 for dnode space */ 1740*99653d4eSeschrock *nblk512 = ((DN_USED_BYTES(dn->dn_phys) + SPA_MINBLOCKSIZE/2) >> 1741*99653d4eSeschrock SPA_MINBLOCKSHIFT) + 1; 1742fa9e4066Sahrens } 1743fa9e4066Sahrens 1744ea8dc4b6Seschrock /* 1745ea8dc4b6Seschrock * Given a bookmark, return the name of the dataset, object, and range in 1746ea8dc4b6Seschrock * human-readable format. 1747ea8dc4b6Seschrock */ 1748ea8dc4b6Seschrock int 1749ea8dc4b6Seschrock spa_bookmark_name(spa_t *spa, zbookmark_t *zb, char *dsname, size_t dslen, 1750ea8dc4b6Seschrock char *objname, size_t objlen, char *range, size_t rangelen) 1751ea8dc4b6Seschrock { 1752ea8dc4b6Seschrock dsl_pool_t *dp; 1753ea8dc4b6Seschrock dsl_dataset_t *ds = NULL; 1754ea8dc4b6Seschrock objset_t *os = NULL; 1755ea8dc4b6Seschrock dnode_t *dn = NULL; 1756ea8dc4b6Seschrock int err, shift; 1757ea8dc4b6Seschrock 1758ea8dc4b6Seschrock if (dslen < MAXNAMELEN || objlen < 32 || rangelen < 64) 1759ea8dc4b6Seschrock return (ENOSPC); 1760ea8dc4b6Seschrock 1761ea8dc4b6Seschrock dp = spa_get_dsl(spa); 1762ea8dc4b6Seschrock if (zb->zb_objset != 0) { 1763ea8dc4b6Seschrock rw_enter(&dp->dp_config_rwlock, RW_READER); 1764ea8dc4b6Seschrock err = dsl_dataset_open_obj(dp, zb->zb_objset, 1765ea8dc4b6Seschrock NULL, DS_MODE_NONE, FTAG, &ds); 1766ea8dc4b6Seschrock if (err) { 1767ea8dc4b6Seschrock rw_exit(&dp->dp_config_rwlock); 1768ea8dc4b6Seschrock return (err); 1769ea8dc4b6Seschrock } 1770ea8dc4b6Seschrock dsl_dataset_name(ds, dsname); 1771ea8dc4b6Seschrock dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 1772ea8dc4b6Seschrock rw_exit(&dp->dp_config_rwlock); 1773ea8dc4b6Seschrock 1774ea8dc4b6Seschrock err = dmu_objset_open(dsname, DMU_OST_ANY, DS_MODE_NONE, &os); 1775ea8dc4b6Seschrock if (err) 1776ea8dc4b6Seschrock goto out; 1777ea8dc4b6Seschrock 1778ea8dc4b6Seschrock } else { 1779ea8dc4b6Seschrock dsl_dataset_name(NULL, dsname); 1780ea8dc4b6Seschrock os = dp->dp_meta_objset; 1781ea8dc4b6Seschrock } 1782ea8dc4b6Seschrock 1783ea8dc4b6Seschrock 1784ea8dc4b6Seschrock if (zb->zb_object == DMU_META_DNODE_OBJECT) { 1785ea8dc4b6Seschrock (void) strncpy(objname, "mdn", objlen); 1786ea8dc4b6Seschrock } else { 1787ea8dc4b6Seschrock (void) snprintf(objname, objlen, "%lld", 1788ea8dc4b6Seschrock (longlong_t)zb->zb_object); 1789ea8dc4b6Seschrock } 1790ea8dc4b6Seschrock 1791ea8dc4b6Seschrock err = dnode_hold(os->os, zb->zb_object, FTAG, &dn); 1792ea8dc4b6Seschrock if (err) 1793ea8dc4b6Seschrock goto out; 1794ea8dc4b6Seschrock 1795ea8dc4b6Seschrock shift = (dn->dn_datablkshift?dn->dn_datablkshift:SPA_MAXBLOCKSHIFT) + 1796ea8dc4b6Seschrock zb->zb_level * (dn->dn_indblkshift - SPA_BLKPTRSHIFT); 1797ea8dc4b6Seschrock (void) snprintf(range, rangelen, "%llu-%llu", 1798ea8dc4b6Seschrock (u_longlong_t)(zb->zb_blkid << shift), 1799ea8dc4b6Seschrock (u_longlong_t)((zb->zb_blkid+1) << shift)); 1800ea8dc4b6Seschrock 1801ea8dc4b6Seschrock out: 1802ea8dc4b6Seschrock if (dn) 1803ea8dc4b6Seschrock dnode_rele(dn, FTAG); 1804ea8dc4b6Seschrock if (os && os != dp->dp_meta_objset) 1805ea8dc4b6Seschrock dmu_objset_close(os); 1806ea8dc4b6Seschrock return (err); 1807ea8dc4b6Seschrock } 1808ea8dc4b6Seschrock 1809fa9e4066Sahrens void 1810fa9e4066Sahrens byteswap_uint64_array(void *vbuf, size_t size) 1811fa9e4066Sahrens { 1812fa9e4066Sahrens uint64_t *buf = vbuf; 1813fa9e4066Sahrens size_t count = size >> 3; 1814fa9e4066Sahrens int i; 1815fa9e4066Sahrens 1816fa9e4066Sahrens ASSERT((size & 7) == 0); 1817fa9e4066Sahrens 1818fa9e4066Sahrens for (i = 0; i < count; i++) 1819fa9e4066Sahrens buf[i] = BSWAP_64(buf[i]); 1820fa9e4066Sahrens } 1821fa9e4066Sahrens 1822fa9e4066Sahrens void 1823fa9e4066Sahrens byteswap_uint32_array(void *vbuf, size_t size) 1824fa9e4066Sahrens { 1825fa9e4066Sahrens uint32_t *buf = vbuf; 1826fa9e4066Sahrens size_t count = size >> 2; 1827fa9e4066Sahrens int i; 1828fa9e4066Sahrens 1829fa9e4066Sahrens ASSERT((size & 3) == 0); 1830fa9e4066Sahrens 1831fa9e4066Sahrens for (i = 0; i < count; i++) 1832fa9e4066Sahrens buf[i] = BSWAP_32(buf[i]); 1833fa9e4066Sahrens } 1834fa9e4066Sahrens 1835fa9e4066Sahrens void 1836fa9e4066Sahrens byteswap_uint16_array(void *vbuf, size_t size) 1837fa9e4066Sahrens { 1838fa9e4066Sahrens uint16_t *buf = vbuf; 1839fa9e4066Sahrens size_t count = size >> 1; 1840fa9e4066Sahrens int i; 1841fa9e4066Sahrens 1842fa9e4066Sahrens ASSERT((size & 1) == 0); 1843fa9e4066Sahrens 1844fa9e4066Sahrens for (i = 0; i < count; i++) 1845fa9e4066Sahrens buf[i] = BSWAP_16(buf[i]); 1846fa9e4066Sahrens } 1847fa9e4066Sahrens 1848fa9e4066Sahrens /* ARGSUSED */ 1849fa9e4066Sahrens void 1850fa9e4066Sahrens byteswap_uint8_array(void *vbuf, size_t size) 1851fa9e4066Sahrens { 1852fa9e4066Sahrens } 1853fa9e4066Sahrens 1854fa9e4066Sahrens void 1855fa9e4066Sahrens dmu_init(void) 1856fa9e4066Sahrens { 1857fa9e4066Sahrens dbuf_init(); 1858fa9e4066Sahrens dnode_init(); 1859fa9e4066Sahrens arc_init(); 1860fa9e4066Sahrens } 1861fa9e4066Sahrens 1862fa9e4066Sahrens void 1863fa9e4066Sahrens dmu_fini(void) 1864fa9e4066Sahrens { 1865fa9e4066Sahrens arc_fini(); 1866fa9e4066Sahrens dnode_fini(); 1867fa9e4066Sahrens dbuf_fini(); 1868fa9e4066Sahrens } 1869