1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22ea8dc4b6Seschrock * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23fa9e4066Sahrens * Use is subject to license terms. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 26fa9e4066Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27fa9e4066Sahrens 28fa9e4066Sahrens #include <sys/dmu.h> 29fa9e4066Sahrens #include <sys/dmu_impl.h> 30fa9e4066Sahrens #include <sys/dmu_tx.h> 31fa9e4066Sahrens #include <sys/dbuf.h> 32fa9e4066Sahrens #include <sys/dnode.h> 33fa9e4066Sahrens #include <sys/zfs_context.h> 34fa9e4066Sahrens #include <sys/dmu_objset.h> 35fa9e4066Sahrens #include <sys/dmu_traverse.h> 36fa9e4066Sahrens #include <sys/dsl_dataset.h> 37fa9e4066Sahrens #include <sys/dsl_dir.h> 38fa9e4066Sahrens #include <sys/dsl_pool.h> 391d452cf5Sahrens #include <sys/dsl_synctask.h> 40fa9e4066Sahrens #include <sys/dmu_zfetch.h> 41fa9e4066Sahrens #include <sys/zfs_ioctl.h> 42fa9e4066Sahrens #include <sys/zap.h> 43ea8dc4b6Seschrock #include <sys/zio_checksum.h> 44fa9e4066Sahrens 45fa9e4066Sahrens const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { 46fa9e4066Sahrens { byteswap_uint8_array, TRUE, "unallocated" }, 47fa9e4066Sahrens { zap_byteswap, TRUE, "object directory" }, 48fa9e4066Sahrens { byteswap_uint64_array, TRUE, "object array" }, 49fa9e4066Sahrens { byteswap_uint8_array, TRUE, "packed nvlist" }, 50fa9e4066Sahrens { byteswap_uint64_array, TRUE, "packed nvlist size" }, 51fa9e4066Sahrens { byteswap_uint64_array, TRUE, "bplist" }, 52fa9e4066Sahrens { byteswap_uint64_array, TRUE, "bplist header" }, 53fa9e4066Sahrens { byteswap_uint64_array, TRUE, "SPA space map header" }, 54fa9e4066Sahrens { byteswap_uint64_array, TRUE, "SPA space map" }, 55fa9e4066Sahrens { byteswap_uint64_array, TRUE, "ZIL intent log" }, 56fa9e4066Sahrens { dnode_buf_byteswap, TRUE, "DMU dnode" }, 57fa9e4066Sahrens { dmu_objset_byteswap, TRUE, "DMU objset" }, 58fa9e4066Sahrens { byteswap_uint64_array, TRUE, "DSL directory" }, 59fa9e4066Sahrens { zap_byteswap, TRUE, "DSL directory child map"}, 60fa9e4066Sahrens { zap_byteswap, TRUE, "DSL dataset snap map" }, 61fa9e4066Sahrens { zap_byteswap, TRUE, "DSL props" }, 62fa9e4066Sahrens { byteswap_uint64_array, TRUE, "DSL dataset" }, 63fa9e4066Sahrens { zfs_znode_byteswap, TRUE, "ZFS znode" }, 64fa9e4066Sahrens { zfs_acl_byteswap, TRUE, "ZFS ACL" }, 65fa9e4066Sahrens { byteswap_uint8_array, FALSE, "ZFS plain file" }, 66fa9e4066Sahrens { zap_byteswap, TRUE, "ZFS directory" }, 67fa9e4066Sahrens { zap_byteswap, TRUE, "ZFS master node" }, 68fa9e4066Sahrens { zap_byteswap, TRUE, "ZFS delete queue" }, 69fa9e4066Sahrens { byteswap_uint8_array, FALSE, "zvol object" }, 70fa9e4066Sahrens { zap_byteswap, TRUE, "zvol prop" }, 71fa9e4066Sahrens { byteswap_uint8_array, FALSE, "other uint8[]" }, 72fa9e4066Sahrens { byteswap_uint64_array, FALSE, "other uint64[]" }, 73fa9e4066Sahrens { zap_byteswap, TRUE, "other ZAP" }, 74ea8dc4b6Seschrock { zap_byteswap, TRUE, "persistent error log" }, 75fa9e4066Sahrens }; 76fa9e4066Sahrens 77fa9e4066Sahrens int 78ea8dc4b6Seschrock dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset, 79ea8dc4b6Seschrock void *tag, dmu_buf_t **dbp) 80fa9e4066Sahrens { 81fa9e4066Sahrens dnode_t *dn; 82fa9e4066Sahrens uint64_t blkid; 83fa9e4066Sahrens dmu_buf_impl_t *db; 84ea8dc4b6Seschrock int err; 85fa9e4066Sahrens 86ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 87ea8dc4b6Seschrock if (err) 88ea8dc4b6Seschrock return (err); 89fa9e4066Sahrens blkid = dbuf_whichblock(dn, offset); 90fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 91ea8dc4b6Seschrock db = dbuf_hold(dn, blkid, tag); 92fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 93ea8dc4b6Seschrock if (db == NULL) { 94ea8dc4b6Seschrock err = EIO; 95ea8dc4b6Seschrock } else { 96ea8dc4b6Seschrock err = dbuf_read(db, NULL, DB_RF_CANFAIL); 97ea8dc4b6Seschrock if (err) { 98ea8dc4b6Seschrock dbuf_rele(db, tag); 99ea8dc4b6Seschrock db = NULL; 100ea8dc4b6Seschrock } 101fa9e4066Sahrens } 102fa9e4066Sahrens 103ea8dc4b6Seschrock dnode_rele(dn, FTAG); 104ea8dc4b6Seschrock *dbp = &db->db; 105ea8dc4b6Seschrock return (err); 106fa9e4066Sahrens } 107fa9e4066Sahrens 108fa9e4066Sahrens int 109fa9e4066Sahrens dmu_bonus_max(void) 110fa9e4066Sahrens { 111fa9e4066Sahrens return (DN_MAX_BONUSLEN); 112fa9e4066Sahrens } 113fa9e4066Sahrens 114fa9e4066Sahrens /* 115ea8dc4b6Seschrock * returns ENOENT, EIO, or 0. 116fa9e4066Sahrens */ 117ea8dc4b6Seschrock int 118ea8dc4b6Seschrock dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp) 119fa9e4066Sahrens { 120ea8dc4b6Seschrock dnode_t *dn; 121ea8dc4b6Seschrock int err, count; 122fa9e4066Sahrens dmu_buf_impl_t *db; 123fa9e4066Sahrens 124ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 125ea8dc4b6Seschrock if (err) 126ea8dc4b6Seschrock return (err); 127fa9e4066Sahrens 128fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 129ea8dc4b6Seschrock if (dn->dn_bonus == NULL) { 130fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 131ea8dc4b6Seschrock rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 132ea8dc4b6Seschrock if (dn->dn_bonus == NULL) 133ea8dc4b6Seschrock dn->dn_bonus = dbuf_create_bonus(dn); 134fa9e4066Sahrens } 135ea8dc4b6Seschrock db = dn->dn_bonus; 136ea8dc4b6Seschrock rw_exit(&dn->dn_struct_rwlock); 137ea8dc4b6Seschrock mutex_enter(&db->db_mtx); 138ea8dc4b6Seschrock count = refcount_add(&db->db_holds, tag); 139ea8dc4b6Seschrock mutex_exit(&db->db_mtx); 140ea8dc4b6Seschrock if (count == 1) 141ea8dc4b6Seschrock dnode_add_ref(dn, db); 142fa9e4066Sahrens dnode_rele(dn, FTAG); 143ea8dc4b6Seschrock 144ea8dc4b6Seschrock VERIFY(0 == dbuf_read(db, NULL, DB_RF_MUST_SUCCEED)); 145ea8dc4b6Seschrock 146ea8dc4b6Seschrock *dbp = &db->db; 147ea8dc4b6Seschrock return (0); 148fa9e4066Sahrens } 149fa9e4066Sahrens 15013506d1eSmaybee /* 15113506d1eSmaybee * Note: longer-term, we should modify all of the dmu_buf_*() interfaces 15213506d1eSmaybee * to take a held dnode rather than <os, object> -- the lookup is wasteful, 15313506d1eSmaybee * and can induce severe lock contention when writing to several files 15413506d1eSmaybee * whose dnodes are in the same block. 15513506d1eSmaybee */ 15613506d1eSmaybee static int 15713506d1eSmaybee dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, 158ea8dc4b6Seschrock uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) 159fa9e4066Sahrens { 160fa9e4066Sahrens dmu_buf_t **dbp; 161fa9e4066Sahrens uint64_t blkid, nblks, i; 162ea8dc4b6Seschrock uint32_t flags; 163ea8dc4b6Seschrock int err; 164ea8dc4b6Seschrock zio_t *zio; 165ea8dc4b6Seschrock 166ea8dc4b6Seschrock ASSERT(length <= DMU_MAX_ACCESS); 167fa9e4066Sahrens 168ea8dc4b6Seschrock flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT; 169e1930233Sbonwick if (length > zfetch_array_rd_sz) 170ea8dc4b6Seschrock flags |= DB_RF_NOPREFETCH; 171ea8dc4b6Seschrock 172fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 173fa9e4066Sahrens if (dn->dn_datablkshift) { 174fa9e4066Sahrens int blkshift = dn->dn_datablkshift; 175fa9e4066Sahrens nblks = (P2ROUNDUP(offset+length, 1ULL<<blkshift) - 176fa9e4066Sahrens P2ALIGN(offset, 1ULL<<blkshift)) >> blkshift; 177fa9e4066Sahrens } else { 178fa9e4066Sahrens ASSERT3U(offset + length, <=, dn->dn_datablksz); 179fa9e4066Sahrens nblks = 1; 180fa9e4066Sahrens } 181ea8dc4b6Seschrock dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP); 182fa9e4066Sahrens 183ea8dc4b6Seschrock zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, TRUE); 184fa9e4066Sahrens blkid = dbuf_whichblock(dn, offset); 185fa9e4066Sahrens for (i = 0; i < nblks; i++) { 186ea8dc4b6Seschrock dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag); 187ea8dc4b6Seschrock if (db == NULL) { 188ea8dc4b6Seschrock rw_exit(&dn->dn_struct_rwlock); 189ea8dc4b6Seschrock dmu_buf_rele_array(dbp, nblks, tag); 190ea8dc4b6Seschrock zio_nowait(zio); 191ea8dc4b6Seschrock return (EIO); 192ea8dc4b6Seschrock } 193ea8dc4b6Seschrock /* initiate async i/o */ 19413506d1eSmaybee if (read) { 195ea8dc4b6Seschrock rw_exit(&dn->dn_struct_rwlock); 196ea8dc4b6Seschrock (void) dbuf_read(db, zio, flags); 197ea8dc4b6Seschrock rw_enter(&dn->dn_struct_rwlock, RW_READER); 198ea8dc4b6Seschrock } 199ea8dc4b6Seschrock dbp[i] = &db->db; 200fa9e4066Sahrens } 201fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 202fa9e4066Sahrens 203ea8dc4b6Seschrock /* wait for async i/o */ 204ea8dc4b6Seschrock err = zio_wait(zio); 205ea8dc4b6Seschrock if (err) { 206ea8dc4b6Seschrock dmu_buf_rele_array(dbp, nblks, tag); 207ea8dc4b6Seschrock return (err); 208ea8dc4b6Seschrock } 209ea8dc4b6Seschrock 210ea8dc4b6Seschrock /* wait for other io to complete */ 211ea8dc4b6Seschrock if (read) { 212ea8dc4b6Seschrock for (i = 0; i < nblks; i++) { 213ea8dc4b6Seschrock dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i]; 214ea8dc4b6Seschrock mutex_enter(&db->db_mtx); 215ea8dc4b6Seschrock while (db->db_state == DB_READ || 216ea8dc4b6Seschrock db->db_state == DB_FILL) 217ea8dc4b6Seschrock cv_wait(&db->db_changed, &db->db_mtx); 218ea8dc4b6Seschrock if (db->db_state == DB_UNCACHED) 219ea8dc4b6Seschrock err = EIO; 220ea8dc4b6Seschrock mutex_exit(&db->db_mtx); 221ea8dc4b6Seschrock if (err) { 222ea8dc4b6Seschrock dmu_buf_rele_array(dbp, nblks, tag); 223ea8dc4b6Seschrock return (err); 224ea8dc4b6Seschrock } 225ea8dc4b6Seschrock } 226ea8dc4b6Seschrock } 227ea8dc4b6Seschrock 228ea8dc4b6Seschrock *numbufsp = nblks; 229ea8dc4b6Seschrock *dbpp = dbp; 230ea8dc4b6Seschrock return (0); 231fa9e4066Sahrens } 232fa9e4066Sahrens 23313506d1eSmaybee int 23413506d1eSmaybee dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset, 23513506d1eSmaybee uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) 23613506d1eSmaybee { 23713506d1eSmaybee dnode_t *dn; 23813506d1eSmaybee int err; 23913506d1eSmaybee 24013506d1eSmaybee err = dnode_hold(os->os, object, FTAG, &dn); 24113506d1eSmaybee if (err) 24213506d1eSmaybee return (err); 24313506d1eSmaybee 24413506d1eSmaybee err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, 24513506d1eSmaybee numbufsp, dbpp); 24613506d1eSmaybee 24713506d1eSmaybee dnode_rele(dn, FTAG); 24813506d1eSmaybee 24913506d1eSmaybee return (err); 25013506d1eSmaybee } 25113506d1eSmaybee 25213506d1eSmaybee int 25313506d1eSmaybee dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset, 25413506d1eSmaybee uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) 25513506d1eSmaybee { 25613506d1eSmaybee dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode; 25713506d1eSmaybee int err; 25813506d1eSmaybee 25913506d1eSmaybee err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, 26013506d1eSmaybee numbufsp, dbpp); 26113506d1eSmaybee 26213506d1eSmaybee return (err); 26313506d1eSmaybee } 26413506d1eSmaybee 265fa9e4066Sahrens void 266ea8dc4b6Seschrock dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, void *tag) 267fa9e4066Sahrens { 268fa9e4066Sahrens int i; 269fa9e4066Sahrens dmu_buf_impl_t **dbp = (dmu_buf_impl_t **)dbp_fake; 270fa9e4066Sahrens 271fa9e4066Sahrens if (numbufs == 0) 272fa9e4066Sahrens return; 273fa9e4066Sahrens 274ea8dc4b6Seschrock for (i = 0; i < numbufs; i++) { 275ea8dc4b6Seschrock if (dbp[i]) 276ea8dc4b6Seschrock dbuf_rele(dbp[i], tag); 277ea8dc4b6Seschrock } 278fa9e4066Sahrens 279fa9e4066Sahrens kmem_free(dbp, sizeof (dmu_buf_t *) * numbufs); 280fa9e4066Sahrens } 281fa9e4066Sahrens 282fa9e4066Sahrens void 283fa9e4066Sahrens dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len) 284fa9e4066Sahrens { 285fa9e4066Sahrens dnode_t *dn; 286fa9e4066Sahrens uint64_t blkid; 287ea8dc4b6Seschrock int nblks, i, err; 288fa9e4066Sahrens 289fa9e4066Sahrens if (len == 0) { /* they're interested in the bonus buffer */ 290fa9e4066Sahrens dn = os->os->os_meta_dnode; 291fa9e4066Sahrens 292fa9e4066Sahrens if (object == 0 || object >= DN_MAX_OBJECT) 293fa9e4066Sahrens return; 294fa9e4066Sahrens 295fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 296fa9e4066Sahrens blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t)); 297fa9e4066Sahrens dbuf_prefetch(dn, blkid); 298fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 299fa9e4066Sahrens return; 300fa9e4066Sahrens } 301fa9e4066Sahrens 302fa9e4066Sahrens /* 303fa9e4066Sahrens * XXX - Note, if the dnode for the requested object is not 304fa9e4066Sahrens * already cached, we will do a *synchronous* read in the 305fa9e4066Sahrens * dnode_hold() call. The same is true for any indirects. 306fa9e4066Sahrens */ 307ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 308ea8dc4b6Seschrock if (err != 0) 309fa9e4066Sahrens return; 310fa9e4066Sahrens 311fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 312fa9e4066Sahrens if (dn->dn_datablkshift) { 313fa9e4066Sahrens int blkshift = dn->dn_datablkshift; 314fa9e4066Sahrens nblks = (P2ROUNDUP(offset+len, 1<<blkshift) - 315fa9e4066Sahrens P2ALIGN(offset, 1<<blkshift)) >> blkshift; 316fa9e4066Sahrens } else { 317fa9e4066Sahrens nblks = (offset < dn->dn_datablksz); 318fa9e4066Sahrens } 319fa9e4066Sahrens 320fa9e4066Sahrens if (nblks != 0) { 321fa9e4066Sahrens blkid = dbuf_whichblock(dn, offset); 322fa9e4066Sahrens for (i = 0; i < nblks; i++) 323fa9e4066Sahrens dbuf_prefetch(dn, blkid+i); 324fa9e4066Sahrens } 325fa9e4066Sahrens 326fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 327fa9e4066Sahrens 328fa9e4066Sahrens dnode_rele(dn, FTAG); 329fa9e4066Sahrens } 330fa9e4066Sahrens 331ea8dc4b6Seschrock int 332fa9e4066Sahrens dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, 333fa9e4066Sahrens uint64_t size, dmu_tx_t *tx) 334fa9e4066Sahrens { 335ea8dc4b6Seschrock dnode_t *dn; 336ea8dc4b6Seschrock int err = dnode_hold(os->os, object, FTAG, &dn); 337ea8dc4b6Seschrock if (err) 338ea8dc4b6Seschrock return (err); 339fa9e4066Sahrens ASSERT(offset < UINT64_MAX); 340fa9e4066Sahrens ASSERT(size == -1ULL || size <= UINT64_MAX - offset); 341fa9e4066Sahrens dnode_free_range(dn, offset, size, tx); 342fa9e4066Sahrens dnode_rele(dn, FTAG); 343ea8dc4b6Seschrock return (0); 344fa9e4066Sahrens } 345fa9e4066Sahrens 346ea8dc4b6Seschrock int 347ea8dc4b6Seschrock dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 348ea8dc4b6Seschrock void *buf) 349fa9e4066Sahrens { 350fa9e4066Sahrens dnode_t *dn; 351fa9e4066Sahrens dmu_buf_t **dbp; 352ea8dc4b6Seschrock int numbufs, i, err; 353fa9e4066Sahrens 354ea8dc4b6Seschrock /* 355ea8dc4b6Seschrock * Deal with odd block sizes, where there can't be data past the 356ea8dc4b6Seschrock * first block. 357ea8dc4b6Seschrock */ 358ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 359ea8dc4b6Seschrock if (err) 360ea8dc4b6Seschrock return (err); 361fa9e4066Sahrens if (dn->dn_datablkshift == 0) { 362fa9e4066Sahrens int newsz = offset > dn->dn_datablksz ? 0 : 363fa9e4066Sahrens MIN(size, dn->dn_datablksz - offset); 364fa9e4066Sahrens bzero((char *)buf + newsz, size - newsz); 365fa9e4066Sahrens size = newsz; 366fa9e4066Sahrens } 367fa9e4066Sahrens dnode_rele(dn, FTAG); 368fa9e4066Sahrens 369fa9e4066Sahrens while (size > 0) { 370fa9e4066Sahrens uint64_t mylen = MIN(size, DMU_MAX_ACCESS / 2); 371fa9e4066Sahrens int err; 372fa9e4066Sahrens 373fa9e4066Sahrens /* 374fa9e4066Sahrens * NB: we could do this block-at-a-time, but it's nice 375fa9e4066Sahrens * to be reading in parallel. 376fa9e4066Sahrens */ 377ea8dc4b6Seschrock err = dmu_buf_hold_array(os, object, offset, mylen, 378ea8dc4b6Seschrock TRUE, FTAG, &numbufs, &dbp); 379ea8dc4b6Seschrock if (err) 380fa9e4066Sahrens return (err); 381fa9e4066Sahrens 382fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 383fa9e4066Sahrens int tocpy; 384fa9e4066Sahrens int bufoff; 385fa9e4066Sahrens dmu_buf_t *db = dbp[i]; 386fa9e4066Sahrens 387fa9e4066Sahrens ASSERT(size > 0); 388fa9e4066Sahrens 389fa9e4066Sahrens bufoff = offset - db->db_offset; 390fa9e4066Sahrens tocpy = (int)MIN(db->db_size - bufoff, size); 391fa9e4066Sahrens 392fa9e4066Sahrens bcopy((char *)db->db_data + bufoff, buf, tocpy); 393fa9e4066Sahrens 394fa9e4066Sahrens offset += tocpy; 395fa9e4066Sahrens size -= tocpy; 396fa9e4066Sahrens buf = (char *)buf + tocpy; 397fa9e4066Sahrens } 398ea8dc4b6Seschrock dmu_buf_rele_array(dbp, numbufs, FTAG); 399fa9e4066Sahrens } 400fa9e4066Sahrens return (0); 401fa9e4066Sahrens } 402fa9e4066Sahrens 403fa9e4066Sahrens void 404fa9e4066Sahrens dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 405fa9e4066Sahrens const void *buf, dmu_tx_t *tx) 406fa9e4066Sahrens { 407fa9e4066Sahrens dmu_buf_t **dbp; 408fa9e4066Sahrens int numbufs, i; 409fa9e4066Sahrens 41013506d1eSmaybee if (size == 0) 41113506d1eSmaybee return; 41213506d1eSmaybee 413ea8dc4b6Seschrock VERIFY(0 == dmu_buf_hold_array(os, object, offset, size, 414ea8dc4b6Seschrock FALSE, FTAG, &numbufs, &dbp)); 415fa9e4066Sahrens 416fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 417fa9e4066Sahrens int tocpy; 418fa9e4066Sahrens int bufoff; 419fa9e4066Sahrens dmu_buf_t *db = dbp[i]; 420fa9e4066Sahrens 421fa9e4066Sahrens ASSERT(size > 0); 422fa9e4066Sahrens 423fa9e4066Sahrens bufoff = offset - db->db_offset; 424fa9e4066Sahrens tocpy = (int)MIN(db->db_size - bufoff, size); 425fa9e4066Sahrens 426fa9e4066Sahrens ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 427fa9e4066Sahrens 428fa9e4066Sahrens if (tocpy == db->db_size) 429fa9e4066Sahrens dmu_buf_will_fill(db, tx); 430fa9e4066Sahrens else 431fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 432fa9e4066Sahrens 433fa9e4066Sahrens bcopy(buf, (char *)db->db_data + bufoff, tocpy); 434fa9e4066Sahrens 435fa9e4066Sahrens if (tocpy == db->db_size) 436fa9e4066Sahrens dmu_buf_fill_done(db, tx); 437fa9e4066Sahrens 438fa9e4066Sahrens offset += tocpy; 439fa9e4066Sahrens size -= tocpy; 440fa9e4066Sahrens buf = (char *)buf + tocpy; 441fa9e4066Sahrens } 442ea8dc4b6Seschrock dmu_buf_rele_array(dbp, numbufs, FTAG); 443fa9e4066Sahrens } 444fa9e4066Sahrens 445fa9e4066Sahrens #ifdef _KERNEL 446fa9e4066Sahrens int 447fa9e4066Sahrens dmu_write_uio(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 448fa9e4066Sahrens uio_t *uio, dmu_tx_t *tx) 449fa9e4066Sahrens { 450fa9e4066Sahrens dmu_buf_t **dbp; 451fa9e4066Sahrens int numbufs, i; 452fa9e4066Sahrens int err = 0; 453fa9e4066Sahrens 45413506d1eSmaybee if (size == 0) 45513506d1eSmaybee return (0); 45613506d1eSmaybee 457ea8dc4b6Seschrock err = dmu_buf_hold_array(os, object, offset, size, 458ea8dc4b6Seschrock FALSE, FTAG, &numbufs, &dbp); 459ea8dc4b6Seschrock if (err) 460ea8dc4b6Seschrock return (err); 461fa9e4066Sahrens 462fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 463fa9e4066Sahrens int tocpy; 464fa9e4066Sahrens int bufoff; 465fa9e4066Sahrens dmu_buf_t *db = dbp[i]; 466fa9e4066Sahrens 467fa9e4066Sahrens ASSERT(size > 0); 468fa9e4066Sahrens 469fa9e4066Sahrens bufoff = offset - db->db_offset; 470fa9e4066Sahrens tocpy = (int)MIN(db->db_size - bufoff, size); 471fa9e4066Sahrens 472fa9e4066Sahrens ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 473fa9e4066Sahrens 474fa9e4066Sahrens if (tocpy == db->db_size) 475fa9e4066Sahrens dmu_buf_will_fill(db, tx); 476fa9e4066Sahrens else 477fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 478fa9e4066Sahrens 479fa9e4066Sahrens /* 480fa9e4066Sahrens * XXX uiomove could block forever (eg. nfs-backed 481fa9e4066Sahrens * pages). There needs to be a uiolockdown() function 482fa9e4066Sahrens * to lock the pages in memory, so that uiomove won't 483fa9e4066Sahrens * block. 484fa9e4066Sahrens */ 485fa9e4066Sahrens err = uiomove((char *)db->db_data + bufoff, tocpy, 486fa9e4066Sahrens UIO_WRITE, uio); 487fa9e4066Sahrens 488fa9e4066Sahrens if (tocpy == db->db_size) 489fa9e4066Sahrens dmu_buf_fill_done(db, tx); 490fa9e4066Sahrens 491fa9e4066Sahrens if (err) 492fa9e4066Sahrens break; 493fa9e4066Sahrens 494fa9e4066Sahrens offset += tocpy; 495fa9e4066Sahrens size -= tocpy; 496fa9e4066Sahrens } 497ea8dc4b6Seschrock dmu_buf_rele_array(dbp, numbufs, FTAG); 498fa9e4066Sahrens return (err); 499fa9e4066Sahrens } 500fa9e4066Sahrens #endif 501fa9e4066Sahrens 5021d452cf5Sahrens /* 5031d452cf5Sahrens * XXX move send/recv stuff to its own new file! 5041d452cf5Sahrens */ 5051d452cf5Sahrens 506fa9e4066Sahrens struct backuparg { 507fa9e4066Sahrens dmu_replay_record_t *drr; 508fa9e4066Sahrens vnode_t *vp; 509fa9e4066Sahrens objset_t *os; 510ea8dc4b6Seschrock zio_cksum_t zc; 511fa9e4066Sahrens int err; 512fa9e4066Sahrens }; 513fa9e4066Sahrens 514fa9e4066Sahrens static int 515fa9e4066Sahrens dump_bytes(struct backuparg *ba, void *buf, int len) 516fa9e4066Sahrens { 517fa9e4066Sahrens ssize_t resid; /* have to get resid to get detailed errno */ 518fa9e4066Sahrens ASSERT3U(len % 8, ==, 0); 519ea8dc4b6Seschrock 520ea8dc4b6Seschrock fletcher_4_incremental_native(buf, len, &ba->zc); 521fa9e4066Sahrens ba->err = vn_rdwr(UIO_WRITE, ba->vp, 522fa9e4066Sahrens (caddr_t)buf, len, 52393fcfe85Sahrens 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); 524fa9e4066Sahrens return (ba->err); 525fa9e4066Sahrens } 526fa9e4066Sahrens 527fa9e4066Sahrens static int 528fa9e4066Sahrens dump_free(struct backuparg *ba, uint64_t object, uint64_t offset, 529fa9e4066Sahrens uint64_t length) 530fa9e4066Sahrens { 531fa9e4066Sahrens /* write a FREE record */ 532fa9e4066Sahrens bzero(ba->drr, sizeof (dmu_replay_record_t)); 533fa9e4066Sahrens ba->drr->drr_type = DRR_FREE; 534fa9e4066Sahrens ba->drr->drr_u.drr_free.drr_object = object; 535fa9e4066Sahrens ba->drr->drr_u.drr_free.drr_offset = offset; 536fa9e4066Sahrens ba->drr->drr_u.drr_free.drr_length = length; 537fa9e4066Sahrens 538fa9e4066Sahrens if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) 539fa9e4066Sahrens return (EINTR); 540fa9e4066Sahrens return (0); 541fa9e4066Sahrens } 542fa9e4066Sahrens 543fa9e4066Sahrens static int 544fa9e4066Sahrens dump_data(struct backuparg *ba, dmu_object_type_t type, 545fa9e4066Sahrens uint64_t object, uint64_t offset, int blksz, void *data) 546fa9e4066Sahrens { 547fa9e4066Sahrens /* write a DATA record */ 548fa9e4066Sahrens bzero(ba->drr, sizeof (dmu_replay_record_t)); 549fa9e4066Sahrens ba->drr->drr_type = DRR_WRITE; 550fa9e4066Sahrens ba->drr->drr_u.drr_write.drr_object = object; 551fa9e4066Sahrens ba->drr->drr_u.drr_write.drr_type = type; 552fa9e4066Sahrens ba->drr->drr_u.drr_write.drr_offset = offset; 553fa9e4066Sahrens ba->drr->drr_u.drr_write.drr_length = blksz; 554fa9e4066Sahrens 555fa9e4066Sahrens if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) 556fa9e4066Sahrens return (EINTR); 557fa9e4066Sahrens if (dump_bytes(ba, data, blksz)) 558fa9e4066Sahrens return (EINTR); 559fa9e4066Sahrens return (0); 560fa9e4066Sahrens } 561fa9e4066Sahrens 562fa9e4066Sahrens static int 563fa9e4066Sahrens dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs) 564fa9e4066Sahrens { 565fa9e4066Sahrens /* write a FREEOBJECTS record */ 566fa9e4066Sahrens bzero(ba->drr, sizeof (dmu_replay_record_t)); 567fa9e4066Sahrens ba->drr->drr_type = DRR_FREEOBJECTS; 568fa9e4066Sahrens ba->drr->drr_u.drr_freeobjects.drr_firstobj = firstobj; 569fa9e4066Sahrens ba->drr->drr_u.drr_freeobjects.drr_numobjs = numobjs; 570fa9e4066Sahrens 571fa9e4066Sahrens if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) 572fa9e4066Sahrens return (EINTR); 573fa9e4066Sahrens return (0); 574fa9e4066Sahrens } 575fa9e4066Sahrens 576fa9e4066Sahrens static int 577fa9e4066Sahrens dump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp) 578fa9e4066Sahrens { 579fa9e4066Sahrens if (dnp == NULL || dnp->dn_type == DMU_OT_NONE) 580fa9e4066Sahrens return (dump_freeobjects(ba, object, 1)); 581fa9e4066Sahrens 582fa9e4066Sahrens /* write an OBJECT record */ 583fa9e4066Sahrens bzero(ba->drr, sizeof (dmu_replay_record_t)); 584fa9e4066Sahrens ba->drr->drr_type = DRR_OBJECT; 585fa9e4066Sahrens ba->drr->drr_u.drr_object.drr_object = object; 586fa9e4066Sahrens ba->drr->drr_u.drr_object.drr_type = dnp->dn_type; 587fa9e4066Sahrens ba->drr->drr_u.drr_object.drr_bonustype = dnp->dn_bonustype; 588fa9e4066Sahrens ba->drr->drr_u.drr_object.drr_blksz = 589fa9e4066Sahrens dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT; 590fa9e4066Sahrens ba->drr->drr_u.drr_object.drr_bonuslen = dnp->dn_bonuslen; 591fa9e4066Sahrens ba->drr->drr_u.drr_object.drr_checksum = dnp->dn_checksum; 592fa9e4066Sahrens ba->drr->drr_u.drr_object.drr_compress = dnp->dn_compress; 593fa9e4066Sahrens 594fa9e4066Sahrens if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t))) 595fa9e4066Sahrens return (EINTR); 596fa9e4066Sahrens 597fa9e4066Sahrens if (dump_bytes(ba, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8))) 598fa9e4066Sahrens return (EINTR); 599fa9e4066Sahrens 600fa9e4066Sahrens /* free anything past the end of the file */ 601fa9e4066Sahrens if (dump_free(ba, object, (dnp->dn_maxblkid + 1) * 602fa9e4066Sahrens (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL)) 603fa9e4066Sahrens return (EINTR); 604fa9e4066Sahrens if (ba->err) 605fa9e4066Sahrens return (EINTR); 606fa9e4066Sahrens return (0); 607fa9e4066Sahrens } 608fa9e4066Sahrens 609fa9e4066Sahrens #define BP_SPAN(dnp, level) \ 610fa9e4066Sahrens (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \ 611fa9e4066Sahrens (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) 612fa9e4066Sahrens 613fa9e4066Sahrens static int 614fa9e4066Sahrens backup_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg) 615fa9e4066Sahrens { 616fa9e4066Sahrens struct backuparg *ba = arg; 617fa9e4066Sahrens uint64_t object = bc->bc_bookmark.zb_object; 618fa9e4066Sahrens int level = bc->bc_bookmark.zb_level; 619fa9e4066Sahrens uint64_t blkid = bc->bc_bookmark.zb_blkid; 620fa9e4066Sahrens blkptr_t *bp = bc->bc_blkptr.blk_birth ? &bc->bc_blkptr : NULL; 621fa9e4066Sahrens dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE; 622fa9e4066Sahrens void *data = bc->bc_data; 623fa9e4066Sahrens int err = 0; 624fa9e4066Sahrens 625ea8dc4b6Seschrock if (issig(JUSTLOOKING) && issig(FORREAL)) 626fa9e4066Sahrens return (EINTR); 627fa9e4066Sahrens 628fa9e4066Sahrens ASSERT(data || bp == NULL); 629fa9e4066Sahrens 630fa9e4066Sahrens if (bp == NULL && object == 0) { 631fa9e4066Sahrens uint64_t span = BP_SPAN(bc->bc_dnode, level); 632fa9e4066Sahrens uint64_t dnobj = (blkid * span) >> DNODE_SHIFT; 633fa9e4066Sahrens err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT); 634fa9e4066Sahrens } else if (bp == NULL) { 635fa9e4066Sahrens uint64_t span = BP_SPAN(bc->bc_dnode, level); 636fa9e4066Sahrens err = dump_free(ba, object, blkid * span, span); 637fa9e4066Sahrens } else if (data && level == 0 && type == DMU_OT_DNODE) { 638fa9e4066Sahrens dnode_phys_t *blk = data; 639fa9e4066Sahrens int i; 640fa9e4066Sahrens int blksz = BP_GET_LSIZE(bp); 641fa9e4066Sahrens 642fa9e4066Sahrens for (i = 0; i < blksz >> DNODE_SHIFT; i++) { 643fa9e4066Sahrens uint64_t dnobj = 644fa9e4066Sahrens (blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; 645fa9e4066Sahrens err = dump_dnode(ba, dnobj, blk+i); 646fa9e4066Sahrens if (err) 647fa9e4066Sahrens break; 648fa9e4066Sahrens } 649fa9e4066Sahrens } else if (level == 0 && 650fa9e4066Sahrens type != DMU_OT_DNODE && type != DMU_OT_OBJSET) { 651fa9e4066Sahrens int blksz = BP_GET_LSIZE(bp); 652fa9e4066Sahrens if (data == NULL) { 65313506d1eSmaybee uint32_t aflags = ARC_WAIT; 654fa9e4066Sahrens arc_buf_t *abuf; 655ea8dc4b6Seschrock zbookmark_t zb; 656fa9e4066Sahrens 657ea8dc4b6Seschrock zb.zb_objset = ba->os->os->os_dsl_dataset->ds_object; 658ea8dc4b6Seschrock zb.zb_object = object; 659ea8dc4b6Seschrock zb.zb_level = level; 660ea8dc4b6Seschrock zb.zb_blkid = blkid; 661fa9e4066Sahrens (void) arc_read(NULL, spa, bp, 662fa9e4066Sahrens dmu_ot[type].ot_byteswap, arc_getbuf_func, &abuf, 663fa9e4066Sahrens ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_MUSTSUCCEED, 66413506d1eSmaybee &aflags, &zb); 665fa9e4066Sahrens 666fa9e4066Sahrens if (abuf) { 667fa9e4066Sahrens err = dump_data(ba, type, object, blkid * blksz, 668fa9e4066Sahrens blksz, abuf->b_data); 669ea8dc4b6Seschrock (void) arc_buf_remove_ref(abuf, &abuf); 670fa9e4066Sahrens } 671fa9e4066Sahrens } else { 672fa9e4066Sahrens err = dump_data(ba, type, object, blkid * blksz, 673fa9e4066Sahrens blksz, data); 674fa9e4066Sahrens } 675fa9e4066Sahrens } 676fa9e4066Sahrens 677fa9e4066Sahrens ASSERT(err == 0 || err == EINTR); 678fa9e4066Sahrens return (err); 679fa9e4066Sahrens } 680fa9e4066Sahrens 681fa9e4066Sahrens int 682fa9e4066Sahrens dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, vnode_t *vp) 683fa9e4066Sahrens { 684fa9e4066Sahrens dsl_dataset_t *ds = tosnap->os->os_dsl_dataset; 685fa9e4066Sahrens dsl_dataset_t *fromds = fromsnap ? fromsnap->os->os_dsl_dataset : NULL; 686fa9e4066Sahrens dmu_replay_record_t *drr; 687fa9e4066Sahrens struct backuparg ba; 688fa9e4066Sahrens int err; 689fa9e4066Sahrens 690fa9e4066Sahrens /* tosnap must be a snapshot */ 691fa9e4066Sahrens if (ds->ds_phys->ds_next_snap_obj == 0) 692fa9e4066Sahrens return (EINVAL); 693fa9e4066Sahrens 694fa9e4066Sahrens /* fromsnap must be an earlier snapshot from the same fs as tosnap */ 695fa9e4066Sahrens if (fromds && (ds->ds_dir != fromds->ds_dir || 696fa9e4066Sahrens fromds->ds_phys->ds_creation_txg >= 697fa9e4066Sahrens ds->ds_phys->ds_creation_txg)) 698fa9e4066Sahrens return (EXDEV); 699fa9e4066Sahrens 700fa9e4066Sahrens drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP); 701fa9e4066Sahrens drr->drr_type = DRR_BEGIN; 702fa9e4066Sahrens drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; 703fa9e4066Sahrens drr->drr_u.drr_begin.drr_version = DMU_BACKUP_VERSION; 704fa9e4066Sahrens drr->drr_u.drr_begin.drr_creation_time = 705fa9e4066Sahrens ds->ds_phys->ds_creation_time; 706fa9e4066Sahrens drr->drr_u.drr_begin.drr_type = tosnap->os->os_phys->os_type; 707fa9e4066Sahrens drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid; 708fa9e4066Sahrens if (fromds) 709fa9e4066Sahrens drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid; 710fa9e4066Sahrens dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname); 711fa9e4066Sahrens 712fa9e4066Sahrens ba.drr = drr; 713fa9e4066Sahrens ba.vp = vp; 714fa9e4066Sahrens ba.os = tosnap; 715ea8dc4b6Seschrock ZIO_SET_CHECKSUM(&ba.zc, 0, 0, 0, 0); 716fa9e4066Sahrens 717fa9e4066Sahrens if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t))) { 718fa9e4066Sahrens kmem_free(drr, sizeof (dmu_replay_record_t)); 719fa9e4066Sahrens return (ba.err); 720fa9e4066Sahrens } 721fa9e4066Sahrens 722fa9e4066Sahrens err = traverse_dsl_dataset(ds, 723fa9e4066Sahrens fromds ? fromds->ds_phys->ds_creation_txg : 0, 724fa9e4066Sahrens ADVANCE_PRE | ADVANCE_HOLES | ADVANCE_DATA | ADVANCE_NOLOCK, 725fa9e4066Sahrens backup_cb, &ba); 726fa9e4066Sahrens 727fa9e4066Sahrens if (err) { 728fa9e4066Sahrens if (err == EINTR && ba.err) 729fa9e4066Sahrens err = ba.err; 730fa9e4066Sahrens return (err); 731fa9e4066Sahrens } 732fa9e4066Sahrens 733fa9e4066Sahrens bzero(drr, sizeof (dmu_replay_record_t)); 734fa9e4066Sahrens drr->drr_type = DRR_END; 735ea8dc4b6Seschrock drr->drr_u.drr_end.drr_checksum = ba.zc; 736fa9e4066Sahrens 737fa9e4066Sahrens if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t))) 738fa9e4066Sahrens return (ba.err); 739fa9e4066Sahrens 740fa9e4066Sahrens kmem_free(drr, sizeof (dmu_replay_record_t)); 741fa9e4066Sahrens 742fa9e4066Sahrens return (0); 743fa9e4066Sahrens } 744fa9e4066Sahrens 745fa9e4066Sahrens struct restorearg { 746fa9e4066Sahrens int err; 747fa9e4066Sahrens int byteswap; 748fa9e4066Sahrens vnode_t *vp; 749fa9e4066Sahrens char *buf; 750fa9e4066Sahrens uint64_t voff; 751fa9e4066Sahrens int buflen; /* number of valid bytes in buf */ 752fa9e4066Sahrens int bufoff; /* next offset to read */ 753fa9e4066Sahrens int bufsize; /* amount of memory allocated for buf */ 754ea8dc4b6Seschrock zio_cksum_t zc; 755fa9e4066Sahrens }; 756fa9e4066Sahrens 7571d452cf5Sahrens /* ARGSUSED */ 758fa9e4066Sahrens static int 7591d452cf5Sahrens replay_incremental_check(void *arg1, void *arg2, dmu_tx_t *tx) 760fa9e4066Sahrens { 7611d452cf5Sahrens dsl_dataset_t *ds = arg1; 7621d452cf5Sahrens struct drr_begin *drrb = arg2; 763fa9e4066Sahrens const char *snapname; 7641d452cf5Sahrens int err; 765fa9e4066Sahrens uint64_t val; 766fa9e4066Sahrens 767fa9e4066Sahrens /* must already be a snapshot of this fs */ 7681d452cf5Sahrens if (ds->ds_phys->ds_prev_snap_obj == 0) 7691d452cf5Sahrens return (ENODEV); 770fa9e4066Sahrens 771fa9e4066Sahrens /* most recent snapshot must match fromguid */ 7721d452cf5Sahrens if (ds->ds_prev->ds_phys->ds_guid != drrb->drr_fromguid) 7731d452cf5Sahrens return (ENODEV); 774fa9e4066Sahrens /* must not have any changes since most recent snapshot */ 775fa9e4066Sahrens if (ds->ds_phys->ds_bp.blk_birth > 7761d452cf5Sahrens ds->ds_prev->ds_phys->ds_creation_txg) 7771d452cf5Sahrens return (ETXTBSY); 778fa9e4066Sahrens 779fa9e4066Sahrens /* new snapshot name must not exist */ 780fa9e4066Sahrens snapname = strrchr(drrb->drr_toname, '@'); 7811d452cf5Sahrens if (snapname == NULL) 7821d452cf5Sahrens return (EEXIST); 7831d452cf5Sahrens 784fa9e4066Sahrens snapname++; 7851d452cf5Sahrens err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, 786fa9e4066Sahrens ds->ds_phys->ds_snapnames_zapobj, snapname, 8, 1, &val); 787fa9e4066Sahrens if (err == 0) 7881d452cf5Sahrens return (EEXIST); 7891d452cf5Sahrens if (err != ENOENT) 790fa9e4066Sahrens return (err); 7911d452cf5Sahrens 7921d452cf5Sahrens return (0); 793fa9e4066Sahrens } 794fa9e4066Sahrens 7951d452cf5Sahrens /* ARGSUSED */ 7961d452cf5Sahrens static void 7971d452cf5Sahrens replay_incremental_sync(void *arg1, void *arg2, dmu_tx_t *tx) 7981d452cf5Sahrens { 7991d452cf5Sahrens dsl_dataset_t *ds = arg1; 800fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 80199653d4eSeschrock ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 802fa9e4066Sahrens } 803fa9e4066Sahrens 8041d452cf5Sahrens /* ARGSUSED */ 805fa9e4066Sahrens static int 8061d452cf5Sahrens replay_full_check(void *arg1, void *arg2, dmu_tx_t *tx) 807fa9e4066Sahrens { 8081d452cf5Sahrens dsl_dir_t *dd = arg1; 8091d452cf5Sahrens struct drr_begin *drrb = arg2; 8101d452cf5Sahrens objset_t *mos = dd->dd_pool->dp_meta_objset; 8111d452cf5Sahrens char *cp; 8121d452cf5Sahrens uint64_t val; 813fa9e4066Sahrens int err; 814fa9e4066Sahrens 8151d452cf5Sahrens cp = strchr(drrb->drr_toname, '@'); 816fa9e4066Sahrens *cp = '\0'; 8171d452cf5Sahrens err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj, 8181d452cf5Sahrens strrchr(drrb->drr_toname, '/') + 1, 8191d452cf5Sahrens sizeof (uint64_t), 1, &val); 8201d452cf5Sahrens *cp = '@'; 821fa9e4066Sahrens 8221d452cf5Sahrens if (err != ENOENT) 8231d452cf5Sahrens return (err ? err : EEXIST); 8241d452cf5Sahrens 8251d452cf5Sahrens return (0); 826fa9e4066Sahrens } 827fa9e4066Sahrens 8281d452cf5Sahrens static void 8291d452cf5Sahrens replay_full_sync(void *arg1, void *arg2, dmu_tx_t *tx) 8301d452cf5Sahrens { 8311d452cf5Sahrens dsl_dir_t *dd = arg1; 8321d452cf5Sahrens struct drr_begin *drrb = arg2; 8331d452cf5Sahrens char *cp; 8341d452cf5Sahrens dsl_dataset_t *ds; 8351d452cf5Sahrens uint64_t dsobj; 836fa9e4066Sahrens 8371d452cf5Sahrens cp = strchr(drrb->drr_toname, '@'); 8381d452cf5Sahrens *cp = '\0'; 8391d452cf5Sahrens dsobj = dsl_dataset_create_sync(dd, strrchr(drrb->drr_toname, '/') + 1, 8401d452cf5Sahrens NULL, tx); 8411d452cf5Sahrens *cp = '@'; 8421d452cf5Sahrens 8431d452cf5Sahrens VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, dsobj, NULL, 844ea8dc4b6Seschrock DS_MODE_EXCLUSIVE, FTAG, &ds)); 845fa9e4066Sahrens 846fa9e4066Sahrens (void) dmu_objset_create_impl(dsl_dataset_get_spa(ds), 847fa9e4066Sahrens ds, drrb->drr_type, tx); 848fa9e4066Sahrens 849fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 85099653d4eSeschrock ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 851fa9e4066Sahrens 852fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 853fa9e4066Sahrens } 854fa9e4066Sahrens 855fa9e4066Sahrens static int 8561d452cf5Sahrens replay_end_check(void *arg1, void *arg2, dmu_tx_t *tx) 857fa9e4066Sahrens { 8581d452cf5Sahrens objset_t *os = arg1; 8591d452cf5Sahrens struct drr_begin *drrb = arg2; 860fa9e4066Sahrens char *snapname; 861fa9e4066Sahrens 862fa9e4066Sahrens /* XXX verify that drr_toname is in dd */ 863fa9e4066Sahrens 864fa9e4066Sahrens snapname = strchr(drrb->drr_toname, '@'); 865fa9e4066Sahrens if (snapname == NULL) 866fa9e4066Sahrens return (EINVAL); 867fa9e4066Sahrens snapname++; 868fa9e4066Sahrens 8691d452cf5Sahrens return (dsl_dataset_snapshot_check(os, snapname, tx)); 8701d452cf5Sahrens } 8711d452cf5Sahrens 8721d452cf5Sahrens static void 8731d452cf5Sahrens replay_end_sync(void *arg1, void *arg2, dmu_tx_t *tx) 8741d452cf5Sahrens { 8751d452cf5Sahrens objset_t *os = arg1; 8761d452cf5Sahrens struct drr_begin *drrb = arg2; 8771d452cf5Sahrens char *snapname; 8781d452cf5Sahrens dsl_dataset_t *ds, *hds; 8791d452cf5Sahrens 8801d452cf5Sahrens snapname = strchr(drrb->drr_toname, '@') + 1; 8811d452cf5Sahrens 8821d452cf5Sahrens dsl_dataset_snapshot_sync(os, snapname, tx); 883fa9e4066Sahrens 884fa9e4066Sahrens /* set snapshot's creation time and guid */ 8851d452cf5Sahrens hds = os->os->os_dsl_dataset; 8861d452cf5Sahrens VERIFY(0 == dsl_dataset_open_obj(hds->ds_dir->dd_pool, 8871d452cf5Sahrens hds->ds_phys->ds_prev_snap_obj, NULL, 888e1930233Sbonwick DS_MODE_PRIMARY | DS_MODE_READONLY | DS_MODE_INCONSISTENT, 889e1930233Sbonwick FTAG, &ds)); 890fa9e4066Sahrens 891fa9e4066Sahrens dmu_buf_will_dirty(ds->ds_dbuf, tx); 892fa9e4066Sahrens ds->ds_phys->ds_creation_time = drrb->drr_creation_time; 893fa9e4066Sahrens ds->ds_phys->ds_guid = drrb->drr_toguid; 89499653d4eSeschrock ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 895fa9e4066Sahrens 896fa9e4066Sahrens dsl_dataset_close(ds, DS_MODE_PRIMARY, FTAG); 897fa9e4066Sahrens 8981d452cf5Sahrens dmu_buf_will_dirty(hds->ds_dbuf, tx); 8991d452cf5Sahrens hds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT; 900fa9e4066Sahrens } 901fa9e4066Sahrens 902fa9e4066Sahrens void * 903fa9e4066Sahrens restore_read(struct restorearg *ra, int len) 904fa9e4066Sahrens { 905fa9e4066Sahrens void *rv; 906fa9e4066Sahrens 907fa9e4066Sahrens /* some things will require 8-byte alignment, so everything must */ 908fa9e4066Sahrens ASSERT3U(len % 8, ==, 0); 909fa9e4066Sahrens 910fa9e4066Sahrens while (ra->buflen - ra->bufoff < len) { 911fa9e4066Sahrens ssize_t resid; 912fa9e4066Sahrens int leftover = ra->buflen - ra->bufoff; 913fa9e4066Sahrens 914fa9e4066Sahrens (void) memmove(ra->buf, ra->buf + ra->bufoff, leftover); 915fa9e4066Sahrens ra->err = vn_rdwr(UIO_READ, ra->vp, 916fa9e4066Sahrens (caddr_t)ra->buf + leftover, ra->bufsize - leftover, 917fa9e4066Sahrens ra->voff, UIO_SYSSPACE, FAPPEND, 91893fcfe85Sahrens RLIM64_INFINITY, CRED(), &resid); 919fa9e4066Sahrens 920fa9e4066Sahrens ra->voff += ra->bufsize - leftover - resid; 921fa9e4066Sahrens ra->buflen = ra->bufsize - resid; 922fa9e4066Sahrens ra->bufoff = 0; 923fa9e4066Sahrens if (resid == ra->bufsize - leftover) 924fa9e4066Sahrens ra->err = EINVAL; 925fa9e4066Sahrens if (ra->err) 926fa9e4066Sahrens return (NULL); 927ea8dc4b6Seschrock /* Could compute checksum here? */ 928fa9e4066Sahrens } 929fa9e4066Sahrens 930fa9e4066Sahrens ASSERT3U(ra->bufoff % 8, ==, 0); 931fa9e4066Sahrens ASSERT3U(ra->buflen - ra->bufoff, >=, len); 932fa9e4066Sahrens rv = ra->buf + ra->bufoff; 933fa9e4066Sahrens ra->bufoff += len; 934ea8dc4b6Seschrock if (ra->byteswap) 935ea8dc4b6Seschrock fletcher_4_incremental_byteswap(rv, len, &ra->zc); 936ea8dc4b6Seschrock else 937ea8dc4b6Seschrock fletcher_4_incremental_native(rv, len, &ra->zc); 938fa9e4066Sahrens return (rv); 939fa9e4066Sahrens } 940fa9e4066Sahrens 941fa9e4066Sahrens static void 942fa9e4066Sahrens backup_byteswap(dmu_replay_record_t *drr) 943fa9e4066Sahrens { 944fa9e4066Sahrens #define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X)) 945fa9e4066Sahrens #define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X)) 946fa9e4066Sahrens drr->drr_type = BSWAP_32(drr->drr_type); 947fa9e4066Sahrens switch (drr->drr_type) { 948fa9e4066Sahrens case DRR_BEGIN: 949fa9e4066Sahrens DO64(drr_begin.drr_magic); 950fa9e4066Sahrens DO64(drr_begin.drr_version); 951fa9e4066Sahrens DO64(drr_begin.drr_creation_time); 952fa9e4066Sahrens DO32(drr_begin.drr_type); 953fa9e4066Sahrens DO64(drr_begin.drr_toguid); 954fa9e4066Sahrens DO64(drr_begin.drr_fromguid); 955fa9e4066Sahrens break; 956fa9e4066Sahrens case DRR_OBJECT: 957fa9e4066Sahrens DO64(drr_object.drr_object); 958fa9e4066Sahrens /* DO64(drr_object.drr_allocation_txg); */ 959fa9e4066Sahrens DO32(drr_object.drr_type); 960fa9e4066Sahrens DO32(drr_object.drr_bonustype); 961fa9e4066Sahrens DO32(drr_object.drr_blksz); 962fa9e4066Sahrens DO32(drr_object.drr_bonuslen); 963fa9e4066Sahrens break; 964fa9e4066Sahrens case DRR_FREEOBJECTS: 965fa9e4066Sahrens DO64(drr_freeobjects.drr_firstobj); 966fa9e4066Sahrens DO64(drr_freeobjects.drr_numobjs); 967fa9e4066Sahrens break; 968fa9e4066Sahrens case DRR_WRITE: 969fa9e4066Sahrens DO64(drr_write.drr_object); 970fa9e4066Sahrens DO32(drr_write.drr_type); 971fa9e4066Sahrens DO64(drr_write.drr_offset); 972fa9e4066Sahrens DO64(drr_write.drr_length); 973fa9e4066Sahrens break; 974fa9e4066Sahrens case DRR_FREE: 975fa9e4066Sahrens DO64(drr_free.drr_object); 976fa9e4066Sahrens DO64(drr_free.drr_offset); 977fa9e4066Sahrens DO64(drr_free.drr_length); 978fa9e4066Sahrens break; 979fa9e4066Sahrens case DRR_END: 980ea8dc4b6Seschrock DO64(drr_end.drr_checksum.zc_word[0]); 981ea8dc4b6Seschrock DO64(drr_end.drr_checksum.zc_word[1]); 982ea8dc4b6Seschrock DO64(drr_end.drr_checksum.zc_word[2]); 983ea8dc4b6Seschrock DO64(drr_end.drr_checksum.zc_word[3]); 984fa9e4066Sahrens break; 985fa9e4066Sahrens } 986fa9e4066Sahrens #undef DO64 987fa9e4066Sahrens #undef DO32 988fa9e4066Sahrens } 989fa9e4066Sahrens 990fa9e4066Sahrens static int 991fa9e4066Sahrens restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro) 992fa9e4066Sahrens { 993fa9e4066Sahrens int err; 994fa9e4066Sahrens dmu_tx_t *tx; 995fa9e4066Sahrens 996fa9e4066Sahrens err = dmu_object_info(os, drro->drr_object, NULL); 997fa9e4066Sahrens 998fa9e4066Sahrens if (err != 0 && err != ENOENT) 999fa9e4066Sahrens return (EINVAL); 1000fa9e4066Sahrens 1001fa9e4066Sahrens if (drro->drr_type == DMU_OT_NONE || 1002fa9e4066Sahrens drro->drr_type >= DMU_OT_NUMTYPES || 1003fa9e4066Sahrens drro->drr_bonustype >= DMU_OT_NUMTYPES || 1004fa9e4066Sahrens drro->drr_checksum >= ZIO_CHECKSUM_FUNCTIONS || 1005fa9e4066Sahrens drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS || 1006fa9e4066Sahrens P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) || 1007fa9e4066Sahrens drro->drr_blksz < SPA_MINBLOCKSIZE || 1008fa9e4066Sahrens drro->drr_blksz > SPA_MAXBLOCKSIZE || 1009fa9e4066Sahrens drro->drr_bonuslen > DN_MAX_BONUSLEN) { 1010fa9e4066Sahrens return (EINVAL); 1011fa9e4066Sahrens } 1012fa9e4066Sahrens 1013fa9e4066Sahrens tx = dmu_tx_create(os); 1014fa9e4066Sahrens 1015fa9e4066Sahrens if (err == ENOENT) { 1016fa9e4066Sahrens /* currently free, want to be allocated */ 1017fa9e4066Sahrens dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); 1018fa9e4066Sahrens dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 1); 1019fa9e4066Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 1020fa9e4066Sahrens if (err) { 1021fa9e4066Sahrens dmu_tx_abort(tx); 1022fa9e4066Sahrens return (err); 1023fa9e4066Sahrens } 1024fa9e4066Sahrens err = dmu_object_claim(os, drro->drr_object, 1025fa9e4066Sahrens drro->drr_type, drro->drr_blksz, 1026fa9e4066Sahrens drro->drr_bonustype, drro->drr_bonuslen, tx); 1027fa9e4066Sahrens } else { 1028fa9e4066Sahrens /* currently allocated, want to be allocated */ 1029fa9e4066Sahrens dmu_tx_hold_bonus(tx, drro->drr_object); 1030fa9e4066Sahrens /* 1031fa9e4066Sahrens * We may change blocksize, so need to 1032fa9e4066Sahrens * hold_write 1033fa9e4066Sahrens */ 1034fa9e4066Sahrens dmu_tx_hold_write(tx, drro->drr_object, 0, 1); 1035fa9e4066Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 1036fa9e4066Sahrens if (err) { 1037fa9e4066Sahrens dmu_tx_abort(tx); 1038fa9e4066Sahrens return (err); 1039fa9e4066Sahrens } 1040fa9e4066Sahrens 1041fa9e4066Sahrens err = dmu_object_reclaim(os, drro->drr_object, 1042fa9e4066Sahrens drro->drr_type, drro->drr_blksz, 1043fa9e4066Sahrens drro->drr_bonustype, drro->drr_bonuslen, tx); 1044fa9e4066Sahrens } 1045fa9e4066Sahrens if (err) { 1046fa9e4066Sahrens dmu_tx_commit(tx); 1047fa9e4066Sahrens return (EINVAL); 1048fa9e4066Sahrens } 1049fa9e4066Sahrens 1050fa9e4066Sahrens dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksum, tx); 1051fa9e4066Sahrens dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx); 1052fa9e4066Sahrens 1053fa9e4066Sahrens if (drro->drr_bonuslen) { 1054fa9e4066Sahrens dmu_buf_t *db; 1055fa9e4066Sahrens void *data; 1056ea8dc4b6Seschrock VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db)); 1057fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 1058fa9e4066Sahrens 1059fa9e4066Sahrens ASSERT3U(db->db_size, ==, drro->drr_bonuslen); 1060fa9e4066Sahrens data = restore_read(ra, P2ROUNDUP(db->db_size, 8)); 1061fa9e4066Sahrens if (data == NULL) { 1062fa9e4066Sahrens dmu_tx_commit(tx); 1063fa9e4066Sahrens return (ra->err); 1064fa9e4066Sahrens } 1065fa9e4066Sahrens bcopy(data, db->db_data, db->db_size); 1066fa9e4066Sahrens if (ra->byteswap) { 1067fa9e4066Sahrens dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data, 1068fa9e4066Sahrens drro->drr_bonuslen); 1069fa9e4066Sahrens } 1070ea8dc4b6Seschrock dmu_buf_rele(db, FTAG); 1071fa9e4066Sahrens } 1072fa9e4066Sahrens dmu_tx_commit(tx); 1073fa9e4066Sahrens return (0); 1074fa9e4066Sahrens } 1075fa9e4066Sahrens 1076fa9e4066Sahrens /* ARGSUSED */ 1077fa9e4066Sahrens static int 1078fa9e4066Sahrens restore_freeobjects(struct restorearg *ra, objset_t *os, 1079fa9e4066Sahrens struct drr_freeobjects *drrfo) 1080fa9e4066Sahrens { 1081fa9e4066Sahrens uint64_t obj; 1082fa9e4066Sahrens 1083fa9e4066Sahrens if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj) 1084fa9e4066Sahrens return (EINVAL); 1085fa9e4066Sahrens 1086fa9e4066Sahrens for (obj = drrfo->drr_firstobj; 1087fa9e4066Sahrens obj < drrfo->drr_firstobj + drrfo->drr_numobjs; obj++) { 1088fa9e4066Sahrens dmu_tx_t *tx; 1089fa9e4066Sahrens int err; 1090fa9e4066Sahrens 1091fa9e4066Sahrens if (dmu_object_info(os, obj, NULL) != 0) 1092fa9e4066Sahrens continue; 1093fa9e4066Sahrens 1094fa9e4066Sahrens tx = dmu_tx_create(os); 1095fa9e4066Sahrens dmu_tx_hold_bonus(tx, obj); 1096fa9e4066Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 1097fa9e4066Sahrens if (err) { 1098fa9e4066Sahrens dmu_tx_abort(tx); 1099fa9e4066Sahrens return (err); 1100fa9e4066Sahrens } 1101fa9e4066Sahrens err = dmu_object_free(os, obj, tx); 1102fa9e4066Sahrens dmu_tx_commit(tx); 1103fa9e4066Sahrens if (err && err != ENOENT) 1104fa9e4066Sahrens return (EINVAL); 1105fa9e4066Sahrens } 1106fa9e4066Sahrens return (0); 1107fa9e4066Sahrens } 1108fa9e4066Sahrens 1109fa9e4066Sahrens static int 1110fa9e4066Sahrens restore_write(struct restorearg *ra, objset_t *os, 1111fa9e4066Sahrens struct drr_write *drrw) 1112fa9e4066Sahrens { 1113fa9e4066Sahrens dmu_tx_t *tx; 1114fa9e4066Sahrens void *data; 1115fa9e4066Sahrens int err; 1116fa9e4066Sahrens 1117fa9e4066Sahrens if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset || 1118fa9e4066Sahrens drrw->drr_type >= DMU_OT_NUMTYPES) 1119fa9e4066Sahrens return (EINVAL); 1120fa9e4066Sahrens 1121fa9e4066Sahrens data = restore_read(ra, drrw->drr_length); 1122fa9e4066Sahrens if (data == NULL) 1123fa9e4066Sahrens return (ra->err); 1124fa9e4066Sahrens 1125fa9e4066Sahrens if (dmu_object_info(os, drrw->drr_object, NULL) != 0) 1126fa9e4066Sahrens return (EINVAL); 1127fa9e4066Sahrens 1128fa9e4066Sahrens tx = dmu_tx_create(os); 1129fa9e4066Sahrens 1130fa9e4066Sahrens dmu_tx_hold_write(tx, drrw->drr_object, 1131fa9e4066Sahrens drrw->drr_offset, drrw->drr_length); 1132fa9e4066Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 1133fa9e4066Sahrens if (err) { 1134fa9e4066Sahrens dmu_tx_abort(tx); 1135fa9e4066Sahrens return (err); 1136fa9e4066Sahrens } 1137fa9e4066Sahrens if (ra->byteswap) 1138fa9e4066Sahrens dmu_ot[drrw->drr_type].ot_byteswap(data, drrw->drr_length); 1139fa9e4066Sahrens dmu_write(os, drrw->drr_object, 1140fa9e4066Sahrens drrw->drr_offset, drrw->drr_length, data, tx); 1141fa9e4066Sahrens dmu_tx_commit(tx); 1142fa9e4066Sahrens return (0); 1143fa9e4066Sahrens } 1144fa9e4066Sahrens 1145fa9e4066Sahrens /* ARGSUSED */ 1146fa9e4066Sahrens static int 1147fa9e4066Sahrens restore_free(struct restorearg *ra, objset_t *os, 1148fa9e4066Sahrens struct drr_free *drrf) 1149fa9e4066Sahrens { 1150fa9e4066Sahrens dmu_tx_t *tx; 1151fa9e4066Sahrens int err; 1152fa9e4066Sahrens 1153fa9e4066Sahrens if (drrf->drr_length != -1ULL && 1154fa9e4066Sahrens drrf->drr_offset + drrf->drr_length < drrf->drr_offset) 1155fa9e4066Sahrens return (EINVAL); 1156fa9e4066Sahrens 1157fa9e4066Sahrens if (dmu_object_info(os, drrf->drr_object, NULL) != 0) 1158fa9e4066Sahrens return (EINVAL); 1159fa9e4066Sahrens 1160fa9e4066Sahrens tx = dmu_tx_create(os); 1161fa9e4066Sahrens 1162fa9e4066Sahrens dmu_tx_hold_free(tx, drrf->drr_object, 1163fa9e4066Sahrens drrf->drr_offset, drrf->drr_length); 1164fa9e4066Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 1165fa9e4066Sahrens if (err) { 1166fa9e4066Sahrens dmu_tx_abort(tx); 1167fa9e4066Sahrens return (err); 1168fa9e4066Sahrens } 1169ea8dc4b6Seschrock err = dmu_free_range(os, drrf->drr_object, 1170fa9e4066Sahrens drrf->drr_offset, drrf->drr_length, tx); 1171fa9e4066Sahrens dmu_tx_commit(tx); 1172ea8dc4b6Seschrock return (err); 1173fa9e4066Sahrens } 1174fa9e4066Sahrens 1175fa9e4066Sahrens int 1176ea8dc4b6Seschrock dmu_recvbackup(char *tosnap, struct drr_begin *drrb, uint64_t *sizep, 117798579b20Snd150628 boolean_t force, vnode_t *vp, uint64_t voffset) 1178fa9e4066Sahrens { 1179fa9e4066Sahrens struct restorearg ra; 1180fa9e4066Sahrens dmu_replay_record_t *drr; 1181ea8dc4b6Seschrock char *cp; 1182fa9e4066Sahrens objset_t *os = NULL; 1183ea8dc4b6Seschrock zio_cksum_t pzc; 1184fa9e4066Sahrens 1185fa9e4066Sahrens bzero(&ra, sizeof (ra)); 1186fa9e4066Sahrens ra.vp = vp; 1187fa9e4066Sahrens ra.voff = voffset; 1188fa9e4066Sahrens ra.bufsize = 1<<20; 1189fa9e4066Sahrens ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP); 1190fa9e4066Sahrens 1191fa9e4066Sahrens if (drrb->drr_magic == DMU_BACKUP_MAGIC) { 1192fa9e4066Sahrens ra.byteswap = FALSE; 1193fa9e4066Sahrens } else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { 1194fa9e4066Sahrens ra.byteswap = TRUE; 1195fa9e4066Sahrens } else { 1196fa9e4066Sahrens ra.err = EINVAL; 1197fa9e4066Sahrens goto out; 1198fa9e4066Sahrens } 1199fa9e4066Sahrens 1200ea8dc4b6Seschrock /* 1201ea8dc4b6Seschrock * NB: this assumes that struct drr_begin will be the largest in 1202ea8dc4b6Seschrock * dmu_replay_record_t's drr_u, and thus we don't need to pad it 1203ea8dc4b6Seschrock * with zeros to make it the same length as we wrote out. 1204ea8dc4b6Seschrock */ 1205ea8dc4b6Seschrock ((dmu_replay_record_t *)ra.buf)->drr_type = DRR_BEGIN; 1206ea8dc4b6Seschrock ((dmu_replay_record_t *)ra.buf)->drr_pad = 0; 1207ea8dc4b6Seschrock ((dmu_replay_record_t *)ra.buf)->drr_u.drr_begin = *drrb; 1208ea8dc4b6Seschrock if (ra.byteswap) { 1209ea8dc4b6Seschrock fletcher_4_incremental_byteswap(ra.buf, 1210ea8dc4b6Seschrock sizeof (dmu_replay_record_t), &ra.zc); 1211ea8dc4b6Seschrock } else { 1212ea8dc4b6Seschrock fletcher_4_incremental_native(ra.buf, 1213ea8dc4b6Seschrock sizeof (dmu_replay_record_t), &ra.zc); 1214ea8dc4b6Seschrock } 1215ea8dc4b6Seschrock (void) strcpy(drrb->drr_toname, tosnap); /* for the sync funcs */ 1216ea8dc4b6Seschrock 1217fa9e4066Sahrens if (ra.byteswap) { 1218fa9e4066Sahrens drrb->drr_magic = BSWAP_64(drrb->drr_magic); 1219fa9e4066Sahrens drrb->drr_version = BSWAP_64(drrb->drr_version); 1220fa9e4066Sahrens drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); 1221fa9e4066Sahrens drrb->drr_type = BSWAP_32(drrb->drr_type); 1222fa9e4066Sahrens drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); 1223fa9e4066Sahrens drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); 1224fa9e4066Sahrens } 1225fa9e4066Sahrens 1226fa9e4066Sahrens ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC); 1227fa9e4066Sahrens 1228fa9e4066Sahrens if (drrb->drr_version != DMU_BACKUP_VERSION || 1229fa9e4066Sahrens drrb->drr_type >= DMU_OST_NUMTYPES || 1230fa9e4066Sahrens strchr(drrb->drr_toname, '@') == NULL) { 1231fa9e4066Sahrens ra.err = EINVAL; 1232fa9e4066Sahrens goto out; 1233fa9e4066Sahrens } 1234fa9e4066Sahrens 1235fa9e4066Sahrens /* 1236fa9e4066Sahrens * Process the begin in syncing context. 1237fa9e4066Sahrens */ 1238fa9e4066Sahrens if (drrb->drr_fromguid) { 1239fa9e4066Sahrens /* incremental backup */ 12401d452cf5Sahrens dsl_dataset_t *ds = NULL; 1241fa9e4066Sahrens 1242fa9e4066Sahrens cp = strchr(tosnap, '@'); 1243fa9e4066Sahrens *cp = '\0'; 12441d452cf5Sahrens ra.err = dsl_dataset_open(tosnap, DS_MODE_EXCLUSIVE, FTAG, &ds); 1245fa9e4066Sahrens *cp = '@'; 1246ea8dc4b6Seschrock if (ra.err) 1247fa9e4066Sahrens goto out; 1248fa9e4066Sahrens 124998579b20Snd150628 /* 125098579b20Snd150628 * Only do the rollback if the most recent snapshot 125198579b20Snd150628 * matches the incremental source 125298579b20Snd150628 */ 125398579b20Snd150628 if (force) { 125498579b20Snd150628 if (ds->ds_prev->ds_phys->ds_guid != 125598579b20Snd150628 drrb->drr_fromguid) { 125698579b20Snd150628 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 125798579b20Snd150628 return (ENODEV); 125898579b20Snd150628 } 125998579b20Snd150628 (void) dsl_dataset_rollback(ds); 126098579b20Snd150628 } 12611d452cf5Sahrens ra.err = dsl_sync_task_do(ds->ds_dir->dd_pool, 12621d452cf5Sahrens replay_incremental_check, replay_incremental_sync, 12631d452cf5Sahrens ds, drrb, 1); 12641d452cf5Sahrens dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1265fa9e4066Sahrens } else { 1266fa9e4066Sahrens /* full backup */ 12671d452cf5Sahrens dsl_dir_t *dd = NULL; 1268fa9e4066Sahrens const char *tail; 1269fa9e4066Sahrens 12701d452cf5Sahrens /* can't restore full backup into topmost fs, for now */ 12711d452cf5Sahrens if (strrchr(drrb->drr_toname, '/') == NULL) { 12721d452cf5Sahrens ra.err = EINVAL; 12731d452cf5Sahrens goto out; 12741d452cf5Sahrens } 12751d452cf5Sahrens 1276fa9e4066Sahrens cp = strchr(tosnap, '@'); 1277fa9e4066Sahrens *cp = '\0'; 1278ea8dc4b6Seschrock ra.err = dsl_dir_open(tosnap, FTAG, &dd, &tail); 1279fa9e4066Sahrens *cp = '@'; 1280ea8dc4b6Seschrock if (ra.err) 1281fa9e4066Sahrens goto out; 1282fa9e4066Sahrens if (tail == NULL) { 1283fa9e4066Sahrens ra.err = EEXIST; 1284fa9e4066Sahrens goto out; 1285fa9e4066Sahrens } 1286fa9e4066Sahrens 12871d452cf5Sahrens ra.err = dsl_sync_task_do(dd->dd_pool, replay_full_check, 12881d452cf5Sahrens replay_full_sync, dd, drrb, 5); 12891d452cf5Sahrens dsl_dir_close(dd, FTAG); 1290fa9e4066Sahrens } 1291fa9e4066Sahrens if (ra.err) 1292fa9e4066Sahrens goto out; 1293fa9e4066Sahrens 1294fa9e4066Sahrens /* 1295fa9e4066Sahrens * Open the objset we are modifying. 1296fa9e4066Sahrens */ 1297fa9e4066Sahrens 1298fa9e4066Sahrens cp = strchr(tosnap, '@'); 1299fa9e4066Sahrens *cp = '\0'; 1300fa9e4066Sahrens ra.err = dmu_objset_open(tosnap, DMU_OST_ANY, 1301e1930233Sbonwick DS_MODE_PRIMARY | DS_MODE_INCONSISTENT, &os); 1302fa9e4066Sahrens *cp = '@'; 1303fa9e4066Sahrens ASSERT3U(ra.err, ==, 0); 1304fa9e4066Sahrens 1305fa9e4066Sahrens /* 1306fa9e4066Sahrens * Read records and process them. 1307fa9e4066Sahrens */ 1308ea8dc4b6Seschrock pzc = ra.zc; 1309fa9e4066Sahrens while (ra.err == 0 && 1310fa9e4066Sahrens NULL != (drr = restore_read(&ra, sizeof (*drr)))) { 1311ea8dc4b6Seschrock if (issig(JUSTLOOKING) && issig(FORREAL)) { 1312fa9e4066Sahrens ra.err = EINTR; 1313fa9e4066Sahrens goto out; 1314fa9e4066Sahrens } 1315fa9e4066Sahrens 1316fa9e4066Sahrens if (ra.byteswap) 1317fa9e4066Sahrens backup_byteswap(drr); 1318fa9e4066Sahrens 1319fa9e4066Sahrens switch (drr->drr_type) { 1320fa9e4066Sahrens case DRR_OBJECT: 1321fa9e4066Sahrens { 1322fa9e4066Sahrens /* 1323fa9e4066Sahrens * We need to make a copy of the record header, 1324fa9e4066Sahrens * because restore_{object,write} may need to 1325fa9e4066Sahrens * restore_read(), which will invalidate drr. 1326fa9e4066Sahrens */ 1327fa9e4066Sahrens struct drr_object drro = drr->drr_u.drr_object; 1328fa9e4066Sahrens ra.err = restore_object(&ra, os, &drro); 1329fa9e4066Sahrens break; 1330fa9e4066Sahrens } 1331fa9e4066Sahrens case DRR_FREEOBJECTS: 1332fa9e4066Sahrens { 1333fa9e4066Sahrens struct drr_freeobjects drrfo = 1334fa9e4066Sahrens drr->drr_u.drr_freeobjects; 1335fa9e4066Sahrens ra.err = restore_freeobjects(&ra, os, &drrfo); 1336fa9e4066Sahrens break; 1337fa9e4066Sahrens } 1338fa9e4066Sahrens case DRR_WRITE: 1339fa9e4066Sahrens { 1340fa9e4066Sahrens struct drr_write drrw = drr->drr_u.drr_write; 1341fa9e4066Sahrens ra.err = restore_write(&ra, os, &drrw); 1342fa9e4066Sahrens break; 1343fa9e4066Sahrens } 1344fa9e4066Sahrens case DRR_FREE: 1345fa9e4066Sahrens { 1346fa9e4066Sahrens struct drr_free drrf = drr->drr_u.drr_free; 1347fa9e4066Sahrens ra.err = restore_free(&ra, os, &drrf); 1348fa9e4066Sahrens break; 1349fa9e4066Sahrens } 1350fa9e4066Sahrens case DRR_END: 1351ea8dc4b6Seschrock { 1352ea8dc4b6Seschrock struct drr_end drre = drr->drr_u.drr_end; 1353ea8dc4b6Seschrock /* 1354ea8dc4b6Seschrock * We compare against the *previous* checksum 1355ea8dc4b6Seschrock * value, because the stored checksum is of 1356ea8dc4b6Seschrock * everything before the DRR_END record. 1357ea8dc4b6Seschrock */ 1358ea8dc4b6Seschrock if (drre.drr_checksum.zc_word[0] != 0 && 1359ea8dc4b6Seschrock ((drre.drr_checksum.zc_word[0] - pzc.zc_word[0]) | 1360ea8dc4b6Seschrock (drre.drr_checksum.zc_word[1] - pzc.zc_word[1]) | 1361ea8dc4b6Seschrock (drre.drr_checksum.zc_word[2] - pzc.zc_word[2]) | 1362ea8dc4b6Seschrock (drre.drr_checksum.zc_word[3] - pzc.zc_word[3]))) { 1363ea8dc4b6Seschrock ra.err = ECKSUM; 1364ea8dc4b6Seschrock goto out; 1365ea8dc4b6Seschrock } 1366ea8dc4b6Seschrock 13671d452cf5Sahrens ra.err = dsl_sync_task_do(dmu_objset_ds(os)-> 13681d452cf5Sahrens ds_dir->dd_pool, replay_end_check, replay_end_sync, 13691d452cf5Sahrens os, drrb, 3); 1370fa9e4066Sahrens goto out; 1371ea8dc4b6Seschrock } 1372fa9e4066Sahrens default: 1373fa9e4066Sahrens ra.err = EINVAL; 1374fa9e4066Sahrens goto out; 1375fa9e4066Sahrens } 1376ea8dc4b6Seschrock pzc = ra.zc; 1377fa9e4066Sahrens } 1378fa9e4066Sahrens 1379fa9e4066Sahrens out: 1380fa9e4066Sahrens if (os) 1381fa9e4066Sahrens dmu_objset_close(os); 1382fa9e4066Sahrens 1383fa9e4066Sahrens /* 1384fa9e4066Sahrens * Make sure we don't rollback/destroy unless we actually 1385fa9e4066Sahrens * processed the begin properly. 'os' will only be set if this 1386fa9e4066Sahrens * is the case. 1387fa9e4066Sahrens */ 13881d452cf5Sahrens if (ra.err && os && tosnap && strchr(tosnap, '@')) { 1389fa9e4066Sahrens /* 1390fa9e4066Sahrens * rollback or destroy what we created, so we don't 1391fa9e4066Sahrens * leave it in the restoring state. 1392fa9e4066Sahrens */ 13931d452cf5Sahrens dsl_dataset_t *ds; 13941d452cf5Sahrens int err; 13951d452cf5Sahrens 1396fa9e4066Sahrens cp = strchr(tosnap, '@'); 1397fa9e4066Sahrens *cp = '\0'; 13981d452cf5Sahrens err = dsl_dataset_open(tosnap, 13991d452cf5Sahrens DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, 14001d452cf5Sahrens FTAG, &ds); 14011d452cf5Sahrens if (err == 0) { 14021d452cf5Sahrens txg_wait_synced(ds->ds_dir->dd_pool, 0); 14031d452cf5Sahrens if (drrb->drr_fromguid) { 14041d452cf5Sahrens /* incremental: rollback to most recent snap */ 14051d452cf5Sahrens (void) dsl_dataset_rollback(ds); 14061d452cf5Sahrens dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 14071d452cf5Sahrens } else { 14081d452cf5Sahrens /* full: destroy whole fs */ 14091d452cf5Sahrens dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 14101d452cf5Sahrens (void) dsl_dataset_destroy(tosnap); 1411fa9e4066Sahrens } 14121d452cf5Sahrens } 1413fa9e4066Sahrens *cp = '@'; 1414fa9e4066Sahrens } 1415fa9e4066Sahrens 1416fa9e4066Sahrens kmem_free(ra.buf, ra.bufsize); 1417fa9e4066Sahrens if (sizep) 1418fa9e4066Sahrens *sizep = ra.voff; 1419fa9e4066Sahrens return (ra.err); 1420fa9e4066Sahrens } 1421fa9e4066Sahrens 1422c5c6ffa0Smaybee typedef struct { 1423c5c6ffa0Smaybee uint64_t txg; 1424c5c6ffa0Smaybee dmu_buf_impl_t *db; 1425c5c6ffa0Smaybee dmu_sync_cb_t *done; 1426c5c6ffa0Smaybee void *arg; 1427c5c6ffa0Smaybee } dmu_sync_cbin_t; 1428c5c6ffa0Smaybee 1429c5c6ffa0Smaybee typedef union { 1430c5c6ffa0Smaybee dmu_sync_cbin_t data; 1431c5c6ffa0Smaybee blkptr_t blk; 1432c5c6ffa0Smaybee } dmu_sync_cbarg_t; 1433c5c6ffa0Smaybee 1434c5c6ffa0Smaybee /* ARGSUSED */ 1435c5c6ffa0Smaybee static void 1436c5c6ffa0Smaybee dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg) 1437c5c6ffa0Smaybee { 1438c5c6ffa0Smaybee dmu_sync_cbin_t *in = (dmu_sync_cbin_t *)varg; 1439c5c6ffa0Smaybee dmu_buf_impl_t *db = in->db; 1440c5c6ffa0Smaybee uint64_t txg = in->txg; 1441c5c6ffa0Smaybee dmu_sync_cb_t *done = in->done; 1442c5c6ffa0Smaybee void *arg = in->arg; 1443c5c6ffa0Smaybee blkptr_t *blk = (blkptr_t *)varg; 1444c5c6ffa0Smaybee 1445c5c6ffa0Smaybee if (!BP_IS_HOLE(zio->io_bp)) { 1446c5c6ffa0Smaybee zio->io_bp->blk_fill = 1; 1447c5c6ffa0Smaybee BP_SET_TYPE(zio->io_bp, db->db_dnode->dn_type); 1448c5c6ffa0Smaybee BP_SET_LEVEL(zio->io_bp, 0); 1449c5c6ffa0Smaybee } 1450c5c6ffa0Smaybee 1451c5c6ffa0Smaybee *blk = *zio->io_bp; /* structure assignment */ 1452c5c6ffa0Smaybee 1453c5c6ffa0Smaybee mutex_enter(&db->db_mtx); 1454c5c6ffa0Smaybee ASSERT(db->db_d.db_overridden_by[txg&TXG_MASK] == IN_DMU_SYNC); 1455c5c6ffa0Smaybee db->db_d.db_overridden_by[txg&TXG_MASK] = blk; 1456c5c6ffa0Smaybee cv_broadcast(&db->db_changed); 1457c5c6ffa0Smaybee mutex_exit(&db->db_mtx); 1458c5c6ffa0Smaybee 1459c5c6ffa0Smaybee if (done) 1460c5c6ffa0Smaybee done(&(db->db), arg); 1461c5c6ffa0Smaybee } 1462c5c6ffa0Smaybee 1463fa9e4066Sahrens /* 1464c5c6ffa0Smaybee * Intent log support: sync the block associated with db to disk. 1465c5c6ffa0Smaybee * N.B. and XXX: the caller is responsible for making sure that the 1466c5c6ffa0Smaybee * data isn't changing while dmu_sync() is writing it. 1467fa9e4066Sahrens * 1468fa9e4066Sahrens * Return values: 1469fa9e4066Sahrens * 1470c5c6ffa0Smaybee * EEXIST: this txg has already been synced, so there's nothing to to. 1471fa9e4066Sahrens * The caller should not log the write. 1472fa9e4066Sahrens * 1473fa9e4066Sahrens * ENOENT: the block was dbuf_free_range()'d, so there's nothing to do. 1474fa9e4066Sahrens * The caller should not log the write. 1475fa9e4066Sahrens * 1476c5c6ffa0Smaybee * EALREADY: this block is already in the process of being synced. 1477c5c6ffa0Smaybee * The caller should track its progress (somehow). 1478fa9e4066Sahrens * 1479c5c6ffa0Smaybee * EINPROGRESS: the IO has been initiated. 1480c5c6ffa0Smaybee * The caller should log this blkptr in the callback. 1481fa9e4066Sahrens * 1482c5c6ffa0Smaybee * 0: completed. Sets *bp to the blkptr just written. 1483c5c6ffa0Smaybee * The caller should log this blkptr immediately. 1484fa9e4066Sahrens */ 1485fa9e4066Sahrens int 1486c5c6ffa0Smaybee dmu_sync(zio_t *pio, dmu_buf_t *db_fake, 1487c5c6ffa0Smaybee blkptr_t *bp, uint64_t txg, dmu_sync_cb_t *done, void *arg) 1488fa9e4066Sahrens { 1489c5c6ffa0Smaybee dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; 1490c5c6ffa0Smaybee objset_impl_t *os = db->db_objset; 1491c5c6ffa0Smaybee dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool; 1492fa9e4066Sahrens tx_state_t *tx = &dp->dp_tx; 1493c5c6ffa0Smaybee dmu_sync_cbin_t *in; 1494fa9e4066Sahrens blkptr_t *blk; 1495ea8dc4b6Seschrock zbookmark_t zb; 1496c5c6ffa0Smaybee uint32_t arc_flag; 1497c5c6ffa0Smaybee int err; 1498fa9e4066Sahrens 1499fa9e4066Sahrens ASSERT(BP_IS_HOLE(bp)); 1500fa9e4066Sahrens ASSERT(txg != 0); 1501fa9e4066Sahrens 1502c5c6ffa0Smaybee 1503fa9e4066Sahrens dprintf("dmu_sync txg=%llu, s,o,q %llu %llu %llu\n", 1504fa9e4066Sahrens txg, tx->tx_synced_txg, tx->tx_open_txg, tx->tx_quiesced_txg); 1505fa9e4066Sahrens 1506fa9e4066Sahrens /* 1507c5c6ffa0Smaybee * XXX - would be nice if we could do this without suspending... 1508ea8dc4b6Seschrock */ 1509c5c6ffa0Smaybee txg_suspend(dp); 1510ea8dc4b6Seschrock 1511ea8dc4b6Seschrock /* 1512fa9e4066Sahrens * If this txg already synced, there's nothing to do. 1513fa9e4066Sahrens */ 1514fa9e4066Sahrens if (txg <= tx->tx_synced_txg) { 1515c5c6ffa0Smaybee txg_resume(dp); 1516fa9e4066Sahrens /* 1517fa9e4066Sahrens * If we're running ziltest, we need the blkptr regardless. 1518fa9e4066Sahrens */ 1519fa9e4066Sahrens if (txg > spa_freeze_txg(dp->dp_spa)) { 1520fa9e4066Sahrens /* if db_blkptr == NULL, this was an empty write */ 1521fa9e4066Sahrens if (db->db_blkptr) 1522fa9e4066Sahrens *bp = *db->db_blkptr; /* structure assignment */ 1523fa9e4066Sahrens return (0); 1524fa9e4066Sahrens } 1525c5c6ffa0Smaybee return (EEXIST); 1526fa9e4066Sahrens } 1527fa9e4066Sahrens 1528fa9e4066Sahrens mutex_enter(&db->db_mtx); 1529fa9e4066Sahrens 1530c5c6ffa0Smaybee blk = db->db_d.db_overridden_by[txg&TXG_MASK]; 1531c5c6ffa0Smaybee if (blk == IN_DMU_SYNC) { 1532fa9e4066Sahrens /* 1533c5c6ffa0Smaybee * We have already issued a sync write for this buffer. 1534c5c6ffa0Smaybee */ 1535c5c6ffa0Smaybee mutex_exit(&db->db_mtx); 1536c5c6ffa0Smaybee txg_resume(dp); 1537c5c6ffa0Smaybee return (EALREADY); 1538c5c6ffa0Smaybee } else if (blk != NULL) { 1539c5c6ffa0Smaybee /* 1540c5c6ffa0Smaybee * This buffer had already been synced. It could not 1541c5c6ffa0Smaybee * have been dirtied since, or we would have cleared blk. 1542c5c6ffa0Smaybee */ 1543c5c6ffa0Smaybee *bp = *blk; /* structure assignment */ 1544c5c6ffa0Smaybee mutex_exit(&db->db_mtx); 1545c5c6ffa0Smaybee txg_resume(dp); 1546c5c6ffa0Smaybee return (0); 1547c5c6ffa0Smaybee } 1548c5c6ffa0Smaybee 1549c5c6ffa0Smaybee if (txg == tx->tx_syncing_txg) { 1550c5c6ffa0Smaybee while (db->db_data_pending) { 1551c5c6ffa0Smaybee /* 1552c5c6ffa0Smaybee * IO is in-progress. Wait for it to finish. 1553c5c6ffa0Smaybee * XXX - would be nice to be able to somehow "attach" 1554c5c6ffa0Smaybee * this zio to the parent zio passed in. 1555c5c6ffa0Smaybee */ 1556c5c6ffa0Smaybee cv_wait(&db->db_changed, &db->db_mtx); 155713506d1eSmaybee if (!db->db_data_pending && 155813506d1eSmaybee db->db_blkptr && BP_IS_HOLE(db->db_blkptr)) { 155913506d1eSmaybee /* 156013506d1eSmaybee * IO was compressed away 156113506d1eSmaybee */ 156213506d1eSmaybee *bp = *db->db_blkptr; /* structure assignment */ 156313506d1eSmaybee mutex_exit(&db->db_mtx); 156413506d1eSmaybee txg_resume(dp); 156513506d1eSmaybee return (0); 156613506d1eSmaybee } 1567c5c6ffa0Smaybee ASSERT(db->db_data_pending || 1568c5c6ffa0Smaybee (db->db_blkptr && db->db_blkptr->blk_birth == txg)); 1569c5c6ffa0Smaybee } 1570c5c6ffa0Smaybee 1571c5c6ffa0Smaybee if (db->db_blkptr && db->db_blkptr->blk_birth == txg) { 1572c5c6ffa0Smaybee /* 1573c5c6ffa0Smaybee * IO is already completed. 1574c5c6ffa0Smaybee */ 1575c5c6ffa0Smaybee *bp = *db->db_blkptr; /* structure assignment */ 1576c5c6ffa0Smaybee mutex_exit(&db->db_mtx); 1577c5c6ffa0Smaybee txg_resume(dp); 1578c5c6ffa0Smaybee return (0); 1579c5c6ffa0Smaybee } 1580c5c6ffa0Smaybee } 1581c5c6ffa0Smaybee 1582c5c6ffa0Smaybee if (db->db_d.db_data_old[txg&TXG_MASK] == NULL) { 1583c5c6ffa0Smaybee /* 1584c5c6ffa0Smaybee * This dbuf isn't dirty, must have been free_range'd. 1585fa9e4066Sahrens * There's no need to log writes to freed blocks, so we're done. 1586fa9e4066Sahrens */ 1587fa9e4066Sahrens mutex_exit(&db->db_mtx); 1588c5c6ffa0Smaybee txg_resume(dp); 1589fa9e4066Sahrens return (ENOENT); 1590fa9e4066Sahrens } 1591fa9e4066Sahrens 1592c5c6ffa0Smaybee ASSERT(db->db_d.db_overridden_by[txg&TXG_MASK] == NULL); 1593fa9e4066Sahrens db->db_d.db_overridden_by[txg&TXG_MASK] = IN_DMU_SYNC; 1594c5c6ffa0Smaybee /* 1595c5c6ffa0Smaybee * XXX - a little ugly to stash the blkptr in the callback 1596c5c6ffa0Smaybee * buffer. We always need to make sure the following is true: 1597c5c6ffa0Smaybee * ASSERT(sizeof(blkptr_t) >= sizeof(dmu_sync_cbin_t)); 1598c5c6ffa0Smaybee */ 1599c5c6ffa0Smaybee in = kmem_alloc(sizeof (blkptr_t), KM_SLEEP); 1600c5c6ffa0Smaybee in->db = db; 1601c5c6ffa0Smaybee in->txg = txg; 1602c5c6ffa0Smaybee in->done = done; 1603c5c6ffa0Smaybee in->arg = arg; 1604fa9e4066Sahrens mutex_exit(&db->db_mtx); 1605c5c6ffa0Smaybee txg_resume(dp); 1606fa9e4066Sahrens 1607c5c6ffa0Smaybee arc_flag = pio == NULL ? ARC_WAIT : ARC_NOWAIT; 1608c5c6ffa0Smaybee zb.zb_objset = os->os_dsl_dataset->ds_object; 1609ea8dc4b6Seschrock zb.zb_object = db->db.db_object; 1610ea8dc4b6Seschrock zb.zb_level = db->db_level; 1611ea8dc4b6Seschrock zb.zb_blkid = db->db_blkid; 1612c5c6ffa0Smaybee err = arc_write(pio, os->os_spa, 1613c5c6ffa0Smaybee zio_checksum_select(db->db_dnode->dn_checksum, os->os_checksum), 1614c5c6ffa0Smaybee zio_compress_select(db->db_dnode->dn_compress, os->os_compress), 1615c5c6ffa0Smaybee dmu_get_replication_level(os->os_spa, &zb, db->db_dnode->dn_type), 1616c5c6ffa0Smaybee txg, bp, db->db_d.db_data_old[txg&TXG_MASK], dmu_sync_done, in, 1617c5c6ffa0Smaybee ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, arc_flag, &zb); 1618fa9e4066Sahrens ASSERT(err == 0); 1619fa9e4066Sahrens 1620c5c6ffa0Smaybee return (arc_flag == ARC_NOWAIT ? EINPROGRESS : 0); 1621fa9e4066Sahrens } 1622fa9e4066Sahrens 1623fa9e4066Sahrens uint64_t 1624fa9e4066Sahrens dmu_object_max_nonzero_offset(objset_t *os, uint64_t object) 1625fa9e4066Sahrens { 1626ea8dc4b6Seschrock dnode_t *dn; 1627ea8dc4b6Seschrock 1628ea8dc4b6Seschrock /* XXX assumes dnode_hold will not get an i/o error */ 1629ea8dc4b6Seschrock (void) dnode_hold(os->os, object, FTAG, &dn); 1630fa9e4066Sahrens uint64_t rv = dnode_max_nonzero_offset(dn); 1631fa9e4066Sahrens dnode_rele(dn, FTAG); 1632fa9e4066Sahrens return (rv); 1633fa9e4066Sahrens } 1634fa9e4066Sahrens 1635fa9e4066Sahrens int 1636fa9e4066Sahrens dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, 1637fa9e4066Sahrens dmu_tx_t *tx) 1638fa9e4066Sahrens { 1639ea8dc4b6Seschrock dnode_t *dn; 1640ea8dc4b6Seschrock int err; 1641ea8dc4b6Seschrock 1642ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 1643ea8dc4b6Seschrock if (err) 1644ea8dc4b6Seschrock return (err); 1645ea8dc4b6Seschrock err = dnode_set_blksz(dn, size, ibs, tx); 1646fa9e4066Sahrens dnode_rele(dn, FTAG); 1647fa9e4066Sahrens return (err); 1648fa9e4066Sahrens } 1649fa9e4066Sahrens 1650fa9e4066Sahrens void 1651fa9e4066Sahrens dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, 1652fa9e4066Sahrens dmu_tx_t *tx) 1653fa9e4066Sahrens { 1654ea8dc4b6Seschrock dnode_t *dn; 1655ea8dc4b6Seschrock 1656ea8dc4b6Seschrock /* XXX assumes dnode_hold will not get an i/o error */ 1657ea8dc4b6Seschrock (void) dnode_hold(os->os, object, FTAG, &dn); 1658fa9e4066Sahrens ASSERT(checksum < ZIO_CHECKSUM_FUNCTIONS); 1659fa9e4066Sahrens dn->dn_checksum = checksum; 1660fa9e4066Sahrens dnode_setdirty(dn, tx); 1661fa9e4066Sahrens dnode_rele(dn, FTAG); 1662fa9e4066Sahrens } 1663fa9e4066Sahrens 1664fa9e4066Sahrens void 1665fa9e4066Sahrens dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress, 1666fa9e4066Sahrens dmu_tx_t *tx) 1667fa9e4066Sahrens { 1668ea8dc4b6Seschrock dnode_t *dn; 1669ea8dc4b6Seschrock 1670ea8dc4b6Seschrock /* XXX assumes dnode_hold will not get an i/o error */ 1671ea8dc4b6Seschrock (void) dnode_hold(os->os, object, FTAG, &dn); 1672fa9e4066Sahrens ASSERT(compress < ZIO_COMPRESS_FUNCTIONS); 1673fa9e4066Sahrens dn->dn_compress = compress; 1674fa9e4066Sahrens dnode_setdirty(dn, tx); 1675fa9e4066Sahrens dnode_rele(dn, FTAG); 1676fa9e4066Sahrens } 1677fa9e4066Sahrens 167844cd46caSbillm /* 167944cd46caSbillm * XXX - eventually, this should take into account per-dataset (or 168044cd46caSbillm * even per-object?) user requests for higher levels of replication. 168144cd46caSbillm */ 168244cd46caSbillm int 168344cd46caSbillm dmu_get_replication_level(spa_t *spa, zbookmark_t *zb, dmu_object_type_t ot) 168444cd46caSbillm { 168544cd46caSbillm int ncopies = 1; 168644cd46caSbillm 168744cd46caSbillm if (dmu_ot[ot].ot_metadata) 168844cd46caSbillm ncopies++; 168944cd46caSbillm if (zb->zb_level != 0) 169044cd46caSbillm ncopies++; 169144cd46caSbillm if (zb->zb_objset == 0 && zb->zb_object == 0) 169244cd46caSbillm ncopies++; 169344cd46caSbillm return (MIN(ncopies, spa_max_replication(spa))); 169444cd46caSbillm } 169544cd46caSbillm 1696fa9e4066Sahrens int 1697fa9e4066Sahrens dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) 1698fa9e4066Sahrens { 1699fa9e4066Sahrens dnode_t *dn; 1700fa9e4066Sahrens int i, err; 1701fa9e4066Sahrens 1702ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 1703ea8dc4b6Seschrock if (err) 1704ea8dc4b6Seschrock return (err); 1705fa9e4066Sahrens /* 1706fa9e4066Sahrens * Sync any current changes before 1707fa9e4066Sahrens * we go trundling through the block pointers. 1708fa9e4066Sahrens */ 1709fa9e4066Sahrens for (i = 0; i < TXG_SIZE; i++) { 1710c543ec06Sahrens if (list_link_active(&dn->dn_dirty_link[i])) 1711fa9e4066Sahrens break; 1712fa9e4066Sahrens } 1713fa9e4066Sahrens if (i != TXG_SIZE) { 1714fa9e4066Sahrens dnode_rele(dn, FTAG); 1715fa9e4066Sahrens txg_wait_synced(dmu_objset_pool(os), 0); 1716ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 1717ea8dc4b6Seschrock if (err) 1718ea8dc4b6Seschrock return (err); 1719fa9e4066Sahrens } 1720fa9e4066Sahrens 1721fa9e4066Sahrens err = dnode_next_offset(dn, hole, off, 1, 1); 1722fa9e4066Sahrens dnode_rele(dn, FTAG); 1723fa9e4066Sahrens 1724fa9e4066Sahrens return (err); 1725fa9e4066Sahrens } 1726fa9e4066Sahrens 1727fa9e4066Sahrens void 1728fa9e4066Sahrens dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi) 1729fa9e4066Sahrens { 1730fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 1731fa9e4066Sahrens mutex_enter(&dn->dn_mtx); 1732fa9e4066Sahrens 1733fa9e4066Sahrens doi->doi_data_block_size = dn->dn_datablksz; 1734fa9e4066Sahrens doi->doi_metadata_block_size = dn->dn_indblkshift ? 1735fa9e4066Sahrens 1ULL << dn->dn_indblkshift : 0; 1736fa9e4066Sahrens doi->doi_indirection = dn->dn_nlevels; 1737fa9e4066Sahrens doi->doi_checksum = dn->dn_checksum; 1738fa9e4066Sahrens doi->doi_compress = dn->dn_compress; 173999653d4eSeschrock doi->doi_physical_blks = (DN_USED_BYTES(dn->dn_phys) + 174099653d4eSeschrock SPA_MINBLOCKSIZE/2) >> SPA_MINBLOCKSHIFT; 1741fa9e4066Sahrens doi->doi_max_block_offset = dn->dn_phys->dn_maxblkid; 1742fa9e4066Sahrens doi->doi_type = dn->dn_type; 1743fa9e4066Sahrens doi->doi_bonus_size = dn->dn_bonuslen; 1744fa9e4066Sahrens doi->doi_bonus_type = dn->dn_bonustype; 1745fa9e4066Sahrens 1746fa9e4066Sahrens mutex_exit(&dn->dn_mtx); 1747fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 1748fa9e4066Sahrens } 1749fa9e4066Sahrens 1750fa9e4066Sahrens /* 1751fa9e4066Sahrens * Get information on a DMU object. 1752fa9e4066Sahrens * If doi is NULL, just indicates whether the object exists. 1753fa9e4066Sahrens */ 1754fa9e4066Sahrens int 1755fa9e4066Sahrens dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi) 1756fa9e4066Sahrens { 1757ea8dc4b6Seschrock dnode_t *dn; 1758ea8dc4b6Seschrock int err = dnode_hold(os->os, object, FTAG, &dn); 1759fa9e4066Sahrens 1760ea8dc4b6Seschrock if (err) 1761ea8dc4b6Seschrock return (err); 1762fa9e4066Sahrens 1763fa9e4066Sahrens if (doi != NULL) 1764fa9e4066Sahrens dmu_object_info_from_dnode(dn, doi); 1765fa9e4066Sahrens 1766fa9e4066Sahrens dnode_rele(dn, FTAG); 1767fa9e4066Sahrens return (0); 1768fa9e4066Sahrens } 1769fa9e4066Sahrens 1770fa9e4066Sahrens /* 1771fa9e4066Sahrens * As above, but faster; can be used when you have a held dbuf in hand. 1772fa9e4066Sahrens */ 1773fa9e4066Sahrens void 1774fa9e4066Sahrens dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi) 1775fa9e4066Sahrens { 1776fa9e4066Sahrens dmu_object_info_from_dnode(((dmu_buf_impl_t *)db)->db_dnode, doi); 1777fa9e4066Sahrens } 1778fa9e4066Sahrens 1779fa9e4066Sahrens /* 1780fa9e4066Sahrens * Faster still when you only care about the size. 1781fa9e4066Sahrens * This is specifically optimized for zfs_getattr(). 1782fa9e4066Sahrens */ 1783fa9e4066Sahrens void 1784fa9e4066Sahrens dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize, u_longlong_t *nblk512) 1785fa9e4066Sahrens { 1786fa9e4066Sahrens dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode; 1787fa9e4066Sahrens 1788fa9e4066Sahrens *blksize = dn->dn_datablksz; 178999653d4eSeschrock /* add 1 for dnode space */ 179099653d4eSeschrock *nblk512 = ((DN_USED_BYTES(dn->dn_phys) + SPA_MINBLOCKSIZE/2) >> 179199653d4eSeschrock SPA_MINBLOCKSHIFT) + 1; 1792fa9e4066Sahrens } 1793fa9e4066Sahrens 1794ea8dc4b6Seschrock /* 1795ea8dc4b6Seschrock * Given a bookmark, return the name of the dataset, object, and range in 1796ea8dc4b6Seschrock * human-readable format. 1797ea8dc4b6Seschrock */ 1798ea8dc4b6Seschrock int 1799*e9dbad6fSeschrock spa_bookmark_name(spa_t *spa, zbookmark_t *zb, nvlist_t *nvl) 1800ea8dc4b6Seschrock { 1801ea8dc4b6Seschrock dsl_pool_t *dp; 1802ea8dc4b6Seschrock dsl_dataset_t *ds = NULL; 1803ea8dc4b6Seschrock objset_t *os = NULL; 1804ea8dc4b6Seschrock dnode_t *dn = NULL; 1805ea8dc4b6Seschrock int err, shift; 1806*e9dbad6fSeschrock char dsname[MAXNAMELEN]; 1807*e9dbad6fSeschrock char objname[32]; 1808*e9dbad6fSeschrock char range[64]; 1809ea8dc4b6Seschrock 1810ea8dc4b6Seschrock dp = spa_get_dsl(spa); 1811ea8dc4b6Seschrock if (zb->zb_objset != 0) { 1812ea8dc4b6Seschrock rw_enter(&dp->dp_config_rwlock, RW_READER); 1813ea8dc4b6Seschrock err = dsl_dataset_open_obj(dp, zb->zb_objset, 1814ea8dc4b6Seschrock NULL, DS_MODE_NONE, FTAG, &ds); 1815ea8dc4b6Seschrock if (err) { 1816ea8dc4b6Seschrock rw_exit(&dp->dp_config_rwlock); 1817ea8dc4b6Seschrock return (err); 1818ea8dc4b6Seschrock } 1819ea8dc4b6Seschrock dsl_dataset_name(ds, dsname); 1820ea8dc4b6Seschrock dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 1821ea8dc4b6Seschrock rw_exit(&dp->dp_config_rwlock); 1822ea8dc4b6Seschrock 1823ea8dc4b6Seschrock err = dmu_objset_open(dsname, DMU_OST_ANY, DS_MODE_NONE, &os); 1824ea8dc4b6Seschrock if (err) 1825ea8dc4b6Seschrock goto out; 1826ea8dc4b6Seschrock 1827ea8dc4b6Seschrock } else { 1828ea8dc4b6Seschrock dsl_dataset_name(NULL, dsname); 1829ea8dc4b6Seschrock os = dp->dp_meta_objset; 1830ea8dc4b6Seschrock } 1831ea8dc4b6Seschrock 1832ea8dc4b6Seschrock 1833ea8dc4b6Seschrock if (zb->zb_object == DMU_META_DNODE_OBJECT) { 1834*e9dbad6fSeschrock (void) strncpy(objname, "mdn", sizeof (objname)); 1835ea8dc4b6Seschrock } else { 1836*e9dbad6fSeschrock (void) snprintf(objname, sizeof (objname), "%lld", 1837ea8dc4b6Seschrock (longlong_t)zb->zb_object); 1838ea8dc4b6Seschrock } 1839ea8dc4b6Seschrock 1840ea8dc4b6Seschrock err = dnode_hold(os->os, zb->zb_object, FTAG, &dn); 1841ea8dc4b6Seschrock if (err) 1842ea8dc4b6Seschrock goto out; 1843ea8dc4b6Seschrock 1844ea8dc4b6Seschrock shift = (dn->dn_datablkshift?dn->dn_datablkshift:SPA_MAXBLOCKSHIFT) + 1845ea8dc4b6Seschrock zb->zb_level * (dn->dn_indblkshift - SPA_BLKPTRSHIFT); 1846*e9dbad6fSeschrock (void) snprintf(range, sizeof (range), "%llu-%llu", 1847ea8dc4b6Seschrock (u_longlong_t)(zb->zb_blkid << shift), 1848ea8dc4b6Seschrock (u_longlong_t)((zb->zb_blkid+1) << shift)); 1849ea8dc4b6Seschrock 1850*e9dbad6fSeschrock if ((err = nvlist_add_string(nvl, ZPOOL_ERR_DATASET, dsname)) != 0 || 1851*e9dbad6fSeschrock (err = nvlist_add_string(nvl, ZPOOL_ERR_OBJECT, objname)) != 0 || 1852*e9dbad6fSeschrock (err = nvlist_add_string(nvl, ZPOOL_ERR_RANGE, range)) != 0) 1853*e9dbad6fSeschrock goto out; 1854*e9dbad6fSeschrock 1855ea8dc4b6Seschrock out: 1856ea8dc4b6Seschrock if (dn) 1857ea8dc4b6Seschrock dnode_rele(dn, FTAG); 1858ea8dc4b6Seschrock if (os && os != dp->dp_meta_objset) 1859ea8dc4b6Seschrock dmu_objset_close(os); 1860ea8dc4b6Seschrock return (err); 1861ea8dc4b6Seschrock } 1862ea8dc4b6Seschrock 1863fa9e4066Sahrens void 1864fa9e4066Sahrens byteswap_uint64_array(void *vbuf, size_t size) 1865fa9e4066Sahrens { 1866fa9e4066Sahrens uint64_t *buf = vbuf; 1867fa9e4066Sahrens size_t count = size >> 3; 1868fa9e4066Sahrens int i; 1869fa9e4066Sahrens 1870fa9e4066Sahrens ASSERT((size & 7) == 0); 1871fa9e4066Sahrens 1872fa9e4066Sahrens for (i = 0; i < count; i++) 1873fa9e4066Sahrens buf[i] = BSWAP_64(buf[i]); 1874fa9e4066Sahrens } 1875fa9e4066Sahrens 1876fa9e4066Sahrens void 1877fa9e4066Sahrens byteswap_uint32_array(void *vbuf, size_t size) 1878fa9e4066Sahrens { 1879fa9e4066Sahrens uint32_t *buf = vbuf; 1880fa9e4066Sahrens size_t count = size >> 2; 1881fa9e4066Sahrens int i; 1882fa9e4066Sahrens 1883fa9e4066Sahrens ASSERT((size & 3) == 0); 1884fa9e4066Sahrens 1885fa9e4066Sahrens for (i = 0; i < count; i++) 1886fa9e4066Sahrens buf[i] = BSWAP_32(buf[i]); 1887fa9e4066Sahrens } 1888fa9e4066Sahrens 1889fa9e4066Sahrens void 1890fa9e4066Sahrens byteswap_uint16_array(void *vbuf, size_t size) 1891fa9e4066Sahrens { 1892fa9e4066Sahrens uint16_t *buf = vbuf; 1893fa9e4066Sahrens size_t count = size >> 1; 1894fa9e4066Sahrens int i; 1895fa9e4066Sahrens 1896fa9e4066Sahrens ASSERT((size & 1) == 0); 1897fa9e4066Sahrens 1898fa9e4066Sahrens for (i = 0; i < count; i++) 1899fa9e4066Sahrens buf[i] = BSWAP_16(buf[i]); 1900fa9e4066Sahrens } 1901fa9e4066Sahrens 1902fa9e4066Sahrens /* ARGSUSED */ 1903fa9e4066Sahrens void 1904fa9e4066Sahrens byteswap_uint8_array(void *vbuf, size_t size) 1905fa9e4066Sahrens { 1906fa9e4066Sahrens } 1907fa9e4066Sahrens 1908fa9e4066Sahrens void 1909fa9e4066Sahrens dmu_init(void) 1910fa9e4066Sahrens { 1911fa9e4066Sahrens dbuf_init(); 1912fa9e4066Sahrens dnode_init(); 1913fa9e4066Sahrens arc_init(); 1914fa9e4066Sahrens } 1915fa9e4066Sahrens 1916fa9e4066Sahrens void 1917fa9e4066Sahrens dmu_fini(void) 1918fa9e4066Sahrens { 1919fa9e4066Sahrens arc_fini(); 1920fa9e4066Sahrens dnode_fini(); 1921fa9e4066Sahrens dbuf_fini(); 1922fa9e4066Sahrens } 1923