/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
24fa9e4066Sahrens */ 25fa9e4066Sahrens 26fa9e4066Sahrens #include <sys/dmu.h> 27fa9e4066Sahrens #include <sys/dmu_impl.h> 28fa9e4066Sahrens #include <sys/dmu_tx.h> 29fa9e4066Sahrens #include <sys/dbuf.h> 30fa9e4066Sahrens #include <sys/dnode.h> 31fa9e4066Sahrens #include <sys/zfs_context.h> 32fa9e4066Sahrens #include <sys/dmu_objset.h> 33fa9e4066Sahrens #include <sys/dmu_traverse.h> 34fa9e4066Sahrens #include <sys/dsl_dataset.h> 35fa9e4066Sahrens #include <sys/dsl_dir.h> 36fa9e4066Sahrens #include <sys/dsl_pool.h> 371d452cf5Sahrens #include <sys/dsl_synctask.h> 38a2eea2e1Sahrens #include <sys/dsl_prop.h> 39fa9e4066Sahrens #include <sys/dmu_zfetch.h> 40fa9e4066Sahrens #include <sys/zfs_ioctl.h> 41fa9e4066Sahrens #include <sys/zap.h> 42ea8dc4b6Seschrock #include <sys/zio_checksum.h> 4344eda4d7Smaybee #ifdef _KERNEL 4444eda4d7Smaybee #include <sys/vmsystm.h> 450fab61baSJonathan W Adams #include <sys/zfs_znode.h> 4644eda4d7Smaybee #endif 47fa9e4066Sahrens 48fa9e4066Sahrens const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { 49fa9e4066Sahrens { byteswap_uint8_array, TRUE, "unallocated" }, 50fa9e4066Sahrens { zap_byteswap, TRUE, "object directory" }, 51fa9e4066Sahrens { byteswap_uint64_array, TRUE, "object array" }, 52fa9e4066Sahrens { byteswap_uint8_array, TRUE, "packed nvlist" }, 53fa9e4066Sahrens { byteswap_uint64_array, TRUE, "packed nvlist size" }, 54fa9e4066Sahrens { byteswap_uint64_array, TRUE, "bplist" }, 55fa9e4066Sahrens { byteswap_uint64_array, TRUE, "bplist header" }, 56fa9e4066Sahrens { byteswap_uint64_array, TRUE, "SPA space map header" }, 57fa9e4066Sahrens { byteswap_uint64_array, TRUE, "SPA space map" }, 58fa9e4066Sahrens { byteswap_uint64_array, TRUE, "ZIL intent log" }, 59fa9e4066Sahrens { dnode_buf_byteswap, TRUE, "DMU dnode" }, 60fa9e4066Sahrens { dmu_objset_byteswap, TRUE, "DMU objset" }, 61fa9e4066Sahrens { byteswap_uint64_array, TRUE, "DSL directory" }, 62fa9e4066Sahrens { zap_byteswap, TRUE, "DSL directory child map"}, 63fa9e4066Sahrens { 
zap_byteswap, TRUE, "DSL dataset snap map" }, 64fa9e4066Sahrens { zap_byteswap, TRUE, "DSL props" }, 65fa9e4066Sahrens { byteswap_uint64_array, TRUE, "DSL dataset" }, 66fa9e4066Sahrens { zfs_znode_byteswap, TRUE, "ZFS znode" }, 67da6c28aaSamw { zfs_oldacl_byteswap, TRUE, "ZFS V0 ACL" }, 68fa9e4066Sahrens { byteswap_uint8_array, FALSE, "ZFS plain file" }, 69fa9e4066Sahrens { zap_byteswap, TRUE, "ZFS directory" }, 70fa9e4066Sahrens { zap_byteswap, TRUE, "ZFS master node" }, 71fa9e4066Sahrens { zap_byteswap, TRUE, "ZFS delete queue" }, 72fa9e4066Sahrens { byteswap_uint8_array, FALSE, "zvol object" }, 73fa9e4066Sahrens { zap_byteswap, TRUE, "zvol prop" }, 74fa9e4066Sahrens { byteswap_uint8_array, FALSE, "other uint8[]" }, 75fa9e4066Sahrens { byteswap_uint64_array, FALSE, "other uint64[]" }, 76fa9e4066Sahrens { zap_byteswap, TRUE, "other ZAP" }, 77ea8dc4b6Seschrock { zap_byteswap, TRUE, "persistent error log" }, 7806eeb2adSek110237 { byteswap_uint8_array, TRUE, "SPA history" }, 7906eeb2adSek110237 { byteswap_uint64_array, TRUE, "SPA history offsets" }, 80b1b8ab34Slling { zap_byteswap, TRUE, "Pool properties" }, 81da6c28aaSamw { zap_byteswap, TRUE, "DSL permissions" }, 82da6c28aaSamw { zfs_acl_byteswap, TRUE, "ZFS ACL" }, 83da6c28aaSamw { byteswap_uint8_array, TRUE, "ZFS SYSACL" }, 84da6c28aaSamw { byteswap_uint8_array, TRUE, "FUID table" }, 85add89791Smarks { byteswap_uint64_array, TRUE, "FUID table size" }, 86088f3894Sahrens { zap_byteswap, TRUE, "DSL dataset next clones"}, 87088f3894Sahrens { zap_byteswap, TRUE, "scrub work queue" }, 8814843421SMatthew Ahrens { zap_byteswap, TRUE, "ZFS user/group used" }, 8914843421SMatthew Ahrens { zap_byteswap, TRUE, "ZFS user/group quota" }, 90fa9e4066Sahrens }; 91fa9e4066Sahrens 92fa9e4066Sahrens int 93ea8dc4b6Seschrock dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset, 94ea8dc4b6Seschrock void *tag, dmu_buf_t **dbp) 95fa9e4066Sahrens { 96fa9e4066Sahrens dnode_t *dn; 97fa9e4066Sahrens uint64_t blkid; 98fa9e4066Sahrens 
dmu_buf_impl_t *db; 99ea8dc4b6Seschrock int err; 100fa9e4066Sahrens 101ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 102ea8dc4b6Seschrock if (err) 103ea8dc4b6Seschrock return (err); 104fa9e4066Sahrens blkid = dbuf_whichblock(dn, offset); 105fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 106ea8dc4b6Seschrock db = dbuf_hold(dn, blkid, tag); 107fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 108ea8dc4b6Seschrock if (db == NULL) { 109ea8dc4b6Seschrock err = EIO; 110ea8dc4b6Seschrock } else { 111ea8dc4b6Seschrock err = dbuf_read(db, NULL, DB_RF_CANFAIL); 112ea8dc4b6Seschrock if (err) { 113ea8dc4b6Seschrock dbuf_rele(db, tag); 114ea8dc4b6Seschrock db = NULL; 115ea8dc4b6Seschrock } 116fa9e4066Sahrens } 117fa9e4066Sahrens 118ea8dc4b6Seschrock dnode_rele(dn, FTAG); 119ea8dc4b6Seschrock *dbp = &db->db; 120ea8dc4b6Seschrock return (err); 121fa9e4066Sahrens } 122fa9e4066Sahrens 123fa9e4066Sahrens int 124fa9e4066Sahrens dmu_bonus_max(void) 125fa9e4066Sahrens { 126fa9e4066Sahrens return (DN_MAX_BONUSLEN); 127fa9e4066Sahrens } 128fa9e4066Sahrens 1291934e92fSmaybee int 1301934e92fSmaybee dmu_set_bonus(dmu_buf_t *db, int newsize, dmu_tx_t *tx) 1311934e92fSmaybee { 1321934e92fSmaybee dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode; 1331934e92fSmaybee 1341934e92fSmaybee if (dn->dn_bonus != (dmu_buf_impl_t *)db) 1351934e92fSmaybee return (EINVAL); 1361934e92fSmaybee if (newsize < 0 || newsize > db->db_size) 1371934e92fSmaybee return (EINVAL); 1381934e92fSmaybee dnode_setbonuslen(dn, newsize, tx); 1391934e92fSmaybee return (0); 1401934e92fSmaybee } 1411934e92fSmaybee 142fa9e4066Sahrens /* 143ea8dc4b6Seschrock * returns ENOENT, EIO, or 0. 
144fa9e4066Sahrens */ 145ea8dc4b6Seschrock int 146ea8dc4b6Seschrock dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp) 147fa9e4066Sahrens { 148ea8dc4b6Seschrock dnode_t *dn; 149fa9e4066Sahrens dmu_buf_impl_t *db; 1501934e92fSmaybee int error; 151fa9e4066Sahrens 1521934e92fSmaybee error = dnode_hold(os->os, object, FTAG, &dn); 1531934e92fSmaybee if (error) 1541934e92fSmaybee return (error); 155fa9e4066Sahrens 156fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 157ea8dc4b6Seschrock if (dn->dn_bonus == NULL) { 158fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 159ea8dc4b6Seschrock rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 160ea8dc4b6Seschrock if (dn->dn_bonus == NULL) 1611934e92fSmaybee dbuf_create_bonus(dn); 162fa9e4066Sahrens } 163ea8dc4b6Seschrock db = dn->dn_bonus; 164ea8dc4b6Seschrock rw_exit(&dn->dn_struct_rwlock); 1651934e92fSmaybee 1661934e92fSmaybee /* as long as the bonus buf is held, the dnode will be held */ 1671934e92fSmaybee if (refcount_add(&db->db_holds, tag) == 1) 1681934e92fSmaybee VERIFY(dnode_add_ref(dn, db)); 1691934e92fSmaybee 170fa9e4066Sahrens dnode_rele(dn, FTAG); 171ea8dc4b6Seschrock 172ea8dc4b6Seschrock VERIFY(0 == dbuf_read(db, NULL, DB_RF_MUST_SUCCEED)); 173ea8dc4b6Seschrock 174ea8dc4b6Seschrock *dbp = &db->db; 175ea8dc4b6Seschrock return (0); 176fa9e4066Sahrens } 177fa9e4066Sahrens 17813506d1eSmaybee /* 17913506d1eSmaybee * Note: longer-term, we should modify all of the dmu_buf_*() interfaces 18013506d1eSmaybee * to take a held dnode rather than <os, object> -- the lookup is wasteful, 18113506d1eSmaybee * and can induce severe lock contention when writing to several files 18213506d1eSmaybee * whose dnodes are in the same block. 
18313506d1eSmaybee */ 18413506d1eSmaybee static int 18513506d1eSmaybee dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, 186ea8dc4b6Seschrock uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) 187fa9e4066Sahrens { 18805715f94SMark Maybee dsl_pool_t *dp = NULL; 189fa9e4066Sahrens dmu_buf_t **dbp; 190fa9e4066Sahrens uint64_t blkid, nblks, i; 191ea8dc4b6Seschrock uint32_t flags; 192ea8dc4b6Seschrock int err; 193ea8dc4b6Seschrock zio_t *zio; 19405715f94SMark Maybee hrtime_t start; 195ea8dc4b6Seschrock 196ea8dc4b6Seschrock ASSERT(length <= DMU_MAX_ACCESS); 197fa9e4066Sahrens 198ea8dc4b6Seschrock flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT; 199e1930233Sbonwick if (length > zfetch_array_rd_sz) 200ea8dc4b6Seschrock flags |= DB_RF_NOPREFETCH; 201ea8dc4b6Seschrock 202fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 203fa9e4066Sahrens if (dn->dn_datablkshift) { 204fa9e4066Sahrens int blkshift = dn->dn_datablkshift; 205fa9e4066Sahrens nblks = (P2ROUNDUP(offset+length, 1ULL<<blkshift) - 206fa9e4066Sahrens P2ALIGN(offset, 1ULL<<blkshift)) >> blkshift; 207fa9e4066Sahrens } else { 2080125049cSahrens if (offset + length > dn->dn_datablksz) { 2090125049cSahrens zfs_panic_recover("zfs: accessing past end of object " 2100125049cSahrens "%llx/%llx (size=%u access=%llu+%llu)", 2110125049cSahrens (longlong_t)dn->dn_objset-> 2120125049cSahrens os_dsl_dataset->ds_object, 2130125049cSahrens (longlong_t)dn->dn_object, dn->dn_datablksz, 2140125049cSahrens (longlong_t)offset, (longlong_t)length); 2150125049cSahrens return (EIO); 2160125049cSahrens } 217fa9e4066Sahrens nblks = 1; 218fa9e4066Sahrens } 219ea8dc4b6Seschrock dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP); 220fa9e4066Sahrens 22105715f94SMark Maybee if (dn->dn_objset->os_dsl_dataset) 22205715f94SMark Maybee dp = dn->dn_objset->os_dsl_dataset->ds_dir->dd_pool; 22305715f94SMark Maybee if (dp && dsl_pool_sync_context(dp)) 22405715f94SMark Maybee start = gethrtime(); 225e14bb325SJeff Bonwick 
zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL); 226fa9e4066Sahrens blkid = dbuf_whichblock(dn, offset); 227fa9e4066Sahrens for (i = 0; i < nblks; i++) { 228ea8dc4b6Seschrock dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag); 229ea8dc4b6Seschrock if (db == NULL) { 230ea8dc4b6Seschrock rw_exit(&dn->dn_struct_rwlock); 231ea8dc4b6Seschrock dmu_buf_rele_array(dbp, nblks, tag); 232ea8dc4b6Seschrock zio_nowait(zio); 233ea8dc4b6Seschrock return (EIO); 234ea8dc4b6Seschrock } 235ea8dc4b6Seschrock /* initiate async i/o */ 23613506d1eSmaybee if (read) { 237ea8dc4b6Seschrock rw_exit(&dn->dn_struct_rwlock); 238ea8dc4b6Seschrock (void) dbuf_read(db, zio, flags); 239ea8dc4b6Seschrock rw_enter(&dn->dn_struct_rwlock, RW_READER); 240ea8dc4b6Seschrock } 241ea8dc4b6Seschrock dbp[i] = &db->db; 242fa9e4066Sahrens } 243fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 244fa9e4066Sahrens 245ea8dc4b6Seschrock /* wait for async i/o */ 246ea8dc4b6Seschrock err = zio_wait(zio); 24705715f94SMark Maybee /* track read overhead when we are in sync context */ 24805715f94SMark Maybee if (dp && dsl_pool_sync_context(dp)) 24905715f94SMark Maybee dp->dp_read_overhead += gethrtime() - start; 250ea8dc4b6Seschrock if (err) { 251ea8dc4b6Seschrock dmu_buf_rele_array(dbp, nblks, tag); 252ea8dc4b6Seschrock return (err); 253ea8dc4b6Seschrock } 254ea8dc4b6Seschrock 255ea8dc4b6Seschrock /* wait for other io to complete */ 256ea8dc4b6Seschrock if (read) { 257ea8dc4b6Seschrock for (i = 0; i < nblks; i++) { 258ea8dc4b6Seschrock dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i]; 259ea8dc4b6Seschrock mutex_enter(&db->db_mtx); 260ea8dc4b6Seschrock while (db->db_state == DB_READ || 261ea8dc4b6Seschrock db->db_state == DB_FILL) 262ea8dc4b6Seschrock cv_wait(&db->db_changed, &db->db_mtx); 263ea8dc4b6Seschrock if (db->db_state == DB_UNCACHED) 264ea8dc4b6Seschrock err = EIO; 265ea8dc4b6Seschrock mutex_exit(&db->db_mtx); 266ea8dc4b6Seschrock if (err) { 267ea8dc4b6Seschrock dmu_buf_rele_array(dbp, nblks, tag); 
268ea8dc4b6Seschrock return (err); 269ea8dc4b6Seschrock } 270ea8dc4b6Seschrock } 271ea8dc4b6Seschrock } 272ea8dc4b6Seschrock 273ea8dc4b6Seschrock *numbufsp = nblks; 274ea8dc4b6Seschrock *dbpp = dbp; 275ea8dc4b6Seschrock return (0); 276fa9e4066Sahrens } 277fa9e4066Sahrens 278a2eea2e1Sahrens static int 27913506d1eSmaybee dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset, 28013506d1eSmaybee uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) 28113506d1eSmaybee { 28213506d1eSmaybee dnode_t *dn; 28313506d1eSmaybee int err; 28413506d1eSmaybee 28513506d1eSmaybee err = dnode_hold(os->os, object, FTAG, &dn); 28613506d1eSmaybee if (err) 28713506d1eSmaybee return (err); 28813506d1eSmaybee 28913506d1eSmaybee err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, 29013506d1eSmaybee numbufsp, dbpp); 29113506d1eSmaybee 29213506d1eSmaybee dnode_rele(dn, FTAG); 29313506d1eSmaybee 29413506d1eSmaybee return (err); 29513506d1eSmaybee } 29613506d1eSmaybee 29713506d1eSmaybee int 29813506d1eSmaybee dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset, 29913506d1eSmaybee uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) 30013506d1eSmaybee { 30113506d1eSmaybee dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode; 30213506d1eSmaybee int err; 30313506d1eSmaybee 30413506d1eSmaybee err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, 30513506d1eSmaybee numbufsp, dbpp); 30613506d1eSmaybee 30713506d1eSmaybee return (err); 30813506d1eSmaybee } 30913506d1eSmaybee 310fa9e4066Sahrens void 311ea8dc4b6Seschrock dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, void *tag) 312fa9e4066Sahrens { 313fa9e4066Sahrens int i; 314fa9e4066Sahrens dmu_buf_impl_t **dbp = (dmu_buf_impl_t **)dbp_fake; 315fa9e4066Sahrens 316fa9e4066Sahrens if (numbufs == 0) 317fa9e4066Sahrens return; 318fa9e4066Sahrens 319ea8dc4b6Seschrock for (i = 0; i < numbufs; i++) { 320ea8dc4b6Seschrock if (dbp[i]) 321ea8dc4b6Seschrock dbuf_rele(dbp[i], tag); 
322ea8dc4b6Seschrock } 323fa9e4066Sahrens 324fa9e4066Sahrens kmem_free(dbp, sizeof (dmu_buf_t *) * numbufs); 325fa9e4066Sahrens } 326fa9e4066Sahrens 327fa9e4066Sahrens void 328fa9e4066Sahrens dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len) 329fa9e4066Sahrens { 330fa9e4066Sahrens dnode_t *dn; 331fa9e4066Sahrens uint64_t blkid; 332ea8dc4b6Seschrock int nblks, i, err; 333fa9e4066Sahrens 334416e0cd8Sek110237 if (zfs_prefetch_disable) 335416e0cd8Sek110237 return; 336416e0cd8Sek110237 337fa9e4066Sahrens if (len == 0) { /* they're interested in the bonus buffer */ 338fa9e4066Sahrens dn = os->os->os_meta_dnode; 339fa9e4066Sahrens 340fa9e4066Sahrens if (object == 0 || object >= DN_MAX_OBJECT) 341fa9e4066Sahrens return; 342fa9e4066Sahrens 343fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 344fa9e4066Sahrens blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t)); 345fa9e4066Sahrens dbuf_prefetch(dn, blkid); 346fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 347fa9e4066Sahrens return; 348fa9e4066Sahrens } 349fa9e4066Sahrens 350fa9e4066Sahrens /* 351fa9e4066Sahrens * XXX - Note, if the dnode for the requested object is not 352fa9e4066Sahrens * already cached, we will do a *synchronous* read in the 353fa9e4066Sahrens * dnode_hold() call. The same is true for any indirects. 
354fa9e4066Sahrens */ 355ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 356ea8dc4b6Seschrock if (err != 0) 357fa9e4066Sahrens return; 358fa9e4066Sahrens 359fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 360fa9e4066Sahrens if (dn->dn_datablkshift) { 361fa9e4066Sahrens int blkshift = dn->dn_datablkshift; 362fa9e4066Sahrens nblks = (P2ROUNDUP(offset+len, 1<<blkshift) - 363fa9e4066Sahrens P2ALIGN(offset, 1<<blkshift)) >> blkshift; 364fa9e4066Sahrens } else { 365fa9e4066Sahrens nblks = (offset < dn->dn_datablksz); 366fa9e4066Sahrens } 367fa9e4066Sahrens 368fa9e4066Sahrens if (nblks != 0) { 369fa9e4066Sahrens blkid = dbuf_whichblock(dn, offset); 370fa9e4066Sahrens for (i = 0; i < nblks; i++) 371fa9e4066Sahrens dbuf_prefetch(dn, blkid+i); 372fa9e4066Sahrens } 373fa9e4066Sahrens 374fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 375fa9e4066Sahrens 376fa9e4066Sahrens dnode_rele(dn, FTAG); 377fa9e4066Sahrens } 378fa9e4066Sahrens 379cdb0ab79Smaybee static int 380cdb0ab79Smaybee get_next_chunk(dnode_t *dn, uint64_t *offset, uint64_t limit) 381cdb0ab79Smaybee { 3821c8564a7SMark Maybee uint64_t len = *offset - limit; 383cdb0ab79Smaybee uint64_t chunk_len = dn->dn_datablksz * DMU_MAX_DELETEBLKCNT; 3841c8564a7SMark Maybee uint64_t subchunk = 3851c8564a7SMark Maybee dn->dn_datablksz * EPB(dn->dn_indblkshift, SPA_BLKPTRSHIFT); 386cdb0ab79Smaybee 387cdb0ab79Smaybee ASSERT(limit <= *offset); 388cdb0ab79Smaybee 3891c8564a7SMark Maybee if (len <= chunk_len) { 390cdb0ab79Smaybee *offset = limit; 391cdb0ab79Smaybee return (0); 392cdb0ab79Smaybee } 393cdb0ab79Smaybee 3941c8564a7SMark Maybee ASSERT(ISP2(subchunk)); 3951c8564a7SMark Maybee 396cdb0ab79Smaybee while (*offset > limit) { 3971c8564a7SMark Maybee uint64_t initial_offset = P2ROUNDUP(*offset, subchunk); 398cdb0ab79Smaybee uint64_t delta; 3991c8564a7SMark Maybee int err; 400cdb0ab79Smaybee 401cdb0ab79Smaybee /* skip over allocated data */ 402cdb0ab79Smaybee err = dnode_next_offset(dn, 403cdb0ab79Smaybee 
DNODE_FIND_HOLE|DNODE_FIND_BACKWARDS, offset, 1, 1, 0); 404cdb0ab79Smaybee if (err == ESRCH) 405cdb0ab79Smaybee *offset = limit; 406cdb0ab79Smaybee else if (err) 407cdb0ab79Smaybee return (err); 408cdb0ab79Smaybee 409cdb0ab79Smaybee ASSERT3U(*offset, <=, initial_offset); 4101c8564a7SMark Maybee *offset = P2ALIGN(*offset, subchunk); 411cdb0ab79Smaybee delta = initial_offset - *offset; 412cdb0ab79Smaybee if (delta >= chunk_len) { 413cdb0ab79Smaybee *offset += delta - chunk_len; 414cdb0ab79Smaybee return (0); 415cdb0ab79Smaybee } 416cdb0ab79Smaybee chunk_len -= delta; 417cdb0ab79Smaybee 418cdb0ab79Smaybee /* skip over unallocated data */ 419cdb0ab79Smaybee err = dnode_next_offset(dn, 420cdb0ab79Smaybee DNODE_FIND_BACKWARDS, offset, 1, 1, 0); 421cdb0ab79Smaybee if (err == ESRCH) 422cdb0ab79Smaybee *offset = limit; 423cdb0ab79Smaybee else if (err) 424cdb0ab79Smaybee return (err); 425cdb0ab79Smaybee 426cdb0ab79Smaybee if (*offset < limit) 427cdb0ab79Smaybee *offset = limit; 428cdb0ab79Smaybee ASSERT3U(*offset, <, initial_offset); 429cdb0ab79Smaybee } 430cdb0ab79Smaybee return (0); 431cdb0ab79Smaybee } 432cdb0ab79Smaybee 433cdb0ab79Smaybee static int 434cdb0ab79Smaybee dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset, 435cdb0ab79Smaybee uint64_t length, boolean_t free_dnode) 436cdb0ab79Smaybee { 437cdb0ab79Smaybee dmu_tx_t *tx; 438cdb0ab79Smaybee uint64_t object_size, start, end, len; 439cdb0ab79Smaybee boolean_t trunc = (length == DMU_OBJECT_END); 440cdb0ab79Smaybee int align, err; 441cdb0ab79Smaybee 442cdb0ab79Smaybee align = 1 << dn->dn_datablkshift; 443cdb0ab79Smaybee ASSERT(align > 0); 444cdb0ab79Smaybee object_size = align == 1 ? 
dn->dn_datablksz : 445cdb0ab79Smaybee (dn->dn_maxblkid + 1) << dn->dn_datablkshift; 446cdb0ab79Smaybee 44714843421SMatthew Ahrens end = offset + length; 44814843421SMatthew Ahrens if (trunc || end > object_size) 449cdb0ab79Smaybee end = object_size; 450cdb0ab79Smaybee if (end <= offset) 451cdb0ab79Smaybee return (0); 452cdb0ab79Smaybee length = end - offset; 453cdb0ab79Smaybee 454cdb0ab79Smaybee while (length) { 455cdb0ab79Smaybee start = end; 45614843421SMatthew Ahrens /* assert(offset <= start) */ 457cdb0ab79Smaybee err = get_next_chunk(dn, &start, offset); 458cdb0ab79Smaybee if (err) 459cdb0ab79Smaybee return (err); 460cdb0ab79Smaybee len = trunc ? DMU_OBJECT_END : end - start; 461cdb0ab79Smaybee 462cdb0ab79Smaybee tx = dmu_tx_create(os); 463cdb0ab79Smaybee dmu_tx_hold_free(tx, dn->dn_object, start, len); 464cdb0ab79Smaybee err = dmu_tx_assign(tx, TXG_WAIT); 465cdb0ab79Smaybee if (err) { 466cdb0ab79Smaybee dmu_tx_abort(tx); 467cdb0ab79Smaybee return (err); 468cdb0ab79Smaybee } 469cdb0ab79Smaybee 470cdb0ab79Smaybee dnode_free_range(dn, start, trunc ? 
-1 : len, tx); 471cdb0ab79Smaybee 4721c8564a7SMark Maybee if (start == 0 && free_dnode) { 4731c8564a7SMark Maybee ASSERT(trunc); 474cdb0ab79Smaybee dnode_free(dn, tx); 4751c8564a7SMark Maybee } 476cdb0ab79Smaybee 477cdb0ab79Smaybee length -= end - start; 478cdb0ab79Smaybee 479cdb0ab79Smaybee dmu_tx_commit(tx); 480cdb0ab79Smaybee end = start; 481cdb0ab79Smaybee } 482cdb0ab79Smaybee return (0); 483cdb0ab79Smaybee } 484cdb0ab79Smaybee 485cdb0ab79Smaybee int 486cdb0ab79Smaybee dmu_free_long_range(objset_t *os, uint64_t object, 487cdb0ab79Smaybee uint64_t offset, uint64_t length) 488cdb0ab79Smaybee { 489cdb0ab79Smaybee dnode_t *dn; 490cdb0ab79Smaybee int err; 491cdb0ab79Smaybee 492cdb0ab79Smaybee err = dnode_hold(os->os, object, FTAG, &dn); 493cdb0ab79Smaybee if (err != 0) 494cdb0ab79Smaybee return (err); 495cdb0ab79Smaybee err = dmu_free_long_range_impl(os, dn, offset, length, FALSE); 496cdb0ab79Smaybee dnode_rele(dn, FTAG); 497cdb0ab79Smaybee return (err); 498cdb0ab79Smaybee } 499cdb0ab79Smaybee 500cdb0ab79Smaybee int 501cdb0ab79Smaybee dmu_free_object(objset_t *os, uint64_t object) 502cdb0ab79Smaybee { 503cdb0ab79Smaybee dnode_t *dn; 504cdb0ab79Smaybee dmu_tx_t *tx; 505cdb0ab79Smaybee int err; 506cdb0ab79Smaybee 507cdb0ab79Smaybee err = dnode_hold_impl(os->os, object, DNODE_MUST_BE_ALLOCATED, 508cdb0ab79Smaybee FTAG, &dn); 509cdb0ab79Smaybee if (err != 0) 510cdb0ab79Smaybee return (err); 511cdb0ab79Smaybee if (dn->dn_nlevels == 1) { 512cdb0ab79Smaybee tx = dmu_tx_create(os); 513cdb0ab79Smaybee dmu_tx_hold_bonus(tx, object); 514cdb0ab79Smaybee dmu_tx_hold_free(tx, dn->dn_object, 0, DMU_OBJECT_END); 515cdb0ab79Smaybee err = dmu_tx_assign(tx, TXG_WAIT); 516cdb0ab79Smaybee if (err == 0) { 517cdb0ab79Smaybee dnode_free_range(dn, 0, DMU_OBJECT_END, tx); 518cdb0ab79Smaybee dnode_free(dn, tx); 519cdb0ab79Smaybee dmu_tx_commit(tx); 520cdb0ab79Smaybee } else { 521cdb0ab79Smaybee dmu_tx_abort(tx); 522cdb0ab79Smaybee } 523cdb0ab79Smaybee } else { 524cdb0ab79Smaybee err = 
dmu_free_long_range_impl(os, dn, 0, DMU_OBJECT_END, TRUE); 525cdb0ab79Smaybee } 526cdb0ab79Smaybee dnode_rele(dn, FTAG); 527cdb0ab79Smaybee return (err); 528cdb0ab79Smaybee } 529cdb0ab79Smaybee 530ea8dc4b6Seschrock int 531fa9e4066Sahrens dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, 532fa9e4066Sahrens uint64_t size, dmu_tx_t *tx) 533fa9e4066Sahrens { 534ea8dc4b6Seschrock dnode_t *dn; 535ea8dc4b6Seschrock int err = dnode_hold(os->os, object, FTAG, &dn); 536ea8dc4b6Seschrock if (err) 537ea8dc4b6Seschrock return (err); 538fa9e4066Sahrens ASSERT(offset < UINT64_MAX); 539fa9e4066Sahrens ASSERT(size == -1ULL || size <= UINT64_MAX - offset); 540fa9e4066Sahrens dnode_free_range(dn, offset, size, tx); 541fa9e4066Sahrens dnode_rele(dn, FTAG); 542ea8dc4b6Seschrock return (0); 543fa9e4066Sahrens } 544fa9e4066Sahrens 545ea8dc4b6Seschrock int 546ea8dc4b6Seschrock dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 547ea8dc4b6Seschrock void *buf) 548fa9e4066Sahrens { 549fa9e4066Sahrens dnode_t *dn; 550fa9e4066Sahrens dmu_buf_t **dbp; 551ea8dc4b6Seschrock int numbufs, i, err; 552fa9e4066Sahrens 553ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 554ea8dc4b6Seschrock if (err) 555ea8dc4b6Seschrock return (err); 556feb08c6bSbillm 557feb08c6bSbillm /* 558feb08c6bSbillm * Deal with odd block sizes, where there can't be data past the first 559feb08c6bSbillm * block. If we ever do the tail block optimization, we will need to 560feb08c6bSbillm * handle that here as well. 561feb08c6bSbillm */ 562fa9e4066Sahrens if (dn->dn_datablkshift == 0) { 563fa9e4066Sahrens int newsz = offset > dn->dn_datablksz ? 
0 : 564fa9e4066Sahrens MIN(size, dn->dn_datablksz - offset); 565fa9e4066Sahrens bzero((char *)buf + newsz, size - newsz); 566fa9e4066Sahrens size = newsz; 567fa9e4066Sahrens } 568fa9e4066Sahrens 569fa9e4066Sahrens while (size > 0) { 570fa9e4066Sahrens uint64_t mylen = MIN(size, DMU_MAX_ACCESS / 2); 571fa9e4066Sahrens 572fa9e4066Sahrens /* 573fa9e4066Sahrens * NB: we could do this block-at-a-time, but it's nice 574fa9e4066Sahrens * to be reading in parallel. 575fa9e4066Sahrens */ 576a2eea2e1Sahrens err = dmu_buf_hold_array_by_dnode(dn, offset, mylen, 577ea8dc4b6Seschrock TRUE, FTAG, &numbufs, &dbp); 578ea8dc4b6Seschrock if (err) 5791934e92fSmaybee break; 580fa9e4066Sahrens 581fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 582fa9e4066Sahrens int tocpy; 583fa9e4066Sahrens int bufoff; 584fa9e4066Sahrens dmu_buf_t *db = dbp[i]; 585fa9e4066Sahrens 586fa9e4066Sahrens ASSERT(size > 0); 587fa9e4066Sahrens 588fa9e4066Sahrens bufoff = offset - db->db_offset; 589fa9e4066Sahrens tocpy = (int)MIN(db->db_size - bufoff, size); 590fa9e4066Sahrens 591fa9e4066Sahrens bcopy((char *)db->db_data + bufoff, buf, tocpy); 592fa9e4066Sahrens 593fa9e4066Sahrens offset += tocpy; 594fa9e4066Sahrens size -= tocpy; 595fa9e4066Sahrens buf = (char *)buf + tocpy; 596fa9e4066Sahrens } 597ea8dc4b6Seschrock dmu_buf_rele_array(dbp, numbufs, FTAG); 598fa9e4066Sahrens } 599a2eea2e1Sahrens dnode_rele(dn, FTAG); 6001934e92fSmaybee return (err); 601fa9e4066Sahrens } 602fa9e4066Sahrens 603fa9e4066Sahrens void 604fa9e4066Sahrens dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 605fa9e4066Sahrens const void *buf, dmu_tx_t *tx) 606fa9e4066Sahrens { 607fa9e4066Sahrens dmu_buf_t **dbp; 608fa9e4066Sahrens int numbufs, i; 609fa9e4066Sahrens 61013506d1eSmaybee if (size == 0) 61113506d1eSmaybee return; 61213506d1eSmaybee 613ea8dc4b6Seschrock VERIFY(0 == dmu_buf_hold_array(os, object, offset, size, 614ea8dc4b6Seschrock FALSE, FTAG, &numbufs, &dbp)); 615fa9e4066Sahrens 616fa9e4066Sahrens for (i 
= 0; i < numbufs; i++) { 617fa9e4066Sahrens int tocpy; 618fa9e4066Sahrens int bufoff; 619fa9e4066Sahrens dmu_buf_t *db = dbp[i]; 620fa9e4066Sahrens 621fa9e4066Sahrens ASSERT(size > 0); 622fa9e4066Sahrens 623fa9e4066Sahrens bufoff = offset - db->db_offset; 624fa9e4066Sahrens tocpy = (int)MIN(db->db_size - bufoff, size); 625fa9e4066Sahrens 626fa9e4066Sahrens ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 627fa9e4066Sahrens 628fa9e4066Sahrens if (tocpy == db->db_size) 629fa9e4066Sahrens dmu_buf_will_fill(db, tx); 630fa9e4066Sahrens else 631fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 632fa9e4066Sahrens 633fa9e4066Sahrens bcopy(buf, (char *)db->db_data + bufoff, tocpy); 634fa9e4066Sahrens 635fa9e4066Sahrens if (tocpy == db->db_size) 636fa9e4066Sahrens dmu_buf_fill_done(db, tx); 637fa9e4066Sahrens 638fa9e4066Sahrens offset += tocpy; 639fa9e4066Sahrens size -= tocpy; 640fa9e4066Sahrens buf = (char *)buf + tocpy; 641fa9e4066Sahrens } 642ea8dc4b6Seschrock dmu_buf_rele_array(dbp, numbufs, FTAG); 643fa9e4066Sahrens } 644fa9e4066Sahrens 64582c9918fSTim Haley void 64682c9918fSTim Haley dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 64782c9918fSTim Haley dmu_tx_t *tx) 64882c9918fSTim Haley { 64982c9918fSTim Haley dmu_buf_t **dbp; 65082c9918fSTim Haley int numbufs, i; 65182c9918fSTim Haley 65282c9918fSTim Haley if (size == 0) 65382c9918fSTim Haley return; 65482c9918fSTim Haley 65582c9918fSTim Haley VERIFY(0 == dmu_buf_hold_array(os, object, offset, size, 65682c9918fSTim Haley FALSE, FTAG, &numbufs, &dbp)); 65782c9918fSTim Haley 65882c9918fSTim Haley for (i = 0; i < numbufs; i++) { 65982c9918fSTim Haley dmu_buf_t *db = dbp[i]; 66082c9918fSTim Haley 66182c9918fSTim Haley dmu_buf_will_not_fill(db, tx); 66282c9918fSTim Haley } 66382c9918fSTim Haley dmu_buf_rele_array(dbp, numbufs, FTAG); 66482c9918fSTim Haley } 66582c9918fSTim Haley 666fa9e4066Sahrens #ifdef _KERNEL 667fa9e4066Sahrens int 668feb08c6bSbillm dmu_read_uio(objset_t *os, uint64_t 
object, uio_t *uio, uint64_t size) 669feb08c6bSbillm { 670feb08c6bSbillm dmu_buf_t **dbp; 671feb08c6bSbillm int numbufs, i, err; 672feb08c6bSbillm 673feb08c6bSbillm /* 674feb08c6bSbillm * NB: we could do this block-at-a-time, but it's nice 675feb08c6bSbillm * to be reading in parallel. 676feb08c6bSbillm */ 677feb08c6bSbillm err = dmu_buf_hold_array(os, object, uio->uio_loffset, size, TRUE, FTAG, 678feb08c6bSbillm &numbufs, &dbp); 679feb08c6bSbillm if (err) 680feb08c6bSbillm return (err); 681feb08c6bSbillm 682feb08c6bSbillm for (i = 0; i < numbufs; i++) { 683feb08c6bSbillm int tocpy; 684feb08c6bSbillm int bufoff; 685feb08c6bSbillm dmu_buf_t *db = dbp[i]; 686feb08c6bSbillm 687feb08c6bSbillm ASSERT(size > 0); 688feb08c6bSbillm 689feb08c6bSbillm bufoff = uio->uio_loffset - db->db_offset; 690feb08c6bSbillm tocpy = (int)MIN(db->db_size - bufoff, size); 691feb08c6bSbillm 692feb08c6bSbillm err = uiomove((char *)db->db_data + bufoff, tocpy, 693feb08c6bSbillm UIO_READ, uio); 694feb08c6bSbillm if (err) 695feb08c6bSbillm break; 696feb08c6bSbillm 697feb08c6bSbillm size -= tocpy; 698feb08c6bSbillm } 699feb08c6bSbillm dmu_buf_rele_array(dbp, numbufs, FTAG); 700feb08c6bSbillm 701feb08c6bSbillm return (err); 702feb08c6bSbillm } 703feb08c6bSbillm 704feb08c6bSbillm int 705feb08c6bSbillm dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size, 706feb08c6bSbillm dmu_tx_t *tx) 707fa9e4066Sahrens { 708fa9e4066Sahrens dmu_buf_t **dbp; 709fa9e4066Sahrens int numbufs, i; 710fa9e4066Sahrens int err = 0; 711fa9e4066Sahrens 71213506d1eSmaybee if (size == 0) 71313506d1eSmaybee return (0); 71413506d1eSmaybee 715feb08c6bSbillm err = dmu_buf_hold_array(os, object, uio->uio_loffset, size, 716ea8dc4b6Seschrock FALSE, FTAG, &numbufs, &dbp); 717ea8dc4b6Seschrock if (err) 718ea8dc4b6Seschrock return (err); 719fa9e4066Sahrens 720fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 721fa9e4066Sahrens int tocpy; 722fa9e4066Sahrens int bufoff; 723fa9e4066Sahrens dmu_buf_t *db = dbp[i]; 
724fa9e4066Sahrens 725fa9e4066Sahrens ASSERT(size > 0); 726fa9e4066Sahrens 727feb08c6bSbillm bufoff = uio->uio_loffset - db->db_offset; 728fa9e4066Sahrens tocpy = (int)MIN(db->db_size - bufoff, size); 729fa9e4066Sahrens 730fa9e4066Sahrens ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 731fa9e4066Sahrens 732fa9e4066Sahrens if (tocpy == db->db_size) 733fa9e4066Sahrens dmu_buf_will_fill(db, tx); 734fa9e4066Sahrens else 735fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 736fa9e4066Sahrens 737fa9e4066Sahrens /* 738fa9e4066Sahrens * XXX uiomove could block forever (eg. nfs-backed 739fa9e4066Sahrens * pages). There needs to be a uiolockdown() function 740fa9e4066Sahrens * to lock the pages in memory, so that uiomove won't 741fa9e4066Sahrens * block. 742fa9e4066Sahrens */ 743fa9e4066Sahrens err = uiomove((char *)db->db_data + bufoff, tocpy, 744fa9e4066Sahrens UIO_WRITE, uio); 745fa9e4066Sahrens 746fa9e4066Sahrens if (tocpy == db->db_size) 747fa9e4066Sahrens dmu_buf_fill_done(db, tx); 748fa9e4066Sahrens 749fa9e4066Sahrens if (err) 750fa9e4066Sahrens break; 751fa9e4066Sahrens 752fa9e4066Sahrens size -= tocpy; 753fa9e4066Sahrens } 754ea8dc4b6Seschrock dmu_buf_rele_array(dbp, numbufs, FTAG); 755fa9e4066Sahrens return (err); 756fa9e4066Sahrens } 75744eda4d7Smaybee 75844eda4d7Smaybee int 75944eda4d7Smaybee dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 76044eda4d7Smaybee page_t *pp, dmu_tx_t *tx) 76144eda4d7Smaybee { 76244eda4d7Smaybee dmu_buf_t **dbp; 76344eda4d7Smaybee int numbufs, i; 76444eda4d7Smaybee int err; 76544eda4d7Smaybee 76644eda4d7Smaybee if (size == 0) 76744eda4d7Smaybee return (0); 76844eda4d7Smaybee 76944eda4d7Smaybee err = dmu_buf_hold_array(os, object, offset, size, 77044eda4d7Smaybee FALSE, FTAG, &numbufs, &dbp); 77144eda4d7Smaybee if (err) 77244eda4d7Smaybee return (err); 77344eda4d7Smaybee 77444eda4d7Smaybee for (i = 0; i < numbufs; i++) { 77544eda4d7Smaybee int tocpy, copied, thiscpy; 77644eda4d7Smaybee int bufoff; 
77744eda4d7Smaybee dmu_buf_t *db = dbp[i]; 77844eda4d7Smaybee caddr_t va; 77944eda4d7Smaybee 78044eda4d7Smaybee ASSERT(size > 0); 78144eda4d7Smaybee ASSERT3U(db->db_size, >=, PAGESIZE); 78244eda4d7Smaybee 78344eda4d7Smaybee bufoff = offset - db->db_offset; 78444eda4d7Smaybee tocpy = (int)MIN(db->db_size - bufoff, size); 78544eda4d7Smaybee 78644eda4d7Smaybee ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 78744eda4d7Smaybee 78844eda4d7Smaybee if (tocpy == db->db_size) 78944eda4d7Smaybee dmu_buf_will_fill(db, tx); 79044eda4d7Smaybee else 79144eda4d7Smaybee dmu_buf_will_dirty(db, tx); 79244eda4d7Smaybee 79344eda4d7Smaybee for (copied = 0; copied < tocpy; copied += PAGESIZE) { 79444eda4d7Smaybee ASSERT3U(pp->p_offset, ==, db->db_offset + bufoff); 79544eda4d7Smaybee thiscpy = MIN(PAGESIZE, tocpy - copied); 7960fab61baSJonathan W Adams va = zfs_map_page(pp, S_READ); 79744eda4d7Smaybee bcopy(va, (char *)db->db_data + bufoff, thiscpy); 7980fab61baSJonathan W Adams zfs_unmap_page(pp, va); 79944eda4d7Smaybee pp = pp->p_next; 80044eda4d7Smaybee bufoff += PAGESIZE; 80144eda4d7Smaybee } 80244eda4d7Smaybee 80344eda4d7Smaybee if (tocpy == db->db_size) 80444eda4d7Smaybee dmu_buf_fill_done(db, tx); 80544eda4d7Smaybee 80644eda4d7Smaybee if (err) 80744eda4d7Smaybee break; 80844eda4d7Smaybee 80944eda4d7Smaybee offset += tocpy; 81044eda4d7Smaybee size -= tocpy; 81144eda4d7Smaybee } 81244eda4d7Smaybee dmu_buf_rele_array(dbp, numbufs, FTAG); 81344eda4d7Smaybee return (err); 81444eda4d7Smaybee } 815fa9e4066Sahrens #endif 816fa9e4066Sahrens 817*2fdbea25SAleksandr Guzovskiy /* 818*2fdbea25SAleksandr Guzovskiy * Allocate a loaned anonymous arc buffer. 
819*2fdbea25SAleksandr Guzovskiy */ 820*2fdbea25SAleksandr Guzovskiy arc_buf_t * 821*2fdbea25SAleksandr Guzovskiy dmu_request_arcbuf(dmu_buf_t *handle, int size) 822*2fdbea25SAleksandr Guzovskiy { 823*2fdbea25SAleksandr Guzovskiy dnode_t *dn = ((dmu_buf_impl_t *)handle)->db_dnode; 824*2fdbea25SAleksandr Guzovskiy 825*2fdbea25SAleksandr Guzovskiy return (arc_loan_buf(dn->dn_objset->os_spa, size)); 826*2fdbea25SAleksandr Guzovskiy } 827*2fdbea25SAleksandr Guzovskiy 828*2fdbea25SAleksandr Guzovskiy /* 829*2fdbea25SAleksandr Guzovskiy * Free a loaned arc buffer. 830*2fdbea25SAleksandr Guzovskiy */ 831*2fdbea25SAleksandr Guzovskiy void 832*2fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(arc_buf_t *buf) 833*2fdbea25SAleksandr Guzovskiy { 834*2fdbea25SAleksandr Guzovskiy arc_return_buf(buf, FTAG); 835*2fdbea25SAleksandr Guzovskiy VERIFY(arc_buf_remove_ref(buf, FTAG) == 1); 836*2fdbea25SAleksandr Guzovskiy } 837*2fdbea25SAleksandr Guzovskiy 838*2fdbea25SAleksandr Guzovskiy /* 839*2fdbea25SAleksandr Guzovskiy * When possible directly assign passed loaned arc buffer to a dbuf. 840*2fdbea25SAleksandr Guzovskiy * If this is not possible copy the contents of passed arc buf via 841*2fdbea25SAleksandr Guzovskiy * dmu_write(). 
842*2fdbea25SAleksandr Guzovskiy */ 843*2fdbea25SAleksandr Guzovskiy void 844*2fdbea25SAleksandr Guzovskiy dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf, 845*2fdbea25SAleksandr Guzovskiy dmu_tx_t *tx) 846*2fdbea25SAleksandr Guzovskiy { 847*2fdbea25SAleksandr Guzovskiy dnode_t *dn = ((dmu_buf_impl_t *)handle)->db_dnode; 848*2fdbea25SAleksandr Guzovskiy dmu_buf_impl_t *db; 849*2fdbea25SAleksandr Guzovskiy uint32_t blksz = (uint32_t)arc_buf_size(buf); 850*2fdbea25SAleksandr Guzovskiy uint64_t blkid; 851*2fdbea25SAleksandr Guzovskiy 852*2fdbea25SAleksandr Guzovskiy rw_enter(&dn->dn_struct_rwlock, RW_READER); 853*2fdbea25SAleksandr Guzovskiy blkid = dbuf_whichblock(dn, offset); 854*2fdbea25SAleksandr Guzovskiy VERIFY((db = dbuf_hold(dn, blkid, FTAG)) != NULL); 855*2fdbea25SAleksandr Guzovskiy rw_exit(&dn->dn_struct_rwlock); 856*2fdbea25SAleksandr Guzovskiy 857*2fdbea25SAleksandr Guzovskiy if (offset == db->db.db_offset && blksz == db->db.db_size) { 858*2fdbea25SAleksandr Guzovskiy dbuf_assign_arcbuf(db, buf, tx); 859*2fdbea25SAleksandr Guzovskiy dbuf_rele(db, FTAG); 860*2fdbea25SAleksandr Guzovskiy } else { 861*2fdbea25SAleksandr Guzovskiy dbuf_rele(db, FTAG); 862*2fdbea25SAleksandr Guzovskiy ASSERT(dn->dn_objset->os.os == dn->dn_objset); 863*2fdbea25SAleksandr Guzovskiy dmu_write(&dn->dn_objset->os, dn->dn_object, offset, blksz, 864*2fdbea25SAleksandr Guzovskiy buf->b_data, tx); 865*2fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(buf); 866*2fdbea25SAleksandr Guzovskiy } 867*2fdbea25SAleksandr Guzovskiy } 868*2fdbea25SAleksandr Guzovskiy 869c5c6ffa0Smaybee typedef struct { 870c717a561Smaybee dbuf_dirty_record_t *dr; 871c5c6ffa0Smaybee dmu_sync_cb_t *done; 872c5c6ffa0Smaybee void *arg; 873c717a561Smaybee } dmu_sync_arg_t; 874c5c6ffa0Smaybee 875c5c6ffa0Smaybee /* ARGSUSED */ 876c5c6ffa0Smaybee static void 877e14bb325SJeff Bonwick dmu_sync_ready(zio_t *zio, arc_buf_t *buf, void *varg) 878e14bb325SJeff Bonwick { 879e14bb325SJeff Bonwick blkptr_t *bp = 
zio->io_bp; 880e14bb325SJeff Bonwick 881e14bb325SJeff Bonwick if (!BP_IS_HOLE(bp)) { 882e14bb325SJeff Bonwick dmu_sync_arg_t *in = varg; 883e14bb325SJeff Bonwick dbuf_dirty_record_t *dr = in->dr; 884e14bb325SJeff Bonwick dmu_buf_impl_t *db = dr->dr_dbuf; 885e14bb325SJeff Bonwick ASSERT(BP_GET_TYPE(bp) == db->db_dnode->dn_type); 886e14bb325SJeff Bonwick ASSERT(BP_GET_LEVEL(bp) == 0); 887e14bb325SJeff Bonwick bp->blk_fill = 1; 888e14bb325SJeff Bonwick } 889e14bb325SJeff Bonwick } 890e14bb325SJeff Bonwick 891e14bb325SJeff Bonwick /* ARGSUSED */ 892e14bb325SJeff Bonwick static void 893c5c6ffa0Smaybee dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg) 894c5c6ffa0Smaybee { 895c717a561Smaybee dmu_sync_arg_t *in = varg; 896c717a561Smaybee dbuf_dirty_record_t *dr = in->dr; 897c717a561Smaybee dmu_buf_impl_t *db = dr->dr_dbuf; 898c5c6ffa0Smaybee dmu_sync_cb_t *done = in->done; 899c5c6ffa0Smaybee 900c5c6ffa0Smaybee mutex_enter(&db->db_mtx); 901c717a561Smaybee ASSERT(dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC); 902c717a561Smaybee dr->dt.dl.dr_overridden_by = *zio->io_bp; /* structure assignment */ 903c717a561Smaybee dr->dt.dl.dr_override_state = DR_OVERRIDDEN; 904c5c6ffa0Smaybee cv_broadcast(&db->db_changed); 905c5c6ffa0Smaybee mutex_exit(&db->db_mtx); 906c5c6ffa0Smaybee 907c5c6ffa0Smaybee if (done) 908c717a561Smaybee done(&(db->db), in->arg); 909c717a561Smaybee 910c717a561Smaybee kmem_free(in, sizeof (dmu_sync_arg_t)); 911c5c6ffa0Smaybee } 912c5c6ffa0Smaybee 913fa9e4066Sahrens /* 914c5c6ffa0Smaybee * Intent log support: sync the block associated with db to disk. 915c5c6ffa0Smaybee * N.B. and XXX: the caller is responsible for making sure that the 916c5c6ffa0Smaybee * data isn't changing while dmu_sync() is writing it. 917fa9e4066Sahrens * 918fa9e4066Sahrens * Return values: 919fa9e4066Sahrens * 920c5c6ffa0Smaybee * EEXIST: this txg has already been synced, so there's nothing to to. 921fa9e4066Sahrens * The caller should not log the write. 
922fa9e4066Sahrens * 923fa9e4066Sahrens * ENOENT: the block was dbuf_free_range()'d, so there's nothing to do. 924fa9e4066Sahrens * The caller should not log the write. 925fa9e4066Sahrens * 926c5c6ffa0Smaybee * EALREADY: this block is already in the process of being synced. 927c5c6ffa0Smaybee * The caller should track its progress (somehow). 928fa9e4066Sahrens * 929c5c6ffa0Smaybee * EINPROGRESS: the IO has been initiated. 930c5c6ffa0Smaybee * The caller should log this blkptr in the callback. 931fa9e4066Sahrens * 932c5c6ffa0Smaybee * 0: completed. Sets *bp to the blkptr just written. 933c5c6ffa0Smaybee * The caller should log this blkptr immediately. 934fa9e4066Sahrens */ 935fa9e4066Sahrens int 936c5c6ffa0Smaybee dmu_sync(zio_t *pio, dmu_buf_t *db_fake, 937c5c6ffa0Smaybee blkptr_t *bp, uint64_t txg, dmu_sync_cb_t *done, void *arg) 938fa9e4066Sahrens { 939c5c6ffa0Smaybee dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; 940c5c6ffa0Smaybee objset_impl_t *os = db->db_objset; 941c5c6ffa0Smaybee dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool; 942fa9e4066Sahrens tx_state_t *tx = &dp->dp_tx; 943c717a561Smaybee dbuf_dirty_record_t *dr; 944c717a561Smaybee dmu_sync_arg_t *in; 945ea8dc4b6Seschrock zbookmark_t zb; 946088f3894Sahrens writeprops_t wp = { 0 }; 947c717a561Smaybee zio_t *zio; 948c5c6ffa0Smaybee int err; 949fa9e4066Sahrens 950fa9e4066Sahrens ASSERT(BP_IS_HOLE(bp)); 951fa9e4066Sahrens ASSERT(txg != 0); 952fa9e4066Sahrens 953fa9e4066Sahrens dprintf("dmu_sync txg=%llu, s,o,q %llu %llu %llu\n", 954fa9e4066Sahrens txg, tx->tx_synced_txg, tx->tx_open_txg, tx->tx_quiesced_txg); 955fa9e4066Sahrens 956fa9e4066Sahrens /* 957c5c6ffa0Smaybee * XXX - would be nice if we could do this without suspending... 958ea8dc4b6Seschrock */ 959c5c6ffa0Smaybee txg_suspend(dp); 960ea8dc4b6Seschrock 961ea8dc4b6Seschrock /* 962fa9e4066Sahrens * If this txg already synced, there's nothing to do. 
963fa9e4066Sahrens */ 964fa9e4066Sahrens if (txg <= tx->tx_synced_txg) { 965c5c6ffa0Smaybee txg_resume(dp); 966fa9e4066Sahrens /* 967fa9e4066Sahrens * If we're running ziltest, we need the blkptr regardless. 968fa9e4066Sahrens */ 969fa9e4066Sahrens if (txg > spa_freeze_txg(dp->dp_spa)) { 970fa9e4066Sahrens /* if db_blkptr == NULL, this was an empty write */ 971fa9e4066Sahrens if (db->db_blkptr) 972fa9e4066Sahrens *bp = *db->db_blkptr; /* structure assignment */ 973fa9e4066Sahrens return (0); 974fa9e4066Sahrens } 975c5c6ffa0Smaybee return (EEXIST); 976fa9e4066Sahrens } 977fa9e4066Sahrens 978fa9e4066Sahrens mutex_enter(&db->db_mtx); 979fa9e4066Sahrens 980c5c6ffa0Smaybee if (txg == tx->tx_syncing_txg) { 981c5c6ffa0Smaybee while (db->db_data_pending) { 982c5c6ffa0Smaybee /* 983c5c6ffa0Smaybee * IO is in-progress. Wait for it to finish. 984c5c6ffa0Smaybee * XXX - would be nice to be able to somehow "attach" 985c5c6ffa0Smaybee * this zio to the parent zio passed in. 986c5c6ffa0Smaybee */ 987c5c6ffa0Smaybee cv_wait(&db->db_changed, &db->db_mtx); 98813506d1eSmaybee if (!db->db_data_pending && 98913506d1eSmaybee db->db_blkptr && BP_IS_HOLE(db->db_blkptr)) { 99013506d1eSmaybee /* 99113506d1eSmaybee * IO was compressed away 99213506d1eSmaybee */ 99313506d1eSmaybee *bp = *db->db_blkptr; /* structure assignment */ 99413506d1eSmaybee mutex_exit(&db->db_mtx); 99513506d1eSmaybee txg_resume(dp); 99613506d1eSmaybee return (0); 99713506d1eSmaybee } 998c5c6ffa0Smaybee ASSERT(db->db_data_pending || 999c5c6ffa0Smaybee (db->db_blkptr && db->db_blkptr->blk_birth == txg)); 1000c5c6ffa0Smaybee } 1001c5c6ffa0Smaybee 1002c5c6ffa0Smaybee if (db->db_blkptr && db->db_blkptr->blk_birth == txg) { 1003c5c6ffa0Smaybee /* 1004c5c6ffa0Smaybee * IO is already completed. 
1005c5c6ffa0Smaybee */ 1006c5c6ffa0Smaybee *bp = *db->db_blkptr; /* structure assignment */ 1007c5c6ffa0Smaybee mutex_exit(&db->db_mtx); 1008c5c6ffa0Smaybee txg_resume(dp); 1009c5c6ffa0Smaybee return (0); 1010c5c6ffa0Smaybee } 1011c5c6ffa0Smaybee } 1012c5c6ffa0Smaybee 1013c717a561Smaybee dr = db->db_last_dirty; 1014c717a561Smaybee while (dr && dr->dr_txg > txg) 1015c717a561Smaybee dr = dr->dr_next; 1016c717a561Smaybee if (dr == NULL || dr->dr_txg < txg) { 1017c5c6ffa0Smaybee /* 1018c5c6ffa0Smaybee * This dbuf isn't dirty, must have been free_range'd. 1019fa9e4066Sahrens * There's no need to log writes to freed blocks, so we're done. 1020fa9e4066Sahrens */ 1021fa9e4066Sahrens mutex_exit(&db->db_mtx); 1022c5c6ffa0Smaybee txg_resume(dp); 1023fa9e4066Sahrens return (ENOENT); 1024fa9e4066Sahrens } 1025fa9e4066Sahrens 1026c717a561Smaybee ASSERT(dr->dr_txg == txg); 1027c717a561Smaybee if (dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC) { 1028c5c6ffa0Smaybee /* 1029c717a561Smaybee * We have already issued a sync write for this buffer. 1030c5c6ffa0Smaybee */ 1031c717a561Smaybee mutex_exit(&db->db_mtx); 1032c717a561Smaybee txg_resume(dp); 1033c717a561Smaybee return (EALREADY); 1034c717a561Smaybee } else if (dr->dt.dl.dr_override_state == DR_OVERRIDDEN) { 1035c717a561Smaybee /* 1036c717a561Smaybee * This buffer has already been synced. It could not 1037c717a561Smaybee * have been dirtied since, or we would have cleared the state. 
1038c717a561Smaybee */ 1039c717a561Smaybee *bp = dr->dt.dl.dr_overridden_by; /* structure assignment */ 1040c717a561Smaybee mutex_exit(&db->db_mtx); 1041c717a561Smaybee txg_resume(dp); 1042c717a561Smaybee return (0); 1043c717a561Smaybee } 1044c717a561Smaybee 1045c717a561Smaybee dr->dt.dl.dr_override_state = DR_IN_DMU_SYNC; 1046c717a561Smaybee in = kmem_alloc(sizeof (dmu_sync_arg_t), KM_SLEEP); 1047c717a561Smaybee in->dr = dr; 1048c5c6ffa0Smaybee in->done = done; 1049c5c6ffa0Smaybee in->arg = arg; 1050fa9e4066Sahrens mutex_exit(&db->db_mtx); 1051c5c6ffa0Smaybee txg_resume(dp); 1052fa9e4066Sahrens 1053c5c6ffa0Smaybee zb.zb_objset = os->os_dsl_dataset->ds_object; 1054ea8dc4b6Seschrock zb.zb_object = db->db.db_object; 1055ea8dc4b6Seschrock zb.zb_level = db->db_level; 1056ea8dc4b6Seschrock zb.zb_blkid = db->db_blkid; 1057e14bb325SJeff Bonwick 1058088f3894Sahrens wp.wp_type = db->db_dnode->dn_type; 1059088f3894Sahrens wp.wp_level = db->db_level; 1060e14bb325SJeff Bonwick wp.wp_copies = os->os_copies; 1061088f3894Sahrens wp.wp_dnchecksum = db->db_dnode->dn_checksum; 1062088f3894Sahrens wp.wp_oschecksum = os->os_checksum; 1063088f3894Sahrens wp.wp_dncompress = db->db_dnode->dn_compress; 1064088f3894Sahrens wp.wp_oscompress = os->os_compress; 1065e14bb325SJeff Bonwick 1066e14bb325SJeff Bonwick ASSERT(BP_IS_HOLE(bp)); 1067e14bb325SJeff Bonwick 1068e14bb325SJeff Bonwick zio = arc_write(pio, os->os_spa, &wp, DBUF_IS_L2CACHEABLE(db), 1069e14bb325SJeff Bonwick txg, bp, dr->dt.dl.dr_data, dmu_sync_ready, dmu_sync_done, in, 1070e14bb325SJeff Bonwick ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); 1071c717a561Smaybee if (pio) { 1072c717a561Smaybee zio_nowait(zio); 1073c717a561Smaybee err = EINPROGRESS; 1074c717a561Smaybee } else { 1075c717a561Smaybee err = zio_wait(zio); 1076c717a561Smaybee ASSERT(err == 0); 1077c717a561Smaybee } 1078c717a561Smaybee return (err); 1079fa9e4066Sahrens } 1080fa9e4066Sahrens 1081fa9e4066Sahrens int 1082fa9e4066Sahrens 
dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, 1083fa9e4066Sahrens dmu_tx_t *tx) 1084fa9e4066Sahrens { 1085ea8dc4b6Seschrock dnode_t *dn; 1086ea8dc4b6Seschrock int err; 1087ea8dc4b6Seschrock 1088ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 1089ea8dc4b6Seschrock if (err) 1090ea8dc4b6Seschrock return (err); 1091ea8dc4b6Seschrock err = dnode_set_blksz(dn, size, ibs, tx); 1092fa9e4066Sahrens dnode_rele(dn, FTAG); 1093fa9e4066Sahrens return (err); 1094fa9e4066Sahrens } 1095fa9e4066Sahrens 1096fa9e4066Sahrens void 1097fa9e4066Sahrens dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, 1098fa9e4066Sahrens dmu_tx_t *tx) 1099fa9e4066Sahrens { 1100ea8dc4b6Seschrock dnode_t *dn; 1101ea8dc4b6Seschrock 1102ea8dc4b6Seschrock /* XXX assumes dnode_hold will not get an i/o error */ 1103ea8dc4b6Seschrock (void) dnode_hold(os->os, object, FTAG, &dn); 1104fa9e4066Sahrens ASSERT(checksum < ZIO_CHECKSUM_FUNCTIONS); 1105fa9e4066Sahrens dn->dn_checksum = checksum; 1106fa9e4066Sahrens dnode_setdirty(dn, tx); 1107fa9e4066Sahrens dnode_rele(dn, FTAG); 1108fa9e4066Sahrens } 1109fa9e4066Sahrens 1110fa9e4066Sahrens void 1111fa9e4066Sahrens dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress, 1112fa9e4066Sahrens dmu_tx_t *tx) 1113fa9e4066Sahrens { 1114ea8dc4b6Seschrock dnode_t *dn; 1115ea8dc4b6Seschrock 1116ea8dc4b6Seschrock /* XXX assumes dnode_hold will not get an i/o error */ 1117ea8dc4b6Seschrock (void) dnode_hold(os->os, object, FTAG, &dn); 1118fa9e4066Sahrens ASSERT(compress < ZIO_COMPRESS_FUNCTIONS); 1119fa9e4066Sahrens dn->dn_compress = compress; 1120fa9e4066Sahrens dnode_setdirty(dn, tx); 1121fa9e4066Sahrens dnode_rele(dn, FTAG); 1122fa9e4066Sahrens } 1123fa9e4066Sahrens 112444cd46caSbillm int 1125fa9e4066Sahrens dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) 1126fa9e4066Sahrens { 1127fa9e4066Sahrens dnode_t *dn; 1128fa9e4066Sahrens int i, err; 1129fa9e4066Sahrens 
1130ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 1131ea8dc4b6Seschrock if (err) 1132ea8dc4b6Seschrock return (err); 1133fa9e4066Sahrens /* 1134fa9e4066Sahrens * Sync any current changes before 1135fa9e4066Sahrens * we go trundling through the block pointers. 1136fa9e4066Sahrens */ 1137fa9e4066Sahrens for (i = 0; i < TXG_SIZE; i++) { 1138c543ec06Sahrens if (list_link_active(&dn->dn_dirty_link[i])) 1139fa9e4066Sahrens break; 1140fa9e4066Sahrens } 1141fa9e4066Sahrens if (i != TXG_SIZE) { 1142fa9e4066Sahrens dnode_rele(dn, FTAG); 1143fa9e4066Sahrens txg_wait_synced(dmu_objset_pool(os), 0); 1144ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 1145ea8dc4b6Seschrock if (err) 1146ea8dc4b6Seschrock return (err); 1147fa9e4066Sahrens } 1148fa9e4066Sahrens 1149cdb0ab79Smaybee err = dnode_next_offset(dn, (hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0); 1150fa9e4066Sahrens dnode_rele(dn, FTAG); 1151fa9e4066Sahrens 1152fa9e4066Sahrens return (err); 1153fa9e4066Sahrens } 1154fa9e4066Sahrens 1155fa9e4066Sahrens void 1156fa9e4066Sahrens dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi) 1157fa9e4066Sahrens { 1158fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 1159fa9e4066Sahrens mutex_enter(&dn->dn_mtx); 1160fa9e4066Sahrens 1161fa9e4066Sahrens doi->doi_data_block_size = dn->dn_datablksz; 1162fa9e4066Sahrens doi->doi_metadata_block_size = dn->dn_indblkshift ? 
1163fa9e4066Sahrens 1ULL << dn->dn_indblkshift : 0; 1164fa9e4066Sahrens doi->doi_indirection = dn->dn_nlevels; 1165fa9e4066Sahrens doi->doi_checksum = dn->dn_checksum; 1166fa9e4066Sahrens doi->doi_compress = dn->dn_compress; 116799653d4eSeschrock doi->doi_physical_blks = (DN_USED_BYTES(dn->dn_phys) + 116899653d4eSeschrock SPA_MINBLOCKSIZE/2) >> SPA_MINBLOCKSHIFT; 1169fa9e4066Sahrens doi->doi_max_block_offset = dn->dn_phys->dn_maxblkid; 1170fa9e4066Sahrens doi->doi_type = dn->dn_type; 1171fa9e4066Sahrens doi->doi_bonus_size = dn->dn_bonuslen; 1172fa9e4066Sahrens doi->doi_bonus_type = dn->dn_bonustype; 1173fa9e4066Sahrens 1174fa9e4066Sahrens mutex_exit(&dn->dn_mtx); 1175fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 1176fa9e4066Sahrens } 1177fa9e4066Sahrens 1178fa9e4066Sahrens /* 1179fa9e4066Sahrens * Get information on a DMU object. 1180fa9e4066Sahrens * If doi is NULL, just indicates whether the object exists. 1181fa9e4066Sahrens */ 1182fa9e4066Sahrens int 1183fa9e4066Sahrens dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi) 1184fa9e4066Sahrens { 1185ea8dc4b6Seschrock dnode_t *dn; 1186ea8dc4b6Seschrock int err = dnode_hold(os->os, object, FTAG, &dn); 1187fa9e4066Sahrens 1188ea8dc4b6Seschrock if (err) 1189ea8dc4b6Seschrock return (err); 1190fa9e4066Sahrens 1191fa9e4066Sahrens if (doi != NULL) 1192fa9e4066Sahrens dmu_object_info_from_dnode(dn, doi); 1193fa9e4066Sahrens 1194fa9e4066Sahrens dnode_rele(dn, FTAG); 1195fa9e4066Sahrens return (0); 1196fa9e4066Sahrens } 1197fa9e4066Sahrens 1198fa9e4066Sahrens /* 1199fa9e4066Sahrens * As above, but faster; can be used when you have a held dbuf in hand. 
1200fa9e4066Sahrens */ 1201fa9e4066Sahrens void 1202fa9e4066Sahrens dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi) 1203fa9e4066Sahrens { 1204fa9e4066Sahrens dmu_object_info_from_dnode(((dmu_buf_impl_t *)db)->db_dnode, doi); 1205fa9e4066Sahrens } 1206fa9e4066Sahrens 1207fa9e4066Sahrens /* 1208fa9e4066Sahrens * Faster still when you only care about the size. 1209fa9e4066Sahrens * This is specifically optimized for zfs_getattr(). 1210fa9e4066Sahrens */ 1211fa9e4066Sahrens void 1212fa9e4066Sahrens dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize, u_longlong_t *nblk512) 1213fa9e4066Sahrens { 1214fa9e4066Sahrens dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode; 1215fa9e4066Sahrens 1216fa9e4066Sahrens *blksize = dn->dn_datablksz; 121799653d4eSeschrock /* add 1 for dnode space */ 121899653d4eSeschrock *nblk512 = ((DN_USED_BYTES(dn->dn_phys) + SPA_MINBLOCKSIZE/2) >> 121999653d4eSeschrock SPA_MINBLOCKSHIFT) + 1; 1220fa9e4066Sahrens } 1221fa9e4066Sahrens 1222fa9e4066Sahrens void 1223fa9e4066Sahrens byteswap_uint64_array(void *vbuf, size_t size) 1224fa9e4066Sahrens { 1225fa9e4066Sahrens uint64_t *buf = vbuf; 1226fa9e4066Sahrens size_t count = size >> 3; 1227fa9e4066Sahrens int i; 1228fa9e4066Sahrens 1229fa9e4066Sahrens ASSERT((size & 7) == 0); 1230fa9e4066Sahrens 1231fa9e4066Sahrens for (i = 0; i < count; i++) 1232fa9e4066Sahrens buf[i] = BSWAP_64(buf[i]); 1233fa9e4066Sahrens } 1234fa9e4066Sahrens 1235fa9e4066Sahrens void 1236fa9e4066Sahrens byteswap_uint32_array(void *vbuf, size_t size) 1237fa9e4066Sahrens { 1238fa9e4066Sahrens uint32_t *buf = vbuf; 1239fa9e4066Sahrens size_t count = size >> 2; 1240fa9e4066Sahrens int i; 1241fa9e4066Sahrens 1242fa9e4066Sahrens ASSERT((size & 3) == 0); 1243fa9e4066Sahrens 1244fa9e4066Sahrens for (i = 0; i < count; i++) 1245fa9e4066Sahrens buf[i] = BSWAP_32(buf[i]); 1246fa9e4066Sahrens } 1247fa9e4066Sahrens 1248fa9e4066Sahrens void 1249fa9e4066Sahrens byteswap_uint16_array(void *vbuf, size_t size) 1250fa9e4066Sahrens { 
1251fa9e4066Sahrens uint16_t *buf = vbuf; 1252fa9e4066Sahrens size_t count = size >> 1; 1253fa9e4066Sahrens int i; 1254fa9e4066Sahrens 1255fa9e4066Sahrens ASSERT((size & 1) == 0); 1256fa9e4066Sahrens 1257fa9e4066Sahrens for (i = 0; i < count; i++) 1258fa9e4066Sahrens buf[i] = BSWAP_16(buf[i]); 1259fa9e4066Sahrens } 1260fa9e4066Sahrens 1261fa9e4066Sahrens /* ARGSUSED */ 1262fa9e4066Sahrens void 1263fa9e4066Sahrens byteswap_uint8_array(void *vbuf, size_t size) 1264fa9e4066Sahrens { 1265fa9e4066Sahrens } 1266fa9e4066Sahrens 1267fa9e4066Sahrens void 1268fa9e4066Sahrens dmu_init(void) 1269fa9e4066Sahrens { 1270fa9e4066Sahrens dbuf_init(); 1271fa9e4066Sahrens dnode_init(); 1272fa9e4066Sahrens arc_init(); 1273fa94a07fSbrendan l2arc_init(); 1274fa9e4066Sahrens } 1275fa9e4066Sahrens 1276fa9e4066Sahrens void 1277fa9e4066Sahrens dmu_fini(void) 1278fa9e4066Sahrens { 1279fa9e4066Sahrens arc_fini(); 1280fa9e4066Sahrens dnode_fini(); 1281fa9e4066Sahrens dbuf_fini(); 1282fa94a07fSbrendan l2arc_fini(); 1283fa9e4066Sahrens } 1284