1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 2255434c77Sek110237 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23fa9e4066Sahrens * Use is subject to license terms. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 26fa9e4066Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27fa9e4066Sahrens 28fa9e4066Sahrens #include <sys/dmu.h> 29fa9e4066Sahrens #include <sys/dmu_impl.h> 30fa9e4066Sahrens #include <sys/dmu_tx.h> 31fa9e4066Sahrens #include <sys/dbuf.h> 32fa9e4066Sahrens #include <sys/dnode.h> 33fa9e4066Sahrens #include <sys/zfs_context.h> 34fa9e4066Sahrens #include <sys/dmu_objset.h> 35fa9e4066Sahrens #include <sys/dmu_traverse.h> 36fa9e4066Sahrens #include <sys/dsl_dataset.h> 37fa9e4066Sahrens #include <sys/dsl_dir.h> 38fa9e4066Sahrens #include <sys/dsl_pool.h> 391d452cf5Sahrens #include <sys/dsl_synctask.h> 40a2eea2e1Sahrens #include <sys/dsl_prop.h> 41fa9e4066Sahrens #include <sys/dmu_zfetch.h> 42fa9e4066Sahrens #include <sys/zfs_ioctl.h> 43fa9e4066Sahrens #include <sys/zap.h> 44ea8dc4b6Seschrock #include <sys/zio_checksum.h> 4544eda4d7Smaybee #ifdef _KERNEL 4644eda4d7Smaybee #include <sys/vmsystm.h> 4744eda4d7Smaybee #endif 48fa9e4066Sahrens 49fa9e4066Sahrens const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { 50fa9e4066Sahrens { byteswap_uint8_array, TRUE, "unallocated" }, 51fa9e4066Sahrens { zap_byteswap, TRUE, "object directory" }, 52fa9e4066Sahrens { byteswap_uint64_array, TRUE, "object array" }, 53fa9e4066Sahrens { byteswap_uint8_array, TRUE, "packed nvlist" }, 54fa9e4066Sahrens { byteswap_uint64_array, TRUE, "packed nvlist size" }, 55fa9e4066Sahrens { byteswap_uint64_array, TRUE, "bplist" }, 56fa9e4066Sahrens { byteswap_uint64_array, TRUE, "bplist header" }, 57fa9e4066Sahrens { byteswap_uint64_array, TRUE, "SPA space map header" }, 58fa9e4066Sahrens { byteswap_uint64_array, TRUE, "SPA space map" }, 59fa9e4066Sahrens { byteswap_uint64_array, TRUE, "ZIL intent log" }, 60fa9e4066Sahrens { dnode_buf_byteswap, TRUE, "DMU dnode" }, 61fa9e4066Sahrens { dmu_objset_byteswap, TRUE, "DMU objset" }, 62fa9e4066Sahrens { byteswap_uint64_array, TRUE, "DSL directory" }, 63fa9e4066Sahrens { zap_byteswap, TRUE, "DSL directory child map"}, 64fa9e4066Sahrens { zap_byteswap, TRUE, "DSL dataset snap map" }, 65fa9e4066Sahrens { zap_byteswap, TRUE, "DSL props" }, 66fa9e4066Sahrens { byteswap_uint64_array, TRUE, "DSL dataset" }, 67fa9e4066Sahrens { zfs_znode_byteswap, TRUE, "ZFS znode" }, 68fa9e4066Sahrens { zfs_acl_byteswap, TRUE, "ZFS ACL" }, 69fa9e4066Sahrens { byteswap_uint8_array, FALSE, "ZFS plain file" }, 70fa9e4066Sahrens { zap_byteswap, TRUE, "ZFS directory" }, 71fa9e4066Sahrens { zap_byteswap, TRUE, "ZFS master node" }, 72fa9e4066Sahrens { zap_byteswap, TRUE, "ZFS delete queue" }, 73fa9e4066Sahrens { byteswap_uint8_array, FALSE, "zvol object" }, 74fa9e4066Sahrens { zap_byteswap, TRUE, "zvol prop" }, 75fa9e4066Sahrens { byteswap_uint8_array, FALSE, "other uint8[]" }, 76fa9e4066Sahrens { byteswap_uint64_array, FALSE, "other uint64[]" }, 77fa9e4066Sahrens { zap_byteswap, TRUE, "other ZAP" }, 78ea8dc4b6Seschrock { zap_byteswap, TRUE, "persistent error log" }, 7906eeb2adSek110237 { byteswap_uint8_array, TRUE, "SPA history" }, 8006eeb2adSek110237 { byteswap_uint64_array, TRUE, "SPA history offsets" }, 81fa9e4066Sahrens }; 82fa9e4066Sahrens 83fa9e4066Sahrens int 84ea8dc4b6Seschrock dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset, 85ea8dc4b6Seschrock void *tag, dmu_buf_t **dbp) 86fa9e4066Sahrens { 87fa9e4066Sahrens dnode_t *dn; 88fa9e4066Sahrens uint64_t blkid; 89fa9e4066Sahrens dmu_buf_impl_t *db; 90ea8dc4b6Seschrock int err; 91fa9e4066Sahrens 92ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 93ea8dc4b6Seschrock if (err) 94ea8dc4b6Seschrock return (err); 95fa9e4066Sahrens blkid = dbuf_whichblock(dn, offset); 96fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 97ea8dc4b6Seschrock db = dbuf_hold(dn, blkid, tag); 98fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 99ea8dc4b6Seschrock if (db == NULL) { 100ea8dc4b6Seschrock err = EIO; 101ea8dc4b6Seschrock } else { 102ea8dc4b6Seschrock err = dbuf_read(db, NULL, DB_RF_CANFAIL); 103ea8dc4b6Seschrock if (err) { 104ea8dc4b6Seschrock dbuf_rele(db, tag); 105ea8dc4b6Seschrock db = NULL; 106ea8dc4b6Seschrock } 107fa9e4066Sahrens } 108fa9e4066Sahrens 109ea8dc4b6Seschrock dnode_rele(dn, FTAG); 110ea8dc4b6Seschrock *dbp = &db->db; 111ea8dc4b6Seschrock return (err); 112fa9e4066Sahrens } 113fa9e4066Sahrens 114fa9e4066Sahrens int 115fa9e4066Sahrens dmu_bonus_max(void) 116fa9e4066Sahrens { 117fa9e4066Sahrens return (DN_MAX_BONUSLEN); 118fa9e4066Sahrens } 119fa9e4066Sahrens 120fa9e4066Sahrens /* 121ea8dc4b6Seschrock * returns ENOENT, EIO, or 0. 122fa9e4066Sahrens */ 123ea8dc4b6Seschrock int 124ea8dc4b6Seschrock dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp) 125fa9e4066Sahrens { 126ea8dc4b6Seschrock dnode_t *dn; 127ea8dc4b6Seschrock int err, count; 128fa9e4066Sahrens dmu_buf_impl_t *db; 129fa9e4066Sahrens 130ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 131ea8dc4b6Seschrock if (err) 132ea8dc4b6Seschrock return (err); 133fa9e4066Sahrens 134fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 135ea8dc4b6Seschrock if (dn->dn_bonus == NULL) { 136fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 137ea8dc4b6Seschrock rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 138ea8dc4b6Seschrock if (dn->dn_bonus == NULL) 139ea8dc4b6Seschrock dn->dn_bonus = dbuf_create_bonus(dn); 140fa9e4066Sahrens } 141ea8dc4b6Seschrock db = dn->dn_bonus; 142ea8dc4b6Seschrock rw_exit(&dn->dn_struct_rwlock); 143ea8dc4b6Seschrock mutex_enter(&db->db_mtx); 144ea8dc4b6Seschrock count = refcount_add(&db->db_holds, tag); 145ea8dc4b6Seschrock mutex_exit(&db->db_mtx); 146ea8dc4b6Seschrock if (count == 1) 147ea8dc4b6Seschrock dnode_add_ref(dn, db); 148fa9e4066Sahrens dnode_rele(dn, FTAG); 149ea8dc4b6Seschrock 150ea8dc4b6Seschrock VERIFY(0 == dbuf_read(db, NULL, DB_RF_MUST_SUCCEED)); 151ea8dc4b6Seschrock 152ea8dc4b6Seschrock *dbp = &db->db; 153ea8dc4b6Seschrock return (0); 154fa9e4066Sahrens } 155fa9e4066Sahrens 15613506d1eSmaybee /* 15713506d1eSmaybee * Note: longer-term, we should modify all of the dmu_buf_*() interfaces 15813506d1eSmaybee * to take a held dnode rather than <os, object> -- the lookup is wasteful, 15913506d1eSmaybee * and can induce severe lock contention when writing to several files 16013506d1eSmaybee * whose dnodes are in the same block. 16113506d1eSmaybee */ 16213506d1eSmaybee static int 16313506d1eSmaybee dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, 164ea8dc4b6Seschrock uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) 165fa9e4066Sahrens { 166fa9e4066Sahrens dmu_buf_t **dbp; 167fa9e4066Sahrens uint64_t blkid, nblks, i; 168ea8dc4b6Seschrock uint32_t flags; 169ea8dc4b6Seschrock int err; 170ea8dc4b6Seschrock zio_t *zio; 171ea8dc4b6Seschrock 172ea8dc4b6Seschrock ASSERT(length <= DMU_MAX_ACCESS); 173fa9e4066Sahrens 174ea8dc4b6Seschrock flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT; 175e1930233Sbonwick if (length > zfetch_array_rd_sz) 176ea8dc4b6Seschrock flags |= DB_RF_NOPREFETCH; 177ea8dc4b6Seschrock 178fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 179fa9e4066Sahrens if (dn->dn_datablkshift) { 180fa9e4066Sahrens int blkshift = dn->dn_datablkshift; 181fa9e4066Sahrens nblks = (P2ROUNDUP(offset+length, 1ULL<<blkshift) - 182fa9e4066Sahrens P2ALIGN(offset, 1ULL<<blkshift)) >> blkshift; 183fa9e4066Sahrens } else { 1840125049cSahrens if (offset + length > dn->dn_datablksz) { 1850125049cSahrens zfs_panic_recover("zfs: accessing past end of object " 1860125049cSahrens "%llx/%llx (size=%u access=%llu+%llu)", 1870125049cSahrens (longlong_t)dn->dn_objset-> 1880125049cSahrens os_dsl_dataset->ds_object, 1890125049cSahrens (longlong_t)dn->dn_object, dn->dn_datablksz, 1900125049cSahrens (longlong_t)offset, (longlong_t)length); 1910125049cSahrens return (EIO); 1920125049cSahrens } 193fa9e4066Sahrens nblks = 1; 194fa9e4066Sahrens } 195ea8dc4b6Seschrock dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP); 196fa9e4066Sahrens 197ea8dc4b6Seschrock zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, TRUE); 198fa9e4066Sahrens blkid = dbuf_whichblock(dn, offset); 199fa9e4066Sahrens for (i = 0; i < nblks; i++) { 200ea8dc4b6Seschrock dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag); 201ea8dc4b6Seschrock if (db == NULL) { 202ea8dc4b6Seschrock rw_exit(&dn->dn_struct_rwlock); 203ea8dc4b6Seschrock dmu_buf_rele_array(dbp, nblks, tag); 204ea8dc4b6Seschrock zio_nowait(zio); 205ea8dc4b6Seschrock return (EIO); 206ea8dc4b6Seschrock } 207ea8dc4b6Seschrock /* initiate async i/o */ 20813506d1eSmaybee if (read) { 209ea8dc4b6Seschrock rw_exit(&dn->dn_struct_rwlock); 210ea8dc4b6Seschrock (void) dbuf_read(db, zio, flags); 211ea8dc4b6Seschrock rw_enter(&dn->dn_struct_rwlock, RW_READER); 212ea8dc4b6Seschrock } 213ea8dc4b6Seschrock dbp[i] = &db->db; 214fa9e4066Sahrens } 215fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 216fa9e4066Sahrens 217ea8dc4b6Seschrock /* wait for async i/o */ 218ea8dc4b6Seschrock err = zio_wait(zio); 219ea8dc4b6Seschrock if (err) { 220ea8dc4b6Seschrock dmu_buf_rele_array(dbp, nblks, tag); 221ea8dc4b6Seschrock return (err); 222ea8dc4b6Seschrock } 223ea8dc4b6Seschrock 224ea8dc4b6Seschrock /* wait for other io to complete */ 225ea8dc4b6Seschrock if (read) { 226ea8dc4b6Seschrock for (i = 0; i < nblks; i++) { 227ea8dc4b6Seschrock dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i]; 228ea8dc4b6Seschrock mutex_enter(&db->db_mtx); 229ea8dc4b6Seschrock while (db->db_state == DB_READ || 230ea8dc4b6Seschrock db->db_state == DB_FILL) 231ea8dc4b6Seschrock cv_wait(&db->db_changed, &db->db_mtx); 232ea8dc4b6Seschrock if (db->db_state == DB_UNCACHED) 233ea8dc4b6Seschrock err = EIO; 234ea8dc4b6Seschrock mutex_exit(&db->db_mtx); 235ea8dc4b6Seschrock if (err) { 236ea8dc4b6Seschrock dmu_buf_rele_array(dbp, nblks, tag); 237ea8dc4b6Seschrock return (err); 238ea8dc4b6Seschrock } 239ea8dc4b6Seschrock } 240ea8dc4b6Seschrock } 241ea8dc4b6Seschrock 242ea8dc4b6Seschrock *numbufsp = nblks; 243ea8dc4b6Seschrock *dbpp = dbp; 244ea8dc4b6Seschrock return (0); 245fa9e4066Sahrens } 246fa9e4066Sahrens 247a2eea2e1Sahrens static int 24813506d1eSmaybee dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset, 24913506d1eSmaybee uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) 25013506d1eSmaybee { 25113506d1eSmaybee dnode_t *dn; 25213506d1eSmaybee int err; 25313506d1eSmaybee 25413506d1eSmaybee err = dnode_hold(os->os, object, FTAG, &dn); 25513506d1eSmaybee if (err) 25613506d1eSmaybee return (err); 25713506d1eSmaybee 25813506d1eSmaybee err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, 25913506d1eSmaybee numbufsp, dbpp); 26013506d1eSmaybee 26113506d1eSmaybee dnode_rele(dn, FTAG); 26213506d1eSmaybee 26313506d1eSmaybee return (err); 26413506d1eSmaybee } 26513506d1eSmaybee 26613506d1eSmaybee int 26713506d1eSmaybee dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset, 26813506d1eSmaybee uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) 26913506d1eSmaybee { 27013506d1eSmaybee dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode; 27113506d1eSmaybee int err; 27213506d1eSmaybee 27313506d1eSmaybee err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, 27413506d1eSmaybee numbufsp, dbpp); 27513506d1eSmaybee 27613506d1eSmaybee return (err); 27713506d1eSmaybee } 27813506d1eSmaybee 279fa9e4066Sahrens void 280ea8dc4b6Seschrock dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, void *tag) 281fa9e4066Sahrens { 282fa9e4066Sahrens int i; 283fa9e4066Sahrens dmu_buf_impl_t **dbp = (dmu_buf_impl_t **)dbp_fake; 284fa9e4066Sahrens 285fa9e4066Sahrens if (numbufs == 0) 286fa9e4066Sahrens return; 287fa9e4066Sahrens 288ea8dc4b6Seschrock for (i = 0; i < numbufs; i++) { 289ea8dc4b6Seschrock if (dbp[i]) 290ea8dc4b6Seschrock dbuf_rele(dbp[i], tag); 291ea8dc4b6Seschrock } 292fa9e4066Sahrens 293fa9e4066Sahrens kmem_free(dbp, sizeof (dmu_buf_t *) * numbufs); 294fa9e4066Sahrens } 295fa9e4066Sahrens 296fa9e4066Sahrens void 297fa9e4066Sahrens dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len) 298fa9e4066Sahrens { 299fa9e4066Sahrens dnode_t *dn; 300fa9e4066Sahrens uint64_t blkid; 301ea8dc4b6Seschrock int nblks, i, err; 302fa9e4066Sahrens 303416e0cd8Sek110237 if (zfs_prefetch_disable) 304416e0cd8Sek110237 return; 305416e0cd8Sek110237 306fa9e4066Sahrens if (len == 0) { /* they're interested in the bonus buffer */ 307fa9e4066Sahrens dn = os->os->os_meta_dnode; 308fa9e4066Sahrens 309fa9e4066Sahrens if (object == 0 || object >= DN_MAX_OBJECT) 310fa9e4066Sahrens return; 311fa9e4066Sahrens 312fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 313fa9e4066Sahrens blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t)); 314fa9e4066Sahrens dbuf_prefetch(dn, blkid); 315fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 316fa9e4066Sahrens return; 317fa9e4066Sahrens } 318fa9e4066Sahrens 319fa9e4066Sahrens /* 320fa9e4066Sahrens * XXX - Note, if the dnode for the requested object is not 321fa9e4066Sahrens * already cached, we will do a *synchronous* read in the 322fa9e4066Sahrens * dnode_hold() call. The same is true for any indirects. 323fa9e4066Sahrens */ 324ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 325ea8dc4b6Seschrock if (err != 0) 326fa9e4066Sahrens return; 327fa9e4066Sahrens 328fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 329fa9e4066Sahrens if (dn->dn_datablkshift) { 330fa9e4066Sahrens int blkshift = dn->dn_datablkshift; 331fa9e4066Sahrens nblks = (P2ROUNDUP(offset+len, 1<<blkshift) - 332fa9e4066Sahrens P2ALIGN(offset, 1<<blkshift)) >> blkshift; 333fa9e4066Sahrens } else { 334fa9e4066Sahrens nblks = (offset < dn->dn_datablksz); 335fa9e4066Sahrens } 336fa9e4066Sahrens 337fa9e4066Sahrens if (nblks != 0) { 338fa9e4066Sahrens blkid = dbuf_whichblock(dn, offset); 339fa9e4066Sahrens for (i = 0; i < nblks; i++) 340fa9e4066Sahrens dbuf_prefetch(dn, blkid+i); 341fa9e4066Sahrens } 342fa9e4066Sahrens 343fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 344fa9e4066Sahrens 345fa9e4066Sahrens dnode_rele(dn, FTAG); 346fa9e4066Sahrens } 347fa9e4066Sahrens 348ea8dc4b6Seschrock int 349fa9e4066Sahrens dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, 350fa9e4066Sahrens uint64_t size, dmu_tx_t *tx) 351fa9e4066Sahrens { 352ea8dc4b6Seschrock dnode_t *dn; 353ea8dc4b6Seschrock int err = dnode_hold(os->os, object, FTAG, &dn); 354ea8dc4b6Seschrock if (err) 355ea8dc4b6Seschrock return (err); 356fa9e4066Sahrens ASSERT(offset < UINT64_MAX); 357fa9e4066Sahrens ASSERT(size == -1ULL || size <= UINT64_MAX - offset); 358fa9e4066Sahrens dnode_free_range(dn, offset, size, tx); 359fa9e4066Sahrens dnode_rele(dn, FTAG); 360ea8dc4b6Seschrock return (0); 361fa9e4066Sahrens } 362fa9e4066Sahrens 363ea8dc4b6Seschrock int 364ea8dc4b6Seschrock dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 365ea8dc4b6Seschrock void *buf) 366fa9e4066Sahrens { 367fa9e4066Sahrens dnode_t *dn; 368fa9e4066Sahrens dmu_buf_t **dbp; 369ea8dc4b6Seschrock int numbufs, i, err; 370fa9e4066Sahrens 371ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 372ea8dc4b6Seschrock if (err) 373ea8dc4b6Seschrock return (err); 374feb08c6bSbillm 375feb08c6bSbillm /* 376feb08c6bSbillm * Deal with odd block sizes, where there can't be data past the first 377feb08c6bSbillm * block. If we ever do the tail block optimization, we will need to 378feb08c6bSbillm * handle that here as well. 379feb08c6bSbillm */ 380fa9e4066Sahrens if (dn->dn_datablkshift == 0) { 381fa9e4066Sahrens int newsz = offset > dn->dn_datablksz ? 0 : 382fa9e4066Sahrens MIN(size, dn->dn_datablksz - offset); 383fa9e4066Sahrens bzero((char *)buf + newsz, size - newsz); 384fa9e4066Sahrens size = newsz; 385fa9e4066Sahrens } 386fa9e4066Sahrens 387fa9e4066Sahrens while (size > 0) { 388fa9e4066Sahrens uint64_t mylen = MIN(size, DMU_MAX_ACCESS / 2); 389fa9e4066Sahrens int err; 390fa9e4066Sahrens 391fa9e4066Sahrens /* 392fa9e4066Sahrens * NB: we could do this block-at-a-time, but it's nice 393fa9e4066Sahrens * to be reading in parallel. 394fa9e4066Sahrens */ 395a2eea2e1Sahrens err = dmu_buf_hold_array_by_dnode(dn, offset, mylen, 396ea8dc4b6Seschrock TRUE, FTAG, &numbufs, &dbp); 397ea8dc4b6Seschrock if (err) 398fa9e4066Sahrens return (err); 399fa9e4066Sahrens 400fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 401fa9e4066Sahrens int tocpy; 402fa9e4066Sahrens int bufoff; 403fa9e4066Sahrens dmu_buf_t *db = dbp[i]; 404fa9e4066Sahrens 405fa9e4066Sahrens ASSERT(size > 0); 406fa9e4066Sahrens 407fa9e4066Sahrens bufoff = offset - db->db_offset; 408fa9e4066Sahrens tocpy = (int)MIN(db->db_size - bufoff, size); 409fa9e4066Sahrens 410fa9e4066Sahrens bcopy((char *)db->db_data + bufoff, buf, tocpy); 411fa9e4066Sahrens 412fa9e4066Sahrens offset += tocpy; 413fa9e4066Sahrens size -= tocpy; 414fa9e4066Sahrens buf = (char *)buf + tocpy; 415fa9e4066Sahrens } 416ea8dc4b6Seschrock dmu_buf_rele_array(dbp, numbufs, FTAG); 417fa9e4066Sahrens } 418a2eea2e1Sahrens dnode_rele(dn, FTAG); 419fa9e4066Sahrens return (0); 420fa9e4066Sahrens } 421fa9e4066Sahrens 422fa9e4066Sahrens void 423fa9e4066Sahrens dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 424fa9e4066Sahrens const void *buf, dmu_tx_t *tx) 425fa9e4066Sahrens { 426fa9e4066Sahrens dmu_buf_t **dbp; 427fa9e4066Sahrens int numbufs, i; 428fa9e4066Sahrens 42913506d1eSmaybee if (size == 0) 43013506d1eSmaybee return; 43113506d1eSmaybee 432ea8dc4b6Seschrock VERIFY(0 == dmu_buf_hold_array(os, object, offset, size, 433ea8dc4b6Seschrock FALSE, FTAG, &numbufs, &dbp)); 434fa9e4066Sahrens 435fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 436fa9e4066Sahrens int tocpy; 437fa9e4066Sahrens int bufoff; 438fa9e4066Sahrens dmu_buf_t *db = dbp[i]; 439fa9e4066Sahrens 440fa9e4066Sahrens ASSERT(size > 0); 441fa9e4066Sahrens 442fa9e4066Sahrens bufoff = offset - db->db_offset; 443fa9e4066Sahrens tocpy = (int)MIN(db->db_size - bufoff, size); 444fa9e4066Sahrens 445fa9e4066Sahrens ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 446fa9e4066Sahrens 447fa9e4066Sahrens if (tocpy == db->db_size) 448fa9e4066Sahrens dmu_buf_will_fill(db, tx); 449fa9e4066Sahrens else 450fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 451fa9e4066Sahrens 452fa9e4066Sahrens bcopy(buf, (char *)db->db_data + bufoff, tocpy); 453fa9e4066Sahrens 454fa9e4066Sahrens if (tocpy == db->db_size) 455fa9e4066Sahrens dmu_buf_fill_done(db, tx); 456fa9e4066Sahrens 457fa9e4066Sahrens offset += tocpy; 458fa9e4066Sahrens size -= tocpy; 459fa9e4066Sahrens buf = (char *)buf + tocpy; 460fa9e4066Sahrens } 461ea8dc4b6Seschrock dmu_buf_rele_array(dbp, numbufs, FTAG); 462fa9e4066Sahrens } 463fa9e4066Sahrens 464fa9e4066Sahrens #ifdef _KERNEL 465fa9e4066Sahrens int 466feb08c6bSbillm dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size) 467feb08c6bSbillm { 468feb08c6bSbillm dmu_buf_t **dbp; 469feb08c6bSbillm int numbufs, i, err; 470feb08c6bSbillm 471feb08c6bSbillm /* 472feb08c6bSbillm * NB: we could do this block-at-a-time, but it's nice 473feb08c6bSbillm * to be reading in parallel. 474feb08c6bSbillm */ 475feb08c6bSbillm err = dmu_buf_hold_array(os, object, uio->uio_loffset, size, TRUE, FTAG, 476feb08c6bSbillm &numbufs, &dbp); 477feb08c6bSbillm if (err) 478feb08c6bSbillm return (err); 479feb08c6bSbillm 480feb08c6bSbillm for (i = 0; i < numbufs; i++) { 481feb08c6bSbillm int tocpy; 482feb08c6bSbillm int bufoff; 483feb08c6bSbillm dmu_buf_t *db = dbp[i]; 484feb08c6bSbillm 485feb08c6bSbillm ASSERT(size > 0); 486feb08c6bSbillm 487feb08c6bSbillm bufoff = uio->uio_loffset - db->db_offset; 488feb08c6bSbillm tocpy = (int)MIN(db->db_size - bufoff, size); 489feb08c6bSbillm 490feb08c6bSbillm err = uiomove((char *)db->db_data + bufoff, tocpy, 491feb08c6bSbillm UIO_READ, uio); 492feb08c6bSbillm if (err) 493feb08c6bSbillm break; 494feb08c6bSbillm 495feb08c6bSbillm size -= tocpy; 496feb08c6bSbillm } 497feb08c6bSbillm dmu_buf_rele_array(dbp, numbufs, FTAG); 498feb08c6bSbillm 499feb08c6bSbillm return (err); 500feb08c6bSbillm } 501feb08c6bSbillm 502feb08c6bSbillm int 503feb08c6bSbillm dmu_write_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size, 504feb08c6bSbillm dmu_tx_t *tx) 505fa9e4066Sahrens { 506fa9e4066Sahrens dmu_buf_t **dbp; 507fa9e4066Sahrens int numbufs, i; 508fa9e4066Sahrens int err = 0; 509fa9e4066Sahrens 51013506d1eSmaybee if (size == 0) 51113506d1eSmaybee return (0); 51213506d1eSmaybee 513feb08c6bSbillm err = dmu_buf_hold_array(os, object, uio->uio_loffset, size, 514ea8dc4b6Seschrock FALSE, FTAG, &numbufs, &dbp); 515ea8dc4b6Seschrock if (err) 516ea8dc4b6Seschrock return (err); 517fa9e4066Sahrens 518fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 519fa9e4066Sahrens int tocpy; 520fa9e4066Sahrens int bufoff; 521fa9e4066Sahrens dmu_buf_t *db = dbp[i]; 522fa9e4066Sahrens 523fa9e4066Sahrens ASSERT(size > 0); 524fa9e4066Sahrens 525feb08c6bSbillm bufoff = uio->uio_loffset - db->db_offset; 526fa9e4066Sahrens tocpy = (int)MIN(db->db_size - bufoff, size); 527fa9e4066Sahrens 528fa9e4066Sahrens ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 529fa9e4066Sahrens 530fa9e4066Sahrens if (tocpy == db->db_size) 531fa9e4066Sahrens dmu_buf_will_fill(db, tx); 532fa9e4066Sahrens else 533fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 534fa9e4066Sahrens 535fa9e4066Sahrens /* 536fa9e4066Sahrens * XXX uiomove could block forever (eg. nfs-backed 537fa9e4066Sahrens * pages). There needs to be a uiolockdown() function 538fa9e4066Sahrens * to lock the pages in memory, so that uiomove won't 539fa9e4066Sahrens * block. 540fa9e4066Sahrens */ 541fa9e4066Sahrens err = uiomove((char *)db->db_data + bufoff, tocpy, 542fa9e4066Sahrens UIO_WRITE, uio); 543fa9e4066Sahrens 544fa9e4066Sahrens if (tocpy == db->db_size) 545fa9e4066Sahrens dmu_buf_fill_done(db, tx); 546fa9e4066Sahrens 547fa9e4066Sahrens if (err) 548fa9e4066Sahrens break; 549fa9e4066Sahrens 550fa9e4066Sahrens size -= tocpy; 551fa9e4066Sahrens } 552ea8dc4b6Seschrock dmu_buf_rele_array(dbp, numbufs, FTAG); 553fa9e4066Sahrens return (err); 554fa9e4066Sahrens } 55544eda4d7Smaybee 55644eda4d7Smaybee int 55744eda4d7Smaybee dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 55844eda4d7Smaybee page_t *pp, dmu_tx_t *tx) 55944eda4d7Smaybee { 56044eda4d7Smaybee dmu_buf_t **dbp; 56144eda4d7Smaybee int numbufs, i; 56244eda4d7Smaybee int err; 56344eda4d7Smaybee 56444eda4d7Smaybee if (size == 0) 56544eda4d7Smaybee return (0); 56644eda4d7Smaybee 56744eda4d7Smaybee err = dmu_buf_hold_array(os, object, offset, size, 56844eda4d7Smaybee FALSE, FTAG, &numbufs, &dbp); 56944eda4d7Smaybee if (err) 57044eda4d7Smaybee return (err); 57144eda4d7Smaybee 57244eda4d7Smaybee for (i = 0; i < numbufs; i++) { 57344eda4d7Smaybee int tocpy, copied, thiscpy; 57444eda4d7Smaybee int bufoff; 57544eda4d7Smaybee dmu_buf_t *db = dbp[i]; 57644eda4d7Smaybee caddr_t va; 57744eda4d7Smaybee 57844eda4d7Smaybee ASSERT(size > 0); 57944eda4d7Smaybee ASSERT3U(db->db_size, >=, PAGESIZE); 58044eda4d7Smaybee 58144eda4d7Smaybee bufoff = offset - db->db_offset; 58244eda4d7Smaybee tocpy = (int)MIN(db->db_size - bufoff, size); 58344eda4d7Smaybee 58444eda4d7Smaybee ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 58544eda4d7Smaybee 58644eda4d7Smaybee if (tocpy == db->db_size) 58744eda4d7Smaybee dmu_buf_will_fill(db, tx); 58844eda4d7Smaybee else 58944eda4d7Smaybee dmu_buf_will_dirty(db, tx); 59044eda4d7Smaybee 59144eda4d7Smaybee for (copied = 0; copied < tocpy; copied += PAGESIZE) { 59244eda4d7Smaybee ASSERT3U(pp->p_offset, ==, db->db_offset + bufoff); 59344eda4d7Smaybee thiscpy = MIN(PAGESIZE, tocpy - copied); 59444eda4d7Smaybee va = ppmapin(pp, PROT_READ, (caddr_t)-1); 59544eda4d7Smaybee bcopy(va, (char *)db->db_data + bufoff, thiscpy); 59644eda4d7Smaybee ppmapout(va); 59744eda4d7Smaybee pp = pp->p_next; 59844eda4d7Smaybee bufoff += PAGESIZE; 59944eda4d7Smaybee } 60044eda4d7Smaybee 60144eda4d7Smaybee if (tocpy == db->db_size) 60244eda4d7Smaybee dmu_buf_fill_done(db, tx); 60344eda4d7Smaybee 60444eda4d7Smaybee if (err) 60544eda4d7Smaybee break; 60644eda4d7Smaybee 60744eda4d7Smaybee offset += tocpy; 60844eda4d7Smaybee size -= tocpy; 60944eda4d7Smaybee } 61044eda4d7Smaybee dmu_buf_rele_array(dbp, numbufs, FTAG); 61144eda4d7Smaybee return (err); 61244eda4d7Smaybee } 613fa9e4066Sahrens #endif 614fa9e4066Sahrens 615c5c6ffa0Smaybee typedef struct { 616c717a561Smaybee dbuf_dirty_record_t *dr; 617c5c6ffa0Smaybee dmu_sync_cb_t *done; 618c5c6ffa0Smaybee void *arg; 619c717a561Smaybee } dmu_sync_arg_t; 620c5c6ffa0Smaybee 621c5c6ffa0Smaybee /* ARGSUSED */ 622c5c6ffa0Smaybee static void 623c5c6ffa0Smaybee dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg) 624c5c6ffa0Smaybee { 625c717a561Smaybee dmu_sync_arg_t *in = varg; 626c717a561Smaybee dbuf_dirty_record_t *dr = in->dr; 627c717a561Smaybee dmu_buf_impl_t *db = dr->dr_dbuf; 628c5c6ffa0Smaybee dmu_sync_cb_t *done = in->done; 629c5c6ffa0Smaybee 630c5c6ffa0Smaybee if (!BP_IS_HOLE(zio->io_bp)) { 631c5c6ffa0Smaybee zio->io_bp->blk_fill = 1; 632c5c6ffa0Smaybee BP_SET_TYPE(zio->io_bp, db->db_dnode->dn_type); 633c5c6ffa0Smaybee BP_SET_LEVEL(zio->io_bp, 0); 634c5c6ffa0Smaybee } 635c5c6ffa0Smaybee 636c5c6ffa0Smaybee mutex_enter(&db->db_mtx); 637c717a561Smaybee ASSERT(dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC); 638c717a561Smaybee dr->dt.dl.dr_overridden_by = *zio->io_bp; /* structure assignment */ 639c717a561Smaybee dr->dt.dl.dr_override_state = DR_OVERRIDDEN; 640c5c6ffa0Smaybee cv_broadcast(&db->db_changed); 641c5c6ffa0Smaybee mutex_exit(&db->db_mtx); 642c5c6ffa0Smaybee 643c5c6ffa0Smaybee if (done) 644c717a561Smaybee done(&(db->db), in->arg); 645c717a561Smaybee 646c717a561Smaybee kmem_free(in, sizeof (dmu_sync_arg_t)); 647c5c6ffa0Smaybee } 648c5c6ffa0Smaybee 649fa9e4066Sahrens /* 650c5c6ffa0Smaybee * Intent log support: sync the block associated with db to disk. 651c5c6ffa0Smaybee * N.B. and XXX: the caller is responsible for making sure that the 652c5c6ffa0Smaybee * data isn't changing while dmu_sync() is writing it. 653fa9e4066Sahrens * 654fa9e4066Sahrens * Return values: 655fa9e4066Sahrens * 656c5c6ffa0Smaybee * EEXIST: this txg has already been synced, so there's nothing to to. 657fa9e4066Sahrens * The caller should not log the write. 658fa9e4066Sahrens * 659fa9e4066Sahrens * ENOENT: the block was dbuf_free_range()'d, so there's nothing to do. 660fa9e4066Sahrens * The caller should not log the write. 661fa9e4066Sahrens * 662c5c6ffa0Smaybee * EALREADY: this block is already in the process of being synced. 663c5c6ffa0Smaybee * The caller should track its progress (somehow). 664fa9e4066Sahrens * 665c5c6ffa0Smaybee * EINPROGRESS: the IO has been initiated. 666c5c6ffa0Smaybee * The caller should log this blkptr in the callback. 667fa9e4066Sahrens * 668c5c6ffa0Smaybee * 0: completed. Sets *bp to the blkptr just written. 669c5c6ffa0Smaybee * The caller should log this blkptr immediately. 670fa9e4066Sahrens */ 671fa9e4066Sahrens int 672c5c6ffa0Smaybee dmu_sync(zio_t *pio, dmu_buf_t *db_fake, 673c5c6ffa0Smaybee blkptr_t *bp, uint64_t txg, dmu_sync_cb_t *done, void *arg) 674fa9e4066Sahrens { 675c5c6ffa0Smaybee dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; 676c5c6ffa0Smaybee objset_impl_t *os = db->db_objset; 677c5c6ffa0Smaybee dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool; 678fa9e4066Sahrens tx_state_t *tx = &dp->dp_tx; 679c717a561Smaybee dbuf_dirty_record_t *dr; 680c717a561Smaybee dmu_sync_arg_t *in; 681ea8dc4b6Seschrock zbookmark_t zb; 682c717a561Smaybee zio_t *zio; 6839bc11082Sek110237 int zio_flags; 684c5c6ffa0Smaybee int err; 685fa9e4066Sahrens 686fa9e4066Sahrens ASSERT(BP_IS_HOLE(bp)); 687fa9e4066Sahrens ASSERT(txg != 0); 688fa9e4066Sahrens 689c5c6ffa0Smaybee 690fa9e4066Sahrens dprintf("dmu_sync txg=%llu, s,o,q %llu %llu %llu\n", 691fa9e4066Sahrens txg, tx->tx_synced_txg, tx->tx_open_txg, tx->tx_quiesced_txg); 692fa9e4066Sahrens 693fa9e4066Sahrens /* 694c5c6ffa0Smaybee * XXX - would be nice if we could do this without suspending... 695ea8dc4b6Seschrock */ 696c5c6ffa0Smaybee txg_suspend(dp); 697ea8dc4b6Seschrock 698ea8dc4b6Seschrock /* 699fa9e4066Sahrens * If this txg already synced, there's nothing to do. 700fa9e4066Sahrens */ 701fa9e4066Sahrens if (txg <= tx->tx_synced_txg) { 702c5c6ffa0Smaybee txg_resume(dp); 703fa9e4066Sahrens /* 704fa9e4066Sahrens * If we're running ziltest, we need the blkptr regardless. 705fa9e4066Sahrens */ 706fa9e4066Sahrens if (txg > spa_freeze_txg(dp->dp_spa)) { 707fa9e4066Sahrens /* if db_blkptr == NULL, this was an empty write */ 708fa9e4066Sahrens if (db->db_blkptr) 709fa9e4066Sahrens *bp = *db->db_blkptr; /* structure assignment */ 710fa9e4066Sahrens return (0); 711fa9e4066Sahrens } 712c5c6ffa0Smaybee return (EEXIST); 713fa9e4066Sahrens } 714fa9e4066Sahrens 715fa9e4066Sahrens mutex_enter(&db->db_mtx); 716fa9e4066Sahrens 717c5c6ffa0Smaybee if (txg == tx->tx_syncing_txg) { 718c5c6ffa0Smaybee while (db->db_data_pending) { 719c5c6ffa0Smaybee /* 720c5c6ffa0Smaybee * IO is in-progress. Wait for it to finish. 721c5c6ffa0Smaybee * XXX - would be nice to be able to somehow "attach" 722c5c6ffa0Smaybee * this zio to the parent zio passed in. 723c5c6ffa0Smaybee */ 724c5c6ffa0Smaybee cv_wait(&db->db_changed, &db->db_mtx); 72513506d1eSmaybee if (!db->db_data_pending && 72613506d1eSmaybee db->db_blkptr && BP_IS_HOLE(db->db_blkptr)) { 72713506d1eSmaybee /* 72813506d1eSmaybee * IO was compressed away 72913506d1eSmaybee */ 73013506d1eSmaybee *bp = *db->db_blkptr; /* structure assignment */ 73113506d1eSmaybee mutex_exit(&db->db_mtx); 73213506d1eSmaybee txg_resume(dp); 73313506d1eSmaybee return (0); 73413506d1eSmaybee } 735c5c6ffa0Smaybee ASSERT(db->db_data_pending || 736c5c6ffa0Smaybee (db->db_blkptr && db->db_blkptr->blk_birth == txg)); 737c5c6ffa0Smaybee } 738c5c6ffa0Smaybee 739c5c6ffa0Smaybee if (db->db_blkptr && db->db_blkptr->blk_birth == txg) { 740c5c6ffa0Smaybee /* 741c5c6ffa0Smaybee * IO is already completed. 742c5c6ffa0Smaybee */ 743c5c6ffa0Smaybee *bp = *db->db_blkptr; /* structure assignment */ 744c5c6ffa0Smaybee mutex_exit(&db->db_mtx); 745c5c6ffa0Smaybee txg_resume(dp); 746c5c6ffa0Smaybee return (0); 747c5c6ffa0Smaybee } 748c5c6ffa0Smaybee } 749c5c6ffa0Smaybee 750c717a561Smaybee dr = db->db_last_dirty; 751c717a561Smaybee while (dr && dr->dr_txg > txg) 752c717a561Smaybee dr = dr->dr_next; 753c717a561Smaybee if (dr == NULL || dr->dr_txg < txg) { 754c5c6ffa0Smaybee /* 755c5c6ffa0Smaybee * This dbuf isn't dirty, must have been free_range'd. 756fa9e4066Sahrens * There's no need to log writes to freed blocks, so we're done. 757fa9e4066Sahrens */ 758fa9e4066Sahrens mutex_exit(&db->db_mtx); 759c5c6ffa0Smaybee txg_resume(dp); 760fa9e4066Sahrens return (ENOENT); 761fa9e4066Sahrens } 762fa9e4066Sahrens 763c717a561Smaybee ASSERT(dr->dr_txg == txg); 764c717a561Smaybee if (dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC) { 765c5c6ffa0Smaybee /* 766c717a561Smaybee * We have already issued a sync write for this buffer. 767c5c6ffa0Smaybee */ 768c717a561Smaybee mutex_exit(&db->db_mtx); 769c717a561Smaybee txg_resume(dp); 770c717a561Smaybee return (EALREADY); 771c717a561Smaybee } else if (dr->dt.dl.dr_override_state == DR_OVERRIDDEN) { 772c717a561Smaybee /* 773c717a561Smaybee * This buffer has already been synced. It could not 774c717a561Smaybee * have been dirtied since, or we would have cleared the state. 775c717a561Smaybee */ 776c717a561Smaybee *bp = dr->dt.dl.dr_overridden_by; /* structure assignment */ 777c717a561Smaybee mutex_exit(&db->db_mtx); 778c717a561Smaybee txg_resume(dp); 779c717a561Smaybee return (0); 780c717a561Smaybee } 781c717a561Smaybee 782c717a561Smaybee dr->dt.dl.dr_override_state = DR_IN_DMU_SYNC; 783c717a561Smaybee in = kmem_alloc(sizeof (dmu_sync_arg_t), KM_SLEEP); 784c717a561Smaybee in->dr = dr; 785c5c6ffa0Smaybee in->done = done; 786c5c6ffa0Smaybee in->arg = arg; 787fa9e4066Sahrens mutex_exit(&db->db_mtx); 788c5c6ffa0Smaybee txg_resume(dp); 789fa9e4066Sahrens 790c5c6ffa0Smaybee zb.zb_objset = os->os_dsl_dataset->ds_object; 791ea8dc4b6Seschrock zb.zb_object = db->db.db_object; 792ea8dc4b6Seschrock zb.zb_level = db->db_level; 793ea8dc4b6Seschrock zb.zb_blkid = db->db_blkid; 7949bc11082Sek110237 zio_flags = ZIO_FLAG_MUSTSUCCEED; 7959bc11082Sek110237 if (dmu_ot[db->db_dnode->dn_type].ot_metadata || zb.zb_level != 0) 7969bc11082Sek110237 zio_flags |= ZIO_FLAG_METADATA; 797c717a561Smaybee zio = arc_write(pio, os->os_spa, 798c5c6ffa0Smaybee zio_checksum_select(db->db_dnode->dn_checksum, os->os_checksum), 799c5c6ffa0Smaybee zio_compress_select(db->db_dnode->dn_compress, os->os_compress), 800*d0ad202dSahrens dmu_get_replication_level(os, &zb, db->db_dnode->dn_type), 801c717a561Smaybee txg, bp, dr->dt.dl.dr_data, NULL, dmu_sync_done, in, 8029bc11082Sek110237 ZIO_PRIORITY_SYNC_WRITE, zio_flags, &zb); 803fa9e4066Sahrens 804c717a561Smaybee if (pio) { 805c717a561Smaybee zio_nowait(zio); 806c717a561Smaybee err = EINPROGRESS; 807c717a561Smaybee } else { 808c717a561Smaybee err = zio_wait(zio); 809c717a561Smaybee ASSERT(err == 0); 810c717a561Smaybee } 811c717a561Smaybee return (err); 812fa9e4066Sahrens } 813fa9e4066Sahrens 814fa9e4066Sahrens int 815fa9e4066Sahrens dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, 816fa9e4066Sahrens dmu_tx_t *tx) 817fa9e4066Sahrens { 818ea8dc4b6Seschrock dnode_t *dn; 819ea8dc4b6Seschrock int err; 820ea8dc4b6Seschrock 821ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 822ea8dc4b6Seschrock if (err) 823ea8dc4b6Seschrock return (err); 824ea8dc4b6Seschrock err = dnode_set_blksz(dn, size, ibs, tx); 825fa9e4066Sahrens dnode_rele(dn, FTAG); 826fa9e4066Sahrens return (err); 827fa9e4066Sahrens } 828fa9e4066Sahrens 829fa9e4066Sahrens void 830fa9e4066Sahrens dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, 831fa9e4066Sahrens dmu_tx_t *tx) 832fa9e4066Sahrens { 833ea8dc4b6Seschrock dnode_t *dn; 834ea8dc4b6Seschrock 835ea8dc4b6Seschrock /* XXX assumes dnode_hold will not get an i/o error */ 836ea8dc4b6Seschrock (void) dnode_hold(os->os, object, FTAG, &dn); 837fa9e4066Sahrens ASSERT(checksum < ZIO_CHECKSUM_FUNCTIONS); 838fa9e4066Sahrens dn->dn_checksum = checksum; 839fa9e4066Sahrens dnode_setdirty(dn, tx); 840fa9e4066Sahrens dnode_rele(dn, FTAG); 841fa9e4066Sahrens } 842fa9e4066Sahrens 843fa9e4066Sahrens void 844fa9e4066Sahrens dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress, 845fa9e4066Sahrens dmu_tx_t *tx) 846fa9e4066Sahrens { 847ea8dc4b6Seschrock dnode_t *dn; 848ea8dc4b6Seschrock 849ea8dc4b6Seschrock /* XXX assumes dnode_hold will not get an i/o error */ 850ea8dc4b6Seschrock (void) dnode_hold(os->os, object, FTAG, &dn); 851fa9e4066Sahrens ASSERT(compress < ZIO_COMPRESS_FUNCTIONS); 852fa9e4066Sahrens dn->dn_compress = compress; 853fa9e4066Sahrens dnode_setdirty(dn, tx); 854fa9e4066Sahrens dnode_rele(dn, FTAG); 855fa9e4066Sahrens } 856fa9e4066Sahrens 85744cd46caSbillm int 858*d0ad202dSahrens dmu_get_replication_level(objset_impl_t *os, 859*d0ad202dSahrens zbookmark_t *zb, dmu_object_type_t ot) 86044cd46caSbillm { 861*d0ad202dSahrens int ncopies = os->os_copies; 86244cd46caSbillm 863*d0ad202dSahrens /* If it's the mos, it should have max copies set. */ 864*d0ad202dSahrens ASSERT(zb->zb_objset != 0 || 865*d0ad202dSahrens ncopies == spa_max_replication(os->os_spa)); 866*d0ad202dSahrens 867*d0ad202dSahrens if (dmu_ot[ot].ot_metadata || zb->zb_level != 0) 86844cd46caSbillm ncopies++; 869*d0ad202dSahrens return (MIN(ncopies, spa_max_replication(os->os_spa))); 87044cd46caSbillm } 87144cd46caSbillm 872fa9e4066Sahrens int 873fa9e4066Sahrens dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) 874fa9e4066Sahrens { 875fa9e4066Sahrens dnode_t *dn; 876fa9e4066Sahrens int i, err; 877fa9e4066Sahrens 878ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 879ea8dc4b6Seschrock if (err) 880ea8dc4b6Seschrock return (err); 881fa9e4066Sahrens /* 882fa9e4066Sahrens * Sync any current changes before 883fa9e4066Sahrens * we go trundling through the block pointers. 884fa9e4066Sahrens */ 885fa9e4066Sahrens for (i = 0; i < TXG_SIZE; i++) { 886c543ec06Sahrens if (list_link_active(&dn->dn_dirty_link[i])) 887fa9e4066Sahrens break; 888fa9e4066Sahrens } 889fa9e4066Sahrens if (i != TXG_SIZE) { 890fa9e4066Sahrens dnode_rele(dn, FTAG); 891fa9e4066Sahrens txg_wait_synced(dmu_objset_pool(os), 0); 892ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 893ea8dc4b6Seschrock if (err) 894ea8dc4b6Seschrock return (err); 895fa9e4066Sahrens } 896fa9e4066Sahrens 8976754306eSahrens err = dnode_next_offset(dn, hole, off, 1, 1, 0); 898fa9e4066Sahrens dnode_rele(dn, FTAG); 899fa9e4066Sahrens 900fa9e4066Sahrens return (err); 901fa9e4066Sahrens } 902fa9e4066Sahrens 903fa9e4066Sahrens void 904fa9e4066Sahrens dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi) 905fa9e4066Sahrens { 906fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 907fa9e4066Sahrens mutex_enter(&dn->dn_mtx); 908fa9e4066Sahrens 909fa9e4066Sahrens doi->doi_data_block_size = dn->dn_datablksz; 910fa9e4066Sahrens doi->doi_metadata_block_size = dn->dn_indblkshift ? 911fa9e4066Sahrens 1ULL << dn->dn_indblkshift : 0; 912fa9e4066Sahrens doi->doi_indirection = dn->dn_nlevels; 913fa9e4066Sahrens doi->doi_checksum = dn->dn_checksum; 914fa9e4066Sahrens doi->doi_compress = dn->dn_compress; 91599653d4eSeschrock doi->doi_physical_blks = (DN_USED_BYTES(dn->dn_phys) + 91699653d4eSeschrock SPA_MINBLOCKSIZE/2) >> SPA_MINBLOCKSHIFT; 917fa9e4066Sahrens doi->doi_max_block_offset = dn->dn_phys->dn_maxblkid; 918fa9e4066Sahrens doi->doi_type = dn->dn_type; 919fa9e4066Sahrens doi->doi_bonus_size = dn->dn_bonuslen; 920fa9e4066Sahrens doi->doi_bonus_type = dn->dn_bonustype; 921fa9e4066Sahrens 922fa9e4066Sahrens mutex_exit(&dn->dn_mtx); 923fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 924fa9e4066Sahrens } 925fa9e4066Sahrens 926fa9e4066Sahrens /* 927fa9e4066Sahrens * Get information on a DMU object. 928fa9e4066Sahrens * If doi is NULL, just indicates whether the object exists. 929fa9e4066Sahrens */ 930fa9e4066Sahrens int 931fa9e4066Sahrens dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi) 932fa9e4066Sahrens { 933ea8dc4b6Seschrock dnode_t *dn; 934ea8dc4b6Seschrock int err = dnode_hold(os->os, object, FTAG, &dn); 935fa9e4066Sahrens 936ea8dc4b6Seschrock if (err) 937ea8dc4b6Seschrock return (err); 938fa9e4066Sahrens 939fa9e4066Sahrens if (doi != NULL) 940fa9e4066Sahrens dmu_object_info_from_dnode(dn, doi); 941fa9e4066Sahrens 942fa9e4066Sahrens dnode_rele(dn, FTAG); 943fa9e4066Sahrens return (0); 944fa9e4066Sahrens } 945fa9e4066Sahrens 946fa9e4066Sahrens /* 947fa9e4066Sahrens * As above, but faster; can be used when you have a held dbuf in hand. 948fa9e4066Sahrens */ 949fa9e4066Sahrens void 950fa9e4066Sahrens dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi) 951fa9e4066Sahrens { 952fa9e4066Sahrens dmu_object_info_from_dnode(((dmu_buf_impl_t *)db)->db_dnode, doi); 953fa9e4066Sahrens } 954fa9e4066Sahrens 955fa9e4066Sahrens /* 956fa9e4066Sahrens * Faster still when you only care about the size. 957fa9e4066Sahrens * This is specifically optimized for zfs_getattr(). 958fa9e4066Sahrens */ 959fa9e4066Sahrens void 960fa9e4066Sahrens dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize, u_longlong_t *nblk512) 961fa9e4066Sahrens { 962fa9e4066Sahrens dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode; 963fa9e4066Sahrens 964fa9e4066Sahrens *blksize = dn->dn_datablksz; 96599653d4eSeschrock /* add 1 for dnode space */ 96699653d4eSeschrock *nblk512 = ((DN_USED_BYTES(dn->dn_phys) + SPA_MINBLOCKSIZE/2) >> 96799653d4eSeschrock SPA_MINBLOCKSHIFT) + 1; 968fa9e4066Sahrens } 969fa9e4066Sahrens 970fa9e4066Sahrens void 971fa9e4066Sahrens byteswap_uint64_array(void *vbuf, size_t size) 972fa9e4066Sahrens { 973fa9e4066Sahrens uint64_t *buf = vbuf; 974fa9e4066Sahrens size_t count = size >> 3; 975fa9e4066Sahrens int i; 976fa9e4066Sahrens 977fa9e4066Sahrens ASSERT((size & 7) == 0); 978fa9e4066Sahrens 979fa9e4066Sahrens for (i = 0; i < count; i++) 980fa9e4066Sahrens buf[i] = BSWAP_64(buf[i]); 981fa9e4066Sahrens } 982fa9e4066Sahrens 983fa9e4066Sahrens void 984fa9e4066Sahrens byteswap_uint32_array(void *vbuf, size_t size) 985fa9e4066Sahrens { 986fa9e4066Sahrens uint32_t *buf = vbuf; 987fa9e4066Sahrens size_t count = size >> 2; 988fa9e4066Sahrens int i; 989fa9e4066Sahrens 990fa9e4066Sahrens ASSERT((size & 3) == 0); 991fa9e4066Sahrens 992fa9e4066Sahrens for (i = 0; i < count; i++) 993fa9e4066Sahrens buf[i] = BSWAP_32(buf[i]); 994fa9e4066Sahrens } 995fa9e4066Sahrens 996fa9e4066Sahrens void 997fa9e4066Sahrens byteswap_uint16_array(void *vbuf, size_t size) 998fa9e4066Sahrens { 999fa9e4066Sahrens uint16_t *buf = vbuf; 1000fa9e4066Sahrens size_t count = size >> 1; 1001fa9e4066Sahrens int i; 1002fa9e4066Sahrens 1003fa9e4066Sahrens ASSERT((size & 1) == 0); 1004fa9e4066Sahrens 1005fa9e4066Sahrens for (i = 0; i < count; i++) 1006fa9e4066Sahrens buf[i] = BSWAP_16(buf[i]); 1007fa9e4066Sahrens } 1008fa9e4066Sahrens 1009fa9e4066Sahrens /* ARGSUSED */ 1010fa9e4066Sahrens void 1011fa9e4066Sahrens byteswap_uint8_array(void *vbuf, size_t size) 1012fa9e4066Sahrens { 1013fa9e4066Sahrens } 1014fa9e4066Sahrens 1015fa9e4066Sahrens void 1016fa9e4066Sahrens dmu_init(void) 1017fa9e4066Sahrens { 1018fa9e4066Sahrens dbuf_init(); 1019fa9e4066Sahrens dnode_init(); 1020fa9e4066Sahrens arc_init(); 1021fa9e4066Sahrens } 1022fa9e4066Sahrens 1023fa9e4066Sahrens void 1024fa9e4066Sahrens dmu_fini(void) 1025fa9e4066Sahrens { 1026fa9e4066Sahrens arc_fini(); 1027fa9e4066Sahrens dnode_fini(); 1028fa9e4066Sahrens dbuf_fini(); 1029fa9e4066Sahrens } 1030