1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22ea8dc4b6Seschrock * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23fa9e4066Sahrens * Use is subject to license terms. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 26fa9e4066Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27fa9e4066Sahrens 28fa9e4066Sahrens #include <sys/dmu.h> 29fa9e4066Sahrens #include <sys/dmu_impl.h> 30fa9e4066Sahrens #include <sys/dmu_tx.h> 31fa9e4066Sahrens #include <sys/dbuf.h> 32fa9e4066Sahrens #include <sys/dnode.h> 33fa9e4066Sahrens #include <sys/zfs_context.h> 34fa9e4066Sahrens #include <sys/dmu_objset.h> 35fa9e4066Sahrens #include <sys/dmu_traverse.h> 36fa9e4066Sahrens #include <sys/dsl_dataset.h> 37fa9e4066Sahrens #include <sys/dsl_dir.h> 38fa9e4066Sahrens #include <sys/dsl_pool.h> 391d452cf5Sahrens #include <sys/dsl_synctask.h> 40a2eea2e1Sahrens #include <sys/dsl_prop.h> 41fa9e4066Sahrens #include <sys/dmu_zfetch.h> 42fa9e4066Sahrens #include <sys/zfs_ioctl.h> 43fa9e4066Sahrens #include <sys/zap.h> 44ea8dc4b6Seschrock #include <sys/zio_checksum.h> 4544eda4d7Smaybee #ifdef _KERNEL 4644eda4d7Smaybee #include <sys/vmsystm.h> 4744eda4d7Smaybee #endif 48fa9e4066Sahrens 49fa9e4066Sahrens const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { 50fa9e4066Sahrens { byteswap_uint8_array, TRUE, "unallocated" }, 51fa9e4066Sahrens { zap_byteswap, TRUE, "object directory" }, 52fa9e4066Sahrens { byteswap_uint64_array, TRUE, "object array" }, 53fa9e4066Sahrens { byteswap_uint8_array, TRUE, "packed nvlist" }, 54fa9e4066Sahrens { byteswap_uint64_array, TRUE, "packed nvlist size" }, 55fa9e4066Sahrens { byteswap_uint64_array, TRUE, "bplist" }, 56fa9e4066Sahrens { byteswap_uint64_array, TRUE, "bplist header" }, 57fa9e4066Sahrens { byteswap_uint64_array, TRUE, "SPA space map header" }, 58fa9e4066Sahrens { byteswap_uint64_array, TRUE, "SPA space map" }, 59fa9e4066Sahrens { byteswap_uint64_array, TRUE, "ZIL intent log" }, 60fa9e4066Sahrens { dnode_buf_byteswap, TRUE, "DMU dnode" }, 61fa9e4066Sahrens { dmu_objset_byteswap, TRUE, "DMU objset" }, 62fa9e4066Sahrens { byteswap_uint64_array, TRUE, "DSL directory" }, 63fa9e4066Sahrens { zap_byteswap, TRUE, "DSL directory child map"}, 64fa9e4066Sahrens { zap_byteswap, TRUE, "DSL dataset snap map" }, 65fa9e4066Sahrens { zap_byteswap, TRUE, "DSL props" }, 66fa9e4066Sahrens { byteswap_uint64_array, TRUE, "DSL dataset" }, 67fa9e4066Sahrens { zfs_znode_byteswap, TRUE, "ZFS znode" }, 68fa9e4066Sahrens { zfs_acl_byteswap, TRUE, "ZFS ACL" }, 69fa9e4066Sahrens { byteswap_uint8_array, FALSE, "ZFS plain file" }, 70fa9e4066Sahrens { zap_byteswap, TRUE, "ZFS directory" }, 71fa9e4066Sahrens { zap_byteswap, TRUE, "ZFS master node" }, 72fa9e4066Sahrens { zap_byteswap, TRUE, "ZFS delete queue" }, 73fa9e4066Sahrens { byteswap_uint8_array, FALSE, "zvol object" }, 74fa9e4066Sahrens { zap_byteswap, TRUE, "zvol prop" }, 75fa9e4066Sahrens { byteswap_uint8_array, FALSE, "other uint8[]" }, 76fa9e4066Sahrens { byteswap_uint64_array, FALSE, "other uint64[]" }, 77fa9e4066Sahrens { zap_byteswap, TRUE, "other ZAP" }, 78ea8dc4b6Seschrock { zap_byteswap, TRUE, "persistent error log" }, 79*06eeb2adSek110237 { byteswap_uint8_array, TRUE, "SPA history" }, 80*06eeb2adSek110237 { byteswap_uint64_array, TRUE, "SPA history offsets" }, 81fa9e4066Sahrens }; 82fa9e4066Sahrens 83fa9e4066Sahrens int 84ea8dc4b6Seschrock dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset, 85ea8dc4b6Seschrock void *tag, dmu_buf_t **dbp) 86fa9e4066Sahrens { 87fa9e4066Sahrens dnode_t *dn; 88fa9e4066Sahrens uint64_t blkid; 89fa9e4066Sahrens dmu_buf_impl_t *db; 90ea8dc4b6Seschrock int err; 91fa9e4066Sahrens 92ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 93ea8dc4b6Seschrock if (err) 94ea8dc4b6Seschrock return (err); 95fa9e4066Sahrens blkid = dbuf_whichblock(dn, offset); 96fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 97ea8dc4b6Seschrock db = dbuf_hold(dn, blkid, tag); 98fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 99ea8dc4b6Seschrock if (db == NULL) { 100ea8dc4b6Seschrock err = EIO; 101ea8dc4b6Seschrock } else { 102ea8dc4b6Seschrock err = dbuf_read(db, NULL, DB_RF_CANFAIL); 103ea8dc4b6Seschrock if (err) { 104ea8dc4b6Seschrock dbuf_rele(db, tag); 105ea8dc4b6Seschrock db = NULL; 106ea8dc4b6Seschrock } 107fa9e4066Sahrens } 108fa9e4066Sahrens 109ea8dc4b6Seschrock dnode_rele(dn, FTAG); 110ea8dc4b6Seschrock *dbp = &db->db; 111ea8dc4b6Seschrock return (err); 112fa9e4066Sahrens } 113fa9e4066Sahrens 114fa9e4066Sahrens int 115fa9e4066Sahrens dmu_bonus_max(void) 116fa9e4066Sahrens { 117fa9e4066Sahrens return (DN_MAX_BONUSLEN); 118fa9e4066Sahrens } 119fa9e4066Sahrens 120fa9e4066Sahrens /* 121ea8dc4b6Seschrock * returns ENOENT, EIO, or 0. 122fa9e4066Sahrens */ 123ea8dc4b6Seschrock int 124ea8dc4b6Seschrock dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp) 125fa9e4066Sahrens { 126ea8dc4b6Seschrock dnode_t *dn; 127ea8dc4b6Seschrock int err, count; 128fa9e4066Sahrens dmu_buf_impl_t *db; 129fa9e4066Sahrens 130ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 131ea8dc4b6Seschrock if (err) 132ea8dc4b6Seschrock return (err); 133fa9e4066Sahrens 134fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 135ea8dc4b6Seschrock if (dn->dn_bonus == NULL) { 136fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 137ea8dc4b6Seschrock rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 138ea8dc4b6Seschrock if (dn->dn_bonus == NULL) 139ea8dc4b6Seschrock dn->dn_bonus = dbuf_create_bonus(dn); 140fa9e4066Sahrens } 141ea8dc4b6Seschrock db = dn->dn_bonus; 142ea8dc4b6Seschrock rw_exit(&dn->dn_struct_rwlock); 143ea8dc4b6Seschrock mutex_enter(&db->db_mtx); 144ea8dc4b6Seschrock count = refcount_add(&db->db_holds, tag); 145ea8dc4b6Seschrock mutex_exit(&db->db_mtx); 146ea8dc4b6Seschrock if (count == 1) 147ea8dc4b6Seschrock dnode_add_ref(dn, db); 148fa9e4066Sahrens dnode_rele(dn, FTAG); 149ea8dc4b6Seschrock 150ea8dc4b6Seschrock VERIFY(0 == dbuf_read(db, NULL, DB_RF_MUST_SUCCEED)); 151ea8dc4b6Seschrock 152ea8dc4b6Seschrock *dbp = &db->db; 153ea8dc4b6Seschrock return (0); 154fa9e4066Sahrens } 155fa9e4066Sahrens 15613506d1eSmaybee /* 15713506d1eSmaybee * Note: longer-term, we should modify all of the dmu_buf_*() interfaces 15813506d1eSmaybee * to take a held dnode rather than <os, object> -- the lookup is wasteful, 15913506d1eSmaybee * and can induce severe lock contention when writing to several files 16013506d1eSmaybee * whose dnodes are in the same block. 16113506d1eSmaybee */ 16213506d1eSmaybee static int 16313506d1eSmaybee dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, 164ea8dc4b6Seschrock uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) 165fa9e4066Sahrens { 166fa9e4066Sahrens dmu_buf_t **dbp; 167fa9e4066Sahrens uint64_t blkid, nblks, i; 168ea8dc4b6Seschrock uint32_t flags; 169ea8dc4b6Seschrock int err; 170ea8dc4b6Seschrock zio_t *zio; 171ea8dc4b6Seschrock 172ea8dc4b6Seschrock ASSERT(length <= DMU_MAX_ACCESS); 173fa9e4066Sahrens 174ea8dc4b6Seschrock flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT; 175e1930233Sbonwick if (length > zfetch_array_rd_sz) 176ea8dc4b6Seschrock flags |= DB_RF_NOPREFETCH; 177ea8dc4b6Seschrock 178fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 179fa9e4066Sahrens if (dn->dn_datablkshift) { 180fa9e4066Sahrens int blkshift = dn->dn_datablkshift; 181fa9e4066Sahrens nblks = (P2ROUNDUP(offset+length, 1ULL<<blkshift) - 182fa9e4066Sahrens P2ALIGN(offset, 1ULL<<blkshift)) >> blkshift; 183fa9e4066Sahrens } else { 184fa9e4066Sahrens ASSERT3U(offset + length, <=, dn->dn_datablksz); 185fa9e4066Sahrens nblks = 1; 186fa9e4066Sahrens } 187ea8dc4b6Seschrock dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP); 188fa9e4066Sahrens 189ea8dc4b6Seschrock zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, TRUE); 190fa9e4066Sahrens blkid = dbuf_whichblock(dn, offset); 191fa9e4066Sahrens for (i = 0; i < nblks; i++) { 192ea8dc4b6Seschrock dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag); 193ea8dc4b6Seschrock if (db == NULL) { 194ea8dc4b6Seschrock rw_exit(&dn->dn_struct_rwlock); 195ea8dc4b6Seschrock dmu_buf_rele_array(dbp, nblks, tag); 196ea8dc4b6Seschrock zio_nowait(zio); 197ea8dc4b6Seschrock return (EIO); 198ea8dc4b6Seschrock } 199ea8dc4b6Seschrock /* initiate async i/o */ 20013506d1eSmaybee if (read) { 201ea8dc4b6Seschrock rw_exit(&dn->dn_struct_rwlock); 202ea8dc4b6Seschrock (void) dbuf_read(db, zio, flags); 203ea8dc4b6Seschrock rw_enter(&dn->dn_struct_rwlock, RW_READER); 204ea8dc4b6Seschrock } 205ea8dc4b6Seschrock dbp[i] = &db->db; 206fa9e4066Sahrens } 207fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 208fa9e4066Sahrens 209ea8dc4b6Seschrock /* wait for async i/o */ 210ea8dc4b6Seschrock err = zio_wait(zio); 211ea8dc4b6Seschrock if (err) { 212ea8dc4b6Seschrock dmu_buf_rele_array(dbp, nblks, tag); 213ea8dc4b6Seschrock return (err); 214ea8dc4b6Seschrock } 215ea8dc4b6Seschrock 216ea8dc4b6Seschrock /* wait for other io to complete */ 217ea8dc4b6Seschrock if (read) { 218ea8dc4b6Seschrock for (i = 0; i < nblks; i++) { 219ea8dc4b6Seschrock dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i]; 220ea8dc4b6Seschrock mutex_enter(&db->db_mtx); 221ea8dc4b6Seschrock while (db->db_state == DB_READ || 222ea8dc4b6Seschrock db->db_state == DB_FILL) 223ea8dc4b6Seschrock cv_wait(&db->db_changed, &db->db_mtx); 224ea8dc4b6Seschrock if (db->db_state == DB_UNCACHED) 225ea8dc4b6Seschrock err = EIO; 226ea8dc4b6Seschrock mutex_exit(&db->db_mtx); 227ea8dc4b6Seschrock if (err) { 228ea8dc4b6Seschrock dmu_buf_rele_array(dbp, nblks, tag); 229ea8dc4b6Seschrock return (err); 230ea8dc4b6Seschrock } 231ea8dc4b6Seschrock } 232ea8dc4b6Seschrock } 233ea8dc4b6Seschrock 234ea8dc4b6Seschrock *numbufsp = nblks; 235ea8dc4b6Seschrock *dbpp = dbp; 236ea8dc4b6Seschrock return (0); 237fa9e4066Sahrens } 238fa9e4066Sahrens 239a2eea2e1Sahrens static int 24013506d1eSmaybee dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset, 24113506d1eSmaybee uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) 24213506d1eSmaybee { 24313506d1eSmaybee dnode_t *dn; 24413506d1eSmaybee int err; 24513506d1eSmaybee 24613506d1eSmaybee err = dnode_hold(os->os, object, FTAG, &dn); 24713506d1eSmaybee if (err) 24813506d1eSmaybee return (err); 24913506d1eSmaybee 25013506d1eSmaybee err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, 25113506d1eSmaybee numbufsp, dbpp); 25213506d1eSmaybee 25313506d1eSmaybee dnode_rele(dn, FTAG); 25413506d1eSmaybee 25513506d1eSmaybee return (err); 25613506d1eSmaybee } 25713506d1eSmaybee 25813506d1eSmaybee int 25913506d1eSmaybee dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset, 26013506d1eSmaybee uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) 26113506d1eSmaybee { 26213506d1eSmaybee dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode; 26313506d1eSmaybee int err; 26413506d1eSmaybee 26513506d1eSmaybee err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, 26613506d1eSmaybee numbufsp, dbpp); 26713506d1eSmaybee 26813506d1eSmaybee return (err); 26913506d1eSmaybee } 27013506d1eSmaybee 271fa9e4066Sahrens void 272ea8dc4b6Seschrock dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, void *tag) 273fa9e4066Sahrens { 274fa9e4066Sahrens int i; 275fa9e4066Sahrens dmu_buf_impl_t **dbp = (dmu_buf_impl_t **)dbp_fake; 276fa9e4066Sahrens 277fa9e4066Sahrens if (numbufs == 0) 278fa9e4066Sahrens return; 279fa9e4066Sahrens 280ea8dc4b6Seschrock for (i = 0; i < numbufs; i++) { 281ea8dc4b6Seschrock if (dbp[i]) 282ea8dc4b6Seschrock dbuf_rele(dbp[i], tag); 283ea8dc4b6Seschrock } 284fa9e4066Sahrens 285fa9e4066Sahrens kmem_free(dbp, sizeof (dmu_buf_t *) * numbufs); 286fa9e4066Sahrens } 287fa9e4066Sahrens 288fa9e4066Sahrens void 289fa9e4066Sahrens dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len) 290fa9e4066Sahrens { 291fa9e4066Sahrens dnode_t *dn; 292fa9e4066Sahrens uint64_t blkid; 293ea8dc4b6Seschrock int nblks, i, err; 294fa9e4066Sahrens 295fa9e4066Sahrens if (len == 0) { /* they're interested in the bonus buffer */ 296fa9e4066Sahrens dn = os->os->os_meta_dnode; 297fa9e4066Sahrens 298fa9e4066Sahrens if (object == 0 || object >= DN_MAX_OBJECT) 299fa9e4066Sahrens return; 300fa9e4066Sahrens 301fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 302fa9e4066Sahrens blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t)); 303fa9e4066Sahrens dbuf_prefetch(dn, blkid); 304fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 305fa9e4066Sahrens return; 306fa9e4066Sahrens } 307fa9e4066Sahrens 308fa9e4066Sahrens /* 309fa9e4066Sahrens * XXX - Note, if the dnode for the requested object is not 310fa9e4066Sahrens * already cached, we will do a *synchronous* read in the 311fa9e4066Sahrens * dnode_hold() call. The same is true for any indirects. 312fa9e4066Sahrens */ 313ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 314ea8dc4b6Seschrock if (err != 0) 315fa9e4066Sahrens return; 316fa9e4066Sahrens 317fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 318fa9e4066Sahrens if (dn->dn_datablkshift) { 319fa9e4066Sahrens int blkshift = dn->dn_datablkshift; 320fa9e4066Sahrens nblks = (P2ROUNDUP(offset+len, 1<<blkshift) - 321fa9e4066Sahrens P2ALIGN(offset, 1<<blkshift)) >> blkshift; 322fa9e4066Sahrens } else { 323fa9e4066Sahrens nblks = (offset < dn->dn_datablksz); 324fa9e4066Sahrens } 325fa9e4066Sahrens 326fa9e4066Sahrens if (nblks != 0) { 327fa9e4066Sahrens blkid = dbuf_whichblock(dn, offset); 328fa9e4066Sahrens for (i = 0; i < nblks; i++) 329fa9e4066Sahrens dbuf_prefetch(dn, blkid+i); 330fa9e4066Sahrens } 331fa9e4066Sahrens 332fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 333fa9e4066Sahrens 334fa9e4066Sahrens dnode_rele(dn, FTAG); 335fa9e4066Sahrens } 336fa9e4066Sahrens 337ea8dc4b6Seschrock int 338fa9e4066Sahrens dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, 339fa9e4066Sahrens uint64_t size, dmu_tx_t *tx) 340fa9e4066Sahrens { 341ea8dc4b6Seschrock dnode_t *dn; 342ea8dc4b6Seschrock int err = dnode_hold(os->os, object, FTAG, &dn); 343ea8dc4b6Seschrock if (err) 344ea8dc4b6Seschrock return (err); 345fa9e4066Sahrens ASSERT(offset < UINT64_MAX); 346fa9e4066Sahrens ASSERT(size == -1ULL || size <= UINT64_MAX - offset); 347fa9e4066Sahrens dnode_free_range(dn, offset, size, tx); 348fa9e4066Sahrens dnode_rele(dn, FTAG); 349ea8dc4b6Seschrock return (0); 350fa9e4066Sahrens } 351fa9e4066Sahrens 352ea8dc4b6Seschrock int 353ea8dc4b6Seschrock dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 354ea8dc4b6Seschrock void *buf) 355fa9e4066Sahrens { 356fa9e4066Sahrens dnode_t *dn; 357fa9e4066Sahrens dmu_buf_t **dbp; 358ea8dc4b6Seschrock int numbufs, i, err; 359fa9e4066Sahrens 360ea8dc4b6Seschrock /* 361ea8dc4b6Seschrock * Deal with odd block sizes, where there can't be data past the 362ea8dc4b6Seschrock * first block. 363ea8dc4b6Seschrock */ 364ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 365ea8dc4b6Seschrock if (err) 366ea8dc4b6Seschrock return (err); 367fa9e4066Sahrens if (dn->dn_datablkshift == 0) { 368fa9e4066Sahrens int newsz = offset > dn->dn_datablksz ? 0 : 369fa9e4066Sahrens MIN(size, dn->dn_datablksz - offset); 370fa9e4066Sahrens bzero((char *)buf + newsz, size - newsz); 371fa9e4066Sahrens size = newsz; 372fa9e4066Sahrens } 373fa9e4066Sahrens 374fa9e4066Sahrens while (size > 0) { 375fa9e4066Sahrens uint64_t mylen = MIN(size, DMU_MAX_ACCESS / 2); 376fa9e4066Sahrens int err; 377fa9e4066Sahrens 378fa9e4066Sahrens /* 379fa9e4066Sahrens * NB: we could do this block-at-a-time, but it's nice 380fa9e4066Sahrens * to be reading in parallel. 381fa9e4066Sahrens */ 382a2eea2e1Sahrens err = dmu_buf_hold_array_by_dnode(dn, offset, mylen, 383ea8dc4b6Seschrock TRUE, FTAG, &numbufs, &dbp); 384ea8dc4b6Seschrock if (err) 385fa9e4066Sahrens return (err); 386fa9e4066Sahrens 387fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 388fa9e4066Sahrens int tocpy; 389fa9e4066Sahrens int bufoff; 390fa9e4066Sahrens dmu_buf_t *db = dbp[i]; 391fa9e4066Sahrens 392fa9e4066Sahrens ASSERT(size > 0); 393fa9e4066Sahrens 394fa9e4066Sahrens bufoff = offset - db->db_offset; 395fa9e4066Sahrens tocpy = (int)MIN(db->db_size - bufoff, size); 396fa9e4066Sahrens 397fa9e4066Sahrens bcopy((char *)db->db_data + bufoff, buf, tocpy); 398fa9e4066Sahrens 399fa9e4066Sahrens offset += tocpy; 400fa9e4066Sahrens size -= tocpy; 401fa9e4066Sahrens buf = (char *)buf + tocpy; 402fa9e4066Sahrens } 403ea8dc4b6Seschrock dmu_buf_rele_array(dbp, numbufs, FTAG); 404fa9e4066Sahrens } 405a2eea2e1Sahrens dnode_rele(dn, FTAG); 406fa9e4066Sahrens return (0); 407fa9e4066Sahrens } 408fa9e4066Sahrens 409fa9e4066Sahrens void 410fa9e4066Sahrens dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 411fa9e4066Sahrens const void *buf, dmu_tx_t *tx) 412fa9e4066Sahrens { 413fa9e4066Sahrens dmu_buf_t **dbp; 414fa9e4066Sahrens int numbufs, i; 415fa9e4066Sahrens 41613506d1eSmaybee if (size == 0) 41713506d1eSmaybee return; 41813506d1eSmaybee 419ea8dc4b6Seschrock VERIFY(0 == dmu_buf_hold_array(os, object, offset, size, 420ea8dc4b6Seschrock FALSE, FTAG, &numbufs, &dbp)); 421fa9e4066Sahrens 422fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 423fa9e4066Sahrens int tocpy; 424fa9e4066Sahrens int bufoff; 425fa9e4066Sahrens dmu_buf_t *db = dbp[i]; 426fa9e4066Sahrens 427fa9e4066Sahrens ASSERT(size > 0); 428fa9e4066Sahrens 429fa9e4066Sahrens bufoff = offset - db->db_offset; 430fa9e4066Sahrens tocpy = (int)MIN(db->db_size - bufoff, size); 431fa9e4066Sahrens 432fa9e4066Sahrens ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 433fa9e4066Sahrens 434fa9e4066Sahrens if (tocpy == db->db_size) 435fa9e4066Sahrens dmu_buf_will_fill(db, tx); 436fa9e4066Sahrens else 437fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 438fa9e4066Sahrens 439fa9e4066Sahrens bcopy(buf, (char *)db->db_data + bufoff, tocpy); 440fa9e4066Sahrens 441fa9e4066Sahrens if (tocpy == db->db_size) 442fa9e4066Sahrens dmu_buf_fill_done(db, tx); 443fa9e4066Sahrens 444fa9e4066Sahrens offset += tocpy; 445fa9e4066Sahrens size -= tocpy; 446fa9e4066Sahrens buf = (char *)buf + tocpy; 447fa9e4066Sahrens } 448ea8dc4b6Seschrock dmu_buf_rele_array(dbp, numbufs, FTAG); 449fa9e4066Sahrens } 450fa9e4066Sahrens 451fa9e4066Sahrens #ifdef _KERNEL 452fa9e4066Sahrens int 453fa9e4066Sahrens dmu_write_uio(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 454fa9e4066Sahrens uio_t *uio, dmu_tx_t *tx) 455fa9e4066Sahrens { 456fa9e4066Sahrens dmu_buf_t **dbp; 457fa9e4066Sahrens int numbufs, i; 458fa9e4066Sahrens int err = 0; 459fa9e4066Sahrens 46013506d1eSmaybee if (size == 0) 46113506d1eSmaybee return (0); 46213506d1eSmaybee 463ea8dc4b6Seschrock err = dmu_buf_hold_array(os, object, offset, size, 464ea8dc4b6Seschrock FALSE, FTAG, &numbufs, &dbp); 465ea8dc4b6Seschrock if (err) 466ea8dc4b6Seschrock return (err); 467fa9e4066Sahrens 468fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 469fa9e4066Sahrens int tocpy; 470fa9e4066Sahrens int bufoff; 471fa9e4066Sahrens dmu_buf_t *db = dbp[i]; 472fa9e4066Sahrens 473fa9e4066Sahrens ASSERT(size > 0); 474fa9e4066Sahrens 475fa9e4066Sahrens bufoff = offset - db->db_offset; 476fa9e4066Sahrens tocpy = (int)MIN(db->db_size - bufoff, size); 477fa9e4066Sahrens 478fa9e4066Sahrens ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 479fa9e4066Sahrens 480fa9e4066Sahrens if (tocpy == db->db_size) 481fa9e4066Sahrens dmu_buf_will_fill(db, tx); 482fa9e4066Sahrens else 483fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 484fa9e4066Sahrens 485fa9e4066Sahrens /* 486fa9e4066Sahrens * XXX uiomove could block forever (eg. nfs-backed 487fa9e4066Sahrens * pages). There needs to be a uiolockdown() function 488fa9e4066Sahrens * to lock the pages in memory, so that uiomove won't 489fa9e4066Sahrens * block. 490fa9e4066Sahrens */ 491fa9e4066Sahrens err = uiomove((char *)db->db_data + bufoff, tocpy, 492fa9e4066Sahrens UIO_WRITE, uio); 493fa9e4066Sahrens 494fa9e4066Sahrens if (tocpy == db->db_size) 495fa9e4066Sahrens dmu_buf_fill_done(db, tx); 496fa9e4066Sahrens 497fa9e4066Sahrens if (err) 498fa9e4066Sahrens break; 499fa9e4066Sahrens 500fa9e4066Sahrens offset += tocpy; 501fa9e4066Sahrens size -= tocpy; 502fa9e4066Sahrens } 503ea8dc4b6Seschrock dmu_buf_rele_array(dbp, numbufs, FTAG); 504fa9e4066Sahrens return (err); 505fa9e4066Sahrens } 50644eda4d7Smaybee 50744eda4d7Smaybee int 50844eda4d7Smaybee dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 50944eda4d7Smaybee page_t *pp, dmu_tx_t *tx) 51044eda4d7Smaybee { 51144eda4d7Smaybee dmu_buf_t **dbp; 51244eda4d7Smaybee int numbufs, i; 51344eda4d7Smaybee int err; 51444eda4d7Smaybee 51544eda4d7Smaybee if (size == 0) 51644eda4d7Smaybee return (0); 51744eda4d7Smaybee 51844eda4d7Smaybee err = dmu_buf_hold_array(os, object, offset, size, 51944eda4d7Smaybee FALSE, FTAG, &numbufs, &dbp); 52044eda4d7Smaybee if (err) 52144eda4d7Smaybee return (err); 52244eda4d7Smaybee 52344eda4d7Smaybee for (i = 0; i < numbufs; i++) { 52444eda4d7Smaybee int tocpy, copied, thiscpy; 52544eda4d7Smaybee int bufoff; 52644eda4d7Smaybee dmu_buf_t *db = dbp[i]; 52744eda4d7Smaybee caddr_t va; 52844eda4d7Smaybee 52944eda4d7Smaybee ASSERT(size > 0); 53044eda4d7Smaybee ASSERT3U(db->db_size, >=, PAGESIZE); 53144eda4d7Smaybee 53244eda4d7Smaybee bufoff = offset - db->db_offset; 53344eda4d7Smaybee tocpy = (int)MIN(db->db_size - bufoff, size); 53444eda4d7Smaybee 53544eda4d7Smaybee ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 53644eda4d7Smaybee 53744eda4d7Smaybee if (tocpy == db->db_size) 53844eda4d7Smaybee dmu_buf_will_fill(db, tx); 53944eda4d7Smaybee else 54044eda4d7Smaybee dmu_buf_will_dirty(db, tx); 54144eda4d7Smaybee 54244eda4d7Smaybee for (copied = 0; copied < tocpy; copied += PAGESIZE) { 54344eda4d7Smaybee ASSERT3U(pp->p_offset, ==, db->db_offset + bufoff); 54444eda4d7Smaybee thiscpy = MIN(PAGESIZE, tocpy - copied); 54544eda4d7Smaybee va = ppmapin(pp, PROT_READ, (caddr_t)-1); 54644eda4d7Smaybee bcopy(va, (char *)db->db_data + bufoff, thiscpy); 54744eda4d7Smaybee ppmapout(va); 54844eda4d7Smaybee pp = pp->p_next; 54944eda4d7Smaybee bufoff += PAGESIZE; 55044eda4d7Smaybee } 55144eda4d7Smaybee 55244eda4d7Smaybee if (tocpy == db->db_size) 55344eda4d7Smaybee dmu_buf_fill_done(db, tx); 55444eda4d7Smaybee 55544eda4d7Smaybee if (err) 55644eda4d7Smaybee break; 55744eda4d7Smaybee 55844eda4d7Smaybee offset += tocpy; 55944eda4d7Smaybee size -= tocpy; 56044eda4d7Smaybee } 56144eda4d7Smaybee dmu_buf_rele_array(dbp, numbufs, FTAG); 56244eda4d7Smaybee return (err); 56344eda4d7Smaybee } 564fa9e4066Sahrens #endif 565fa9e4066Sahrens 566c5c6ffa0Smaybee typedef struct { 567c5c6ffa0Smaybee uint64_t txg; 568c5c6ffa0Smaybee dmu_buf_impl_t *db; 569c5c6ffa0Smaybee dmu_sync_cb_t *done; 570c5c6ffa0Smaybee void *arg; 571c5c6ffa0Smaybee } dmu_sync_cbin_t; 572c5c6ffa0Smaybee 573c5c6ffa0Smaybee typedef union { 574c5c6ffa0Smaybee dmu_sync_cbin_t data; 575c5c6ffa0Smaybee blkptr_t blk; 576c5c6ffa0Smaybee } dmu_sync_cbarg_t; 577c5c6ffa0Smaybee 578c5c6ffa0Smaybee /* ARGSUSED */ 579c5c6ffa0Smaybee static void 580c5c6ffa0Smaybee dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg) 581c5c6ffa0Smaybee { 582c5c6ffa0Smaybee dmu_sync_cbin_t *in = (dmu_sync_cbin_t *)varg; 583c5c6ffa0Smaybee dmu_buf_impl_t *db = in->db; 584c5c6ffa0Smaybee uint64_t txg = in->txg; 585c5c6ffa0Smaybee dmu_sync_cb_t *done = in->done; 586c5c6ffa0Smaybee void *arg = in->arg; 587c5c6ffa0Smaybee blkptr_t *blk = (blkptr_t *)varg; 588c5c6ffa0Smaybee 589c5c6ffa0Smaybee if (!BP_IS_HOLE(zio->io_bp)) { 590c5c6ffa0Smaybee zio->io_bp->blk_fill = 1; 591c5c6ffa0Smaybee BP_SET_TYPE(zio->io_bp, db->db_dnode->dn_type); 592c5c6ffa0Smaybee BP_SET_LEVEL(zio->io_bp, 0); 593c5c6ffa0Smaybee } 594c5c6ffa0Smaybee 595c5c6ffa0Smaybee *blk = *zio->io_bp; /* structure assignment */ 596c5c6ffa0Smaybee 597c5c6ffa0Smaybee mutex_enter(&db->db_mtx); 598c5c6ffa0Smaybee ASSERT(db->db_d.db_overridden_by[txg&TXG_MASK] == IN_DMU_SYNC); 599c5c6ffa0Smaybee db->db_d.db_overridden_by[txg&TXG_MASK] = blk; 600c5c6ffa0Smaybee cv_broadcast(&db->db_changed); 601c5c6ffa0Smaybee mutex_exit(&db->db_mtx); 602c5c6ffa0Smaybee 603c5c6ffa0Smaybee if (done) 604c5c6ffa0Smaybee done(&(db->db), arg); 605c5c6ffa0Smaybee } 606c5c6ffa0Smaybee 607fa9e4066Sahrens /* 608c5c6ffa0Smaybee * Intent log support: sync the block associated with db to disk. 609c5c6ffa0Smaybee * N.B. and XXX: the caller is responsible for making sure that the 610c5c6ffa0Smaybee * data isn't changing while dmu_sync() is writing it. 611fa9e4066Sahrens * 612fa9e4066Sahrens * Return values: 613fa9e4066Sahrens * 614c5c6ffa0Smaybee * EEXIST: this txg has already been synced, so there's nothing to to. 615fa9e4066Sahrens * The caller should not log the write. 616fa9e4066Sahrens * 617fa9e4066Sahrens * ENOENT: the block was dbuf_free_range()'d, so there's nothing to do. 618fa9e4066Sahrens * The caller should not log the write. 619fa9e4066Sahrens * 620c5c6ffa0Smaybee * EALREADY: this block is already in the process of being synced. 621c5c6ffa0Smaybee * The caller should track its progress (somehow). 622fa9e4066Sahrens * 623c5c6ffa0Smaybee * EINPROGRESS: the IO has been initiated. 624c5c6ffa0Smaybee * The caller should log this blkptr in the callback. 625fa9e4066Sahrens * 626c5c6ffa0Smaybee * 0: completed. Sets *bp to the blkptr just written. 627c5c6ffa0Smaybee * The caller should log this blkptr immediately. 628fa9e4066Sahrens */ 629fa9e4066Sahrens int 630c5c6ffa0Smaybee dmu_sync(zio_t *pio, dmu_buf_t *db_fake, 631c5c6ffa0Smaybee blkptr_t *bp, uint64_t txg, dmu_sync_cb_t *done, void *arg) 632fa9e4066Sahrens { 633c5c6ffa0Smaybee dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; 634c5c6ffa0Smaybee objset_impl_t *os = db->db_objset; 635c5c6ffa0Smaybee dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool; 636fa9e4066Sahrens tx_state_t *tx = &dp->dp_tx; 637c5c6ffa0Smaybee dmu_sync_cbin_t *in; 638fa9e4066Sahrens blkptr_t *blk; 639ea8dc4b6Seschrock zbookmark_t zb; 640c5c6ffa0Smaybee uint32_t arc_flag; 641c5c6ffa0Smaybee int err; 642fa9e4066Sahrens 643fa9e4066Sahrens ASSERT(BP_IS_HOLE(bp)); 644fa9e4066Sahrens ASSERT(txg != 0); 645fa9e4066Sahrens 646c5c6ffa0Smaybee 647fa9e4066Sahrens dprintf("dmu_sync txg=%llu, s,o,q %llu %llu %llu\n", 648fa9e4066Sahrens txg, tx->tx_synced_txg, tx->tx_open_txg, tx->tx_quiesced_txg); 649fa9e4066Sahrens 650fa9e4066Sahrens /* 651c5c6ffa0Smaybee * XXX - would be nice if we could do this without suspending... 652ea8dc4b6Seschrock */ 653c5c6ffa0Smaybee txg_suspend(dp); 654ea8dc4b6Seschrock 655ea8dc4b6Seschrock /* 656fa9e4066Sahrens * If this txg already synced, there's nothing to do. 657fa9e4066Sahrens */ 658fa9e4066Sahrens if (txg <= tx->tx_synced_txg) { 659c5c6ffa0Smaybee txg_resume(dp); 660fa9e4066Sahrens /* 661fa9e4066Sahrens * If we're running ziltest, we need the blkptr regardless. 662fa9e4066Sahrens */ 663fa9e4066Sahrens if (txg > spa_freeze_txg(dp->dp_spa)) { 664fa9e4066Sahrens /* if db_blkptr == NULL, this was an empty write */ 665fa9e4066Sahrens if (db->db_blkptr) 666fa9e4066Sahrens *bp = *db->db_blkptr; /* structure assignment */ 667fa9e4066Sahrens return (0); 668fa9e4066Sahrens } 669c5c6ffa0Smaybee return (EEXIST); 670fa9e4066Sahrens } 671fa9e4066Sahrens 672fa9e4066Sahrens mutex_enter(&db->db_mtx); 673fa9e4066Sahrens 674c5c6ffa0Smaybee blk = db->db_d.db_overridden_by[txg&TXG_MASK]; 675c5c6ffa0Smaybee if (blk == IN_DMU_SYNC) { 676fa9e4066Sahrens /* 677c5c6ffa0Smaybee * We have already issued a sync write for this buffer. 678c5c6ffa0Smaybee */ 679c5c6ffa0Smaybee mutex_exit(&db->db_mtx); 680c5c6ffa0Smaybee txg_resume(dp); 681c5c6ffa0Smaybee return (EALREADY); 682c5c6ffa0Smaybee } else if (blk != NULL) { 683c5c6ffa0Smaybee /* 684c5c6ffa0Smaybee * This buffer had already been synced. It could not 685c5c6ffa0Smaybee * have been dirtied since, or we would have cleared blk. 686c5c6ffa0Smaybee */ 687c5c6ffa0Smaybee *bp = *blk; /* structure assignment */ 688c5c6ffa0Smaybee mutex_exit(&db->db_mtx); 689c5c6ffa0Smaybee txg_resume(dp); 690c5c6ffa0Smaybee return (0); 691c5c6ffa0Smaybee } 692c5c6ffa0Smaybee 693c5c6ffa0Smaybee if (txg == tx->tx_syncing_txg) { 694c5c6ffa0Smaybee while (db->db_data_pending) { 695c5c6ffa0Smaybee /* 696c5c6ffa0Smaybee * IO is in-progress. Wait for it to finish. 697c5c6ffa0Smaybee * XXX - would be nice to be able to somehow "attach" 698c5c6ffa0Smaybee * this zio to the parent zio passed in. 699c5c6ffa0Smaybee */ 700c5c6ffa0Smaybee cv_wait(&db->db_changed, &db->db_mtx); 70113506d1eSmaybee if (!db->db_data_pending && 70213506d1eSmaybee db->db_blkptr && BP_IS_HOLE(db->db_blkptr)) { 70313506d1eSmaybee /* 70413506d1eSmaybee * IO was compressed away 70513506d1eSmaybee */ 70613506d1eSmaybee *bp = *db->db_blkptr; /* structure assignment */ 70713506d1eSmaybee mutex_exit(&db->db_mtx); 70813506d1eSmaybee txg_resume(dp); 70913506d1eSmaybee return (0); 71013506d1eSmaybee } 711c5c6ffa0Smaybee ASSERT(db->db_data_pending || 712c5c6ffa0Smaybee (db->db_blkptr && db->db_blkptr->blk_birth == txg)); 713c5c6ffa0Smaybee } 714c5c6ffa0Smaybee 715c5c6ffa0Smaybee if (db->db_blkptr && db->db_blkptr->blk_birth == txg) { 716c5c6ffa0Smaybee /* 717c5c6ffa0Smaybee * IO is already completed. 718c5c6ffa0Smaybee */ 719c5c6ffa0Smaybee *bp = *db->db_blkptr; /* structure assignment */ 720c5c6ffa0Smaybee mutex_exit(&db->db_mtx); 721c5c6ffa0Smaybee txg_resume(dp); 722c5c6ffa0Smaybee return (0); 723c5c6ffa0Smaybee } 724c5c6ffa0Smaybee } 725c5c6ffa0Smaybee 726c5c6ffa0Smaybee if (db->db_d.db_data_old[txg&TXG_MASK] == NULL) { 727c5c6ffa0Smaybee /* 728c5c6ffa0Smaybee * This dbuf isn't dirty, must have been free_range'd. 729fa9e4066Sahrens * There's no need to log writes to freed blocks, so we're done. 730fa9e4066Sahrens */ 731fa9e4066Sahrens mutex_exit(&db->db_mtx); 732c5c6ffa0Smaybee txg_resume(dp); 733fa9e4066Sahrens return (ENOENT); 734fa9e4066Sahrens } 735fa9e4066Sahrens 736c5c6ffa0Smaybee ASSERT(db->db_d.db_overridden_by[txg&TXG_MASK] == NULL); 737fa9e4066Sahrens db->db_d.db_overridden_by[txg&TXG_MASK] = IN_DMU_SYNC; 738c5c6ffa0Smaybee /* 739c5c6ffa0Smaybee * XXX - a little ugly to stash the blkptr in the callback 740c5c6ffa0Smaybee * buffer. We always need to make sure the following is true: 741c5c6ffa0Smaybee * ASSERT(sizeof(blkptr_t) >= sizeof(dmu_sync_cbin_t)); 742c5c6ffa0Smaybee */ 743c5c6ffa0Smaybee in = kmem_alloc(sizeof (blkptr_t), KM_SLEEP); 744c5c6ffa0Smaybee in->db = db; 745c5c6ffa0Smaybee in->txg = txg; 746c5c6ffa0Smaybee in->done = done; 747c5c6ffa0Smaybee in->arg = arg; 748fa9e4066Sahrens mutex_exit(&db->db_mtx); 749c5c6ffa0Smaybee txg_resume(dp); 750fa9e4066Sahrens 751c5c6ffa0Smaybee arc_flag = pio == NULL ? ARC_WAIT : ARC_NOWAIT; 752c5c6ffa0Smaybee zb.zb_objset = os->os_dsl_dataset->ds_object; 753ea8dc4b6Seschrock zb.zb_object = db->db.db_object; 754ea8dc4b6Seschrock zb.zb_level = db->db_level; 755ea8dc4b6Seschrock zb.zb_blkid = db->db_blkid; 756c5c6ffa0Smaybee err = arc_write(pio, os->os_spa, 757c5c6ffa0Smaybee zio_checksum_select(db->db_dnode->dn_checksum, os->os_checksum), 758c5c6ffa0Smaybee zio_compress_select(db->db_dnode->dn_compress, os->os_compress), 759c5c6ffa0Smaybee dmu_get_replication_level(os->os_spa, &zb, db->db_dnode->dn_type), 760c5c6ffa0Smaybee txg, bp, db->db_d.db_data_old[txg&TXG_MASK], dmu_sync_done, in, 761c5c6ffa0Smaybee ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, arc_flag, &zb); 762fa9e4066Sahrens ASSERT(err == 0); 763fa9e4066Sahrens 764c5c6ffa0Smaybee return (arc_flag == ARC_NOWAIT ? EINPROGRESS : 0); 765fa9e4066Sahrens } 766fa9e4066Sahrens 767fa9e4066Sahrens int 768fa9e4066Sahrens dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, 769fa9e4066Sahrens dmu_tx_t *tx) 770fa9e4066Sahrens { 771ea8dc4b6Seschrock dnode_t *dn; 772ea8dc4b6Seschrock int err; 773ea8dc4b6Seschrock 774ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 775ea8dc4b6Seschrock if (err) 776ea8dc4b6Seschrock return (err); 777ea8dc4b6Seschrock err = dnode_set_blksz(dn, size, ibs, tx); 778fa9e4066Sahrens dnode_rele(dn, FTAG); 779fa9e4066Sahrens return (err); 780fa9e4066Sahrens } 781fa9e4066Sahrens 782fa9e4066Sahrens void 783fa9e4066Sahrens dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, 784fa9e4066Sahrens dmu_tx_t *tx) 785fa9e4066Sahrens { 786ea8dc4b6Seschrock dnode_t *dn; 787ea8dc4b6Seschrock 788ea8dc4b6Seschrock /* XXX assumes dnode_hold will not get an i/o error */ 789ea8dc4b6Seschrock (void) dnode_hold(os->os, object, FTAG, &dn); 790fa9e4066Sahrens ASSERT(checksum < ZIO_CHECKSUM_FUNCTIONS); 791fa9e4066Sahrens dn->dn_checksum = checksum; 792fa9e4066Sahrens dnode_setdirty(dn, tx); 793fa9e4066Sahrens dnode_rele(dn, FTAG); 794fa9e4066Sahrens } 795fa9e4066Sahrens 796fa9e4066Sahrens void 797fa9e4066Sahrens dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress, 798fa9e4066Sahrens dmu_tx_t *tx) 799fa9e4066Sahrens { 800ea8dc4b6Seschrock dnode_t *dn; 801ea8dc4b6Seschrock 802ea8dc4b6Seschrock /* XXX assumes dnode_hold will not get an i/o error */ 803ea8dc4b6Seschrock (void) dnode_hold(os->os, object, FTAG, &dn); 804fa9e4066Sahrens ASSERT(compress < ZIO_COMPRESS_FUNCTIONS); 805fa9e4066Sahrens dn->dn_compress = compress; 806fa9e4066Sahrens dnode_setdirty(dn, tx); 807fa9e4066Sahrens dnode_rele(dn, FTAG); 808fa9e4066Sahrens } 809fa9e4066Sahrens 81044cd46caSbillm /* 81144cd46caSbillm * XXX - eventually, this should take into account per-dataset (or 81244cd46caSbillm * even per-object?) user requests for higher levels of replication. 81344cd46caSbillm */ 81444cd46caSbillm int 81544cd46caSbillm dmu_get_replication_level(spa_t *spa, zbookmark_t *zb, dmu_object_type_t ot) 81644cd46caSbillm { 81744cd46caSbillm int ncopies = 1; 81844cd46caSbillm 81944cd46caSbillm if (dmu_ot[ot].ot_metadata) 82044cd46caSbillm ncopies++; 82144cd46caSbillm if (zb->zb_level != 0) 82244cd46caSbillm ncopies++; 82344cd46caSbillm if (zb->zb_objset == 0 && zb->zb_object == 0) 82444cd46caSbillm ncopies++; 82544cd46caSbillm return (MIN(ncopies, spa_max_replication(spa))); 82644cd46caSbillm } 82744cd46caSbillm 828fa9e4066Sahrens int 829fa9e4066Sahrens dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) 830fa9e4066Sahrens { 831fa9e4066Sahrens dnode_t *dn; 832fa9e4066Sahrens int i, err; 833fa9e4066Sahrens 834ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 835ea8dc4b6Seschrock if (err) 836ea8dc4b6Seschrock return (err); 837fa9e4066Sahrens /* 838fa9e4066Sahrens * Sync any current changes before 839fa9e4066Sahrens * we go trundling through the block pointers. 840fa9e4066Sahrens */ 841fa9e4066Sahrens for (i = 0; i < TXG_SIZE; i++) { 842c543ec06Sahrens if (list_link_active(&dn->dn_dirty_link[i])) 843fa9e4066Sahrens break; 844fa9e4066Sahrens } 845fa9e4066Sahrens if (i != TXG_SIZE) { 846fa9e4066Sahrens dnode_rele(dn, FTAG); 847fa9e4066Sahrens txg_wait_synced(dmu_objset_pool(os), 0); 848ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 849ea8dc4b6Seschrock if (err) 850ea8dc4b6Seschrock return (err); 851fa9e4066Sahrens } 852fa9e4066Sahrens 853fa9e4066Sahrens err = dnode_next_offset(dn, hole, off, 1, 1); 854fa9e4066Sahrens dnode_rele(dn, FTAG); 855fa9e4066Sahrens 856fa9e4066Sahrens return (err); 857fa9e4066Sahrens } 858fa9e4066Sahrens 859fa9e4066Sahrens void 860fa9e4066Sahrens dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi) 861fa9e4066Sahrens { 862fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 863fa9e4066Sahrens mutex_enter(&dn->dn_mtx); 864fa9e4066Sahrens 865fa9e4066Sahrens doi->doi_data_block_size = dn->dn_datablksz; 866fa9e4066Sahrens doi->doi_metadata_block_size = dn->dn_indblkshift ? 867fa9e4066Sahrens 1ULL << dn->dn_indblkshift : 0; 868fa9e4066Sahrens doi->doi_indirection = dn->dn_nlevels; 869fa9e4066Sahrens doi->doi_checksum = dn->dn_checksum; 870fa9e4066Sahrens doi->doi_compress = dn->dn_compress; 87199653d4eSeschrock doi->doi_physical_blks = (DN_USED_BYTES(dn->dn_phys) + 87299653d4eSeschrock SPA_MINBLOCKSIZE/2) >> SPA_MINBLOCKSHIFT; 873fa9e4066Sahrens doi->doi_max_block_offset = dn->dn_phys->dn_maxblkid; 874fa9e4066Sahrens doi->doi_type = dn->dn_type; 875fa9e4066Sahrens doi->doi_bonus_size = dn->dn_bonuslen; 876fa9e4066Sahrens doi->doi_bonus_type = dn->dn_bonustype; 877fa9e4066Sahrens 878fa9e4066Sahrens mutex_exit(&dn->dn_mtx); 879fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 880fa9e4066Sahrens } 881fa9e4066Sahrens 882fa9e4066Sahrens /* 883fa9e4066Sahrens * Get information on a DMU object. 884fa9e4066Sahrens * If doi is NULL, just indicates whether the object exists. 885fa9e4066Sahrens */ 886fa9e4066Sahrens int 887fa9e4066Sahrens dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi) 888fa9e4066Sahrens { 889ea8dc4b6Seschrock dnode_t *dn; 890ea8dc4b6Seschrock int err = dnode_hold(os->os, object, FTAG, &dn); 891fa9e4066Sahrens 892ea8dc4b6Seschrock if (err) 893ea8dc4b6Seschrock return (err); 894fa9e4066Sahrens 895fa9e4066Sahrens if (doi != NULL) 896fa9e4066Sahrens dmu_object_info_from_dnode(dn, doi); 897fa9e4066Sahrens 898fa9e4066Sahrens dnode_rele(dn, FTAG); 899fa9e4066Sahrens return (0); 900fa9e4066Sahrens } 901fa9e4066Sahrens 902fa9e4066Sahrens /* 903fa9e4066Sahrens * As above, but faster; can be used when you have a held dbuf in hand. 904fa9e4066Sahrens */ 905fa9e4066Sahrens void 906fa9e4066Sahrens dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi) 907fa9e4066Sahrens { 908fa9e4066Sahrens dmu_object_info_from_dnode(((dmu_buf_impl_t *)db)->db_dnode, doi); 909fa9e4066Sahrens } 910fa9e4066Sahrens 911fa9e4066Sahrens /* 912fa9e4066Sahrens * Faster still when you only care about the size. 913fa9e4066Sahrens * This is specifically optimized for zfs_getattr(). 914fa9e4066Sahrens */ 915fa9e4066Sahrens void 916fa9e4066Sahrens dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize, u_longlong_t *nblk512) 917fa9e4066Sahrens { 918fa9e4066Sahrens dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode; 919fa9e4066Sahrens 920fa9e4066Sahrens *blksize = dn->dn_datablksz; 92199653d4eSeschrock /* add 1 for dnode space */ 92299653d4eSeschrock *nblk512 = ((DN_USED_BYTES(dn->dn_phys) + SPA_MINBLOCKSIZE/2) >> 92399653d4eSeschrock SPA_MINBLOCKSHIFT) + 1; 924fa9e4066Sahrens } 925fa9e4066Sahrens 926ea8dc4b6Seschrock /* 927ea8dc4b6Seschrock * Given a bookmark, return the name of the dataset, object, and range in 928ea8dc4b6Seschrock * human-readable format. 929ea8dc4b6Seschrock */ 930ea8dc4b6Seschrock int 931e9dbad6fSeschrock spa_bookmark_name(spa_t *spa, zbookmark_t *zb, nvlist_t *nvl) 932ea8dc4b6Seschrock { 933ea8dc4b6Seschrock dsl_pool_t *dp; 934ea8dc4b6Seschrock dsl_dataset_t *ds = NULL; 935ea8dc4b6Seschrock objset_t *os = NULL; 936ea8dc4b6Seschrock dnode_t *dn = NULL; 937ea8dc4b6Seschrock int err, shift; 938e9dbad6fSeschrock char dsname[MAXNAMELEN]; 939e9dbad6fSeschrock char objname[32]; 940e9dbad6fSeschrock char range[64]; 941ea8dc4b6Seschrock 942ea8dc4b6Seschrock dp = spa_get_dsl(spa); 943ea8dc4b6Seschrock if (zb->zb_objset != 0) { 944ea8dc4b6Seschrock rw_enter(&dp->dp_config_rwlock, RW_READER); 945ea8dc4b6Seschrock err = dsl_dataset_open_obj(dp, zb->zb_objset, 946ea8dc4b6Seschrock NULL, DS_MODE_NONE, FTAG, &ds); 947ea8dc4b6Seschrock if (err) { 948ea8dc4b6Seschrock rw_exit(&dp->dp_config_rwlock); 949ea8dc4b6Seschrock return (err); 950ea8dc4b6Seschrock } 951ea8dc4b6Seschrock dsl_dataset_name(ds, dsname); 952ea8dc4b6Seschrock dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 953ea8dc4b6Seschrock rw_exit(&dp->dp_config_rwlock); 954ea8dc4b6Seschrock 955ea8dc4b6Seschrock err = dmu_objset_open(dsname, DMU_OST_ANY, DS_MODE_NONE, &os); 956ea8dc4b6Seschrock if (err) 957ea8dc4b6Seschrock goto out; 958ea8dc4b6Seschrock 959ea8dc4b6Seschrock } else { 960ea8dc4b6Seschrock dsl_dataset_name(NULL, dsname); 961ea8dc4b6Seschrock os = dp->dp_meta_objset; 962ea8dc4b6Seschrock } 963ea8dc4b6Seschrock 964ea8dc4b6Seschrock 965ea8dc4b6Seschrock if (zb->zb_object == DMU_META_DNODE_OBJECT) { 966e9dbad6fSeschrock (void) strncpy(objname, "mdn", sizeof (objname)); 967ea8dc4b6Seschrock } else { 968e9dbad6fSeschrock (void) snprintf(objname, sizeof (objname), "%lld", 969ea8dc4b6Seschrock (longlong_t)zb->zb_object); 970ea8dc4b6Seschrock } 971ea8dc4b6Seschrock 972ea8dc4b6Seschrock err = dnode_hold(os->os, zb->zb_object, FTAG, &dn); 973ea8dc4b6Seschrock if (err) 974ea8dc4b6Seschrock goto out; 975ea8dc4b6Seschrock 976ea8dc4b6Seschrock shift = (dn->dn_datablkshift?dn->dn_datablkshift:SPA_MAXBLOCKSHIFT) + 977ea8dc4b6Seschrock zb->zb_level * (dn->dn_indblkshift - SPA_BLKPTRSHIFT); 978e9dbad6fSeschrock (void) snprintf(range, sizeof (range), "%llu-%llu", 979ea8dc4b6Seschrock (u_longlong_t)(zb->zb_blkid << shift), 980ea8dc4b6Seschrock (u_longlong_t)((zb->zb_blkid+1) << shift)); 981ea8dc4b6Seschrock 982e9dbad6fSeschrock if ((err = nvlist_add_string(nvl, ZPOOL_ERR_DATASET, dsname)) != 0 || 983e9dbad6fSeschrock (err = nvlist_add_string(nvl, ZPOOL_ERR_OBJECT, objname)) != 0 || 984e9dbad6fSeschrock (err = nvlist_add_string(nvl, ZPOOL_ERR_RANGE, range)) != 0) 985e9dbad6fSeschrock goto out; 986e9dbad6fSeschrock 987ea8dc4b6Seschrock out: 988ea8dc4b6Seschrock if (dn) 989ea8dc4b6Seschrock dnode_rele(dn, FTAG); 990ea8dc4b6Seschrock if (os && os != dp->dp_meta_objset) 991ea8dc4b6Seschrock dmu_objset_close(os); 992ea8dc4b6Seschrock return (err); 993ea8dc4b6Seschrock } 994ea8dc4b6Seschrock 995fa9e4066Sahrens void 996fa9e4066Sahrens byteswap_uint64_array(void *vbuf, size_t size) 997fa9e4066Sahrens { 998fa9e4066Sahrens uint64_t *buf = vbuf; 999fa9e4066Sahrens size_t count = size >> 3; 1000fa9e4066Sahrens int i; 1001fa9e4066Sahrens 1002fa9e4066Sahrens ASSERT((size & 7) == 0); 1003fa9e4066Sahrens 1004fa9e4066Sahrens for (i = 0; i < count; i++) 1005fa9e4066Sahrens buf[i] = BSWAP_64(buf[i]); 1006fa9e4066Sahrens } 1007fa9e4066Sahrens 1008fa9e4066Sahrens void 1009fa9e4066Sahrens byteswap_uint32_array(void *vbuf, size_t size) 1010fa9e4066Sahrens { 1011fa9e4066Sahrens uint32_t *buf = vbuf; 1012fa9e4066Sahrens size_t count = size >> 2; 1013fa9e4066Sahrens int i; 1014fa9e4066Sahrens 1015fa9e4066Sahrens ASSERT((size & 3) == 0); 1016fa9e4066Sahrens 1017fa9e4066Sahrens for (i = 0; i < count; i++) 1018fa9e4066Sahrens buf[i] = BSWAP_32(buf[i]); 1019fa9e4066Sahrens } 1020fa9e4066Sahrens 1021fa9e4066Sahrens void 1022fa9e4066Sahrens byteswap_uint16_array(void *vbuf, size_t size) 1023fa9e4066Sahrens { 1024fa9e4066Sahrens uint16_t *buf = vbuf; 1025fa9e4066Sahrens size_t count = size >> 1; 1026fa9e4066Sahrens int i; 1027fa9e4066Sahrens 1028fa9e4066Sahrens ASSERT((size & 1) == 0); 1029fa9e4066Sahrens 1030fa9e4066Sahrens for (i = 0; i < count; i++) 1031fa9e4066Sahrens buf[i] = BSWAP_16(buf[i]); 1032fa9e4066Sahrens } 1033fa9e4066Sahrens 1034fa9e4066Sahrens /* ARGSUSED */ 1035fa9e4066Sahrens void 1036fa9e4066Sahrens byteswap_uint8_array(void *vbuf, size_t size) 1037fa9e4066Sahrens { 1038fa9e4066Sahrens } 1039fa9e4066Sahrens 1040fa9e4066Sahrens void 1041fa9e4066Sahrens dmu_init(void) 1042fa9e4066Sahrens { 1043fa9e4066Sahrens dbuf_init(); 1044fa9e4066Sahrens dnode_init(); 1045fa9e4066Sahrens arc_init(); 1046fa9e4066Sahrens } 1047fa9e4066Sahrens 1048fa9e4066Sahrens void 1049fa9e4066Sahrens dmu_fini(void) 1050fa9e4066Sahrens { 1051fa9e4066Sahrens arc_fini(); 1052fa9e4066Sahrens dnode_fini(); 1053fa9e4066Sahrens dbuf_fini(); 1054fa9e4066Sahrens } 1055