1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22ea8dc4b6Seschrock * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23fa9e4066Sahrens * Use is subject to license terms. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 26fa9e4066Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27fa9e4066Sahrens 28fa9e4066Sahrens #include <sys/dmu.h> 29fa9e4066Sahrens #include <sys/dmu_impl.h> 30fa9e4066Sahrens #include <sys/dmu_tx.h> 31fa9e4066Sahrens #include <sys/dbuf.h> 32fa9e4066Sahrens #include <sys/dnode.h> 33fa9e4066Sahrens #include <sys/zfs_context.h> 34fa9e4066Sahrens #include <sys/dmu_objset.h> 35fa9e4066Sahrens #include <sys/dmu_traverse.h> 36fa9e4066Sahrens #include <sys/dsl_dataset.h> 37fa9e4066Sahrens #include <sys/dsl_dir.h> 38fa9e4066Sahrens #include <sys/dsl_pool.h> 391d452cf5Sahrens #include <sys/dsl_synctask.h> 40a2eea2e1Sahrens #include <sys/dsl_prop.h> 41fa9e4066Sahrens #include <sys/dmu_zfetch.h> 42fa9e4066Sahrens #include <sys/zfs_ioctl.h> 43fa9e4066Sahrens #include <sys/zap.h> 44ea8dc4b6Seschrock #include <sys/zio_checksum.h> 4544eda4d7Smaybee #ifdef _KERNEL 4644eda4d7Smaybee #include <sys/vmsystm.h> 4744eda4d7Smaybee #endif 48fa9e4066Sahrens 49fa9e4066Sahrens const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = { 50fa9e4066Sahrens { byteswap_uint8_array, TRUE, "unallocated" }, 51fa9e4066Sahrens { zap_byteswap, TRUE, "object directory" }, 52fa9e4066Sahrens { byteswap_uint64_array, TRUE, "object array" }, 53fa9e4066Sahrens { byteswap_uint8_array, TRUE, "packed nvlist" }, 54fa9e4066Sahrens { byteswap_uint64_array, TRUE, "packed nvlist size" }, 55fa9e4066Sahrens { byteswap_uint64_array, TRUE, "bplist" }, 56fa9e4066Sahrens { byteswap_uint64_array, TRUE, "bplist header" }, 57fa9e4066Sahrens { byteswap_uint64_array, TRUE, "SPA space map header" }, 58fa9e4066Sahrens { byteswap_uint64_array, TRUE, "SPA space map" }, 59fa9e4066Sahrens { byteswap_uint64_array, TRUE, "ZIL intent log" }, 60fa9e4066Sahrens { dnode_buf_byteswap, TRUE, "DMU dnode" }, 61fa9e4066Sahrens { dmu_objset_byteswap, TRUE, "DMU objset" }, 62fa9e4066Sahrens { byteswap_uint64_array, TRUE, "DSL directory" }, 63fa9e4066Sahrens { zap_byteswap, TRUE, "DSL directory child map"}, 64fa9e4066Sahrens { zap_byteswap, TRUE, "DSL dataset snap map" }, 65fa9e4066Sahrens { zap_byteswap, TRUE, "DSL props" }, 66fa9e4066Sahrens { byteswap_uint64_array, TRUE, "DSL dataset" }, 67fa9e4066Sahrens { zfs_znode_byteswap, TRUE, "ZFS znode" }, 68fa9e4066Sahrens { zfs_acl_byteswap, TRUE, "ZFS ACL" }, 69fa9e4066Sahrens { byteswap_uint8_array, FALSE, "ZFS plain file" }, 70fa9e4066Sahrens { zap_byteswap, TRUE, "ZFS directory" }, 71fa9e4066Sahrens { zap_byteswap, TRUE, "ZFS master node" }, 72fa9e4066Sahrens { zap_byteswap, TRUE, "ZFS delete queue" }, 73fa9e4066Sahrens { byteswap_uint8_array, FALSE, "zvol object" }, 74fa9e4066Sahrens { zap_byteswap, TRUE, "zvol prop" }, 75fa9e4066Sahrens { byteswap_uint8_array, FALSE, "other uint8[]" }, 76fa9e4066Sahrens { byteswap_uint64_array, FALSE, "other uint64[]" }, 77fa9e4066Sahrens { zap_byteswap, TRUE, "other ZAP" }, 78ea8dc4b6Seschrock { zap_byteswap, TRUE, "persistent error log" }, 7906eeb2adSek110237 { byteswap_uint8_array, TRUE, "SPA history" }, 8006eeb2adSek110237 { byteswap_uint64_array, TRUE, "SPA history offsets" }, 81fa9e4066Sahrens }; 82fa9e4066Sahrens 83fa9e4066Sahrens int 84ea8dc4b6Seschrock dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset, 85ea8dc4b6Seschrock void *tag, dmu_buf_t **dbp) 86fa9e4066Sahrens { 87fa9e4066Sahrens dnode_t *dn; 88fa9e4066Sahrens uint64_t blkid; 89fa9e4066Sahrens dmu_buf_impl_t *db; 90ea8dc4b6Seschrock int err; 91fa9e4066Sahrens 92ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 93ea8dc4b6Seschrock if (err) 94ea8dc4b6Seschrock return (err); 95fa9e4066Sahrens blkid = dbuf_whichblock(dn, offset); 96fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 97ea8dc4b6Seschrock db = dbuf_hold(dn, blkid, tag); 98fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 99ea8dc4b6Seschrock if (db == NULL) { 100ea8dc4b6Seschrock err = EIO; 101ea8dc4b6Seschrock } else { 102ea8dc4b6Seschrock err = dbuf_read(db, NULL, DB_RF_CANFAIL); 103ea8dc4b6Seschrock if (err) { 104ea8dc4b6Seschrock dbuf_rele(db, tag); 105ea8dc4b6Seschrock db = NULL; 106ea8dc4b6Seschrock } 107fa9e4066Sahrens } 108fa9e4066Sahrens 109ea8dc4b6Seschrock dnode_rele(dn, FTAG); 110ea8dc4b6Seschrock *dbp = &db->db; 111ea8dc4b6Seschrock return (err); 112fa9e4066Sahrens } 113fa9e4066Sahrens 114fa9e4066Sahrens int 115fa9e4066Sahrens dmu_bonus_max(void) 116fa9e4066Sahrens { 117fa9e4066Sahrens return (DN_MAX_BONUSLEN); 118fa9e4066Sahrens } 119fa9e4066Sahrens 120fa9e4066Sahrens /* 121ea8dc4b6Seschrock * returns ENOENT, EIO, or 0. 122fa9e4066Sahrens */ 123ea8dc4b6Seschrock int 124ea8dc4b6Seschrock dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **dbp) 125fa9e4066Sahrens { 126ea8dc4b6Seschrock dnode_t *dn; 127ea8dc4b6Seschrock int err, count; 128fa9e4066Sahrens dmu_buf_impl_t *db; 129fa9e4066Sahrens 130ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 131ea8dc4b6Seschrock if (err) 132ea8dc4b6Seschrock return (err); 133fa9e4066Sahrens 134fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 135ea8dc4b6Seschrock if (dn->dn_bonus == NULL) { 136fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 137ea8dc4b6Seschrock rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 138ea8dc4b6Seschrock if (dn->dn_bonus == NULL) 139ea8dc4b6Seschrock dn->dn_bonus = dbuf_create_bonus(dn); 140fa9e4066Sahrens } 141ea8dc4b6Seschrock db = dn->dn_bonus; 142ea8dc4b6Seschrock rw_exit(&dn->dn_struct_rwlock); 143ea8dc4b6Seschrock mutex_enter(&db->db_mtx); 144ea8dc4b6Seschrock count = refcount_add(&db->db_holds, tag); 145ea8dc4b6Seschrock mutex_exit(&db->db_mtx); 146ea8dc4b6Seschrock if (count == 1) 147ea8dc4b6Seschrock dnode_add_ref(dn, db); 148fa9e4066Sahrens dnode_rele(dn, FTAG); 149ea8dc4b6Seschrock 150ea8dc4b6Seschrock VERIFY(0 == dbuf_read(db, NULL, DB_RF_MUST_SUCCEED)); 151ea8dc4b6Seschrock 152ea8dc4b6Seschrock *dbp = &db->db; 153ea8dc4b6Seschrock return (0); 154fa9e4066Sahrens } 155fa9e4066Sahrens 15613506d1eSmaybee /* 15713506d1eSmaybee * Note: longer-term, we should modify all of the dmu_buf_*() interfaces 15813506d1eSmaybee * to take a held dnode rather than <os, object> -- the lookup is wasteful, 15913506d1eSmaybee * and can induce severe lock contention when writing to several files 16013506d1eSmaybee * whose dnodes are in the same block. 16113506d1eSmaybee */ 16213506d1eSmaybee static int 16313506d1eSmaybee dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, 164ea8dc4b6Seschrock uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) 165fa9e4066Sahrens { 166fa9e4066Sahrens dmu_buf_t **dbp; 167fa9e4066Sahrens uint64_t blkid, nblks, i; 168ea8dc4b6Seschrock uint32_t flags; 169ea8dc4b6Seschrock int err; 170ea8dc4b6Seschrock zio_t *zio; 171ea8dc4b6Seschrock 172ea8dc4b6Seschrock ASSERT(length <= DMU_MAX_ACCESS); 173fa9e4066Sahrens 174ea8dc4b6Seschrock flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT; 175e1930233Sbonwick if (length > zfetch_array_rd_sz) 176ea8dc4b6Seschrock flags |= DB_RF_NOPREFETCH; 177ea8dc4b6Seschrock 178fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 179fa9e4066Sahrens if (dn->dn_datablkshift) { 180fa9e4066Sahrens int blkshift = dn->dn_datablkshift; 181fa9e4066Sahrens nblks = (P2ROUNDUP(offset+length, 1ULL<<blkshift) - 182fa9e4066Sahrens P2ALIGN(offset, 1ULL<<blkshift)) >> blkshift; 183fa9e4066Sahrens } else { 184fa9e4066Sahrens ASSERT3U(offset + length, <=, dn->dn_datablksz); 185fa9e4066Sahrens nblks = 1; 186fa9e4066Sahrens } 187ea8dc4b6Seschrock dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP); 188fa9e4066Sahrens 189ea8dc4b6Seschrock zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, TRUE); 190fa9e4066Sahrens blkid = dbuf_whichblock(dn, offset); 191fa9e4066Sahrens for (i = 0; i < nblks; i++) { 192ea8dc4b6Seschrock dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag); 193ea8dc4b6Seschrock if (db == NULL) { 194ea8dc4b6Seschrock rw_exit(&dn->dn_struct_rwlock); 195ea8dc4b6Seschrock dmu_buf_rele_array(dbp, nblks, tag); 196ea8dc4b6Seschrock zio_nowait(zio); 197ea8dc4b6Seschrock return (EIO); 198ea8dc4b6Seschrock } 199ea8dc4b6Seschrock /* initiate async i/o */ 20013506d1eSmaybee if (read) { 201ea8dc4b6Seschrock rw_exit(&dn->dn_struct_rwlock); 202ea8dc4b6Seschrock (void) dbuf_read(db, zio, flags); 203ea8dc4b6Seschrock rw_enter(&dn->dn_struct_rwlock, RW_READER); 204ea8dc4b6Seschrock } 205ea8dc4b6Seschrock dbp[i] = &db->db; 206fa9e4066Sahrens } 207fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 208fa9e4066Sahrens 209ea8dc4b6Seschrock /* wait for async i/o */ 210ea8dc4b6Seschrock err = zio_wait(zio); 211ea8dc4b6Seschrock if (err) { 212ea8dc4b6Seschrock dmu_buf_rele_array(dbp, nblks, tag); 213ea8dc4b6Seschrock return (err); 214ea8dc4b6Seschrock } 215ea8dc4b6Seschrock 216ea8dc4b6Seschrock /* wait for other io to complete */ 217ea8dc4b6Seschrock if (read) { 218ea8dc4b6Seschrock for (i = 0; i < nblks; i++) { 219ea8dc4b6Seschrock dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i]; 220ea8dc4b6Seschrock mutex_enter(&db->db_mtx); 221ea8dc4b6Seschrock while (db->db_state == DB_READ || 222ea8dc4b6Seschrock db->db_state == DB_FILL) 223ea8dc4b6Seschrock cv_wait(&db->db_changed, &db->db_mtx); 224ea8dc4b6Seschrock if (db->db_state == DB_UNCACHED) 225ea8dc4b6Seschrock err = EIO; 226ea8dc4b6Seschrock mutex_exit(&db->db_mtx); 227ea8dc4b6Seschrock if (err) { 228ea8dc4b6Seschrock dmu_buf_rele_array(dbp, nblks, tag); 229ea8dc4b6Seschrock return (err); 230ea8dc4b6Seschrock } 231ea8dc4b6Seschrock } 232ea8dc4b6Seschrock } 233ea8dc4b6Seschrock 234ea8dc4b6Seschrock *numbufsp = nblks; 235ea8dc4b6Seschrock *dbpp = dbp; 236ea8dc4b6Seschrock return (0); 237fa9e4066Sahrens } 238fa9e4066Sahrens 239a2eea2e1Sahrens static int 24013506d1eSmaybee dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset, 24113506d1eSmaybee uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) 24213506d1eSmaybee { 24313506d1eSmaybee dnode_t *dn; 24413506d1eSmaybee int err; 24513506d1eSmaybee 24613506d1eSmaybee err = dnode_hold(os->os, object, FTAG, &dn); 24713506d1eSmaybee if (err) 24813506d1eSmaybee return (err); 24913506d1eSmaybee 25013506d1eSmaybee err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, 25113506d1eSmaybee numbufsp, dbpp); 25213506d1eSmaybee 25313506d1eSmaybee dnode_rele(dn, FTAG); 25413506d1eSmaybee 25513506d1eSmaybee return (err); 25613506d1eSmaybee } 25713506d1eSmaybee 25813506d1eSmaybee int 25913506d1eSmaybee dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset, 26013506d1eSmaybee uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp) 26113506d1eSmaybee { 26213506d1eSmaybee dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode; 26313506d1eSmaybee int err; 26413506d1eSmaybee 26513506d1eSmaybee err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag, 26613506d1eSmaybee numbufsp, dbpp); 26713506d1eSmaybee 26813506d1eSmaybee return (err); 26913506d1eSmaybee } 27013506d1eSmaybee 271fa9e4066Sahrens void 272ea8dc4b6Seschrock dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, void *tag) 273fa9e4066Sahrens { 274fa9e4066Sahrens int i; 275fa9e4066Sahrens dmu_buf_impl_t **dbp = (dmu_buf_impl_t **)dbp_fake; 276fa9e4066Sahrens 277fa9e4066Sahrens if (numbufs == 0) 278fa9e4066Sahrens return; 279fa9e4066Sahrens 280ea8dc4b6Seschrock for (i = 0; i < numbufs; i++) { 281ea8dc4b6Seschrock if (dbp[i]) 282ea8dc4b6Seschrock dbuf_rele(dbp[i], tag); 283ea8dc4b6Seschrock } 284fa9e4066Sahrens 285fa9e4066Sahrens kmem_free(dbp, sizeof (dmu_buf_t *) * numbufs); 286fa9e4066Sahrens } 287fa9e4066Sahrens 288fa9e4066Sahrens void 289fa9e4066Sahrens dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len) 290fa9e4066Sahrens { 291fa9e4066Sahrens dnode_t *dn; 292fa9e4066Sahrens uint64_t blkid; 293ea8dc4b6Seschrock int nblks, i, err; 294fa9e4066Sahrens 295416e0cd8Sek110237 if (zfs_prefetch_disable) 296416e0cd8Sek110237 return; 297416e0cd8Sek110237 298fa9e4066Sahrens if (len == 0) { /* they're interested in the bonus buffer */ 299fa9e4066Sahrens dn = os->os->os_meta_dnode; 300fa9e4066Sahrens 301fa9e4066Sahrens if (object == 0 || object >= DN_MAX_OBJECT) 302fa9e4066Sahrens return; 303fa9e4066Sahrens 304fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 305fa9e4066Sahrens blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t)); 306fa9e4066Sahrens dbuf_prefetch(dn, blkid); 307fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 308fa9e4066Sahrens return; 309fa9e4066Sahrens } 310fa9e4066Sahrens 311fa9e4066Sahrens /* 312fa9e4066Sahrens * XXX - Note, if the dnode for the requested object is not 313fa9e4066Sahrens * already cached, we will do a *synchronous* read in the 314fa9e4066Sahrens * dnode_hold() call. The same is true for any indirects. 315fa9e4066Sahrens */ 316ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 317ea8dc4b6Seschrock if (err != 0) 318fa9e4066Sahrens return; 319fa9e4066Sahrens 320fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 321fa9e4066Sahrens if (dn->dn_datablkshift) { 322fa9e4066Sahrens int blkshift = dn->dn_datablkshift; 323fa9e4066Sahrens nblks = (P2ROUNDUP(offset+len, 1<<blkshift) - 324fa9e4066Sahrens P2ALIGN(offset, 1<<blkshift)) >> blkshift; 325fa9e4066Sahrens } else { 326fa9e4066Sahrens nblks = (offset < dn->dn_datablksz); 327fa9e4066Sahrens } 328fa9e4066Sahrens 329fa9e4066Sahrens if (nblks != 0) { 330fa9e4066Sahrens blkid = dbuf_whichblock(dn, offset); 331fa9e4066Sahrens for (i = 0; i < nblks; i++) 332fa9e4066Sahrens dbuf_prefetch(dn, blkid+i); 333fa9e4066Sahrens } 334fa9e4066Sahrens 335fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 336fa9e4066Sahrens 337fa9e4066Sahrens dnode_rele(dn, FTAG); 338fa9e4066Sahrens } 339fa9e4066Sahrens 340ea8dc4b6Seschrock int 341fa9e4066Sahrens dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, 342fa9e4066Sahrens uint64_t size, dmu_tx_t *tx) 343fa9e4066Sahrens { 344ea8dc4b6Seschrock dnode_t *dn; 345ea8dc4b6Seschrock int err = dnode_hold(os->os, object, FTAG, &dn); 346ea8dc4b6Seschrock if (err) 347ea8dc4b6Seschrock return (err); 348fa9e4066Sahrens ASSERT(offset < UINT64_MAX); 349fa9e4066Sahrens ASSERT(size == -1ULL || size <= UINT64_MAX - offset); 350fa9e4066Sahrens dnode_free_range(dn, offset, size, tx); 351fa9e4066Sahrens dnode_rele(dn, FTAG); 352ea8dc4b6Seschrock return (0); 353fa9e4066Sahrens } 354fa9e4066Sahrens 355ea8dc4b6Seschrock int 356ea8dc4b6Seschrock dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 357ea8dc4b6Seschrock void *buf) 358fa9e4066Sahrens { 359fa9e4066Sahrens dnode_t *dn; 360fa9e4066Sahrens dmu_buf_t **dbp; 361ea8dc4b6Seschrock int numbufs, i, err; 362fa9e4066Sahrens 363ea8dc4b6Seschrock /* 364ea8dc4b6Seschrock * Deal with odd block sizes, where there can't be data past the 365ea8dc4b6Seschrock * first block. 366ea8dc4b6Seschrock */ 367ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 368ea8dc4b6Seschrock if (err) 369ea8dc4b6Seschrock return (err); 370fa9e4066Sahrens if (dn->dn_datablkshift == 0) { 371fa9e4066Sahrens int newsz = offset > dn->dn_datablksz ? 0 : 372fa9e4066Sahrens MIN(size, dn->dn_datablksz - offset); 373fa9e4066Sahrens bzero((char *)buf + newsz, size - newsz); 374fa9e4066Sahrens size = newsz; 375fa9e4066Sahrens } 376fa9e4066Sahrens 377fa9e4066Sahrens while (size > 0) { 378fa9e4066Sahrens uint64_t mylen = MIN(size, DMU_MAX_ACCESS / 2); 379fa9e4066Sahrens int err; 380fa9e4066Sahrens 381fa9e4066Sahrens /* 382fa9e4066Sahrens * NB: we could do this block-at-a-time, but it's nice 383fa9e4066Sahrens * to be reading in parallel. 384fa9e4066Sahrens */ 385a2eea2e1Sahrens err = dmu_buf_hold_array_by_dnode(dn, offset, mylen, 386ea8dc4b6Seschrock TRUE, FTAG, &numbufs, &dbp); 387ea8dc4b6Seschrock if (err) 388fa9e4066Sahrens return (err); 389fa9e4066Sahrens 390fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 391fa9e4066Sahrens int tocpy; 392fa9e4066Sahrens int bufoff; 393fa9e4066Sahrens dmu_buf_t *db = dbp[i]; 394fa9e4066Sahrens 395fa9e4066Sahrens ASSERT(size > 0); 396fa9e4066Sahrens 397fa9e4066Sahrens bufoff = offset - db->db_offset; 398fa9e4066Sahrens tocpy = (int)MIN(db->db_size - bufoff, size); 399fa9e4066Sahrens 400fa9e4066Sahrens bcopy((char *)db->db_data + bufoff, buf, tocpy); 401fa9e4066Sahrens 402fa9e4066Sahrens offset += tocpy; 403fa9e4066Sahrens size -= tocpy; 404fa9e4066Sahrens buf = (char *)buf + tocpy; 405fa9e4066Sahrens } 406ea8dc4b6Seschrock dmu_buf_rele_array(dbp, numbufs, FTAG); 407fa9e4066Sahrens } 408a2eea2e1Sahrens dnode_rele(dn, FTAG); 409fa9e4066Sahrens return (0); 410fa9e4066Sahrens } 411fa9e4066Sahrens 412fa9e4066Sahrens void 413fa9e4066Sahrens dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 414fa9e4066Sahrens const void *buf, dmu_tx_t *tx) 415fa9e4066Sahrens { 416fa9e4066Sahrens dmu_buf_t **dbp; 417fa9e4066Sahrens int numbufs, i; 418fa9e4066Sahrens 41913506d1eSmaybee if (size == 0) 42013506d1eSmaybee return; 42113506d1eSmaybee 422ea8dc4b6Seschrock VERIFY(0 == dmu_buf_hold_array(os, object, offset, size, 423ea8dc4b6Seschrock FALSE, FTAG, &numbufs, &dbp)); 424fa9e4066Sahrens 425fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 426fa9e4066Sahrens int tocpy; 427fa9e4066Sahrens int bufoff; 428fa9e4066Sahrens dmu_buf_t *db = dbp[i]; 429fa9e4066Sahrens 430fa9e4066Sahrens ASSERT(size > 0); 431fa9e4066Sahrens 432fa9e4066Sahrens bufoff = offset - db->db_offset; 433fa9e4066Sahrens tocpy = (int)MIN(db->db_size - bufoff, size); 434fa9e4066Sahrens 435fa9e4066Sahrens ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 436fa9e4066Sahrens 437fa9e4066Sahrens if (tocpy == db->db_size) 438fa9e4066Sahrens dmu_buf_will_fill(db, tx); 439fa9e4066Sahrens else 440fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 441fa9e4066Sahrens 442fa9e4066Sahrens bcopy(buf, (char *)db->db_data + bufoff, tocpy); 443fa9e4066Sahrens 444fa9e4066Sahrens if (tocpy == db->db_size) 445fa9e4066Sahrens dmu_buf_fill_done(db, tx); 446fa9e4066Sahrens 447fa9e4066Sahrens offset += tocpy; 448fa9e4066Sahrens size -= tocpy; 449fa9e4066Sahrens buf = (char *)buf + tocpy; 450fa9e4066Sahrens } 451ea8dc4b6Seschrock dmu_buf_rele_array(dbp, numbufs, FTAG); 452fa9e4066Sahrens } 453fa9e4066Sahrens 454fa9e4066Sahrens #ifdef _KERNEL 455fa9e4066Sahrens int 456fa9e4066Sahrens dmu_write_uio(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 457fa9e4066Sahrens uio_t *uio, dmu_tx_t *tx) 458fa9e4066Sahrens { 459fa9e4066Sahrens dmu_buf_t **dbp; 460fa9e4066Sahrens int numbufs, i; 461fa9e4066Sahrens int err = 0; 462fa9e4066Sahrens 46313506d1eSmaybee if (size == 0) 46413506d1eSmaybee return (0); 46513506d1eSmaybee 466ea8dc4b6Seschrock err = dmu_buf_hold_array(os, object, offset, size, 467ea8dc4b6Seschrock FALSE, FTAG, &numbufs, &dbp); 468ea8dc4b6Seschrock if (err) 469ea8dc4b6Seschrock return (err); 470fa9e4066Sahrens 471fa9e4066Sahrens for (i = 0; i < numbufs; i++) { 472fa9e4066Sahrens int tocpy; 473fa9e4066Sahrens int bufoff; 474fa9e4066Sahrens dmu_buf_t *db = dbp[i]; 475fa9e4066Sahrens 476fa9e4066Sahrens ASSERT(size > 0); 477fa9e4066Sahrens 478fa9e4066Sahrens bufoff = offset - db->db_offset; 479fa9e4066Sahrens tocpy = (int)MIN(db->db_size - bufoff, size); 480fa9e4066Sahrens 481fa9e4066Sahrens ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 482fa9e4066Sahrens 483fa9e4066Sahrens if (tocpy == db->db_size) 484fa9e4066Sahrens dmu_buf_will_fill(db, tx); 485fa9e4066Sahrens else 486fa9e4066Sahrens dmu_buf_will_dirty(db, tx); 487fa9e4066Sahrens 488fa9e4066Sahrens /* 489fa9e4066Sahrens * XXX uiomove could block forever (eg. nfs-backed 490fa9e4066Sahrens * pages). There needs to be a uiolockdown() function 491fa9e4066Sahrens * to lock the pages in memory, so that uiomove won't 492fa9e4066Sahrens * block. 493fa9e4066Sahrens */ 494fa9e4066Sahrens err = uiomove((char *)db->db_data + bufoff, tocpy, 495fa9e4066Sahrens UIO_WRITE, uio); 496fa9e4066Sahrens 497fa9e4066Sahrens if (tocpy == db->db_size) 498fa9e4066Sahrens dmu_buf_fill_done(db, tx); 499fa9e4066Sahrens 500fa9e4066Sahrens if (err) 501fa9e4066Sahrens break; 502fa9e4066Sahrens 503fa9e4066Sahrens offset += tocpy; 504fa9e4066Sahrens size -= tocpy; 505fa9e4066Sahrens } 506ea8dc4b6Seschrock dmu_buf_rele_array(dbp, numbufs, FTAG); 507fa9e4066Sahrens return (err); 508fa9e4066Sahrens } 50944eda4d7Smaybee 51044eda4d7Smaybee int 51144eda4d7Smaybee dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, 51244eda4d7Smaybee page_t *pp, dmu_tx_t *tx) 51344eda4d7Smaybee { 51444eda4d7Smaybee dmu_buf_t **dbp; 51544eda4d7Smaybee int numbufs, i; 51644eda4d7Smaybee int err; 51744eda4d7Smaybee 51844eda4d7Smaybee if (size == 0) 51944eda4d7Smaybee return (0); 52044eda4d7Smaybee 52144eda4d7Smaybee err = dmu_buf_hold_array(os, object, offset, size, 52244eda4d7Smaybee FALSE, FTAG, &numbufs, &dbp); 52344eda4d7Smaybee if (err) 52444eda4d7Smaybee return (err); 52544eda4d7Smaybee 52644eda4d7Smaybee for (i = 0; i < numbufs; i++) { 52744eda4d7Smaybee int tocpy, copied, thiscpy; 52844eda4d7Smaybee int bufoff; 52944eda4d7Smaybee dmu_buf_t *db = dbp[i]; 53044eda4d7Smaybee caddr_t va; 53144eda4d7Smaybee 53244eda4d7Smaybee ASSERT(size > 0); 53344eda4d7Smaybee ASSERT3U(db->db_size, >=, PAGESIZE); 53444eda4d7Smaybee 53544eda4d7Smaybee bufoff = offset - db->db_offset; 53644eda4d7Smaybee tocpy = (int)MIN(db->db_size - bufoff, size); 53744eda4d7Smaybee 53844eda4d7Smaybee ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); 53944eda4d7Smaybee 54044eda4d7Smaybee if (tocpy == db->db_size) 54144eda4d7Smaybee dmu_buf_will_fill(db, tx); 54244eda4d7Smaybee else 54344eda4d7Smaybee dmu_buf_will_dirty(db, tx); 54444eda4d7Smaybee 54544eda4d7Smaybee for (copied = 0; copied < tocpy; copied += PAGESIZE) { 54644eda4d7Smaybee ASSERT3U(pp->p_offset, ==, db->db_offset + bufoff); 54744eda4d7Smaybee thiscpy = MIN(PAGESIZE, tocpy - copied); 54844eda4d7Smaybee va = ppmapin(pp, PROT_READ, (caddr_t)-1); 54944eda4d7Smaybee bcopy(va, (char *)db->db_data + bufoff, thiscpy); 55044eda4d7Smaybee ppmapout(va); 55144eda4d7Smaybee pp = pp->p_next; 55244eda4d7Smaybee bufoff += PAGESIZE; 55344eda4d7Smaybee } 55444eda4d7Smaybee 55544eda4d7Smaybee if (tocpy == db->db_size) 55644eda4d7Smaybee dmu_buf_fill_done(db, tx); 55744eda4d7Smaybee 55844eda4d7Smaybee if (err) 55944eda4d7Smaybee break; 56044eda4d7Smaybee 56144eda4d7Smaybee offset += tocpy; 56244eda4d7Smaybee size -= tocpy; 56344eda4d7Smaybee } 56444eda4d7Smaybee dmu_buf_rele_array(dbp, numbufs, FTAG); 56544eda4d7Smaybee return (err); 56644eda4d7Smaybee } 567fa9e4066Sahrens #endif 568fa9e4066Sahrens 569c5c6ffa0Smaybee typedef struct { 570c5c6ffa0Smaybee uint64_t txg; 571c5c6ffa0Smaybee dmu_buf_impl_t *db; 572c5c6ffa0Smaybee dmu_sync_cb_t *done; 573c5c6ffa0Smaybee void *arg; 574c5c6ffa0Smaybee } dmu_sync_cbin_t; 575c5c6ffa0Smaybee 576c5c6ffa0Smaybee typedef union { 577c5c6ffa0Smaybee dmu_sync_cbin_t data; 578c5c6ffa0Smaybee blkptr_t blk; 579c5c6ffa0Smaybee } dmu_sync_cbarg_t; 580c5c6ffa0Smaybee 581c5c6ffa0Smaybee /* ARGSUSED */ 582c5c6ffa0Smaybee static void 583c5c6ffa0Smaybee dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg) 584c5c6ffa0Smaybee { 585c5c6ffa0Smaybee dmu_sync_cbin_t *in = (dmu_sync_cbin_t *)varg; 586c5c6ffa0Smaybee dmu_buf_impl_t *db = in->db; 587c5c6ffa0Smaybee uint64_t txg = in->txg; 588c5c6ffa0Smaybee dmu_sync_cb_t *done = in->done; 589c5c6ffa0Smaybee void *arg = in->arg; 590c5c6ffa0Smaybee blkptr_t *blk = (blkptr_t *)varg; 591c5c6ffa0Smaybee 592c5c6ffa0Smaybee if (!BP_IS_HOLE(zio->io_bp)) { 593c5c6ffa0Smaybee zio->io_bp->blk_fill = 1; 594c5c6ffa0Smaybee BP_SET_TYPE(zio->io_bp, db->db_dnode->dn_type); 595c5c6ffa0Smaybee BP_SET_LEVEL(zio->io_bp, 0); 596c5c6ffa0Smaybee } 597c5c6ffa0Smaybee 598c5c6ffa0Smaybee *blk = *zio->io_bp; /* structure assignment */ 599c5c6ffa0Smaybee 600c5c6ffa0Smaybee mutex_enter(&db->db_mtx); 601c5c6ffa0Smaybee ASSERT(db->db_d.db_overridden_by[txg&TXG_MASK] == IN_DMU_SYNC); 602c5c6ffa0Smaybee db->db_d.db_overridden_by[txg&TXG_MASK] = blk; 603c5c6ffa0Smaybee cv_broadcast(&db->db_changed); 604c5c6ffa0Smaybee mutex_exit(&db->db_mtx); 605c5c6ffa0Smaybee 606c5c6ffa0Smaybee if (done) 607c5c6ffa0Smaybee done(&(db->db), arg); 608c5c6ffa0Smaybee } 609c5c6ffa0Smaybee 610fa9e4066Sahrens /* 611c5c6ffa0Smaybee * Intent log support: sync the block associated with db to disk. 612c5c6ffa0Smaybee * N.B. and XXX: the caller is responsible for making sure that the 613c5c6ffa0Smaybee * data isn't changing while dmu_sync() is writing it. 614fa9e4066Sahrens * 615fa9e4066Sahrens * Return values: 616fa9e4066Sahrens * 617c5c6ffa0Smaybee * EEXIST: this txg has already been synced, so there's nothing to to. 618fa9e4066Sahrens * The caller should not log the write. 619fa9e4066Sahrens * 620fa9e4066Sahrens * ENOENT: the block was dbuf_free_range()'d, so there's nothing to do. 621fa9e4066Sahrens * The caller should not log the write. 622fa9e4066Sahrens * 623c5c6ffa0Smaybee * EALREADY: this block is already in the process of being synced. 624c5c6ffa0Smaybee * The caller should track its progress (somehow). 625fa9e4066Sahrens * 626c5c6ffa0Smaybee * EINPROGRESS: the IO has been initiated. 627c5c6ffa0Smaybee * The caller should log this blkptr in the callback. 628fa9e4066Sahrens * 629c5c6ffa0Smaybee * 0: completed. Sets *bp to the blkptr just written. 630c5c6ffa0Smaybee * The caller should log this blkptr immediately. 631fa9e4066Sahrens */ 632fa9e4066Sahrens int 633c5c6ffa0Smaybee dmu_sync(zio_t *pio, dmu_buf_t *db_fake, 634c5c6ffa0Smaybee blkptr_t *bp, uint64_t txg, dmu_sync_cb_t *done, void *arg) 635fa9e4066Sahrens { 636c5c6ffa0Smaybee dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; 637c5c6ffa0Smaybee objset_impl_t *os = db->db_objset; 638c5c6ffa0Smaybee dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool; 639fa9e4066Sahrens tx_state_t *tx = &dp->dp_tx; 640c5c6ffa0Smaybee dmu_sync_cbin_t *in; 641fa9e4066Sahrens blkptr_t *blk; 642ea8dc4b6Seschrock zbookmark_t zb; 643c5c6ffa0Smaybee uint32_t arc_flag; 644c5c6ffa0Smaybee int err; 645fa9e4066Sahrens 646fa9e4066Sahrens ASSERT(BP_IS_HOLE(bp)); 647fa9e4066Sahrens ASSERT(txg != 0); 648fa9e4066Sahrens 649c5c6ffa0Smaybee 650fa9e4066Sahrens dprintf("dmu_sync txg=%llu, s,o,q %llu %llu %llu\n", 651fa9e4066Sahrens txg, tx->tx_synced_txg, tx->tx_open_txg, tx->tx_quiesced_txg); 652fa9e4066Sahrens 653fa9e4066Sahrens /* 654c5c6ffa0Smaybee * XXX - would be nice if we could do this without suspending... 655ea8dc4b6Seschrock */ 656c5c6ffa0Smaybee txg_suspend(dp); 657ea8dc4b6Seschrock 658ea8dc4b6Seschrock /* 659fa9e4066Sahrens * If this txg already synced, there's nothing to do. 660fa9e4066Sahrens */ 661fa9e4066Sahrens if (txg <= tx->tx_synced_txg) { 662c5c6ffa0Smaybee txg_resume(dp); 663fa9e4066Sahrens /* 664fa9e4066Sahrens * If we're running ziltest, we need the blkptr regardless. 665fa9e4066Sahrens */ 666fa9e4066Sahrens if (txg > spa_freeze_txg(dp->dp_spa)) { 667fa9e4066Sahrens /* if db_blkptr == NULL, this was an empty write */ 668fa9e4066Sahrens if (db->db_blkptr) 669fa9e4066Sahrens *bp = *db->db_blkptr; /* structure assignment */ 670fa9e4066Sahrens return (0); 671fa9e4066Sahrens } 672c5c6ffa0Smaybee return (EEXIST); 673fa9e4066Sahrens } 674fa9e4066Sahrens 675fa9e4066Sahrens mutex_enter(&db->db_mtx); 676fa9e4066Sahrens 677c5c6ffa0Smaybee blk = db->db_d.db_overridden_by[txg&TXG_MASK]; 678c5c6ffa0Smaybee if (blk == IN_DMU_SYNC) { 679fa9e4066Sahrens /* 680c5c6ffa0Smaybee * We have already issued a sync write for this buffer. 681c5c6ffa0Smaybee */ 682c5c6ffa0Smaybee mutex_exit(&db->db_mtx); 683c5c6ffa0Smaybee txg_resume(dp); 684c5c6ffa0Smaybee return (EALREADY); 685c5c6ffa0Smaybee } else if (blk != NULL) { 686c5c6ffa0Smaybee /* 687c5c6ffa0Smaybee * This buffer had already been synced. It could not 688c5c6ffa0Smaybee * have been dirtied since, or we would have cleared blk. 689c5c6ffa0Smaybee */ 690c5c6ffa0Smaybee *bp = *blk; /* structure assignment */ 691c5c6ffa0Smaybee mutex_exit(&db->db_mtx); 692c5c6ffa0Smaybee txg_resume(dp); 693c5c6ffa0Smaybee return (0); 694c5c6ffa0Smaybee } 695c5c6ffa0Smaybee 696c5c6ffa0Smaybee if (txg == tx->tx_syncing_txg) { 697c5c6ffa0Smaybee while (db->db_data_pending) { 698c5c6ffa0Smaybee /* 699c5c6ffa0Smaybee * IO is in-progress. Wait for it to finish. 700c5c6ffa0Smaybee * XXX - would be nice to be able to somehow "attach" 701c5c6ffa0Smaybee * this zio to the parent zio passed in. 702c5c6ffa0Smaybee */ 703c5c6ffa0Smaybee cv_wait(&db->db_changed, &db->db_mtx); 70413506d1eSmaybee if (!db->db_data_pending && 70513506d1eSmaybee db->db_blkptr && BP_IS_HOLE(db->db_blkptr)) { 70613506d1eSmaybee /* 70713506d1eSmaybee * IO was compressed away 70813506d1eSmaybee */ 70913506d1eSmaybee *bp = *db->db_blkptr; /* structure assignment */ 71013506d1eSmaybee mutex_exit(&db->db_mtx); 71113506d1eSmaybee txg_resume(dp); 71213506d1eSmaybee return (0); 71313506d1eSmaybee } 714c5c6ffa0Smaybee ASSERT(db->db_data_pending || 715c5c6ffa0Smaybee (db->db_blkptr && db->db_blkptr->blk_birth == txg)); 716c5c6ffa0Smaybee } 717c5c6ffa0Smaybee 718c5c6ffa0Smaybee if (db->db_blkptr && db->db_blkptr->blk_birth == txg) { 719c5c6ffa0Smaybee /* 720c5c6ffa0Smaybee * IO is already completed. 721c5c6ffa0Smaybee */ 722c5c6ffa0Smaybee *bp = *db->db_blkptr; /* structure assignment */ 723c5c6ffa0Smaybee mutex_exit(&db->db_mtx); 724c5c6ffa0Smaybee txg_resume(dp); 725c5c6ffa0Smaybee return (0); 726c5c6ffa0Smaybee } 727c5c6ffa0Smaybee } 728c5c6ffa0Smaybee 729c5c6ffa0Smaybee if (db->db_d.db_data_old[txg&TXG_MASK] == NULL) { 730c5c6ffa0Smaybee /* 731c5c6ffa0Smaybee * This dbuf isn't dirty, must have been free_range'd. 732fa9e4066Sahrens * There's no need to log writes to freed blocks, so we're done. 733fa9e4066Sahrens */ 734fa9e4066Sahrens mutex_exit(&db->db_mtx); 735c5c6ffa0Smaybee txg_resume(dp); 736fa9e4066Sahrens return (ENOENT); 737fa9e4066Sahrens } 738fa9e4066Sahrens 739c5c6ffa0Smaybee ASSERT(db->db_d.db_overridden_by[txg&TXG_MASK] == NULL); 740fa9e4066Sahrens db->db_d.db_overridden_by[txg&TXG_MASK] = IN_DMU_SYNC; 741c5c6ffa0Smaybee /* 742c5c6ffa0Smaybee * XXX - a little ugly to stash the blkptr in the callback 743c5c6ffa0Smaybee * buffer. We always need to make sure the following is true: 744c5c6ffa0Smaybee * ASSERT(sizeof(blkptr_t) >= sizeof(dmu_sync_cbin_t)); 745c5c6ffa0Smaybee */ 746c5c6ffa0Smaybee in = kmem_alloc(sizeof (blkptr_t), KM_SLEEP); 747c5c6ffa0Smaybee in->db = db; 748c5c6ffa0Smaybee in->txg = txg; 749c5c6ffa0Smaybee in->done = done; 750c5c6ffa0Smaybee in->arg = arg; 751fa9e4066Sahrens mutex_exit(&db->db_mtx); 752c5c6ffa0Smaybee txg_resume(dp); 753fa9e4066Sahrens 754c5c6ffa0Smaybee arc_flag = pio == NULL ? ARC_WAIT : ARC_NOWAIT; 755c5c6ffa0Smaybee zb.zb_objset = os->os_dsl_dataset->ds_object; 756ea8dc4b6Seschrock zb.zb_object = db->db.db_object; 757ea8dc4b6Seschrock zb.zb_level = db->db_level; 758ea8dc4b6Seschrock zb.zb_blkid = db->db_blkid; 759c5c6ffa0Smaybee err = arc_write(pio, os->os_spa, 760c5c6ffa0Smaybee zio_checksum_select(db->db_dnode->dn_checksum, os->os_checksum), 761c5c6ffa0Smaybee zio_compress_select(db->db_dnode->dn_compress, os->os_compress), 762c5c6ffa0Smaybee dmu_get_replication_level(os->os_spa, &zb, db->db_dnode->dn_type), 763c5c6ffa0Smaybee txg, bp, db->db_d.db_data_old[txg&TXG_MASK], dmu_sync_done, in, 764c5c6ffa0Smaybee ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, arc_flag, &zb); 765fa9e4066Sahrens ASSERT(err == 0); 766fa9e4066Sahrens 767c5c6ffa0Smaybee return (arc_flag == ARC_NOWAIT ? EINPROGRESS : 0); 768fa9e4066Sahrens } 769fa9e4066Sahrens 770fa9e4066Sahrens int 771fa9e4066Sahrens dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, 772fa9e4066Sahrens dmu_tx_t *tx) 773fa9e4066Sahrens { 774ea8dc4b6Seschrock dnode_t *dn; 775ea8dc4b6Seschrock int err; 776ea8dc4b6Seschrock 777ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 778ea8dc4b6Seschrock if (err) 779ea8dc4b6Seschrock return (err); 780ea8dc4b6Seschrock err = dnode_set_blksz(dn, size, ibs, tx); 781fa9e4066Sahrens dnode_rele(dn, FTAG); 782fa9e4066Sahrens return (err); 783fa9e4066Sahrens } 784fa9e4066Sahrens 785fa9e4066Sahrens void 786fa9e4066Sahrens dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, 787fa9e4066Sahrens dmu_tx_t *tx) 788fa9e4066Sahrens { 789ea8dc4b6Seschrock dnode_t *dn; 790ea8dc4b6Seschrock 791ea8dc4b6Seschrock /* XXX assumes dnode_hold will not get an i/o error */ 792ea8dc4b6Seschrock (void) dnode_hold(os->os, object, FTAG, &dn); 793fa9e4066Sahrens ASSERT(checksum < ZIO_CHECKSUM_FUNCTIONS); 794fa9e4066Sahrens dn->dn_checksum = checksum; 795fa9e4066Sahrens dnode_setdirty(dn, tx); 796fa9e4066Sahrens dnode_rele(dn, FTAG); 797fa9e4066Sahrens } 798fa9e4066Sahrens 799fa9e4066Sahrens void 800fa9e4066Sahrens dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress, 801fa9e4066Sahrens dmu_tx_t *tx) 802fa9e4066Sahrens { 803ea8dc4b6Seschrock dnode_t *dn; 804ea8dc4b6Seschrock 805ea8dc4b6Seschrock /* XXX assumes dnode_hold will not get an i/o error */ 806ea8dc4b6Seschrock (void) dnode_hold(os->os, object, FTAG, &dn); 807fa9e4066Sahrens ASSERT(compress < ZIO_COMPRESS_FUNCTIONS); 808fa9e4066Sahrens dn->dn_compress = compress; 809fa9e4066Sahrens dnode_setdirty(dn, tx); 810fa9e4066Sahrens dnode_rele(dn, FTAG); 811fa9e4066Sahrens } 812fa9e4066Sahrens 81344cd46caSbillm /* 81444cd46caSbillm * XXX - eventually, this should take into account per-dataset (or 81544cd46caSbillm * even per-object?) user requests for higher levels of replication. 81644cd46caSbillm */ 81744cd46caSbillm int 81844cd46caSbillm dmu_get_replication_level(spa_t *spa, zbookmark_t *zb, dmu_object_type_t ot) 81944cd46caSbillm { 82044cd46caSbillm int ncopies = 1; 82144cd46caSbillm 82244cd46caSbillm if (dmu_ot[ot].ot_metadata) 82344cd46caSbillm ncopies++; 82444cd46caSbillm if (zb->zb_level != 0) 82544cd46caSbillm ncopies++; 82644cd46caSbillm if (zb->zb_objset == 0 && zb->zb_object == 0) 82744cd46caSbillm ncopies++; 82844cd46caSbillm return (MIN(ncopies, spa_max_replication(spa))); 82944cd46caSbillm } 83044cd46caSbillm 831fa9e4066Sahrens int 832fa9e4066Sahrens dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off) 833fa9e4066Sahrens { 834fa9e4066Sahrens dnode_t *dn; 835fa9e4066Sahrens int i, err; 836fa9e4066Sahrens 837ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 838ea8dc4b6Seschrock if (err) 839ea8dc4b6Seschrock return (err); 840fa9e4066Sahrens /* 841fa9e4066Sahrens * Sync any current changes before 842fa9e4066Sahrens * we go trundling through the block pointers. 843fa9e4066Sahrens */ 844fa9e4066Sahrens for (i = 0; i < TXG_SIZE; i++) { 845c543ec06Sahrens if (list_link_active(&dn->dn_dirty_link[i])) 846fa9e4066Sahrens break; 847fa9e4066Sahrens } 848fa9e4066Sahrens if (i != TXG_SIZE) { 849fa9e4066Sahrens dnode_rele(dn, FTAG); 850fa9e4066Sahrens txg_wait_synced(dmu_objset_pool(os), 0); 851ea8dc4b6Seschrock err = dnode_hold(os->os, object, FTAG, &dn); 852ea8dc4b6Seschrock if (err) 853ea8dc4b6Seschrock return (err); 854fa9e4066Sahrens } 855fa9e4066Sahrens 856*6754306eSahrens err = dnode_next_offset(dn, hole, off, 1, 1, 0); 857fa9e4066Sahrens dnode_rele(dn, FTAG); 858fa9e4066Sahrens 859fa9e4066Sahrens return (err); 860fa9e4066Sahrens } 861fa9e4066Sahrens 862fa9e4066Sahrens void 863fa9e4066Sahrens dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi) 864fa9e4066Sahrens { 865fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 866fa9e4066Sahrens mutex_enter(&dn->dn_mtx); 867fa9e4066Sahrens 868fa9e4066Sahrens doi->doi_data_block_size = dn->dn_datablksz; 869fa9e4066Sahrens doi->doi_metadata_block_size = dn->dn_indblkshift ? 870fa9e4066Sahrens 1ULL << dn->dn_indblkshift : 0; 871fa9e4066Sahrens doi->doi_indirection = dn->dn_nlevels; 872fa9e4066Sahrens doi->doi_checksum = dn->dn_checksum; 873fa9e4066Sahrens doi->doi_compress = dn->dn_compress; 87499653d4eSeschrock doi->doi_physical_blks = (DN_USED_BYTES(dn->dn_phys) + 87599653d4eSeschrock SPA_MINBLOCKSIZE/2) >> SPA_MINBLOCKSHIFT; 876fa9e4066Sahrens doi->doi_max_block_offset = dn->dn_phys->dn_maxblkid; 877fa9e4066Sahrens doi->doi_type = dn->dn_type; 878fa9e4066Sahrens doi->doi_bonus_size = dn->dn_bonuslen; 879fa9e4066Sahrens doi->doi_bonus_type = dn->dn_bonustype; 880fa9e4066Sahrens 881fa9e4066Sahrens mutex_exit(&dn->dn_mtx); 882fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 883fa9e4066Sahrens } 884fa9e4066Sahrens 885fa9e4066Sahrens /* 886fa9e4066Sahrens * Get information on a DMU object. 887fa9e4066Sahrens * If doi is NULL, just indicates whether the object exists. 888fa9e4066Sahrens */ 889fa9e4066Sahrens int 890fa9e4066Sahrens dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi) 891fa9e4066Sahrens { 892ea8dc4b6Seschrock dnode_t *dn; 893ea8dc4b6Seschrock int err = dnode_hold(os->os, object, FTAG, &dn); 894fa9e4066Sahrens 895ea8dc4b6Seschrock if (err) 896ea8dc4b6Seschrock return (err); 897fa9e4066Sahrens 898fa9e4066Sahrens if (doi != NULL) 899fa9e4066Sahrens dmu_object_info_from_dnode(dn, doi); 900fa9e4066Sahrens 901fa9e4066Sahrens dnode_rele(dn, FTAG); 902fa9e4066Sahrens return (0); 903fa9e4066Sahrens } 904fa9e4066Sahrens 905fa9e4066Sahrens /* 906fa9e4066Sahrens * As above, but faster; can be used when you have a held dbuf in hand. 907fa9e4066Sahrens */ 908fa9e4066Sahrens void 909fa9e4066Sahrens dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi) 910fa9e4066Sahrens { 911fa9e4066Sahrens dmu_object_info_from_dnode(((dmu_buf_impl_t *)db)->db_dnode, doi); 912fa9e4066Sahrens } 913fa9e4066Sahrens 914fa9e4066Sahrens /* 915fa9e4066Sahrens * Faster still when you only care about the size. 916fa9e4066Sahrens * This is specifically optimized for zfs_getattr(). 917fa9e4066Sahrens */ 918fa9e4066Sahrens void 919fa9e4066Sahrens dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize, u_longlong_t *nblk512) 920fa9e4066Sahrens { 921fa9e4066Sahrens dnode_t *dn = ((dmu_buf_impl_t *)db)->db_dnode; 922fa9e4066Sahrens 923fa9e4066Sahrens *blksize = dn->dn_datablksz; 92499653d4eSeschrock /* add 1 for dnode space */ 92599653d4eSeschrock *nblk512 = ((DN_USED_BYTES(dn->dn_phys) + SPA_MINBLOCKSIZE/2) >> 92699653d4eSeschrock SPA_MINBLOCKSHIFT) + 1; 927fa9e4066Sahrens } 928fa9e4066Sahrens 929ea8dc4b6Seschrock /* 930ea8dc4b6Seschrock * Given a bookmark, return the name of the dataset, object, and range in 931ea8dc4b6Seschrock * human-readable format. 932ea8dc4b6Seschrock */ 933ea8dc4b6Seschrock int 934e9dbad6fSeschrock spa_bookmark_name(spa_t *spa, zbookmark_t *zb, nvlist_t *nvl) 935ea8dc4b6Seschrock { 936ea8dc4b6Seschrock dsl_pool_t *dp; 937ea8dc4b6Seschrock dsl_dataset_t *ds = NULL; 938ea8dc4b6Seschrock objset_t *os = NULL; 939ea8dc4b6Seschrock dnode_t *dn = NULL; 940ea8dc4b6Seschrock int err, shift; 941e9dbad6fSeschrock char dsname[MAXNAMELEN]; 942e9dbad6fSeschrock char objname[32]; 943e9dbad6fSeschrock char range[64]; 944ea8dc4b6Seschrock 945ea8dc4b6Seschrock dp = spa_get_dsl(spa); 946ea8dc4b6Seschrock if (zb->zb_objset != 0) { 947ea8dc4b6Seschrock rw_enter(&dp->dp_config_rwlock, RW_READER); 948ea8dc4b6Seschrock err = dsl_dataset_open_obj(dp, zb->zb_objset, 949ea8dc4b6Seschrock NULL, DS_MODE_NONE, FTAG, &ds); 950ea8dc4b6Seschrock if (err) { 951ea8dc4b6Seschrock rw_exit(&dp->dp_config_rwlock); 952ea8dc4b6Seschrock return (err); 953ea8dc4b6Seschrock } 954ea8dc4b6Seschrock dsl_dataset_name(ds, dsname); 955ea8dc4b6Seschrock dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 956ea8dc4b6Seschrock rw_exit(&dp->dp_config_rwlock); 957ea8dc4b6Seschrock 958ea8dc4b6Seschrock err = dmu_objset_open(dsname, DMU_OST_ANY, DS_MODE_NONE, &os); 959ea8dc4b6Seschrock if (err) 960ea8dc4b6Seschrock goto out; 961ea8dc4b6Seschrock 962ea8dc4b6Seschrock } else { 963ea8dc4b6Seschrock dsl_dataset_name(NULL, dsname); 964ea8dc4b6Seschrock os = dp->dp_meta_objset; 965ea8dc4b6Seschrock } 966ea8dc4b6Seschrock 967ea8dc4b6Seschrock 968ea8dc4b6Seschrock if (zb->zb_object == DMU_META_DNODE_OBJECT) { 969e9dbad6fSeschrock (void) strncpy(objname, "mdn", sizeof (objname)); 970ea8dc4b6Seschrock } else { 971e9dbad6fSeschrock (void) snprintf(objname, sizeof (objname), "%lld", 972ea8dc4b6Seschrock (longlong_t)zb->zb_object); 973ea8dc4b6Seschrock } 974ea8dc4b6Seschrock 975ea8dc4b6Seschrock err = dnode_hold(os->os, zb->zb_object, FTAG, &dn); 976ea8dc4b6Seschrock if (err) 977ea8dc4b6Seschrock goto out; 978ea8dc4b6Seschrock 979ea8dc4b6Seschrock shift = (dn->dn_datablkshift?dn->dn_datablkshift:SPA_MAXBLOCKSHIFT) + 980ea8dc4b6Seschrock zb->zb_level * (dn->dn_indblkshift - SPA_BLKPTRSHIFT); 981e9dbad6fSeschrock (void) snprintf(range, sizeof (range), "%llu-%llu", 982ea8dc4b6Seschrock (u_longlong_t)(zb->zb_blkid << shift), 983ea8dc4b6Seschrock (u_longlong_t)((zb->zb_blkid+1) << shift)); 984ea8dc4b6Seschrock 985e9dbad6fSeschrock if ((err = nvlist_add_string(nvl, ZPOOL_ERR_DATASET, dsname)) != 0 || 986e9dbad6fSeschrock (err = nvlist_add_string(nvl, ZPOOL_ERR_OBJECT, objname)) != 0 || 987e9dbad6fSeschrock (err = nvlist_add_string(nvl, ZPOOL_ERR_RANGE, range)) != 0) 988e9dbad6fSeschrock goto out; 989e9dbad6fSeschrock 990ea8dc4b6Seschrock out: 991ea8dc4b6Seschrock if (dn) 992ea8dc4b6Seschrock dnode_rele(dn, FTAG); 993ea8dc4b6Seschrock if (os && os != dp->dp_meta_objset) 994ea8dc4b6Seschrock dmu_objset_close(os); 995ea8dc4b6Seschrock return (err); 996ea8dc4b6Seschrock } 997ea8dc4b6Seschrock 998fa9e4066Sahrens void 999fa9e4066Sahrens byteswap_uint64_array(void *vbuf, size_t size) 1000fa9e4066Sahrens { 1001fa9e4066Sahrens uint64_t *buf = vbuf; 1002fa9e4066Sahrens size_t count = size >> 3; 1003fa9e4066Sahrens int i; 1004fa9e4066Sahrens 1005fa9e4066Sahrens ASSERT((size & 7) == 0); 1006fa9e4066Sahrens 1007fa9e4066Sahrens for (i = 0; i < count; i++) 1008fa9e4066Sahrens buf[i] = BSWAP_64(buf[i]); 1009fa9e4066Sahrens } 1010fa9e4066Sahrens 1011fa9e4066Sahrens void 1012fa9e4066Sahrens byteswap_uint32_array(void *vbuf, size_t size) 1013fa9e4066Sahrens { 1014fa9e4066Sahrens uint32_t *buf = vbuf; 1015fa9e4066Sahrens size_t count = size >> 2; 1016fa9e4066Sahrens int i; 1017fa9e4066Sahrens 1018fa9e4066Sahrens ASSERT((size & 3) == 0); 1019fa9e4066Sahrens 1020fa9e4066Sahrens for (i = 0; i < count; i++) 1021fa9e4066Sahrens buf[i] = BSWAP_32(buf[i]); 1022fa9e4066Sahrens } 1023fa9e4066Sahrens 1024fa9e4066Sahrens void 1025fa9e4066Sahrens byteswap_uint16_array(void *vbuf, size_t size) 1026fa9e4066Sahrens { 1027fa9e4066Sahrens uint16_t *buf = vbuf; 1028fa9e4066Sahrens size_t count = size >> 1; 1029fa9e4066Sahrens int i; 1030fa9e4066Sahrens 1031fa9e4066Sahrens ASSERT((size & 1) == 0); 1032fa9e4066Sahrens 1033fa9e4066Sahrens for (i = 0; i < count; i++) 1034fa9e4066Sahrens buf[i] = BSWAP_16(buf[i]); 1035fa9e4066Sahrens } 1036fa9e4066Sahrens 1037fa9e4066Sahrens /* ARGSUSED */ 1038fa9e4066Sahrens void 1039fa9e4066Sahrens byteswap_uint8_array(void *vbuf, size_t size) 1040fa9e4066Sahrens { 1041fa9e4066Sahrens } 1042fa9e4066Sahrens 1043fa9e4066Sahrens void 1044fa9e4066Sahrens dmu_init(void) 1045fa9e4066Sahrens { 1046fa9e4066Sahrens dbuf_init(); 1047fa9e4066Sahrens dnode_init(); 1048fa9e4066Sahrens arc_init(); 1049fa9e4066Sahrens } 1050fa9e4066Sahrens 1051fa9e4066Sahrens void 1052fa9e4066Sahrens dmu_fini(void) 1053fa9e4066Sahrens { 1054fa9e4066Sahrens arc_fini(); 1055fa9e4066Sahrens dnode_fini(); 1056fa9e4066Sahrens dbuf_fini(); 1057fa9e4066Sahrens } 1058