/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
27fa9e4066Sahrens */ 28fa9e4066Sahrens 29fa9e4066Sahrens #include <sys/zfs_context.h> 30fa9e4066Sahrens #include <sys/dmu.h> 312f3d8780SMatthew Ahrens #include <sys/dmu_send.h> 32fa9e4066Sahrens #include <sys/dmu_impl.h> 33fa9e4066Sahrens #include <sys/dbuf.h> 34fa9e4066Sahrens #include <sys/dmu_objset.h> 35fa9e4066Sahrens #include <sys/dsl_dataset.h> 36fa9e4066Sahrens #include <sys/dsl_dir.h> 37fa9e4066Sahrens #include <sys/dmu_tx.h> 38fa9e4066Sahrens #include <sys/spa.h> 39fa9e4066Sahrens #include <sys/zio.h> 40fa9e4066Sahrens #include <sys/dmu_zfetch.h> 410a586ceaSMark Shellenbaum #include <sys/sa.h> 420a586ceaSMark Shellenbaum #include <sys/sa_impl.h> 435d7b4d43SMatthew Ahrens #include <sys/zfeature.h> 445d7b4d43SMatthew Ahrens #include <sys/blkptr.h> 45bf16b11eSMatthew Ahrens #include <sys/range_tree.h> 46fa9e4066Sahrens 47713d6c20SMatthew Ahrens /* 48713d6c20SMatthew Ahrens * Number of times that zfs_free_range() took the slow path while doing 49713d6c20SMatthew Ahrens * a zfs receive. A nonzero value indicates a potential performance problem. 50713d6c20SMatthew Ahrens */ 51713d6c20SMatthew Ahrens uint64_t zfs_free_range_recv_miss; 52713d6c20SMatthew Ahrens 53fa9e4066Sahrens static void dbuf_destroy(dmu_buf_impl_t *db); 543b2aab18SMatthew Ahrens static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx); 55088f3894Sahrens static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx); 56fa9e4066Sahrens 57fa9e4066Sahrens /* 58fa9e4066Sahrens * Global data structures and functions for the dbuf cache. 
59fa9e4066Sahrens */ 60fa9e4066Sahrens static kmem_cache_t *dbuf_cache; 61fa9e4066Sahrens 62fa9e4066Sahrens /* ARGSUSED */ 63fa9e4066Sahrens static int 64fa9e4066Sahrens dbuf_cons(void *vdb, void *unused, int kmflag) 65fa9e4066Sahrens { 66fa9e4066Sahrens dmu_buf_impl_t *db = vdb; 67fa9e4066Sahrens bzero(db, sizeof (dmu_buf_impl_t)); 68fa9e4066Sahrens 69fa9e4066Sahrens mutex_init(&db->db_mtx, NULL, MUTEX_DEFAULT, NULL); 70fa9e4066Sahrens cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL); 71fa9e4066Sahrens refcount_create(&db->db_holds); 720f6d88adSAlex Reece 730f6d88adSAlex Reece db->db_creation = gethrtime(); 740f6d88adSAlex Reece 75fa9e4066Sahrens return (0); 76fa9e4066Sahrens } 77fa9e4066Sahrens 78fa9e4066Sahrens /* ARGSUSED */ 79fa9e4066Sahrens static void 80fa9e4066Sahrens dbuf_dest(void *vdb, void *unused) 81fa9e4066Sahrens { 82fa9e4066Sahrens dmu_buf_impl_t *db = vdb; 83fa9e4066Sahrens mutex_destroy(&db->db_mtx); 84fa9e4066Sahrens cv_destroy(&db->db_changed); 85fa9e4066Sahrens refcount_destroy(&db->db_holds); 86fa9e4066Sahrens } 87fa9e4066Sahrens 88fa9e4066Sahrens /* 89fa9e4066Sahrens * dbuf hash table routines 90fa9e4066Sahrens */ 91fa9e4066Sahrens static dbuf_hash_table_t dbuf_hash_table; 92fa9e4066Sahrens 93fa9e4066Sahrens static uint64_t dbuf_hash_count; 94fa9e4066Sahrens 95fa9e4066Sahrens static uint64_t 96fa9e4066Sahrens dbuf_hash(void *os, uint64_t obj, uint8_t lvl, uint64_t blkid) 97fa9e4066Sahrens { 98fa9e4066Sahrens uintptr_t osv = (uintptr_t)os; 99fa9e4066Sahrens uint64_t crc = -1ULL; 100fa9e4066Sahrens 101fa9e4066Sahrens ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); 102fa9e4066Sahrens crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (lvl)) & 0xFF]; 103fa9e4066Sahrens crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (osv >> 6)) & 0xFF]; 104fa9e4066Sahrens crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 0)) & 0xFF]; 105fa9e4066Sahrens crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 8)) & 0xFF]; 106fa9e4066Sahrens crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ 
(blkid >> 0)) & 0xFF]; 107fa9e4066Sahrens crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (blkid >> 8)) & 0xFF]; 108fa9e4066Sahrens 109fa9e4066Sahrens crc ^= (osv>>14) ^ (obj>>16) ^ (blkid>>16); 110fa9e4066Sahrens 111fa9e4066Sahrens return (crc); 112fa9e4066Sahrens } 113fa9e4066Sahrens 114fa9e4066Sahrens #define DBUF_HASH(os, obj, level, blkid) dbuf_hash(os, obj, level, blkid); 115fa9e4066Sahrens 116fa9e4066Sahrens #define DBUF_EQUAL(dbuf, os, obj, level, blkid) \ 117fa9e4066Sahrens ((dbuf)->db.db_object == (obj) && \ 118fa9e4066Sahrens (dbuf)->db_objset == (os) && \ 119fa9e4066Sahrens (dbuf)->db_level == (level) && \ 120fa9e4066Sahrens (dbuf)->db_blkid == (blkid)) 121fa9e4066Sahrens 122fa9e4066Sahrens dmu_buf_impl_t * 123fa9e4066Sahrens dbuf_find(dnode_t *dn, uint8_t level, uint64_t blkid) 124fa9e4066Sahrens { 125fa9e4066Sahrens dbuf_hash_table_t *h = &dbuf_hash_table; 126503ad85cSMatthew Ahrens objset_t *os = dn->dn_objset; 127fa9e4066Sahrens uint64_t obj = dn->dn_object; 128fa9e4066Sahrens uint64_t hv = DBUF_HASH(os, obj, level, blkid); 129fa9e4066Sahrens uint64_t idx = hv & h->hash_table_mask; 130fa9e4066Sahrens dmu_buf_impl_t *db; 131fa9e4066Sahrens 132fa9e4066Sahrens mutex_enter(DBUF_HASH_MUTEX(h, idx)); 133fa9e4066Sahrens for (db = h->hash_table[idx]; db != NULL; db = db->db_hash_next) { 134fa9e4066Sahrens if (DBUF_EQUAL(db, os, obj, level, blkid)) { 135fa9e4066Sahrens mutex_enter(&db->db_mtx); 136ea8dc4b6Seschrock if (db->db_state != DB_EVICTING) { 137fa9e4066Sahrens mutex_exit(DBUF_HASH_MUTEX(h, idx)); 138fa9e4066Sahrens return (db); 139fa9e4066Sahrens } 140fa9e4066Sahrens mutex_exit(&db->db_mtx); 141fa9e4066Sahrens } 142fa9e4066Sahrens } 143fa9e4066Sahrens mutex_exit(DBUF_HASH_MUTEX(h, idx)); 144fa9e4066Sahrens return (NULL); 145fa9e4066Sahrens } 146fa9e4066Sahrens 147fa9e4066Sahrens /* 148fa9e4066Sahrens * Insert an entry into the hash table. 
If there is already an element 149fa9e4066Sahrens * equal to elem in the hash table, then the already existing element 150fa9e4066Sahrens * will be returned and the new element will not be inserted. 151fa9e4066Sahrens * Otherwise returns NULL. 152fa9e4066Sahrens */ 153fa9e4066Sahrens static dmu_buf_impl_t * 154fa9e4066Sahrens dbuf_hash_insert(dmu_buf_impl_t *db) 155fa9e4066Sahrens { 156fa9e4066Sahrens dbuf_hash_table_t *h = &dbuf_hash_table; 157503ad85cSMatthew Ahrens objset_t *os = db->db_objset; 158fa9e4066Sahrens uint64_t obj = db->db.db_object; 159fa9e4066Sahrens int level = db->db_level; 160fa9e4066Sahrens uint64_t blkid = db->db_blkid; 161fa9e4066Sahrens uint64_t hv = DBUF_HASH(os, obj, level, blkid); 162fa9e4066Sahrens uint64_t idx = hv & h->hash_table_mask; 163fa9e4066Sahrens dmu_buf_impl_t *dbf; 164fa9e4066Sahrens 165fa9e4066Sahrens mutex_enter(DBUF_HASH_MUTEX(h, idx)); 166fa9e4066Sahrens for (dbf = h->hash_table[idx]; dbf != NULL; dbf = dbf->db_hash_next) { 167fa9e4066Sahrens if (DBUF_EQUAL(dbf, os, obj, level, blkid)) { 168fa9e4066Sahrens mutex_enter(&dbf->db_mtx); 169ea8dc4b6Seschrock if (dbf->db_state != DB_EVICTING) { 170fa9e4066Sahrens mutex_exit(DBUF_HASH_MUTEX(h, idx)); 171fa9e4066Sahrens return (dbf); 172fa9e4066Sahrens } 173fa9e4066Sahrens mutex_exit(&dbf->db_mtx); 174fa9e4066Sahrens } 175fa9e4066Sahrens } 176fa9e4066Sahrens 177fa9e4066Sahrens mutex_enter(&db->db_mtx); 178fa9e4066Sahrens db->db_hash_next = h->hash_table[idx]; 179fa9e4066Sahrens h->hash_table[idx] = db; 180fa9e4066Sahrens mutex_exit(DBUF_HASH_MUTEX(h, idx)); 181*1a5e258fSJosef 'Jeff' Sipek atomic_inc_64(&dbuf_hash_count); 182fa9e4066Sahrens 183fa9e4066Sahrens return (NULL); 184fa9e4066Sahrens } 185fa9e4066Sahrens 186fa9e4066Sahrens /* 187bbfa8ea8SMatthew Ahrens * Remove an entry from the hash table. It must be in the EVICTING state. 
188fa9e4066Sahrens */ 189fa9e4066Sahrens static void 190fa9e4066Sahrens dbuf_hash_remove(dmu_buf_impl_t *db) 191fa9e4066Sahrens { 192fa9e4066Sahrens dbuf_hash_table_t *h = &dbuf_hash_table; 193fa9e4066Sahrens uint64_t hv = DBUF_HASH(db->db_objset, db->db.db_object, 194fa9e4066Sahrens db->db_level, db->db_blkid); 195fa9e4066Sahrens uint64_t idx = hv & h->hash_table_mask; 196fa9e4066Sahrens dmu_buf_impl_t *dbf, **dbp; 197fa9e4066Sahrens 198fa9e4066Sahrens /* 199bbfa8ea8SMatthew Ahrens * We musn't hold db_mtx to maintain lock ordering: 200fa9e4066Sahrens * DBUF_HASH_MUTEX > db_mtx. 201fa9e4066Sahrens */ 202fa9e4066Sahrens ASSERT(refcount_is_zero(&db->db_holds)); 203ea8dc4b6Seschrock ASSERT(db->db_state == DB_EVICTING); 204fa9e4066Sahrens ASSERT(!MUTEX_HELD(&db->db_mtx)); 205fa9e4066Sahrens 206fa9e4066Sahrens mutex_enter(DBUF_HASH_MUTEX(h, idx)); 207fa9e4066Sahrens dbp = &h->hash_table[idx]; 208fa9e4066Sahrens while ((dbf = *dbp) != db) { 209fa9e4066Sahrens dbp = &dbf->db_hash_next; 210fa9e4066Sahrens ASSERT(dbf != NULL); 211fa9e4066Sahrens } 212fa9e4066Sahrens *dbp = db->db_hash_next; 213fa9e4066Sahrens db->db_hash_next = NULL; 214fa9e4066Sahrens mutex_exit(DBUF_HASH_MUTEX(h, idx)); 215*1a5e258fSJosef 'Jeff' Sipek atomic_dec_64(&dbuf_hash_count); 216fa9e4066Sahrens } 217fa9e4066Sahrens 218ea8dc4b6Seschrock static arc_evict_func_t dbuf_do_evict; 219fa9e4066Sahrens 220fa9e4066Sahrens static void 221fa9e4066Sahrens dbuf_evict_user(dmu_buf_impl_t *db) 222fa9e4066Sahrens { 223fa9e4066Sahrens ASSERT(MUTEX_HELD(&db->db_mtx)); 224fa9e4066Sahrens 225c717a561Smaybee if (db->db_level != 0 || db->db_evict_func == NULL) 226fa9e4066Sahrens return; 227fa9e4066Sahrens 228c717a561Smaybee if (db->db_user_data_ptr_ptr) 229c717a561Smaybee *db->db_user_data_ptr_ptr = db->db.db_data; 230c717a561Smaybee db->db_evict_func(&db->db, db->db_user_ptr); 231c717a561Smaybee db->db_user_ptr = NULL; 232c717a561Smaybee db->db_user_data_ptr_ptr = NULL; 233c717a561Smaybee db->db_evict_func = NULL; 
234fa9e4066Sahrens } 235fa9e4066Sahrens 236744947dcSTom Erickson boolean_t 237744947dcSTom Erickson dbuf_is_metadata(dmu_buf_impl_t *db) 238744947dcSTom Erickson { 239744947dcSTom Erickson if (db->db_level > 0) { 240744947dcSTom Erickson return (B_TRUE); 241744947dcSTom Erickson } else { 242744947dcSTom Erickson boolean_t is_metadata; 243744947dcSTom Erickson 244744947dcSTom Erickson DB_DNODE_ENTER(db); 245ad135b5dSChristopher Siden is_metadata = DMU_OT_IS_METADATA(DB_DNODE(db)->dn_type); 246744947dcSTom Erickson DB_DNODE_EXIT(db); 247744947dcSTom Erickson 248744947dcSTom Erickson return (is_metadata); 249744947dcSTom Erickson } 250744947dcSTom Erickson } 251744947dcSTom Erickson 252fa9e4066Sahrens void 253ea8dc4b6Seschrock dbuf_evict(dmu_buf_impl_t *db) 254ea8dc4b6Seschrock { 255ea8dc4b6Seschrock ASSERT(MUTEX_HELD(&db->db_mtx)); 256ea8dc4b6Seschrock ASSERT(db->db_buf == NULL); 257c717a561Smaybee ASSERT(db->db_data_pending == NULL); 258ea8dc4b6Seschrock 259ea8dc4b6Seschrock dbuf_clear(db); 260ea8dc4b6Seschrock dbuf_destroy(db); 261ea8dc4b6Seschrock } 262ea8dc4b6Seschrock 263ea8dc4b6Seschrock void 264fa9e4066Sahrens dbuf_init(void) 265fa9e4066Sahrens { 266ea8dc4b6Seschrock uint64_t hsize = 1ULL << 16; 267fa9e4066Sahrens dbuf_hash_table_t *h = &dbuf_hash_table; 268fa9e4066Sahrens int i; 269fa9e4066Sahrens 270fa9e4066Sahrens /* 271fa9e4066Sahrens * The hash table is big enough to fill all of physical memory 272ea8dc4b6Seschrock * with an average 4K block size. The table will take up 273ea8dc4b6Seschrock * totalmem*sizeof(void*)/4K (i.e. 2MB/GB with 8-byte pointers). 
274fa9e4066Sahrens */ 275ea8dc4b6Seschrock while (hsize * 4096 < physmem * PAGESIZE) 276fa9e4066Sahrens hsize <<= 1; 277fa9e4066Sahrens 278ea8dc4b6Seschrock retry: 279fa9e4066Sahrens h->hash_table_mask = hsize - 1; 280ea8dc4b6Seschrock h->hash_table = kmem_zalloc(hsize * sizeof (void *), KM_NOSLEEP); 281ea8dc4b6Seschrock if (h->hash_table == NULL) { 282ea8dc4b6Seschrock /* XXX - we should really return an error instead of assert */ 283ea8dc4b6Seschrock ASSERT(hsize > (1ULL << 10)); 284ea8dc4b6Seschrock hsize >>= 1; 285ea8dc4b6Seschrock goto retry; 286ea8dc4b6Seschrock } 287fa9e4066Sahrens 288fa9e4066Sahrens dbuf_cache = kmem_cache_create("dmu_buf_impl_t", 289fa9e4066Sahrens sizeof (dmu_buf_impl_t), 290fa9e4066Sahrens 0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0); 291fa9e4066Sahrens 292fa9e4066Sahrens for (i = 0; i < DBUF_MUTEXES; i++) 293fa9e4066Sahrens mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL); 294fa9e4066Sahrens } 295fa9e4066Sahrens 296fa9e4066Sahrens void 297fa9e4066Sahrens dbuf_fini(void) 298fa9e4066Sahrens { 299fa9e4066Sahrens dbuf_hash_table_t *h = &dbuf_hash_table; 300fa9e4066Sahrens int i; 301fa9e4066Sahrens 302fa9e4066Sahrens for (i = 0; i < DBUF_MUTEXES; i++) 303fa9e4066Sahrens mutex_destroy(&h->hash_mutexes[i]); 304fa9e4066Sahrens kmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *)); 305fa9e4066Sahrens kmem_cache_destroy(dbuf_cache); 306fa9e4066Sahrens } 307fa9e4066Sahrens 308fa9e4066Sahrens /* 309fa9e4066Sahrens * Other stuff. 
310fa9e4066Sahrens */ 311fa9e4066Sahrens 3129c9dc39aSek110237 #ifdef ZFS_DEBUG 313fa9e4066Sahrens static void 314fa9e4066Sahrens dbuf_verify(dmu_buf_impl_t *db) 315fa9e4066Sahrens { 316744947dcSTom Erickson dnode_t *dn; 317b24ab676SJeff Bonwick dbuf_dirty_record_t *dr; 318fa9e4066Sahrens 319fa9e4066Sahrens ASSERT(MUTEX_HELD(&db->db_mtx)); 320fa9e4066Sahrens 321fa9e4066Sahrens if (!(zfs_flags & ZFS_DEBUG_DBUF_VERIFY)) 322fa9e4066Sahrens return; 323fa9e4066Sahrens 324fa9e4066Sahrens ASSERT(db->db_objset != NULL); 325744947dcSTom Erickson DB_DNODE_ENTER(db); 326744947dcSTom Erickson dn = DB_DNODE(db); 327fa9e4066Sahrens if (dn == NULL) { 328fa9e4066Sahrens ASSERT(db->db_parent == NULL); 329fa9e4066Sahrens ASSERT(db->db_blkptr == NULL); 330fa9e4066Sahrens } else { 331fa9e4066Sahrens ASSERT3U(db->db.db_object, ==, dn->dn_object); 332fa9e4066Sahrens ASSERT3P(db->db_objset, ==, dn->dn_objset); 333fa9e4066Sahrens ASSERT3U(db->db_level, <, dn->dn_nlevels); 334744947dcSTom Erickson ASSERT(db->db_blkid == DMU_BONUS_BLKID || 335744947dcSTom Erickson db->db_blkid == DMU_SPILL_BLKID || 3360f6d88adSAlex Reece !avl_is_empty(&dn->dn_dbufs)); 337fa9e4066Sahrens } 3380a586ceaSMark Shellenbaum if (db->db_blkid == DMU_BONUS_BLKID) { 339fa9e4066Sahrens ASSERT(dn != NULL); 3401934e92fSmaybee ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen); 3410a586ceaSMark Shellenbaum ASSERT3U(db->db.db_offset, ==, DMU_BONUS_BLKID); 3420a586ceaSMark Shellenbaum } else if (db->db_blkid == DMU_SPILL_BLKID) { 3430a586ceaSMark Shellenbaum ASSERT(dn != NULL); 3440a586ceaSMark Shellenbaum ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen); 345fb09f5aaSMadhav Suresh ASSERT0(db->db.db_offset); 346fa9e4066Sahrens } else { 347fa9e4066Sahrens ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size); 348fa9e4066Sahrens } 349fa9e4066Sahrens 350b24ab676SJeff Bonwick for (dr = db->db_data_pending; dr != NULL; dr = dr->dr_next) 351b24ab676SJeff Bonwick ASSERT(dr->dr_dbuf == db); 352b24ab676SJeff Bonwick 353b24ab676SJeff 
Bonwick for (dr = db->db_last_dirty; dr != NULL; dr = dr->dr_next) 354b24ab676SJeff Bonwick ASSERT(dr->dr_dbuf == db); 355b24ab676SJeff Bonwick 35688b7b0f2SMatthew Ahrens /* 35788b7b0f2SMatthew Ahrens * We can't assert that db_size matches dn_datablksz because it 35888b7b0f2SMatthew Ahrens * can be momentarily different when another thread is doing 35988b7b0f2SMatthew Ahrens * dnode_set_blksz(). 36088b7b0f2SMatthew Ahrens */ 36188b7b0f2SMatthew Ahrens if (db->db_level == 0 && db->db.db_object == DMU_META_DNODE_OBJECT) { 362b24ab676SJeff Bonwick dr = db->db_data_pending; 363fa9e4066Sahrens /* 36488b7b0f2SMatthew Ahrens * It should only be modified in syncing context, so 36588b7b0f2SMatthew Ahrens * make sure we only have one copy of the data. 366fa9e4066Sahrens */ 367c717a561Smaybee ASSERT(dr == NULL || dr->dt.dl.dr_data == db->db_buf); 368fa9e4066Sahrens } 369fa9e4066Sahrens 370fa9e4066Sahrens /* verify db->db_blkptr */ 371fa9e4066Sahrens if (db->db_blkptr) { 372fa9e4066Sahrens if (db->db_parent == dn->dn_dbuf) { 373fa9e4066Sahrens /* db is pointed to by the dnode */ 374fa9e4066Sahrens /* ASSERT3U(db->db_blkid, <, dn->dn_nblkptr); */ 37514843421SMatthew Ahrens if (DMU_OBJECT_IS_SPECIAL(db->db.db_object)) 376fa9e4066Sahrens ASSERT(db->db_parent == NULL); 377fa9e4066Sahrens else 378fa9e4066Sahrens ASSERT(db->db_parent != NULL); 3790a586ceaSMark Shellenbaum if (db->db_blkid != DMU_SPILL_BLKID) 380fa9e4066Sahrens ASSERT3P(db->db_blkptr, ==, 381fa9e4066Sahrens &dn->dn_phys->dn_blkptr[db->db_blkid]); 382fa9e4066Sahrens } else { 383fa9e4066Sahrens /* db is pointed to by an indirect block */ 384fa9e4066Sahrens int epb = db->db_parent->db.db_size >> SPA_BLKPTRSHIFT; 385fa9e4066Sahrens ASSERT3U(db->db_parent->db_level, ==, db->db_level+1); 386fa9e4066Sahrens ASSERT3U(db->db_parent->db.db_object, ==, 387fa9e4066Sahrens db->db.db_object); 388fa9e4066Sahrens /* 389fa9e4066Sahrens * dnode_grow_indblksz() can make this fail if we don't 390fa9e4066Sahrens * have the struct_rwlock. 
XXX indblksz no longer 391fa9e4066Sahrens * grows. safe to do this now? 392fa9e4066Sahrens */ 393744947dcSTom Erickson if (RW_WRITE_HELD(&dn->dn_struct_rwlock)) { 394fa9e4066Sahrens ASSERT3P(db->db_blkptr, ==, 395fa9e4066Sahrens ((blkptr_t *)db->db_parent->db.db_data + 396fa9e4066Sahrens db->db_blkid % epb)); 397fa9e4066Sahrens } 398fa9e4066Sahrens } 399fa9e4066Sahrens } 400fa9e4066Sahrens if ((db->db_blkptr == NULL || BP_IS_HOLE(db->db_blkptr)) && 4013f9d6ad7SLin Ling (db->db_buf == NULL || db->db_buf->b_data) && 4020a586ceaSMark Shellenbaum db->db.db_data && db->db_blkid != DMU_BONUS_BLKID && 403fa9e4066Sahrens db->db_state != DB_FILL && !dn->dn_free_txg) { 404fa9e4066Sahrens /* 405fa9e4066Sahrens * If the blkptr isn't set but they have nonzero data, 406fa9e4066Sahrens * it had better be dirty, otherwise we'll lose that 407fa9e4066Sahrens * data when we evict this buffer. 408fa9e4066Sahrens */ 409fa9e4066Sahrens if (db->db_dirtycnt == 0) { 410fa9e4066Sahrens uint64_t *buf = db->db.db_data; 411fa9e4066Sahrens int i; 412fa9e4066Sahrens 413fa9e4066Sahrens for (i = 0; i < db->db.db_size >> 3; i++) { 414fa9e4066Sahrens ASSERT(buf[i] == 0); 415fa9e4066Sahrens } 416fa9e4066Sahrens } 417fa9e4066Sahrens } 418744947dcSTom Erickson DB_DNODE_EXIT(db); 419fa9e4066Sahrens } 4209c9dc39aSek110237 #endif 421fa9e4066Sahrens 422fa9e4066Sahrens static void 423fa9e4066Sahrens dbuf_update_data(dmu_buf_impl_t *db) 424fa9e4066Sahrens { 425fa9e4066Sahrens ASSERT(MUTEX_HELD(&db->db_mtx)); 426c717a561Smaybee if (db->db_level == 0 && db->db_user_data_ptr_ptr) { 427fa9e4066Sahrens ASSERT(!refcount_is_zero(&db->db_holds)); 428c717a561Smaybee *db->db_user_data_ptr_ptr = db->db.db_data; 429fa9e4066Sahrens } 430fa9e4066Sahrens } 431fa9e4066Sahrens 432fa9e4066Sahrens static void 433fa9e4066Sahrens dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf) 434fa9e4066Sahrens { 435fa9e4066Sahrens ASSERT(MUTEX_HELD(&db->db_mtx)); 436fa9e4066Sahrens db->db_buf = buf; 437ea8dc4b6Seschrock if (buf != NULL) { 
438ea8dc4b6Seschrock ASSERT(buf->b_data != NULL); 439fa9e4066Sahrens db->db.db_data = buf->b_data; 440ea8dc4b6Seschrock if (!arc_released(buf)) 441ea8dc4b6Seschrock arc_set_callback(buf, dbuf_do_evict, db); 442fa9e4066Sahrens dbuf_update_data(db); 443ea8dc4b6Seschrock } else { 444ea8dc4b6Seschrock dbuf_evict_user(db); 445ea8dc4b6Seschrock db->db.db_data = NULL; 44682c9918fSTim Haley if (db->db_state != DB_NOFILL) 447ea8dc4b6Seschrock db->db_state = DB_UNCACHED; 448ea8dc4b6Seschrock } 449fa9e4066Sahrens } 450fa9e4066Sahrens 451c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 452c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Loan out an arc_buf for read. Return the loaned arc_buf. 453c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 454c242f9a0Schunli zhang - Sun Microsystems - Irvine United States arc_buf_t * 455c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dbuf_loan_arcbuf(dmu_buf_impl_t *db) 456c242f9a0Schunli zhang - Sun Microsystems - Irvine United States { 457c242f9a0Schunli zhang - Sun Microsystems - Irvine United States arc_buf_t *abuf; 458c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 459c242f9a0Schunli zhang - Sun Microsystems - Irvine United States mutex_enter(&db->db_mtx); 460c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (arc_released(db->db_buf) || refcount_count(&db->db_holds) > 1) { 461c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int blksz = db->db.db_size; 46243466aaeSMax Grossman spa_t *spa = db->db_objset->os_spa; 463744947dcSTom Erickson 464c242f9a0Schunli zhang - Sun Microsystems - Irvine United States mutex_exit(&db->db_mtx); 465744947dcSTom Erickson abuf = arc_loan_buf(spa, blksz); 466c242f9a0Schunli zhang - Sun Microsystems - Irvine United States bcopy(db->db.db_data, abuf->b_data, blksz); 467c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else { 468c242f9a0Schunli zhang - Sun Microsystems - Irvine 
United States abuf = db->db_buf; 469c242f9a0Schunli zhang - Sun Microsystems - Irvine United States arc_loan_inuse_buf(abuf, db); 470c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dbuf_set_data(db, NULL); 471c242f9a0Schunli zhang - Sun Microsystems - Irvine United States mutex_exit(&db->db_mtx); 472c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 473c242f9a0Schunli zhang - Sun Microsystems - Irvine United States return (abuf); 474c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 475c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 476fa9e4066Sahrens uint64_t 477fa9e4066Sahrens dbuf_whichblock(dnode_t *dn, uint64_t offset) 478fa9e4066Sahrens { 479fa9e4066Sahrens if (dn->dn_datablkshift) { 480fa9e4066Sahrens return (offset >> dn->dn_datablkshift); 481fa9e4066Sahrens } else { 482fa9e4066Sahrens ASSERT3U(offset, <, dn->dn_datablksz); 483fa9e4066Sahrens return (0); 484fa9e4066Sahrens } 485fa9e4066Sahrens } 486fa9e4066Sahrens 487fa9e4066Sahrens static void 488fa9e4066Sahrens dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb) 489fa9e4066Sahrens { 490fa9e4066Sahrens dmu_buf_impl_t *db = vdb; 491fa9e4066Sahrens 492fa9e4066Sahrens mutex_enter(&db->db_mtx); 493fa9e4066Sahrens ASSERT3U(db->db_state, ==, DB_READ); 494fa9e4066Sahrens /* 495fa9e4066Sahrens * All reads are synchronous, so we must have a hold on the dbuf 496fa9e4066Sahrens */ 497fa9e4066Sahrens ASSERT(refcount_count(&db->db_holds) > 0); 498ea8dc4b6Seschrock ASSERT(db->db_buf == NULL); 499fa9e4066Sahrens ASSERT(db->db.db_data == NULL); 500c717a561Smaybee if (db->db_level == 0 && db->db_freed_in_flight) { 501fa9e4066Sahrens /* we were freed in flight; disregard any error */ 502fa9e4066Sahrens arc_release(buf, db); 503fa9e4066Sahrens bzero(buf->b_data, db->db.db_size); 5046b4acc8bSahrens arc_buf_freeze(buf); 505c717a561Smaybee db->db_freed_in_flight = FALSE; 506fa9e4066Sahrens dbuf_set_data(db, buf); 507fa9e4066Sahrens db->db_state = DB_CACHED; 
508fa9e4066Sahrens } else if (zio == NULL || zio->io_error == 0) { 509fa9e4066Sahrens dbuf_set_data(db, buf); 510fa9e4066Sahrens db->db_state = DB_CACHED; 511fa9e4066Sahrens } else { 5120a586ceaSMark Shellenbaum ASSERT(db->db_blkid != DMU_BONUS_BLKID); 513fa9e4066Sahrens ASSERT3P(db->db_buf, ==, NULL); 5143b2aab18SMatthew Ahrens VERIFY(arc_buf_remove_ref(buf, db)); 515ea8dc4b6Seschrock db->db_state = DB_UNCACHED; 516fa9e4066Sahrens } 517fa9e4066Sahrens cv_broadcast(&db->db_changed); 5183f9d6ad7SLin Ling dbuf_rele_and_unlock(db, NULL); 519fa9e4066Sahrens } 520fa9e4066Sahrens 521ea8dc4b6Seschrock static void 52213506d1eSmaybee dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags) 523fa9e4066Sahrens { 524744947dcSTom Erickson dnode_t *dn; 5257802d7bfSMatthew Ahrens zbookmark_phys_t zb; 52613506d1eSmaybee uint32_t aflags = ARC_NOWAIT; 527fa9e4066Sahrens 528744947dcSTom Erickson DB_DNODE_ENTER(db); 529744947dcSTom Erickson dn = DB_DNODE(db); 530fa9e4066Sahrens ASSERT(!refcount_is_zero(&db->db_holds)); 531fa9e4066Sahrens /* We need the struct_rwlock to prevent db_blkptr from changing. 
*/ 532088f3894Sahrens ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); 533ea8dc4b6Seschrock ASSERT(MUTEX_HELD(&db->db_mtx)); 534ea8dc4b6Seschrock ASSERT(db->db_state == DB_UNCACHED); 535ea8dc4b6Seschrock ASSERT(db->db_buf == NULL); 536fa9e4066Sahrens 5370a586ceaSMark Shellenbaum if (db->db_blkid == DMU_BONUS_BLKID) { 538cf04dda1SMark Maybee int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen); 5391934e92fSmaybee 5401934e92fSmaybee ASSERT3U(bonuslen, <=, db->db.db_size); 541ea8dc4b6Seschrock db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN); 5425a98e54bSBrendan Gregg - Sun Microsystems arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); 5431934e92fSmaybee if (bonuslen < DN_MAX_BONUSLEN) 544ea8dc4b6Seschrock bzero(db->db.db_data, DN_MAX_BONUSLEN); 545cf04dda1SMark Maybee if (bonuslen) 546cf04dda1SMark Maybee bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, bonuslen); 547744947dcSTom Erickson DB_DNODE_EXIT(db); 548ea8dc4b6Seschrock dbuf_update_data(db); 549fa9e4066Sahrens db->db_state = DB_CACHED; 550fa9e4066Sahrens mutex_exit(&db->db_mtx); 551fa9e4066Sahrens return; 552fa9e4066Sahrens } 553fa9e4066Sahrens 5541c8564a7SMark Maybee /* 5551c8564a7SMark Maybee * Recheck BP_IS_HOLE() after dnode_block_freed() in case dnode_sync() 5561c8564a7SMark Maybee * processes the delete record and clears the bp while we are waiting 5571c8564a7SMark Maybee * for the dn_mtx (resulting in a "no" from block_freed). 
5581c8564a7SMark Maybee */ 559088f3894Sahrens if (db->db_blkptr == NULL || BP_IS_HOLE(db->db_blkptr) || 5601c8564a7SMark Maybee (db->db_level == 0 && (dnode_block_freed(dn, db->db_blkid) || 5611c8564a7SMark Maybee BP_IS_HOLE(db->db_blkptr)))) { 562ad23a2dbSjohansen arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); 563ad23a2dbSjohansen 564744947dcSTom Erickson DB_DNODE_EXIT(db); 56543466aaeSMax Grossman dbuf_set_data(db, arc_buf_alloc(db->db_objset->os_spa, 56643466aaeSMax Grossman db->db.db_size, db, type)); 567fa9e4066Sahrens bzero(db->db.db_data, db->db.db_size); 568fa9e4066Sahrens db->db_state = DB_CACHED; 56913506d1eSmaybee *flags |= DB_RF_CACHED; 570fa9e4066Sahrens mutex_exit(&db->db_mtx); 571fa9e4066Sahrens return; 572fa9e4066Sahrens } 573fa9e4066Sahrens 574744947dcSTom Erickson DB_DNODE_EXIT(db); 575744947dcSTom Erickson 576fa9e4066Sahrens db->db_state = DB_READ; 577fa9e4066Sahrens mutex_exit(&db->db_mtx); 578fa9e4066Sahrens 5793baa08fcSek110237 if (DBUF_IS_L2CACHEABLE(db)) 5803baa08fcSek110237 aflags |= ARC_L2CACHE; 581aad02571SSaso Kiselkov if (DBUF_IS_L2COMPRESSIBLE(db)) 582aad02571SSaso Kiselkov aflags |= ARC_L2COMPRESS; 5833baa08fcSek110237 584b24ab676SJeff Bonwick SET_BOOKMARK(&zb, db->db_objset->os_dsl_dataset ? 585b24ab676SJeff Bonwick db->db_objset->os_dsl_dataset->ds_object : DMU_META_OBJSET, 586b24ab676SJeff Bonwick db->db.db_object, db->db_level, db->db_blkid); 587ea8dc4b6Seschrock 588ea8dc4b6Seschrock dbuf_add_ref(db, NULL); 589088f3894Sahrens 59043466aaeSMax Grossman (void) arc_read(zio, db->db_objset->os_spa, db->db_blkptr, 591fa9e4066Sahrens dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ, 59213506d1eSmaybee (*flags & DB_RF_CANFAIL) ? 
ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED, 59313506d1eSmaybee &aflags, &zb); 59413506d1eSmaybee if (aflags & ARC_CACHED) 59513506d1eSmaybee *flags |= DB_RF_CACHED; 596fa9e4066Sahrens } 597fa9e4066Sahrens 598ea8dc4b6Seschrock int 599ea8dc4b6Seschrock dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags) 600fa9e4066Sahrens { 601ea8dc4b6Seschrock int err = 0; 60243466aaeSMax Grossman boolean_t havepzio = (zio != NULL); 60343466aaeSMax Grossman boolean_t prefetch; 604744947dcSTom Erickson dnode_t *dn; 605fa9e4066Sahrens 606fa9e4066Sahrens /* 607fa9e4066Sahrens * We don't have to hold the mutex to check db_state because it 608fa9e4066Sahrens * can't be freed while we have a hold on the buffer. 609fa9e4066Sahrens */ 610fa9e4066Sahrens ASSERT(!refcount_is_zero(&db->db_holds)); 611fa9e4066Sahrens 61282c9918fSTim Haley if (db->db_state == DB_NOFILL) 613be6fd75aSMatthew Ahrens return (SET_ERROR(EIO)); 61482c9918fSTim Haley 615744947dcSTom Erickson DB_DNODE_ENTER(db); 616744947dcSTom Erickson dn = DB_DNODE(db); 617fa9e4066Sahrens if ((flags & DB_RF_HAVESTRUCT) == 0) 618744947dcSTom Erickson rw_enter(&dn->dn_struct_rwlock, RW_READER); 619fa9e4066Sahrens 6200a586ceaSMark Shellenbaum prefetch = db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID && 621744947dcSTom Erickson (flags & DB_RF_NOPREFETCH) == 0 && dn != NULL && 6223baa08fcSek110237 DBUF_IS_CACHEABLE(db); 62313506d1eSmaybee 624fa9e4066Sahrens mutex_enter(&db->db_mtx); 625ea8dc4b6Seschrock if (db->db_state == DB_CACHED) { 626ea8dc4b6Seschrock mutex_exit(&db->db_mtx); 62713506d1eSmaybee if (prefetch) 628744947dcSTom Erickson dmu_zfetch(&dn->dn_zfetch, db->db.db_offset, 62913506d1eSmaybee db->db.db_size, TRUE); 630ea8dc4b6Seschrock if ((flags & DB_RF_HAVESTRUCT) == 0) 631744947dcSTom Erickson rw_exit(&dn->dn_struct_rwlock); 632744947dcSTom Erickson DB_DNODE_EXIT(db); 633ea8dc4b6Seschrock } else if (db->db_state == DB_UNCACHED) { 634744947dcSTom Erickson spa_t *spa = dn->dn_objset->os_spa; 635744947dcSTom Erickson 
636744947dcSTom Erickson if (zio == NULL) 637744947dcSTom Erickson zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); 63813506d1eSmaybee dbuf_read_impl(db, zio, &flags); 63913506d1eSmaybee 640ea8dc4b6Seschrock /* dbuf_read_impl has dropped db_mtx for us */ 641ea8dc4b6Seschrock 64213506d1eSmaybee if (prefetch) 643744947dcSTom Erickson dmu_zfetch(&dn->dn_zfetch, db->db.db_offset, 64413506d1eSmaybee db->db.db_size, flags & DB_RF_CACHED); 645ea8dc4b6Seschrock 646ea8dc4b6Seschrock if ((flags & DB_RF_HAVESTRUCT) == 0) 647744947dcSTom Erickson rw_exit(&dn->dn_struct_rwlock); 648744947dcSTom Erickson DB_DNODE_EXIT(db); 649ea8dc4b6Seschrock 650ea8dc4b6Seschrock if (!havepzio) 651ea8dc4b6Seschrock err = zio_wait(zio); 652ea8dc4b6Seschrock } else { 6533e30c24aSWill Andrews /* 6543e30c24aSWill Andrews * Another reader came in while the dbuf was in flight 6553e30c24aSWill Andrews * between UNCACHED and CACHED. Either a writer will finish 6563e30c24aSWill Andrews * writing the buffer (sending the dbuf to CACHED) or the 6573e30c24aSWill Andrews * first reader's request will reach the read_done callback 6583e30c24aSWill Andrews * and send the dbuf to CACHED. Otherwise, a failure 6593e30c24aSWill Andrews * occurred and the dbuf went to UNCACHED. 6603e30c24aSWill Andrews */ 66113506d1eSmaybee mutex_exit(&db->db_mtx); 66213506d1eSmaybee if (prefetch) 663744947dcSTom Erickson dmu_zfetch(&dn->dn_zfetch, db->db.db_offset, 66413506d1eSmaybee db->db.db_size, TRUE); 665ea8dc4b6Seschrock if ((flags & DB_RF_HAVESTRUCT) == 0) 666744947dcSTom Erickson rw_exit(&dn->dn_struct_rwlock); 667744947dcSTom Erickson DB_DNODE_EXIT(db); 66813506d1eSmaybee 6693e30c24aSWill Andrews /* Skip the wait per the caller's request. 
*/ 67013506d1eSmaybee mutex_enter(&db->db_mtx); 671ea8dc4b6Seschrock if ((flags & DB_RF_NEVERWAIT) == 0) { 672ea8dc4b6Seschrock while (db->db_state == DB_READ || 673ea8dc4b6Seschrock db->db_state == DB_FILL) { 674fa9e4066Sahrens ASSERT(db->db_state == DB_READ || 675fa9e4066Sahrens (flags & DB_RF_HAVESTRUCT) == 0); 676fa9e4066Sahrens cv_wait(&db->db_changed, &db->db_mtx); 677fa9e4066Sahrens } 678ea8dc4b6Seschrock if (db->db_state == DB_UNCACHED) 679be6fd75aSMatthew Ahrens err = SET_ERROR(EIO); 680ea8dc4b6Seschrock } 681fa9e4066Sahrens mutex_exit(&db->db_mtx); 682fa9e4066Sahrens } 683fa9e4066Sahrens 684ea8dc4b6Seschrock ASSERT(err || havepzio || db->db_state == DB_CACHED); 685ea8dc4b6Seschrock return (err); 686fa9e4066Sahrens } 687fa9e4066Sahrens 688fa9e4066Sahrens static void 689fa9e4066Sahrens dbuf_noread(dmu_buf_impl_t *db) 690fa9e4066Sahrens { 691fa9e4066Sahrens ASSERT(!refcount_is_zero(&db->db_holds)); 6920a586ceaSMark Shellenbaum ASSERT(db->db_blkid != DMU_BONUS_BLKID); 693fa9e4066Sahrens mutex_enter(&db->db_mtx); 694fa9e4066Sahrens while (db->db_state == DB_READ || db->db_state == DB_FILL) 695fa9e4066Sahrens cv_wait(&db->db_changed, &db->db_mtx); 696fa9e4066Sahrens if (db->db_state == DB_UNCACHED) { 697ad23a2dbSjohansen arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); 69843466aaeSMax Grossman spa_t *spa = db->db_objset->os_spa; 699ad23a2dbSjohansen 700ea8dc4b6Seschrock ASSERT(db->db_buf == NULL); 701fa9e4066Sahrens ASSERT(db->db.db_data == NULL); 702744947dcSTom Erickson dbuf_set_data(db, arc_buf_alloc(spa, db->db.db_size, db, type)); 703fa9e4066Sahrens db->db_state = DB_FILL; 70482c9918fSTim Haley } else if (db->db_state == DB_NOFILL) { 70582c9918fSTim Haley dbuf_set_data(db, NULL); 706fa9e4066Sahrens } else { 707fa9e4066Sahrens ASSERT3U(db->db_state, ==, DB_CACHED); 708fa9e4066Sahrens } 709fa9e4066Sahrens mutex_exit(&db->db_mtx); 710fa9e4066Sahrens } 711fa9e4066Sahrens 712fa9e4066Sahrens /* 713fa9e4066Sahrens * This is our just-in-time copy function. 
It makes a copy of 714fa9e4066Sahrens * buffers, that have been modified in a previous transaction 715fa9e4066Sahrens * group, before we modify them in the current active group. 716fa9e4066Sahrens * 717fa9e4066Sahrens * This function is used in two places: when we are dirtying a 718fa9e4066Sahrens * buffer for the first time in a txg, and when we are freeing 719fa9e4066Sahrens * a range in a dnode that includes this buffer. 720fa9e4066Sahrens * 721fa9e4066Sahrens * Note that when we are called from dbuf_free_range() we do 722fa9e4066Sahrens * not put a hold on the buffer, we just traverse the active 723fa9e4066Sahrens * dbuf list for the dnode. 724fa9e4066Sahrens */ 725fa9e4066Sahrens static void 726fa9e4066Sahrens dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg) 727fa9e4066Sahrens { 728c717a561Smaybee dbuf_dirty_record_t *dr = db->db_last_dirty; 729fa9e4066Sahrens 730fa9e4066Sahrens ASSERT(MUTEX_HELD(&db->db_mtx)); 731fa9e4066Sahrens ASSERT(db->db.db_data != NULL); 732c717a561Smaybee ASSERT(db->db_level == 0); 733c717a561Smaybee ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT); 734fa9e4066Sahrens 7354d31c452Smaybee if (dr == NULL || 7364d31c452Smaybee (dr->dt.dl.dr_data != 7370a586ceaSMark Shellenbaum ((db->db_blkid == DMU_BONUS_BLKID) ? db->db.db_data : db->db_buf))) 738fa9e4066Sahrens return; 739fa9e4066Sahrens 740fa9e4066Sahrens /* 741c717a561Smaybee * If the last dirty record for this dbuf has not yet synced 742c717a561Smaybee * and its referencing the dbuf data, either: 743c717a561Smaybee * reset the reference to point to a new copy, 744c717a561Smaybee * or (if there a no active holders) 745c717a561Smaybee * just null out the current db_data pointer. 
746fa9e4066Sahrens */ 747c717a561Smaybee ASSERT(dr->dr_txg >= txg - 2); 7480a586ceaSMark Shellenbaum if (db->db_blkid == DMU_BONUS_BLKID) { 749c717a561Smaybee /* Note that the data bufs here are zio_bufs */ 750c717a561Smaybee dr->dt.dl.dr_data = zio_buf_alloc(DN_MAX_BONUSLEN); 7515a98e54bSBrendan Gregg - Sun Microsystems arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); 752c717a561Smaybee bcopy(db->db.db_data, dr->dt.dl.dr_data, DN_MAX_BONUSLEN); 753c717a561Smaybee } else if (refcount_count(&db->db_holds) > db->db_dirtycnt) { 754ea8dc4b6Seschrock int size = db->db.db_size; 755c717a561Smaybee arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); 75643466aaeSMax Grossman spa_t *spa = db->db_objset->os_spa; 757744947dcSTom Erickson 758744947dcSTom Erickson dr->dt.dl.dr_data = arc_buf_alloc(spa, size, db, type); 759c717a561Smaybee bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size); 760fa9e4066Sahrens } else { 761ea8dc4b6Seschrock dbuf_set_data(db, NULL); 762fa9e4066Sahrens } 763fa9e4066Sahrens } 764ea8dc4b6Seschrock 765fa9e4066Sahrens void 766c717a561Smaybee dbuf_unoverride(dbuf_dirty_record_t *dr) 767fa9e4066Sahrens { 768c717a561Smaybee dmu_buf_impl_t *db = dr->dr_dbuf; 769b24ab676SJeff Bonwick blkptr_t *bp = &dr->dt.dl.dr_overridden_by; 770c717a561Smaybee uint64_t txg = dr->dr_txg; 771c5c6ffa0Smaybee 772c717a561Smaybee ASSERT(MUTEX_HELD(&db->db_mtx)); 773c717a561Smaybee ASSERT(dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC); 774c717a561Smaybee ASSERT(db->db_level == 0); 775c717a561Smaybee 7760a586ceaSMark Shellenbaum if (db->db_blkid == DMU_BONUS_BLKID || 777c717a561Smaybee dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN) 778c717a561Smaybee return; 779c717a561Smaybee 780b24ab676SJeff Bonwick ASSERT(db->db_data_pending != dr); 781b24ab676SJeff Bonwick 782fa9e4066Sahrens /* free this block */ 78343466aaeSMax Grossman if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite) 78443466aaeSMax Grossman zio_free(db->db_objset->os_spa, txg, bp); 785b24ab676SJeff Bonwick 
786c717a561Smaybee dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; 78780901aeaSGeorge Wilson dr->dt.dl.dr_nopwrite = B_FALSE; 78880901aeaSGeorge Wilson 7896b4acc8bSahrens /* 7906b4acc8bSahrens * Release the already-written buffer, so we leave it in 7916b4acc8bSahrens * a consistent dirty state. Note that all callers are 7926b4acc8bSahrens * modifying the buffer, so they will immediately do 7936b4acc8bSahrens * another (redundant) arc_release(). Therefore, leave 7946b4acc8bSahrens * the buf thawed to save the effort of freezing & 7956b4acc8bSahrens * immediately re-thawing it. 7966b4acc8bSahrens */ 797c717a561Smaybee arc_release(dr->dt.dl.dr_data, db); 798fa9e4066Sahrens } 799fa9e4066Sahrens 800cdb0ab79Smaybee /* 801cdb0ab79Smaybee * Evict (if its unreferenced) or clear (if its referenced) any level-0 802cdb0ab79Smaybee * data blocks in the free range, so that any future readers will find 80343466aaeSMax Grossman * empty blocks. 8042f3d8780SMatthew Ahrens * 8052f3d8780SMatthew Ahrens * This is a no-op if the dataset is in the middle of an incremental 8062f3d8780SMatthew Ahrens * receive; see comment below for details. 
807cdb0ab79Smaybee */ 808fa9e4066Sahrens void 8090f6d88adSAlex Reece dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid, 8100f6d88adSAlex Reece dmu_tx_t *tx) 811fa9e4066Sahrens { 8120f6d88adSAlex Reece dmu_buf_impl_t *db, *db_next, db_search; 813fa9e4066Sahrens uint64_t txg = tx->tx_txg; 8140f6d88adSAlex Reece avl_index_t where; 815fa9e4066Sahrens 8160f6d88adSAlex Reece if (end_blkid > dn->dn_maxblkid && (end_blkid != DMU_SPILL_BLKID)) 8170f6d88adSAlex Reece end_blkid = dn->dn_maxblkid; 8180f6d88adSAlex Reece dprintf_dnode(dn, "start=%llu end=%llu\n", start_blkid, end_blkid); 8190f6d88adSAlex Reece 8200f6d88adSAlex Reece db_search.db_level = 0; 8210f6d88adSAlex Reece db_search.db_blkid = start_blkid; 8220f6d88adSAlex Reece db_search.db_creation = 0; 8232f3d8780SMatthew Ahrens 824713d6c20SMatthew Ahrens mutex_enter(&dn->dn_dbufs_mtx); 8250f6d88adSAlex Reece if (start_blkid >= dn->dn_unlisted_l0_blkid) { 826713d6c20SMatthew Ahrens /* There can't be any dbufs in this range; no need to search. */ 8270f6d88adSAlex Reece #ifdef DEBUG 8280f6d88adSAlex Reece db = avl_find(&dn->dn_dbufs, &db_search, &where); 8290f6d88adSAlex Reece ASSERT3P(db, ==, NULL); 8300f6d88adSAlex Reece db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER); 8310f6d88adSAlex Reece ASSERT(db == NULL || db->db_level > 0); 8320f6d88adSAlex Reece #endif 833713d6c20SMatthew Ahrens mutex_exit(&dn->dn_dbufs_mtx); 8342f3d8780SMatthew Ahrens return; 835713d6c20SMatthew Ahrens } else if (dmu_objset_is_receiving(dn->dn_objset)) { 836713d6c20SMatthew Ahrens /* 837713d6c20SMatthew Ahrens * If we are receiving, we expect there to be no dbufs in 838713d6c20SMatthew Ahrens * the range to be freed, because receive modifies each 839713d6c20SMatthew Ahrens * block at most once, and in offset order. If this is 840713d6c20SMatthew Ahrens * not the case, it can lead to performance problems, 841713d6c20SMatthew Ahrens * so note that we unexpectedly took the slow path. 
842713d6c20SMatthew Ahrens */ 843713d6c20SMatthew Ahrens atomic_inc_64(&zfs_free_range_recv_miss); 8442f3d8780SMatthew Ahrens } 8452f3d8780SMatthew Ahrens 8460f6d88adSAlex Reece db = avl_find(&dn->dn_dbufs, &db_search, &where); 8470f6d88adSAlex Reece ASSERT3P(db, ==, NULL); 8480f6d88adSAlex Reece db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER); 8490f6d88adSAlex Reece 8500f6d88adSAlex Reece for (; db != NULL; db = db_next) { 8510f6d88adSAlex Reece db_next = AVL_NEXT(&dn->dn_dbufs, db); 8520a586ceaSMark Shellenbaum ASSERT(db->db_blkid != DMU_BONUS_BLKID); 853cdb0ab79Smaybee 8540f6d88adSAlex Reece if (db->db_level != 0 || db->db_blkid > end_blkid) { 8550f6d88adSAlex Reece break; 8560f6d88adSAlex Reece } 8570f6d88adSAlex Reece ASSERT3U(db->db_blkid, >=, start_blkid); 858fa9e4066Sahrens 859fa9e4066Sahrens /* found a level 0 buffer in the range */ 860fa9e4066Sahrens mutex_enter(&db->db_mtx); 8613b2aab18SMatthew Ahrens if (dbuf_undirty(db, tx)) { 8623b2aab18SMatthew Ahrens /* mutex has been dropped and dbuf destroyed */ 8633b2aab18SMatthew Ahrens continue; 8643b2aab18SMatthew Ahrens } 8653b2aab18SMatthew Ahrens 866ea8dc4b6Seschrock if (db->db_state == DB_UNCACHED || 86782c9918fSTim Haley db->db_state == DB_NOFILL || 868ea8dc4b6Seschrock db->db_state == DB_EVICTING) { 869fa9e4066Sahrens ASSERT(db->db.db_data == NULL); 870fa9e4066Sahrens mutex_exit(&db->db_mtx); 871fa9e4066Sahrens continue; 872fa9e4066Sahrens } 873c543ec06Sahrens if (db->db_state == DB_READ || db->db_state == DB_FILL) { 874c543ec06Sahrens /* will be handled in dbuf_read_done or dbuf_rele */ 875c717a561Smaybee db->db_freed_in_flight = TRUE; 876fa9e4066Sahrens mutex_exit(&db->db_mtx); 877fa9e4066Sahrens continue; 878fa9e4066Sahrens } 879ea8dc4b6Seschrock if (refcount_count(&db->db_holds) == 0) { 880ea8dc4b6Seschrock ASSERT(db->db_buf); 881ea8dc4b6Seschrock dbuf_clear(db); 882ea8dc4b6Seschrock continue; 883ea8dc4b6Seschrock } 884c717a561Smaybee /* The dbuf is referenced */ 885fa9e4066Sahrens 
886c717a561Smaybee if (db->db_last_dirty != NULL) { 887c717a561Smaybee dbuf_dirty_record_t *dr = db->db_last_dirty; 888c717a561Smaybee 889c717a561Smaybee if (dr->dr_txg == txg) { 890ea8dc4b6Seschrock /* 891c717a561Smaybee * This buffer is "in-use", re-adjust the file 892c717a561Smaybee * size to reflect that this buffer may 893c717a561Smaybee * contain new data when we sync. 894ea8dc4b6Seschrock */ 89506e0070dSMark Shellenbaum if (db->db_blkid != DMU_SPILL_BLKID && 89606e0070dSMark Shellenbaum db->db_blkid > dn->dn_maxblkid) 89744eda4d7Smaybee dn->dn_maxblkid = db->db_blkid; 898c717a561Smaybee dbuf_unoverride(dr); 899c717a561Smaybee } else { 900c717a561Smaybee /* 901c717a561Smaybee * This dbuf is not dirty in the open context. 902c717a561Smaybee * Either uncache it (if its not referenced in 903c717a561Smaybee * the open context) or reset its contents to 904c717a561Smaybee * empty. 905c717a561Smaybee */ 906c717a561Smaybee dbuf_fix_old_data(db, txg); 90744eda4d7Smaybee } 908c717a561Smaybee } 909c717a561Smaybee /* clear the contents if its cached */ 910ea8dc4b6Seschrock if (db->db_state == DB_CACHED) { 911ea8dc4b6Seschrock ASSERT(db->db.db_data != NULL); 912fa9e4066Sahrens arc_release(db->db_buf, db); 913fa9e4066Sahrens bzero(db->db.db_data, db->db.db_size); 9146b4acc8bSahrens arc_buf_freeze(db->db_buf); 915fa9e4066Sahrens } 916ea8dc4b6Seschrock 917fa9e4066Sahrens mutex_exit(&db->db_mtx); 918fa9e4066Sahrens } 919fa9e4066Sahrens mutex_exit(&dn->dn_dbufs_mtx); 920fa9e4066Sahrens } 921fa9e4066Sahrens 922fa9e4066Sahrens static int 9231934e92fSmaybee dbuf_block_freeable(dmu_buf_impl_t *db) 924fa9e4066Sahrens { 925fa9e4066Sahrens dsl_dataset_t *ds = db->db_objset->os_dsl_dataset; 926fa9e4066Sahrens uint64_t birth_txg = 0; 927fa9e4066Sahrens 928fa9e4066Sahrens /* 929fa9e4066Sahrens * We don't need any locking to protect db_blkptr: 930c717a561Smaybee * If it's syncing, then db_last_dirty will be set 931c717a561Smaybee * so we'll ignore db_blkptr. 
93243466aaeSMax Grossman * 93343466aaeSMax Grossman * This logic ensures that only block births for 93443466aaeSMax Grossman * filled blocks are considered. 935fa9e4066Sahrens */ 936c717a561Smaybee ASSERT(MUTEX_HELD(&db->db_mtx)); 93743466aaeSMax Grossman if (db->db_last_dirty && (db->db_blkptr == NULL || 93843466aaeSMax Grossman !BP_IS_HOLE(db->db_blkptr))) { 939c717a561Smaybee birth_txg = db->db_last_dirty->dr_txg; 94043466aaeSMax Grossman } else if (db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)) { 941fa9e4066Sahrens birth_txg = db->db_blkptr->blk_birth; 94243466aaeSMax Grossman } 943fa9e4066Sahrens 944837b568bSGeorge Wilson /* 94543466aaeSMax Grossman * If this block don't exist or is in a snapshot, it can't be freed. 946837b568bSGeorge Wilson * Don't pass the bp to dsl_dataset_block_freeable() since we 947837b568bSGeorge Wilson * are holding the db_mtx lock and might deadlock if we are 948837b568bSGeorge Wilson * prefetching a dedup-ed block. 949837b568bSGeorge Wilson */ 95043466aaeSMax Grossman if (birth_txg != 0) 9511934e92fSmaybee return (ds == NULL || 952837b568bSGeorge Wilson dsl_dataset_block_freeable(ds, NULL, birth_txg)); 953fa9e4066Sahrens else 95443466aaeSMax Grossman return (B_FALSE); 955fa9e4066Sahrens } 956fa9e4066Sahrens 957fa9e4066Sahrens void 958fa9e4066Sahrens dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx) 959fa9e4066Sahrens { 960fa9e4066Sahrens arc_buf_t *buf, *obuf; 961fa9e4066Sahrens int osize = db->db.db_size; 962ad23a2dbSjohansen arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); 963744947dcSTom Erickson dnode_t *dn; 964fa9e4066Sahrens 9650a586ceaSMark Shellenbaum ASSERT(db->db_blkid != DMU_BONUS_BLKID); 966ea8dc4b6Seschrock 967744947dcSTom Erickson DB_DNODE_ENTER(db); 968744947dcSTom Erickson dn = DB_DNODE(db); 969744947dcSTom Erickson 970fa9e4066Sahrens /* XXX does *this* func really need the lock? 
*/ 971744947dcSTom Erickson ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); 972fa9e4066Sahrens 973fa9e4066Sahrens /* 97443466aaeSMax Grossman * This call to dmu_buf_will_dirty() with the dn_struct_rwlock held 975fa9e4066Sahrens * is OK, because there can be no other references to the db 976fa9e4066Sahrens * when we are changing its size, so no concurrent DB_FILL can 977fa9e4066Sahrens * be happening. 978fa9e4066Sahrens */ 979ea8dc4b6Seschrock /* 980ea8dc4b6Seschrock * XXX we should be doing a dbuf_read, checking the return 981ea8dc4b6Seschrock * value and returning that up to our callers 982ea8dc4b6Seschrock */ 98343466aaeSMax Grossman dmu_buf_will_dirty(&db->db, tx); 984fa9e4066Sahrens 985fa9e4066Sahrens /* create the data buffer for the new block */ 986744947dcSTom Erickson buf = arc_buf_alloc(dn->dn_objset->os_spa, size, db, type); 987fa9e4066Sahrens 988fa9e4066Sahrens /* copy old block data to the new block */ 989fa9e4066Sahrens obuf = db->db_buf; 990f65e61c0Sahrens bcopy(obuf->b_data, buf->b_data, MIN(osize, size)); 991fa9e4066Sahrens /* zero the remainder */ 992f65e61c0Sahrens if (size > osize) 993fa9e4066Sahrens bzero((uint8_t *)buf->b_data + osize, size - osize); 994fa9e4066Sahrens 995fa9e4066Sahrens mutex_enter(&db->db_mtx); 996fa9e4066Sahrens dbuf_set_data(db, buf); 9973b2aab18SMatthew Ahrens VERIFY(arc_buf_remove_ref(obuf, db)); 998fa9e4066Sahrens db->db.db_size = size; 999fa9e4066Sahrens 1000c717a561Smaybee if (db->db_level == 0) { 1001c717a561Smaybee ASSERT3U(db->db_last_dirty->dr_txg, ==, tx->tx_txg); 1002c717a561Smaybee db->db_last_dirty->dt.dl.dr_data = buf; 1003c717a561Smaybee } 1004fa9e4066Sahrens mutex_exit(&db->db_mtx); 1005fa9e4066Sahrens 1006744947dcSTom Erickson dnode_willuse_space(dn, size-osize, tx); 1007744947dcSTom Erickson DB_DNODE_EXIT(db); 1008fa9e4066Sahrens } 1009fa9e4066Sahrens 10103f9d6ad7SLin Ling void 10113f9d6ad7SLin Ling dbuf_release_bp(dmu_buf_impl_t *db) 10123f9d6ad7SLin Ling { 101343466aaeSMax Grossman objset_t *os = 
db->db_objset; 10143f9d6ad7SLin Ling 10153f9d6ad7SLin Ling ASSERT(dsl_pool_sync_context(dmu_objset_pool(os))); 10163f9d6ad7SLin Ling ASSERT(arc_released(os->os_phys_buf) || 10173f9d6ad7SLin Ling list_link_active(&os->os_dsl_dataset->ds_synced_link)); 10183f9d6ad7SLin Ling ASSERT(db->db_parent == NULL || arc_released(db->db_parent->db_buf)); 10193f9d6ad7SLin Ling 10201b912ec7SGeorge Wilson (void) arc_release(db->db_buf, db); 10213f9d6ad7SLin Ling } 10223f9d6ad7SLin Ling 1023c717a561Smaybee dbuf_dirty_record_t * 1024fa9e4066Sahrens dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) 1025fa9e4066Sahrens { 1026744947dcSTom Erickson dnode_t *dn; 1027744947dcSTom Erickson objset_t *os; 1028c717a561Smaybee dbuf_dirty_record_t **drp, *dr; 1029fa9e4066Sahrens int drop_struct_lock = FALSE; 1030d3469faaSMark Maybee boolean_t do_free_accounting = B_FALSE; 1031fa9e4066Sahrens int txgoff = tx->tx_txg & TXG_MASK; 1032fa9e4066Sahrens 1033fa9e4066Sahrens ASSERT(tx->tx_txg != 0); 1034fa9e4066Sahrens ASSERT(!refcount_is_zero(&db->db_holds)); 10359c9dc39aSek110237 DMU_TX_DIRTY_BUF(tx, db); 1036fa9e4066Sahrens 1037744947dcSTom Erickson DB_DNODE_ENTER(db); 1038744947dcSTom Erickson dn = DB_DNODE(db); 1039fa9e4066Sahrens /* 1040fa9e4066Sahrens * Shouldn't dirty a regular buffer in syncing context. Private 1041fa9e4066Sahrens * objects may be dirtied in syncing context, but only if they 1042fa9e4066Sahrens * were already pre-dirtied in open context. 1043fa9e4066Sahrens */ 1044c717a561Smaybee ASSERT(!dmu_tx_is_syncing(tx) || 1045c717a561Smaybee BP_IS_HOLE(dn->dn_objset->os_rootbp) || 104614843421SMatthew Ahrens DMU_OBJECT_IS_SPECIAL(dn->dn_object) || 104714843421SMatthew Ahrens dn->dn_objset->os_dsl_dataset == NULL); 1048fa9e4066Sahrens /* 1049fa9e4066Sahrens * We make this assert for private objects as well, but after we 1050fa9e4066Sahrens * check if we're already dirty. They are allowed to re-dirty 1051fa9e4066Sahrens * in syncing context. 
1052fa9e4066Sahrens */ 1053ea8dc4b6Seschrock ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT || 1054c717a561Smaybee dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx == 1055fa9e4066Sahrens (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN)); 1056fa9e4066Sahrens 1057fa9e4066Sahrens mutex_enter(&db->db_mtx); 1058fa9e4066Sahrens /* 1059c717a561Smaybee * XXX make this true for indirects too? The problem is that 1060c717a561Smaybee * transactions created with dmu_tx_create_assigned() from 1061c717a561Smaybee * syncing context don't bother holding ahead. 1062fa9e4066Sahrens */ 1063c717a561Smaybee ASSERT(db->db_level != 0 || 106482c9918fSTim Haley db->db_state == DB_CACHED || db->db_state == DB_FILL || 106582c9918fSTim Haley db->db_state == DB_NOFILL); 1066fa9e4066Sahrens 1067fa9e4066Sahrens mutex_enter(&dn->dn_mtx); 1068fa9e4066Sahrens /* 1069fa9e4066Sahrens * Don't set dirtyctx to SYNC if we're just modifying this as we 1070fa9e4066Sahrens * initialize the objset. 1071fa9e4066Sahrens */ 1072fa9e4066Sahrens if (dn->dn_dirtyctx == DN_UNDIRTIED && 1073c717a561Smaybee !BP_IS_HOLE(dn->dn_objset->os_rootbp)) { 1074fa9e4066Sahrens dn->dn_dirtyctx = 1075fa9e4066Sahrens (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN); 1076fa9e4066Sahrens ASSERT(dn->dn_dirtyctx_firstset == NULL); 1077fa9e4066Sahrens dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP); 1078fa9e4066Sahrens } 1079fa9e4066Sahrens mutex_exit(&dn->dn_mtx); 1080fa9e4066Sahrens 10810a586ceaSMark Shellenbaum if (db->db_blkid == DMU_SPILL_BLKID) 10820a586ceaSMark Shellenbaum dn->dn_have_spill = B_TRUE; 10830a586ceaSMark Shellenbaum 1084fa9e4066Sahrens /* 1085fa9e4066Sahrens * If this buffer is already dirty, we're done. 
1086fa9e4066Sahrens */ 1087c717a561Smaybee drp = &db->db_last_dirty; 1088c717a561Smaybee ASSERT(*drp == NULL || (*drp)->dr_txg <= tx->tx_txg || 1089c717a561Smaybee db->db.db_object == DMU_META_DNODE_OBJECT); 10907e2186e3Sbonwick while ((dr = *drp) != NULL && dr->dr_txg > tx->tx_txg) 10917e2186e3Sbonwick drp = &dr->dr_next; 10927e2186e3Sbonwick if (dr && dr->dr_txg == tx->tx_txg) { 1093744947dcSTom Erickson DB_DNODE_EXIT(db); 1094744947dcSTom Erickson 10950a586ceaSMark Shellenbaum if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID) { 1096c717a561Smaybee /* 1097c717a561Smaybee * If this buffer has already been written out, 1098c717a561Smaybee * we now need to reset its state. 1099c717a561Smaybee */ 11007e2186e3Sbonwick dbuf_unoverride(dr); 1101b24ab676SJeff Bonwick if (db->db.db_object != DMU_META_DNODE_OBJECT && 1102b24ab676SJeff Bonwick db->db_state != DB_NOFILL) 11036b4acc8bSahrens arc_buf_thaw(db->db_buf); 1104c717a561Smaybee } 1105fa9e4066Sahrens mutex_exit(&db->db_mtx); 11067e2186e3Sbonwick return (dr); 1107fa9e4066Sahrens } 1108fa9e4066Sahrens 1109fa9e4066Sahrens /* 1110fa9e4066Sahrens * Only valid if not already dirty. 1111fa9e4066Sahrens */ 111214843421SMatthew Ahrens ASSERT(dn->dn_object == 0 || 111314843421SMatthew Ahrens dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx == 1114fa9e4066Sahrens (dmu_tx_is_syncing(tx) ? 
DN_DIRTY_SYNC : DN_DIRTY_OPEN)); 1115fa9e4066Sahrens 1116fa9e4066Sahrens ASSERT3U(dn->dn_nlevels, >, db->db_level); 1117fa9e4066Sahrens ASSERT((dn->dn_phys->dn_nlevels == 0 && db->db_level == 0) || 1118fa9e4066Sahrens dn->dn_phys->dn_nlevels > db->db_level || 1119fa9e4066Sahrens dn->dn_next_nlevels[txgoff] > db->db_level || 1120fa9e4066Sahrens dn->dn_next_nlevels[(tx->tx_txg-1) & TXG_MASK] > db->db_level || 1121fa9e4066Sahrens dn->dn_next_nlevels[(tx->tx_txg-2) & TXG_MASK] > db->db_level); 1122fa9e4066Sahrens 1123fa9e4066Sahrens /* 1124fa9e4066Sahrens * We should only be dirtying in syncing context if it's the 112514843421SMatthew Ahrens * mos or we're initializing the os or it's a special object. 112614843421SMatthew Ahrens * However, we are allowed to dirty in syncing context provided 112714843421SMatthew Ahrens * we already dirtied it in open context. Hence we must make 112814843421SMatthew Ahrens * this assertion only if we're not already dirty. 1129fa9e4066Sahrens */ 1130744947dcSTom Erickson os = dn->dn_objset; 113114843421SMatthew Ahrens ASSERT(!dmu_tx_is_syncing(tx) || DMU_OBJECT_IS_SPECIAL(dn->dn_object) || 113214843421SMatthew Ahrens os->os_dsl_dataset == NULL || BP_IS_HOLE(os->os_rootbp)); 1133fa9e4066Sahrens ASSERT(db->db.db_size != 0); 1134fa9e4066Sahrens 1135fa9e4066Sahrens dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); 1136fa9e4066Sahrens 11370a586ceaSMark Shellenbaum if (db->db_blkid != DMU_BONUS_BLKID) { 11381934e92fSmaybee /* 11391934e92fSmaybee * Update the accounting. 1140d3469faaSMark Maybee * Note: we delay "free accounting" until after we drop 1141d3469faaSMark Maybee * the db_mtx. This keeps us from grabbing other locks 1142b24ab676SJeff Bonwick * (and possibly deadlocking) in bp_get_dsize() while 1143d3469faaSMark Maybee * also holding the db_mtx. 
11441934e92fSmaybee */ 11451934e92fSmaybee dnode_willuse_space(dn, db->db.db_size, tx); 1146d3469faaSMark Maybee do_free_accounting = dbuf_block_freeable(db); 11471934e92fSmaybee } 11481934e92fSmaybee 1149ea8dc4b6Seschrock /* 1150ea8dc4b6Seschrock * If this buffer is dirty in an old transaction group we need 1151ea8dc4b6Seschrock * to make a copy of it so that the changes we make in this 1152ea8dc4b6Seschrock * transaction group won't leak out when we sync the older txg. 1153ea8dc4b6Seschrock */ 1154c717a561Smaybee dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_SLEEP); 1155c717a561Smaybee if (db->db_level == 0) { 1156c717a561Smaybee void *data_old = db->db_buf; 1157c717a561Smaybee 115882c9918fSTim Haley if (db->db_state != DB_NOFILL) { 11590a586ceaSMark Shellenbaum if (db->db_blkid == DMU_BONUS_BLKID) { 1160c717a561Smaybee dbuf_fix_old_data(db, tx->tx_txg); 1161c717a561Smaybee data_old = db->db.db_data; 1162c717a561Smaybee } else if (db->db.db_object != DMU_META_DNODE_OBJECT) { 1163fa9e4066Sahrens /* 116482c9918fSTim Haley * Release the data buffer from the cache so 116582c9918fSTim Haley * that we can modify it without impacting 116682c9918fSTim Haley * possible other users of this cached data 116782c9918fSTim Haley * block. Note that indirect blocks and 116882c9918fSTim Haley * private objects are not released until the 116982c9918fSTim Haley * syncing state (since they are only modified 117082c9918fSTim Haley * then). 
1171fa9e4066Sahrens */ 1172fa9e4066Sahrens arc_release(db->db_buf, db); 1173fa9e4066Sahrens dbuf_fix_old_data(db, tx->tx_txg); 1174c717a561Smaybee data_old = db->db_buf; 1175fa9e4066Sahrens } 1176c717a561Smaybee ASSERT(data_old != NULL); 117782c9918fSTim Haley } 1178c717a561Smaybee dr->dt.dl.dr_data = data_old; 1179c717a561Smaybee } else { 1180c717a561Smaybee mutex_init(&dr->dt.di.dr_mtx, NULL, MUTEX_DEFAULT, NULL); 1181c717a561Smaybee list_create(&dr->dt.di.dr_children, 1182c717a561Smaybee sizeof (dbuf_dirty_record_t), 1183c717a561Smaybee offsetof(dbuf_dirty_record_t, dr_dirty_node)); 1184fa9e4066Sahrens } 118569962b56SMatthew Ahrens if (db->db_blkid != DMU_BONUS_BLKID && os->os_dsl_dataset != NULL) 118669962b56SMatthew Ahrens dr->dr_accounted = db->db.db_size; 1187c717a561Smaybee dr->dr_dbuf = db; 1188c717a561Smaybee dr->dr_txg = tx->tx_txg; 1189c717a561Smaybee dr->dr_next = *drp; 1190c717a561Smaybee *drp = dr; 1191fa9e4066Sahrens 1192fa9e4066Sahrens /* 1193fa9e4066Sahrens * We could have been freed_in_flight between the dbuf_noread 1194fa9e4066Sahrens * and dbuf_dirty. We win, as though the dbuf_noread() had 1195fa9e4066Sahrens * happened after the free. 
1196fa9e4066Sahrens */ 11970a586ceaSMark Shellenbaum if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID && 11980a586ceaSMark Shellenbaum db->db_blkid != DMU_SPILL_BLKID) { 1199c717a561Smaybee mutex_enter(&dn->dn_mtx); 1200bf16b11eSMatthew Ahrens if (dn->dn_free_ranges[txgoff] != NULL) { 1201bf16b11eSMatthew Ahrens range_tree_clear(dn->dn_free_ranges[txgoff], 1202bf16b11eSMatthew Ahrens db->db_blkid, 1); 1203bf16b11eSMatthew Ahrens } 1204fa9e4066Sahrens mutex_exit(&dn->dn_mtx); 1205c717a561Smaybee db->db_freed_in_flight = FALSE; 1206c717a561Smaybee } 1207fa9e4066Sahrens 1208fa9e4066Sahrens /* 1209fa9e4066Sahrens * This buffer is now part of this txg 1210fa9e4066Sahrens */ 1211fa9e4066Sahrens dbuf_add_ref(db, (void *)(uintptr_t)tx->tx_txg); 1212fa9e4066Sahrens db->db_dirtycnt += 1; 1213fa9e4066Sahrens ASSERT3U(db->db_dirtycnt, <=, 3); 1214fa9e4066Sahrens 1215fa9e4066Sahrens mutex_exit(&db->db_mtx); 1216fa9e4066Sahrens 12170a586ceaSMark Shellenbaum if (db->db_blkid == DMU_BONUS_BLKID || 12180a586ceaSMark Shellenbaum db->db_blkid == DMU_SPILL_BLKID) { 1219c717a561Smaybee mutex_enter(&dn->dn_mtx); 1220c717a561Smaybee ASSERT(!list_link_active(&dr->dr_dirty_node)); 1221c717a561Smaybee list_insert_tail(&dn->dn_dirty_records[txgoff], dr); 1222c717a561Smaybee mutex_exit(&dn->dn_mtx); 1223fa9e4066Sahrens dnode_setdirty(dn, tx); 1224744947dcSTom Erickson DB_DNODE_EXIT(db); 1225c717a561Smaybee return (dr); 1226d3469faaSMark Maybee } else if (do_free_accounting) { 1227d3469faaSMark Maybee blkptr_t *bp = db->db_blkptr; 1228d3469faaSMark Maybee int64_t willfree = (bp && !BP_IS_HOLE(bp)) ? 1229b24ab676SJeff Bonwick bp_get_dsize(os->os_spa, bp) : db->db.db_size; 1230d3469faaSMark Maybee /* 1231d3469faaSMark Maybee * This is only a guess -- if the dbuf is dirty 1232d3469faaSMark Maybee * in a previous txg, we don't know how much 1233d3469faaSMark Maybee * space it will use on disk yet. 
We should 1234d3469faaSMark Maybee * really have the struct_rwlock to access 1235d3469faaSMark Maybee * db_blkptr, but since this is just a guess, 1236d3469faaSMark Maybee * it's OK if we get an odd answer. 1237d3469faaSMark Maybee */ 1238837b568bSGeorge Wilson ddt_prefetch(os->os_spa, bp); 1239d3469faaSMark Maybee dnode_willuse_space(dn, -willfree, tx); 1240fa9e4066Sahrens } 1241fa9e4066Sahrens 1242fa9e4066Sahrens if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) { 1243fa9e4066Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 1244fa9e4066Sahrens drop_struct_lock = TRUE; 1245fa9e4066Sahrens } 1246fa9e4066Sahrens 12478346f03fSJonathan W Adams if (db->db_level == 0) { 12488346f03fSJonathan W Adams dnode_new_blkid(dn, db->db_blkid, tx, drop_struct_lock); 12498346f03fSJonathan W Adams ASSERT(dn->dn_maxblkid >= db->db_blkid); 12508346f03fSJonathan W Adams } 12518346f03fSJonathan W Adams 125244eda4d7Smaybee if (db->db_level+1 < dn->dn_nlevels) { 1253c717a561Smaybee dmu_buf_impl_t *parent = db->db_parent; 1254c717a561Smaybee dbuf_dirty_record_t *di; 1255c717a561Smaybee int parent_held = FALSE; 1256c717a561Smaybee 1257c717a561Smaybee if (db->db_parent == NULL || db->db_parent == dn->dn_dbuf) { 1258fa9e4066Sahrens int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; 1259c717a561Smaybee 1260fa9e4066Sahrens parent = dbuf_hold_level(dn, db->db_level+1, 1261fa9e4066Sahrens db->db_blkid >> epbs, FTAG); 126201025c89SJohn Harres ASSERT(parent != NULL); 1263c717a561Smaybee parent_held = TRUE; 1264c717a561Smaybee } 1265fa9e4066Sahrens if (drop_struct_lock) 1266fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 1267c717a561Smaybee ASSERT3U(db->db_level+1, ==, parent->db_level); 1268c717a561Smaybee di = dbuf_dirty(parent, tx); 1269c717a561Smaybee if (parent_held) 1270ea8dc4b6Seschrock dbuf_rele(parent, FTAG); 1271c717a561Smaybee 1272c717a561Smaybee mutex_enter(&db->db_mtx); 127369962b56SMatthew Ahrens /* 127469962b56SMatthew Ahrens * Since we've dropped the mutex, it's possible that 
127569962b56SMatthew Ahrens * dbuf_undirty() might have changed this out from under us. 127669962b56SMatthew Ahrens */ 1277c717a561Smaybee if (db->db_last_dirty == dr || 1278c717a561Smaybee dn->dn_object == DMU_META_DNODE_OBJECT) { 1279c717a561Smaybee mutex_enter(&di->dt.di.dr_mtx); 1280c717a561Smaybee ASSERT3U(di->dr_txg, ==, tx->tx_txg); 1281c717a561Smaybee ASSERT(!list_link_active(&dr->dr_dirty_node)); 1282c717a561Smaybee list_insert_tail(&di->dt.di.dr_children, dr); 1283c717a561Smaybee mutex_exit(&di->dt.di.dr_mtx); 1284c717a561Smaybee dr->dr_parent = di; 1285c717a561Smaybee } 1286c717a561Smaybee mutex_exit(&db->db_mtx); 1287fa9e4066Sahrens } else { 1288c717a561Smaybee ASSERT(db->db_level+1 == dn->dn_nlevels); 1289c717a561Smaybee ASSERT(db->db_blkid < dn->dn_nblkptr); 1290744947dcSTom Erickson ASSERT(db->db_parent == NULL || db->db_parent == dn->dn_dbuf); 1291c717a561Smaybee mutex_enter(&dn->dn_mtx); 1292c717a561Smaybee ASSERT(!list_link_active(&dr->dr_dirty_node)); 1293c717a561Smaybee list_insert_tail(&dn->dn_dirty_records[txgoff], dr); 1294c717a561Smaybee mutex_exit(&dn->dn_mtx); 1295fa9e4066Sahrens if (drop_struct_lock) 1296fa9e4066Sahrens rw_exit(&dn->dn_struct_rwlock); 1297fa9e4066Sahrens } 1298fa9e4066Sahrens 1299fa9e4066Sahrens dnode_setdirty(dn, tx); 1300744947dcSTom Erickson DB_DNODE_EXIT(db); 1301c717a561Smaybee return (dr); 1302fa9e4066Sahrens } 1303fa9e4066Sahrens 13043b2aab18SMatthew Ahrens /* 13053e30c24aSWill Andrews * Undirty a buffer in the transaction group referenced by the given 13063e30c24aSWill Andrews * transaction. Return whether this evicted the dbuf. 
13073b2aab18SMatthew Ahrens */ 13083b2aab18SMatthew Ahrens static boolean_t 1309fa9e4066Sahrens dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx) 1310fa9e4066Sahrens { 1311744947dcSTom Erickson dnode_t *dn; 1312c717a561Smaybee uint64_t txg = tx->tx_txg; 131317f17c2dSbonwick dbuf_dirty_record_t *dr, **drp; 1314fa9e4066Sahrens 1315c717a561Smaybee ASSERT(txg != 0); 13160a586ceaSMark Shellenbaum ASSERT(db->db_blkid != DMU_BONUS_BLKID); 13173b2aab18SMatthew Ahrens ASSERT0(db->db_level); 13183b2aab18SMatthew Ahrens ASSERT(MUTEX_HELD(&db->db_mtx)); 1319fa9e4066Sahrens 1320fa9e4066Sahrens /* 1321fa9e4066Sahrens * If this buffer is not dirty, we're done. 1322fa9e4066Sahrens */ 132317f17c2dSbonwick for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next) 1324c717a561Smaybee if (dr->dr_txg <= txg) 1325c717a561Smaybee break; 13263b2aab18SMatthew Ahrens if (dr == NULL || dr->dr_txg < txg) 13273b2aab18SMatthew Ahrens return (B_FALSE); 1328c717a561Smaybee ASSERT(dr->dr_txg == txg); 1329b24ab676SJeff Bonwick ASSERT(dr->dr_dbuf == db); 1330fa9e4066Sahrens 1331744947dcSTom Erickson DB_DNODE_ENTER(db); 1332744947dcSTom Erickson dn = DB_DNODE(db); 1333744947dcSTom Erickson 1334fa9e4066Sahrens dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size); 1335fa9e4066Sahrens 1336fa9e4066Sahrens ASSERT(db->db.db_size != 0); 1337fa9e4066Sahrens 133869962b56SMatthew Ahrens /* 133969962b56SMatthew Ahrens * Any space we accounted for in dp_dirty_* will be cleaned up by 134069962b56SMatthew Ahrens * dsl_pool_sync(). This is relatively rare so the discrepancy 134169962b56SMatthew Ahrens * is not a big deal. 134269962b56SMatthew Ahrens */ 1343fa9e4066Sahrens 134417f17c2dSbonwick *drp = dr->dr_next; 1345c717a561Smaybee 13463f2366c2SGordon Ross /* 13473f2366c2SGordon Ross * Note that there are three places in dbuf_dirty() 13483f2366c2SGordon Ross * where this dirty record may be put on a list. 
13493f2366c2SGordon Ross * Make sure to do a list_remove corresponding to 13503f2366c2SGordon Ross * every one of those list_insert calls. 13513f2366c2SGordon Ross */ 1352c717a561Smaybee if (dr->dr_parent) { 1353c717a561Smaybee mutex_enter(&dr->dr_parent->dt.di.dr_mtx); 1354c717a561Smaybee list_remove(&dr->dr_parent->dt.di.dr_children, dr); 1355c717a561Smaybee mutex_exit(&dr->dr_parent->dt.di.dr_mtx); 13563f2366c2SGordon Ross } else if (db->db_blkid == DMU_SPILL_BLKID || 13573f2366c2SGordon Ross db->db_level+1 == dn->dn_nlevels) { 1358cdb0ab79Smaybee ASSERT(db->db_blkptr == NULL || db->db_parent == dn->dn_dbuf); 1359fa9e4066Sahrens mutex_enter(&dn->dn_mtx); 1360c717a561Smaybee list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr); 1361fa9e4066Sahrens mutex_exit(&dn->dn_mtx); 1362c717a561Smaybee } 1363744947dcSTom Erickson DB_DNODE_EXIT(db); 1364c717a561Smaybee 136582c9918fSTim Haley if (db->db_state != DB_NOFILL) { 1366c717a561Smaybee dbuf_unoverride(dr); 1367c717a561Smaybee 1368c717a561Smaybee ASSERT(db->db_buf != NULL); 1369c717a561Smaybee ASSERT(dr->dt.dl.dr_data != NULL); 1370c717a561Smaybee if (dr->dt.dl.dr_data != db->db_buf) 13713b2aab18SMatthew Ahrens VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db)); 1372c717a561Smaybee } 1373d2b3cbbdSJorgen Lundman 1374d2b3cbbdSJorgen Lundman if (db->db_level != 0) { 1375d2b3cbbdSJorgen Lundman mutex_destroy(&dr->dt.di.dr_mtx); 1376d2b3cbbdSJorgen Lundman list_destroy(&dr->dt.di.dr_children); 1377d2b3cbbdSJorgen Lundman } 1378d2b3cbbdSJorgen Lundman 1379c717a561Smaybee kmem_free(dr, sizeof (dbuf_dirty_record_t)); 1380fa9e4066Sahrens 1381fa9e4066Sahrens ASSERT(db->db_dirtycnt > 0); 1382fa9e4066Sahrens db->db_dirtycnt -= 1; 1383fa9e4066Sahrens 1384c717a561Smaybee if (refcount_remove(&db->db_holds, (void *)(uintptr_t)txg) == 0) { 1385ea8dc4b6Seschrock arc_buf_t *buf = db->db_buf; 1386fa9e4066Sahrens 1387b24ab676SJeff Bonwick ASSERT(db->db_state == DB_NOFILL || arc_released(buf)); 1388ea8dc4b6Seschrock dbuf_set_data(db, 
NULL); 13893b2aab18SMatthew Ahrens VERIFY(arc_buf_remove_ref(buf, db)); 1390fa9e4066Sahrens dbuf_evict(db); 13913b2aab18SMatthew Ahrens return (B_TRUE); 1392fa9e4066Sahrens } 1393fa9e4066Sahrens 13943b2aab18SMatthew Ahrens return (B_FALSE); 1395fa9e4066Sahrens } 1396fa9e4066Sahrens 1397fa9e4066Sahrens void 139843466aaeSMax Grossman dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx) 1399fa9e4066Sahrens { 140043466aaeSMax Grossman dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; 14011ab7f2deSmaybee int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH; 1402fa9e4066Sahrens 1403fa9e4066Sahrens ASSERT(tx->tx_txg != 0); 1404fa9e4066Sahrens ASSERT(!refcount_is_zero(&db->db_holds)); 1405fa9e4066Sahrens 1406744947dcSTom Erickson DB_DNODE_ENTER(db); 1407744947dcSTom Erickson if (RW_WRITE_HELD(&DB_DNODE(db)->dn_struct_rwlock)) 1408fa9e4066Sahrens rf |= DB_RF_HAVESTRUCT; 1409744947dcSTom Erickson DB_DNODE_EXIT(db); 1410ea8dc4b6Seschrock (void) dbuf_read(db, NULL, rf); 1411c717a561Smaybee (void) dbuf_dirty(db, tx); 1412fa9e4066Sahrens } 1413fa9e4066Sahrens 1414fa9e4066Sahrens void 141582c9918fSTim Haley dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx) 141682c9918fSTim Haley { 141782c9918fSTim Haley dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; 141882c9918fSTim Haley 141982c9918fSTim Haley db->db_state = DB_NOFILL; 142082c9918fSTim Haley 142182c9918fSTim Haley dmu_buf_will_fill(db_fake, tx); 142282c9918fSTim Haley } 142382c9918fSTim Haley 142482c9918fSTim Haley void 1425ea8dc4b6Seschrock dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx) 1426fa9e4066Sahrens { 1427ea8dc4b6Seschrock dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; 1428ea8dc4b6Seschrock 14290a586ceaSMark Shellenbaum ASSERT(db->db_blkid != DMU_BONUS_BLKID); 1430fa9e4066Sahrens ASSERT(tx->tx_txg != 0); 1431fa9e4066Sahrens ASSERT(db->db_level == 0); 1432fa9e4066Sahrens ASSERT(!refcount_is_zero(&db->db_holds)); 1433fa9e4066Sahrens 1434ea8dc4b6Seschrock ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT || 
1435fa9e4066Sahrens dmu_tx_private_ok(tx)); 1436fa9e4066Sahrens 1437fa9e4066Sahrens dbuf_noread(db); 1438c717a561Smaybee (void) dbuf_dirty(db, tx); 1439fa9e4066Sahrens } 1440fa9e4066Sahrens 1441fa9e4066Sahrens #pragma weak dmu_buf_fill_done = dbuf_fill_done 1442fa9e4066Sahrens /* ARGSUSED */ 1443fa9e4066Sahrens void 1444fa9e4066Sahrens dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx) 1445fa9e4066Sahrens { 1446fa9e4066Sahrens mutex_enter(&db->db_mtx); 14479c9dc39aSek110237 DBUF_VERIFY(db); 1448fa9e4066Sahrens 1449fa9e4066Sahrens if (db->db_state == DB_FILL) { 1450c717a561Smaybee if (db->db_level == 0 && db->db_freed_in_flight) { 14510a586ceaSMark Shellenbaum ASSERT(db->db_blkid != DMU_BONUS_BLKID); 1452fa9e4066Sahrens /* we were freed while filling */ 1453fa9e4066Sahrens /* XXX dbuf_undirty? */ 1454fa9e4066Sahrens bzero(db->db.db_data, db->db.db_size); 1455c717a561Smaybee db->db_freed_in_flight = FALSE; 1456fa9e4066Sahrens } 1457fa9e4066Sahrens db->db_state = DB_CACHED; 1458fa9e4066Sahrens cv_broadcast(&db->db_changed); 1459fa9e4066Sahrens } 1460fa9e4066Sahrens mutex_exit(&db->db_mtx); 1461fa9e4066Sahrens } 1462fa9e4066Sahrens 14635d7b4d43SMatthew Ahrens void 14645d7b4d43SMatthew Ahrens dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data, 14655d7b4d43SMatthew Ahrens bp_embedded_type_t etype, enum zio_compress comp, 14665d7b4d43SMatthew Ahrens int uncompressed_size, int compressed_size, int byteorder, 14675d7b4d43SMatthew Ahrens dmu_tx_t *tx) 14685d7b4d43SMatthew Ahrens { 14695d7b4d43SMatthew Ahrens dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf; 14705d7b4d43SMatthew Ahrens struct dirty_leaf *dl; 14715d7b4d43SMatthew Ahrens dmu_object_type_t type; 14725d7b4d43SMatthew Ahrens 14735d7b4d43SMatthew Ahrens DB_DNODE_ENTER(db); 14745d7b4d43SMatthew Ahrens type = DB_DNODE(db)->dn_type; 14755d7b4d43SMatthew Ahrens DB_DNODE_EXIT(db); 14765d7b4d43SMatthew Ahrens 14775d7b4d43SMatthew Ahrens ASSERT0(db->db_level); 14785d7b4d43SMatthew Ahrens ASSERT(db->db_blkid != DMU_BONUS_BLKID); 
14795d7b4d43SMatthew Ahrens 14805d7b4d43SMatthew Ahrens dmu_buf_will_not_fill(dbuf, tx); 14815d7b4d43SMatthew Ahrens 14825d7b4d43SMatthew Ahrens ASSERT3U(db->db_last_dirty->dr_txg, ==, tx->tx_txg); 14835d7b4d43SMatthew Ahrens dl = &db->db_last_dirty->dt.dl; 14845d7b4d43SMatthew Ahrens encode_embedded_bp_compressed(&dl->dr_overridden_by, 14855d7b4d43SMatthew Ahrens data, comp, uncompressed_size, compressed_size); 14865d7b4d43SMatthew Ahrens BPE_SET_ETYPE(&dl->dr_overridden_by, etype); 14875d7b4d43SMatthew Ahrens BP_SET_TYPE(&dl->dr_overridden_by, type); 14885d7b4d43SMatthew Ahrens BP_SET_LEVEL(&dl->dr_overridden_by, 0); 14895d7b4d43SMatthew Ahrens BP_SET_BYTEORDER(&dl->dr_overridden_by, byteorder); 14905d7b4d43SMatthew Ahrens 14915d7b4d43SMatthew Ahrens dl->dr_override_state = DR_OVERRIDDEN; 14925d7b4d43SMatthew Ahrens dl->dr_overridden_by.blk_birth = db->db_last_dirty->dr_txg; 14935d7b4d43SMatthew Ahrens } 14945d7b4d43SMatthew Ahrens 1495ea8dc4b6Seschrock /* 14962fdbea25SAleksandr Guzovskiy * Directly assign a provided arc buf to a given dbuf if it's not referenced 14972fdbea25SAleksandr Guzovskiy * by anybody except our caller. Otherwise copy arcbuf's contents to dbuf. 
14982fdbea25SAleksandr Guzovskiy */ 14992fdbea25SAleksandr Guzovskiy void 15002fdbea25SAleksandr Guzovskiy dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) 15012fdbea25SAleksandr Guzovskiy { 15022fdbea25SAleksandr Guzovskiy ASSERT(!refcount_is_zero(&db->db_holds)); 15030a586ceaSMark Shellenbaum ASSERT(db->db_blkid != DMU_BONUS_BLKID); 15042fdbea25SAleksandr Guzovskiy ASSERT(db->db_level == 0); 15052fdbea25SAleksandr Guzovskiy ASSERT(DBUF_GET_BUFC_TYPE(db) == ARC_BUFC_DATA); 15062fdbea25SAleksandr Guzovskiy ASSERT(buf != NULL); 15072fdbea25SAleksandr Guzovskiy ASSERT(arc_buf_size(buf) == db->db.db_size); 15082fdbea25SAleksandr Guzovskiy ASSERT(tx->tx_txg != 0); 15092fdbea25SAleksandr Guzovskiy 15102fdbea25SAleksandr Guzovskiy arc_return_buf(buf, db); 15112fdbea25SAleksandr Guzovskiy ASSERT(arc_released(buf)); 15122fdbea25SAleksandr Guzovskiy 15132fdbea25SAleksandr Guzovskiy mutex_enter(&db->db_mtx); 15142fdbea25SAleksandr Guzovskiy 15152fdbea25SAleksandr Guzovskiy while (db->db_state == DB_READ || db->db_state == DB_FILL) 15162fdbea25SAleksandr Guzovskiy cv_wait(&db->db_changed, &db->db_mtx); 15172fdbea25SAleksandr Guzovskiy 15182fdbea25SAleksandr Guzovskiy ASSERT(db->db_state == DB_CACHED || db->db_state == DB_UNCACHED); 15192fdbea25SAleksandr Guzovskiy 15202fdbea25SAleksandr Guzovskiy if (db->db_state == DB_CACHED && 15212fdbea25SAleksandr Guzovskiy refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) { 15222fdbea25SAleksandr Guzovskiy mutex_exit(&db->db_mtx); 15232fdbea25SAleksandr Guzovskiy (void) dbuf_dirty(db, tx); 15242fdbea25SAleksandr Guzovskiy bcopy(buf->b_data, db->db.db_data, db->db.db_size); 15253b2aab18SMatthew Ahrens VERIFY(arc_buf_remove_ref(buf, db)); 1526c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio_stat_wbuf_copied(); 15272fdbea25SAleksandr Guzovskiy return; 15282fdbea25SAleksandr Guzovskiy } 15292fdbea25SAleksandr Guzovskiy 1530c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 
xuio_stat_wbuf_nocopy(); 15312fdbea25SAleksandr Guzovskiy if (db->db_state == DB_CACHED) { 15322fdbea25SAleksandr Guzovskiy dbuf_dirty_record_t *dr = db->db_last_dirty; 15332fdbea25SAleksandr Guzovskiy 15342fdbea25SAleksandr Guzovskiy ASSERT(db->db_buf != NULL); 15352fdbea25SAleksandr Guzovskiy if (dr != NULL && dr->dr_txg == tx->tx_txg) { 15362fdbea25SAleksandr Guzovskiy ASSERT(dr->dt.dl.dr_data == db->db_buf); 15372fdbea25SAleksandr Guzovskiy if (!arc_released(db->db_buf)) { 15382fdbea25SAleksandr Guzovskiy ASSERT(dr->dt.dl.dr_override_state == 15392fdbea25SAleksandr Guzovskiy DR_OVERRIDDEN); 15402fdbea25SAleksandr Guzovskiy arc_release(db->db_buf, db); 15412fdbea25SAleksandr Guzovskiy } 15422fdbea25SAleksandr Guzovskiy dr->dt.dl.dr_data = buf; 15433b2aab18SMatthew Ahrens VERIFY(arc_buf_remove_ref(db->db_buf, db)); 15442fdbea25SAleksandr Guzovskiy } else if (dr == NULL || dr->dt.dl.dr_data != db->db_buf) { 15452fdbea25SAleksandr Guzovskiy arc_release(db->db_buf, db); 15463b2aab18SMatthew Ahrens VERIFY(arc_buf_remove_ref(db->db_buf, db)); 15472fdbea25SAleksandr Guzovskiy } 15482fdbea25SAleksandr Guzovskiy db->db_buf = NULL; 15492fdbea25SAleksandr Guzovskiy } 15502fdbea25SAleksandr Guzovskiy ASSERT(db->db_buf == NULL); 15512fdbea25SAleksandr Guzovskiy dbuf_set_data(db, buf); 15522fdbea25SAleksandr Guzovskiy db->db_state = DB_FILL; 15532fdbea25SAleksandr Guzovskiy mutex_exit(&db->db_mtx); 15542fdbea25SAleksandr Guzovskiy (void) dbuf_dirty(db, tx); 155543466aaeSMax Grossman dmu_buf_fill_done(&db->db, tx); 15562fdbea25SAleksandr Guzovskiy } 15572fdbea25SAleksandr Guzovskiy 15582fdbea25SAleksandr Guzovskiy /* 1559ea8dc4b6Seschrock * "Clear" the contents of this dbuf. This will mark the dbuf 156069962b56SMatthew Ahrens * EVICTING and clear *most* of its references. Unfortunately, 1561ea8dc4b6Seschrock * when we are not holding the dn_dbufs_mtx, we can't clear the 1562ea8dc4b6Seschrock * entry in the dn_dbufs list. 
We have to wait until dbuf_destroy() 1563ea8dc4b6Seschrock * in this case. For callers from the DMU we will usually see: 1564bbfa8ea8SMatthew Ahrens * dbuf_clear()->arc_clear_callback()->dbuf_do_evict()->dbuf_destroy() 1565ea8dc4b6Seschrock * For the arc callback, we will usually see: 1566ea8dc4b6Seschrock * dbuf_do_evict()->dbuf_clear();dbuf_destroy() 1567ea8dc4b6Seschrock * Sometimes, though, we will get a mix of these two: 1568bbfa8ea8SMatthew Ahrens * DMU: dbuf_clear()->arc_clear_callback() 1569ea8dc4b6Seschrock * ARC: dbuf_do_evict()->dbuf_destroy() 1570bbfa8ea8SMatthew Ahrens * 1571bbfa8ea8SMatthew Ahrens * This routine will dissociate the dbuf from the arc, by calling 1572bbfa8ea8SMatthew Ahrens * arc_clear_callback(), but will not evict the data from the ARC. 1573ea8dc4b6Seschrock */ 1574ea8dc4b6Seschrock void 1575fa9e4066Sahrens dbuf_clear(dmu_buf_impl_t *db) 1576fa9e4066Sahrens { 1577744947dcSTom Erickson dnode_t *dn; 1578ea8dc4b6Seschrock dmu_buf_impl_t *parent = db->db_parent; 1579744947dcSTom Erickson dmu_buf_impl_t *dndb; 1580bbfa8ea8SMatthew Ahrens boolean_t dbuf_gone = B_FALSE; 1581fa9e4066Sahrens 1582fa9e4066Sahrens ASSERT(MUTEX_HELD(&db->db_mtx)); 1583fa9e4066Sahrens ASSERT(refcount_is_zero(&db->db_holds)); 1584fa9e4066Sahrens 1585ea8dc4b6Seschrock dbuf_evict_user(db); 1586ea8dc4b6Seschrock 1587fa9e4066Sahrens if (db->db_state == DB_CACHED) { 1588ea8dc4b6Seschrock ASSERT(db->db.db_data != NULL); 15890a586ceaSMark Shellenbaum if (db->db_blkid == DMU_BONUS_BLKID) { 1590ea8dc4b6Seschrock zio_buf_free(db->db.db_data, DN_MAX_BONUSLEN); 15915a98e54bSBrendan Gregg - Sun Microsystems arc_space_return(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); 15920e8c6158Smaybee } 1593fa9e4066Sahrens db->db.db_data = NULL; 1594fa9e4066Sahrens db->db_state = DB_UNCACHED; 1595fa9e4066Sahrens } 1596fa9e4066Sahrens 159782c9918fSTim Haley ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL); 1598fa9e4066Sahrens ASSERT(db->db_data_pending == NULL); 1599fa9e4066Sahrens 
1600ea8dc4b6Seschrock db->db_state = DB_EVICTING; 1601ea8dc4b6Seschrock db->db_blkptr = NULL; 1602ea8dc4b6Seschrock 1603744947dcSTom Erickson DB_DNODE_ENTER(db); 1604744947dcSTom Erickson dn = DB_DNODE(db); 1605744947dcSTom Erickson dndb = dn->dn_dbuf; 16060a586ceaSMark Shellenbaum if (db->db_blkid != DMU_BONUS_BLKID && MUTEX_HELD(&dn->dn_dbufs_mtx)) { 16070f6d88adSAlex Reece avl_remove(&dn->dn_dbufs, db); 1608744947dcSTom Erickson (void) atomic_dec_32_nv(&dn->dn_dbufs_count); 1609744947dcSTom Erickson membar_producer(); 1610744947dcSTom Erickson DB_DNODE_EXIT(db); 1611744947dcSTom Erickson /* 1612744947dcSTom Erickson * Decrementing the dbuf count means that the hold corresponding 1613744947dcSTom Erickson * to the removed dbuf is no longer discounted in dnode_move(), 1614744947dcSTom Erickson * so the dnode cannot be moved until after we release the hold. 1615744947dcSTom Erickson * The membar_producer() ensures visibility of the decremented 1616744947dcSTom Erickson * value in dnode_move(), since DB_DNODE_EXIT doesn't actually 1617744947dcSTom Erickson * release any lock. 1618744947dcSTom Erickson */ 1619ea8dc4b6Seschrock dnode_rele(dn, db); 1620744947dcSTom Erickson db->db_dnode_handle = NULL; 1621744947dcSTom Erickson } else { 1622744947dcSTom Erickson DB_DNODE_EXIT(db); 1623ea8dc4b6Seschrock } 1624ea8dc4b6Seschrock 1625ea8dc4b6Seschrock if (db->db_buf) 1626bbfa8ea8SMatthew Ahrens dbuf_gone = arc_clear_callback(db->db_buf); 1627ea8dc4b6Seschrock 1628ea8dc4b6Seschrock if (!dbuf_gone) 1629fa9e4066Sahrens mutex_exit(&db->db_mtx); 1630fa9e4066Sahrens 1631fa9e4066Sahrens /* 1632744947dcSTom Erickson * If this dbuf is referenced from an indirect dbuf, 1633fa9e4066Sahrens * decrement the ref count on the indirect dbuf. 
1634fa9e4066Sahrens */ 1635c543ec06Sahrens if (parent && parent != dndb) 1636ea8dc4b6Seschrock dbuf_rele(parent, db); 1637fa9e4066Sahrens } 1638fa9e4066Sahrens 1639fa9e4066Sahrens static int 1640fa9e4066Sahrens dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse, 1641fa9e4066Sahrens dmu_buf_impl_t **parentp, blkptr_t **bpp) 1642fa9e4066Sahrens { 1643fa9e4066Sahrens int nlevels, epbs; 1644fa9e4066Sahrens 16450b69c2f0Sahrens *parentp = NULL; 16460b69c2f0Sahrens *bpp = NULL; 16470b69c2f0Sahrens 16480a586ceaSMark Shellenbaum ASSERT(blkid != DMU_BONUS_BLKID); 16490a586ceaSMark Shellenbaum 16500a586ceaSMark Shellenbaum if (blkid == DMU_SPILL_BLKID) { 16510a586ceaSMark Shellenbaum mutex_enter(&dn->dn_mtx); 165206e0070dSMark Shellenbaum if (dn->dn_have_spill && 165306e0070dSMark Shellenbaum (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) 16540a586ceaSMark Shellenbaum *bpp = &dn->dn_phys->dn_spill; 16550a586ceaSMark Shellenbaum else 16560a586ceaSMark Shellenbaum *bpp = NULL; 16570a586ceaSMark Shellenbaum dbuf_add_ref(dn->dn_dbuf, NULL); 16580a586ceaSMark Shellenbaum *parentp = dn->dn_dbuf; 16590a586ceaSMark Shellenbaum mutex_exit(&dn->dn_mtx); 16600a586ceaSMark Shellenbaum return (0); 16610a586ceaSMark Shellenbaum } 1662ea8dc4b6Seschrock 1663fa9e4066Sahrens if (dn->dn_phys->dn_nlevels == 0) 1664fa9e4066Sahrens nlevels = 1; 1665fa9e4066Sahrens else 1666fa9e4066Sahrens nlevels = dn->dn_phys->dn_nlevels; 1667fa9e4066Sahrens 1668fa9e4066Sahrens epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT; 1669fa9e4066Sahrens 1670fa9e4066Sahrens ASSERT3U(level * epbs, <, 64); 1671fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); 1672ea8dc4b6Seschrock if (level >= nlevels || 1673fa9e4066Sahrens (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))) { 1674fa9e4066Sahrens /* the buffer has no parent yet */ 1675be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 1676fa9e4066Sahrens } else if (level < nlevels-1) { 1677fa9e4066Sahrens /* this block is referenced from an 
indirect block */ 1678fa9e4066Sahrens int err = dbuf_hold_impl(dn, level+1, 1679fa9e4066Sahrens blkid >> epbs, fail_sparse, NULL, parentp); 1680fa9e4066Sahrens if (err) 1681fa9e4066Sahrens return (err); 1682ea8dc4b6Seschrock err = dbuf_read(*parentp, NULL, 1683ea8dc4b6Seschrock (DB_RF_HAVESTRUCT | DB_RF_NOPREFETCH | DB_RF_CANFAIL)); 1684c543ec06Sahrens if (err) { 1685c543ec06Sahrens dbuf_rele(*parentp, NULL); 1686c543ec06Sahrens *parentp = NULL; 1687c543ec06Sahrens return (err); 1688c543ec06Sahrens } 1689fa9e4066Sahrens *bpp = ((blkptr_t *)(*parentp)->db.db_data) + 1690fa9e4066Sahrens (blkid & ((1ULL << epbs) - 1)); 1691c543ec06Sahrens return (0); 1692fa9e4066Sahrens } else { 1693fa9e4066Sahrens /* the block is referenced from the dnode */ 1694fa9e4066Sahrens ASSERT3U(level, ==, nlevels-1); 1695fa9e4066Sahrens ASSERT(dn->dn_phys->dn_nblkptr == 0 || 1696fa9e4066Sahrens blkid < dn->dn_phys->dn_nblkptr); 1697c543ec06Sahrens if (dn->dn_dbuf) { 1698c543ec06Sahrens dbuf_add_ref(dn->dn_dbuf, NULL); 1699fa9e4066Sahrens *parentp = dn->dn_dbuf; 1700c543ec06Sahrens } 1701fa9e4066Sahrens *bpp = &dn->dn_phys->dn_blkptr[blkid]; 1702fa9e4066Sahrens return (0); 1703fa9e4066Sahrens } 1704fa9e4066Sahrens } 1705fa9e4066Sahrens 1706fa9e4066Sahrens static dmu_buf_impl_t * 1707fa9e4066Sahrens dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, 1708fa9e4066Sahrens dmu_buf_impl_t *parent, blkptr_t *blkptr) 1709fa9e4066Sahrens { 1710503ad85cSMatthew Ahrens objset_t *os = dn->dn_objset; 1711fa9e4066Sahrens dmu_buf_impl_t *db, *odb; 1712fa9e4066Sahrens 1713fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); 1714fa9e4066Sahrens ASSERT(dn->dn_type != DMU_OT_NONE); 1715fa9e4066Sahrens 1716fa9e4066Sahrens db = kmem_cache_alloc(dbuf_cache, KM_SLEEP); 1717fa9e4066Sahrens 1718fa9e4066Sahrens db->db_objset = os; 1719fa9e4066Sahrens db->db.db_object = dn->dn_object; 1720fa9e4066Sahrens db->db_level = level; 1721fa9e4066Sahrens db->db_blkid = blkid; 1722c717a561Smaybee db->db_last_dirty = 
NULL; 1723ea8dc4b6Seschrock db->db_dirtycnt = 0; 1724744947dcSTom Erickson db->db_dnode_handle = dn->dn_handle; 1725ea8dc4b6Seschrock db->db_parent = parent; 1726ea8dc4b6Seschrock db->db_blkptr = blkptr; 1727fa9e4066Sahrens 1728c717a561Smaybee db->db_user_ptr = NULL; 1729c717a561Smaybee db->db_user_data_ptr_ptr = NULL; 1730c717a561Smaybee db->db_evict_func = NULL; 1731c717a561Smaybee db->db_immediate_evict = 0; 1732c717a561Smaybee db->db_freed_in_flight = 0; 1733ea8dc4b6Seschrock 17340a586ceaSMark Shellenbaum if (blkid == DMU_BONUS_BLKID) { 1735ea8dc4b6Seschrock ASSERT3P(parent, ==, dn->dn_dbuf); 17361934e92fSmaybee db->db.db_size = DN_MAX_BONUSLEN - 17371934e92fSmaybee (dn->dn_nblkptr-1) * sizeof (blkptr_t); 17381934e92fSmaybee ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen); 17390a586ceaSMark Shellenbaum db->db.db_offset = DMU_BONUS_BLKID; 1740ea8dc4b6Seschrock db->db_state = DB_UNCACHED; 1741ea8dc4b6Seschrock /* the bonus dbuf is not placed in the hash table */ 17425a98e54bSBrendan Gregg - Sun Microsystems arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER); 1743ea8dc4b6Seschrock return (db); 17440a586ceaSMark Shellenbaum } else if (blkid == DMU_SPILL_BLKID) { 17450a586ceaSMark Shellenbaum db->db.db_size = (blkptr != NULL) ? 17460a586ceaSMark Shellenbaum BP_GET_LSIZE(blkptr) : SPA_MINBLOCKSIZE; 17470a586ceaSMark Shellenbaum db->db.db_offset = 0; 1748fa9e4066Sahrens } else { 1749fa9e4066Sahrens int blocksize = 1750fa9e4066Sahrens db->db_level ? 1 << dn->dn_indblkshift : dn->dn_datablksz; 1751fa9e4066Sahrens db->db.db_size = blocksize; 1752fa9e4066Sahrens db->db.db_offset = db->db_blkid * blocksize; 1753fa9e4066Sahrens } 1754fa9e4066Sahrens 1755fa9e4066Sahrens /* 1756fa9e4066Sahrens * Hold the dn_dbufs_mtx while we get the new dbuf 1757fa9e4066Sahrens * in the hash table *and* added to the dbufs list. 
1758fa9e4066Sahrens * This prevents a possible deadlock with someone 1759fa9e4066Sahrens * trying to look up this dbuf before its added to the 1760fa9e4066Sahrens * dn_dbufs list. 1761fa9e4066Sahrens */ 1762fa9e4066Sahrens mutex_enter(&dn->dn_dbufs_mtx); 1763ea8dc4b6Seschrock db->db_state = DB_EVICTING; 1764fa9e4066Sahrens if ((odb = dbuf_hash_insert(db)) != NULL) { 1765fa9e4066Sahrens /* someone else inserted it first */ 1766fa9e4066Sahrens kmem_cache_free(dbuf_cache, db); 1767fa9e4066Sahrens mutex_exit(&dn->dn_dbufs_mtx); 1768fa9e4066Sahrens return (odb); 1769fa9e4066Sahrens } 17700f6d88adSAlex Reece avl_add(&dn->dn_dbufs, db); 1771713d6c20SMatthew Ahrens if (db->db_level == 0 && db->db_blkid >= 1772713d6c20SMatthew Ahrens dn->dn_unlisted_l0_blkid) 1773713d6c20SMatthew Ahrens dn->dn_unlisted_l0_blkid = db->db_blkid + 1; 1774ea8dc4b6Seschrock db->db_state = DB_UNCACHED; 1775fa9e4066Sahrens mutex_exit(&dn->dn_dbufs_mtx); 17765a98e54bSBrendan Gregg - Sun Microsystems arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER); 1777fa9e4066Sahrens 1778fa9e4066Sahrens if (parent && parent != dn->dn_dbuf) 1779fa9e4066Sahrens dbuf_add_ref(parent, db); 1780fa9e4066Sahrens 1781ea8dc4b6Seschrock ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT || 1782ea8dc4b6Seschrock refcount_count(&dn->dn_holds) > 0); 1783fa9e4066Sahrens (void) refcount_add(&dn->dn_holds, db); 1784744947dcSTom Erickson (void) atomic_inc_32_nv(&dn->dn_dbufs_count); 1785fa9e4066Sahrens 1786fa9e4066Sahrens dprintf_dbuf(db, "db=%p\n", db); 1787fa9e4066Sahrens 1788fa9e4066Sahrens return (db); 1789fa9e4066Sahrens } 1790fa9e4066Sahrens 1791fa9e4066Sahrens static int 1792ea8dc4b6Seschrock dbuf_do_evict(void *private) 1793fa9e4066Sahrens { 1794bbfa8ea8SMatthew Ahrens dmu_buf_impl_t *db = private; 1795fa9e4066Sahrens 1796ea8dc4b6Seschrock if (!MUTEX_HELD(&db->db_mtx)) 1797ea8dc4b6Seschrock mutex_enter(&db->db_mtx); 1798ea8dc4b6Seschrock 1799ea8dc4b6Seschrock ASSERT(refcount_is_zero(&db->db_holds)); 
1800ea8dc4b6Seschrock 1801ea8dc4b6Seschrock if (db->db_state != DB_EVICTING) { 1802ea8dc4b6Seschrock ASSERT(db->db_state == DB_CACHED); 18039c9dc39aSek110237 DBUF_VERIFY(db); 1804ea8dc4b6Seschrock db->db_buf = NULL; 1805ea8dc4b6Seschrock dbuf_evict(db); 1806ea8dc4b6Seschrock } else { 1807ea8dc4b6Seschrock mutex_exit(&db->db_mtx); 1808ea8dc4b6Seschrock dbuf_destroy(db); 1809fa9e4066Sahrens } 1810ea8dc4b6Seschrock return (0); 1811fa9e4066Sahrens } 1812fa9e4066Sahrens 1813fa9e4066Sahrens static void 1814fa9e4066Sahrens dbuf_destroy(dmu_buf_impl_t *db) 1815fa9e4066Sahrens { 1816fa9e4066Sahrens ASSERT(refcount_is_zero(&db->db_holds)); 1817fa9e4066Sahrens 18180a586ceaSMark Shellenbaum if (db->db_blkid != DMU_BONUS_BLKID) { 1819ea8dc4b6Seschrock /* 1820ea8dc4b6Seschrock * If this dbuf is still on the dn_dbufs list, 1821ea8dc4b6Seschrock * remove it from that list. 1822ea8dc4b6Seschrock */ 1823744947dcSTom Erickson if (db->db_dnode_handle != NULL) { 1824744947dcSTom Erickson dnode_t *dn; 18251934e92fSmaybee 1826744947dcSTom Erickson DB_DNODE_ENTER(db); 1827744947dcSTom Erickson dn = DB_DNODE(db); 18281934e92fSmaybee mutex_enter(&dn->dn_dbufs_mtx); 18290f6d88adSAlex Reece avl_remove(&dn->dn_dbufs, db); 1830744947dcSTom Erickson (void) atomic_dec_32_nv(&dn->dn_dbufs_count); 1831ea8dc4b6Seschrock mutex_exit(&dn->dn_dbufs_mtx); 1832744947dcSTom Erickson DB_DNODE_EXIT(db); 1833744947dcSTom Erickson /* 1834744947dcSTom Erickson * Decrementing the dbuf count means that the hold 1835744947dcSTom Erickson * corresponding to the removed dbuf is no longer 1836744947dcSTom Erickson * discounted in dnode_move(), so the dnode cannot be 1837744947dcSTom Erickson * moved until after we release the hold. 
1838744947dcSTom Erickson */ 1839ea8dc4b6Seschrock dnode_rele(dn, db); 1840744947dcSTom Erickson db->db_dnode_handle = NULL; 1841ea8dc4b6Seschrock } 1842ea8dc4b6Seschrock dbuf_hash_remove(db); 1843ea8dc4b6Seschrock } 1844ea8dc4b6Seschrock db->db_parent = NULL; 1845ea8dc4b6Seschrock db->db_buf = NULL; 1846ea8dc4b6Seschrock 1847fa9e4066Sahrens ASSERT(db->db.db_data == NULL); 1848fa9e4066Sahrens ASSERT(db->db_hash_next == NULL); 1849fa9e4066Sahrens ASSERT(db->db_blkptr == NULL); 1850fa9e4066Sahrens ASSERT(db->db_data_pending == NULL); 1851fa9e4066Sahrens 1852fa9e4066Sahrens kmem_cache_free(dbuf_cache, db); 18535a98e54bSBrendan Gregg - Sun Microsystems arc_space_return(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER); 1854fa9e4066Sahrens } 1855fa9e4066Sahrens 1856fa9e4066Sahrens void 185769962b56SMatthew Ahrens dbuf_prefetch(dnode_t *dn, uint64_t blkid, zio_priority_t prio) 1858fa9e4066Sahrens { 185913506d1eSmaybee dmu_buf_impl_t *db = NULL; 1860fa9e4066Sahrens blkptr_t *bp = NULL; 1861fa9e4066Sahrens 18620a586ceaSMark Shellenbaum ASSERT(blkid != DMU_BONUS_BLKID); 1863fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); 1864fa9e4066Sahrens 1865fa9e4066Sahrens if (dnode_block_freed(dn, blkid)) 1866fa9e4066Sahrens return; 1867fa9e4066Sahrens 1868fa9e4066Sahrens /* dbuf_find() returns with db_mtx held */ 1869fa9e4066Sahrens if (db = dbuf_find(dn, 0, blkid)) { 1870fa9e4066Sahrens /* 18719e9c486fSGeorge Wilson * This dbuf is already in the cache. We assume that 18729e9c486fSGeorge Wilson * it is already CACHED, or else about to be either 1873fa9e4066Sahrens * read or filled. 
1874fa9e4066Sahrens */ 1875fa9e4066Sahrens mutex_exit(&db->db_mtx); 1876fa9e4066Sahrens return; 1877fa9e4066Sahrens } 1878fa9e4066Sahrens 187913506d1eSmaybee if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp) == 0) { 18805d7b4d43SMatthew Ahrens if (bp && !BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) { 1881b24ab676SJeff Bonwick dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; 188213506d1eSmaybee uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; 18837802d7bfSMatthew Ahrens zbookmark_phys_t zb; 1884b24ab676SJeff Bonwick 1885b24ab676SJeff Bonwick SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, 1886b24ab676SJeff Bonwick dn->dn_object, 0, blkid); 1887ea8dc4b6Seschrock 18881b912ec7SGeorge Wilson (void) arc_read(NULL, dn->dn_objset->os_spa, 188969962b56SMatthew Ahrens bp, NULL, NULL, prio, 1890fa9e4066Sahrens ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, 189113506d1eSmaybee &aflags, &zb); 1892fa9e4066Sahrens } 189313506d1eSmaybee if (db) 189413506d1eSmaybee dbuf_rele(db, NULL); 1895fa9e4066Sahrens } 1896fa9e4066Sahrens } 1897fa9e4066Sahrens 1898fa9e4066Sahrens /* 1899fa9e4066Sahrens * Returns with db_holds incremented, and db_mtx not held. 1900fa9e4066Sahrens * Note: dn_struct_rwlock must be held. 
1901fa9e4066Sahrens */ 1902fa9e4066Sahrens int 1903fa9e4066Sahrens dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse, 1904fa9e4066Sahrens void *tag, dmu_buf_impl_t **dbp) 1905fa9e4066Sahrens { 1906fa9e4066Sahrens dmu_buf_impl_t *db, *parent = NULL; 1907fa9e4066Sahrens 19080a586ceaSMark Shellenbaum ASSERT(blkid != DMU_BONUS_BLKID); 1909fa9e4066Sahrens ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock)); 1910fa9e4066Sahrens ASSERT3U(dn->dn_nlevels, >, level); 1911fa9e4066Sahrens 1912fa9e4066Sahrens *dbp = NULL; 1913ea8dc4b6Seschrock top: 1914fa9e4066Sahrens /* dbuf_find() returns with db_mtx held */ 1915fa9e4066Sahrens db = dbuf_find(dn, level, blkid); 1916fa9e4066Sahrens 1917fa9e4066Sahrens if (db == NULL) { 1918fa9e4066Sahrens blkptr_t *bp = NULL; 1919fa9e4066Sahrens int err; 1920fa9e4066Sahrens 1921c543ec06Sahrens ASSERT3P(parent, ==, NULL); 1922fa9e4066Sahrens err = dbuf_findbp(dn, level, blkid, fail_sparse, &parent, &bp); 1923fa9e4066Sahrens if (fail_sparse) { 1924fa9e4066Sahrens if (err == 0 && bp && BP_IS_HOLE(bp)) 1925be6fd75aSMatthew Ahrens err = SET_ERROR(ENOENT); 1926fa9e4066Sahrens if (err) { 1927c543ec06Sahrens if (parent) 1928ea8dc4b6Seschrock dbuf_rele(parent, NULL); 1929fa9e4066Sahrens return (err); 1930fa9e4066Sahrens } 1931fa9e4066Sahrens } 1932ea8dc4b6Seschrock if (err && err != ENOENT) 1933ea8dc4b6Seschrock return (err); 1934fa9e4066Sahrens db = dbuf_create(dn, level, blkid, parent, bp); 1935fa9e4066Sahrens } 1936fa9e4066Sahrens 1937ea8dc4b6Seschrock if (db->db_buf && refcount_is_zero(&db->db_holds)) { 1938ea8dc4b6Seschrock arc_buf_add_ref(db->db_buf, db); 1939ea8dc4b6Seschrock if (db->db_buf->b_data == NULL) { 1940ea8dc4b6Seschrock dbuf_clear(db); 1941c543ec06Sahrens if (parent) { 1942c543ec06Sahrens dbuf_rele(parent, NULL); 1943c543ec06Sahrens parent = NULL; 1944c543ec06Sahrens } 1945ea8dc4b6Seschrock goto top; 1946ea8dc4b6Seschrock } 1947ea8dc4b6Seschrock ASSERT3P(db->db.db_data, ==, db->db_buf->b_data); 1948ea8dc4b6Seschrock } 
1949ea8dc4b6Seschrock 1950ea8dc4b6Seschrock ASSERT(db->db_buf == NULL || arc_referenced(db->db_buf)); 1951ea8dc4b6Seschrock 1952fa9e4066Sahrens /* 1953c717a561Smaybee * If this buffer is currently syncing out, and we are are 1954c717a561Smaybee * still referencing it from db_data, we need to make a copy 1955c717a561Smaybee * of it in case we decide we want to dirty it again in this txg. 1956fa9e4066Sahrens */ 19570a586ceaSMark Shellenbaum if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID && 1958ea8dc4b6Seschrock dn->dn_object != DMU_META_DNODE_OBJECT && 1959c717a561Smaybee db->db_state == DB_CACHED && db->db_data_pending) { 1960c717a561Smaybee dbuf_dirty_record_t *dr = db->db_data_pending; 1961c717a561Smaybee 1962c717a561Smaybee if (dr->dt.dl.dr_data == db->db_buf) { 1963ad23a2dbSjohansen arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); 1964fa9e4066Sahrens 1965c717a561Smaybee dbuf_set_data(db, 1966744947dcSTom Erickson arc_buf_alloc(dn->dn_objset->os_spa, 1967c717a561Smaybee db->db.db_size, db, type)); 1968c717a561Smaybee bcopy(dr->dt.dl.dr_data->b_data, db->db.db_data, 1969fa9e4066Sahrens db->db.db_size); 1970fa9e4066Sahrens } 1971c717a561Smaybee } 1972fa9e4066Sahrens 1973ea8dc4b6Seschrock (void) refcount_add(&db->db_holds, tag); 1974fa9e4066Sahrens dbuf_update_data(db); 19759c9dc39aSek110237 DBUF_VERIFY(db); 1976fa9e4066Sahrens mutex_exit(&db->db_mtx); 1977fa9e4066Sahrens 1978fa9e4066Sahrens /* NOTE: we can't rele the parent until after we drop the db_mtx */ 1979c543ec06Sahrens if (parent) 1980ea8dc4b6Seschrock dbuf_rele(parent, NULL); 1981fa9e4066Sahrens 1982744947dcSTom Erickson ASSERT3P(DB_DNODE(db), ==, dn); 1983fa9e4066Sahrens ASSERT3U(db->db_blkid, ==, blkid); 1984fa9e4066Sahrens ASSERT3U(db->db_level, ==, level); 1985fa9e4066Sahrens *dbp = db; 1986fa9e4066Sahrens 1987fa9e4066Sahrens return (0); 1988fa9e4066Sahrens } 1989fa9e4066Sahrens 1990fa9e4066Sahrens dmu_buf_impl_t * 1991ea8dc4b6Seschrock dbuf_hold(dnode_t *dn, uint64_t blkid, void *tag) 
1992fa9e4066Sahrens { 1993fa9e4066Sahrens dmu_buf_impl_t *db; 1994ea8dc4b6Seschrock int err = dbuf_hold_impl(dn, 0, blkid, FALSE, tag, &db); 1995ea8dc4b6Seschrock return (err ? NULL : db); 1996fa9e4066Sahrens } 1997fa9e4066Sahrens 1998fa9e4066Sahrens dmu_buf_impl_t * 1999fa9e4066Sahrens dbuf_hold_level(dnode_t *dn, int level, uint64_t blkid, void *tag) 2000fa9e4066Sahrens { 2001fa9e4066Sahrens dmu_buf_impl_t *db; 2002ea8dc4b6Seschrock int err = dbuf_hold_impl(dn, level, blkid, FALSE, tag, &db); 2003ea8dc4b6Seschrock return (err ? NULL : db); 2004fa9e4066Sahrens } 2005fa9e4066Sahrens 20061934e92fSmaybee void 2007ea8dc4b6Seschrock dbuf_create_bonus(dnode_t *dn) 2008fa9e4066Sahrens { 2009ea8dc4b6Seschrock ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); 2010ea8dc4b6Seschrock 2011ea8dc4b6Seschrock ASSERT(dn->dn_bonus == NULL); 20120a586ceaSMark Shellenbaum dn->dn_bonus = dbuf_create(dn, 0, DMU_BONUS_BLKID, dn->dn_dbuf, NULL); 20130a586ceaSMark Shellenbaum } 20140a586ceaSMark Shellenbaum 20150a586ceaSMark Shellenbaum int 20160a586ceaSMark Shellenbaum dbuf_spill_set_blksz(dmu_buf_t *db_fake, uint64_t blksz, dmu_tx_t *tx) 20170a586ceaSMark Shellenbaum { 20180a586ceaSMark Shellenbaum dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; 2019744947dcSTom Erickson dnode_t *dn; 2020744947dcSTom Erickson 20210a586ceaSMark Shellenbaum if (db->db_blkid != DMU_SPILL_BLKID) 2022be6fd75aSMatthew Ahrens return (SET_ERROR(ENOTSUP)); 20230a586ceaSMark Shellenbaum if (blksz == 0) 20240a586ceaSMark Shellenbaum blksz = SPA_MINBLOCKSIZE; 20250a586ceaSMark Shellenbaum if (blksz > SPA_MAXBLOCKSIZE) 20260a586ceaSMark Shellenbaum blksz = SPA_MAXBLOCKSIZE; 20270a586ceaSMark Shellenbaum else 20280a586ceaSMark Shellenbaum blksz = P2ROUNDUP(blksz, SPA_MINBLOCKSIZE); 20290a586ceaSMark Shellenbaum 2030744947dcSTom Erickson DB_DNODE_ENTER(db); 2031744947dcSTom Erickson dn = DB_DNODE(db); 2032744947dcSTom Erickson rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 20330a586ceaSMark Shellenbaum dbuf_new_size(db, 
blksz, tx); 2034744947dcSTom Erickson rw_exit(&dn->dn_struct_rwlock); 2035744947dcSTom Erickson DB_DNODE_EXIT(db); 20360a586ceaSMark Shellenbaum 20370a586ceaSMark Shellenbaum return (0); 20380a586ceaSMark Shellenbaum } 20390a586ceaSMark Shellenbaum 20400a586ceaSMark Shellenbaum void 20410a586ceaSMark Shellenbaum dbuf_rm_spill(dnode_t *dn, dmu_tx_t *tx) 20420a586ceaSMark Shellenbaum { 20430a586ceaSMark Shellenbaum dbuf_free_range(dn, DMU_SPILL_BLKID, DMU_SPILL_BLKID, tx); 2044fa9e4066Sahrens } 2045fa9e4066Sahrens 2046ea8dc4b6Seschrock #pragma weak dmu_buf_add_ref = dbuf_add_ref 2047fa9e4066Sahrens void 2048fa9e4066Sahrens dbuf_add_ref(dmu_buf_impl_t *db, void *tag) 2049fa9e4066Sahrens { 2050ea8dc4b6Seschrock int64_t holds = refcount_add(&db->db_holds, tag); 2051ea8dc4b6Seschrock ASSERT(holds > 1); 2052fa9e4066Sahrens } 2053fa9e4066Sahrens 2054744947dcSTom Erickson /* 2055744947dcSTom Erickson * If you call dbuf_rele() you had better not be referencing the dnode handle 2056744947dcSTom Erickson * unless you have some other direct or indirect hold on the dnode. (An indirect 2057744947dcSTom Erickson * hold is a hold on one of the dnode's dbufs, including the bonus buffer.) 2058744947dcSTom Erickson * Without that, the dbuf_rele() could lead to a dnode_rele() followed by the 2059744947dcSTom Erickson * dnode's parent dbuf evicting its dnode handles. 
2060744947dcSTom Erickson */ 2061fa9e4066Sahrens void 2062ea8dc4b6Seschrock dbuf_rele(dmu_buf_impl_t *db, void *tag) 2063fa9e4066Sahrens { 2064b24ab676SJeff Bonwick mutex_enter(&db->db_mtx); 2065b24ab676SJeff Bonwick dbuf_rele_and_unlock(db, tag); 2066b24ab676SJeff Bonwick } 2067b24ab676SJeff Bonwick 206843466aaeSMax Grossman void 206943466aaeSMax Grossman dmu_buf_rele(dmu_buf_t *db, void *tag) 207043466aaeSMax Grossman { 207143466aaeSMax Grossman dbuf_rele((dmu_buf_impl_t *)db, tag); 207243466aaeSMax Grossman } 207343466aaeSMax Grossman 2074b24ab676SJeff Bonwick /* 2075b24ab676SJeff Bonwick * dbuf_rele() for an already-locked dbuf. This is necessary to allow 2076b24ab676SJeff Bonwick * db_dirtycnt and db_holds to be updated atomically. 2077b24ab676SJeff Bonwick */ 2078b24ab676SJeff Bonwick void 2079b24ab676SJeff Bonwick dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) 2080b24ab676SJeff Bonwick { 2081fa9e4066Sahrens int64_t holds; 2082fa9e4066Sahrens 2083b24ab676SJeff Bonwick ASSERT(MUTEX_HELD(&db->db_mtx)); 20849c9dc39aSek110237 DBUF_VERIFY(db); 2085fa9e4066Sahrens 2086744947dcSTom Erickson /* 2087744947dcSTom Erickson * Remove the reference to the dbuf before removing its hold on the 2088744947dcSTom Erickson * dnode so we can guarantee in dnode_move() that a referenced bonus 2089744947dcSTom Erickson * buffer has a corresponding dnode hold. 2090744947dcSTom Erickson */ 2091fa9e4066Sahrens holds = refcount_remove(&db->db_holds, tag); 2092ea8dc4b6Seschrock ASSERT(holds >= 0); 2093fa9e4066Sahrens 2094c717a561Smaybee /* 2095c717a561Smaybee * We can't freeze indirects if there is a possibility that they 2096c717a561Smaybee * may be modified in the current syncing context. 2097c717a561Smaybee */ 2098c717a561Smaybee if (db->db_buf && holds == (db->db_level == 0 ? 
db->db_dirtycnt : 0)) 20996b4acc8bSahrens arc_buf_freeze(db->db_buf); 21006b4acc8bSahrens 2101fa9e4066Sahrens if (holds == db->db_dirtycnt && 2102c717a561Smaybee db->db_level == 0 && db->db_immediate_evict) 2103fa9e4066Sahrens dbuf_evict_user(db); 2104ea8dc4b6Seschrock 2105ea8dc4b6Seschrock if (holds == 0) { 21060a586ceaSMark Shellenbaum if (db->db_blkid == DMU_BONUS_BLKID) { 2107ea8dc4b6Seschrock mutex_exit(&db->db_mtx); 2108744947dcSTom Erickson 2109744947dcSTom Erickson /* 2110744947dcSTom Erickson * If the dnode moves here, we cannot cross this barrier 2111744947dcSTom Erickson * until the move completes. 2112744947dcSTom Erickson */ 2113744947dcSTom Erickson DB_DNODE_ENTER(db); 2114744947dcSTom Erickson (void) atomic_dec_32_nv(&DB_DNODE(db)->dn_dbufs_count); 2115744947dcSTom Erickson DB_DNODE_EXIT(db); 2116744947dcSTom Erickson /* 2117744947dcSTom Erickson * The bonus buffer's dnode hold is no longer discounted 2118744947dcSTom Erickson * in dnode_move(). The dnode cannot move until after 2119744947dcSTom Erickson * the dnode_rele(). 2120744947dcSTom Erickson */ 2121744947dcSTom Erickson dnode_rele(DB_DNODE(db), db); 2122ea8dc4b6Seschrock } else if (db->db_buf == NULL) { 2123ea8dc4b6Seschrock /* 2124ea8dc4b6Seschrock * This is a special case: we never associated this 2125ea8dc4b6Seschrock * dbuf with any data allocated from the ARC. 2126ea8dc4b6Seschrock */ 212782c9918fSTim Haley ASSERT(db->db_state == DB_UNCACHED || 212882c9918fSTim Haley db->db_state == DB_NOFILL); 2129ea8dc4b6Seschrock dbuf_evict(db); 2130ea8dc4b6Seschrock } else if (arc_released(db->db_buf)) { 2131ea8dc4b6Seschrock arc_buf_t *buf = db->db_buf; 2132ea8dc4b6Seschrock /* 2133ea8dc4b6Seschrock * This dbuf has anonymous data associated with it. 
2134ea8dc4b6Seschrock */ 2135ea8dc4b6Seschrock dbuf_set_data(db, NULL); 21363b2aab18SMatthew Ahrens VERIFY(arc_buf_remove_ref(buf, db)); 2137ea8dc4b6Seschrock dbuf_evict(db); 2138ea8dc4b6Seschrock } else { 21393b2aab18SMatthew Ahrens VERIFY(!arc_buf_remove_ref(db->db_buf, db)); 21409253d63dSGeorge Wilson 21419253d63dSGeorge Wilson /* 21429253d63dSGeorge Wilson * A dbuf will be eligible for eviction if either the 21439253d63dSGeorge Wilson * 'primarycache' property is set or a duplicate 21449253d63dSGeorge Wilson * copy of this buffer is already cached in the arc. 21459253d63dSGeorge Wilson * 21469253d63dSGeorge Wilson * In the case of the 'primarycache' a buffer 21479253d63dSGeorge Wilson * is considered for eviction if it matches the 21489253d63dSGeorge Wilson * criteria set in the property. 21499253d63dSGeorge Wilson * 21509253d63dSGeorge Wilson * To decide if our buffer is considered a 21519253d63dSGeorge Wilson * duplicate, we must call into the arc to determine 21529253d63dSGeorge Wilson * if multiple buffers are referencing the same 21539253d63dSGeorge Wilson * block on-disk. If so, then we simply evict 21549253d63dSGeorge Wilson * ourselves. 
21559253d63dSGeorge Wilson */ 2156bbfa8ea8SMatthew Ahrens if (!DBUF_IS_CACHEABLE(db)) { 2157bbfa8ea8SMatthew Ahrens if (db->db_blkptr != NULL && 2158bbfa8ea8SMatthew Ahrens !BP_IS_HOLE(db->db_blkptr) && 2159bbfa8ea8SMatthew Ahrens !BP_IS_EMBEDDED(db->db_blkptr)) { 2160bbfa8ea8SMatthew Ahrens spa_t *spa = 2161bbfa8ea8SMatthew Ahrens dmu_objset_spa(db->db_objset); 2162bbfa8ea8SMatthew Ahrens blkptr_t bp = *db->db_blkptr; 21633baa08fcSek110237 dbuf_clear(db); 2164bbfa8ea8SMatthew Ahrens arc_freed(spa, &bp); 2165bbfa8ea8SMatthew Ahrens } else { 2166bbfa8ea8SMatthew Ahrens dbuf_clear(db); 2167bbfa8ea8SMatthew Ahrens } 2168bbfa8ea8SMatthew Ahrens } else if (arc_buf_eviction_needed(db->db_buf)) { 2169bbfa8ea8SMatthew Ahrens dbuf_clear(db); 2170bbfa8ea8SMatthew Ahrens } else { 2171fa9e4066Sahrens mutex_exit(&db->db_mtx); 2172fa9e4066Sahrens } 2173bbfa8ea8SMatthew Ahrens } 2174ea8dc4b6Seschrock } else { 2175ea8dc4b6Seschrock mutex_exit(&db->db_mtx); 2176fa9e4066Sahrens } 2177fa9e4066Sahrens } 2178fa9e4066Sahrens 2179fa9e4066Sahrens #pragma weak dmu_buf_refcount = dbuf_refcount 2180fa9e4066Sahrens uint64_t 2181fa9e4066Sahrens dbuf_refcount(dmu_buf_impl_t *db) 2182fa9e4066Sahrens { 2183fa9e4066Sahrens return (refcount_count(&db->db_holds)); 2184fa9e4066Sahrens } 2185fa9e4066Sahrens 2186fa9e4066Sahrens void * 2187fa9e4066Sahrens dmu_buf_set_user(dmu_buf_t *db_fake, void *user_ptr, void *user_data_ptr_ptr, 2188fa9e4066Sahrens dmu_buf_evict_func_t *evict_func) 2189fa9e4066Sahrens { 2190fa9e4066Sahrens return (dmu_buf_update_user(db_fake, NULL, user_ptr, 2191fa9e4066Sahrens user_data_ptr_ptr, evict_func)); 2192fa9e4066Sahrens } 2193fa9e4066Sahrens 2194fa9e4066Sahrens void * 2195fa9e4066Sahrens dmu_buf_set_user_ie(dmu_buf_t *db_fake, void *user_ptr, void *user_data_ptr_ptr, 2196fa9e4066Sahrens dmu_buf_evict_func_t *evict_func) 2197fa9e4066Sahrens { 2198fa9e4066Sahrens dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; 2199fa9e4066Sahrens 2200c717a561Smaybee db->db_immediate_evict = 
TRUE; 2201fa9e4066Sahrens return (dmu_buf_update_user(db_fake, NULL, user_ptr, 2202fa9e4066Sahrens user_data_ptr_ptr, evict_func)); 2203fa9e4066Sahrens } 2204fa9e4066Sahrens 2205fa9e4066Sahrens void * 2206fa9e4066Sahrens dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr, void *user_ptr, 2207fa9e4066Sahrens void *user_data_ptr_ptr, dmu_buf_evict_func_t *evict_func) 2208fa9e4066Sahrens { 2209fa9e4066Sahrens dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; 2210fa9e4066Sahrens ASSERT(db->db_level == 0); 2211fa9e4066Sahrens 2212fa9e4066Sahrens ASSERT((user_ptr == NULL) == (evict_func == NULL)); 2213fa9e4066Sahrens 2214fa9e4066Sahrens mutex_enter(&db->db_mtx); 2215fa9e4066Sahrens 2216c717a561Smaybee if (db->db_user_ptr == old_user_ptr) { 2217c717a561Smaybee db->db_user_ptr = user_ptr; 2218c717a561Smaybee db->db_user_data_ptr_ptr = user_data_ptr_ptr; 2219c717a561Smaybee db->db_evict_func = evict_func; 2220fa9e4066Sahrens 2221fa9e4066Sahrens dbuf_update_data(db); 2222fa9e4066Sahrens } else { 2223c717a561Smaybee old_user_ptr = db->db_user_ptr; 2224fa9e4066Sahrens } 2225fa9e4066Sahrens 2226fa9e4066Sahrens mutex_exit(&db->db_mtx); 2227fa9e4066Sahrens return (old_user_ptr); 2228fa9e4066Sahrens } 2229fa9e4066Sahrens 2230fa9e4066Sahrens void * 2231fa9e4066Sahrens dmu_buf_get_user(dmu_buf_t *db_fake) 2232fa9e4066Sahrens { 2233fa9e4066Sahrens dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; 2234fa9e4066Sahrens ASSERT(!refcount_is_zero(&db->db_holds)); 2235fa9e4066Sahrens 2236c717a561Smaybee return (db->db_user_ptr); 2237fa9e4066Sahrens } 2238fa9e4066Sahrens 22393d692628SSanjeev Bagewadi boolean_t 22403d692628SSanjeev Bagewadi dmu_buf_freeable(dmu_buf_t *dbuf) 22413d692628SSanjeev Bagewadi { 22423d692628SSanjeev Bagewadi boolean_t res = B_FALSE; 22433d692628SSanjeev Bagewadi dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf; 22443d692628SSanjeev Bagewadi 22453d692628SSanjeev Bagewadi if (db->db_blkptr) 22463d692628SSanjeev Bagewadi res = 
dsl_dataset_block_freeable(db->db_objset->os_dsl_dataset, 2247c7cd2421SGeorge Wilson db->db_blkptr, db->db_blkptr->blk_birth); 22483d692628SSanjeev Bagewadi 22493d692628SSanjeev Bagewadi return (res); 22503d692628SSanjeev Bagewadi } 22513d692628SSanjeev Bagewadi 225280901aeaSGeorge Wilson blkptr_t * 225380901aeaSGeorge Wilson dmu_buf_get_blkptr(dmu_buf_t *db) 225480901aeaSGeorge Wilson { 225580901aeaSGeorge Wilson dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db; 225680901aeaSGeorge Wilson return (dbi->db_blkptr); 225780901aeaSGeorge Wilson } 225880901aeaSGeorge Wilson 2259c717a561Smaybee static void 2260c717a561Smaybee dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db) 2261fa9e4066Sahrens { 2262c717a561Smaybee /* ASSERT(dmu_tx_is_syncing(tx) */ 2263c717a561Smaybee ASSERT(MUTEX_HELD(&db->db_mtx)); 2264c717a561Smaybee 2265c717a561Smaybee if (db->db_blkptr != NULL) 2266c717a561Smaybee return; 2267c717a561Smaybee 22680a586ceaSMark Shellenbaum if (db->db_blkid == DMU_SPILL_BLKID) { 22690a586ceaSMark Shellenbaum db->db_blkptr = &dn->dn_phys->dn_spill; 22700a586ceaSMark Shellenbaum BP_ZERO(db->db_blkptr); 22710a586ceaSMark Shellenbaum return; 22720a586ceaSMark Shellenbaum } 2273c717a561Smaybee if (db->db_level == dn->dn_phys->dn_nlevels-1) { 2274c717a561Smaybee /* 2275c717a561Smaybee * This buffer was allocated at a time when there was 2276c717a561Smaybee * no available blkptrs from the dnode, or it was 2277c717a561Smaybee * inappropriate to hook it in (i.e., nlevels mis-match). 
2278c717a561Smaybee */ 2279c717a561Smaybee ASSERT(db->db_blkid < dn->dn_phys->dn_nblkptr); 2280c717a561Smaybee ASSERT(db->db_parent == NULL); 2281c717a561Smaybee db->db_parent = dn->dn_dbuf; 2282c717a561Smaybee db->db_blkptr = &dn->dn_phys->dn_blkptr[db->db_blkid]; 2283c717a561Smaybee DBUF_VERIFY(db); 2284c717a561Smaybee } else { 2285c717a561Smaybee dmu_buf_impl_t *parent = db->db_parent; 2286c717a561Smaybee int epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; 2287c717a561Smaybee 2288c717a561Smaybee ASSERT(dn->dn_phys->dn_nlevels > 1); 2289c717a561Smaybee if (parent == NULL) { 2290c717a561Smaybee mutex_exit(&db->db_mtx); 2291c717a561Smaybee rw_enter(&dn->dn_struct_rwlock, RW_READER); 2292c717a561Smaybee (void) dbuf_hold_impl(dn, db->db_level+1, 2293c717a561Smaybee db->db_blkid >> epbs, FALSE, db, &parent); 2294c717a561Smaybee rw_exit(&dn->dn_struct_rwlock); 2295c717a561Smaybee mutex_enter(&db->db_mtx); 2296c717a561Smaybee db->db_parent = parent; 2297c717a561Smaybee } 2298c717a561Smaybee db->db_blkptr = (blkptr_t *)parent->db.db_data + 2299c717a561Smaybee (db->db_blkid & ((1ULL << epbs) - 1)); 2300c717a561Smaybee DBUF_VERIFY(db); 2301c717a561Smaybee } 2302c717a561Smaybee } 2303c717a561Smaybee 2304c717a561Smaybee static void 2305c717a561Smaybee dbuf_sync_indirect(dbuf_dirty_record_t *dr, dmu_tx_t *tx) 2306c717a561Smaybee { 2307c717a561Smaybee dmu_buf_impl_t *db = dr->dr_dbuf; 2308744947dcSTom Erickson dnode_t *dn; 2309c717a561Smaybee zio_t *zio; 2310c717a561Smaybee 2311c717a561Smaybee ASSERT(dmu_tx_is_syncing(tx)); 2312c717a561Smaybee 2313c717a561Smaybee dprintf_dbuf_bp(db, db->db_blkptr, "blkptr=%p", db->db_blkptr); 2314c717a561Smaybee 2315c717a561Smaybee mutex_enter(&db->db_mtx); 2316c717a561Smaybee 2317c717a561Smaybee ASSERT(db->db_level > 0); 2318c717a561Smaybee DBUF_VERIFY(db); 2319c717a561Smaybee 23203e30c24aSWill Andrews /* Read the block if it hasn't been read yet. 
*/ 2321c717a561Smaybee if (db->db_buf == NULL) { 2322c717a561Smaybee mutex_exit(&db->db_mtx); 2323c717a561Smaybee (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED); 2324c717a561Smaybee mutex_enter(&db->db_mtx); 2325c717a561Smaybee } 2326c717a561Smaybee ASSERT3U(db->db_state, ==, DB_CACHED); 2327c717a561Smaybee ASSERT(db->db_buf != NULL); 2328c717a561Smaybee 2329744947dcSTom Erickson DB_DNODE_ENTER(db); 2330744947dcSTom Erickson dn = DB_DNODE(db); 23313e30c24aSWill Andrews /* Indirect block size must match what the dnode thinks it is. */ 2332744947dcSTom Erickson ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift); 2333c717a561Smaybee dbuf_check_blkptr(dn, db); 2334744947dcSTom Erickson DB_DNODE_EXIT(db); 2335c717a561Smaybee 23363e30c24aSWill Andrews /* Provide the pending dirty record to child dbufs */ 2337c717a561Smaybee db->db_data_pending = dr; 2338c717a561Smaybee 2339af2c4821Smaybee mutex_exit(&db->db_mtx); 2340088f3894Sahrens dbuf_write(dr, db->db_buf, tx); 2341c717a561Smaybee 2342c717a561Smaybee zio = dr->dr_zio; 2343c717a561Smaybee mutex_enter(&dr->dt.di.dr_mtx); 2344c717a561Smaybee dbuf_sync_list(&dr->dt.di.dr_children, tx); 2345c717a561Smaybee ASSERT(list_head(&dr->dt.di.dr_children) == NULL); 2346c717a561Smaybee mutex_exit(&dr->dt.di.dr_mtx); 2347c717a561Smaybee zio_nowait(zio); 2348c717a561Smaybee } 2349c717a561Smaybee 2350c717a561Smaybee static void 2351c717a561Smaybee dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx) 2352c717a561Smaybee { 2353c717a561Smaybee arc_buf_t **datap = &dr->dt.dl.dr_data; 2354c717a561Smaybee dmu_buf_impl_t *db = dr->dr_dbuf; 2355744947dcSTom Erickson dnode_t *dn; 2356744947dcSTom Erickson objset_t *os; 2357c717a561Smaybee uint64_t txg = tx->tx_txg; 2358fa9e4066Sahrens 2359fa9e4066Sahrens ASSERT(dmu_tx_is_syncing(tx)); 2360fa9e4066Sahrens 2361fa9e4066Sahrens dprintf_dbuf_bp(db, db->db_blkptr, "blkptr=%p", db->db_blkptr); 2362fa9e4066Sahrens 2363fa9e4066Sahrens mutex_enter(&db->db_mtx); 2364fa9e4066Sahrens /* 
2365fa9e4066Sahrens * To be synced, we must be dirtied. But we 2366fa9e4066Sahrens * might have been freed after the dirty. 2367fa9e4066Sahrens */ 2368fa9e4066Sahrens if (db->db_state == DB_UNCACHED) { 2369fa9e4066Sahrens /* This buffer has been freed since it was dirtied */ 2370fa9e4066Sahrens ASSERT(db->db.db_data == NULL); 2371fa9e4066Sahrens } else if (db->db_state == DB_FILL) { 2372fa9e4066Sahrens /* This buffer was freed and is now being re-filled */ 2373c717a561Smaybee ASSERT(db->db.db_data != dr->dt.dl.dr_data); 2374fa9e4066Sahrens } else { 237582c9918fSTim Haley ASSERT(db->db_state == DB_CACHED || db->db_state == DB_NOFILL); 2376fa9e4066Sahrens } 23779c9dc39aSek110237 DBUF_VERIFY(db); 2378fa9e4066Sahrens 2379744947dcSTom Erickson DB_DNODE_ENTER(db); 2380744947dcSTom Erickson dn = DB_DNODE(db); 2381744947dcSTom Erickson 23820a586ceaSMark Shellenbaum if (db->db_blkid == DMU_SPILL_BLKID) { 23830a586ceaSMark Shellenbaum mutex_enter(&dn->dn_mtx); 23840a586ceaSMark Shellenbaum dn->dn_phys->dn_flags |= DNODE_FLAG_SPILL_BLKPTR; 23850a586ceaSMark Shellenbaum mutex_exit(&dn->dn_mtx); 23860a586ceaSMark Shellenbaum } 23870a586ceaSMark Shellenbaum 2388fa9e4066Sahrens /* 2389c717a561Smaybee * If this is a bonus buffer, simply copy the bonus data into the 2390c717a561Smaybee * dnode. It will be written out when the dnode is synced (and it 2391c717a561Smaybee * will be synced, since it must have been dirty for dbuf_sync to 2392c717a561Smaybee * be called). 
2393fa9e4066Sahrens */ 23940a586ceaSMark Shellenbaum if (db->db_blkid == DMU_BONUS_BLKID) { 2395c717a561Smaybee dbuf_dirty_record_t **drp; 23961934e92fSmaybee 2397ea8dc4b6Seschrock ASSERT(*datap != NULL); 2398fb09f5aaSMadhav Suresh ASSERT0(db->db_level); 2399ea8dc4b6Seschrock ASSERT3U(dn->dn_phys->dn_bonuslen, <=, DN_MAX_BONUSLEN); 2400ea8dc4b6Seschrock bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen); 2401744947dcSTom Erickson DB_DNODE_EXIT(db); 2402744947dcSTom Erickson 24030e8c6158Smaybee if (*datap != db->db.db_data) { 2404ea8dc4b6Seschrock zio_buf_free(*datap, DN_MAX_BONUSLEN); 24055a98e54bSBrendan Gregg - Sun Microsystems arc_space_return(DN_MAX_BONUSLEN, ARC_SPACE_OTHER); 24060e8c6158Smaybee } 2407ea8dc4b6Seschrock db->db_data_pending = NULL; 2408c717a561Smaybee drp = &db->db_last_dirty; 2409c717a561Smaybee while (*drp != dr) 2410c717a561Smaybee drp = &(*drp)->dr_next; 241117f17c2dSbonwick ASSERT(dr->dr_next == NULL); 2412b24ab676SJeff Bonwick ASSERT(dr->dr_dbuf == db); 241317f17c2dSbonwick *drp = dr->dr_next; 2414c717a561Smaybee kmem_free(dr, sizeof (dbuf_dirty_record_t)); 2415ea8dc4b6Seschrock ASSERT(db->db_dirtycnt > 0); 2416ea8dc4b6Seschrock db->db_dirtycnt -= 1; 2417b24ab676SJeff Bonwick dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg); 2418ea8dc4b6Seschrock return; 2419ea8dc4b6Seschrock } 2420ea8dc4b6Seschrock 2421744947dcSTom Erickson os = dn->dn_objset; 2422744947dcSTom Erickson 2423c5c6ffa0Smaybee /* 2424f82bfe17Sgw25295 * This function may have dropped the db_mtx lock allowing a dmu_sync 2425f82bfe17Sgw25295 * operation to sneak in. As a result, we need to ensure that we 2426f82bfe17Sgw25295 * don't check the dr_override_state until we have returned from 2427f82bfe17Sgw25295 * dbuf_check_blkptr. 
2428f82bfe17Sgw25295 */ 2429f82bfe17Sgw25295 dbuf_check_blkptr(dn, db); 2430f82bfe17Sgw25295 2431f82bfe17Sgw25295 /* 2432744947dcSTom Erickson * If this buffer is in the middle of an immediate write, 2433c717a561Smaybee * wait for the synchronous IO to complete. 2434c5c6ffa0Smaybee */ 2435c717a561Smaybee while (dr->dt.dl.dr_override_state == DR_IN_DMU_SYNC) { 2436c5c6ffa0Smaybee ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); 2437c5c6ffa0Smaybee cv_wait(&db->db_changed, &db->db_mtx); 2438c717a561Smaybee ASSERT(dr->dt.dl.dr_override_state != DR_NOT_OVERRIDDEN); 2439c5c6ffa0Smaybee } 2440c717a561Smaybee 2441ab69d62fSMatthew Ahrens if (db->db_state != DB_NOFILL && 2442ab69d62fSMatthew Ahrens dn->dn_object != DMU_META_DNODE_OBJECT && 2443ab69d62fSMatthew Ahrens refcount_count(&db->db_holds) > 1 && 2444b24ab676SJeff Bonwick dr->dt.dl.dr_override_state != DR_OVERRIDDEN && 2445ab69d62fSMatthew Ahrens *datap == db->db_buf) { 2446fa9e4066Sahrens /* 244782c9918fSTim Haley * If this buffer is currently "in use" (i.e., there 244882c9918fSTim Haley * are active holds and db_data still references it), 244982c9918fSTim Haley * then make a copy before we start the write so that 245082c9918fSTim Haley * any modifications from the open txg will not leak 245182c9918fSTim Haley * into this write. 2452fa9e4066Sahrens * 245382c9918fSTim Haley * NOTE: this copy does not need to be made for 245482c9918fSTim Haley * objects only modified in the syncing context (e.g. 245582c9918fSTim Haley * DNONE_DNODE blocks). 
2456fa9e4066Sahrens */ 2457ab69d62fSMatthew Ahrens int blksz = arc_buf_size(*datap); 2458ab69d62fSMatthew Ahrens arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db); 2459ab69d62fSMatthew Ahrens *datap = arc_buf_alloc(os->os_spa, blksz, db, type); 2460c717a561Smaybee bcopy(db->db.db_data, (*datap)->b_data, blksz); 2461fa9e4066Sahrens } 2462c717a561Smaybee db->db_data_pending = dr; 2463fa9e4066Sahrens 2464fa9e4066Sahrens mutex_exit(&db->db_mtx); 2465fa9e4066Sahrens 2466088f3894Sahrens dbuf_write(dr, *datap, tx); 2467c717a561Smaybee 2468c717a561Smaybee ASSERT(!list_link_active(&dr->dr_dirty_node)); 2469744947dcSTom Erickson if (dn->dn_object == DMU_META_DNODE_OBJECT) { 2470c717a561Smaybee list_insert_tail(&dn->dn_dirty_records[txg&TXG_MASK], dr); 2471744947dcSTom Erickson DB_DNODE_EXIT(db); 2472744947dcSTom Erickson } else { 2473744947dcSTom Erickson /* 2474744947dcSTom Erickson * Although zio_nowait() does not "wait for an IO", it does 2475744947dcSTom Erickson * initiate the IO. If this is an empty write it seems plausible 2476744947dcSTom Erickson * that the IO could actually be completed before the nowait 2477744947dcSTom Erickson * returns. We need to DB_DNODE_EXIT() first in case 2478744947dcSTom Erickson * zio_nowait() invalidates the dbuf. 2479744947dcSTom Erickson */ 2480744947dcSTom Erickson DB_DNODE_EXIT(db); 2481c717a561Smaybee zio_nowait(dr->dr_zio); 2482fa9e4066Sahrens } 2483744947dcSTom Erickson } 2484c717a561Smaybee 2485c717a561Smaybee void 2486c717a561Smaybee dbuf_sync_list(list_t *list, dmu_tx_t *tx) 2487c717a561Smaybee { 2488c717a561Smaybee dbuf_dirty_record_t *dr; 2489c717a561Smaybee 2490c717a561Smaybee while (dr = list_head(list)) { 2491c717a561Smaybee if (dr->dr_zio != NULL) { 2492c717a561Smaybee /* 2493c717a561Smaybee * If we find an already initialized zio then we 2494c717a561Smaybee * are processing the meta-dnode, and we have finished. 
2495c717a561Smaybee * The dbufs for all dnodes are put back on the list 2496c717a561Smaybee * during processing, so that we can zio_wait() 2497c717a561Smaybee * these IOs after initiating all child IOs. 2498c717a561Smaybee */ 2499c717a561Smaybee ASSERT3U(dr->dr_dbuf->db.db_object, ==, 2500c717a561Smaybee DMU_META_DNODE_OBJECT); 2501c717a561Smaybee break; 2502fa9e4066Sahrens } 2503c717a561Smaybee list_remove(list, dr); 2504c717a561Smaybee if (dr->dr_dbuf->db_level > 0) 2505c717a561Smaybee dbuf_sync_indirect(dr, tx); 2506c717a561Smaybee else 2507c717a561Smaybee dbuf_sync_leaf(dr, tx); 2508c717a561Smaybee } 2509c717a561Smaybee } 2510c717a561Smaybee 2511fa9e4066Sahrens /* ARGSUSED */ 2512fa9e4066Sahrens static void 2513c717a561Smaybee dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb) 2514fa9e4066Sahrens { 2515fa9e4066Sahrens dmu_buf_impl_t *db = vdb; 2516744947dcSTom Erickson dnode_t *dn; 2517e14bb325SJeff Bonwick blkptr_t *bp = zio->io_bp; 2518c717a561Smaybee blkptr_t *bp_orig = &zio->io_bp_orig; 2519b24ab676SJeff Bonwick spa_t *spa = zio->io_spa; 2520b24ab676SJeff Bonwick int64_t delta; 2521fa9e4066Sahrens uint64_t fill = 0; 2522b24ab676SJeff Bonwick int i; 2523fa9e4066Sahrens 25245d7b4d43SMatthew Ahrens ASSERT3P(db->db_blkptr, ==, bp); 2525e14bb325SJeff Bonwick 2526744947dcSTom Erickson DB_DNODE_ENTER(db); 2527744947dcSTom Erickson dn = DB_DNODE(db); 2528b24ab676SJeff Bonwick delta = bp_get_dsize_sync(spa, bp) - bp_get_dsize_sync(spa, bp_orig); 2529b24ab676SJeff Bonwick dnode_diduse_space(dn, delta - zio->io_prev_space_delta); 2530b24ab676SJeff Bonwick zio->io_prev_space_delta = delta; 2531fa9e4066Sahrens 253243466aaeSMax Grossman if (bp->blk_birth != 0) { 25330a586ceaSMark Shellenbaum ASSERT((db->db_blkid != DMU_SPILL_BLKID && 25340a586ceaSMark Shellenbaum BP_GET_TYPE(bp) == dn->dn_type) || 25350a586ceaSMark Shellenbaum (db->db_blkid == DMU_SPILL_BLKID && 25365d7b4d43SMatthew Ahrens BP_GET_TYPE(bp) == dn->dn_bonustype) || 25375d7b4d43SMatthew Ahrens 
BP_IS_EMBEDDED(bp)); 2538e14bb325SJeff Bonwick ASSERT(BP_GET_LEVEL(bp) == db->db_level); 253943466aaeSMax Grossman } 2540e14bb325SJeff Bonwick 2541fa9e4066Sahrens mutex_enter(&db->db_mtx); 2542fa9e4066Sahrens 25430a586ceaSMark Shellenbaum #ifdef ZFS_DEBUG 25440a586ceaSMark Shellenbaum if (db->db_blkid == DMU_SPILL_BLKID) { 25450a586ceaSMark Shellenbaum ASSERT(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR); 25460a586ceaSMark Shellenbaum ASSERT(!(BP_IS_HOLE(db->db_blkptr)) && 25470a586ceaSMark Shellenbaum db->db_blkptr == &dn->dn_phys->dn_spill); 25480a586ceaSMark Shellenbaum } 25490a586ceaSMark Shellenbaum #endif 25500a586ceaSMark Shellenbaum 2551fa9e4066Sahrens if (db->db_level == 0) { 2552fa9e4066Sahrens mutex_enter(&dn->dn_mtx); 25530a586ceaSMark Shellenbaum if (db->db_blkid > dn->dn_phys->dn_maxblkid && 25540a586ceaSMark Shellenbaum db->db_blkid != DMU_SPILL_BLKID) 2555fa9e4066Sahrens dn->dn_phys->dn_maxblkid = db->db_blkid; 2556fa9e4066Sahrens mutex_exit(&dn->dn_mtx); 2557fa9e4066Sahrens 2558fa9e4066Sahrens if (dn->dn_type == DMU_OT_DNODE) { 2559fa9e4066Sahrens dnode_phys_t *dnp = db->db.db_data; 2560fa9e4066Sahrens for (i = db->db.db_size >> DNODE_SHIFT; i > 0; 2561fa9e4066Sahrens i--, dnp++) { 2562fa9e4066Sahrens if (dnp->dn_type != DMU_OT_NONE) 2563fa9e4066Sahrens fill++; 2564fa9e4066Sahrens } 2565fa9e4066Sahrens } else { 256643466aaeSMax Grossman if (BP_IS_HOLE(bp)) { 256743466aaeSMax Grossman fill = 0; 256843466aaeSMax Grossman } else { 2569fa9e4066Sahrens fill = 1; 2570fa9e4066Sahrens } 257143466aaeSMax Grossman } 2572fa9e4066Sahrens } else { 2573e14bb325SJeff Bonwick blkptr_t *ibp = db->db.db_data; 2574fa9e4066Sahrens ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift); 2575e14bb325SJeff Bonwick for (i = db->db.db_size >> SPA_BLKPTRSHIFT; i > 0; i--, ibp++) { 2576e14bb325SJeff Bonwick if (BP_IS_HOLE(ibp)) 2577fa9e4066Sahrens continue; 25785d7b4d43SMatthew Ahrens fill += BP_GET_FILL(ibp); 2579fa9e4066Sahrens } 2580fa9e4066Sahrens } 
2581744947dcSTom Erickson DB_DNODE_EXIT(db); 2582fa9e4066Sahrens 25835d7b4d43SMatthew Ahrens if (!BP_IS_EMBEDDED(bp)) 2584e14bb325SJeff Bonwick bp->blk_fill = fill; 2585fa9e4066Sahrens 2586fa9e4066Sahrens mutex_exit(&db->db_mtx); 2587fa9e4066Sahrens } 2588fa9e4066Sahrens 258969962b56SMatthew Ahrens /* 259069962b56SMatthew Ahrens * The SPA will call this callback several times for each zio - once 259169962b56SMatthew Ahrens * for every physical child i/o (zio->io_phys_children times). This 259269962b56SMatthew Ahrens * allows the DMU to monitor the progress of each logical i/o. For example, 259369962b56SMatthew Ahrens * there may be 2 copies of an indirect block, or many fragments of a RAID-Z 259469962b56SMatthew Ahrens * block. There may be a long delay before all copies/fragments are completed, 259569962b56SMatthew Ahrens * so this callback allows us to retire dirty space gradually, as the physical 259669962b56SMatthew Ahrens * i/os complete. 259769962b56SMatthew Ahrens */ 259869962b56SMatthew Ahrens /* ARGSUSED */ 259969962b56SMatthew Ahrens static void 260069962b56SMatthew Ahrens dbuf_write_physdone(zio_t *zio, arc_buf_t *buf, void *arg) 260169962b56SMatthew Ahrens { 260269962b56SMatthew Ahrens dmu_buf_impl_t *db = arg; 260369962b56SMatthew Ahrens objset_t *os = db->db_objset; 260469962b56SMatthew Ahrens dsl_pool_t *dp = dmu_objset_pool(os); 260569962b56SMatthew Ahrens dbuf_dirty_record_t *dr; 260669962b56SMatthew Ahrens int delta = 0; 260769962b56SMatthew Ahrens 260869962b56SMatthew Ahrens dr = db->db_data_pending; 260969962b56SMatthew Ahrens ASSERT3U(dr->dr_txg, ==, zio->io_txg); 261069962b56SMatthew Ahrens 261169962b56SMatthew Ahrens /* 261269962b56SMatthew Ahrens * The callback will be called io_phys_children times. Retire one 261369962b56SMatthew Ahrens * portion of our dirty space each time we are called. Any rounding 261469962b56SMatthew Ahrens * error will be cleaned up by dsl_pool_sync()'s call to 261569962b56SMatthew Ahrens * dsl_pool_undirty_space(). 
261669962b56SMatthew Ahrens */ 261769962b56SMatthew Ahrens delta = dr->dr_accounted / zio->io_phys_children; 261869962b56SMatthew Ahrens dsl_pool_undirty_space(dp, delta, zio->io_txg); 261969962b56SMatthew Ahrens } 262069962b56SMatthew Ahrens 2621c717a561Smaybee /* ARGSUSED */ 2622c717a561Smaybee static void 2623c717a561Smaybee dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb) 2624c717a561Smaybee { 2625c717a561Smaybee dmu_buf_impl_t *db = vdb; 2626b24ab676SJeff Bonwick blkptr_t *bp_orig = &zio->io_bp_orig; 262743466aaeSMax Grossman blkptr_t *bp = db->db_blkptr; 262843466aaeSMax Grossman objset_t *os = db->db_objset; 262943466aaeSMax Grossman dmu_tx_t *tx = os->os_synctx; 2630c717a561Smaybee dbuf_dirty_record_t **drp, *dr; 2631c717a561Smaybee 2632fb09f5aaSMadhav Suresh ASSERT0(zio->io_error); 2633b24ab676SJeff Bonwick ASSERT(db->db_blkptr == bp); 2634b24ab676SJeff Bonwick 263580901aeaSGeorge Wilson /* 263680901aeaSGeorge Wilson * For nopwrites and rewrites we ensure that the bp matches our 263780901aeaSGeorge Wilson * original and bypass all the accounting. 
263880901aeaSGeorge Wilson */ 263980901aeaSGeorge Wilson if (zio->io_flags & (ZIO_FLAG_IO_REWRITE | ZIO_FLAG_NOPWRITE)) { 2640b24ab676SJeff Bonwick ASSERT(BP_EQUAL(bp, bp_orig)); 2641b24ab676SJeff Bonwick } else { 264243466aaeSMax Grossman dsl_dataset_t *ds = os->os_dsl_dataset; 2643b24ab676SJeff Bonwick (void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE); 2644b24ab676SJeff Bonwick dsl_dataset_block_born(ds, bp, tx); 2645b24ab676SJeff Bonwick } 2646c717a561Smaybee 2647c717a561Smaybee mutex_enter(&db->db_mtx); 2648c717a561Smaybee 2649b24ab676SJeff Bonwick DBUF_VERIFY(db); 2650b24ab676SJeff Bonwick 2651c717a561Smaybee drp = &db->db_last_dirty; 265217f17c2dSbonwick while ((dr = *drp) != db->db_data_pending) 265317f17c2dSbonwick drp = &dr->dr_next; 265417f17c2dSbonwick ASSERT(!list_link_active(&dr->dr_dirty_node)); 2655b24ab676SJeff Bonwick ASSERT(dr->dr_dbuf == db); 265617f17c2dSbonwick ASSERT(dr->dr_next == NULL); 265717f17c2dSbonwick *drp = dr->dr_next; 2658c717a561Smaybee 26590a586ceaSMark Shellenbaum #ifdef ZFS_DEBUG 26600a586ceaSMark Shellenbaum if (db->db_blkid == DMU_SPILL_BLKID) { 2661744947dcSTom Erickson dnode_t *dn; 2662744947dcSTom Erickson 2663744947dcSTom Erickson DB_DNODE_ENTER(db); 2664744947dcSTom Erickson dn = DB_DNODE(db); 26650a586ceaSMark Shellenbaum ASSERT(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR); 26660a586ceaSMark Shellenbaum ASSERT(!(BP_IS_HOLE(db->db_blkptr)) && 26670a586ceaSMark Shellenbaum db->db_blkptr == &dn->dn_phys->dn_spill); 2668744947dcSTom Erickson DB_DNODE_EXIT(db); 26690a586ceaSMark Shellenbaum } 26700a586ceaSMark Shellenbaum #endif 26710a586ceaSMark Shellenbaum 2672c717a561Smaybee if (db->db_level == 0) { 26730a586ceaSMark Shellenbaum ASSERT(db->db_blkid != DMU_BONUS_BLKID); 2674c717a561Smaybee ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN); 267582c9918fSTim Haley if (db->db_state != DB_NOFILL) { 2676c717a561Smaybee if (dr->dt.dl.dr_data != db->db_buf) 267782c9918fSTim Haley 
VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, 26783b2aab18SMatthew Ahrens db)); 2679b24ab676SJeff Bonwick else if (!arc_released(db->db_buf)) 2680c717a561Smaybee arc_set_callback(db->db_buf, dbuf_do_evict, db); 268182c9918fSTim Haley } 2682c717a561Smaybee } else { 2683744947dcSTom Erickson dnode_t *dn; 2684744947dcSTom Erickson 2685744947dcSTom Erickson DB_DNODE_ENTER(db); 2686744947dcSTom Erickson dn = DB_DNODE(db); 2687c717a561Smaybee ASSERT(list_head(&dr->dt.di.dr_children) == NULL); 2688c717a561Smaybee ASSERT3U(db->db.db_size, ==, 1 << dn->dn_phys->dn_indblkshift); 2689c717a561Smaybee if (!BP_IS_HOLE(db->db_blkptr)) { 2690c717a561Smaybee int epbs = 2691c717a561Smaybee dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; 269243466aaeSMax Grossman ASSERT3U(db->db_blkid, <=, 269343466aaeSMax Grossman dn->dn_phys->dn_maxblkid >> (db->db_level * epbs)); 2694c717a561Smaybee ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==, 2695c717a561Smaybee db->db.db_size); 26965d7b4d43SMatthew Ahrens if (!arc_released(db->db_buf)) 2697c717a561Smaybee arc_set_callback(db->db_buf, dbuf_do_evict, db); 2698c717a561Smaybee } 2699744947dcSTom Erickson DB_DNODE_EXIT(db); 2700c25056deSgw25295 mutex_destroy(&dr->dt.di.dr_mtx); 2701c25056deSgw25295 list_destroy(&dr->dt.di.dr_children); 2702c717a561Smaybee } 2703c717a561Smaybee kmem_free(dr, sizeof (dbuf_dirty_record_t)); 2704c717a561Smaybee 2705c717a561Smaybee cv_broadcast(&db->db_changed); 2706c717a561Smaybee ASSERT(db->db_dirtycnt > 0); 2707c717a561Smaybee db->db_dirtycnt -= 1; 2708c717a561Smaybee db->db_data_pending = NULL; 270943466aaeSMax Grossman dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg); 2710b24ab676SJeff Bonwick } 2711b24ab676SJeff Bonwick 2712b24ab676SJeff Bonwick static void 2713b24ab676SJeff Bonwick dbuf_write_nofill_ready(zio_t *zio) 2714b24ab676SJeff Bonwick { 2715b24ab676SJeff Bonwick dbuf_write_ready(zio, NULL, zio->io_private); 2716b24ab676SJeff Bonwick } 2717b24ab676SJeff Bonwick 2718b24ab676SJeff Bonwick static void 
2719b24ab676SJeff Bonwick dbuf_write_nofill_done(zio_t *zio) 2720b24ab676SJeff Bonwick { 2721b24ab676SJeff Bonwick dbuf_write_done(zio, NULL, zio->io_private); 2722b24ab676SJeff Bonwick } 2723b24ab676SJeff Bonwick 2724b24ab676SJeff Bonwick static void 2725b24ab676SJeff Bonwick dbuf_write_override_ready(zio_t *zio) 2726b24ab676SJeff Bonwick { 2727b24ab676SJeff Bonwick dbuf_dirty_record_t *dr = zio->io_private; 2728b24ab676SJeff Bonwick dmu_buf_impl_t *db = dr->dr_dbuf; 2729b24ab676SJeff Bonwick 2730b24ab676SJeff Bonwick dbuf_write_ready(zio, NULL, db); 2731b24ab676SJeff Bonwick } 2732b24ab676SJeff Bonwick 2733b24ab676SJeff Bonwick static void 2734b24ab676SJeff Bonwick dbuf_write_override_done(zio_t *zio) 2735b24ab676SJeff Bonwick { 2736b24ab676SJeff Bonwick dbuf_dirty_record_t *dr = zio->io_private; 2737b24ab676SJeff Bonwick dmu_buf_impl_t *db = dr->dr_dbuf; 2738b24ab676SJeff Bonwick blkptr_t *obp = &dr->dt.dl.dr_overridden_by; 2739b24ab676SJeff Bonwick 2740b24ab676SJeff Bonwick mutex_enter(&db->db_mtx); 2741b24ab676SJeff Bonwick if (!BP_EQUAL(zio->io_bp, obp)) { 2742b24ab676SJeff Bonwick if (!BP_IS_HOLE(obp)) 2743b24ab676SJeff Bonwick dsl_free(spa_get_dsl(zio->io_spa), zio->io_txg, obp); 2744b24ab676SJeff Bonwick arc_release(dr->dt.dl.dr_data, db); 2745b24ab676SJeff Bonwick } 2746c717a561Smaybee mutex_exit(&db->db_mtx); 2747c717a561Smaybee 2748b24ab676SJeff Bonwick dbuf_write_done(zio, NULL, db); 2749b24ab676SJeff Bonwick } 2750c717a561Smaybee 27513e30c24aSWill Andrews /* Issue I/O to commit a dirty buffer to disk. 
*/ 2752b24ab676SJeff Bonwick static void 2753b24ab676SJeff Bonwick dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx) 2754b24ab676SJeff Bonwick { 2755b24ab676SJeff Bonwick dmu_buf_impl_t *db = dr->dr_dbuf; 2756744947dcSTom Erickson dnode_t *dn; 2757744947dcSTom Erickson objset_t *os; 2758b24ab676SJeff Bonwick dmu_buf_impl_t *parent = db->db_parent; 2759b24ab676SJeff Bonwick uint64_t txg = tx->tx_txg; 27607802d7bfSMatthew Ahrens zbookmark_phys_t zb; 2761b24ab676SJeff Bonwick zio_prop_t zp; 2762b24ab676SJeff Bonwick zio_t *zio; 27630a586ceaSMark Shellenbaum int wp_flag = 0; 2764b24ab676SJeff Bonwick 2765744947dcSTom Erickson DB_DNODE_ENTER(db); 2766744947dcSTom Erickson dn = DB_DNODE(db); 2767744947dcSTom Erickson os = dn->dn_objset; 2768744947dcSTom Erickson 2769b24ab676SJeff Bonwick if (db->db_state != DB_NOFILL) { 2770b24ab676SJeff Bonwick if (db->db_level > 0 || dn->dn_type == DMU_OT_DNODE) { 2771b24ab676SJeff Bonwick /* 2772b24ab676SJeff Bonwick * Private object buffers are released here rather 2773b24ab676SJeff Bonwick * than in dbuf_dirty() since they are only modified 2774b24ab676SJeff Bonwick * in the syncing context and we don't want the 2775b24ab676SJeff Bonwick * overhead of making multiple copies of the data. 2776b24ab676SJeff Bonwick */ 2777b24ab676SJeff Bonwick if (BP_IS_HOLE(db->db_blkptr)) { 2778b24ab676SJeff Bonwick arc_buf_thaw(data); 2779b24ab676SJeff Bonwick } else { 27803f9d6ad7SLin Ling dbuf_release_bp(db); 2781b24ab676SJeff Bonwick } 2782b24ab676SJeff Bonwick } 2783b24ab676SJeff Bonwick } 2784b24ab676SJeff Bonwick 2785b24ab676SJeff Bonwick if (parent != dn->dn_dbuf) { 27863e30c24aSWill Andrews /* Our parent is an indirect block. */ 27873e30c24aSWill Andrews /* We have a dirty parent that has been scheduled for write. */ 2788b24ab676SJeff Bonwick ASSERT(parent && parent->db_data_pending); 27893e30c24aSWill Andrews /* Our parent's buffer is one level closer to the dnode. 
*/ 2790b24ab676SJeff Bonwick ASSERT(db->db_level == parent->db_level-1); 27913e30c24aSWill Andrews /* 27923e30c24aSWill Andrews * We're about to modify our parent's db_data by modifying 27933e30c24aSWill Andrews * our block pointer, so the parent must be released. 27943e30c24aSWill Andrews */ 2795b24ab676SJeff Bonwick ASSERT(arc_released(parent->db_buf)); 2796b24ab676SJeff Bonwick zio = parent->db_data_pending->dr_zio; 2797b24ab676SJeff Bonwick } else { 27983e30c24aSWill Andrews /* Our parent is the dnode itself. */ 27990a586ceaSMark Shellenbaum ASSERT((db->db_level == dn->dn_phys->dn_nlevels-1 && 28000a586ceaSMark Shellenbaum db->db_blkid != DMU_SPILL_BLKID) || 28010a586ceaSMark Shellenbaum (db->db_blkid == DMU_SPILL_BLKID && db->db_level == 0)); 28020a586ceaSMark Shellenbaum if (db->db_blkid != DMU_SPILL_BLKID) 2803b24ab676SJeff Bonwick ASSERT3P(db->db_blkptr, ==, 2804b24ab676SJeff Bonwick &dn->dn_phys->dn_blkptr[db->db_blkid]); 2805b24ab676SJeff Bonwick zio = dn->dn_zio; 2806b24ab676SJeff Bonwick } 2807b24ab676SJeff Bonwick 2808b24ab676SJeff Bonwick ASSERT(db->db_level == 0 || data == db->db_buf); 2809b24ab676SJeff Bonwick ASSERT3U(db->db_blkptr->blk_birth, <=, txg); 2810b24ab676SJeff Bonwick ASSERT(zio); 2811b24ab676SJeff Bonwick 2812b24ab676SJeff Bonwick SET_BOOKMARK(&zb, os->os_dsl_dataset ? 2813b24ab676SJeff Bonwick os->os_dsl_dataset->ds_object : DMU_META_OBJSET, 2814b24ab676SJeff Bonwick db->db.db_object, db->db_level, db->db_blkid); 2815b24ab676SJeff Bonwick 28160a586ceaSMark Shellenbaum if (db->db_blkid == DMU_SPILL_BLKID) 28170a586ceaSMark Shellenbaum wp_flag = WP_SPILL; 28180a586ceaSMark Shellenbaum wp_flag |= (db->db_state == DB_NOFILL) ? 
WP_NOFILL : 0; 28190a586ceaSMark Shellenbaum 28200a586ceaSMark Shellenbaum dmu_write_policy(os, dn, db->db_level, wp_flag, &zp); 2821744947dcSTom Erickson DB_DNODE_EXIT(db); 2822b24ab676SJeff Bonwick 28235d7b4d43SMatthew Ahrens if (db->db_level == 0 && 28245d7b4d43SMatthew Ahrens dr->dt.dl.dr_override_state == DR_OVERRIDDEN) { 28255d7b4d43SMatthew Ahrens /* 28265d7b4d43SMatthew Ahrens * The BP for this block has been provided by open context 28275d7b4d43SMatthew Ahrens * (by dmu_sync() or dmu_buf_write_embedded()). 28285d7b4d43SMatthew Ahrens */ 28295d7b4d43SMatthew Ahrens void *contents = (data != NULL) ? data->b_data : NULL; 28305d7b4d43SMatthew Ahrens 2831b24ab676SJeff Bonwick dr->dr_zio = zio_write(zio, os->os_spa, txg, 28325d7b4d43SMatthew Ahrens db->db_blkptr, contents, db->db.db_size, &zp, 283369962b56SMatthew Ahrens dbuf_write_override_ready, NULL, dbuf_write_override_done, 283469962b56SMatthew Ahrens dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); 2835b24ab676SJeff Bonwick mutex_enter(&db->db_mtx); 2836b24ab676SJeff Bonwick dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; 2837b24ab676SJeff Bonwick zio_write_override(dr->dr_zio, &dr->dt.dl.dr_overridden_by, 283880901aeaSGeorge Wilson dr->dt.dl.dr_copies, dr->dt.dl.dr_nopwrite); 2839b24ab676SJeff Bonwick mutex_exit(&db->db_mtx); 2840b24ab676SJeff Bonwick } else if (db->db_state == DB_NOFILL) { 2841810e43b2SBill Pijewski ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF || 2842810e43b2SBill Pijewski zp.zp_checksum == ZIO_CHECKSUM_NOPARITY); 2843b24ab676SJeff Bonwick dr->dr_zio = zio_write(zio, os->os_spa, txg, 2844b24ab676SJeff Bonwick db->db_blkptr, NULL, db->db.db_size, &zp, 284569962b56SMatthew Ahrens dbuf_write_nofill_ready, NULL, dbuf_write_nofill_done, db, 2846b24ab676SJeff Bonwick ZIO_PRIORITY_ASYNC_WRITE, 2847b24ab676SJeff Bonwick ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb); 2848b24ab676SJeff Bonwick } else { 2849b24ab676SJeff Bonwick ASSERT(arc_released(data)); 2850b24ab676SJeff Bonwick dr->dr_zio 
= arc_write(zio, os->os_spa, txg, 2851aad02571SSaso Kiselkov db->db_blkptr, data, DBUF_IS_L2CACHEABLE(db), 2852aad02571SSaso Kiselkov DBUF_IS_L2COMPRESSIBLE(db), &zp, dbuf_write_ready, 285369962b56SMatthew Ahrens dbuf_write_physdone, dbuf_write_done, db, 285469962b56SMatthew Ahrens ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); 2855b24ab676SJeff Bonwick } 2856fa9e4066Sahrens } 2857