/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
 * Copyright 2014 HybridCluster. All rights reserved.
 */

#include <sys/dbuf.h>
#include <sys/dmu.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_tx.h>
#include <sys/dnode.h>
#include <sys/zap.h>
#include <sys/zfeature.h>
#include <sys/dsl_dataset.h>

/*
 * Each of the concurrent object allocators will grab
 * 2^dmu_object_alloc_chunk_shift dnode slots at a time. The default is to
 * grab 128 slots, which is 4 blocks' worth. This was experimentally
 * determined to be the lowest value that eliminates the measurable effect
 * of lock contention from this code path.
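 *
 * Worked example: with the default 512-byte dnode and 16K metadnode data
 * block, DNODES_PER_BLOCK is 32, so a shift of 7 hands each per-CPU
 * allocator 2^7 = 128 slots (128 / 32 = 4 blocks) per trip to the global
 * allocator.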
 */
uint_t dmu_object_alloc_chunk_shift = 7;

static uint64_t
dmu_object_alloc_impl(objset_t *os, dmu_object_type_t ot, int blocksize,
    int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
    int dnodesize, dnode_t **allocated_dnode, const void *tag, dmu_tx_t *tx)
{
	uint64_t object;
	uint64_t L1_dnode_count = DNODES_PER_BLOCK <<
	    (DMU_META_DNODE(os)->dn_indblkshift - SPA_BLKPTRSHIFT);
	dnode_t *dn = NULL;
	int dn_slots = dnodesize >> DNODE_SHIFT;
	boolean_t restarted = B_FALSE;
	uint64_t *cpuobj = NULL;
	uint_t dnodes_per_chunk = 1 << dmu_object_alloc_chunk_shift;
	int error;

	cpuobj = &os->os_obj_next_percpu[CPU_SEQID_UNSTABLE %
	    os->os_obj_next_percpu_len];

	if (dn_slots == 0) {
		dn_slots = DNODE_MIN_SLOTS;
	} else {
		ASSERT3S(dn_slots, >=, DNODE_MIN_SLOTS);
		ASSERT3S(dn_slots, <=, DNODE_MAX_SLOTS);
	}

	/*
	 * The "chunk" of dnodes that is assigned to a CPU-specific
	 * allocator needs to be at least one block's worth, to avoid
	 * lock contention on the dbuf. It can be at most one L1 block's
	 * worth, so that the "rescan after polishing off an L1's worth"
	 * logic below will be sure to kick in.
	 */
	if (dnodes_per_chunk < DNODES_PER_BLOCK)
		dnodes_per_chunk = DNODES_PER_BLOCK;
	if (dnodes_per_chunk > L1_dnode_count)
		dnodes_per_chunk = L1_dnode_count;

	/*
	 * The caller requested the dnode be returned as a performance
	 * optimization in order to avoid releasing the hold only to
	 * immediately reacquire it. Since the caller is responsible
	 * for releasing the hold, they must provide the tag.
	 */
	if (allocated_dnode != NULL) {
		ASSERT3P(tag, !=, NULL);
	} else {
		ASSERT3P(tag, ==, NULL);
		tag = FTAG;
	}

	object = *cpuobj;
	for (;;) {
		/*
		 * If we finished a chunk of dnodes, get a new one from
		 * the global allocator.
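		 * The second P2PHASE clause below also catches the case
		 * where a multi-slot dnode would straddle a chunk
		 * boundary, so an allocation is never carved out of two
		 * different chunks.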
		 */
		if ((P2PHASE(object, dnodes_per_chunk) == 0) ||
		    (P2PHASE(object + dn_slots - 1, dnodes_per_chunk) <
		    dn_slots)) {
			DNODE_STAT_BUMP(dnode_alloc_next_chunk);
			mutex_enter(&os->os_obj_lock);
			ASSERT0(P2PHASE(os->os_obj_next_chunk,
			    dnodes_per_chunk));
			object = os->os_obj_next_chunk;

			/*
			 * Each time we polish off an L1 bp worth of dnodes
			 * (2^12 objects), move to another L1 bp that's
			 * still reasonably sparse (at most 1/4 full). Look
			 * from the beginning at most once per txg. If we
			 * still can't allocate from that L1 block, search
			 * for an empty L0 block, which will quickly skip
			 * to the end of the metadnode if no nearby L0
			 * blocks are empty. This fallback avoids a
			 * pathology where full dnode blocks containing
			 * large dnodes appear sparse because they have a
			 * low blk_fill, leading to many failed allocation
			 * attempts. In the long term a better mechanism to
			 * search for sparse metadnode regions, such as
			 * spacemaps, could be implemented.
			 *
			 * os_rescan_dnodes is set during txg sync if enough
			 * objects have been freed since the previous
			 * rescan to justify backfilling again.
			 *
			 * Note that dmu_traverse depends on the behavior
			 * that we use multiple blocks of the dnode object
			 * before going back to reuse objects. Any change
			 * to this algorithm should preserve that property
			 * or find another solution to the issues described
			 * in traverse_visitbp.
			 */
			if (P2PHASE(object, L1_dnode_count) == 0) {
				uint64_t offset;
				uint64_t blkfill;
				int minlvl;
				if (os->os_rescan_dnodes) {
					offset = 0;
					os->os_rescan_dnodes = B_FALSE;
				} else {
					offset = object << DNODE_SHIFT;
				}
				blkfill = restarted ? 1 : DNODES_PER_BLOCK >> 2;
				minlvl = restarted ? 1 : 2;
				restarted = B_TRUE;
				error = dnode_next_offset(DMU_META_DNODE(os),
				    DNODE_FIND_HOLE, &offset, minlvl,
				    blkfill, 0);
				if (error == 0) {
					object = offset >> DNODE_SHIFT;
				}
			}
			/*
			 * Note: if "restarted", we may find an L0 that
			 * is not suitably aligned.
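			 * If so, the P2ALIGN below rounds it down to the
			 * chunk containing it and the global cursor is
			 * advanced to the chunk after that.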
			 */
			os->os_obj_next_chunk =
			    P2ALIGN(object, dnodes_per_chunk) +
			    dnodes_per_chunk;
			(void) atomic_swap_64(cpuobj, object);
			mutex_exit(&os->os_obj_lock);
		}

		/*
		 * The value of (*cpuobj) before adding dn_slots is the object
		 * ID assigned to us. The value afterwards is the object ID
		 * assigned to whoever wants to do an allocation next.
		 */
		object = atomic_add_64_nv(cpuobj, dn_slots) - dn_slots;

		/*
		 * XXX We should check for an i/o error here and return
		 * up to our caller. Actually we should pre-read it in
		 * dmu_tx_assign(), but there is currently no mechanism
		 * to do so.
		 */
		error = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE,
		    dn_slots, tag, &dn);
		if (error == 0) {
			rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
			/*
			 * Another thread could have allocated it; check
			 * again now that we have the struct lock.
			 */
			if (dn->dn_type == DMU_OT_NONE) {
				dnode_allocate(dn, ot, blocksize,
				    indirect_blockshift, bonustype,
				    bonuslen, dn_slots, tx);
				rw_exit(&dn->dn_struct_rwlock);
				dmu_tx_add_new_object(tx, dn);

				/*
				 * Caller requested the allocated dnode be
				 * returned and is responsible for the hold.
				 */
				if (allocated_dnode != NULL)
					*allocated_dnode = dn;
				else
					dnode_rele(dn, tag);

				return (object);
			}
			rw_exit(&dn->dn_struct_rwlock);
			dnode_rele(dn, tag);
			DNODE_STAT_BUMP(dnode_alloc_race);
		}

		/*
		 * Skip to next known valid starting point on error. This
		 * is the start of the next block of dnodes.
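		 * dmu_object_next() normally finds the next hole for us;
		 * if it fails (e.g. ESRCH at the end of the metadnode),
		 * fall back to rounding up to the next dnode block.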
		 */
		if (dmu_object_next(os, &object, B_TRUE, 0) != 0) {
			object = P2ROUNDUP(object + 1, DNODES_PER_BLOCK);
			DNODE_STAT_BUMP(dnode_alloc_next_block);
		}
		(void) atomic_swap_64(cpuobj, object);
	}
}

uint64_t
dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
	return (dmu_object_alloc_impl(os, ot, blocksize, 0, bonustype,
	    bonuslen, 0, NULL, NULL, tx));
}

uint64_t
dmu_object_alloc_ibs(objset_t *os, dmu_object_type_t ot, int blocksize,
    int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
    dmu_tx_t *tx)
{
	return (dmu_object_alloc_impl(os, ot, blocksize, indirect_blockshift,
	    bonustype, bonuslen, 0, NULL, NULL, tx));
}

uint64_t
dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize,
    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
{
	return (dmu_object_alloc_impl(os, ot, blocksize, 0, bonustype,
	    bonuslen, dnodesize, NULL, NULL, tx));
}

/*
 * Allocate a new object and return a pointer to the newly allocated dnode
 * via the allocated_dnode argument. The returned dnode will be held and
 * the caller is responsible for releasing the hold by calling dnode_rele().
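 *
 * A minimal usage sketch (hypothetical caller; the object type and the
 * zero block/bonus/dnode sizes shown are illustrative defaults, not
 * requirements):
 *
 *	dnode_t *dn;
 *	uint64_t obj = dmu_object_alloc_hold(os, DMU_OT_PLAIN_FILE_CONTENTS,
 *	    0, 0, DMU_OT_NONE, 0, 0, &dn, FTAG, tx);
 *	... operate on dn without releasing and re-taking the hold ...
 *	dnode_rele(dn, FTAG);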
 */
uint64_t
dmu_object_alloc_hold(objset_t *os, dmu_object_type_t ot, int blocksize,
    int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
    int dnodesize, dnode_t **allocated_dnode, const void *tag, dmu_tx_t *tx)
{
	return (dmu_object_alloc_impl(os, ot, blocksize, indirect_blockshift,
	    bonustype, bonuslen, dnodesize, allocated_dnode, tag, tx));
}

int
dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
    int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
	return (dmu_object_claim_dnsize(os, object, ot, blocksize, bonustype,
	    bonuslen, 0, tx));
}

int
dmu_object_claim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
    int blocksize, dmu_object_type_t bonustype, int bonuslen,
    int dnodesize, dmu_tx_t *tx)
{
	dnode_t *dn;
	int dn_slots = dnodesize >> DNODE_SHIFT;
	int err;

	if (dn_slots == 0)
		dn_slots = DNODE_MIN_SLOTS;
	ASSERT3S(dn_slots, >=, DNODE_MIN_SLOTS);
	ASSERT3S(dn_slots, <=, DNODE_MAX_SLOTS);

	if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx))
		return (SET_ERROR(EBADF));

	err = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, dn_slots,
	    FTAG, &dn);
	if (err)
		return (err);

	dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, dn_slots, tx);
	dmu_tx_add_new_object(tx, dn);

	dnode_rele(dn, FTAG);

	return (0);
}

int
dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
    int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
	return (dmu_object_reclaim_dnsize(os, object, ot, blocksize, bonustype,
	    bonuslen, DNODE_MIN_SIZE, B_FALSE, tx));
}

int
dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
    int blocksize, dmu_object_type_t bonustype, int bonuslen, int dnodesize,
    boolean_t keep_spill, dmu_tx_t *tx)
{
	dnode_t *dn;
	int dn_slots = dnodesize >> DNODE_SHIFT;
	int err;

	if (dn_slots == 0)
		dn_slots = DNODE_MIN_SLOTS;

	if (object == DMU_META_DNODE_OBJECT)
		return (SET_ERROR(EBADF));

	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0,
	    FTAG, &dn);
	if (err)
		return (err);

	dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots,
	    keep_spill, tx);

	dnode_rele(dn, FTAG);
	return (err);
}

int
dmu_object_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx)
{
	dnode_t *dn;
	int err;

	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0,
	    FTAG, &dn);
	if (err)
		return (err);

	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
	if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
		dbuf_rm_spill(dn, tx);
		dnode_rm_spill(dn, tx);
	}
	rw_exit(&dn->dn_struct_rwlock);

	dnode_rele(dn, FTAG);
	return (err);
}

int
dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
{
	dnode_t *dn;
	int err;

	ASSERT(object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx));

	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0,
	    FTAG, &dn);
	if (err)
		return (err);

	ASSERT(dn->dn_type != DMU_OT_NONE);
	/*
	 * If we don't create this free range, we'll leak indirect blocks when
	 * we get to freeing the dnode in syncing context.
	 */
	dnode_free_range(dn, 0, DMU_OBJECT_END, tx);
	dnode_free(dn, tx);
	dnode_rele(dn, FTAG);

	return (0);
}

/*
 * Return (in *objectp) the next object which is allocated (or a hole)
 * after *object, taking into account only objects that may have been modified
 * after the specified txg.
 */
int
dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg)
{
	uint64_t offset;
	uint64_t start_obj;
	struct dsl_dataset *ds = os->os_dsl_dataset;
	int error;

	if (*objectp == 0) {
		start_obj = 1;
	} else if (ds && dsl_dataset_feature_is_active(ds,
	    SPA_FEATURE_LARGE_DNODE)) {
		uint64_t i = *objectp + 1;
		uint64_t last_obj = *objectp | (DNODES_PER_BLOCK - 1);
		dmu_object_info_t doi;

		/*
		 * Scan through the remaining meta dnode block. The contents
		 * of each slot in the block are known so it can be quickly
		 * checked. If the block is exhausted without a match then
		 * hand off to dnode_next_offset() for further scanning.
		 */
		while (i <= last_obj) {
			if (i == 0)
				return (SET_ERROR(ESRCH));
			error = dmu_object_info(os, i, &doi);
			if (error == ENOENT) {
				if (hole) {
					*objectp = i;
					return (0);
				} else {
					i++;
				}
			} else if (error == EEXIST) {
				i++;
			} else if (error == 0) {
				if (hole) {
					i += doi.doi_dnodesize >> DNODE_SHIFT;
				} else {
					*objectp = i;
					return (0);
				}
			} else {
				return (error);
			}
		}

		start_obj = i;
	} else {
		start_obj = *objectp + 1;
	}

	offset = start_obj << DNODE_SHIFT;

	error = dnode_next_offset(DMU_META_DNODE(os),
	    (hole ? DNODE_FIND_HOLE : 0), &offset, 0, DNODES_PER_BLOCK, txg);

	*objectp = offset >> DNODE_SHIFT;

	return (error);
}

/*
 * Turn this object from old_type into DMU_OTN_ZAP_METADATA, and bump the
 * refcount on SPA_FEATURE_EXTENSIBLE_DATASET.
 *
 * Only for use from syncing context, on MOS objects.
 */
void
dmu_object_zapify(objset_t *mos, uint64_t object, dmu_object_type_t old_type,
    dmu_tx_t *tx)
{
	dnode_t *dn;

	ASSERT(dmu_tx_is_syncing(tx));

	VERIFY0(dnode_hold(mos, object, FTAG, &dn));
	if (dn->dn_type == DMU_OTN_ZAP_METADATA) {
		dnode_rele(dn, FTAG);
		return;
	}
	ASSERT3U(dn->dn_type, ==, old_type);
	ASSERT0(dn->dn_maxblkid);

	/*
	 * We must initialize the ZAP data before changing the type,
	 * so that concurrent calls to *_is_zapified() can determine if
	 * the object has been completely zapified by checking the type.
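	 *
	 * Recording the new type in dn_next_type[] below is what carries
	 * the change through to the on-disk dnode when this txg syncs.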
	 */
	mzap_create_impl(dn, 0, 0, tx);

	dn->dn_next_type[tx->tx_txg & TXG_MASK] = dn->dn_type =
	    DMU_OTN_ZAP_METADATA;
	dnode_setdirty(dn, tx);
	dnode_rele(dn, FTAG);

	spa_feature_incr(dmu_objset_spa(mos),
	    SPA_FEATURE_EXTENSIBLE_DATASET, tx);
}

void
dmu_object_free_zapified(objset_t *mos, uint64_t object, dmu_tx_t *tx)
{
	dnode_t *dn;
	dmu_object_type_t t;

	ASSERT(dmu_tx_is_syncing(tx));

	VERIFY0(dnode_hold(mos, object, FTAG, &dn));
	t = dn->dn_type;
	dnode_rele(dn, FTAG);

	if (t == DMU_OTN_ZAP_METADATA) {
		spa_feature_decr(dmu_objset_spa(mos),
		    SPA_FEATURE_EXTENSIBLE_DATASET, tx);
	}
	VERIFY0(dmu_object_free(mos, object, tx));
}

EXPORT_SYMBOL(dmu_object_alloc);
EXPORT_SYMBOL(dmu_object_alloc_ibs);
EXPORT_SYMBOL(dmu_object_alloc_dnsize);
EXPORT_SYMBOL(dmu_object_alloc_hold);
EXPORT_SYMBOL(dmu_object_claim);
EXPORT_SYMBOL(dmu_object_claim_dnsize);
EXPORT_SYMBOL(dmu_object_reclaim);
EXPORT_SYMBOL(dmu_object_reclaim_dnsize);
EXPORT_SYMBOL(dmu_object_rm_spill);
EXPORT_SYMBOL(dmu_object_free);
EXPORT_SYMBOL(dmu_object_next);
EXPORT_SYMBOL(dmu_object_zapify);
EXPORT_SYMBOL(dmu_object_free_zapified);

/* BEGIN CSTYLED */
ZFS_MODULE_PARAM(zfs, , dmu_object_alloc_chunk_shift, UINT, ZMOD_RW,
	"CPU-specific allocator grabs 2^N objects at once");
/* END CSTYLED */