1*eda14cbcSMatt Macy /* 2*eda14cbcSMatt Macy * CDDL HEADER START 3*eda14cbcSMatt Macy * 4*eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5*eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6*eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7*eda14cbcSMatt Macy * 8*eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*eda14cbcSMatt Macy * or http://www.opensolaris.org/os/licensing. 10*eda14cbcSMatt Macy * See the License for the specific language governing permissions 11*eda14cbcSMatt Macy * and limitations under the License. 12*eda14cbcSMatt Macy * 13*eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14*eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16*eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17*eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18*eda14cbcSMatt Macy * 19*eda14cbcSMatt Macy * CDDL HEADER END 20*eda14cbcSMatt Macy */ 21*eda14cbcSMatt Macy /* 22*eda14cbcSMatt Macy * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23*eda14cbcSMatt Macy * Copyright (c) 2012, 2018 by Delphix. All rights reserved. 24*eda14cbcSMatt Macy * Copyright (c) 2013 Martin Matuska. All rights reserved. 25*eda14cbcSMatt Macy * Copyright (c) 2014 Joyent, Inc. All rights reserved. 26*eda14cbcSMatt Macy * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. 27*eda14cbcSMatt Macy * Copyright (c) 2016 Actifio, Inc. All rights reserved. 28*eda14cbcSMatt Macy * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved. 
29*eda14cbcSMatt Macy */ 30*eda14cbcSMatt Macy 31*eda14cbcSMatt Macy #include <sys/dmu.h> 32*eda14cbcSMatt Macy #include <sys/dmu_objset.h> 33*eda14cbcSMatt Macy #include <sys/dmu_tx.h> 34*eda14cbcSMatt Macy #include <sys/dsl_dataset.h> 35*eda14cbcSMatt Macy #include <sys/dsl_dir.h> 36*eda14cbcSMatt Macy #include <sys/dsl_prop.h> 37*eda14cbcSMatt Macy #include <sys/dsl_synctask.h> 38*eda14cbcSMatt Macy #include <sys/dsl_deleg.h> 39*eda14cbcSMatt Macy #include <sys/dmu_impl.h> 40*eda14cbcSMatt Macy #include <sys/spa.h> 41*eda14cbcSMatt Macy #include <sys/spa_impl.h> 42*eda14cbcSMatt Macy #include <sys/metaslab.h> 43*eda14cbcSMatt Macy #include <sys/zap.h> 44*eda14cbcSMatt Macy #include <sys/zio.h> 45*eda14cbcSMatt Macy #include <sys/arc.h> 46*eda14cbcSMatt Macy #include <sys/sunddi.h> 47*eda14cbcSMatt Macy #include <sys/zfeature.h> 48*eda14cbcSMatt Macy #include <sys/policy.h> 49*eda14cbcSMatt Macy #include <sys/zfs_znode.h> 50*eda14cbcSMatt Macy #include <sys/zvol.h> 51*eda14cbcSMatt Macy #include <sys/zthr.h> 52*eda14cbcSMatt Macy #include "zfs_namecheck.h" 53*eda14cbcSMatt Macy #include "zfs_prop.h" 54*eda14cbcSMatt Macy #ifdef _KERNEL 55*eda14cbcSMatt Macy #include <sys/zfs_vfsops.h> 56*eda14cbcSMatt Macy #endif 57*eda14cbcSMatt Macy 58*eda14cbcSMatt Macy /* 59*eda14cbcSMatt Macy * Filesystem and Snapshot Limits 60*eda14cbcSMatt Macy * ------------------------------ 61*eda14cbcSMatt Macy * 62*eda14cbcSMatt Macy * These limits are used to restrict the number of filesystems and/or snapshots 63*eda14cbcSMatt Macy * that can be created at a given level in the tree or below. A typical 64*eda14cbcSMatt Macy * use-case is with a delegated dataset where the administrator wants to ensure 65*eda14cbcSMatt Macy * that a user within the zone is not creating too many additional filesystems 66*eda14cbcSMatt Macy * or snapshots, even though they're not exceeding their space quota. 
67*eda14cbcSMatt Macy * 68*eda14cbcSMatt Macy * The filesystem and snapshot counts are stored as extensible properties. This 69*eda14cbcSMatt Macy * capability is controlled by a feature flag and must be enabled to be used. 70*eda14cbcSMatt Macy * Once enabled, the feature is not active until the first limit is set. At 71*eda14cbcSMatt Macy * that point, future operations to create/destroy filesystems or snapshots 72*eda14cbcSMatt Macy * will validate and update the counts. 73*eda14cbcSMatt Macy * 74*eda14cbcSMatt Macy * Because the count properties will not exist before the feature is active, 75*eda14cbcSMatt Macy * the counts are updated when a limit is first set on an uninitialized 76*eda14cbcSMatt Macy * dsl_dir node in the tree (The filesystem/snapshot count on a node includes 77*eda14cbcSMatt Macy * all of the nested filesystems/snapshots. Thus, a new leaf node has a 78*eda14cbcSMatt Macy * filesystem count of 0 and a snapshot count of 0. Non-existent filesystem and 79*eda14cbcSMatt Macy * snapshot count properties on a node indicate uninitialized counts on that 80*eda14cbcSMatt Macy * node.) When first setting a limit on an uninitialized node, the code starts 81*eda14cbcSMatt Macy * at the filesystem with the new limit and descends into all sub-filesystems 82*eda14cbcSMatt Macy * to add the count properties. 83*eda14cbcSMatt Macy * 84*eda14cbcSMatt Macy * In practice this is lightweight since a limit is typically set when the 85*eda14cbcSMatt Macy * filesystem is created and thus has no children. Once valid, changing the 86*eda14cbcSMatt Macy * limit value won't require a re-traversal since the counts are already valid. 87*eda14cbcSMatt Macy * When recursively fixing the counts, if a node with a limit is encountered 88*eda14cbcSMatt Macy * during the descent, the counts are known to be valid and there is no need to 89*eda14cbcSMatt Macy * descend into that filesystem's children. 
 * The counts on filesystems above the
 * one with the new limit will still be uninitialized, unless a limit is
 * eventually set on one of those filesystems. The counts are always recursively
 * updated when a limit is set on a dataset, unless there is already a limit.
 * When a new limit value is set on a filesystem with an existing limit, it is
 * possible for the new limit to be less than the current count at that level
 * since a user who can change the limit is also allowed to exceed the limit.
 *
 * Once the feature is active, then whenever a filesystem or snapshot is
 * created, the code recurses up the tree, validating the new count against the
 * limit at each initialized level. In practice, most levels will not have a
 * limit set. If there is a limit at any initialized level up the tree, the
 * check must pass or the creation will fail. Likewise, when a filesystem or
 * snapshot is destroyed, the counts are recursively adjusted all the way up
 * the initialized nodes in the tree. Renaming a filesystem into a different
 * point in the tree will first validate, then update the counts on each branch
 * up to the common ancestor. A receive will also validate the counts and then
 * update them.
 *
 * An exception to the above behavior is that the limit is not enforced if the
 * user has permission to modify the limit. This is primarily so that
 * recursive snapshots in the global zone always work.
We want to prevent a 111*eda14cbcSMatt Macy * denial-of-service in which a lower level delegated dataset could max out its 112*eda14cbcSMatt Macy * limit and thus block recursive snapshots from being taken in the global zone. 113*eda14cbcSMatt Macy * Because of this, it is possible for the snapshot count to be over the limit 114*eda14cbcSMatt Macy * and snapshots taken in the global zone could cause a lower level dataset to 115*eda14cbcSMatt Macy * hit or exceed its limit. The administrator taking the global zone recursive 116*eda14cbcSMatt Macy * snapshot should be aware of this side-effect and behave accordingly. 117*eda14cbcSMatt Macy * For consistency, the filesystem limit is also not enforced if the user can 118*eda14cbcSMatt Macy * modify the limit. 119*eda14cbcSMatt Macy * 120*eda14cbcSMatt Macy * The filesystem and snapshot limits are validated by dsl_fs_ss_limit_check() 121*eda14cbcSMatt Macy * and updated by dsl_fs_ss_count_adjust(). A new limit value is setup in 122*eda14cbcSMatt Macy * dsl_dir_activate_fs_ss_limit() and the counts are adjusted, if necessary, by 123*eda14cbcSMatt Macy * dsl_dir_init_fs_ss_count(). 124*eda14cbcSMatt Macy * 125*eda14cbcSMatt Macy * There is a special case when we receive a filesystem that already exists. In 126*eda14cbcSMatt Macy * this case a temporary clone name of %X is created (see dmu_recv_begin). We 127*eda14cbcSMatt Macy * never update the filesystem counts for temporary clones. 128*eda14cbcSMatt Macy * 129*eda14cbcSMatt Macy * Likewise, we do not update the snapshot counts for temporary snapshots, 130*eda14cbcSMatt Macy * such as those created by zfs diff. 
131*eda14cbcSMatt Macy */ 132*eda14cbcSMatt Macy 133*eda14cbcSMatt Macy extern inline dsl_dir_phys_t *dsl_dir_phys(dsl_dir_t *dd); 134*eda14cbcSMatt Macy 135*eda14cbcSMatt Macy static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd); 136*eda14cbcSMatt Macy 137*eda14cbcSMatt Macy typedef struct ddulrt_arg { 138*eda14cbcSMatt Macy dsl_dir_t *ddulrta_dd; 139*eda14cbcSMatt Macy uint64_t ddlrta_txg; 140*eda14cbcSMatt Macy } ddulrt_arg_t; 141*eda14cbcSMatt Macy 142*eda14cbcSMatt Macy static void 143*eda14cbcSMatt Macy dsl_dir_evict_async(void *dbu) 144*eda14cbcSMatt Macy { 145*eda14cbcSMatt Macy dsl_dir_t *dd = dbu; 146*eda14cbcSMatt Macy int t; 147*eda14cbcSMatt Macy dsl_pool_t *dp __maybe_unused = dd->dd_pool; 148*eda14cbcSMatt Macy 149*eda14cbcSMatt Macy dd->dd_dbuf = NULL; 150*eda14cbcSMatt Macy 151*eda14cbcSMatt Macy for (t = 0; t < TXG_SIZE; t++) { 152*eda14cbcSMatt Macy ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t)); 153*eda14cbcSMatt Macy ASSERT(dd->dd_tempreserved[t] == 0); 154*eda14cbcSMatt Macy ASSERT(dd->dd_space_towrite[t] == 0); 155*eda14cbcSMatt Macy } 156*eda14cbcSMatt Macy 157*eda14cbcSMatt Macy if (dd->dd_parent) 158*eda14cbcSMatt Macy dsl_dir_async_rele(dd->dd_parent, dd); 159*eda14cbcSMatt Macy 160*eda14cbcSMatt Macy spa_async_close(dd->dd_pool->dp_spa, dd); 161*eda14cbcSMatt Macy 162*eda14cbcSMatt Macy if (dsl_deadlist_is_open(&dd->dd_livelist)) 163*eda14cbcSMatt Macy dsl_dir_livelist_close(dd); 164*eda14cbcSMatt Macy 165*eda14cbcSMatt Macy dsl_prop_fini(dd); 166*eda14cbcSMatt Macy cv_destroy(&dd->dd_activity_cv); 167*eda14cbcSMatt Macy mutex_destroy(&dd->dd_activity_lock); 168*eda14cbcSMatt Macy mutex_destroy(&dd->dd_lock); 169*eda14cbcSMatt Macy kmem_free(dd, sizeof (dsl_dir_t)); 170*eda14cbcSMatt Macy } 171*eda14cbcSMatt Macy 172*eda14cbcSMatt Macy int 173*eda14cbcSMatt Macy dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, 174*eda14cbcSMatt Macy const char *tail, void *tag, dsl_dir_t **ddp) 175*eda14cbcSMatt Macy { 176*eda14cbcSMatt Macy 
dmu_buf_t *dbuf; 177*eda14cbcSMatt Macy dsl_dir_t *dd; 178*eda14cbcSMatt Macy dmu_object_info_t doi; 179*eda14cbcSMatt Macy int err; 180*eda14cbcSMatt Macy 181*eda14cbcSMatt Macy ASSERT(dsl_pool_config_held(dp)); 182*eda14cbcSMatt Macy 183*eda14cbcSMatt Macy err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf); 184*eda14cbcSMatt Macy if (err != 0) 185*eda14cbcSMatt Macy return (err); 186*eda14cbcSMatt Macy dd = dmu_buf_get_user(dbuf); 187*eda14cbcSMatt Macy 188*eda14cbcSMatt Macy dmu_object_info_from_db(dbuf, &doi); 189*eda14cbcSMatt Macy ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR); 190*eda14cbcSMatt Macy ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t)); 191*eda14cbcSMatt Macy 192*eda14cbcSMatt Macy if (dd == NULL) { 193*eda14cbcSMatt Macy dsl_dir_t *winner; 194*eda14cbcSMatt Macy 195*eda14cbcSMatt Macy dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP); 196*eda14cbcSMatt Macy dd->dd_object = ddobj; 197*eda14cbcSMatt Macy dd->dd_dbuf = dbuf; 198*eda14cbcSMatt Macy dd->dd_pool = dp; 199*eda14cbcSMatt Macy 200*eda14cbcSMatt Macy mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); 201*eda14cbcSMatt Macy mutex_init(&dd->dd_activity_lock, NULL, MUTEX_DEFAULT, NULL); 202*eda14cbcSMatt Macy cv_init(&dd->dd_activity_cv, NULL, CV_DEFAULT, NULL); 203*eda14cbcSMatt Macy dsl_prop_init(dd); 204*eda14cbcSMatt Macy 205*eda14cbcSMatt Macy if (dsl_dir_is_zapified(dd)) { 206*eda14cbcSMatt Macy err = zap_lookup(dp->dp_meta_objset, 207*eda14cbcSMatt Macy ddobj, DD_FIELD_CRYPTO_KEY_OBJ, 208*eda14cbcSMatt Macy sizeof (uint64_t), 1, &dd->dd_crypto_obj); 209*eda14cbcSMatt Macy if (err == 0) { 210*eda14cbcSMatt Macy /* check for on-disk format errata */ 211*eda14cbcSMatt Macy if (dsl_dir_incompatible_encryption_version( 212*eda14cbcSMatt Macy dd)) { 213*eda14cbcSMatt Macy dp->dp_spa->spa_errata = 214*eda14cbcSMatt Macy ZPOOL_ERRATA_ZOL_6845_ENCRYPTION; 215*eda14cbcSMatt Macy } 216*eda14cbcSMatt Macy } else if (err != ENOENT) { 217*eda14cbcSMatt Macy goto errout; 
218*eda14cbcSMatt Macy } 219*eda14cbcSMatt Macy } 220*eda14cbcSMatt Macy 221*eda14cbcSMatt Macy dsl_dir_snap_cmtime_update(dd); 222*eda14cbcSMatt Macy 223*eda14cbcSMatt Macy if (dsl_dir_phys(dd)->dd_parent_obj) { 224*eda14cbcSMatt Macy err = dsl_dir_hold_obj(dp, 225*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_parent_obj, NULL, dd, 226*eda14cbcSMatt Macy &dd->dd_parent); 227*eda14cbcSMatt Macy if (err != 0) 228*eda14cbcSMatt Macy goto errout; 229*eda14cbcSMatt Macy if (tail) { 230*eda14cbcSMatt Macy #ifdef ZFS_DEBUG 231*eda14cbcSMatt Macy uint64_t foundobj; 232*eda14cbcSMatt Macy 233*eda14cbcSMatt Macy err = zap_lookup(dp->dp_meta_objset, 234*eda14cbcSMatt Macy dsl_dir_phys(dd->dd_parent)-> 235*eda14cbcSMatt Macy dd_child_dir_zapobj, tail, 236*eda14cbcSMatt Macy sizeof (foundobj), 1, &foundobj); 237*eda14cbcSMatt Macy ASSERT(err || foundobj == ddobj); 238*eda14cbcSMatt Macy #endif 239*eda14cbcSMatt Macy (void) strlcpy(dd->dd_myname, tail, 240*eda14cbcSMatt Macy sizeof (dd->dd_myname)); 241*eda14cbcSMatt Macy } else { 242*eda14cbcSMatt Macy err = zap_value_search(dp->dp_meta_objset, 243*eda14cbcSMatt Macy dsl_dir_phys(dd->dd_parent)-> 244*eda14cbcSMatt Macy dd_child_dir_zapobj, 245*eda14cbcSMatt Macy ddobj, 0, dd->dd_myname); 246*eda14cbcSMatt Macy } 247*eda14cbcSMatt Macy if (err != 0) 248*eda14cbcSMatt Macy goto errout; 249*eda14cbcSMatt Macy } else { 250*eda14cbcSMatt Macy (void) strlcpy(dd->dd_myname, spa_name(dp->dp_spa), 251*eda14cbcSMatt Macy sizeof (dd->dd_myname)); 252*eda14cbcSMatt Macy } 253*eda14cbcSMatt Macy 254*eda14cbcSMatt Macy if (dsl_dir_is_clone(dd)) { 255*eda14cbcSMatt Macy dmu_buf_t *origin_bonus; 256*eda14cbcSMatt Macy dsl_dataset_phys_t *origin_phys; 257*eda14cbcSMatt Macy 258*eda14cbcSMatt Macy /* 259*eda14cbcSMatt Macy * We can't open the origin dataset, because 260*eda14cbcSMatt Macy * that would require opening this dsl_dir. 261*eda14cbcSMatt Macy * Just look at its phys directly instead. 
262*eda14cbcSMatt Macy */ 263*eda14cbcSMatt Macy err = dmu_bonus_hold(dp->dp_meta_objset, 264*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_origin_obj, FTAG, 265*eda14cbcSMatt Macy &origin_bonus); 266*eda14cbcSMatt Macy if (err != 0) 267*eda14cbcSMatt Macy goto errout; 268*eda14cbcSMatt Macy origin_phys = origin_bonus->db_data; 269*eda14cbcSMatt Macy dd->dd_origin_txg = 270*eda14cbcSMatt Macy origin_phys->ds_creation_txg; 271*eda14cbcSMatt Macy dmu_buf_rele(origin_bonus, FTAG); 272*eda14cbcSMatt Macy if (dsl_dir_is_zapified(dd)) { 273*eda14cbcSMatt Macy uint64_t obj; 274*eda14cbcSMatt Macy err = zap_lookup(dp->dp_meta_objset, 275*eda14cbcSMatt Macy dd->dd_object, DD_FIELD_LIVELIST, 276*eda14cbcSMatt Macy sizeof (uint64_t), 1, &obj); 277*eda14cbcSMatt Macy if (err == 0) 278*eda14cbcSMatt Macy dsl_dir_livelist_open(dd, obj); 279*eda14cbcSMatt Macy else if (err != ENOENT) 280*eda14cbcSMatt Macy goto errout; 281*eda14cbcSMatt Macy } 282*eda14cbcSMatt Macy } 283*eda14cbcSMatt Macy 284*eda14cbcSMatt Macy dmu_buf_init_user(&dd->dd_dbu, NULL, dsl_dir_evict_async, 285*eda14cbcSMatt Macy &dd->dd_dbuf); 286*eda14cbcSMatt Macy winner = dmu_buf_set_user_ie(dbuf, &dd->dd_dbu); 287*eda14cbcSMatt Macy if (winner != NULL) { 288*eda14cbcSMatt Macy if (dd->dd_parent) 289*eda14cbcSMatt Macy dsl_dir_rele(dd->dd_parent, dd); 290*eda14cbcSMatt Macy if (dsl_deadlist_is_open(&dd->dd_livelist)) 291*eda14cbcSMatt Macy dsl_dir_livelist_close(dd); 292*eda14cbcSMatt Macy dsl_prop_fini(dd); 293*eda14cbcSMatt Macy cv_destroy(&dd->dd_activity_cv); 294*eda14cbcSMatt Macy mutex_destroy(&dd->dd_activity_lock); 295*eda14cbcSMatt Macy mutex_destroy(&dd->dd_lock); 296*eda14cbcSMatt Macy kmem_free(dd, sizeof (dsl_dir_t)); 297*eda14cbcSMatt Macy dd = winner; 298*eda14cbcSMatt Macy } else { 299*eda14cbcSMatt Macy spa_open_ref(dp->dp_spa, dd); 300*eda14cbcSMatt Macy } 301*eda14cbcSMatt Macy } 302*eda14cbcSMatt Macy 303*eda14cbcSMatt Macy /* 304*eda14cbcSMatt Macy * The dsl_dir_t has both open-to-close and 
instantiate-to-evict 305*eda14cbcSMatt Macy * holds on the spa. We need the open-to-close holds because 306*eda14cbcSMatt Macy * otherwise the spa_refcnt wouldn't change when we open a 307*eda14cbcSMatt Macy * dir which the spa also has open, so we could incorrectly 308*eda14cbcSMatt Macy * think it was OK to unload/export/destroy the pool. We need 309*eda14cbcSMatt Macy * the instantiate-to-evict hold because the dsl_dir_t has a 310*eda14cbcSMatt Macy * pointer to the dd_pool, which has a pointer to the spa_t. 311*eda14cbcSMatt Macy */ 312*eda14cbcSMatt Macy spa_open_ref(dp->dp_spa, tag); 313*eda14cbcSMatt Macy ASSERT3P(dd->dd_pool, ==, dp); 314*eda14cbcSMatt Macy ASSERT3U(dd->dd_object, ==, ddobj); 315*eda14cbcSMatt Macy ASSERT3P(dd->dd_dbuf, ==, dbuf); 316*eda14cbcSMatt Macy *ddp = dd; 317*eda14cbcSMatt Macy return (0); 318*eda14cbcSMatt Macy 319*eda14cbcSMatt Macy errout: 320*eda14cbcSMatt Macy if (dd->dd_parent) 321*eda14cbcSMatt Macy dsl_dir_rele(dd->dd_parent, dd); 322*eda14cbcSMatt Macy if (dsl_deadlist_is_open(&dd->dd_livelist)) 323*eda14cbcSMatt Macy dsl_dir_livelist_close(dd); 324*eda14cbcSMatt Macy dsl_prop_fini(dd); 325*eda14cbcSMatt Macy cv_destroy(&dd->dd_activity_cv); 326*eda14cbcSMatt Macy mutex_destroy(&dd->dd_activity_lock); 327*eda14cbcSMatt Macy mutex_destroy(&dd->dd_lock); 328*eda14cbcSMatt Macy kmem_free(dd, sizeof (dsl_dir_t)); 329*eda14cbcSMatt Macy dmu_buf_rele(dbuf, tag); 330*eda14cbcSMatt Macy return (err); 331*eda14cbcSMatt Macy } 332*eda14cbcSMatt Macy 333*eda14cbcSMatt Macy void 334*eda14cbcSMatt Macy dsl_dir_rele(dsl_dir_t *dd, void *tag) 335*eda14cbcSMatt Macy { 336*eda14cbcSMatt Macy dprintf_dd(dd, "%s\n", ""); 337*eda14cbcSMatt Macy spa_close(dd->dd_pool->dp_spa, tag); 338*eda14cbcSMatt Macy dmu_buf_rele(dd->dd_dbuf, tag); 339*eda14cbcSMatt Macy } 340*eda14cbcSMatt Macy 341*eda14cbcSMatt Macy /* 342*eda14cbcSMatt Macy * Remove a reference to the given dsl dir that is being asynchronously 343*eda14cbcSMatt Macy * released. 
Async releases occur from a taskq performing eviction of 344*eda14cbcSMatt Macy * dsl datasets and dirs. This process is identical to a normal release 345*eda14cbcSMatt Macy * with the exception of using the async API for releasing the reference on 346*eda14cbcSMatt Macy * the spa. 347*eda14cbcSMatt Macy */ 348*eda14cbcSMatt Macy void 349*eda14cbcSMatt Macy dsl_dir_async_rele(dsl_dir_t *dd, void *tag) 350*eda14cbcSMatt Macy { 351*eda14cbcSMatt Macy dprintf_dd(dd, "%s\n", ""); 352*eda14cbcSMatt Macy spa_async_close(dd->dd_pool->dp_spa, tag); 353*eda14cbcSMatt Macy dmu_buf_rele(dd->dd_dbuf, tag); 354*eda14cbcSMatt Macy } 355*eda14cbcSMatt Macy 356*eda14cbcSMatt Macy /* buf must be at least ZFS_MAX_DATASET_NAME_LEN bytes */ 357*eda14cbcSMatt Macy void 358*eda14cbcSMatt Macy dsl_dir_name(dsl_dir_t *dd, char *buf) 359*eda14cbcSMatt Macy { 360*eda14cbcSMatt Macy if (dd->dd_parent) { 361*eda14cbcSMatt Macy dsl_dir_name(dd->dd_parent, buf); 362*eda14cbcSMatt Macy VERIFY3U(strlcat(buf, "/", ZFS_MAX_DATASET_NAME_LEN), <, 363*eda14cbcSMatt Macy ZFS_MAX_DATASET_NAME_LEN); 364*eda14cbcSMatt Macy } else { 365*eda14cbcSMatt Macy buf[0] = '\0'; 366*eda14cbcSMatt Macy } 367*eda14cbcSMatt Macy if (!MUTEX_HELD(&dd->dd_lock)) { 368*eda14cbcSMatt Macy /* 369*eda14cbcSMatt Macy * recursive mutex so that we can use 370*eda14cbcSMatt Macy * dprintf_dd() with dd_lock held 371*eda14cbcSMatt Macy */ 372*eda14cbcSMatt Macy mutex_enter(&dd->dd_lock); 373*eda14cbcSMatt Macy VERIFY3U(strlcat(buf, dd->dd_myname, ZFS_MAX_DATASET_NAME_LEN), 374*eda14cbcSMatt Macy <, ZFS_MAX_DATASET_NAME_LEN); 375*eda14cbcSMatt Macy mutex_exit(&dd->dd_lock); 376*eda14cbcSMatt Macy } else { 377*eda14cbcSMatt Macy VERIFY3U(strlcat(buf, dd->dd_myname, ZFS_MAX_DATASET_NAME_LEN), 378*eda14cbcSMatt Macy <, ZFS_MAX_DATASET_NAME_LEN); 379*eda14cbcSMatt Macy } 380*eda14cbcSMatt Macy } 381*eda14cbcSMatt Macy 382*eda14cbcSMatt Macy /* Calculate name length, avoiding all the strcat calls of dsl_dir_name */ 383*eda14cbcSMatt 
Macy int 384*eda14cbcSMatt Macy dsl_dir_namelen(dsl_dir_t *dd) 385*eda14cbcSMatt Macy { 386*eda14cbcSMatt Macy int result = 0; 387*eda14cbcSMatt Macy 388*eda14cbcSMatt Macy if (dd->dd_parent) { 389*eda14cbcSMatt Macy /* parent's name + 1 for the "/" */ 390*eda14cbcSMatt Macy result = dsl_dir_namelen(dd->dd_parent) + 1; 391*eda14cbcSMatt Macy } 392*eda14cbcSMatt Macy 393*eda14cbcSMatt Macy if (!MUTEX_HELD(&dd->dd_lock)) { 394*eda14cbcSMatt Macy /* see dsl_dir_name */ 395*eda14cbcSMatt Macy mutex_enter(&dd->dd_lock); 396*eda14cbcSMatt Macy result += strlen(dd->dd_myname); 397*eda14cbcSMatt Macy mutex_exit(&dd->dd_lock); 398*eda14cbcSMatt Macy } else { 399*eda14cbcSMatt Macy result += strlen(dd->dd_myname); 400*eda14cbcSMatt Macy } 401*eda14cbcSMatt Macy 402*eda14cbcSMatt Macy return (result); 403*eda14cbcSMatt Macy } 404*eda14cbcSMatt Macy 405*eda14cbcSMatt Macy static int 406*eda14cbcSMatt Macy getcomponent(const char *path, char *component, const char **nextp) 407*eda14cbcSMatt Macy { 408*eda14cbcSMatt Macy char *p; 409*eda14cbcSMatt Macy 410*eda14cbcSMatt Macy if ((path == NULL) || (path[0] == '\0')) 411*eda14cbcSMatt Macy return (SET_ERROR(ENOENT)); 412*eda14cbcSMatt Macy /* This would be a good place to reserve some namespace... */ 413*eda14cbcSMatt Macy p = strpbrk(path, "/@"); 414*eda14cbcSMatt Macy if (p && (p[1] == '/' || p[1] == '@')) { 415*eda14cbcSMatt Macy /* two separators in a row */ 416*eda14cbcSMatt Macy return (SET_ERROR(EINVAL)); 417*eda14cbcSMatt Macy } 418*eda14cbcSMatt Macy if (p == NULL || p == path) { 419*eda14cbcSMatt Macy /* 420*eda14cbcSMatt Macy * if the first thing is an @ or /, it had better be an 421*eda14cbcSMatt Macy * @ and it had better not have any more ats or slashes, 422*eda14cbcSMatt Macy * and it had better have something after the @. 
423*eda14cbcSMatt Macy */ 424*eda14cbcSMatt Macy if (p != NULL && 425*eda14cbcSMatt Macy (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0')) 426*eda14cbcSMatt Macy return (SET_ERROR(EINVAL)); 427*eda14cbcSMatt Macy if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN) 428*eda14cbcSMatt Macy return (SET_ERROR(ENAMETOOLONG)); 429*eda14cbcSMatt Macy (void) strlcpy(component, path, ZFS_MAX_DATASET_NAME_LEN); 430*eda14cbcSMatt Macy p = NULL; 431*eda14cbcSMatt Macy } else if (p[0] == '/') { 432*eda14cbcSMatt Macy if (p - path >= ZFS_MAX_DATASET_NAME_LEN) 433*eda14cbcSMatt Macy return (SET_ERROR(ENAMETOOLONG)); 434*eda14cbcSMatt Macy (void) strncpy(component, path, p - path); 435*eda14cbcSMatt Macy component[p - path] = '\0'; 436*eda14cbcSMatt Macy p++; 437*eda14cbcSMatt Macy } else if (p[0] == '@') { 438*eda14cbcSMatt Macy /* 439*eda14cbcSMatt Macy * if the next separator is an @, there better not be 440*eda14cbcSMatt Macy * any more slashes. 441*eda14cbcSMatt Macy */ 442*eda14cbcSMatt Macy if (strchr(path, '/')) 443*eda14cbcSMatt Macy return (SET_ERROR(EINVAL)); 444*eda14cbcSMatt Macy if (p - path >= ZFS_MAX_DATASET_NAME_LEN) 445*eda14cbcSMatt Macy return (SET_ERROR(ENAMETOOLONG)); 446*eda14cbcSMatt Macy (void) strncpy(component, path, p - path); 447*eda14cbcSMatt Macy component[p - path] = '\0'; 448*eda14cbcSMatt Macy } else { 449*eda14cbcSMatt Macy panic("invalid p=%p", (void *)p); 450*eda14cbcSMatt Macy } 451*eda14cbcSMatt Macy *nextp = p; 452*eda14cbcSMatt Macy return (0); 453*eda14cbcSMatt Macy } 454*eda14cbcSMatt Macy 455*eda14cbcSMatt Macy /* 456*eda14cbcSMatt Macy * Return the dsl_dir_t, and possibly the last component which couldn't 457*eda14cbcSMatt Macy * be found in *tail. The name must be in the specified dsl_pool_t. This 458*eda14cbcSMatt Macy * thread must hold the dp_config_rwlock for the pool. Returns NULL if the 459*eda14cbcSMatt Macy * path is bogus, or if tail==NULL and we couldn't parse the whole name. 
460*eda14cbcSMatt Macy * (*tail)[0] == '@' means that the last component is a snapshot. 461*eda14cbcSMatt Macy */ 462*eda14cbcSMatt Macy int 463*eda14cbcSMatt Macy dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag, 464*eda14cbcSMatt Macy dsl_dir_t **ddp, const char **tailp) 465*eda14cbcSMatt Macy { 466*eda14cbcSMatt Macy char *buf; 467*eda14cbcSMatt Macy const char *spaname, *next, *nextnext = NULL; 468*eda14cbcSMatt Macy int err; 469*eda14cbcSMatt Macy dsl_dir_t *dd; 470*eda14cbcSMatt Macy uint64_t ddobj; 471*eda14cbcSMatt Macy 472*eda14cbcSMatt Macy buf = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 473*eda14cbcSMatt Macy err = getcomponent(name, buf, &next); 474*eda14cbcSMatt Macy if (err != 0) 475*eda14cbcSMatt Macy goto error; 476*eda14cbcSMatt Macy 477*eda14cbcSMatt Macy /* Make sure the name is in the specified pool. */ 478*eda14cbcSMatt Macy spaname = spa_name(dp->dp_spa); 479*eda14cbcSMatt Macy if (strcmp(buf, spaname) != 0) { 480*eda14cbcSMatt Macy err = SET_ERROR(EXDEV); 481*eda14cbcSMatt Macy goto error; 482*eda14cbcSMatt Macy } 483*eda14cbcSMatt Macy 484*eda14cbcSMatt Macy ASSERT(dsl_pool_config_held(dp)); 485*eda14cbcSMatt Macy 486*eda14cbcSMatt Macy err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd); 487*eda14cbcSMatt Macy if (err != 0) { 488*eda14cbcSMatt Macy goto error; 489*eda14cbcSMatt Macy } 490*eda14cbcSMatt Macy 491*eda14cbcSMatt Macy while (next != NULL) { 492*eda14cbcSMatt Macy dsl_dir_t *child_dd; 493*eda14cbcSMatt Macy err = getcomponent(next, buf, &nextnext); 494*eda14cbcSMatt Macy if (err != 0) 495*eda14cbcSMatt Macy break; 496*eda14cbcSMatt Macy ASSERT(next[0] != '\0'); 497*eda14cbcSMatt Macy if (next[0] == '@') 498*eda14cbcSMatt Macy break; 499*eda14cbcSMatt Macy dprintf("looking up %s in obj%lld\n", 500*eda14cbcSMatt Macy buf, dsl_dir_phys(dd)->dd_child_dir_zapobj); 501*eda14cbcSMatt Macy 502*eda14cbcSMatt Macy err = zap_lookup(dp->dp_meta_objset, 503*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_child_dir_zapobj, 
504*eda14cbcSMatt Macy buf, sizeof (ddobj), 1, &ddobj); 505*eda14cbcSMatt Macy if (err != 0) { 506*eda14cbcSMatt Macy if (err == ENOENT) 507*eda14cbcSMatt Macy err = 0; 508*eda14cbcSMatt Macy break; 509*eda14cbcSMatt Macy } 510*eda14cbcSMatt Macy 511*eda14cbcSMatt Macy err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_dd); 512*eda14cbcSMatt Macy if (err != 0) 513*eda14cbcSMatt Macy break; 514*eda14cbcSMatt Macy dsl_dir_rele(dd, tag); 515*eda14cbcSMatt Macy dd = child_dd; 516*eda14cbcSMatt Macy next = nextnext; 517*eda14cbcSMatt Macy } 518*eda14cbcSMatt Macy 519*eda14cbcSMatt Macy if (err != 0) { 520*eda14cbcSMatt Macy dsl_dir_rele(dd, tag); 521*eda14cbcSMatt Macy goto error; 522*eda14cbcSMatt Macy } 523*eda14cbcSMatt Macy 524*eda14cbcSMatt Macy /* 525*eda14cbcSMatt Macy * It's an error if there's more than one component left, or 526*eda14cbcSMatt Macy * tailp==NULL and there's any component left. 527*eda14cbcSMatt Macy */ 528*eda14cbcSMatt Macy if (next != NULL && 529*eda14cbcSMatt Macy (tailp == NULL || (nextnext && nextnext[0] != '\0'))) { 530*eda14cbcSMatt Macy /* bad path name */ 531*eda14cbcSMatt Macy dsl_dir_rele(dd, tag); 532*eda14cbcSMatt Macy dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp); 533*eda14cbcSMatt Macy err = SET_ERROR(ENOENT); 534*eda14cbcSMatt Macy } 535*eda14cbcSMatt Macy if (tailp != NULL) 536*eda14cbcSMatt Macy *tailp = next; 537*eda14cbcSMatt Macy if (err == 0) 538*eda14cbcSMatt Macy *ddp = dd; 539*eda14cbcSMatt Macy error: 540*eda14cbcSMatt Macy kmem_free(buf, ZFS_MAX_DATASET_NAME_LEN); 541*eda14cbcSMatt Macy return (err); 542*eda14cbcSMatt Macy } 543*eda14cbcSMatt Macy 544*eda14cbcSMatt Macy /* 545*eda14cbcSMatt Macy * If the counts are already initialized for this filesystem and its 546*eda14cbcSMatt Macy * descendants then do nothing, otherwise initialize the counts. 
547*eda14cbcSMatt Macy * 548*eda14cbcSMatt Macy * The counts on this filesystem, and those below, may be uninitialized due to 549*eda14cbcSMatt Macy * either the use of a pre-existing pool which did not support the 550*eda14cbcSMatt Macy * filesystem/snapshot limit feature, or one in which the feature had not yet 551*eda14cbcSMatt Macy * been enabled. 552*eda14cbcSMatt Macy * 553*eda14cbcSMatt Macy * Recursively descend the filesystem tree and update the filesystem/snapshot 554*eda14cbcSMatt Macy * counts on each filesystem below, then update the cumulative count on the 555*eda14cbcSMatt Macy * current filesystem. If the filesystem already has a count set on it, 556*eda14cbcSMatt Macy * then we know that its counts, and the counts on the filesystems below it, 557*eda14cbcSMatt Macy * are already correct, so we don't have to update this filesystem. 558*eda14cbcSMatt Macy */ 559*eda14cbcSMatt Macy static void 560*eda14cbcSMatt Macy dsl_dir_init_fs_ss_count(dsl_dir_t *dd, dmu_tx_t *tx) 561*eda14cbcSMatt Macy { 562*eda14cbcSMatt Macy uint64_t my_fs_cnt = 0; 563*eda14cbcSMatt Macy uint64_t my_ss_cnt = 0; 564*eda14cbcSMatt Macy dsl_pool_t *dp = dd->dd_pool; 565*eda14cbcSMatt Macy objset_t *os = dp->dp_meta_objset; 566*eda14cbcSMatt Macy zap_cursor_t *zc; 567*eda14cbcSMatt Macy zap_attribute_t *za; 568*eda14cbcSMatt Macy dsl_dataset_t *ds; 569*eda14cbcSMatt Macy 570*eda14cbcSMatt Macy ASSERT(spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)); 571*eda14cbcSMatt Macy ASSERT(dsl_pool_config_held(dp)); 572*eda14cbcSMatt Macy ASSERT(dmu_tx_is_syncing(tx)); 573*eda14cbcSMatt Macy 574*eda14cbcSMatt Macy dsl_dir_zapify(dd, tx); 575*eda14cbcSMatt Macy 576*eda14cbcSMatt Macy /* 577*eda14cbcSMatt Macy * If the filesystem count has already been initialized then we 578*eda14cbcSMatt Macy * don't need to recurse down any further. 
579*eda14cbcSMatt Macy */ 580*eda14cbcSMatt Macy if (zap_contains(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT) == 0) 581*eda14cbcSMatt Macy return; 582*eda14cbcSMatt Macy 583*eda14cbcSMatt Macy zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); 584*eda14cbcSMatt Macy za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); 585*eda14cbcSMatt Macy 586*eda14cbcSMatt Macy /* Iterate my child dirs */ 587*eda14cbcSMatt Macy for (zap_cursor_init(zc, os, dsl_dir_phys(dd)->dd_child_dir_zapobj); 588*eda14cbcSMatt Macy zap_cursor_retrieve(zc, za) == 0; zap_cursor_advance(zc)) { 589*eda14cbcSMatt Macy dsl_dir_t *chld_dd; 590*eda14cbcSMatt Macy uint64_t count; 591*eda14cbcSMatt Macy 592*eda14cbcSMatt Macy VERIFY0(dsl_dir_hold_obj(dp, za->za_first_integer, NULL, FTAG, 593*eda14cbcSMatt Macy &chld_dd)); 594*eda14cbcSMatt Macy 595*eda14cbcSMatt Macy /* 596*eda14cbcSMatt Macy * Ignore hidden ($FREE, $MOS & $ORIGIN) objsets and 597*eda14cbcSMatt Macy * temporary datasets. 598*eda14cbcSMatt Macy */ 599*eda14cbcSMatt Macy if (chld_dd->dd_myname[0] == '$' || 600*eda14cbcSMatt Macy chld_dd->dd_myname[0] == '%') { 601*eda14cbcSMatt Macy dsl_dir_rele(chld_dd, FTAG); 602*eda14cbcSMatt Macy continue; 603*eda14cbcSMatt Macy } 604*eda14cbcSMatt Macy 605*eda14cbcSMatt Macy my_fs_cnt++; /* count this child */ 606*eda14cbcSMatt Macy 607*eda14cbcSMatt Macy dsl_dir_init_fs_ss_count(chld_dd, tx); 608*eda14cbcSMatt Macy 609*eda14cbcSMatt Macy VERIFY0(zap_lookup(os, chld_dd->dd_object, 610*eda14cbcSMatt Macy DD_FIELD_FILESYSTEM_COUNT, sizeof (count), 1, &count)); 611*eda14cbcSMatt Macy my_fs_cnt += count; 612*eda14cbcSMatt Macy VERIFY0(zap_lookup(os, chld_dd->dd_object, 613*eda14cbcSMatt Macy DD_FIELD_SNAPSHOT_COUNT, sizeof (count), 1, &count)); 614*eda14cbcSMatt Macy my_ss_cnt += count; 615*eda14cbcSMatt Macy 616*eda14cbcSMatt Macy dsl_dir_rele(chld_dd, FTAG); 617*eda14cbcSMatt Macy } 618*eda14cbcSMatt Macy zap_cursor_fini(zc); 619*eda14cbcSMatt Macy /* Count my snapshots (we counted children's 
snapshots above) */ 620*eda14cbcSMatt Macy VERIFY0(dsl_dataset_hold_obj(dd->dd_pool, 621*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_head_dataset_obj, FTAG, &ds)); 622*eda14cbcSMatt Macy 623*eda14cbcSMatt Macy for (zap_cursor_init(zc, os, dsl_dataset_phys(ds)->ds_snapnames_zapobj); 624*eda14cbcSMatt Macy zap_cursor_retrieve(zc, za) == 0; 625*eda14cbcSMatt Macy zap_cursor_advance(zc)) { 626*eda14cbcSMatt Macy /* Don't count temporary snapshots */ 627*eda14cbcSMatt Macy if (za->za_name[0] != '%') 628*eda14cbcSMatt Macy my_ss_cnt++; 629*eda14cbcSMatt Macy } 630*eda14cbcSMatt Macy zap_cursor_fini(zc); 631*eda14cbcSMatt Macy 632*eda14cbcSMatt Macy dsl_dataset_rele(ds, FTAG); 633*eda14cbcSMatt Macy 634*eda14cbcSMatt Macy kmem_free(zc, sizeof (zap_cursor_t)); 635*eda14cbcSMatt Macy kmem_free(za, sizeof (zap_attribute_t)); 636*eda14cbcSMatt Macy 637*eda14cbcSMatt Macy /* we're in a sync task, update counts */ 638*eda14cbcSMatt Macy dmu_buf_will_dirty(dd->dd_dbuf, tx); 639*eda14cbcSMatt Macy VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, 640*eda14cbcSMatt Macy sizeof (my_fs_cnt), 1, &my_fs_cnt, tx)); 641*eda14cbcSMatt Macy VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, 642*eda14cbcSMatt Macy sizeof (my_ss_cnt), 1, &my_ss_cnt, tx)); 643*eda14cbcSMatt Macy } 644*eda14cbcSMatt Macy 645*eda14cbcSMatt Macy static int 646*eda14cbcSMatt Macy dsl_dir_actv_fs_ss_limit_check(void *arg, dmu_tx_t *tx) 647*eda14cbcSMatt Macy { 648*eda14cbcSMatt Macy char *ddname = (char *)arg; 649*eda14cbcSMatt Macy dsl_pool_t *dp = dmu_tx_pool(tx); 650*eda14cbcSMatt Macy dsl_dataset_t *ds; 651*eda14cbcSMatt Macy dsl_dir_t *dd; 652*eda14cbcSMatt Macy int error; 653*eda14cbcSMatt Macy 654*eda14cbcSMatt Macy error = dsl_dataset_hold(dp, ddname, FTAG, &ds); 655*eda14cbcSMatt Macy if (error != 0) 656*eda14cbcSMatt Macy return (error); 657*eda14cbcSMatt Macy 658*eda14cbcSMatt Macy if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) { 659*eda14cbcSMatt Macy 
dsl_dataset_rele(ds, FTAG); 660*eda14cbcSMatt Macy return (SET_ERROR(ENOTSUP)); 661*eda14cbcSMatt Macy } 662*eda14cbcSMatt Macy 663*eda14cbcSMatt Macy dd = ds->ds_dir; 664*eda14cbcSMatt Macy if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT) && 665*eda14cbcSMatt Macy dsl_dir_is_zapified(dd) && 666*eda14cbcSMatt Macy zap_contains(dp->dp_meta_objset, dd->dd_object, 667*eda14cbcSMatt Macy DD_FIELD_FILESYSTEM_COUNT) == 0) { 668*eda14cbcSMatt Macy dsl_dataset_rele(ds, FTAG); 669*eda14cbcSMatt Macy return (SET_ERROR(EALREADY)); 670*eda14cbcSMatt Macy } 671*eda14cbcSMatt Macy 672*eda14cbcSMatt Macy dsl_dataset_rele(ds, FTAG); 673*eda14cbcSMatt Macy return (0); 674*eda14cbcSMatt Macy } 675*eda14cbcSMatt Macy 676*eda14cbcSMatt Macy static void 677*eda14cbcSMatt Macy dsl_dir_actv_fs_ss_limit_sync(void *arg, dmu_tx_t *tx) 678*eda14cbcSMatt Macy { 679*eda14cbcSMatt Macy char *ddname = (char *)arg; 680*eda14cbcSMatt Macy dsl_pool_t *dp = dmu_tx_pool(tx); 681*eda14cbcSMatt Macy dsl_dataset_t *ds; 682*eda14cbcSMatt Macy spa_t *spa; 683*eda14cbcSMatt Macy 684*eda14cbcSMatt Macy VERIFY0(dsl_dataset_hold(dp, ddname, FTAG, &ds)); 685*eda14cbcSMatt Macy 686*eda14cbcSMatt Macy spa = dsl_dataset_get_spa(ds); 687*eda14cbcSMatt Macy 688*eda14cbcSMatt Macy if (!spa_feature_is_active(spa, SPA_FEATURE_FS_SS_LIMIT)) { 689*eda14cbcSMatt Macy /* 690*eda14cbcSMatt Macy * Since the feature was not active and we're now setting a 691*eda14cbcSMatt Macy * limit, increment the feature-active counter so that the 692*eda14cbcSMatt Macy * feature becomes active for the first time. 693*eda14cbcSMatt Macy * 694*eda14cbcSMatt Macy * We are already in a sync task so we can update the MOS. 
695*eda14cbcSMatt Macy */ 696*eda14cbcSMatt Macy spa_feature_incr(spa, SPA_FEATURE_FS_SS_LIMIT, tx); 697*eda14cbcSMatt Macy } 698*eda14cbcSMatt Macy 699*eda14cbcSMatt Macy /* 700*eda14cbcSMatt Macy * Since we are now setting a non-UINT64_MAX limit on the filesystem, 701*eda14cbcSMatt Macy * we need to ensure the counts are correct. Descend down the tree from 702*eda14cbcSMatt Macy * this point and update all of the counts to be accurate. 703*eda14cbcSMatt Macy */ 704*eda14cbcSMatt Macy dsl_dir_init_fs_ss_count(ds->ds_dir, tx); 705*eda14cbcSMatt Macy 706*eda14cbcSMatt Macy dsl_dataset_rele(ds, FTAG); 707*eda14cbcSMatt Macy } 708*eda14cbcSMatt Macy 709*eda14cbcSMatt Macy /* 710*eda14cbcSMatt Macy * Make sure the feature is enabled and activate it if necessary. 711*eda14cbcSMatt Macy * Since we're setting a limit, ensure the on-disk counts are valid. 712*eda14cbcSMatt Macy * This is only called by the ioctl path when setting a limit value. 713*eda14cbcSMatt Macy * 714*eda14cbcSMatt Macy * We do not need to validate the new limit, since users who can change the 715*eda14cbcSMatt Macy * limit are also allowed to exceed the limit. 716*eda14cbcSMatt Macy */ 717*eda14cbcSMatt Macy int 718*eda14cbcSMatt Macy dsl_dir_activate_fs_ss_limit(const char *ddname) 719*eda14cbcSMatt Macy { 720*eda14cbcSMatt Macy int error; 721*eda14cbcSMatt Macy 722*eda14cbcSMatt Macy error = dsl_sync_task(ddname, dsl_dir_actv_fs_ss_limit_check, 723*eda14cbcSMatt Macy dsl_dir_actv_fs_ss_limit_sync, (void *)ddname, 0, 724*eda14cbcSMatt Macy ZFS_SPACE_CHECK_RESERVED); 725*eda14cbcSMatt Macy 726*eda14cbcSMatt Macy if (error == EALREADY) 727*eda14cbcSMatt Macy error = 0; 728*eda14cbcSMatt Macy 729*eda14cbcSMatt Macy return (error); 730*eda14cbcSMatt Macy } 731*eda14cbcSMatt Macy 732*eda14cbcSMatt Macy /* 733*eda14cbcSMatt Macy * Used to determine if the filesystem_limit or snapshot_limit should be 734*eda14cbcSMatt Macy * enforced. 
We allow the limit to be exceeded if the user has permission to 735*eda14cbcSMatt Macy * write the property value. We pass in the creds that we got in the open 736*eda14cbcSMatt Macy * context since we will always be the GZ root in syncing context. We also have 737*eda14cbcSMatt Macy * to handle the case where we are allowed to change the limit on the current 738*eda14cbcSMatt Macy * dataset, but there may be another limit in the tree above. 739*eda14cbcSMatt Macy * 740*eda14cbcSMatt Macy * We can never modify these two properties within a non-global zone. In 741*eda14cbcSMatt Macy * addition, the other checks are modeled on zfs_secpolicy_write_perms. We 742*eda14cbcSMatt Macy * can't use that function since we are already holding the dp_config_rwlock. 743*eda14cbcSMatt Macy * In addition, we already have the dd and dealing with snapshots is simplified 744*eda14cbcSMatt Macy * in this code. 745*eda14cbcSMatt Macy */ 746*eda14cbcSMatt Macy 747*eda14cbcSMatt Macy typedef enum { 748*eda14cbcSMatt Macy ENFORCE_ALWAYS, 749*eda14cbcSMatt Macy ENFORCE_NEVER, 750*eda14cbcSMatt Macy ENFORCE_ABOVE 751*eda14cbcSMatt Macy } enforce_res_t; 752*eda14cbcSMatt Macy 753*eda14cbcSMatt Macy static enforce_res_t 754*eda14cbcSMatt Macy dsl_enforce_ds_ss_limits(dsl_dir_t *dd, zfs_prop_t prop, 755*eda14cbcSMatt Macy cred_t *cr, proc_t *proc) 756*eda14cbcSMatt Macy { 757*eda14cbcSMatt Macy enforce_res_t enforce = ENFORCE_ALWAYS; 758*eda14cbcSMatt Macy uint64_t obj; 759*eda14cbcSMatt Macy dsl_dataset_t *ds; 760*eda14cbcSMatt Macy uint64_t zoned; 761*eda14cbcSMatt Macy const char *zonedstr; 762*eda14cbcSMatt Macy 763*eda14cbcSMatt Macy ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT || 764*eda14cbcSMatt Macy prop == ZFS_PROP_SNAPSHOT_LIMIT); 765*eda14cbcSMatt Macy 766*eda14cbcSMatt Macy #ifdef _KERNEL 767*eda14cbcSMatt Macy if (crgetzoneid(cr) != GLOBAL_ZONEID) 768*eda14cbcSMatt Macy return (ENFORCE_ALWAYS); 769*eda14cbcSMatt Macy 770*eda14cbcSMatt Macy /* 771*eda14cbcSMatt Macy * We are checking 
the saved credentials of the user process, which is 772*eda14cbcSMatt Macy * not the current process. Note that we can't use secpolicy_zfs(), 773*eda14cbcSMatt Macy * because it only works if the cred is that of the current process (on 774*eda14cbcSMatt Macy * Linux). 775*eda14cbcSMatt Macy */ 776*eda14cbcSMatt Macy if (secpolicy_zfs_proc(cr, proc) == 0) 777*eda14cbcSMatt Macy return (ENFORCE_NEVER); 778*eda14cbcSMatt Macy #endif 779*eda14cbcSMatt Macy 780*eda14cbcSMatt Macy if ((obj = dsl_dir_phys(dd)->dd_head_dataset_obj) == 0) 781*eda14cbcSMatt Macy return (ENFORCE_ALWAYS); 782*eda14cbcSMatt Macy 783*eda14cbcSMatt Macy ASSERT(dsl_pool_config_held(dd->dd_pool)); 784*eda14cbcSMatt Macy 785*eda14cbcSMatt Macy if (dsl_dataset_hold_obj(dd->dd_pool, obj, FTAG, &ds) != 0) 786*eda14cbcSMatt Macy return (ENFORCE_ALWAYS); 787*eda14cbcSMatt Macy 788*eda14cbcSMatt Macy zonedstr = zfs_prop_to_name(ZFS_PROP_ZONED); 789*eda14cbcSMatt Macy if (dsl_prop_get_ds(ds, zonedstr, 8, 1, &zoned, NULL) || zoned) { 790*eda14cbcSMatt Macy /* Only root can access zoned fs's from the GZ */ 791*eda14cbcSMatt Macy enforce = ENFORCE_ALWAYS; 792*eda14cbcSMatt Macy } else { 793*eda14cbcSMatt Macy if (dsl_deleg_access_impl(ds, zfs_prop_to_name(prop), cr) == 0) 794*eda14cbcSMatt Macy enforce = ENFORCE_ABOVE; 795*eda14cbcSMatt Macy } 796*eda14cbcSMatt Macy 797*eda14cbcSMatt Macy dsl_dataset_rele(ds, FTAG); 798*eda14cbcSMatt Macy return (enforce); 799*eda14cbcSMatt Macy } 800*eda14cbcSMatt Macy 801*eda14cbcSMatt Macy /* 802*eda14cbcSMatt Macy * Check if adding additional child filesystem(s) would exceed any filesystem 803*eda14cbcSMatt Macy * limits or adding additional snapshot(s) would exceed any snapshot limits. 804*eda14cbcSMatt Macy * The prop argument indicates which limit to check. 805*eda14cbcSMatt Macy * 806*eda14cbcSMatt Macy * Note that all filesystem limits up to the root (or the highest 807*eda14cbcSMatt Macy * initialized) filesystem or the given ancestor must be satisfied. 
808*eda14cbcSMatt Macy */ 809*eda14cbcSMatt Macy int 810*eda14cbcSMatt Macy dsl_fs_ss_limit_check(dsl_dir_t *dd, uint64_t delta, zfs_prop_t prop, 811*eda14cbcSMatt Macy dsl_dir_t *ancestor, cred_t *cr, proc_t *proc) 812*eda14cbcSMatt Macy { 813*eda14cbcSMatt Macy objset_t *os = dd->dd_pool->dp_meta_objset; 814*eda14cbcSMatt Macy uint64_t limit, count; 815*eda14cbcSMatt Macy char *count_prop; 816*eda14cbcSMatt Macy enforce_res_t enforce; 817*eda14cbcSMatt Macy int err = 0; 818*eda14cbcSMatt Macy 819*eda14cbcSMatt Macy ASSERT(dsl_pool_config_held(dd->dd_pool)); 820*eda14cbcSMatt Macy ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT || 821*eda14cbcSMatt Macy prop == ZFS_PROP_SNAPSHOT_LIMIT); 822*eda14cbcSMatt Macy 823*eda14cbcSMatt Macy /* 824*eda14cbcSMatt Macy * If we're allowed to change the limit, don't enforce the limit 825*eda14cbcSMatt Macy * e.g. this can happen if a snapshot is taken by an administrative 826*eda14cbcSMatt Macy * user in the global zone (i.e. a recursive snapshot by root). 827*eda14cbcSMatt Macy * However, we must handle the case of delegated permissions where we 828*eda14cbcSMatt Macy * are allowed to change the limit on the current dataset, but there 829*eda14cbcSMatt Macy * is another limit in the tree above. 830*eda14cbcSMatt Macy */ 831*eda14cbcSMatt Macy enforce = dsl_enforce_ds_ss_limits(dd, prop, cr, proc); 832*eda14cbcSMatt Macy if (enforce == ENFORCE_NEVER) 833*eda14cbcSMatt Macy return (0); 834*eda14cbcSMatt Macy 835*eda14cbcSMatt Macy /* 836*eda14cbcSMatt Macy * e.g. if renaming a dataset with no snapshots, count adjustment 837*eda14cbcSMatt Macy * is 0. 838*eda14cbcSMatt Macy */ 839*eda14cbcSMatt Macy if (delta == 0) 840*eda14cbcSMatt Macy return (0); 841*eda14cbcSMatt Macy 842*eda14cbcSMatt Macy if (prop == ZFS_PROP_SNAPSHOT_LIMIT) { 843*eda14cbcSMatt Macy /* 844*eda14cbcSMatt Macy * We don't enforce the limit for temporary snapshots. This is 845*eda14cbcSMatt Macy * indicated by a NULL cred_t argument. 
846*eda14cbcSMatt Macy */ 847*eda14cbcSMatt Macy if (cr == NULL) 848*eda14cbcSMatt Macy return (0); 849*eda14cbcSMatt Macy 850*eda14cbcSMatt Macy count_prop = DD_FIELD_SNAPSHOT_COUNT; 851*eda14cbcSMatt Macy } else { 852*eda14cbcSMatt Macy count_prop = DD_FIELD_FILESYSTEM_COUNT; 853*eda14cbcSMatt Macy } 854*eda14cbcSMatt Macy 855*eda14cbcSMatt Macy /* 856*eda14cbcSMatt Macy * If an ancestor has been provided, stop checking the limit once we 857*eda14cbcSMatt Macy * hit that dir. We need this during rename so that we don't overcount 858*eda14cbcSMatt Macy * the check once we recurse up to the common ancestor. 859*eda14cbcSMatt Macy */ 860*eda14cbcSMatt Macy if (ancestor == dd) 861*eda14cbcSMatt Macy return (0); 862*eda14cbcSMatt Macy 863*eda14cbcSMatt Macy /* 864*eda14cbcSMatt Macy * If we hit an uninitialized node while recursing up the tree, we can 865*eda14cbcSMatt Macy * stop since we know there is no limit here (or above). The counts are 866*eda14cbcSMatt Macy * not valid on this node and we know we won't touch this node's counts. 867*eda14cbcSMatt Macy */ 868*eda14cbcSMatt Macy if (!dsl_dir_is_zapified(dd)) 869*eda14cbcSMatt Macy return (0); 870*eda14cbcSMatt Macy err = zap_lookup(os, dd->dd_object, 871*eda14cbcSMatt Macy count_prop, sizeof (count), 1, &count); 872*eda14cbcSMatt Macy if (err == ENOENT) 873*eda14cbcSMatt Macy return (0); 874*eda14cbcSMatt Macy if (err != 0) 875*eda14cbcSMatt Macy return (err); 876*eda14cbcSMatt Macy 877*eda14cbcSMatt Macy err = dsl_prop_get_dd(dd, zfs_prop_to_name(prop), 8, 1, &limit, NULL, 878*eda14cbcSMatt Macy B_FALSE); 879*eda14cbcSMatt Macy if (err != 0) 880*eda14cbcSMatt Macy return (err); 881*eda14cbcSMatt Macy 882*eda14cbcSMatt Macy /* Is there a limit which we've hit? 
*/ 883*eda14cbcSMatt Macy if (enforce == ENFORCE_ALWAYS && (count + delta) > limit) 884*eda14cbcSMatt Macy return (SET_ERROR(EDQUOT)); 885*eda14cbcSMatt Macy 886*eda14cbcSMatt Macy if (dd->dd_parent != NULL) 887*eda14cbcSMatt Macy err = dsl_fs_ss_limit_check(dd->dd_parent, delta, prop, 888*eda14cbcSMatt Macy ancestor, cr, proc); 889*eda14cbcSMatt Macy 890*eda14cbcSMatt Macy return (err); 891*eda14cbcSMatt Macy } 892*eda14cbcSMatt Macy 893*eda14cbcSMatt Macy /* 894*eda14cbcSMatt Macy * Adjust the filesystem or snapshot count for the specified dsl_dir_t and all 895*eda14cbcSMatt Macy * parents. When a new filesystem/snapshot is created, increment the count on 896*eda14cbcSMatt Macy * all parents, and when a filesystem/snapshot is destroyed, decrement the 897*eda14cbcSMatt Macy * count. 898*eda14cbcSMatt Macy */ 899*eda14cbcSMatt Macy void 900*eda14cbcSMatt Macy dsl_fs_ss_count_adjust(dsl_dir_t *dd, int64_t delta, const char *prop, 901*eda14cbcSMatt Macy dmu_tx_t *tx) 902*eda14cbcSMatt Macy { 903*eda14cbcSMatt Macy int err; 904*eda14cbcSMatt Macy objset_t *os = dd->dd_pool->dp_meta_objset; 905*eda14cbcSMatt Macy uint64_t count; 906*eda14cbcSMatt Macy 907*eda14cbcSMatt Macy ASSERT(dsl_pool_config_held(dd->dd_pool)); 908*eda14cbcSMatt Macy ASSERT(dmu_tx_is_syncing(tx)); 909*eda14cbcSMatt Macy ASSERT(strcmp(prop, DD_FIELD_FILESYSTEM_COUNT) == 0 || 910*eda14cbcSMatt Macy strcmp(prop, DD_FIELD_SNAPSHOT_COUNT) == 0); 911*eda14cbcSMatt Macy 912*eda14cbcSMatt Macy /* 913*eda14cbcSMatt Macy * When we receive an incremental stream into a filesystem that already 914*eda14cbcSMatt Macy * exists, a temporary clone is created. We don't count this temporary 915*eda14cbcSMatt Macy * clone, whose name begins with a '%'. We also ignore hidden ($FREE, 916*eda14cbcSMatt Macy * $MOS & $ORIGIN) objsets. 
917*eda14cbcSMatt Macy */ 918*eda14cbcSMatt Macy if ((dd->dd_myname[0] == '%' || dd->dd_myname[0] == '$') && 919*eda14cbcSMatt Macy strcmp(prop, DD_FIELD_FILESYSTEM_COUNT) == 0) 920*eda14cbcSMatt Macy return; 921*eda14cbcSMatt Macy 922*eda14cbcSMatt Macy /* 923*eda14cbcSMatt Macy * e.g. if renaming a dataset with no snapshots, count adjustment is 0 924*eda14cbcSMatt Macy */ 925*eda14cbcSMatt Macy if (delta == 0) 926*eda14cbcSMatt Macy return; 927*eda14cbcSMatt Macy 928*eda14cbcSMatt Macy /* 929*eda14cbcSMatt Macy * If we hit an uninitialized node while recursing up the tree, we can 930*eda14cbcSMatt Macy * stop since we know the counts are not valid on this node and we 931*eda14cbcSMatt Macy * know we shouldn't touch this node's counts. An uninitialized count 932*eda14cbcSMatt Macy * on the node indicates that either the feature has not yet been 933*eda14cbcSMatt Macy * activated or there are no limits on this part of the tree. 934*eda14cbcSMatt Macy */ 935*eda14cbcSMatt Macy if (!dsl_dir_is_zapified(dd) || (err = zap_lookup(os, dd->dd_object, 936*eda14cbcSMatt Macy prop, sizeof (count), 1, &count)) == ENOENT) 937*eda14cbcSMatt Macy return; 938*eda14cbcSMatt Macy VERIFY0(err); 939*eda14cbcSMatt Macy 940*eda14cbcSMatt Macy count += delta; 941*eda14cbcSMatt Macy /* Use a signed verify to make sure we're not neg. 
*/ 942*eda14cbcSMatt Macy VERIFY3S(count, >=, 0); 943*eda14cbcSMatt Macy 944*eda14cbcSMatt Macy VERIFY0(zap_update(os, dd->dd_object, prop, sizeof (count), 1, &count, 945*eda14cbcSMatt Macy tx)); 946*eda14cbcSMatt Macy 947*eda14cbcSMatt Macy /* Roll up this additional count into our ancestors */ 948*eda14cbcSMatt Macy if (dd->dd_parent != NULL) 949*eda14cbcSMatt Macy dsl_fs_ss_count_adjust(dd->dd_parent, delta, prop, tx); 950*eda14cbcSMatt Macy } 951*eda14cbcSMatt Macy 952*eda14cbcSMatt Macy uint64_t 953*eda14cbcSMatt Macy dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, 954*eda14cbcSMatt Macy dmu_tx_t *tx) 955*eda14cbcSMatt Macy { 956*eda14cbcSMatt Macy objset_t *mos = dp->dp_meta_objset; 957*eda14cbcSMatt Macy uint64_t ddobj; 958*eda14cbcSMatt Macy dsl_dir_phys_t *ddphys; 959*eda14cbcSMatt Macy dmu_buf_t *dbuf; 960*eda14cbcSMatt Macy 961*eda14cbcSMatt Macy ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0, 962*eda14cbcSMatt Macy DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx); 963*eda14cbcSMatt Macy if (pds) { 964*eda14cbcSMatt Macy VERIFY0(zap_add(mos, dsl_dir_phys(pds)->dd_child_dir_zapobj, 965*eda14cbcSMatt Macy name, sizeof (uint64_t), 1, &ddobj, tx)); 966*eda14cbcSMatt Macy } else { 967*eda14cbcSMatt Macy /* it's the root dir */ 968*eda14cbcSMatt Macy VERIFY0(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, 969*eda14cbcSMatt Macy DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx)); 970*eda14cbcSMatt Macy } 971*eda14cbcSMatt Macy VERIFY0(dmu_bonus_hold(mos, ddobj, FTAG, &dbuf)); 972*eda14cbcSMatt Macy dmu_buf_will_dirty(dbuf, tx); 973*eda14cbcSMatt Macy ddphys = dbuf->db_data; 974*eda14cbcSMatt Macy 975*eda14cbcSMatt Macy ddphys->dd_creation_time = gethrestime_sec(); 976*eda14cbcSMatt Macy if (pds) { 977*eda14cbcSMatt Macy ddphys->dd_parent_obj = pds->dd_object; 978*eda14cbcSMatt Macy 979*eda14cbcSMatt Macy /* update the filesystem counts */ 980*eda14cbcSMatt Macy dsl_fs_ss_count_adjust(pds, 1, DD_FIELD_FILESYSTEM_COUNT, tx); 981*eda14cbcSMatt 
Macy } 982*eda14cbcSMatt Macy ddphys->dd_props_zapobj = zap_create(mos, 983*eda14cbcSMatt Macy DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); 984*eda14cbcSMatt Macy ddphys->dd_child_dir_zapobj = zap_create(mos, 985*eda14cbcSMatt Macy DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); 986*eda14cbcSMatt Macy if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN) 987*eda14cbcSMatt Macy ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN; 988*eda14cbcSMatt Macy 989*eda14cbcSMatt Macy dmu_buf_rele(dbuf, FTAG); 990*eda14cbcSMatt Macy 991*eda14cbcSMatt Macy return (ddobj); 992*eda14cbcSMatt Macy } 993*eda14cbcSMatt Macy 994*eda14cbcSMatt Macy boolean_t 995*eda14cbcSMatt Macy dsl_dir_is_clone(dsl_dir_t *dd) 996*eda14cbcSMatt Macy { 997*eda14cbcSMatt Macy return (dsl_dir_phys(dd)->dd_origin_obj && 998*eda14cbcSMatt Macy (dd->dd_pool->dp_origin_snap == NULL || 999*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_origin_obj != 1000*eda14cbcSMatt Macy dd->dd_pool->dp_origin_snap->ds_object)); 1001*eda14cbcSMatt Macy } 1002*eda14cbcSMatt Macy 1003*eda14cbcSMatt Macy uint64_t 1004*eda14cbcSMatt Macy dsl_dir_get_used(dsl_dir_t *dd) 1005*eda14cbcSMatt Macy { 1006*eda14cbcSMatt Macy return (dsl_dir_phys(dd)->dd_used_bytes); 1007*eda14cbcSMatt Macy } 1008*eda14cbcSMatt Macy 1009*eda14cbcSMatt Macy uint64_t 1010*eda14cbcSMatt Macy dsl_dir_get_compressed(dsl_dir_t *dd) 1011*eda14cbcSMatt Macy { 1012*eda14cbcSMatt Macy return (dsl_dir_phys(dd)->dd_compressed_bytes); 1013*eda14cbcSMatt Macy } 1014*eda14cbcSMatt Macy 1015*eda14cbcSMatt Macy uint64_t 1016*eda14cbcSMatt Macy dsl_dir_get_quota(dsl_dir_t *dd) 1017*eda14cbcSMatt Macy { 1018*eda14cbcSMatt Macy return (dsl_dir_phys(dd)->dd_quota); 1019*eda14cbcSMatt Macy } 1020*eda14cbcSMatt Macy 1021*eda14cbcSMatt Macy uint64_t 1022*eda14cbcSMatt Macy dsl_dir_get_reservation(dsl_dir_t *dd) 1023*eda14cbcSMatt Macy { 1024*eda14cbcSMatt Macy return (dsl_dir_phys(dd)->dd_reserved); 1025*eda14cbcSMatt Macy } 1026*eda14cbcSMatt Macy 1027*eda14cbcSMatt Macy uint64_t 
1028*eda14cbcSMatt Macy dsl_dir_get_compressratio(dsl_dir_t *dd) 1029*eda14cbcSMatt Macy { 1030*eda14cbcSMatt Macy /* a fixed point number, 100x the ratio */ 1031*eda14cbcSMatt Macy return (dsl_dir_phys(dd)->dd_compressed_bytes == 0 ? 100 : 1032*eda14cbcSMatt Macy (dsl_dir_phys(dd)->dd_uncompressed_bytes * 100 / 1033*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_compressed_bytes)); 1034*eda14cbcSMatt Macy } 1035*eda14cbcSMatt Macy 1036*eda14cbcSMatt Macy uint64_t 1037*eda14cbcSMatt Macy dsl_dir_get_logicalused(dsl_dir_t *dd) 1038*eda14cbcSMatt Macy { 1039*eda14cbcSMatt Macy return (dsl_dir_phys(dd)->dd_uncompressed_bytes); 1040*eda14cbcSMatt Macy } 1041*eda14cbcSMatt Macy 1042*eda14cbcSMatt Macy uint64_t 1043*eda14cbcSMatt Macy dsl_dir_get_usedsnap(dsl_dir_t *dd) 1044*eda14cbcSMatt Macy { 1045*eda14cbcSMatt Macy return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_SNAP]); 1046*eda14cbcSMatt Macy } 1047*eda14cbcSMatt Macy 1048*eda14cbcSMatt Macy uint64_t 1049*eda14cbcSMatt Macy dsl_dir_get_usedds(dsl_dir_t *dd) 1050*eda14cbcSMatt Macy { 1051*eda14cbcSMatt Macy return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_HEAD]); 1052*eda14cbcSMatt Macy } 1053*eda14cbcSMatt Macy 1054*eda14cbcSMatt Macy uint64_t 1055*eda14cbcSMatt Macy dsl_dir_get_usedrefreserv(dsl_dir_t *dd) 1056*eda14cbcSMatt Macy { 1057*eda14cbcSMatt Macy return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_REFRSRV]); 1058*eda14cbcSMatt Macy } 1059*eda14cbcSMatt Macy 1060*eda14cbcSMatt Macy uint64_t 1061*eda14cbcSMatt Macy dsl_dir_get_usedchild(dsl_dir_t *dd) 1062*eda14cbcSMatt Macy { 1063*eda14cbcSMatt Macy return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD] + 1064*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD_RSRV]); 1065*eda14cbcSMatt Macy } 1066*eda14cbcSMatt Macy 1067*eda14cbcSMatt Macy void 1068*eda14cbcSMatt Macy dsl_dir_get_origin(dsl_dir_t *dd, char *buf) 1069*eda14cbcSMatt Macy { 1070*eda14cbcSMatt Macy dsl_dataset_t *ds; 1071*eda14cbcSMatt Macy 
VERIFY0(dsl_dataset_hold_obj(dd->dd_pool, 1072*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_origin_obj, FTAG, &ds)); 1073*eda14cbcSMatt Macy 1074*eda14cbcSMatt Macy dsl_dataset_name(ds, buf); 1075*eda14cbcSMatt Macy 1076*eda14cbcSMatt Macy dsl_dataset_rele(ds, FTAG); 1077*eda14cbcSMatt Macy } 1078*eda14cbcSMatt Macy 1079*eda14cbcSMatt Macy int 1080*eda14cbcSMatt Macy dsl_dir_get_filesystem_count(dsl_dir_t *dd, uint64_t *count) 1081*eda14cbcSMatt Macy { 1082*eda14cbcSMatt Macy if (dsl_dir_is_zapified(dd)) { 1083*eda14cbcSMatt Macy objset_t *os = dd->dd_pool->dp_meta_objset; 1084*eda14cbcSMatt Macy return (zap_lookup(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, 1085*eda14cbcSMatt Macy sizeof (*count), 1, count)); 1086*eda14cbcSMatt Macy } else { 1087*eda14cbcSMatt Macy return (SET_ERROR(ENOENT)); 1088*eda14cbcSMatt Macy } 1089*eda14cbcSMatt Macy } 1090*eda14cbcSMatt Macy 1091*eda14cbcSMatt Macy int 1092*eda14cbcSMatt Macy dsl_dir_get_snapshot_count(dsl_dir_t *dd, uint64_t *count) 1093*eda14cbcSMatt Macy { 1094*eda14cbcSMatt Macy if (dsl_dir_is_zapified(dd)) { 1095*eda14cbcSMatt Macy objset_t *os = dd->dd_pool->dp_meta_objset; 1096*eda14cbcSMatt Macy return (zap_lookup(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, 1097*eda14cbcSMatt Macy sizeof (*count), 1, count)); 1098*eda14cbcSMatt Macy } else { 1099*eda14cbcSMatt Macy return (SET_ERROR(ENOENT)); 1100*eda14cbcSMatt Macy } 1101*eda14cbcSMatt Macy } 1102*eda14cbcSMatt Macy 1103*eda14cbcSMatt Macy void 1104*eda14cbcSMatt Macy dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) 1105*eda14cbcSMatt Macy { 1106*eda14cbcSMatt Macy mutex_enter(&dd->dd_lock); 1107*eda14cbcSMatt Macy dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, 1108*eda14cbcSMatt Macy dsl_dir_get_quota(dd)); 1109*eda14cbcSMatt Macy dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION, 1110*eda14cbcSMatt Macy dsl_dir_get_reservation(dd)); 1111*eda14cbcSMatt Macy dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALUSED, 1112*eda14cbcSMatt Macy dsl_dir_get_logicalused(dd)); 
1113*eda14cbcSMatt Macy if (dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN) { 1114*eda14cbcSMatt Macy dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP, 1115*eda14cbcSMatt Macy dsl_dir_get_usedsnap(dd)); 1116*eda14cbcSMatt Macy dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS, 1117*eda14cbcSMatt Macy dsl_dir_get_usedds(dd)); 1118*eda14cbcSMatt Macy dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV, 1119*eda14cbcSMatt Macy dsl_dir_get_usedrefreserv(dd)); 1120*eda14cbcSMatt Macy dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD, 1121*eda14cbcSMatt Macy dsl_dir_get_usedchild(dd)); 1122*eda14cbcSMatt Macy } 1123*eda14cbcSMatt Macy mutex_exit(&dd->dd_lock); 1124*eda14cbcSMatt Macy 1125*eda14cbcSMatt Macy uint64_t count; 1126*eda14cbcSMatt Macy if (dsl_dir_get_filesystem_count(dd, &count) == 0) { 1127*eda14cbcSMatt Macy dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_FILESYSTEM_COUNT, 1128*eda14cbcSMatt Macy count); 1129*eda14cbcSMatt Macy } 1130*eda14cbcSMatt Macy if (dsl_dir_get_snapshot_count(dd, &count) == 0) { 1131*eda14cbcSMatt Macy dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_SNAPSHOT_COUNT, 1132*eda14cbcSMatt Macy count); 1133*eda14cbcSMatt Macy } 1134*eda14cbcSMatt Macy 1135*eda14cbcSMatt Macy if (dsl_dir_is_clone(dd)) { 1136*eda14cbcSMatt Macy char buf[ZFS_MAX_DATASET_NAME_LEN]; 1137*eda14cbcSMatt Macy dsl_dir_get_origin(dd, buf); 1138*eda14cbcSMatt Macy dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf); 1139*eda14cbcSMatt Macy } 1140*eda14cbcSMatt Macy 1141*eda14cbcSMatt Macy } 1142*eda14cbcSMatt Macy 1143*eda14cbcSMatt Macy void 1144*eda14cbcSMatt Macy dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx) 1145*eda14cbcSMatt Macy { 1146*eda14cbcSMatt Macy dsl_pool_t *dp = dd->dd_pool; 1147*eda14cbcSMatt Macy 1148*eda14cbcSMatt Macy ASSERT(dsl_dir_phys(dd)); 1149*eda14cbcSMatt Macy 1150*eda14cbcSMatt Macy if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg)) { 1151*eda14cbcSMatt Macy /* up the hold count until we can be written out */ 1152*eda14cbcSMatt Macy 
dmu_buf_add_ref(dd->dd_dbuf, dd); 1153*eda14cbcSMatt Macy } 1154*eda14cbcSMatt Macy } 1155*eda14cbcSMatt Macy 1156*eda14cbcSMatt Macy static int64_t 1157*eda14cbcSMatt Macy parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta) 1158*eda14cbcSMatt Macy { 1159*eda14cbcSMatt Macy uint64_t old_accounted = MAX(used, dsl_dir_phys(dd)->dd_reserved); 1160*eda14cbcSMatt Macy uint64_t new_accounted = 1161*eda14cbcSMatt Macy MAX(used + delta, dsl_dir_phys(dd)->dd_reserved); 1162*eda14cbcSMatt Macy return (new_accounted - old_accounted); 1163*eda14cbcSMatt Macy } 1164*eda14cbcSMatt Macy 1165*eda14cbcSMatt Macy void 1166*eda14cbcSMatt Macy dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx) 1167*eda14cbcSMatt Macy { 1168*eda14cbcSMatt Macy ASSERT(dmu_tx_is_syncing(tx)); 1169*eda14cbcSMatt Macy 1170*eda14cbcSMatt Macy mutex_enter(&dd->dd_lock); 1171*eda14cbcSMatt Macy ASSERT0(dd->dd_tempreserved[tx->tx_txg & TXG_MASK]); 1172*eda14cbcSMatt Macy dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg, 1173*eda14cbcSMatt Macy dd->dd_space_towrite[tx->tx_txg & TXG_MASK] / 1024); 1174*eda14cbcSMatt Macy dd->dd_space_towrite[tx->tx_txg & TXG_MASK] = 0; 1175*eda14cbcSMatt Macy mutex_exit(&dd->dd_lock); 1176*eda14cbcSMatt Macy 1177*eda14cbcSMatt Macy /* release the hold from dsl_dir_dirty */ 1178*eda14cbcSMatt Macy dmu_buf_rele(dd->dd_dbuf, dd); 1179*eda14cbcSMatt Macy } 1180*eda14cbcSMatt Macy 1181*eda14cbcSMatt Macy static uint64_t 1182*eda14cbcSMatt Macy dsl_dir_space_towrite(dsl_dir_t *dd) 1183*eda14cbcSMatt Macy { 1184*eda14cbcSMatt Macy uint64_t space = 0; 1185*eda14cbcSMatt Macy 1186*eda14cbcSMatt Macy ASSERT(MUTEX_HELD(&dd->dd_lock)); 1187*eda14cbcSMatt Macy 1188*eda14cbcSMatt Macy for (int i = 0; i < TXG_SIZE; i++) { 1189*eda14cbcSMatt Macy space += dd->dd_space_towrite[i & TXG_MASK]; 1190*eda14cbcSMatt Macy ASSERT3U(dd->dd_space_towrite[i & TXG_MASK], >=, 0); 1191*eda14cbcSMatt Macy } 1192*eda14cbcSMatt Macy return (space); 1193*eda14cbcSMatt Macy } 1194*eda14cbcSMatt Macy 
1195*eda14cbcSMatt Macy /* 1196*eda14cbcSMatt Macy * How much space would dd have available if ancestor had delta applied 1197*eda14cbcSMatt Macy * to it? If ondiskonly is set, we're only interested in what's 1198*eda14cbcSMatt Macy * on-disk, not estimated pending changes. 1199*eda14cbcSMatt Macy */ 1200*eda14cbcSMatt Macy uint64_t 1201*eda14cbcSMatt Macy dsl_dir_space_available(dsl_dir_t *dd, 1202*eda14cbcSMatt Macy dsl_dir_t *ancestor, int64_t delta, int ondiskonly) 1203*eda14cbcSMatt Macy { 1204*eda14cbcSMatt Macy uint64_t parentspace, myspace, quota, used; 1205*eda14cbcSMatt Macy 1206*eda14cbcSMatt Macy /* 1207*eda14cbcSMatt Macy * If there are no restrictions otherwise, assume we have 1208*eda14cbcSMatt Macy * unlimited space available. 1209*eda14cbcSMatt Macy */ 1210*eda14cbcSMatt Macy quota = UINT64_MAX; 1211*eda14cbcSMatt Macy parentspace = UINT64_MAX; 1212*eda14cbcSMatt Macy 1213*eda14cbcSMatt Macy if (dd->dd_parent != NULL) { 1214*eda14cbcSMatt Macy parentspace = dsl_dir_space_available(dd->dd_parent, 1215*eda14cbcSMatt Macy ancestor, delta, ondiskonly); 1216*eda14cbcSMatt Macy } 1217*eda14cbcSMatt Macy 1218*eda14cbcSMatt Macy mutex_enter(&dd->dd_lock); 1219*eda14cbcSMatt Macy if (dsl_dir_phys(dd)->dd_quota != 0) 1220*eda14cbcSMatt Macy quota = dsl_dir_phys(dd)->dd_quota; 1221*eda14cbcSMatt Macy used = dsl_dir_phys(dd)->dd_used_bytes; 1222*eda14cbcSMatt Macy if (!ondiskonly) 1223*eda14cbcSMatt Macy used += dsl_dir_space_towrite(dd); 1224*eda14cbcSMatt Macy 1225*eda14cbcSMatt Macy if (dd->dd_parent == NULL) { 1226*eda14cbcSMatt Macy uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, 1227*eda14cbcSMatt Macy ZFS_SPACE_CHECK_NORMAL); 1228*eda14cbcSMatt Macy quota = MIN(quota, poolsize); 1229*eda14cbcSMatt Macy } 1230*eda14cbcSMatt Macy 1231*eda14cbcSMatt Macy if (dsl_dir_phys(dd)->dd_reserved > used && parentspace != UINT64_MAX) { 1232*eda14cbcSMatt Macy /* 1233*eda14cbcSMatt Macy * We have some space reserved, in addition to what our 1234*eda14cbcSMatt 
Macy * parent gave us. 1235*eda14cbcSMatt Macy */ 1236*eda14cbcSMatt Macy parentspace += dsl_dir_phys(dd)->dd_reserved - used; 1237*eda14cbcSMatt Macy } 1238*eda14cbcSMatt Macy 1239*eda14cbcSMatt Macy if (dd == ancestor) { 1240*eda14cbcSMatt Macy ASSERT(delta <= 0); 1241*eda14cbcSMatt Macy ASSERT(used >= -delta); 1242*eda14cbcSMatt Macy used += delta; 1243*eda14cbcSMatt Macy if (parentspace != UINT64_MAX) 1244*eda14cbcSMatt Macy parentspace -= delta; 1245*eda14cbcSMatt Macy } 1246*eda14cbcSMatt Macy 1247*eda14cbcSMatt Macy if (used > quota) { 1248*eda14cbcSMatt Macy /* over quota */ 1249*eda14cbcSMatt Macy myspace = 0; 1250*eda14cbcSMatt Macy } else { 1251*eda14cbcSMatt Macy /* 1252*eda14cbcSMatt Macy * the lesser of the space provided by our parent and 1253*eda14cbcSMatt Macy * the space left in our quota 1254*eda14cbcSMatt Macy */ 1255*eda14cbcSMatt Macy myspace = MIN(parentspace, quota - used); 1256*eda14cbcSMatt Macy } 1257*eda14cbcSMatt Macy 1258*eda14cbcSMatt Macy mutex_exit(&dd->dd_lock); 1259*eda14cbcSMatt Macy 1260*eda14cbcSMatt Macy return (myspace); 1261*eda14cbcSMatt Macy } 1262*eda14cbcSMatt Macy 1263*eda14cbcSMatt Macy struct tempreserve { 1264*eda14cbcSMatt Macy list_node_t tr_node; 1265*eda14cbcSMatt Macy dsl_dir_t *tr_ds; 1266*eda14cbcSMatt Macy uint64_t tr_size; 1267*eda14cbcSMatt Macy }; 1268*eda14cbcSMatt Macy 1269*eda14cbcSMatt Macy static int 1270*eda14cbcSMatt Macy dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree, 1271*eda14cbcSMatt Macy boolean_t ignorequota, list_t *tr_list, 1272*eda14cbcSMatt Macy dmu_tx_t *tx, boolean_t first) 1273*eda14cbcSMatt Macy { 1274*eda14cbcSMatt Macy uint64_t txg; 1275*eda14cbcSMatt Macy uint64_t quota; 1276*eda14cbcSMatt Macy struct tempreserve *tr; 1277*eda14cbcSMatt Macy int retval; 1278*eda14cbcSMatt Macy uint64_t ref_rsrv; 1279*eda14cbcSMatt Macy 1280*eda14cbcSMatt Macy top_of_function: 1281*eda14cbcSMatt Macy txg = tx->tx_txg; 1282*eda14cbcSMatt Macy retval = EDQUOT; 
1283*eda14cbcSMatt Macy ref_rsrv = 0; 1284*eda14cbcSMatt Macy 1285*eda14cbcSMatt Macy ASSERT3U(txg, !=, 0); 1286*eda14cbcSMatt Macy ASSERT3S(asize, >, 0); 1287*eda14cbcSMatt Macy 1288*eda14cbcSMatt Macy mutex_enter(&dd->dd_lock); 1289*eda14cbcSMatt Macy 1290*eda14cbcSMatt Macy /* 1291*eda14cbcSMatt Macy * Check against the dsl_dir's quota. We don't add in the delta 1292*eda14cbcSMatt Macy * when checking for over-quota because they get one free hit. 1293*eda14cbcSMatt Macy */ 1294*eda14cbcSMatt Macy uint64_t est_inflight = dsl_dir_space_towrite(dd); 1295*eda14cbcSMatt Macy for (int i = 0; i < TXG_SIZE; i++) 1296*eda14cbcSMatt Macy est_inflight += dd->dd_tempreserved[i]; 1297*eda14cbcSMatt Macy uint64_t used_on_disk = dsl_dir_phys(dd)->dd_used_bytes; 1298*eda14cbcSMatt Macy 1299*eda14cbcSMatt Macy /* 1300*eda14cbcSMatt Macy * On the first iteration, fetch the dataset's used-on-disk and 1301*eda14cbcSMatt Macy * refreservation values. Also, if checkrefquota is set, test if 1302*eda14cbcSMatt Macy * allocating this space would exceed the dataset's refquota. 1303*eda14cbcSMatt Macy */ 1304*eda14cbcSMatt Macy if (first && tx->tx_objset) { 1305*eda14cbcSMatt Macy int error; 1306*eda14cbcSMatt Macy dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset; 1307*eda14cbcSMatt Macy 1308*eda14cbcSMatt Macy error = dsl_dataset_check_quota(ds, !netfree, 1309*eda14cbcSMatt Macy asize, est_inflight, &used_on_disk, &ref_rsrv); 1310*eda14cbcSMatt Macy if (error != 0) { 1311*eda14cbcSMatt Macy mutex_exit(&dd->dd_lock); 1312*eda14cbcSMatt Macy DMU_TX_STAT_BUMP(dmu_tx_quota); 1313*eda14cbcSMatt Macy return (error); 1314*eda14cbcSMatt Macy } 1315*eda14cbcSMatt Macy } 1316*eda14cbcSMatt Macy 1317*eda14cbcSMatt Macy /* 1318*eda14cbcSMatt Macy * If this transaction will result in a net free of space, 1319*eda14cbcSMatt Macy * we want to let it through. 
1320*eda14cbcSMatt Macy */ 1321*eda14cbcSMatt Macy if (ignorequota || netfree || dsl_dir_phys(dd)->dd_quota == 0) 1322*eda14cbcSMatt Macy quota = UINT64_MAX; 1323*eda14cbcSMatt Macy else 1324*eda14cbcSMatt Macy quota = dsl_dir_phys(dd)->dd_quota; 1325*eda14cbcSMatt Macy 1326*eda14cbcSMatt Macy /* 1327*eda14cbcSMatt Macy * Adjust the quota against the actual pool size at the root 1328*eda14cbcSMatt Macy * minus any outstanding deferred frees. 1329*eda14cbcSMatt Macy * To ensure that it's possible to remove files from a full 1330*eda14cbcSMatt Macy * pool without inducing transient overcommits, we throttle 1331*eda14cbcSMatt Macy * netfree transactions against a quota that is slightly larger, 1332*eda14cbcSMatt Macy * but still within the pool's allocation slop. In cases where 1333*eda14cbcSMatt Macy * we're very close to full, this will allow a steady trickle of 1334*eda14cbcSMatt Macy * removes to get through. 1335*eda14cbcSMatt Macy */ 1336*eda14cbcSMatt Macy uint64_t deferred = 0; 1337*eda14cbcSMatt Macy if (dd->dd_parent == NULL) { 1338*eda14cbcSMatt Macy uint64_t avail = dsl_pool_unreserved_space(dd->dd_pool, 1339*eda14cbcSMatt Macy (netfree) ? 1340*eda14cbcSMatt Macy ZFS_SPACE_CHECK_RESERVED : ZFS_SPACE_CHECK_NORMAL); 1341*eda14cbcSMatt Macy 1342*eda14cbcSMatt Macy if (avail < quota) { 1343*eda14cbcSMatt Macy quota = avail; 1344*eda14cbcSMatt Macy retval = SET_ERROR(ENOSPC); 1345*eda14cbcSMatt Macy } 1346*eda14cbcSMatt Macy } 1347*eda14cbcSMatt Macy 1348*eda14cbcSMatt Macy /* 1349*eda14cbcSMatt Macy * If they are requesting more space, and our current estimate 1350*eda14cbcSMatt Macy * is over quota, they get to try again unless the actual 1351*eda14cbcSMatt Macy * on-disk is over quota and there are no pending changes (which 1352*eda14cbcSMatt Macy * may free up space for us). 
1353*eda14cbcSMatt Macy */ 1354*eda14cbcSMatt Macy if (used_on_disk + est_inflight >= quota) { 1355*eda14cbcSMatt Macy if (est_inflight > 0 || used_on_disk < quota || 1356*eda14cbcSMatt Macy (retval == ENOSPC && used_on_disk < quota + deferred)) 1357*eda14cbcSMatt Macy retval = ERESTART; 1358*eda14cbcSMatt Macy dprintf_dd(dd, "failing: used=%lluK inflight = %lluK " 1359*eda14cbcSMatt Macy "quota=%lluK tr=%lluK err=%d\n", 1360*eda14cbcSMatt Macy used_on_disk>>10, est_inflight>>10, 1361*eda14cbcSMatt Macy quota>>10, asize>>10, retval); 1362*eda14cbcSMatt Macy mutex_exit(&dd->dd_lock); 1363*eda14cbcSMatt Macy DMU_TX_STAT_BUMP(dmu_tx_quota); 1364*eda14cbcSMatt Macy return (SET_ERROR(retval)); 1365*eda14cbcSMatt Macy } 1366*eda14cbcSMatt Macy 1367*eda14cbcSMatt Macy /* We need to up our estimated delta before dropping dd_lock */ 1368*eda14cbcSMatt Macy dd->dd_tempreserved[txg & TXG_MASK] += asize; 1369*eda14cbcSMatt Macy 1370*eda14cbcSMatt Macy uint64_t parent_rsrv = parent_delta(dd, used_on_disk + est_inflight, 1371*eda14cbcSMatt Macy asize - ref_rsrv); 1372*eda14cbcSMatt Macy mutex_exit(&dd->dd_lock); 1373*eda14cbcSMatt Macy 1374*eda14cbcSMatt Macy tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP); 1375*eda14cbcSMatt Macy tr->tr_ds = dd; 1376*eda14cbcSMatt Macy tr->tr_size = asize; 1377*eda14cbcSMatt Macy list_insert_tail(tr_list, tr); 1378*eda14cbcSMatt Macy 1379*eda14cbcSMatt Macy /* see if it's OK with our parent */ 1380*eda14cbcSMatt Macy if (dd->dd_parent != NULL && parent_rsrv != 0) { 1381*eda14cbcSMatt Macy /* 1382*eda14cbcSMatt Macy * Recurse on our parent without recursion. This has been 1383*eda14cbcSMatt Macy * observed to be potentially large stack usage even within 1384*eda14cbcSMatt Macy * the test suite. Largest seen stack was 7632 bytes on linux. 
1385*eda14cbcSMatt Macy */ 1386*eda14cbcSMatt Macy 1387*eda14cbcSMatt Macy dd = dd->dd_parent; 1388*eda14cbcSMatt Macy asize = parent_rsrv; 1389*eda14cbcSMatt Macy ignorequota = (dsl_dir_phys(dd)->dd_head_dataset_obj == 0); 1390*eda14cbcSMatt Macy first = B_FALSE; 1391*eda14cbcSMatt Macy goto top_of_function; 1392*eda14cbcSMatt Macy 1393*eda14cbcSMatt Macy } else { 1394*eda14cbcSMatt Macy return (0); 1395*eda14cbcSMatt Macy } 1396*eda14cbcSMatt Macy } 1397*eda14cbcSMatt Macy 1398*eda14cbcSMatt Macy /* 1399*eda14cbcSMatt Macy * Reserve space in this dsl_dir, to be used in this tx's txg. 1400*eda14cbcSMatt Macy * After the space has been dirtied (and dsl_dir_willuse_space() 1401*eda14cbcSMatt Macy * has been called), the reservation should be canceled, using 1402*eda14cbcSMatt Macy * dsl_dir_tempreserve_clear(). 1403*eda14cbcSMatt Macy */ 1404*eda14cbcSMatt Macy int 1405*eda14cbcSMatt Macy dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize, 1406*eda14cbcSMatt Macy boolean_t netfree, void **tr_cookiep, dmu_tx_t *tx) 1407*eda14cbcSMatt Macy { 1408*eda14cbcSMatt Macy int err; 1409*eda14cbcSMatt Macy list_t *tr_list; 1410*eda14cbcSMatt Macy 1411*eda14cbcSMatt Macy if (asize == 0) { 1412*eda14cbcSMatt Macy *tr_cookiep = NULL; 1413*eda14cbcSMatt Macy return (0); 1414*eda14cbcSMatt Macy } 1415*eda14cbcSMatt Macy 1416*eda14cbcSMatt Macy tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP); 1417*eda14cbcSMatt Macy list_create(tr_list, sizeof (struct tempreserve), 1418*eda14cbcSMatt Macy offsetof(struct tempreserve, tr_node)); 1419*eda14cbcSMatt Macy ASSERT3S(asize, >, 0); 1420*eda14cbcSMatt Macy 1421*eda14cbcSMatt Macy err = arc_tempreserve_space(dd->dd_pool->dp_spa, lsize, tx->tx_txg); 1422*eda14cbcSMatt Macy if (err == 0) { 1423*eda14cbcSMatt Macy struct tempreserve *tr; 1424*eda14cbcSMatt Macy 1425*eda14cbcSMatt Macy tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP); 1426*eda14cbcSMatt Macy tr->tr_size = lsize; 1427*eda14cbcSMatt Macy 
list_insert_tail(tr_list, tr); 1428*eda14cbcSMatt Macy } else { 1429*eda14cbcSMatt Macy if (err == EAGAIN) { 1430*eda14cbcSMatt Macy /* 1431*eda14cbcSMatt Macy * If arc_memory_throttle() detected that pageout 1432*eda14cbcSMatt Macy * is running and we are low on memory, we delay new 1433*eda14cbcSMatt Macy * non-pageout transactions to give pageout an 1434*eda14cbcSMatt Macy * advantage. 1435*eda14cbcSMatt Macy * 1436*eda14cbcSMatt Macy * It is unfortunate to be delaying while the caller's 1437*eda14cbcSMatt Macy * locks are held. 1438*eda14cbcSMatt Macy */ 1439*eda14cbcSMatt Macy txg_delay(dd->dd_pool, tx->tx_txg, 1440*eda14cbcSMatt Macy MSEC2NSEC(10), MSEC2NSEC(10)); 1441*eda14cbcSMatt Macy err = SET_ERROR(ERESTART); 1442*eda14cbcSMatt Macy } 1443*eda14cbcSMatt Macy } 1444*eda14cbcSMatt Macy 1445*eda14cbcSMatt Macy if (err == 0) { 1446*eda14cbcSMatt Macy err = dsl_dir_tempreserve_impl(dd, asize, netfree, 1447*eda14cbcSMatt Macy B_FALSE, tr_list, tx, B_TRUE); 1448*eda14cbcSMatt Macy } 1449*eda14cbcSMatt Macy 1450*eda14cbcSMatt Macy if (err != 0) 1451*eda14cbcSMatt Macy dsl_dir_tempreserve_clear(tr_list, tx); 1452*eda14cbcSMatt Macy else 1453*eda14cbcSMatt Macy *tr_cookiep = tr_list; 1454*eda14cbcSMatt Macy 1455*eda14cbcSMatt Macy return (err); 1456*eda14cbcSMatt Macy } 1457*eda14cbcSMatt Macy 1458*eda14cbcSMatt Macy /* 1459*eda14cbcSMatt Macy * Clear a temporary reservation that we previously made with 1460*eda14cbcSMatt Macy * dsl_dir_tempreserve_space(). 
1461*eda14cbcSMatt Macy */ 1462*eda14cbcSMatt Macy void 1463*eda14cbcSMatt Macy dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx) 1464*eda14cbcSMatt Macy { 1465*eda14cbcSMatt Macy int txgidx = tx->tx_txg & TXG_MASK; 1466*eda14cbcSMatt Macy list_t *tr_list = tr_cookie; 1467*eda14cbcSMatt Macy struct tempreserve *tr; 1468*eda14cbcSMatt Macy 1469*eda14cbcSMatt Macy ASSERT3U(tx->tx_txg, !=, 0); 1470*eda14cbcSMatt Macy 1471*eda14cbcSMatt Macy if (tr_cookie == NULL) 1472*eda14cbcSMatt Macy return; 1473*eda14cbcSMatt Macy 1474*eda14cbcSMatt Macy while ((tr = list_head(tr_list)) != NULL) { 1475*eda14cbcSMatt Macy if (tr->tr_ds) { 1476*eda14cbcSMatt Macy mutex_enter(&tr->tr_ds->dd_lock); 1477*eda14cbcSMatt Macy ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=, 1478*eda14cbcSMatt Macy tr->tr_size); 1479*eda14cbcSMatt Macy tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size; 1480*eda14cbcSMatt Macy mutex_exit(&tr->tr_ds->dd_lock); 1481*eda14cbcSMatt Macy } else { 1482*eda14cbcSMatt Macy arc_tempreserve_clear(tr->tr_size); 1483*eda14cbcSMatt Macy } 1484*eda14cbcSMatt Macy list_remove(tr_list, tr); 1485*eda14cbcSMatt Macy kmem_free(tr, sizeof (struct tempreserve)); 1486*eda14cbcSMatt Macy } 1487*eda14cbcSMatt Macy 1488*eda14cbcSMatt Macy kmem_free(tr_list, sizeof (list_t)); 1489*eda14cbcSMatt Macy } 1490*eda14cbcSMatt Macy 1491*eda14cbcSMatt Macy /* 1492*eda14cbcSMatt Macy * This should be called from open context when we think we're going to write 1493*eda14cbcSMatt Macy * or free space, for example when dirtying data. Be conservative; it's okay 1494*eda14cbcSMatt Macy * to write less space or free more, but we don't want to write more or free 1495*eda14cbcSMatt Macy * less than the amount specified. 
1496*eda14cbcSMatt Macy * 1497*eda14cbcSMatt Macy * NOTE: The behavior of this function is identical to the Illumos / FreeBSD 1498*eda14cbcSMatt Macy * version however it has been adjusted to use an iterative rather than 1499*eda14cbcSMatt Macy * recursive algorithm to minimize stack usage. 1500*eda14cbcSMatt Macy */ 1501*eda14cbcSMatt Macy void 1502*eda14cbcSMatt Macy dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) 1503*eda14cbcSMatt Macy { 1504*eda14cbcSMatt Macy int64_t parent_space; 1505*eda14cbcSMatt Macy uint64_t est_used; 1506*eda14cbcSMatt Macy 1507*eda14cbcSMatt Macy do { 1508*eda14cbcSMatt Macy mutex_enter(&dd->dd_lock); 1509*eda14cbcSMatt Macy if (space > 0) 1510*eda14cbcSMatt Macy dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space; 1511*eda14cbcSMatt Macy 1512*eda14cbcSMatt Macy est_used = dsl_dir_space_towrite(dd) + 1513*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_used_bytes; 1514*eda14cbcSMatt Macy parent_space = parent_delta(dd, est_used, space); 1515*eda14cbcSMatt Macy mutex_exit(&dd->dd_lock); 1516*eda14cbcSMatt Macy 1517*eda14cbcSMatt Macy /* Make sure that we clean up dd_space_to* */ 1518*eda14cbcSMatt Macy dsl_dir_dirty(dd, tx); 1519*eda14cbcSMatt Macy 1520*eda14cbcSMatt Macy dd = dd->dd_parent; 1521*eda14cbcSMatt Macy space = parent_space; 1522*eda14cbcSMatt Macy } while (space && dd); 1523*eda14cbcSMatt Macy } 1524*eda14cbcSMatt Macy 1525*eda14cbcSMatt Macy /* call from syncing context when we actually write/free space for this dd */ 1526*eda14cbcSMatt Macy void 1527*eda14cbcSMatt Macy dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type, 1528*eda14cbcSMatt Macy int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx) 1529*eda14cbcSMatt Macy { 1530*eda14cbcSMatt Macy int64_t accounted_delta; 1531*eda14cbcSMatt Macy 1532*eda14cbcSMatt Macy /* 1533*eda14cbcSMatt Macy * dsl_dataset_set_refreservation_sync_impl() calls this with 1534*eda14cbcSMatt Macy * dd_lock held, so that it can atomically update 1535*eda14cbcSMatt 
Macy * ds->ds_reserved and the dsl_dir accounting, so that 1536*eda14cbcSMatt Macy * dsl_dataset_check_quota() can see dataset and dir accounting 1537*eda14cbcSMatt Macy * consistently. 1538*eda14cbcSMatt Macy */ 1539*eda14cbcSMatt Macy boolean_t needlock = !MUTEX_HELD(&dd->dd_lock); 1540*eda14cbcSMatt Macy 1541*eda14cbcSMatt Macy ASSERT(dmu_tx_is_syncing(tx)); 1542*eda14cbcSMatt Macy ASSERT(type < DD_USED_NUM); 1543*eda14cbcSMatt Macy 1544*eda14cbcSMatt Macy dmu_buf_will_dirty(dd->dd_dbuf, tx); 1545*eda14cbcSMatt Macy 1546*eda14cbcSMatt Macy if (needlock) 1547*eda14cbcSMatt Macy mutex_enter(&dd->dd_lock); 1548*eda14cbcSMatt Macy accounted_delta = 1549*eda14cbcSMatt Macy parent_delta(dd, dsl_dir_phys(dd)->dd_used_bytes, used); 1550*eda14cbcSMatt Macy ASSERT(used >= 0 || dsl_dir_phys(dd)->dd_used_bytes >= -used); 1551*eda14cbcSMatt Macy ASSERT(compressed >= 0 || 1552*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_compressed_bytes >= -compressed); 1553*eda14cbcSMatt Macy ASSERT(uncompressed >= 0 || 1554*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_uncompressed_bytes >= -uncompressed); 1555*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_used_bytes += used; 1556*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_uncompressed_bytes += uncompressed; 1557*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_compressed_bytes += compressed; 1558*eda14cbcSMatt Macy 1559*eda14cbcSMatt Macy if (dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN) { 1560*eda14cbcSMatt Macy ASSERT(used > 0 || 1561*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_used_breakdown[type] >= -used); 1562*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_used_breakdown[type] += used; 1563*eda14cbcSMatt Macy #ifdef ZFS_DEBUG 1564*eda14cbcSMatt Macy { 1565*eda14cbcSMatt Macy dd_used_t t; 1566*eda14cbcSMatt Macy uint64_t u = 0; 1567*eda14cbcSMatt Macy for (t = 0; t < DD_USED_NUM; t++) 1568*eda14cbcSMatt Macy u += dsl_dir_phys(dd)->dd_used_breakdown[t]; 1569*eda14cbcSMatt Macy ASSERT3U(u, ==, dsl_dir_phys(dd)->dd_used_bytes); 1570*eda14cbcSMatt Macy } 
1571*eda14cbcSMatt Macy #endif 1572*eda14cbcSMatt Macy } 1573*eda14cbcSMatt Macy if (needlock) 1574*eda14cbcSMatt Macy mutex_exit(&dd->dd_lock); 1575*eda14cbcSMatt Macy 1576*eda14cbcSMatt Macy if (dd->dd_parent != NULL) { 1577*eda14cbcSMatt Macy dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, 1578*eda14cbcSMatt Macy accounted_delta, compressed, uncompressed, tx); 1579*eda14cbcSMatt Macy dsl_dir_transfer_space(dd->dd_parent, 1580*eda14cbcSMatt Macy used - accounted_delta, 1581*eda14cbcSMatt Macy DD_USED_CHILD_RSRV, DD_USED_CHILD, tx); 1582*eda14cbcSMatt Macy } 1583*eda14cbcSMatt Macy } 1584*eda14cbcSMatt Macy 1585*eda14cbcSMatt Macy void 1586*eda14cbcSMatt Macy dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta, 1587*eda14cbcSMatt Macy dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx) 1588*eda14cbcSMatt Macy { 1589*eda14cbcSMatt Macy ASSERT(dmu_tx_is_syncing(tx)); 1590*eda14cbcSMatt Macy ASSERT(oldtype < DD_USED_NUM); 1591*eda14cbcSMatt Macy ASSERT(newtype < DD_USED_NUM); 1592*eda14cbcSMatt Macy 1593*eda14cbcSMatt Macy if (delta == 0 || 1594*eda14cbcSMatt Macy !(dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN)) 1595*eda14cbcSMatt Macy return; 1596*eda14cbcSMatt Macy 1597*eda14cbcSMatt Macy dmu_buf_will_dirty(dd->dd_dbuf, tx); 1598*eda14cbcSMatt Macy mutex_enter(&dd->dd_lock); 1599*eda14cbcSMatt Macy ASSERT(delta > 0 ? 
1600*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_used_breakdown[oldtype] >= delta : 1601*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_used_breakdown[newtype] >= -delta); 1602*eda14cbcSMatt Macy ASSERT(dsl_dir_phys(dd)->dd_used_bytes >= ABS(delta)); 1603*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_used_breakdown[oldtype] -= delta; 1604*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_used_breakdown[newtype] += delta; 1605*eda14cbcSMatt Macy mutex_exit(&dd->dd_lock); 1606*eda14cbcSMatt Macy } 1607*eda14cbcSMatt Macy 1608*eda14cbcSMatt Macy typedef struct dsl_dir_set_qr_arg { 1609*eda14cbcSMatt Macy const char *ddsqra_name; 1610*eda14cbcSMatt Macy zprop_source_t ddsqra_source; 1611*eda14cbcSMatt Macy uint64_t ddsqra_value; 1612*eda14cbcSMatt Macy } dsl_dir_set_qr_arg_t; 1613*eda14cbcSMatt Macy 1614*eda14cbcSMatt Macy static int 1615*eda14cbcSMatt Macy dsl_dir_set_quota_check(void *arg, dmu_tx_t *tx) 1616*eda14cbcSMatt Macy { 1617*eda14cbcSMatt Macy dsl_dir_set_qr_arg_t *ddsqra = arg; 1618*eda14cbcSMatt Macy dsl_pool_t *dp = dmu_tx_pool(tx); 1619*eda14cbcSMatt Macy dsl_dataset_t *ds; 1620*eda14cbcSMatt Macy int error; 1621*eda14cbcSMatt Macy uint64_t towrite, newval; 1622*eda14cbcSMatt Macy 1623*eda14cbcSMatt Macy error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 1624*eda14cbcSMatt Macy if (error != 0) 1625*eda14cbcSMatt Macy return (error); 1626*eda14cbcSMatt Macy 1627*eda14cbcSMatt Macy error = dsl_prop_predict(ds->ds_dir, "quota", 1628*eda14cbcSMatt Macy ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 1629*eda14cbcSMatt Macy if (error != 0) { 1630*eda14cbcSMatt Macy dsl_dataset_rele(ds, FTAG); 1631*eda14cbcSMatt Macy return (error); 1632*eda14cbcSMatt Macy } 1633*eda14cbcSMatt Macy 1634*eda14cbcSMatt Macy if (newval == 0) { 1635*eda14cbcSMatt Macy dsl_dataset_rele(ds, FTAG); 1636*eda14cbcSMatt Macy return (0); 1637*eda14cbcSMatt Macy } 1638*eda14cbcSMatt Macy 1639*eda14cbcSMatt Macy mutex_enter(&ds->ds_dir->dd_lock); 1640*eda14cbcSMatt Macy /* 1641*eda14cbcSMatt Macy * 
If we are doing the preliminary check in open context, and 1642*eda14cbcSMatt Macy * there are pending changes, then don't fail it, since the 1643*eda14cbcSMatt Macy * pending changes could under-estimate the amount of space to be 1644*eda14cbcSMatt Macy * freed up. 1645*eda14cbcSMatt Macy */ 1646*eda14cbcSMatt Macy towrite = dsl_dir_space_towrite(ds->ds_dir); 1647*eda14cbcSMatt Macy if ((dmu_tx_is_syncing(tx) || towrite == 0) && 1648*eda14cbcSMatt Macy (newval < dsl_dir_phys(ds->ds_dir)->dd_reserved || 1649*eda14cbcSMatt Macy newval < dsl_dir_phys(ds->ds_dir)->dd_used_bytes + towrite)) { 1650*eda14cbcSMatt Macy error = SET_ERROR(ENOSPC); 1651*eda14cbcSMatt Macy } 1652*eda14cbcSMatt Macy mutex_exit(&ds->ds_dir->dd_lock); 1653*eda14cbcSMatt Macy dsl_dataset_rele(ds, FTAG); 1654*eda14cbcSMatt Macy return (error); 1655*eda14cbcSMatt Macy } 1656*eda14cbcSMatt Macy 1657*eda14cbcSMatt Macy static void 1658*eda14cbcSMatt Macy dsl_dir_set_quota_sync(void *arg, dmu_tx_t *tx) 1659*eda14cbcSMatt Macy { 1660*eda14cbcSMatt Macy dsl_dir_set_qr_arg_t *ddsqra = arg; 1661*eda14cbcSMatt Macy dsl_pool_t *dp = dmu_tx_pool(tx); 1662*eda14cbcSMatt Macy dsl_dataset_t *ds; 1663*eda14cbcSMatt Macy uint64_t newval; 1664*eda14cbcSMatt Macy 1665*eda14cbcSMatt Macy VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 1666*eda14cbcSMatt Macy 1667*eda14cbcSMatt Macy if (spa_version(dp->dp_spa) >= SPA_VERSION_RECVD_PROPS) { 1668*eda14cbcSMatt Macy dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_QUOTA), 1669*eda14cbcSMatt Macy ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, 1670*eda14cbcSMatt Macy &ddsqra->ddsqra_value, tx); 1671*eda14cbcSMatt Macy 1672*eda14cbcSMatt Macy VERIFY0(dsl_prop_get_int_ds(ds, 1673*eda14cbcSMatt Macy zfs_prop_to_name(ZFS_PROP_QUOTA), &newval)); 1674*eda14cbcSMatt Macy } else { 1675*eda14cbcSMatt Macy newval = ddsqra->ddsqra_value; 1676*eda14cbcSMatt Macy spa_history_log_internal_ds(ds, "set", tx, "%s=%lld", 1677*eda14cbcSMatt Macy 
zfs_prop_to_name(ZFS_PROP_QUOTA), (longlong_t)newval); 1678*eda14cbcSMatt Macy } 1679*eda14cbcSMatt Macy 1680*eda14cbcSMatt Macy dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); 1681*eda14cbcSMatt Macy mutex_enter(&ds->ds_dir->dd_lock); 1682*eda14cbcSMatt Macy dsl_dir_phys(ds->ds_dir)->dd_quota = newval; 1683*eda14cbcSMatt Macy mutex_exit(&ds->ds_dir->dd_lock); 1684*eda14cbcSMatt Macy dsl_dataset_rele(ds, FTAG); 1685*eda14cbcSMatt Macy } 1686*eda14cbcSMatt Macy 1687*eda14cbcSMatt Macy int 1688*eda14cbcSMatt Macy dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota) 1689*eda14cbcSMatt Macy { 1690*eda14cbcSMatt Macy dsl_dir_set_qr_arg_t ddsqra; 1691*eda14cbcSMatt Macy 1692*eda14cbcSMatt Macy ddsqra.ddsqra_name = ddname; 1693*eda14cbcSMatt Macy ddsqra.ddsqra_source = source; 1694*eda14cbcSMatt Macy ddsqra.ddsqra_value = quota; 1695*eda14cbcSMatt Macy 1696*eda14cbcSMatt Macy return (dsl_sync_task(ddname, dsl_dir_set_quota_check, 1697*eda14cbcSMatt Macy dsl_dir_set_quota_sync, &ddsqra, 0, 1698*eda14cbcSMatt Macy ZFS_SPACE_CHECK_EXTRA_RESERVED)); 1699*eda14cbcSMatt Macy } 1700*eda14cbcSMatt Macy 1701*eda14cbcSMatt Macy static int 1702*eda14cbcSMatt Macy dsl_dir_set_reservation_check(void *arg, dmu_tx_t *tx) 1703*eda14cbcSMatt Macy { 1704*eda14cbcSMatt Macy dsl_dir_set_qr_arg_t *ddsqra = arg; 1705*eda14cbcSMatt Macy dsl_pool_t *dp = dmu_tx_pool(tx); 1706*eda14cbcSMatt Macy dsl_dataset_t *ds; 1707*eda14cbcSMatt Macy dsl_dir_t *dd; 1708*eda14cbcSMatt Macy uint64_t newval, used, avail; 1709*eda14cbcSMatt Macy int error; 1710*eda14cbcSMatt Macy 1711*eda14cbcSMatt Macy error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 1712*eda14cbcSMatt Macy if (error != 0) 1713*eda14cbcSMatt Macy return (error); 1714*eda14cbcSMatt Macy dd = ds->ds_dir; 1715*eda14cbcSMatt Macy 1716*eda14cbcSMatt Macy /* 1717*eda14cbcSMatt Macy * If we are doing the preliminary check in open context, the 1718*eda14cbcSMatt Macy * space estimates may be inaccurate. 
1719*eda14cbcSMatt Macy */ 1720*eda14cbcSMatt Macy if (!dmu_tx_is_syncing(tx)) { 1721*eda14cbcSMatt Macy dsl_dataset_rele(ds, FTAG); 1722*eda14cbcSMatt Macy return (0); 1723*eda14cbcSMatt Macy } 1724*eda14cbcSMatt Macy 1725*eda14cbcSMatt Macy error = dsl_prop_predict(ds->ds_dir, 1726*eda14cbcSMatt Macy zfs_prop_to_name(ZFS_PROP_RESERVATION), 1727*eda14cbcSMatt Macy ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 1728*eda14cbcSMatt Macy if (error != 0) { 1729*eda14cbcSMatt Macy dsl_dataset_rele(ds, FTAG); 1730*eda14cbcSMatt Macy return (error); 1731*eda14cbcSMatt Macy } 1732*eda14cbcSMatt Macy 1733*eda14cbcSMatt Macy mutex_enter(&dd->dd_lock); 1734*eda14cbcSMatt Macy used = dsl_dir_phys(dd)->dd_used_bytes; 1735*eda14cbcSMatt Macy mutex_exit(&dd->dd_lock); 1736*eda14cbcSMatt Macy 1737*eda14cbcSMatt Macy if (dd->dd_parent) { 1738*eda14cbcSMatt Macy avail = dsl_dir_space_available(dd->dd_parent, 1739*eda14cbcSMatt Macy NULL, 0, FALSE); 1740*eda14cbcSMatt Macy } else { 1741*eda14cbcSMatt Macy avail = dsl_pool_adjustedsize(dd->dd_pool, 1742*eda14cbcSMatt Macy ZFS_SPACE_CHECK_NORMAL) - used; 1743*eda14cbcSMatt Macy } 1744*eda14cbcSMatt Macy 1745*eda14cbcSMatt Macy if (MAX(used, newval) > MAX(used, dsl_dir_phys(dd)->dd_reserved)) { 1746*eda14cbcSMatt Macy uint64_t delta = MAX(used, newval) - 1747*eda14cbcSMatt Macy MAX(used, dsl_dir_phys(dd)->dd_reserved); 1748*eda14cbcSMatt Macy 1749*eda14cbcSMatt Macy if (delta > avail || 1750*eda14cbcSMatt Macy (dsl_dir_phys(dd)->dd_quota > 0 && 1751*eda14cbcSMatt Macy newval > dsl_dir_phys(dd)->dd_quota)) 1752*eda14cbcSMatt Macy error = SET_ERROR(ENOSPC); 1753*eda14cbcSMatt Macy } 1754*eda14cbcSMatt Macy 1755*eda14cbcSMatt Macy dsl_dataset_rele(ds, FTAG); 1756*eda14cbcSMatt Macy return (error); 1757*eda14cbcSMatt Macy } 1758*eda14cbcSMatt Macy 1759*eda14cbcSMatt Macy void 1760*eda14cbcSMatt Macy dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx) 1761*eda14cbcSMatt Macy { 1762*eda14cbcSMatt Macy 
uint64_t used; 1763*eda14cbcSMatt Macy int64_t delta; 1764*eda14cbcSMatt Macy 1765*eda14cbcSMatt Macy dmu_buf_will_dirty(dd->dd_dbuf, tx); 1766*eda14cbcSMatt Macy 1767*eda14cbcSMatt Macy mutex_enter(&dd->dd_lock); 1768*eda14cbcSMatt Macy used = dsl_dir_phys(dd)->dd_used_bytes; 1769*eda14cbcSMatt Macy delta = MAX(used, value) - MAX(used, dsl_dir_phys(dd)->dd_reserved); 1770*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_reserved = value; 1771*eda14cbcSMatt Macy 1772*eda14cbcSMatt Macy if (dd->dd_parent != NULL) { 1773*eda14cbcSMatt Macy /* Roll up this additional usage into our ancestors */ 1774*eda14cbcSMatt Macy dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV, 1775*eda14cbcSMatt Macy delta, 0, 0, tx); 1776*eda14cbcSMatt Macy } 1777*eda14cbcSMatt Macy mutex_exit(&dd->dd_lock); 1778*eda14cbcSMatt Macy } 1779*eda14cbcSMatt Macy 1780*eda14cbcSMatt Macy static void 1781*eda14cbcSMatt Macy dsl_dir_set_reservation_sync(void *arg, dmu_tx_t *tx) 1782*eda14cbcSMatt Macy { 1783*eda14cbcSMatt Macy dsl_dir_set_qr_arg_t *ddsqra = arg; 1784*eda14cbcSMatt Macy dsl_pool_t *dp = dmu_tx_pool(tx); 1785*eda14cbcSMatt Macy dsl_dataset_t *ds; 1786*eda14cbcSMatt Macy uint64_t newval; 1787*eda14cbcSMatt Macy 1788*eda14cbcSMatt Macy VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 1789*eda14cbcSMatt Macy 1790*eda14cbcSMatt Macy if (spa_version(dp->dp_spa) >= SPA_VERSION_RECVD_PROPS) { 1791*eda14cbcSMatt Macy dsl_prop_set_sync_impl(ds, 1792*eda14cbcSMatt Macy zfs_prop_to_name(ZFS_PROP_RESERVATION), 1793*eda14cbcSMatt Macy ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, 1794*eda14cbcSMatt Macy &ddsqra->ddsqra_value, tx); 1795*eda14cbcSMatt Macy 1796*eda14cbcSMatt Macy VERIFY0(dsl_prop_get_int_ds(ds, 1797*eda14cbcSMatt Macy zfs_prop_to_name(ZFS_PROP_RESERVATION), &newval)); 1798*eda14cbcSMatt Macy } else { 1799*eda14cbcSMatt Macy newval = ddsqra->ddsqra_value; 1800*eda14cbcSMatt Macy spa_history_log_internal_ds(ds, "set", tx, "%s=%lld", 1801*eda14cbcSMatt Macy 
zfs_prop_to_name(ZFS_PROP_RESERVATION), 1802*eda14cbcSMatt Macy (longlong_t)newval); 1803*eda14cbcSMatt Macy } 1804*eda14cbcSMatt Macy 1805*eda14cbcSMatt Macy dsl_dir_set_reservation_sync_impl(ds->ds_dir, newval, tx); 1806*eda14cbcSMatt Macy dsl_dataset_rele(ds, FTAG); 1807*eda14cbcSMatt Macy } 1808*eda14cbcSMatt Macy 1809*eda14cbcSMatt Macy int 1810*eda14cbcSMatt Macy dsl_dir_set_reservation(const char *ddname, zprop_source_t source, 1811*eda14cbcSMatt Macy uint64_t reservation) 1812*eda14cbcSMatt Macy { 1813*eda14cbcSMatt Macy dsl_dir_set_qr_arg_t ddsqra; 1814*eda14cbcSMatt Macy 1815*eda14cbcSMatt Macy ddsqra.ddsqra_name = ddname; 1816*eda14cbcSMatt Macy ddsqra.ddsqra_source = source; 1817*eda14cbcSMatt Macy ddsqra.ddsqra_value = reservation; 1818*eda14cbcSMatt Macy 1819*eda14cbcSMatt Macy return (dsl_sync_task(ddname, dsl_dir_set_reservation_check, 1820*eda14cbcSMatt Macy dsl_dir_set_reservation_sync, &ddsqra, 0, 1821*eda14cbcSMatt Macy ZFS_SPACE_CHECK_EXTRA_RESERVED)); 1822*eda14cbcSMatt Macy } 1823*eda14cbcSMatt Macy 1824*eda14cbcSMatt Macy static dsl_dir_t * 1825*eda14cbcSMatt Macy closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2) 1826*eda14cbcSMatt Macy { 1827*eda14cbcSMatt Macy for (; ds1; ds1 = ds1->dd_parent) { 1828*eda14cbcSMatt Macy dsl_dir_t *dd; 1829*eda14cbcSMatt Macy for (dd = ds2; dd; dd = dd->dd_parent) { 1830*eda14cbcSMatt Macy if (ds1 == dd) 1831*eda14cbcSMatt Macy return (dd); 1832*eda14cbcSMatt Macy } 1833*eda14cbcSMatt Macy } 1834*eda14cbcSMatt Macy return (NULL); 1835*eda14cbcSMatt Macy } 1836*eda14cbcSMatt Macy 1837*eda14cbcSMatt Macy /* 1838*eda14cbcSMatt Macy * If delta is applied to dd, how much of that delta would be applied to 1839*eda14cbcSMatt Macy * ancestor? Syncing context only. 
1840*eda14cbcSMatt Macy */ 1841*eda14cbcSMatt Macy static int64_t 1842*eda14cbcSMatt Macy would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor) 1843*eda14cbcSMatt Macy { 1844*eda14cbcSMatt Macy if (dd == ancestor) 1845*eda14cbcSMatt Macy return (delta); 1846*eda14cbcSMatt Macy 1847*eda14cbcSMatt Macy mutex_enter(&dd->dd_lock); 1848*eda14cbcSMatt Macy delta = parent_delta(dd, dsl_dir_phys(dd)->dd_used_bytes, delta); 1849*eda14cbcSMatt Macy mutex_exit(&dd->dd_lock); 1850*eda14cbcSMatt Macy return (would_change(dd->dd_parent, delta, ancestor)); 1851*eda14cbcSMatt Macy } 1852*eda14cbcSMatt Macy 1853*eda14cbcSMatt Macy typedef struct dsl_dir_rename_arg { 1854*eda14cbcSMatt Macy const char *ddra_oldname; 1855*eda14cbcSMatt Macy const char *ddra_newname; 1856*eda14cbcSMatt Macy cred_t *ddra_cred; 1857*eda14cbcSMatt Macy proc_t *ddra_proc; 1858*eda14cbcSMatt Macy } dsl_dir_rename_arg_t; 1859*eda14cbcSMatt Macy 1860*eda14cbcSMatt Macy typedef struct dsl_valid_rename_arg { 1861*eda14cbcSMatt Macy int char_delta; 1862*eda14cbcSMatt Macy int nest_delta; 1863*eda14cbcSMatt Macy } dsl_valid_rename_arg_t; 1864*eda14cbcSMatt Macy 1865*eda14cbcSMatt Macy /* ARGSUSED */ 1866*eda14cbcSMatt Macy static int 1867*eda14cbcSMatt Macy dsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) 1868*eda14cbcSMatt Macy { 1869*eda14cbcSMatt Macy dsl_valid_rename_arg_t *dvra = arg; 1870*eda14cbcSMatt Macy char namebuf[ZFS_MAX_DATASET_NAME_LEN]; 1871*eda14cbcSMatt Macy 1872*eda14cbcSMatt Macy dsl_dataset_name(ds, namebuf); 1873*eda14cbcSMatt Macy 1874*eda14cbcSMatt Macy ASSERT3U(strnlen(namebuf, ZFS_MAX_DATASET_NAME_LEN), 1875*eda14cbcSMatt Macy <, ZFS_MAX_DATASET_NAME_LEN); 1876*eda14cbcSMatt Macy int namelen = strlen(namebuf) + dvra->char_delta; 1877*eda14cbcSMatt Macy int depth = get_dataset_depth(namebuf) + dvra->nest_delta; 1878*eda14cbcSMatt Macy 1879*eda14cbcSMatt Macy if (namelen >= ZFS_MAX_DATASET_NAME_LEN) 1880*eda14cbcSMatt Macy return (SET_ERROR(ENAMETOOLONG)); 
1881*eda14cbcSMatt Macy if (dvra->nest_delta > 0 && depth >= zfs_max_dataset_nesting) 1882*eda14cbcSMatt Macy return (SET_ERROR(ENAMETOOLONG)); 1883*eda14cbcSMatt Macy return (0); 1884*eda14cbcSMatt Macy } 1885*eda14cbcSMatt Macy 1886*eda14cbcSMatt Macy static int 1887*eda14cbcSMatt Macy dsl_dir_rename_check(void *arg, dmu_tx_t *tx) 1888*eda14cbcSMatt Macy { 1889*eda14cbcSMatt Macy dsl_dir_rename_arg_t *ddra = arg; 1890*eda14cbcSMatt Macy dsl_pool_t *dp = dmu_tx_pool(tx); 1891*eda14cbcSMatt Macy dsl_dir_t *dd, *newparent; 1892*eda14cbcSMatt Macy dsl_valid_rename_arg_t dvra; 1893*eda14cbcSMatt Macy dsl_dataset_t *parentds; 1894*eda14cbcSMatt Macy objset_t *parentos; 1895*eda14cbcSMatt Macy const char *mynewname; 1896*eda14cbcSMatt Macy int error; 1897*eda14cbcSMatt Macy 1898*eda14cbcSMatt Macy /* target dir should exist */ 1899*eda14cbcSMatt Macy error = dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL); 1900*eda14cbcSMatt Macy if (error != 0) 1901*eda14cbcSMatt Macy return (error); 1902*eda14cbcSMatt Macy 1903*eda14cbcSMatt Macy /* new parent should exist */ 1904*eda14cbcSMatt Macy error = dsl_dir_hold(dp, ddra->ddra_newname, FTAG, 1905*eda14cbcSMatt Macy &newparent, &mynewname); 1906*eda14cbcSMatt Macy if (error != 0) { 1907*eda14cbcSMatt Macy dsl_dir_rele(dd, FTAG); 1908*eda14cbcSMatt Macy return (error); 1909*eda14cbcSMatt Macy } 1910*eda14cbcSMatt Macy 1911*eda14cbcSMatt Macy /* can't rename to different pool */ 1912*eda14cbcSMatt Macy if (dd->dd_pool != newparent->dd_pool) { 1913*eda14cbcSMatt Macy dsl_dir_rele(newparent, FTAG); 1914*eda14cbcSMatt Macy dsl_dir_rele(dd, FTAG); 1915*eda14cbcSMatt Macy return (SET_ERROR(EXDEV)); 1916*eda14cbcSMatt Macy } 1917*eda14cbcSMatt Macy 1918*eda14cbcSMatt Macy /* new name should not already exist */ 1919*eda14cbcSMatt Macy if (mynewname == NULL) { 1920*eda14cbcSMatt Macy dsl_dir_rele(newparent, FTAG); 1921*eda14cbcSMatt Macy dsl_dir_rele(dd, FTAG); 1922*eda14cbcSMatt Macy return (SET_ERROR(EEXIST)); 1923*eda14cbcSMatt 
Macy } 1924*eda14cbcSMatt Macy 1925*eda14cbcSMatt Macy /* can't rename below anything but filesystems (eg. no ZVOLs) */ 1926*eda14cbcSMatt Macy error = dsl_dataset_hold_obj(newparent->dd_pool, 1927*eda14cbcSMatt Macy dsl_dir_phys(newparent)->dd_head_dataset_obj, FTAG, &parentds); 1928*eda14cbcSMatt Macy if (error != 0) { 1929*eda14cbcSMatt Macy dsl_dir_rele(newparent, FTAG); 1930*eda14cbcSMatt Macy dsl_dir_rele(dd, FTAG); 1931*eda14cbcSMatt Macy return (error); 1932*eda14cbcSMatt Macy } 1933*eda14cbcSMatt Macy error = dmu_objset_from_ds(parentds, &parentos); 1934*eda14cbcSMatt Macy if (error != 0) { 1935*eda14cbcSMatt Macy dsl_dataset_rele(parentds, FTAG); 1936*eda14cbcSMatt Macy dsl_dir_rele(newparent, FTAG); 1937*eda14cbcSMatt Macy dsl_dir_rele(dd, FTAG); 1938*eda14cbcSMatt Macy return (error); 1939*eda14cbcSMatt Macy } 1940*eda14cbcSMatt Macy if (dmu_objset_type(parentos) != DMU_OST_ZFS) { 1941*eda14cbcSMatt Macy dsl_dataset_rele(parentds, FTAG); 1942*eda14cbcSMatt Macy dsl_dir_rele(newparent, FTAG); 1943*eda14cbcSMatt Macy dsl_dir_rele(dd, FTAG); 1944*eda14cbcSMatt Macy return (SET_ERROR(ZFS_ERR_WRONG_PARENT)); 1945*eda14cbcSMatt Macy } 1946*eda14cbcSMatt Macy dsl_dataset_rele(parentds, FTAG); 1947*eda14cbcSMatt Macy 1948*eda14cbcSMatt Macy ASSERT3U(strnlen(ddra->ddra_newname, ZFS_MAX_DATASET_NAME_LEN), 1949*eda14cbcSMatt Macy <, ZFS_MAX_DATASET_NAME_LEN); 1950*eda14cbcSMatt Macy ASSERT3U(strnlen(ddra->ddra_oldname, ZFS_MAX_DATASET_NAME_LEN), 1951*eda14cbcSMatt Macy <, ZFS_MAX_DATASET_NAME_LEN); 1952*eda14cbcSMatt Macy dvra.char_delta = strlen(ddra->ddra_newname) 1953*eda14cbcSMatt Macy - strlen(ddra->ddra_oldname); 1954*eda14cbcSMatt Macy dvra.nest_delta = get_dataset_depth(ddra->ddra_newname) 1955*eda14cbcSMatt Macy - get_dataset_depth(ddra->ddra_oldname); 1956*eda14cbcSMatt Macy 1957*eda14cbcSMatt Macy /* if the name length is growing, validate child name lengths */ 1958*eda14cbcSMatt Macy if (dvra.char_delta > 0 || dvra.nest_delta > 0) { 1959*eda14cbcSMatt 
Macy error = dmu_objset_find_dp(dp, dd->dd_object, dsl_valid_rename, 1960*eda14cbcSMatt Macy &dvra, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 1961*eda14cbcSMatt Macy if (error != 0) { 1962*eda14cbcSMatt Macy dsl_dir_rele(newparent, FTAG); 1963*eda14cbcSMatt Macy dsl_dir_rele(dd, FTAG); 1964*eda14cbcSMatt Macy return (error); 1965*eda14cbcSMatt Macy } 1966*eda14cbcSMatt Macy } 1967*eda14cbcSMatt Macy 1968*eda14cbcSMatt Macy if (dmu_tx_is_syncing(tx)) { 1969*eda14cbcSMatt Macy if (spa_feature_is_active(dp->dp_spa, 1970*eda14cbcSMatt Macy SPA_FEATURE_FS_SS_LIMIT)) { 1971*eda14cbcSMatt Macy /* 1972*eda14cbcSMatt Macy * Although this is the check function and we don't 1973*eda14cbcSMatt Macy * normally make on-disk changes in check functions, 1974*eda14cbcSMatt Macy * we need to do that here. 1975*eda14cbcSMatt Macy * 1976*eda14cbcSMatt Macy * Ensure this portion of the tree's counts have been 1977*eda14cbcSMatt Macy * initialized in case the new parent has limits set. 1978*eda14cbcSMatt Macy */ 1979*eda14cbcSMatt Macy dsl_dir_init_fs_ss_count(dd, tx); 1980*eda14cbcSMatt Macy } 1981*eda14cbcSMatt Macy } 1982*eda14cbcSMatt Macy 1983*eda14cbcSMatt Macy if (newparent != dd->dd_parent) { 1984*eda14cbcSMatt Macy /* is there enough space? 
*/ 1985*eda14cbcSMatt Macy uint64_t myspace = 1986*eda14cbcSMatt Macy MAX(dsl_dir_phys(dd)->dd_used_bytes, 1987*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_reserved); 1988*eda14cbcSMatt Macy objset_t *os = dd->dd_pool->dp_meta_objset; 1989*eda14cbcSMatt Macy uint64_t fs_cnt = 0; 1990*eda14cbcSMatt Macy uint64_t ss_cnt = 0; 1991*eda14cbcSMatt Macy 1992*eda14cbcSMatt Macy if (dsl_dir_is_zapified(dd)) { 1993*eda14cbcSMatt Macy int err; 1994*eda14cbcSMatt Macy 1995*eda14cbcSMatt Macy err = zap_lookup(os, dd->dd_object, 1996*eda14cbcSMatt Macy DD_FIELD_FILESYSTEM_COUNT, sizeof (fs_cnt), 1, 1997*eda14cbcSMatt Macy &fs_cnt); 1998*eda14cbcSMatt Macy if (err != ENOENT && err != 0) { 1999*eda14cbcSMatt Macy dsl_dir_rele(newparent, FTAG); 2000*eda14cbcSMatt Macy dsl_dir_rele(dd, FTAG); 2001*eda14cbcSMatt Macy return (err); 2002*eda14cbcSMatt Macy } 2003*eda14cbcSMatt Macy 2004*eda14cbcSMatt Macy /* 2005*eda14cbcSMatt Macy * have to add 1 for the filesystem itself that we're 2006*eda14cbcSMatt Macy * moving 2007*eda14cbcSMatt Macy */ 2008*eda14cbcSMatt Macy fs_cnt++; 2009*eda14cbcSMatt Macy 2010*eda14cbcSMatt Macy err = zap_lookup(os, dd->dd_object, 2011*eda14cbcSMatt Macy DD_FIELD_SNAPSHOT_COUNT, sizeof (ss_cnt), 1, 2012*eda14cbcSMatt Macy &ss_cnt); 2013*eda14cbcSMatt Macy if (err != ENOENT && err != 0) { 2014*eda14cbcSMatt Macy dsl_dir_rele(newparent, FTAG); 2015*eda14cbcSMatt Macy dsl_dir_rele(dd, FTAG); 2016*eda14cbcSMatt Macy return (err); 2017*eda14cbcSMatt Macy } 2018*eda14cbcSMatt Macy } 2019*eda14cbcSMatt Macy 2020*eda14cbcSMatt Macy /* check for encryption errors */ 2021*eda14cbcSMatt Macy error = dsl_dir_rename_crypt_check(dd, newparent); 2022*eda14cbcSMatt Macy if (error != 0) { 2023*eda14cbcSMatt Macy dsl_dir_rele(newparent, FTAG); 2024*eda14cbcSMatt Macy dsl_dir_rele(dd, FTAG); 2025*eda14cbcSMatt Macy return (SET_ERROR(EACCES)); 2026*eda14cbcSMatt Macy } 2027*eda14cbcSMatt Macy 2028*eda14cbcSMatt Macy /* no rename into our descendant */ 2029*eda14cbcSMatt Macy if 
(closest_common_ancestor(dd, newparent) == dd) { 2030*eda14cbcSMatt Macy dsl_dir_rele(newparent, FTAG); 2031*eda14cbcSMatt Macy dsl_dir_rele(dd, FTAG); 2032*eda14cbcSMatt Macy return (SET_ERROR(EINVAL)); 2033*eda14cbcSMatt Macy } 2034*eda14cbcSMatt Macy 2035*eda14cbcSMatt Macy error = dsl_dir_transfer_possible(dd->dd_parent, 2036*eda14cbcSMatt Macy newparent, fs_cnt, ss_cnt, myspace, 2037*eda14cbcSMatt Macy ddra->ddra_cred, ddra->ddra_proc); 2038*eda14cbcSMatt Macy if (error != 0) { 2039*eda14cbcSMatt Macy dsl_dir_rele(newparent, FTAG); 2040*eda14cbcSMatt Macy dsl_dir_rele(dd, FTAG); 2041*eda14cbcSMatt Macy return (error); 2042*eda14cbcSMatt Macy } 2043*eda14cbcSMatt Macy } 2044*eda14cbcSMatt Macy 2045*eda14cbcSMatt Macy dsl_dir_rele(newparent, FTAG); 2046*eda14cbcSMatt Macy dsl_dir_rele(dd, FTAG); 2047*eda14cbcSMatt Macy return (0); 2048*eda14cbcSMatt Macy } 2049*eda14cbcSMatt Macy 2050*eda14cbcSMatt Macy static void 2051*eda14cbcSMatt Macy dsl_dir_rename_sync(void *arg, dmu_tx_t *tx) 2052*eda14cbcSMatt Macy { 2053*eda14cbcSMatt Macy dsl_dir_rename_arg_t *ddra = arg; 2054*eda14cbcSMatt Macy dsl_pool_t *dp = dmu_tx_pool(tx); 2055*eda14cbcSMatt Macy dsl_dir_t *dd, *newparent; 2056*eda14cbcSMatt Macy const char *mynewname; 2057*eda14cbcSMatt Macy objset_t *mos = dp->dp_meta_objset; 2058*eda14cbcSMatt Macy 2059*eda14cbcSMatt Macy VERIFY0(dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL)); 2060*eda14cbcSMatt Macy VERIFY0(dsl_dir_hold(dp, ddra->ddra_newname, FTAG, &newparent, 2061*eda14cbcSMatt Macy &mynewname)); 2062*eda14cbcSMatt Macy 2063*eda14cbcSMatt Macy /* Log this before we change the name. 
*/ 2064*eda14cbcSMatt Macy spa_history_log_internal_dd(dd, "rename", tx, 2065*eda14cbcSMatt Macy "-> %s", ddra->ddra_newname); 2066*eda14cbcSMatt Macy 2067*eda14cbcSMatt Macy if (newparent != dd->dd_parent) { 2068*eda14cbcSMatt Macy objset_t *os = dd->dd_pool->dp_meta_objset; 2069*eda14cbcSMatt Macy uint64_t fs_cnt = 0; 2070*eda14cbcSMatt Macy uint64_t ss_cnt = 0; 2071*eda14cbcSMatt Macy 2072*eda14cbcSMatt Macy /* 2073*eda14cbcSMatt Macy * We already made sure the dd counts were initialized in the 2074*eda14cbcSMatt Macy * check function. 2075*eda14cbcSMatt Macy */ 2076*eda14cbcSMatt Macy if (spa_feature_is_active(dp->dp_spa, 2077*eda14cbcSMatt Macy SPA_FEATURE_FS_SS_LIMIT)) { 2078*eda14cbcSMatt Macy VERIFY0(zap_lookup(os, dd->dd_object, 2079*eda14cbcSMatt Macy DD_FIELD_FILESYSTEM_COUNT, sizeof (fs_cnt), 1, 2080*eda14cbcSMatt Macy &fs_cnt)); 2081*eda14cbcSMatt Macy /* add 1 for the filesystem itself that we're moving */ 2082*eda14cbcSMatt Macy fs_cnt++; 2083*eda14cbcSMatt Macy 2084*eda14cbcSMatt Macy VERIFY0(zap_lookup(os, dd->dd_object, 2085*eda14cbcSMatt Macy DD_FIELD_SNAPSHOT_COUNT, sizeof (ss_cnt), 1, 2086*eda14cbcSMatt Macy &ss_cnt)); 2087*eda14cbcSMatt Macy } 2088*eda14cbcSMatt Macy 2089*eda14cbcSMatt Macy dsl_fs_ss_count_adjust(dd->dd_parent, -fs_cnt, 2090*eda14cbcSMatt Macy DD_FIELD_FILESYSTEM_COUNT, tx); 2091*eda14cbcSMatt Macy dsl_fs_ss_count_adjust(newparent, fs_cnt, 2092*eda14cbcSMatt Macy DD_FIELD_FILESYSTEM_COUNT, tx); 2093*eda14cbcSMatt Macy 2094*eda14cbcSMatt Macy dsl_fs_ss_count_adjust(dd->dd_parent, -ss_cnt, 2095*eda14cbcSMatt Macy DD_FIELD_SNAPSHOT_COUNT, tx); 2096*eda14cbcSMatt Macy dsl_fs_ss_count_adjust(newparent, ss_cnt, 2097*eda14cbcSMatt Macy DD_FIELD_SNAPSHOT_COUNT, tx); 2098*eda14cbcSMatt Macy 2099*eda14cbcSMatt Macy dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, 2100*eda14cbcSMatt Macy -dsl_dir_phys(dd)->dd_used_bytes, 2101*eda14cbcSMatt Macy -dsl_dir_phys(dd)->dd_compressed_bytes, 2102*eda14cbcSMatt Macy 
-dsl_dir_phys(dd)->dd_uncompressed_bytes, tx); 2103*eda14cbcSMatt Macy dsl_dir_diduse_space(newparent, DD_USED_CHILD, 2104*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_used_bytes, 2105*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_compressed_bytes, 2106*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_uncompressed_bytes, tx); 2107*eda14cbcSMatt Macy 2108*eda14cbcSMatt Macy if (dsl_dir_phys(dd)->dd_reserved > 2109*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_used_bytes) { 2110*eda14cbcSMatt Macy uint64_t unused_rsrv = dsl_dir_phys(dd)->dd_reserved - 2111*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_used_bytes; 2112*eda14cbcSMatt Macy 2113*eda14cbcSMatt Macy dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV, 2114*eda14cbcSMatt Macy -unused_rsrv, 0, 0, tx); 2115*eda14cbcSMatt Macy dsl_dir_diduse_space(newparent, DD_USED_CHILD_RSRV, 2116*eda14cbcSMatt Macy unused_rsrv, 0, 0, tx); 2117*eda14cbcSMatt Macy } 2118*eda14cbcSMatt Macy } 2119*eda14cbcSMatt Macy 2120*eda14cbcSMatt Macy dmu_buf_will_dirty(dd->dd_dbuf, tx); 2121*eda14cbcSMatt Macy 2122*eda14cbcSMatt Macy /* remove from old parent zapobj */ 2123*eda14cbcSMatt Macy VERIFY0(zap_remove(mos, 2124*eda14cbcSMatt Macy dsl_dir_phys(dd->dd_parent)->dd_child_dir_zapobj, 2125*eda14cbcSMatt Macy dd->dd_myname, tx)); 2126*eda14cbcSMatt Macy 2127*eda14cbcSMatt Macy (void) strlcpy(dd->dd_myname, mynewname, 2128*eda14cbcSMatt Macy sizeof (dd->dd_myname)); 2129*eda14cbcSMatt Macy dsl_dir_rele(dd->dd_parent, dd); 2130*eda14cbcSMatt Macy dsl_dir_phys(dd)->dd_parent_obj = newparent->dd_object; 2131*eda14cbcSMatt Macy VERIFY0(dsl_dir_hold_obj(dp, 2132*eda14cbcSMatt Macy newparent->dd_object, NULL, dd, &dd->dd_parent)); 2133*eda14cbcSMatt Macy 2134*eda14cbcSMatt Macy /* add to new parent zapobj */ 2135*eda14cbcSMatt Macy VERIFY0(zap_add(mos, dsl_dir_phys(newparent)->dd_child_dir_zapobj, 2136*eda14cbcSMatt Macy dd->dd_myname, 8, 1, &dd->dd_object, tx)); 2137*eda14cbcSMatt Macy 2138*eda14cbcSMatt Macy zvol_rename_minors(dp->dp_spa, ddra->ddra_oldname, 
2139*eda14cbcSMatt Macy ddra->ddra_newname, B_TRUE); 2140*eda14cbcSMatt Macy 2141*eda14cbcSMatt Macy dsl_prop_notify_all(dd); 2142*eda14cbcSMatt Macy 2143*eda14cbcSMatt Macy dsl_dir_rele(newparent, FTAG); 2144*eda14cbcSMatt Macy dsl_dir_rele(dd, FTAG); 2145*eda14cbcSMatt Macy } 2146*eda14cbcSMatt Macy 2147*eda14cbcSMatt Macy int 2148*eda14cbcSMatt Macy dsl_dir_rename(const char *oldname, const char *newname) 2149*eda14cbcSMatt Macy { 2150*eda14cbcSMatt Macy dsl_dir_rename_arg_t ddra; 2151*eda14cbcSMatt Macy 2152*eda14cbcSMatt Macy ddra.ddra_oldname = oldname; 2153*eda14cbcSMatt Macy ddra.ddra_newname = newname; 2154*eda14cbcSMatt Macy ddra.ddra_cred = CRED(); 2155*eda14cbcSMatt Macy ddra.ddra_proc = curproc; 2156*eda14cbcSMatt Macy 2157*eda14cbcSMatt Macy return (dsl_sync_task(oldname, 2158*eda14cbcSMatt Macy dsl_dir_rename_check, dsl_dir_rename_sync, &ddra, 2159*eda14cbcSMatt Macy 3, ZFS_SPACE_CHECK_RESERVED)); 2160*eda14cbcSMatt Macy } 2161*eda14cbcSMatt Macy 2162*eda14cbcSMatt Macy int 2163*eda14cbcSMatt Macy dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, 2164*eda14cbcSMatt Macy uint64_t fs_cnt, uint64_t ss_cnt, uint64_t space, 2165*eda14cbcSMatt Macy cred_t *cr, proc_t *proc) 2166*eda14cbcSMatt Macy { 2167*eda14cbcSMatt Macy dsl_dir_t *ancestor; 2168*eda14cbcSMatt Macy int64_t adelta; 2169*eda14cbcSMatt Macy uint64_t avail; 2170*eda14cbcSMatt Macy int err; 2171*eda14cbcSMatt Macy 2172*eda14cbcSMatt Macy ancestor = closest_common_ancestor(sdd, tdd); 2173*eda14cbcSMatt Macy adelta = would_change(sdd, -space, ancestor); 2174*eda14cbcSMatt Macy avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE); 2175*eda14cbcSMatt Macy if (avail < space) 2176*eda14cbcSMatt Macy return (SET_ERROR(ENOSPC)); 2177*eda14cbcSMatt Macy 2178*eda14cbcSMatt Macy err = dsl_fs_ss_limit_check(tdd, fs_cnt, ZFS_PROP_FILESYSTEM_LIMIT, 2179*eda14cbcSMatt Macy ancestor, cr, proc); 2180*eda14cbcSMatt Macy if (err != 0) 2181*eda14cbcSMatt Macy return (err); 2182*eda14cbcSMatt 
Macy err = dsl_fs_ss_limit_check(tdd, ss_cnt, ZFS_PROP_SNAPSHOT_LIMIT, 2183*eda14cbcSMatt Macy ancestor, cr, proc); 2184*eda14cbcSMatt Macy if (err != 0) 2185*eda14cbcSMatt Macy return (err); 2186*eda14cbcSMatt Macy 2187*eda14cbcSMatt Macy return (0); 2188*eda14cbcSMatt Macy } 2189*eda14cbcSMatt Macy 2190*eda14cbcSMatt Macy inode_timespec_t 2191*eda14cbcSMatt Macy dsl_dir_snap_cmtime(dsl_dir_t *dd) 2192*eda14cbcSMatt Macy { 2193*eda14cbcSMatt Macy inode_timespec_t t; 2194*eda14cbcSMatt Macy 2195*eda14cbcSMatt Macy mutex_enter(&dd->dd_lock); 2196*eda14cbcSMatt Macy t = dd->dd_snap_cmtime; 2197*eda14cbcSMatt Macy mutex_exit(&dd->dd_lock); 2198*eda14cbcSMatt Macy 2199*eda14cbcSMatt Macy return (t); 2200*eda14cbcSMatt Macy } 2201*eda14cbcSMatt Macy 2202*eda14cbcSMatt Macy void 2203*eda14cbcSMatt Macy dsl_dir_snap_cmtime_update(dsl_dir_t *dd) 2204*eda14cbcSMatt Macy { 2205*eda14cbcSMatt Macy inode_timespec_t t; 2206*eda14cbcSMatt Macy 2207*eda14cbcSMatt Macy gethrestime(&t); 2208*eda14cbcSMatt Macy mutex_enter(&dd->dd_lock); 2209*eda14cbcSMatt Macy dd->dd_snap_cmtime = t; 2210*eda14cbcSMatt Macy mutex_exit(&dd->dd_lock); 2211*eda14cbcSMatt Macy } 2212*eda14cbcSMatt Macy 2213*eda14cbcSMatt Macy void 2214*eda14cbcSMatt Macy dsl_dir_zapify(dsl_dir_t *dd, dmu_tx_t *tx) 2215*eda14cbcSMatt Macy { 2216*eda14cbcSMatt Macy objset_t *mos = dd->dd_pool->dp_meta_objset; 2217*eda14cbcSMatt Macy dmu_object_zapify(mos, dd->dd_object, DMU_OT_DSL_DIR, tx); 2218*eda14cbcSMatt Macy } 2219*eda14cbcSMatt Macy 2220*eda14cbcSMatt Macy boolean_t 2221*eda14cbcSMatt Macy dsl_dir_is_zapified(dsl_dir_t *dd) 2222*eda14cbcSMatt Macy { 2223*eda14cbcSMatt Macy dmu_object_info_t doi; 2224*eda14cbcSMatt Macy 2225*eda14cbcSMatt Macy dmu_object_info_from_db(dd->dd_dbuf, &doi); 2226*eda14cbcSMatt Macy return (doi.doi_type == DMU_OTN_ZAP_METADATA); 2227*eda14cbcSMatt Macy } 2228*eda14cbcSMatt Macy 2229*eda14cbcSMatt Macy void 2230*eda14cbcSMatt Macy dsl_dir_livelist_open(dsl_dir_t *dd, uint64_t obj) 
2231*eda14cbcSMatt Macy { 2232*eda14cbcSMatt Macy objset_t *mos = dd->dd_pool->dp_meta_objset; 2233*eda14cbcSMatt Macy ASSERT(spa_feature_is_active(dd->dd_pool->dp_spa, 2234*eda14cbcSMatt Macy SPA_FEATURE_LIVELIST)); 2235*eda14cbcSMatt Macy dsl_deadlist_open(&dd->dd_livelist, mos, obj); 2236*eda14cbcSMatt Macy bplist_create(&dd->dd_pending_allocs); 2237*eda14cbcSMatt Macy bplist_create(&dd->dd_pending_frees); 2238*eda14cbcSMatt Macy } 2239*eda14cbcSMatt Macy 2240*eda14cbcSMatt Macy void 2241*eda14cbcSMatt Macy dsl_dir_livelist_close(dsl_dir_t *dd) 2242*eda14cbcSMatt Macy { 2243*eda14cbcSMatt Macy dsl_deadlist_close(&dd->dd_livelist); 2244*eda14cbcSMatt Macy bplist_destroy(&dd->dd_pending_allocs); 2245*eda14cbcSMatt Macy bplist_destroy(&dd->dd_pending_frees); 2246*eda14cbcSMatt Macy } 2247*eda14cbcSMatt Macy 2248*eda14cbcSMatt Macy void 2249*eda14cbcSMatt Macy dsl_dir_remove_livelist(dsl_dir_t *dd, dmu_tx_t *tx, boolean_t total) 2250*eda14cbcSMatt Macy { 2251*eda14cbcSMatt Macy uint64_t obj; 2252*eda14cbcSMatt Macy dsl_pool_t *dp = dmu_tx_pool(tx); 2253*eda14cbcSMatt Macy spa_t *spa = dp->dp_spa; 2254*eda14cbcSMatt Macy livelist_condense_entry_t to_condense = spa->spa_to_condense; 2255*eda14cbcSMatt Macy 2256*eda14cbcSMatt Macy if (!dsl_deadlist_is_open(&dd->dd_livelist)) 2257*eda14cbcSMatt Macy return; 2258*eda14cbcSMatt Macy 2259*eda14cbcSMatt Macy /* 2260*eda14cbcSMatt Macy * If the livelist being removed is set to be condensed, stop the 2261*eda14cbcSMatt Macy * condense zthr and indicate the cancellation in the spa_to_condense 2262*eda14cbcSMatt Macy * struct in case the condense no-wait synctask has already started 2263*eda14cbcSMatt Macy */ 2264*eda14cbcSMatt Macy zthr_t *ll_condense_thread = spa->spa_livelist_condense_zthr; 2265*eda14cbcSMatt Macy if (ll_condense_thread != NULL && 2266*eda14cbcSMatt Macy (to_condense.ds != NULL) && (to_condense.ds->ds_dir == dd)) { 2267*eda14cbcSMatt Macy /* 2268*eda14cbcSMatt Macy * We use zthr_wait_cycle_done instead of 
zthr_cancel 2269*eda14cbcSMatt Macy * because we don't want to destroy the zthr, just have 2270*eda14cbcSMatt Macy * it skip its current task. 2271*eda14cbcSMatt Macy */ 2272*eda14cbcSMatt Macy spa->spa_to_condense.cancelled = B_TRUE; 2273*eda14cbcSMatt Macy zthr_wait_cycle_done(ll_condense_thread); 2274*eda14cbcSMatt Macy /* 2275*eda14cbcSMatt Macy * If we've returned from zthr_wait_cycle_done without 2276*eda14cbcSMatt Macy * clearing the to_condense data structure it's either 2277*eda14cbcSMatt Macy * because the no-wait synctask has started (which is 2278*eda14cbcSMatt Macy * indicated by 'syncing' field of to_condense) and we 2279*eda14cbcSMatt Macy * can expect it to clear to_condense on its own. 2280*eda14cbcSMatt Macy * Otherwise, we returned before the zthr ran. The 2281*eda14cbcSMatt Macy * checkfunc will now fail as cancelled == B_TRUE so we 2282*eda14cbcSMatt Macy * can safely NULL out ds, allowing a different dir's 2283*eda14cbcSMatt Macy * livelist to be condensed. 2284*eda14cbcSMatt Macy * 2285*eda14cbcSMatt Macy * We can be sure that the to_condense struct will not 2286*eda14cbcSMatt Macy * be repopulated at this stage because both this 2287*eda14cbcSMatt Macy * function and dsl_livelist_try_condense execute in 2288*eda14cbcSMatt Macy * syncing context. 
2289*eda14cbcSMatt Macy */ 2290*eda14cbcSMatt Macy if ((spa->spa_to_condense.ds != NULL) && 2291*eda14cbcSMatt Macy !spa->spa_to_condense.syncing) { 2292*eda14cbcSMatt Macy dmu_buf_rele(spa->spa_to_condense.ds->ds_dbuf, 2293*eda14cbcSMatt Macy spa); 2294*eda14cbcSMatt Macy spa->spa_to_condense.ds = NULL; 2295*eda14cbcSMatt Macy } 2296*eda14cbcSMatt Macy } 2297*eda14cbcSMatt Macy 2298*eda14cbcSMatt Macy dsl_dir_livelist_close(dd); 2299*eda14cbcSMatt Macy VERIFY0(zap_lookup(dp->dp_meta_objset, dd->dd_object, 2300*eda14cbcSMatt Macy DD_FIELD_LIVELIST, sizeof (uint64_t), 1, &obj)); 2301*eda14cbcSMatt Macy VERIFY0(zap_remove(dp->dp_meta_objset, dd->dd_object, 2302*eda14cbcSMatt Macy DD_FIELD_LIVELIST, tx)); 2303*eda14cbcSMatt Macy if (total) { 2304*eda14cbcSMatt Macy dsl_deadlist_free(dp->dp_meta_objset, obj, tx); 2305*eda14cbcSMatt Macy spa_feature_decr(spa, SPA_FEATURE_LIVELIST, tx); 2306*eda14cbcSMatt Macy } 2307*eda14cbcSMatt Macy } 2308*eda14cbcSMatt Macy 2309*eda14cbcSMatt Macy static int 2310*eda14cbcSMatt Macy dsl_dir_activity_in_progress(dsl_dir_t *dd, dsl_dataset_t *ds, 2311*eda14cbcSMatt Macy zfs_wait_activity_t activity, boolean_t *in_progress) 2312*eda14cbcSMatt Macy { 2313*eda14cbcSMatt Macy int error = 0; 2314*eda14cbcSMatt Macy 2315*eda14cbcSMatt Macy ASSERT(MUTEX_HELD(&dd->dd_activity_lock)); 2316*eda14cbcSMatt Macy 2317*eda14cbcSMatt Macy switch (activity) { 2318*eda14cbcSMatt Macy case ZFS_WAIT_DELETEQ: { 2319*eda14cbcSMatt Macy #ifdef _KERNEL 2320*eda14cbcSMatt Macy objset_t *os; 2321*eda14cbcSMatt Macy error = dmu_objset_from_ds(ds, &os); 2322*eda14cbcSMatt Macy if (error != 0) 2323*eda14cbcSMatt Macy break; 2324*eda14cbcSMatt Macy 2325*eda14cbcSMatt Macy mutex_enter(&os->os_user_ptr_lock); 2326*eda14cbcSMatt Macy void *user = dmu_objset_get_user(os); 2327*eda14cbcSMatt Macy mutex_exit(&os->os_user_ptr_lock); 2328*eda14cbcSMatt Macy if (dmu_objset_type(os) != DMU_OST_ZFS || 2329*eda14cbcSMatt Macy user == NULL || zfs_get_vfs_flag_unmounted(os)) { 
2330*eda14cbcSMatt Macy *in_progress = B_FALSE; 2331*eda14cbcSMatt Macy return (0); 2332*eda14cbcSMatt Macy } 2333*eda14cbcSMatt Macy 2334*eda14cbcSMatt Macy uint64_t readonly = B_FALSE; 2335*eda14cbcSMatt Macy error = zfs_get_temporary_prop(ds, ZFS_PROP_READONLY, &readonly, 2336*eda14cbcSMatt Macy NULL); 2337*eda14cbcSMatt Macy 2338*eda14cbcSMatt Macy if (error != 0) 2339*eda14cbcSMatt Macy break; 2340*eda14cbcSMatt Macy 2341*eda14cbcSMatt Macy if (readonly || !spa_writeable(dd->dd_pool->dp_spa)) { 2342*eda14cbcSMatt Macy *in_progress = B_FALSE; 2343*eda14cbcSMatt Macy return (0); 2344*eda14cbcSMatt Macy } 2345*eda14cbcSMatt Macy 2346*eda14cbcSMatt Macy uint64_t count, unlinked_obj; 2347*eda14cbcSMatt Macy error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, 2348*eda14cbcSMatt Macy &unlinked_obj); 2349*eda14cbcSMatt Macy if (error != 0) { 2350*eda14cbcSMatt Macy dsl_dataset_rele(ds, FTAG); 2351*eda14cbcSMatt Macy break; 2352*eda14cbcSMatt Macy } 2353*eda14cbcSMatt Macy error = zap_count(os, unlinked_obj, &count); 2354*eda14cbcSMatt Macy 2355*eda14cbcSMatt Macy if (error == 0) 2356*eda14cbcSMatt Macy *in_progress = (count != 0); 2357*eda14cbcSMatt Macy break; 2358*eda14cbcSMatt Macy #else 2359*eda14cbcSMatt Macy /* 2360*eda14cbcSMatt Macy * The delete queue is ZPL specific, and libzpool doesn't have 2361*eda14cbcSMatt Macy * it. It doesn't make sense to wait for it. 
2362*eda14cbcSMatt Macy */ 2363*eda14cbcSMatt Macy *in_progress = B_FALSE; 2364*eda14cbcSMatt Macy break; 2365*eda14cbcSMatt Macy #endif 2366*eda14cbcSMatt Macy } 2367*eda14cbcSMatt Macy default: 2368*eda14cbcSMatt Macy panic("unrecognized value for activity %d", activity); 2369*eda14cbcSMatt Macy } 2370*eda14cbcSMatt Macy 2371*eda14cbcSMatt Macy return (error); 2372*eda14cbcSMatt Macy } 2373*eda14cbcSMatt Macy 2374*eda14cbcSMatt Macy int 2375*eda14cbcSMatt Macy dsl_dir_wait(dsl_dir_t *dd, dsl_dataset_t *ds, zfs_wait_activity_t activity, 2376*eda14cbcSMatt Macy boolean_t *waited) 2377*eda14cbcSMatt Macy { 2378*eda14cbcSMatt Macy int error = 0; 2379*eda14cbcSMatt Macy boolean_t in_progress; 2380*eda14cbcSMatt Macy dsl_pool_t *dp = dd->dd_pool; 2381*eda14cbcSMatt Macy for (;;) { 2382*eda14cbcSMatt Macy dsl_pool_config_enter(dp, FTAG); 2383*eda14cbcSMatt Macy error = dsl_dir_activity_in_progress(dd, ds, activity, 2384*eda14cbcSMatt Macy &in_progress); 2385*eda14cbcSMatt Macy dsl_pool_config_exit(dp, FTAG); 2386*eda14cbcSMatt Macy if (error != 0 || !in_progress) 2387*eda14cbcSMatt Macy break; 2388*eda14cbcSMatt Macy 2389*eda14cbcSMatt Macy *waited = B_TRUE; 2390*eda14cbcSMatt Macy 2391*eda14cbcSMatt Macy if (cv_wait_sig(&dd->dd_activity_cv, &dd->dd_activity_lock) == 2392*eda14cbcSMatt Macy 0 || dd->dd_activity_cancelled) { 2393*eda14cbcSMatt Macy error = SET_ERROR(EINTR); 2394*eda14cbcSMatt Macy break; 2395*eda14cbcSMatt Macy } 2396*eda14cbcSMatt Macy } 2397*eda14cbcSMatt Macy return (error); 2398*eda14cbcSMatt Macy } 2399*eda14cbcSMatt Macy 2400*eda14cbcSMatt Macy void 2401*eda14cbcSMatt Macy dsl_dir_cancel_waiters(dsl_dir_t *dd) 2402*eda14cbcSMatt Macy { 2403*eda14cbcSMatt Macy mutex_enter(&dd->dd_activity_lock); 2404*eda14cbcSMatt Macy dd->dd_activity_cancelled = B_TRUE; 2405*eda14cbcSMatt Macy cv_broadcast(&dd->dd_activity_cv); 2406*eda14cbcSMatt Macy while (dd->dd_activity_waiters > 0) 2407*eda14cbcSMatt Macy cv_wait(&dd->dd_activity_cv, &dd->dd_activity_lock); 
2408*eda14cbcSMatt Macy mutex_exit(&dd->dd_activity_lock); 2409*eda14cbcSMatt Macy } 2410*eda14cbcSMatt Macy 2411*eda14cbcSMatt Macy #if defined(_KERNEL) 2412*eda14cbcSMatt Macy EXPORT_SYMBOL(dsl_dir_set_quota); 2413*eda14cbcSMatt Macy EXPORT_SYMBOL(dsl_dir_set_reservation); 2414*eda14cbcSMatt Macy #endif 2415