1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5033f9833Sek110237 * Common Development and Distribution License (the "License"). 6033f9833Sek110237 * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22f3e6fb2fSMark Shellenbaum * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 239a686fbcSPaul Dagnelie * Copyright (c) 2013, 2015 by Delphix. All rights reserved. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 26fa9e4066Sahrens #include <sys/types.h> 27fa9e4066Sahrens #include <sys/param.h> 28fa9e4066Sahrens #include <sys/time.h> 29fa9e4066Sahrens #include <sys/systm.h> 30fa9e4066Sahrens #include <sys/sysmacros.h> 31fa9e4066Sahrens #include <sys/resource.h> 32fa9e4066Sahrens #include <sys/vfs.h> 33fa9e4066Sahrens #include <sys/vnode.h> 34fa9e4066Sahrens #include <sys/file.h> 35fa9e4066Sahrens #include <sys/mode.h> 36fa9e4066Sahrens #include <sys/kmem.h> 37fa9e4066Sahrens #include <sys/uio.h> 38fa9e4066Sahrens #include <sys/pathname.h> 39fa9e4066Sahrens #include <sys/cmn_err.h> 40fa9e4066Sahrens #include <sys/errno.h> 41fa9e4066Sahrens #include <sys/stat.h> 42fa9e4066Sahrens #include <sys/unistd.h> 43de8267e0Stimh #include <sys/sunddi.h> 44fa9e4066Sahrens #include <sys/random.h> 45fa9e4066Sahrens #include <sys/policy.h> 46fa9e4066Sahrens #include <sys/zfs_dir.h> 47fa9e4066Sahrens #include <sys/zfs_acl.h> 48fa9e4066Sahrens #include <sys/fs/zfs.h> 49fa9e4066Sahrens #include "fs/fs_subr.h" 50fa9e4066Sahrens #include <sys/zap.h> 51fa9e4066Sahrens #include <sys/dmu.h> 52fa9e4066Sahrens #include <sys/atomic.h> 53fa9e4066Sahrens #include <sys/zfs_ctldir.h> 54da6c28aaSamw #include <sys/zfs_fuid.h> 550a586ceaSMark Shellenbaum #include <sys/sa.h> 560a586ceaSMark Shellenbaum #include <sys/zfs_sa.h> 57033f9833Sek110237 #include <sys/dnlc.h> 58da6c28aaSamw #include <sys/extdirent.h> 59da6c28aaSamw 60da6c28aaSamw /* 61da6c28aaSamw * zfs_match_find() is used by zfs_dirent_lock() to peform zap lookups 62da6c28aaSamw * of names after deciding which is the appropriate lookup interface. 63da6c28aaSamw */ 64da6c28aaSamw static int 65da6c28aaSamw zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, char *name, boolean_t exact, 66da6c28aaSamw boolean_t update, int *deflags, pathname_t *rpnp, uint64_t *zoid) 67da6c28aaSamw { 68da6c28aaSamw int error; 69da6c28aaSamw 70da6c28aaSamw if (zfsvfs->z_norm) { 71da6c28aaSamw matchtype_t mt = MT_FIRST; 72da6c28aaSamw boolean_t conflict = B_FALSE; 73da6c28aaSamw size_t bufsz = 0; 74da6c28aaSamw char *buf = NULL; 75da6c28aaSamw 76da6c28aaSamw if (rpnp) { 77ab04eb8eStimh buf = rpnp->pn_buf; 78da6c28aaSamw bufsz = rpnp->pn_bufsize; 79da6c28aaSamw } 80da6c28aaSamw if (exact) 81da6c28aaSamw mt = MT_EXACT; 82da6c28aaSamw /* 83da6c28aaSamw * In the non-mixed case we only expect there would ever 84da6c28aaSamw * be one match, but we need to use the normalizing lookup. 85da6c28aaSamw */ 86da6c28aaSamw error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1, 87da6c28aaSamw zoid, mt, buf, bufsz, &conflict); 88ab04eb8eStimh if (!error && deflags) 89da6c28aaSamw *deflags = conflict ? ED_CASE_CONFLICT : 0; 90da6c28aaSamw } else { 91da6c28aaSamw error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid); 92da6c28aaSamw } 93da6c28aaSamw *zoid = ZFS_DIRENT_OBJ(*zoid); 94da6c28aaSamw 95da6c28aaSamw if (error == ENOENT && update) 96da6c28aaSamw dnlc_update(ZTOV(dzp), name, DNLC_NO_VNODE); 97da6c28aaSamw 98da6c28aaSamw return (error); 99da6c28aaSamw } 100fa9e4066Sahrens 101fa9e4066Sahrens /* 102fa9e4066Sahrens * Lock a directory entry. A dirlock on <dzp, name> protects that name 103fa9e4066Sahrens * in dzp's directory zap object. As long as you hold a dirlock, you can 104fa9e4066Sahrens * assume two things: (1) dzp cannot be reaped, and (2) no other thread 105fa9e4066Sahrens * can change the zap entry for (i.e. link or unlink) this name. 106fa9e4066Sahrens * 107fa9e4066Sahrens * Input arguments: 108fa9e4066Sahrens * dzp - znode for directory 109fa9e4066Sahrens * name - name of entry to lock 110fa9e4066Sahrens * flag - ZNEW: if the entry already exists, fail with EEXIST. 111fa9e4066Sahrens * ZEXISTS: if the entry does not exist, fail with ENOENT. 112fa9e4066Sahrens * ZSHARED: allow concurrent access with other ZSHARED callers. 113fa9e4066Sahrens * ZXATTR: we want dzp's xattr directory 114da6c28aaSamw * ZCILOOK: On a mixed sensitivity file system, 115da6c28aaSamw * this lookup should be case-insensitive. 116da6c28aaSamw * ZCIEXACT: On a purely case-insensitive file system, 117da6c28aaSamw * this lookup should be case-sensitive. 118da6c28aaSamw * ZRENAMING: we are locking for renaming, force narrow locks 119afefc7e4SSanjeev Bagewadi * ZHAVELOCK: Don't grab the z_name_lock for this call. The 120afefc7e4SSanjeev Bagewadi * current thread already holds it. 121fa9e4066Sahrens * 122fa9e4066Sahrens * Output arguments: 123fa9e4066Sahrens * zpp - pointer to the znode for the entry (NULL if there isn't one) 124fa9e4066Sahrens * dlpp - pointer to the dirlock for this entry (NULL on error) 125da6c28aaSamw * direntflags - (case-insensitive lookup only) 126da6c28aaSamw * flags if multiple case-sensitive matches exist in directory 127da6c28aaSamw * realpnp - (case-insensitive lookup only) 128da6c28aaSamw * actual name matched within the directory 129fa9e4066Sahrens * 130fa9e4066Sahrens * Return value: 0 on success or errno on failure. 131fa9e4066Sahrens * 132fa9e4066Sahrens * NOTE: Always checks for, and rejects, '.' and '..'. 133da6c28aaSamw * NOTE: For case-insensitive file systems we take wide locks (see below), 134da6c28aaSamw * but return znode pointers to a single match. 135fa9e4066Sahrens */ 136fa9e4066Sahrens int 137fa9e4066Sahrens zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp, 138da6c28aaSamw int flag, int *direntflags, pathname_t *realpnp) 139fa9e4066Sahrens { 140fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 141fa9e4066Sahrens zfs_dirlock_t *dl; 142da6c28aaSamw boolean_t update; 143da6c28aaSamw boolean_t exact; 144fa9e4066Sahrens uint64_t zoid; 145da6c28aaSamw vnode_t *vp = NULL; 146da6c28aaSamw int error = 0; 147da6c28aaSamw int cmpflags; 148fa9e4066Sahrens 149fa9e4066Sahrens *zpp = NULL; 150fa9e4066Sahrens *dlpp = NULL; 151fa9e4066Sahrens 152fa9e4066Sahrens /* 153fa9e4066Sahrens * Verify that we are not trying to lock '.', '..', or '.zfs' 154fa9e4066Sahrens */ 155fa9e4066Sahrens if (name[0] == '.' && 156fa9e4066Sahrens (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')) || 157fa9e4066Sahrens zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) 158be6fd75aSMatthew Ahrens return (SET_ERROR(EEXIST)); 159fa9e4066Sahrens 160fa9e4066Sahrens /* 161da6c28aaSamw * Case sensitivity and normalization preferences are set when 162da6c28aaSamw * the file system is created. These are stored in the 163da6c28aaSamw * zfsvfs->z_case and zfsvfs->z_norm fields. These choices 164da6c28aaSamw * affect what vnodes can be cached in the DNLC, how we 165da6c28aaSamw * perform zap lookups, and the "width" of our dirlocks. 166da6c28aaSamw * 167da6c28aaSamw * A normal dirlock locks a single name. Note that with 168da6c28aaSamw * normalization a name can be composed multiple ways, but 169da6c28aaSamw * when normalized, these names all compare equal. A wide 170da6c28aaSamw * dirlock locks multiple names. We need these when the file 171da6c28aaSamw * system is supporting mixed-mode access. It is sometimes 172da6c28aaSamw * necessary to lock all case permutations of file name at 173da6c28aaSamw * once so that simultaneous case-insensitive/case-sensitive 174da6c28aaSamw * behaves as rationally as possible. 175da6c28aaSamw */ 176da6c28aaSamw 177da6c28aaSamw /* 178da6c28aaSamw * Decide if exact matches should be requested when performing 179da6c28aaSamw * a zap lookup on file systems supporting case-insensitive 180da6c28aaSamw * access. 181da6c28aaSamw */ 182de8267e0Stimh exact = 183de8267e0Stimh ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) || 184de8267e0Stimh ((zfsvfs->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK)); 185da6c28aaSamw 186da6c28aaSamw /* 187da6c28aaSamw * Only look in or update the DNLC if we are looking for the 188da6c28aaSamw * name on a file system that does not require normalization 189da6c28aaSamw * or case folding. We can also look there if we happen to be 190da6c28aaSamw * on a non-normalizing, mixed sensitivity file system IF we 191da6c28aaSamw * are looking for the exact name. 192da6c28aaSamw * 193da6c28aaSamw * Maybe can add TO-UPPERed version of name to dnlc in ci-only 194da6c28aaSamw * case for performance improvement? 195da6c28aaSamw */ 196da6c28aaSamw update = !zfsvfs->z_norm || 197de8267e0Stimh ((zfsvfs->z_case == ZFS_CASE_MIXED) && 198da6c28aaSamw !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK)); 199da6c28aaSamw 200da6c28aaSamw /* 201da6c28aaSamw * ZRENAMING indicates we are in a situation where we should 202da6c28aaSamw * take narrow locks regardless of the file system's 203da6c28aaSamw * preferences for normalizing and case folding. This will 204da6c28aaSamw * prevent us deadlocking trying to grab the same wide lock 205da6c28aaSamw * twice if the two names happen to be case-insensitive 206da6c28aaSamw * matches. 207da6c28aaSamw */ 208da6c28aaSamw if (flag & ZRENAMING) 209da6c28aaSamw cmpflags = 0; 210da6c28aaSamw else 211da6c28aaSamw cmpflags = zfsvfs->z_norm; 212da6c28aaSamw 213da6c28aaSamw /* 214fa9e4066Sahrens * Wait until there are no locks on this name. 215afefc7e4SSanjeev Bagewadi * 216afefc7e4SSanjeev Bagewadi * Don't grab the the lock if it is already held. However, cannot 217afefc7e4SSanjeev Bagewadi * have both ZSHARED and ZHAVELOCK together. 218fa9e4066Sahrens */ 219afefc7e4SSanjeev Bagewadi ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK)); 220afefc7e4SSanjeev Bagewadi if (!(flag & ZHAVELOCK)) 221af2c4821Smaybee rw_enter(&dzp->z_name_lock, RW_READER); 222afefc7e4SSanjeev Bagewadi 223fa9e4066Sahrens mutex_enter(&dzp->z_lock); 224fa9e4066Sahrens for (;;) { 225893a6d32Sahrens if (dzp->z_unlinked) { 226fa9e4066Sahrens mutex_exit(&dzp->z_lock); 227afefc7e4SSanjeev Bagewadi if (!(flag & ZHAVELOCK)) 228af2c4821Smaybee rw_exit(&dzp->z_name_lock); 229be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 230fa9e4066Sahrens } 231da6c28aaSamw for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) { 232da6c28aaSamw if ((u8_strcmp(name, dl->dl_name, 0, cmpflags, 233da6c28aaSamw U8_UNICODE_LATEST, &error) == 0) || error != 0) 234fa9e4066Sahrens break; 235da6c28aaSamw } 236da6c28aaSamw if (error != 0) { 237da6c28aaSamw mutex_exit(&dzp->z_lock); 238afefc7e4SSanjeev Bagewadi if (!(flag & ZHAVELOCK)) 239da6c28aaSamw rw_exit(&dzp->z_name_lock); 240be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 241da6c28aaSamw } 242fa9e4066Sahrens if (dl == NULL) { 243fa9e4066Sahrens /* 244fa9e4066Sahrens * Allocate a new dirlock and add it to the list. 245fa9e4066Sahrens */ 246fa9e4066Sahrens dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP); 247fa9e4066Sahrens cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL); 248fa9e4066Sahrens dl->dl_name = name; 249fa9e4066Sahrens dl->dl_sharecnt = 0; 250afefc7e4SSanjeev Bagewadi dl->dl_namelock = 0; 251fa9e4066Sahrens dl->dl_namesize = 0; 252fa9e4066Sahrens dl->dl_dzp = dzp; 253fa9e4066Sahrens dl->dl_next = dzp->z_dirlocks; 254fa9e4066Sahrens dzp->z_dirlocks = dl; 255fa9e4066Sahrens break; 256fa9e4066Sahrens } 257fa9e4066Sahrens if ((flag & ZSHARED) && dl->dl_sharecnt != 0) 258fa9e4066Sahrens break; 259fa9e4066Sahrens cv_wait(&dl->dl_cv, &dzp->z_lock); 260fa9e4066Sahrens } 261fa9e4066Sahrens 262afefc7e4SSanjeev Bagewadi /* 263afefc7e4SSanjeev Bagewadi * If the z_name_lock was NOT held for this dirlock record it. 264afefc7e4SSanjeev Bagewadi */ 265afefc7e4SSanjeev Bagewadi if (flag & ZHAVELOCK) 266afefc7e4SSanjeev Bagewadi dl->dl_namelock = 1; 267afefc7e4SSanjeev Bagewadi 268fa9e4066Sahrens if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) { 269fa9e4066Sahrens /* 270fa9e4066Sahrens * We're the second shared reference to dl. Make a copy of 271fa9e4066Sahrens * dl_name in case the first thread goes away before we do. 272fa9e4066Sahrens * Note that we initialize the new name before storing its 273fa9e4066Sahrens * pointer into dl_name, because the first thread may load 274fa9e4066Sahrens * dl->dl_name at any time. He'll either see the old value, 275fa9e4066Sahrens * which is his, or the new shared copy; either is OK. 276fa9e4066Sahrens */ 277fa9e4066Sahrens dl->dl_namesize = strlen(dl->dl_name) + 1; 278fa9e4066Sahrens name = kmem_alloc(dl->dl_namesize, KM_SLEEP); 279fa9e4066Sahrens bcopy(dl->dl_name, name, dl->dl_namesize); 280fa9e4066Sahrens dl->dl_name = name; 281fa9e4066Sahrens } 282fa9e4066Sahrens 283fa9e4066Sahrens mutex_exit(&dzp->z_lock); 284fa9e4066Sahrens 285fa9e4066Sahrens /* 286fa9e4066Sahrens * We have a dirlock on the name. (Note that it is the dirlock, 287fa9e4066Sahrens * not the dzp's z_lock, that protects the name in the zap object.) 288fa9e4066Sahrens * See if there's an object by this name; if so, put a hold on it. 289fa9e4066Sahrens */ 290fa9e4066Sahrens if (flag & ZXATTR) { 2910a586ceaSMark Shellenbaum error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid, 2920a586ceaSMark Shellenbaum sizeof (zoid)); 2930a586ceaSMark Shellenbaum if (error == 0) 294fa9e4066Sahrens error = (zoid == 0 ? ENOENT : 0); 295fa9e4066Sahrens } else { 296da6c28aaSamw if (update) 297033f9833Sek110237 vp = dnlc_lookup(ZTOV(dzp), name); 298033f9833Sek110237 if (vp == DNLC_NO_VNODE) { 299033f9833Sek110237 VN_RELE(vp); 300be6fd75aSMatthew Ahrens error = SET_ERROR(ENOENT); 301033f9833Sek110237 } else if (vp) { 302033f9833Sek110237 if (flag & ZNEW) { 303033f9833Sek110237 zfs_dirent_unlock(dl); 304033f9833Sek110237 VN_RELE(vp); 305be6fd75aSMatthew Ahrens return (SET_ERROR(EEXIST)); 306033f9833Sek110237 } 307033f9833Sek110237 *dlpp = dl; 308033f9833Sek110237 *zpp = VTOZ(vp); 309033f9833Sek110237 return (0); 310033f9833Sek110237 } else { 311da6c28aaSamw error = zfs_match_find(zfsvfs, dzp, name, exact, 312da6c28aaSamw update, direntflags, realpnp, &zoid); 313033f9833Sek110237 } 314fa9e4066Sahrens } 315fa9e4066Sahrens if (error) { 316fa9e4066Sahrens if (error != ENOENT || (flag & ZEXISTS)) { 317fa9e4066Sahrens zfs_dirent_unlock(dl); 318fa9e4066Sahrens return (error); 319fa9e4066Sahrens } 320fa9e4066Sahrens } else { 321fa9e4066Sahrens if (flag & ZNEW) { 322fa9e4066Sahrens zfs_dirent_unlock(dl); 323be6fd75aSMatthew Ahrens return (SET_ERROR(EEXIST)); 324fa9e4066Sahrens } 325fa9e4066Sahrens error = zfs_zget(zfsvfs, zoid, zpp); 326fa9e4066Sahrens if (error) { 327fa9e4066Sahrens zfs_dirent_unlock(dl); 328fa9e4066Sahrens return (error); 329fa9e4066Sahrens } 330da6c28aaSamw if (!(flag & ZXATTR) && update) 331033f9833Sek110237 dnlc_update(ZTOV(dzp), name, ZTOV(*zpp)); 332fa9e4066Sahrens } 333fa9e4066Sahrens 334fa9e4066Sahrens *dlpp = dl; 335fa9e4066Sahrens 336fa9e4066Sahrens return (0); 337fa9e4066Sahrens } 338fa9e4066Sahrens 339fa9e4066Sahrens /* 340fa9e4066Sahrens * Unlock this directory entry and wake anyone who was waiting for it. 341fa9e4066Sahrens */ 342fa9e4066Sahrens void 343fa9e4066Sahrens zfs_dirent_unlock(zfs_dirlock_t *dl) 344fa9e4066Sahrens { 345fa9e4066Sahrens znode_t *dzp = dl->dl_dzp; 346fa9e4066Sahrens zfs_dirlock_t **prev_dl, *cur_dl; 347fa9e4066Sahrens 348fa9e4066Sahrens mutex_enter(&dzp->z_lock); 349afefc7e4SSanjeev Bagewadi 350afefc7e4SSanjeev Bagewadi if (!dl->dl_namelock) 351af2c4821Smaybee rw_exit(&dzp->z_name_lock); 352afefc7e4SSanjeev Bagewadi 353fa9e4066Sahrens if (dl->dl_sharecnt > 1) { 354fa9e4066Sahrens dl->dl_sharecnt--; 355fa9e4066Sahrens mutex_exit(&dzp->z_lock); 356fa9e4066Sahrens return; 357fa9e4066Sahrens } 358fa9e4066Sahrens prev_dl = &dzp->z_dirlocks; 359fa9e4066Sahrens while ((cur_dl = *prev_dl) != dl) 360fa9e4066Sahrens prev_dl = &cur_dl->dl_next; 361fa9e4066Sahrens *prev_dl = dl->dl_next; 362fa9e4066Sahrens cv_broadcast(&dl->dl_cv); 363fa9e4066Sahrens mutex_exit(&dzp->z_lock); 364fa9e4066Sahrens 365fa9e4066Sahrens if (dl->dl_namesize != 0) 366fa9e4066Sahrens kmem_free(dl->dl_name, dl->dl_namesize); 367fa9e4066Sahrens cv_destroy(&dl->dl_cv); 368fa9e4066Sahrens kmem_free(dl, sizeof (*dl)); 369fa9e4066Sahrens } 370fa9e4066Sahrens 371fa9e4066Sahrens /* 372fa9e4066Sahrens * Look up an entry in a directory. 373fa9e4066Sahrens * 374fa9e4066Sahrens * NOTE: '.' and '..' are handled as special cases because 375fa9e4066Sahrens * no directory entries are actually stored for them. If this is 376fa9e4066Sahrens * the root of a filesystem, then '.zfs' is also treated as a 377fa9e4066Sahrens * special pseudo-directory. 378fa9e4066Sahrens */ 379fa9e4066Sahrens int 380da6c28aaSamw zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags, 381da6c28aaSamw int *deflg, pathname_t *rpnp) 382fa9e4066Sahrens { 383fa9e4066Sahrens zfs_dirlock_t *dl; 384fa9e4066Sahrens znode_t *zp; 385fa9e4066Sahrens int error = 0; 3860a586ceaSMark Shellenbaum uint64_t parent; 387fa9e4066Sahrens 388fa9e4066Sahrens if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) { 389fa9e4066Sahrens *vpp = ZTOV(dzp); 390fa9e4066Sahrens VN_HOLD(*vpp); 391fa9e4066Sahrens } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) { 392fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 3930a586ceaSMark Shellenbaum 394fa9e4066Sahrens /* 395fa9e4066Sahrens * If we are a snapshot mounted under .zfs, return 396fa9e4066Sahrens * the vp for the snapshot directory. 397fa9e4066Sahrens */ 3980a586ceaSMark Shellenbaum if ((error = sa_lookup(dzp->z_sa_hdl, 3990a586ceaSMark Shellenbaum SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) 4000a586ceaSMark Shellenbaum return (error); 4010a586ceaSMark Shellenbaum if (parent == dzp->z_id && zfsvfs->z_parent != zfsvfs) { 402fa9e4066Sahrens error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir, 403da6c28aaSamw "snapshot", vpp, NULL, 0, NULL, kcred, 404da6c28aaSamw NULL, NULL, NULL); 405fa9e4066Sahrens return (error); 406fa9e4066Sahrens } 407fa9e4066Sahrens rw_enter(&dzp->z_parent_lock, RW_READER); 4080a586ceaSMark Shellenbaum error = zfs_zget(zfsvfs, parent, &zp); 409fa9e4066Sahrens if (error == 0) 410fa9e4066Sahrens *vpp = ZTOV(zp); 411fa9e4066Sahrens rw_exit(&dzp->z_parent_lock); 412fa9e4066Sahrens } else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) { 413fa9e4066Sahrens *vpp = zfsctl_root(dzp); 414fa9e4066Sahrens } else { 415da6c28aaSamw int zf; 416da6c28aaSamw 417da6c28aaSamw zf = ZEXISTS | ZSHARED; 418da6c28aaSamw if (flags & FIGNORECASE) 419da6c28aaSamw zf |= ZCILOOK; 420da6c28aaSamw 421da6c28aaSamw error = zfs_dirent_lock(&dl, dzp, name, &zp, zf, deflg, rpnp); 422fa9e4066Sahrens if (error == 0) { 423fa9e4066Sahrens *vpp = ZTOV(zp); 424fa9e4066Sahrens zfs_dirent_unlock(dl); 4257f6e3e7dSperrin dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */ 426fa9e4066Sahrens } 427da6c28aaSamw rpnp = NULL; 428fa9e4066Sahrens } 429fa9e4066Sahrens 430ab04eb8eStimh if ((flags & FIGNORECASE) && rpnp && !error) 431ab04eb8eStimh (void) strlcpy(rpnp->pn_buf, name, rpnp->pn_bufsize); 432da6c28aaSamw 433fa9e4066Sahrens return (error); 434fa9e4066Sahrens } 435fa9e4066Sahrens 436ea8dc4b6Seschrock /* 437893a6d32Sahrens * unlinked Set (formerly known as the "delete queue") Error Handling 438ea8dc4b6Seschrock * 439893a6d32Sahrens * When dealing with the unlinked set, we dmu_tx_hold_zap(), but we 440ea8dc4b6Seschrock * don't specify the name of the entry that we will be manipulating. We 441ea8dc4b6Seschrock * also fib and say that we won't be adding any new entries to the 442893a6d32Sahrens * unlinked set, even though we might (this is to lower the minimum file 443ea8dc4b6Seschrock * size that can be deleted in a full filesystem). So on the small 444893a6d32Sahrens * chance that the nlink list is using a fat zap (ie. has more than 445ea8dc4b6Seschrock * 2000 entries), we *may* not pre-read a block that's needed. 446ea8dc4b6Seschrock * Therefore it is remotely possible for some of the assertions 447893a6d32Sahrens * regarding the unlinked set below to fail due to i/o error. On a 448ea8dc4b6Seschrock * nondebug system, this will result in the space being leaked. 449ea8dc4b6Seschrock */ 450fa9e4066Sahrens void 451893a6d32Sahrens zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx) 452fa9e4066Sahrens { 453fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 454fa9e4066Sahrens 455893a6d32Sahrens ASSERT(zp->z_unlinked); 4560a586ceaSMark Shellenbaum ASSERT(zp->z_links == 0); 457fa9e4066Sahrens 458b420f3adSRichard Lowe VERIFY3U(0, ==, 459b420f3adSRichard Lowe zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); 460cdb0ab79Smaybee } 461cdb0ab79Smaybee 462fa9e4066Sahrens /* 463893a6d32Sahrens * Clean up any znodes that had no links when we either crashed or 464893a6d32Sahrens * (force) umounted the file system. 465893a6d32Sahrens */ 466893a6d32Sahrens void 467893a6d32Sahrens zfs_unlinked_drain(zfsvfs_t *zfsvfs) 468893a6d32Sahrens { 469893a6d32Sahrens zap_cursor_t zc; 470893a6d32Sahrens zap_attribute_t zap; 471893a6d32Sahrens dmu_object_info_t doi; 472893a6d32Sahrens znode_t *zp; 473893a6d32Sahrens int error; 474893a6d32Sahrens 475893a6d32Sahrens /* 476893a6d32Sahrens * Interate over the contents of the unlinked set. 477893a6d32Sahrens */ 478893a6d32Sahrens for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj); 479893a6d32Sahrens zap_cursor_retrieve(&zc, &zap) == 0; 480893a6d32Sahrens zap_cursor_advance(&zc)) { 481893a6d32Sahrens 482893a6d32Sahrens /* 483893a6d32Sahrens * See what kind of object we have in list 484893a6d32Sahrens */ 485893a6d32Sahrens 486893a6d32Sahrens error = dmu_object_info(zfsvfs->z_os, 487893a6d32Sahrens zap.za_first_integer, &doi); 488893a6d32Sahrens if (error != 0) 489893a6d32Sahrens continue; 490893a6d32Sahrens 491893a6d32Sahrens ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) || 492893a6d32Sahrens (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS)); 493893a6d32Sahrens /* 494893a6d32Sahrens * We need to re-mark these list entries for deletion, 495893a6d32Sahrens * so we pull them back into core and set zp->z_unlinked. 496893a6d32Sahrens */ 497893a6d32Sahrens error = zfs_zget(zfsvfs, zap.za_first_integer, &zp); 498893a6d32Sahrens 499893a6d32Sahrens /* 500893a6d32Sahrens * We may pick up znodes that are already marked for deletion. 501893a6d32Sahrens * This could happen during the purge of an extended attribute 502893a6d32Sahrens * directory. All we need to do is skip over them, since they 503893a6d32Sahrens * are already in the system marked z_unlinked. 504893a6d32Sahrens */ 505893a6d32Sahrens if (error != 0) 506893a6d32Sahrens continue; 507893a6d32Sahrens 508893a6d32Sahrens zp->z_unlinked = B_TRUE; 509893a6d32Sahrens VN_RELE(ZTOV(zp)); 510893a6d32Sahrens } 511893a6d32Sahrens zap_cursor_fini(&zc); 512893a6d32Sahrens } 513893a6d32Sahrens 514893a6d32Sahrens /* 515fa9e4066Sahrens * Delete the entire contents of a directory. Return a count 51640ff3960Sck153898 * of the number of entries that could not be deleted. If we encounter 51740ff3960Sck153898 * an error, return a count of at least one so that the directory stays 51840ff3960Sck153898 * in the unlinked set. 519fa9e4066Sahrens * 520fa9e4066Sahrens * NOTE: this function assumes that the directory is inactive, 521fa9e4066Sahrens * so there is no need to lock its entries before deletion. 522fa9e4066Sahrens * Also, it assumes the directory contents is *only* regular 523fa9e4066Sahrens * files. 524fa9e4066Sahrens */ 525fa9e4066Sahrens static int 526fa9e4066Sahrens zfs_purgedir(znode_t *dzp) 527fa9e4066Sahrens { 528fa9e4066Sahrens zap_cursor_t zc; 529fa9e4066Sahrens zap_attribute_t zap; 530fa9e4066Sahrens znode_t *xzp; 531fa9e4066Sahrens dmu_tx_t *tx; 532fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 533fa9e4066Sahrens zfs_dirlock_t dl; 534fa9e4066Sahrens int skipped = 0; 535fa9e4066Sahrens int error; 536fa9e4066Sahrens 537fa9e4066Sahrens for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id); 538fa9e4066Sahrens (error = zap_cursor_retrieve(&zc, &zap)) == 0; 539fa9e4066Sahrens zap_cursor_advance(&zc)) { 540b1b8ab34Slling error = zfs_zget(zfsvfs, 541b1b8ab34Slling ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp); 54240ff3960Sck153898 if (error) { 54340ff3960Sck153898 skipped += 1; 54440ff3960Sck153898 continue; 54540ff3960Sck153898 } 546fa9e4066Sahrens 547fa9e4066Sahrens ASSERT((ZTOV(xzp)->v_type == VREG) || 548fa9e4066Sahrens (ZTOV(xzp)->v_type == VLNK)); 549fa9e4066Sahrens 550fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 5510a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 552ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name); 5530a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 554893a6d32Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 5550a586ceaSMark Shellenbaum /* Is this really needed ? */ 5560a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, xzp); 5574bb73804SMatthew Ahrens dmu_tx_mark_netfree(tx); 558fa9e4066Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 559fa9e4066Sahrens if (error) { 560fa9e4066Sahrens dmu_tx_abort(tx); 561fa9e4066Sahrens VN_RELE(ZTOV(xzp)); 562fa9e4066Sahrens skipped += 1; 563fa9e4066Sahrens continue; 564fa9e4066Sahrens } 565fa9e4066Sahrens bzero(&dl, sizeof (dl)); 566fa9e4066Sahrens dl.dl_dzp = dzp; 567fa9e4066Sahrens dl.dl_name = zap.za_name; 568fa9e4066Sahrens 569fa9e4066Sahrens error = zfs_link_destroy(&dl, xzp, tx, 0, NULL); 57040ff3960Sck153898 if (error) 57140ff3960Sck153898 skipped += 1; 572fa9e4066Sahrens dmu_tx_commit(tx); 573fa9e4066Sahrens 574fa9e4066Sahrens VN_RELE(ZTOV(xzp)); 575fa9e4066Sahrens } 57687e5029aSahrens zap_cursor_fini(&zc); 57740ff3960Sck153898 if (error != ENOENT) 57840ff3960Sck153898 skipped += 1; 579fa9e4066Sahrens return (skipped); 580fa9e4066Sahrens } 581fa9e4066Sahrens 582fa9e4066Sahrens void 583fa9e4066Sahrens zfs_rmnode(znode_t *zp) 584fa9e4066Sahrens { 585fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 586fa9e4066Sahrens objset_t *os = zfsvfs->z_os; 587fa9e4066Sahrens znode_t *xzp = NULL; 588fa9e4066Sahrens dmu_tx_t *tx; 589fa9e4066Sahrens uint64_t acl_obj; 5900a586ceaSMark Shellenbaum uint64_t xattr_obj; 591fa9e4066Sahrens int error; 592fa9e4066Sahrens 5930a586ceaSMark Shellenbaum ASSERT(zp->z_links == 0); 594fa9e4066Sahrens ASSERT(ZTOV(zp)->v_count == 0); 595fa9e4066Sahrens 596fa9e4066Sahrens /* 597fa9e4066Sahrens * If this is an attribute directory, purge its contents. 598fa9e4066Sahrens */ 5990a586ceaSMark Shellenbaum if (ZTOV(zp)->v_type == VDIR && (zp->z_pflags & ZFS_XATTR)) { 600fa9e4066Sahrens if (zfs_purgedir(zp) != 0) { 601fa9e4066Sahrens /* 602893a6d32Sahrens * Not enough space to delete some xattrs. 603cdb0ab79Smaybee * Leave it in the unlinked set. 604fa9e4066Sahrens */ 6050e276a64Smaybee zfs_znode_dmu_fini(zp); 6060e276a64Smaybee zfs_znode_free(zp); 607fa9e4066Sahrens return; 608fa9e4066Sahrens } 609893a6d32Sahrens } 610fa9e4066Sahrens 611fa9e4066Sahrens /* 612cdb0ab79Smaybee * Free up all the data in the file. 613cdb0ab79Smaybee */ 614cdb0ab79Smaybee error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END); 615cdb0ab79Smaybee if (error) { 616cdb0ab79Smaybee /* 617cdb0ab79Smaybee * Not enough space. Leave the file in the unlinked set. 618cdb0ab79Smaybee */ 619cdb0ab79Smaybee zfs_znode_dmu_fini(zp); 620cdb0ab79Smaybee zfs_znode_free(zp); 621cdb0ab79Smaybee return; 622cdb0ab79Smaybee } 623cdb0ab79Smaybee 624cdb0ab79Smaybee /* 625893a6d32Sahrens * If the file has extended attributes, we're going to unlink 626893a6d32Sahrens * the xattr dir. 627fa9e4066Sahrens */ 6280a586ceaSMark Shellenbaum error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 6290a586ceaSMark Shellenbaum &xattr_obj, sizeof (xattr_obj)); 6300a586ceaSMark Shellenbaum if (error == 0 && xattr_obj) { 6310a586ceaSMark Shellenbaum error = zfs_zget(zfsvfs, xattr_obj, &xzp); 632fa9e4066Sahrens ASSERT(error == 0); 633fa9e4066Sahrens } 634fa9e4066Sahrens 6351412a1a2SMark Shellenbaum acl_obj = zfs_external_acl(zp); 636fa9e4066Sahrens 637fa9e4066Sahrens /* 638cdb0ab79Smaybee * Set up the final transaction. 639fa9e4066Sahrens */ 640fa9e4066Sahrens tx = dmu_tx_create(os); 641fa9e4066Sahrens dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END); 642893a6d32Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 643fa9e4066Sahrens if (xzp) { 644893a6d32Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL); 6450a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 646fa9e4066Sahrens } 647fa9e4066Sahrens if (acl_obj) 648fa9e4066Sahrens dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 6490a586ceaSMark Shellenbaum 6500a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 651*9d7a20afSArne Jansen dmu_tx_mark_netfree(tx); 652fa9e4066Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 653fa9e4066Sahrens if (error) { 654fa9e4066Sahrens /* 655893a6d32Sahrens * Not enough space to delete the file. Leave it in the 656893a6d32Sahrens * unlinked set, leaking it until the fs is remounted (at 657893a6d32Sahrens * which point we'll call zfs_unlinked_drain() to process it). 658fa9e4066Sahrens */ 659893a6d32Sahrens dmu_tx_abort(tx); 6600e276a64Smaybee zfs_znode_dmu_fini(zp); 6610e276a64Smaybee zfs_znode_free(zp); 6620e276a64Smaybee goto out; 663fa9e4066Sahrens } 664fa9e4066Sahrens 665fa9e4066Sahrens if (xzp) { 6660a586ceaSMark Shellenbaum ASSERT(error == 0); 667fa9e4066Sahrens mutex_enter(&xzp->z_lock); 668893a6d32Sahrens xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */ 6690a586ceaSMark Shellenbaum xzp->z_links = 0; /* no more links to it */ 6700a586ceaSMark Shellenbaum VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), 6710a586ceaSMark Shellenbaum &xzp->z_links, sizeof (xzp->z_links), tx)); 672fa9e4066Sahrens mutex_exit(&xzp->z_lock); 673893a6d32Sahrens zfs_unlinked_add(xzp, tx); 674fa9e4066Sahrens } 675fa9e4066Sahrens 676893a6d32Sahrens /* Remove this znode from the unlinked set */ 677b420f3adSRichard Lowe VERIFY3U(0, ==, 678b420f3adSRichard Lowe zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx)); 679fa9e4066Sahrens 680fa9e4066Sahrens zfs_znode_delete(zp, tx); 681fa9e4066Sahrens 682fa9e4066Sahrens dmu_tx_commit(tx); 6830e276a64Smaybee out: 684fa9e4066Sahrens if (xzp) 685fa9e4066Sahrens VN_RELE(ZTOV(xzp)); 686fa9e4066Sahrens } 687fa9e4066Sahrens 688e7437265Sahrens static uint64_t 6890a586ceaSMark Shellenbaum zfs_dirent(znode_t *zp, uint64_t mode) 690e7437265Sahrens { 691e7437265Sahrens uint64_t de = zp->z_id; 6920a586ceaSMark Shellenbaum 693e7437265Sahrens if (zp->z_zfsvfs->z_version >= ZPL_VERSION_DIRENT_TYPE) 6940a586ceaSMark Shellenbaum de |= IFTODT(mode) << 60; 695e7437265Sahrens return (de); 696e7437265Sahrens } 697e7437265Sahrens 698fa9e4066Sahrens /* 699893a6d32Sahrens * Link zp into dl. Can only fail if zp has been unlinked. 700fa9e4066Sahrens */ 701fa9e4066Sahrens int 702fa9e4066Sahrens zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag) 703fa9e4066Sahrens { 704fa9e4066Sahrens znode_t *dzp = dl->dl_dzp; 7050a586ceaSMark Shellenbaum zfsvfs_t *zfsvfs = zp->z_zfsvfs; 706fa9e4066Sahrens vnode_t *vp = ZTOV(zp); 707b1b8ab34Slling uint64_t value; 708fa9e4066Sahrens int zp_is_dir = (vp->v_type == VDIR); 7090a586ceaSMark Shellenbaum sa_bulk_attr_t bulk[5]; 7100a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 7110a586ceaSMark Shellenbaum int count = 0; 712fa9e4066Sahrens int error; 713fa9e4066Sahrens 714fa9e4066Sahrens mutex_enter(&zp->z_lock); 715fa9e4066Sahrens 716fa9e4066Sahrens if (!(flag & ZRENAMING)) { 717893a6d32Sahrens if (zp->z_unlinked) { /* no new links to unlinked zp */ 718fa9e4066Sahrens ASSERT(!(flag & (ZNEW | ZEXISTS))); 719fa9e4066Sahrens mutex_exit(&zp->z_lock); 720be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 721fa9e4066Sahrens } 7220a586ceaSMark Shellenbaum zp->z_links++; 7230a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, 7240a586ceaSMark Shellenbaum &zp->z_links, sizeof (zp->z_links)); 725fa9e4066Sahrens 7260a586ceaSMark Shellenbaum } 7270a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, 7280a586ceaSMark Shellenbaum &dzp->z_id, sizeof (dzp->z_id)); 7290a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 7300a586ceaSMark Shellenbaum &zp->z_pflags, sizeof (zp->z_pflags)); 7310a586ceaSMark Shellenbaum 7320a586ceaSMark Shellenbaum if (!(flag & ZNEW)) { 7330a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 7340a586ceaSMark Shellenbaum ctime, sizeof (ctime)); 7350a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, 7360a586ceaSMark Shellenbaum ctime, B_TRUE); 7370a586ceaSMark Shellenbaum } 7380a586ceaSMark Shellenbaum error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 7390a586ceaSMark Shellenbaum ASSERT(error == 0); 7400a586ceaSMark Shellenbaum 741fa9e4066Sahrens mutex_exit(&zp->z_lock); 742fa9e4066Sahrens 743fa9e4066Sahrens mutex_enter(&dzp->z_lock); 7440a586ceaSMark Shellenbaum dzp->z_size++; 7450a586ceaSMark Shellenbaum dzp->z_links += zp_is_dir; 7460a586ceaSMark Shellenbaum count = 0; 7470a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 7480a586ceaSMark Shellenbaum &dzp->z_size, sizeof (dzp->z_size)); 7490a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, 7500a586ceaSMark Shellenbaum &dzp->z_links, sizeof (dzp->z_links)); 7510a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 7520a586ceaSMark Shellenbaum mtime, sizeof (mtime)); 7530a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 7540a586ceaSMark Shellenbaum ctime, sizeof (ctime)); 7550a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 7560a586ceaSMark Shellenbaum &dzp->z_pflags, sizeof (dzp->z_pflags)); 7570a586ceaSMark Shellenbaum zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); 7580a586ceaSMark Shellenbaum error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); 7590a586ceaSMark Shellenbaum ASSERT(error == 0); 760fa9e4066Sahrens mutex_exit(&dzp->z_lock); 761fa9e4066Sahrens 7620a586ceaSMark Shellenbaum value = zfs_dirent(zp, zp->z_mode); 763fa9e4066Sahrens error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, dl->dl_name, 764b1b8ab34Slling 8, 1, &value, tx); 765fa9e4066Sahrens ASSERT(error == 0); 766fa9e4066Sahrens 767033f9833Sek110237 dnlc_update(ZTOV(dzp), dl->dl_name, vp); 768033f9833Sek110237 769fa9e4066Sahrens return (0); 770fa9e4066Sahrens } 771fa9e4066Sahrens 7726ed5e6abSSam Falkner static int 7736ed5e6abSSam Falkner zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx, 7746ed5e6abSSam Falkner int flag) 7756ed5e6abSSam Falkner { 7766ed5e6abSSam Falkner int error; 7776ed5e6abSSam Falkner 7786ed5e6abSSam Falkner if (zp->z_zfsvfs->z_norm) { 7796ed5e6abSSam Falkner if (((zp->z_zfsvfs->z_case == ZFS_CASE_INSENSITIVE) && 7806ed5e6abSSam Falkner (flag & ZCIEXACT)) || 7816ed5e6abSSam Falkner ((zp->z_zfsvfs->z_case == ZFS_CASE_MIXED) && 7826ed5e6abSSam Falkner !(flag & ZCILOOK))) 7836ed5e6abSSam Falkner error = zap_remove_norm(zp->z_zfsvfs->z_os, 7846ed5e6abSSam Falkner dzp->z_id, dl->dl_name, MT_EXACT, tx); 7856ed5e6abSSam Falkner else 7866ed5e6abSSam Falkner error = zap_remove_norm(zp->z_zfsvfs->z_os, 7876ed5e6abSSam Falkner dzp->z_id, dl->dl_name, MT_FIRST, tx); 7886ed5e6abSSam Falkner } else { 7896ed5e6abSSam Falkner error = zap_remove(zp->z_zfsvfs->z_os, 7906ed5e6abSSam Falkner dzp->z_id, dl->dl_name, tx); 7916ed5e6abSSam Falkner } 7926ed5e6abSSam Falkner 7936ed5e6abSSam Falkner return (error); 7946ed5e6abSSam Falkner } 7956ed5e6abSSam Falkner 796fa9e4066Sahrens /* 797893a6d32Sahrens * Unlink zp from dl, and mark zp for deletion if this was the last link. 798fa9e4066Sahrens * Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST). 799893a6d32Sahrens * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list. 800893a6d32Sahrens * If it's non-NULL, we use it to indicate whether the znode needs deletion, 801fa9e4066Sahrens * and it's the caller's job to do it. 802fa9e4066Sahrens */ 803fa9e4066Sahrens int 804fa9e4066Sahrens zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag, 805893a6d32Sahrens boolean_t *unlinkedp) 806fa9e4066Sahrens { 807fa9e4066Sahrens znode_t *dzp = dl->dl_dzp; 8080a586ceaSMark Shellenbaum zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 809fa9e4066Sahrens vnode_t *vp = ZTOV(zp); 810fa9e4066Sahrens int zp_is_dir = (vp->v_type == VDIR); 811893a6d32Sahrens boolean_t unlinked = B_FALSE; 8120a586ceaSMark Shellenbaum sa_bulk_attr_t bulk[5]; 8130a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 8140a586ceaSMark Shellenbaum int count = 0; 815fa9e4066Sahrens int error; 816fa9e4066Sahrens 817033f9833Sek110237 dnlc_remove(ZTOV(dzp), dl->dl_name); 818033f9833Sek110237 819fa9e4066Sahrens if (!(flag & ZRENAMING)) { 820fa9e4066Sahrens if (vn_vfswlock(vp)) /* prevent new mounts on zp */ 821be6fd75aSMatthew Ahrens return (SET_ERROR(EBUSY)); 822fa9e4066Sahrens 823fa9e4066Sahrens if (vn_ismntpt(vp)) { /* don't remove mount point */ 824fa9e4066Sahrens vn_vfsunlock(vp); 825be6fd75aSMatthew Ahrens return (SET_ERROR(EBUSY)); 826fa9e4066Sahrens } 827fa9e4066Sahrens 828fa9e4066Sahrens mutex_enter(&zp->z_lock); 8290a586ceaSMark Shellenbaum 8300a586ceaSMark Shellenbaum if (zp_is_dir && !zfs_dirempty(zp)) { 831fa9e4066Sahrens mutex_exit(&zp->z_lock); 832fa9e4066Sahrens vn_vfsunlock(vp); 833be6fd75aSMatthew Ahrens return (SET_ERROR(EEXIST)); 834fa9e4066Sahrens } 8350a586ceaSMark Shellenbaum 8366ed5e6abSSam Falkner /* 8376ed5e6abSSam Falkner * If we get here, we are going to try to remove the object. 8386ed5e6abSSam Falkner * First try removing the name from the directory; if that 8396ed5e6abSSam Falkner * fails, return the error. 8406ed5e6abSSam Falkner */ 8416ed5e6abSSam Falkner error = zfs_dropname(dl, zp, dzp, tx, flag); 8426ed5e6abSSam Falkner if (error != 0) { 8436ed5e6abSSam Falkner mutex_exit(&zp->z_lock); 8446ed5e6abSSam Falkner vn_vfsunlock(vp); 8456ed5e6abSSam Falkner return (error); 8466ed5e6abSSam Falkner } 8476ed5e6abSSam Falkner 8480a586ceaSMark Shellenbaum if (zp->z_links <= zp_is_dir) { 8490125049cSahrens zfs_panic_recover("zfs: link count on %s is %u, " 8500125049cSahrens "should be at least %u", 8510125049cSahrens zp->z_vnode->v_path ? zp->z_vnode->v_path : 8520a586ceaSMark Shellenbaum "<unknown>", (int)zp->z_links, 8530125049cSahrens zp_is_dir + 1); 8540a586ceaSMark Shellenbaum zp->z_links = zp_is_dir + 1; 8550125049cSahrens } 8560a586ceaSMark Shellenbaum if (--zp->z_links == zp_is_dir) { 857893a6d32Sahrens zp->z_unlinked = B_TRUE; 8580a586ceaSMark Shellenbaum zp->z_links = 0; 859893a6d32Sahrens unlinked = B_TRUE; 860fa9e4066Sahrens } else { 8610a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), 8620a586ceaSMark Shellenbaum NULL, &ctime, sizeof (ctime)); 8630a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), 8640a586ceaSMark Shellenbaum NULL, &zp->z_pflags, sizeof (zp->z_pflags)); 8650a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 8660a586ceaSMark Shellenbaum B_TRUE); 867fa9e4066Sahrens } 8680a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), 8690a586ceaSMark Shellenbaum NULL, &zp->z_links, sizeof (zp->z_links)); 8700a586ceaSMark Shellenbaum error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 8710a586ceaSMark Shellenbaum count = 0; 8720a586ceaSMark Shellenbaum ASSERT(error == 0); 873fa9e4066Sahrens mutex_exit(&zp->z_lock); 874fa9e4066Sahrens vn_vfsunlock(vp); 8756ed5e6abSSam Falkner } else { 8766ed5e6abSSam Falkner error = zfs_dropname(dl, zp, dzp, tx, flag); 8776ed5e6abSSam Falkner if (error != 0) 8786ed5e6abSSam Falkner return (error); 879fa9e4066Sahrens } 880fa9e4066Sahrens 881fa9e4066Sahrens mutex_enter(&dzp->z_lock); 8820a586ceaSMark Shellenbaum dzp->z_size--; /* one dirent removed */ 8830a586ceaSMark Shellenbaum dzp->z_links -= zp_is_dir; /* ".." link from zp */ 8840a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), 8850a586ceaSMark Shellenbaum NULL, &dzp->z_links, sizeof (dzp->z_links)); 8860a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), 8870a586ceaSMark Shellenbaum NULL, &dzp->z_size, sizeof (dzp->z_size)); 8880a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), 8890a586ceaSMark Shellenbaum NULL, ctime, sizeof (ctime)); 8900a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 8910a586ceaSMark Shellenbaum NULL, mtime, sizeof (mtime)); 8920a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), 8930a586ceaSMark Shellenbaum NULL, &dzp->z_pflags, sizeof (dzp->z_pflags)); 8940a586ceaSMark Shellenbaum zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); 8950a586ceaSMark Shellenbaum error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx); 8960a586ceaSMark Shellenbaum ASSERT(error == 0); 897fa9e4066Sahrens mutex_exit(&dzp->z_lock); 898fa9e4066Sahrens 899893a6d32Sahrens if (unlinkedp != NULL) 900893a6d32Sahrens *unlinkedp = unlinked; 901893a6d32Sahrens else if (unlinked) 902893a6d32Sahrens zfs_unlinked_add(zp, tx); 903fa9e4066Sahrens 904fa9e4066Sahrens return (0); 905fa9e4066Sahrens } 906fa9e4066Sahrens 907fa9e4066Sahrens /* 908fa9e4066Sahrens * Indicate whether the directory is empty. Works with or without z_lock 909fa9e4066Sahrens * held, but can only be consider a hint in the latter case. Returns true 910fa9e4066Sahrens * if only "." and ".." remain and there's no work in progress. 911fa9e4066Sahrens */ 912fa9e4066Sahrens boolean_t 913fa9e4066Sahrens zfs_dirempty(znode_t *dzp) 914fa9e4066Sahrens { 9150a586ceaSMark Shellenbaum return (dzp->z_size == 2 && dzp->z_dirlocks == 0); 916fa9e4066Sahrens } 917fa9e4066Sahrens 918fa9e4066Sahrens int 919fa9e4066Sahrens zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr) 920fa9e4066Sahrens { 921fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 922fa9e4066Sahrens znode_t *xzp; 923fa9e4066Sahrens dmu_tx_t *tx; 924fa9e4066Sahrens int error; 92589459e17SMark Shellenbaum zfs_acl_ids_t acl_ids; 92689459e17SMark Shellenbaum boolean_t fuid_dirtied; 9270a586ceaSMark Shellenbaum uint64_t parent; 928fa9e4066Sahrens 929fa9e4066Sahrens *xvpp = NULL; 930fa9e4066Sahrens 931da6c28aaSamw if (error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr)) 932fa9e4066Sahrens return (error); 933fa9e4066Sahrens 93489459e17SMark Shellenbaum if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL, 93589459e17SMark Shellenbaum &acl_ids)) != 0) 93689459e17SMark Shellenbaum return (error); 93714843421SMatthew Ahrens if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 93814843421SMatthew Ahrens zfs_acl_ids_free(&acl_ids); 939be6fd75aSMatthew Ahrens return (SET_ERROR(EDQUOT)); 94014843421SMatthew Ahrens } 94189459e17SMark Shellenbaum 942fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 9430a586ceaSMark Shellenbaum dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 9440a586ceaSMark Shellenbaum ZFS_SA_BASE_ATTR_SIZE); 9450a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 946ea8dc4b6Seschrock dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 94789459e17SMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 94814843421SMatthew Ahrens if (fuid_dirtied) 94914843421SMatthew Ahrens zfs_fuid_txhold(zfsvfs, tx); 950e722410cSMatthew Ahrens error = dmu_tx_assign(tx, TXG_WAIT); 951fa9e4066Sahrens if (error) { 952c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 953c8c24165SMark Shellenbaum dmu_tx_abort(tx); 954fa9e4066Sahrens return (error); 955fa9e4066Sahrens } 9560a586ceaSMark Shellenbaum zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids); 95789459e17SMark Shellenbaum 95889459e17SMark Shellenbaum if (fuid_dirtied) 95989459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 96089459e17SMark Shellenbaum 961f3e6fb2fSMark Shellenbaum #ifdef DEBUG 962f3e6fb2fSMark Shellenbaum error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 963f3e6fb2fSMark Shellenbaum &parent, sizeof (parent)); 964f3e6fb2fSMark Shellenbaum ASSERT(error == 0 && parent == zp->z_id); 965f3e6fb2fSMark Shellenbaum #endif 9660a586ceaSMark Shellenbaum 9670a586ceaSMark Shellenbaum VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id, 9680a586ceaSMark Shellenbaum sizeof (xzp->z_id), tx)); 969fa9e4066Sahrens 970da6c28aaSamw (void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, 97189459e17SMark Shellenbaum xzp, "", NULL, acl_ids.z_fuidp, vap); 97289459e17SMark Shellenbaum 97389459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 974fa9e4066Sahrens dmu_tx_commit(tx); 975fa9e4066Sahrens 976fa9e4066Sahrens *xvpp = ZTOV(xzp); 977fa9e4066Sahrens 978fa9e4066Sahrens return (0); 979fa9e4066Sahrens } 980fa9e4066Sahrens 981fa9e4066Sahrens /* 982fa9e4066Sahrens * Return a znode for the extended attribute directory for zp. 983fa9e4066Sahrens * ** If the directory does not already exist, it is created ** 984fa9e4066Sahrens * 985fa9e4066Sahrens * IN: zp - znode to obtain attribute directory from 986fa9e4066Sahrens * cr - credentials of caller 9873f063a9dSck153898 * flags - flags from the VOP_LOOKUP call 988fa9e4066Sahrens * 989fa9e4066Sahrens * OUT: xzpp - pointer to extended attribute znode 990fa9e4066Sahrens * 991fa9e4066Sahrens * RETURN: 0 on success 992fa9e4066Sahrens * error number on failure 993fa9e4066Sahrens */ 994fa9e4066Sahrens int 9953f063a9dSck153898 zfs_get_xattrdir(znode_t *zp, vnode_t **xvpp, cred_t *cr, int flags) 996fa9e4066Sahrens { 997fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 998fa9e4066Sahrens znode_t *xzp; 999fa9e4066Sahrens zfs_dirlock_t *dl; 1000fa9e4066Sahrens vattr_t va; 1001fa9e4066Sahrens int error; 1002fa9e4066Sahrens top: 1003da6c28aaSamw error = zfs_dirent_lock(&dl, zp, "", &xzp, ZXATTR, NULL, NULL); 1004fa9e4066Sahrens if (error) 1005fa9e4066Sahrens return (error); 1006fa9e4066Sahrens 1007fa9e4066Sahrens if (xzp != NULL) { 1008fa9e4066Sahrens *xvpp = ZTOV(xzp); 1009fa9e4066Sahrens zfs_dirent_unlock(dl); 1010fa9e4066Sahrens return (0); 1011fa9e4066Sahrens } 1012fa9e4066Sahrens 1013fa9e4066Sahrens 10143f063a9dSck153898 if (!(flags & CREATE_XATTR_DIR)) { 10153f063a9dSck153898 zfs_dirent_unlock(dl); 1016be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 10173f063a9dSck153898 } 10183f063a9dSck153898 1019fa9e4066Sahrens if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 1020fa9e4066Sahrens zfs_dirent_unlock(dl); 1021be6fd75aSMatthew Ahrens return (SET_ERROR(EROFS)); 1022fa9e4066Sahrens } 1023fa9e4066Sahrens 1024fa9e4066Sahrens /* 1025fa9e4066Sahrens * The ability to 'create' files in an attribute 1026fa9e4066Sahrens * directory comes from the write_xattr permission on the base file. 1027fa9e4066Sahrens * 1028fa9e4066Sahrens * The ability to 'search' an attribute directory requires 1029fa9e4066Sahrens * read_xattr permission on the base file. 1030fa9e4066Sahrens * 1031fa9e4066Sahrens * Once in a directory the ability to read/write attributes 1032fa9e4066Sahrens * is controlled by the permissions on the attribute file. 1033fa9e4066Sahrens */ 1034fa9e4066Sahrens va.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID; 1035fa9e4066Sahrens va.va_type = VDIR; 1036d2443e76Smarks va.va_mode = S_IFDIR | S_ISVTX | 0777; 1037bda89588Sjp151216 zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid); 1038fa9e4066Sahrens 1039fa9e4066Sahrens error = zfs_make_xattrdir(zp, &va, xvpp, cr); 1040fa9e4066Sahrens zfs_dirent_unlock(dl); 1041fa9e4066Sahrens 10421209a471SNeil Perrin if (error == ERESTART) { 10438a2f1b91Sahrens /* NB: we already did dmu_tx_wait() if necessary */ 1044fa9e4066Sahrens goto top; 1045fa9e4066Sahrens } 1046fa9e4066Sahrens 1047fa9e4066Sahrens return (error); 1048fa9e4066Sahrens } 1049fa9e4066Sahrens 1050fa9e4066Sahrens /* 1051fa9e4066Sahrens * Decide whether it is okay to remove within a sticky directory. 1052fa9e4066Sahrens * 1053fa9e4066Sahrens * In sticky directories, write access is not sufficient; 1054fa9e4066Sahrens * you can remove entries from a directory only if: 1055fa9e4066Sahrens * 1056fa9e4066Sahrens * you own the directory, 1057fa9e4066Sahrens * you own the entry, 1058fa9e4066Sahrens * the entry is a plain file and you have write access, 1059fa9e4066Sahrens * or you are privileged (checked in secpolicy...). 1060fa9e4066Sahrens * 1061fa9e4066Sahrens * The function returns 0 if remove access is granted. 1062fa9e4066Sahrens */ 1063fa9e4066Sahrens int 1064fa9e4066Sahrens zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr) 1065fa9e4066Sahrens { 1066fa9e4066Sahrens uid_t uid; 1067f1696b23SMark Shellenbaum uid_t downer; 1068f1696b23SMark Shellenbaum uid_t fowner; 1069f1696b23SMark Shellenbaum zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1070fa9e4066Sahrens 10711209a471SNeil Perrin if (zdp->z_zfsvfs->z_replay) 1072fa9e4066Sahrens return (0); 1073fa9e4066Sahrens 10740a586ceaSMark Shellenbaum if ((zdp->z_mode & S_ISVTX) == 0) 1075da6c28aaSamw return (0); 1076da6c28aaSamw 1077f1696b23SMark Shellenbaum downer = zfs_fuid_map_id(zfsvfs, zdp->z_uid, cr, ZFS_OWNER); 1078f1696b23SMark Shellenbaum fowner = zfs_fuid_map_id(zfsvfs, zp->z_uid, cr, ZFS_OWNER); 1079f1696b23SMark Shellenbaum 1080f1696b23SMark Shellenbaum if ((uid = crgetuid(cr)) == downer || uid == fowner || 1081fa9e4066Sahrens (ZTOV(zp)->v_type == VREG && 1082da6c28aaSamw zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0)) 1083fa9e4066Sahrens return (0); 1084fa9e4066Sahrens else 1085fa9e4066Sahrens return (secpolicy_vnode_remove(cr)); 1086fa9e4066Sahrens } 1087