// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
 * Copyright 2017 Nexenta Systems, Inc.
 */
28eda14cbcSMatt Macy
29eda14cbcSMatt Macy #include <sys/types.h>
30eda14cbcSMatt Macy #include <sys/param.h>
31eda14cbcSMatt Macy #include <sys/time.h>
32eda14cbcSMatt Macy #include <sys/sysmacros.h>
33eda14cbcSMatt Macy #include <sys/vfs.h>
34eda14cbcSMatt Macy #include <sys/vnode.h>
35eda14cbcSMatt Macy #include <sys/file.h>
36eda14cbcSMatt Macy #include <sys/kmem.h>
37eda14cbcSMatt Macy #include <sys/uio.h>
38eda14cbcSMatt Macy #include <sys/pathname.h>
39eda14cbcSMatt Macy #include <sys/cmn_err.h>
40eda14cbcSMatt Macy #include <sys/errno.h>
41eda14cbcSMatt Macy #include <sys/stat.h>
42eda14cbcSMatt Macy #include <sys/sunddi.h>
43eda14cbcSMatt Macy #include <sys/random.h>
44eda14cbcSMatt Macy #include <sys/policy.h>
45eda14cbcSMatt Macy #include <sys/zfs_dir.h>
46eda14cbcSMatt Macy #include <sys/zfs_acl.h>
47eda14cbcSMatt Macy #include <sys/zfs_vnops.h>
48eda14cbcSMatt Macy #include <sys/fs/zfs.h>
49eda14cbcSMatt Macy #include <sys/zap.h>
50eda14cbcSMatt Macy #include <sys/dmu.h>
51eda14cbcSMatt Macy #include <sys/atomic.h>
52eda14cbcSMatt Macy #include <sys/zfs_ctldir.h>
53eda14cbcSMatt Macy #include <sys/zfs_fuid.h>
54eda14cbcSMatt Macy #include <sys/sa.h>
55eda14cbcSMatt Macy #include <sys/zfs_sa.h>
56eda14cbcSMatt Macy #include <sys/dmu_objset.h>
57eda14cbcSMatt Macy #include <sys/dsl_dir.h>
58eda14cbcSMatt Macy
59eda14cbcSMatt Macy /*
60eda14cbcSMatt Macy * zfs_match_find() is used by zfs_dirent_lock() to perform zap lookups
61eda14cbcSMatt Macy * of names after deciding which is the appropriate lookup interface.
62eda14cbcSMatt Macy */
63eda14cbcSMatt Macy static int
zfs_match_find(zfsvfs_t * zfsvfs,znode_t * dzp,const char * name,matchtype_t mt,boolean_t update,int * deflags,pathname_t * rpnp,uint64_t * zoid)64180f8225SMatt Macy zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, const char *name,
65180f8225SMatt Macy matchtype_t mt, boolean_t update, int *deflags, pathname_t *rpnp,
66180f8225SMatt Macy uint64_t *zoid)
67eda14cbcSMatt Macy {
68eda14cbcSMatt Macy boolean_t conflict = B_FALSE;
69eda14cbcSMatt Macy int error;
70eda14cbcSMatt Macy
71eda14cbcSMatt Macy if (zfsvfs->z_norm) {
72eda14cbcSMatt Macy size_t bufsz = 0;
73eda14cbcSMatt Macy char *buf = NULL;
74eda14cbcSMatt Macy
75eda14cbcSMatt Macy if (rpnp) {
76eda14cbcSMatt Macy buf = rpnp->pn_buf;
77eda14cbcSMatt Macy bufsz = rpnp->pn_bufsize;
78eda14cbcSMatt Macy }
79eda14cbcSMatt Macy
80eda14cbcSMatt Macy /*
81eda14cbcSMatt Macy * In the non-mixed case we only expect there would ever
82eda14cbcSMatt Macy * be one match, but we need to use the normalizing lookup.
83eda14cbcSMatt Macy */
84eda14cbcSMatt Macy error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1,
85eda14cbcSMatt Macy zoid, mt, buf, bufsz, &conflict);
86eda14cbcSMatt Macy } else {
87eda14cbcSMatt Macy error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid);
88eda14cbcSMatt Macy }
89eda14cbcSMatt Macy
90eda14cbcSMatt Macy /*
91eda14cbcSMatt Macy * Allow multiple entries provided the first entry is
92eda14cbcSMatt Macy * the object id. Non-zpl consumers may safely make
93eda14cbcSMatt Macy * use of the additional space.
94eda14cbcSMatt Macy *
95eda14cbcSMatt Macy * XXX: This should be a feature flag for compatibility
96eda14cbcSMatt Macy */
97eda14cbcSMatt Macy if (error == EOVERFLOW)
98eda14cbcSMatt Macy error = 0;
99eda14cbcSMatt Macy
100eda14cbcSMatt Macy if (zfsvfs->z_norm && !error && deflags)
101eda14cbcSMatt Macy *deflags = conflict ? ED_CASE_CONFLICT : 0;
102eda14cbcSMatt Macy
103eda14cbcSMatt Macy *zoid = ZFS_DIRENT_OBJ(*zoid);
104eda14cbcSMatt Macy
105eda14cbcSMatt Macy return (error);
106eda14cbcSMatt Macy }
107eda14cbcSMatt Macy
108eda14cbcSMatt Macy /*
109eda14cbcSMatt Macy * Lock a directory entry. A dirlock on <dzp, name> protects that name
110eda14cbcSMatt Macy * in dzp's directory zap object. As long as you hold a dirlock, you can
111eda14cbcSMatt Macy * assume two things: (1) dzp cannot be reaped, and (2) no other thread
112eda14cbcSMatt Macy * can change the zap entry for (i.e. link or unlink) this name.
113eda14cbcSMatt Macy *
114eda14cbcSMatt Macy * Input arguments:
115eda14cbcSMatt Macy * dzp - znode for directory
116eda14cbcSMatt Macy * name - name of entry to lock
117eda14cbcSMatt Macy * flag - ZNEW: if the entry already exists, fail with EEXIST.
118eda14cbcSMatt Macy * ZEXISTS: if the entry does not exist, fail with ENOENT.
119eda14cbcSMatt Macy * ZSHARED: allow concurrent access with other ZSHARED callers.
120eda14cbcSMatt Macy * ZXATTR: we want dzp's xattr directory
121eda14cbcSMatt Macy * ZCILOOK: On a mixed sensitivity file system,
122eda14cbcSMatt Macy * this lookup should be case-insensitive.
123eda14cbcSMatt Macy * ZCIEXACT: On a purely case-insensitive file system,
124eda14cbcSMatt Macy * this lookup should be case-sensitive.
125eda14cbcSMatt Macy * ZRENAMING: we are locking for renaming, force narrow locks
126eda14cbcSMatt Macy * ZHAVELOCK: Don't grab the z_name_lock for this call. The
127eda14cbcSMatt Macy * current thread already holds it.
128eda14cbcSMatt Macy *
129eda14cbcSMatt Macy * Output arguments:
130eda14cbcSMatt Macy * zpp - pointer to the znode for the entry (NULL if there isn't one)
131eda14cbcSMatt Macy * dlpp - pointer to the dirlock for this entry (NULL on error)
132eda14cbcSMatt Macy * direntflags - (case-insensitive lookup only)
133eda14cbcSMatt Macy * flags if multiple case-sensitive matches exist in directory
134eda14cbcSMatt Macy * realpnp - (case-insensitive lookup only)
135eda14cbcSMatt Macy * actual name matched within the directory
136eda14cbcSMatt Macy *
137eda14cbcSMatt Macy * Return value: 0 on success or errno on failure.
138eda14cbcSMatt Macy *
139eda14cbcSMatt Macy * NOTE: Always checks for, and rejects, '.' and '..'.
140eda14cbcSMatt Macy * NOTE: For case-insensitive file systems we take wide locks (see below),
141eda14cbcSMatt Macy * but return znode pointers to a single match.
142eda14cbcSMatt Macy */
143eda14cbcSMatt Macy int
zfs_dirent_lock(zfs_dirlock_t ** dlpp,znode_t * dzp,char * name,znode_t ** zpp,int flag,int * direntflags,pathname_t * realpnp)144180f8225SMatt Macy zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name,
145180f8225SMatt Macy znode_t **zpp, int flag, int *direntflags, pathname_t *realpnp)
146eda14cbcSMatt Macy {
147eda14cbcSMatt Macy zfsvfs_t *zfsvfs = ZTOZSB(dzp);
148eda14cbcSMatt Macy zfs_dirlock_t *dl;
149eda14cbcSMatt Macy boolean_t update;
150eda14cbcSMatt Macy matchtype_t mt = 0;
151eda14cbcSMatt Macy uint64_t zoid;
152eda14cbcSMatt Macy int error = 0;
153eda14cbcSMatt Macy int cmpflags;
154eda14cbcSMatt Macy
155eda14cbcSMatt Macy *zpp = NULL;
156eda14cbcSMatt Macy *dlpp = NULL;
157eda14cbcSMatt Macy
158eda14cbcSMatt Macy /*
159eda14cbcSMatt Macy * Verify that we are not trying to lock '.', '..', or '.zfs'
160eda14cbcSMatt Macy */
161eda14cbcSMatt Macy if ((name[0] == '.' &&
162eda14cbcSMatt Macy (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'))) ||
163eda14cbcSMatt Macy (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0))
164eda14cbcSMatt Macy return (SET_ERROR(EEXIST));
165eda14cbcSMatt Macy
166eda14cbcSMatt Macy /*
167eda14cbcSMatt Macy * Case sensitivity and normalization preferences are set when
168eda14cbcSMatt Macy * the file system is created. These are stored in the
169eda14cbcSMatt Macy * zfsvfs->z_case and zfsvfs->z_norm fields. These choices
170eda14cbcSMatt Macy * affect what vnodes can be cached in the DNLC, how we
171eda14cbcSMatt Macy * perform zap lookups, and the "width" of our dirlocks.
172eda14cbcSMatt Macy *
173eda14cbcSMatt Macy * A normal dirlock locks a single name. Note that with
174eda14cbcSMatt Macy * normalization a name can be composed multiple ways, but
175eda14cbcSMatt Macy * when normalized, these names all compare equal. A wide
176eda14cbcSMatt Macy * dirlock locks multiple names. We need these when the file
177eda14cbcSMatt Macy * system is supporting mixed-mode access. It is sometimes
178eda14cbcSMatt Macy * necessary to lock all case permutations of file name at
179eda14cbcSMatt Macy * once so that simultaneous case-insensitive/case-sensitive
180eda14cbcSMatt Macy * behaves as rationally as possible.
181eda14cbcSMatt Macy */
182eda14cbcSMatt Macy
183eda14cbcSMatt Macy /*
184eda14cbcSMatt Macy * When matching we may need to normalize & change case according to
185eda14cbcSMatt Macy * FS settings.
186eda14cbcSMatt Macy *
187eda14cbcSMatt Macy * Note that a normalized match is necessary for a case insensitive
188eda14cbcSMatt Macy * filesystem when the lookup request is not exact because normalization
189eda14cbcSMatt Macy * can fold case independent of normalizing code point sequences.
190eda14cbcSMatt Macy *
191eda14cbcSMatt Macy * See the table above zfs_dropname().
192eda14cbcSMatt Macy */
193eda14cbcSMatt Macy if (zfsvfs->z_norm != 0) {
194eda14cbcSMatt Macy mt = MT_NORMALIZE;
195eda14cbcSMatt Macy
196eda14cbcSMatt Macy /*
197eda14cbcSMatt Macy * Determine if the match needs to honor the case specified in
198eda14cbcSMatt Macy * lookup, and if so keep track of that so that during
199eda14cbcSMatt Macy * normalization we don't fold case.
200eda14cbcSMatt Macy */
201eda14cbcSMatt Macy if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE &&
202eda14cbcSMatt Macy (flag & ZCIEXACT)) ||
203eda14cbcSMatt Macy (zfsvfs->z_case == ZFS_CASE_MIXED && !(flag & ZCILOOK))) {
204eda14cbcSMatt Macy mt |= MT_MATCH_CASE;
205eda14cbcSMatt Macy }
206eda14cbcSMatt Macy }
207eda14cbcSMatt Macy
208eda14cbcSMatt Macy /*
209eda14cbcSMatt Macy * Only look in or update the DNLC if we are looking for the
210eda14cbcSMatt Macy * name on a file system that does not require normalization
211eda14cbcSMatt Macy * or case folding. We can also look there if we happen to be
212eda14cbcSMatt Macy * on a non-normalizing, mixed sensitivity file system IF we
213eda14cbcSMatt Macy * are looking for the exact name.
214eda14cbcSMatt Macy *
215eda14cbcSMatt Macy * Maybe can add TO-UPPERed version of name to dnlc in ci-only
216eda14cbcSMatt Macy * case for performance improvement?
217eda14cbcSMatt Macy */
218eda14cbcSMatt Macy update = !zfsvfs->z_norm ||
219eda14cbcSMatt Macy (zfsvfs->z_case == ZFS_CASE_MIXED &&
220eda14cbcSMatt Macy !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));
221eda14cbcSMatt Macy
222eda14cbcSMatt Macy /*
223eda14cbcSMatt Macy * ZRENAMING indicates we are in a situation where we should
224eda14cbcSMatt Macy * take narrow locks regardless of the file system's
225eda14cbcSMatt Macy * preferences for normalizing and case folding. This will
226eda14cbcSMatt Macy * prevent us deadlocking trying to grab the same wide lock
227eda14cbcSMatt Macy * twice if the two names happen to be case-insensitive
228eda14cbcSMatt Macy * matches.
229eda14cbcSMatt Macy */
230eda14cbcSMatt Macy if (flag & ZRENAMING)
231eda14cbcSMatt Macy cmpflags = 0;
232eda14cbcSMatt Macy else
233eda14cbcSMatt Macy cmpflags = zfsvfs->z_norm;
234eda14cbcSMatt Macy
235eda14cbcSMatt Macy /*
236eda14cbcSMatt Macy * Wait until there are no locks on this name.
237eda14cbcSMatt Macy *
238eda14cbcSMatt Macy * Don't grab the lock if it is already held. However, cannot
239eda14cbcSMatt Macy * have both ZSHARED and ZHAVELOCK together.
240eda14cbcSMatt Macy */
241eda14cbcSMatt Macy ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK));
242eda14cbcSMatt Macy if (!(flag & ZHAVELOCK))
243eda14cbcSMatt Macy rw_enter(&dzp->z_name_lock, RW_READER);
244eda14cbcSMatt Macy
245eda14cbcSMatt Macy mutex_enter(&dzp->z_lock);
246eda14cbcSMatt Macy for (;;) {
247eda14cbcSMatt Macy if (dzp->z_unlinked && !(flag & ZXATTR)) {
248eda14cbcSMatt Macy mutex_exit(&dzp->z_lock);
249eda14cbcSMatt Macy if (!(flag & ZHAVELOCK))
250eda14cbcSMatt Macy rw_exit(&dzp->z_name_lock);
251eda14cbcSMatt Macy return (SET_ERROR(ENOENT));
252eda14cbcSMatt Macy }
253eda14cbcSMatt Macy for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) {
254eda14cbcSMatt Macy if ((u8_strcmp(name, dl->dl_name, 0, cmpflags,
255eda14cbcSMatt Macy U8_UNICODE_LATEST, &error) == 0) || error != 0)
256eda14cbcSMatt Macy break;
257eda14cbcSMatt Macy }
258eda14cbcSMatt Macy if (error != 0) {
259eda14cbcSMatt Macy mutex_exit(&dzp->z_lock);
260eda14cbcSMatt Macy if (!(flag & ZHAVELOCK))
261eda14cbcSMatt Macy rw_exit(&dzp->z_name_lock);
262eda14cbcSMatt Macy return (SET_ERROR(ENOENT));
263eda14cbcSMatt Macy }
264eda14cbcSMatt Macy if (dl == NULL) {
265eda14cbcSMatt Macy /*
266eda14cbcSMatt Macy * Allocate a new dirlock and add it to the list.
267eda14cbcSMatt Macy */
268eda14cbcSMatt Macy dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP);
269eda14cbcSMatt Macy cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL);
270eda14cbcSMatt Macy dl->dl_name = name;
271eda14cbcSMatt Macy dl->dl_sharecnt = 0;
272eda14cbcSMatt Macy dl->dl_namelock = 0;
273eda14cbcSMatt Macy dl->dl_namesize = 0;
274eda14cbcSMatt Macy dl->dl_dzp = dzp;
275eda14cbcSMatt Macy dl->dl_next = dzp->z_dirlocks;
276eda14cbcSMatt Macy dzp->z_dirlocks = dl;
277eda14cbcSMatt Macy break;
278eda14cbcSMatt Macy }
279eda14cbcSMatt Macy if ((flag & ZSHARED) && dl->dl_sharecnt != 0)
280eda14cbcSMatt Macy break;
281eda14cbcSMatt Macy cv_wait(&dl->dl_cv, &dzp->z_lock);
282eda14cbcSMatt Macy }
283eda14cbcSMatt Macy
284eda14cbcSMatt Macy /*
285eda14cbcSMatt Macy * If the z_name_lock was NOT held for this dirlock record it.
286eda14cbcSMatt Macy */
287eda14cbcSMatt Macy if (flag & ZHAVELOCK)
288eda14cbcSMatt Macy dl->dl_namelock = 1;
289eda14cbcSMatt Macy
290eda14cbcSMatt Macy if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) {
291eda14cbcSMatt Macy /*
292eda14cbcSMatt Macy * We're the second shared reference to dl. Make a copy of
293eda14cbcSMatt Macy * dl_name in case the first thread goes away before we do.
294eda14cbcSMatt Macy * Note that we initialize the new name before storing its
295eda14cbcSMatt Macy * pointer into dl_name, because the first thread may load
296eda14cbcSMatt Macy * dl->dl_name at any time. It'll either see the old value,
297eda14cbcSMatt Macy * which belongs to it, or the new shared copy; either is OK.
298eda14cbcSMatt Macy */
299eda14cbcSMatt Macy dl->dl_namesize = strlen(dl->dl_name) + 1;
300eda14cbcSMatt Macy name = kmem_alloc(dl->dl_namesize, KM_SLEEP);
301da5137abSMartin Matuska memcpy(name, dl->dl_name, dl->dl_namesize);
302eda14cbcSMatt Macy dl->dl_name = name;
303eda14cbcSMatt Macy }
304eda14cbcSMatt Macy
305eda14cbcSMatt Macy mutex_exit(&dzp->z_lock);
306eda14cbcSMatt Macy
307eda14cbcSMatt Macy /*
308eda14cbcSMatt Macy * We have a dirlock on the name. (Note that it is the dirlock,
309eda14cbcSMatt Macy * not the dzp's z_lock, that protects the name in the zap object.)
310eda14cbcSMatt Macy * See if there's an object by this name; if so, put a hold on it.
311eda14cbcSMatt Macy */
312eda14cbcSMatt Macy if (flag & ZXATTR) {
313eda14cbcSMatt Macy error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid,
314eda14cbcSMatt Macy sizeof (zoid));
315eda14cbcSMatt Macy if (error == 0)
316eda14cbcSMatt Macy error = (zoid == 0 ? SET_ERROR(ENOENT) : 0);
317eda14cbcSMatt Macy } else {
318eda14cbcSMatt Macy error = zfs_match_find(zfsvfs, dzp, name, mt,
319eda14cbcSMatt Macy update, direntflags, realpnp, &zoid);
320eda14cbcSMatt Macy }
321eda14cbcSMatt Macy if (error) {
322eda14cbcSMatt Macy if (error != ENOENT || (flag & ZEXISTS)) {
323eda14cbcSMatt Macy zfs_dirent_unlock(dl);
324eda14cbcSMatt Macy return (error);
325eda14cbcSMatt Macy }
326eda14cbcSMatt Macy } else {
327eda14cbcSMatt Macy if (flag & ZNEW) {
328eda14cbcSMatt Macy zfs_dirent_unlock(dl);
329eda14cbcSMatt Macy return (SET_ERROR(EEXIST));
330eda14cbcSMatt Macy }
331eda14cbcSMatt Macy error = zfs_zget(zfsvfs, zoid, zpp);
332eda14cbcSMatt Macy if (error) {
333eda14cbcSMatt Macy zfs_dirent_unlock(dl);
334eda14cbcSMatt Macy return (error);
335eda14cbcSMatt Macy }
336eda14cbcSMatt Macy }
337eda14cbcSMatt Macy
338eda14cbcSMatt Macy *dlpp = dl;
339eda14cbcSMatt Macy
340eda14cbcSMatt Macy return (0);
341eda14cbcSMatt Macy }
342eda14cbcSMatt Macy
343eda14cbcSMatt Macy /*
344eda14cbcSMatt Macy * Unlock this directory entry and wake anyone who was waiting for it.
345eda14cbcSMatt Macy */
346eda14cbcSMatt Macy void
zfs_dirent_unlock(zfs_dirlock_t * dl)347eda14cbcSMatt Macy zfs_dirent_unlock(zfs_dirlock_t *dl)
348eda14cbcSMatt Macy {
349eda14cbcSMatt Macy znode_t *dzp = dl->dl_dzp;
350eda14cbcSMatt Macy zfs_dirlock_t **prev_dl, *cur_dl;
351eda14cbcSMatt Macy
352eda14cbcSMatt Macy mutex_enter(&dzp->z_lock);
353eda14cbcSMatt Macy
354eda14cbcSMatt Macy if (!dl->dl_namelock)
355eda14cbcSMatt Macy rw_exit(&dzp->z_name_lock);
356eda14cbcSMatt Macy
357eda14cbcSMatt Macy if (dl->dl_sharecnt > 1) {
358eda14cbcSMatt Macy dl->dl_sharecnt--;
359eda14cbcSMatt Macy mutex_exit(&dzp->z_lock);
360eda14cbcSMatt Macy return;
361eda14cbcSMatt Macy }
362eda14cbcSMatt Macy prev_dl = &dzp->z_dirlocks;
363eda14cbcSMatt Macy while ((cur_dl = *prev_dl) != dl)
364eda14cbcSMatt Macy prev_dl = &cur_dl->dl_next;
365eda14cbcSMatt Macy *prev_dl = dl->dl_next;
366eda14cbcSMatt Macy cv_broadcast(&dl->dl_cv);
367eda14cbcSMatt Macy mutex_exit(&dzp->z_lock);
368eda14cbcSMatt Macy
369eda14cbcSMatt Macy if (dl->dl_namesize != 0)
370eda14cbcSMatt Macy kmem_free(dl->dl_name, dl->dl_namesize);
371eda14cbcSMatt Macy cv_destroy(&dl->dl_cv);
372eda14cbcSMatt Macy kmem_free(dl, sizeof (*dl));
373eda14cbcSMatt Macy }
374eda14cbcSMatt Macy
375eda14cbcSMatt Macy /*
376eda14cbcSMatt Macy * Look up an entry in a directory.
377eda14cbcSMatt Macy *
378eda14cbcSMatt Macy * NOTE: '.' and '..' are handled as special cases because
379eda14cbcSMatt Macy * no directory entries are actually stored for them. If this is
380eda14cbcSMatt Macy * the root of a filesystem, then '.zfs' is also treated as a
381eda14cbcSMatt Macy * special pseudo-directory.
382eda14cbcSMatt Macy */
383eda14cbcSMatt Macy int
zfs_dirlook(znode_t * dzp,char * name,znode_t ** zpp,int flags,int * deflg,pathname_t * rpnp)384eda14cbcSMatt Macy zfs_dirlook(znode_t *dzp, char *name, znode_t **zpp, int flags,
385eda14cbcSMatt Macy int *deflg, pathname_t *rpnp)
386eda14cbcSMatt Macy {
387eda14cbcSMatt Macy zfs_dirlock_t *dl;
388eda14cbcSMatt Macy znode_t *zp;
389eda14cbcSMatt Macy struct inode *ip;
390eda14cbcSMatt Macy int error = 0;
391eda14cbcSMatt Macy uint64_t parent;
392eda14cbcSMatt Macy
393eda14cbcSMatt Macy if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
394eda14cbcSMatt Macy *zpp = dzp;
395eda14cbcSMatt Macy zhold(*zpp);
396eda14cbcSMatt Macy } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
397eda14cbcSMatt Macy zfsvfs_t *zfsvfs = ZTOZSB(dzp);
398eda14cbcSMatt Macy
399eda14cbcSMatt Macy /*
400eda14cbcSMatt Macy * If we are a snapshot mounted under .zfs, return
401eda14cbcSMatt Macy * the inode pointer for the snapshot directory.
402eda14cbcSMatt Macy */
403eda14cbcSMatt Macy if ((error = sa_lookup(dzp->z_sa_hdl,
404eda14cbcSMatt Macy SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
405eda14cbcSMatt Macy return (error);
406eda14cbcSMatt Macy
407eda14cbcSMatt Macy if (parent == dzp->z_id && zfsvfs->z_parent != zfsvfs) {
408eda14cbcSMatt Macy error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir,
409eda14cbcSMatt Macy "snapshot", &ip, 0, kcred, NULL, NULL);
410eda14cbcSMatt Macy *zpp = ITOZ(ip);
411eda14cbcSMatt Macy return (error);
412eda14cbcSMatt Macy }
413eda14cbcSMatt Macy rw_enter(&dzp->z_parent_lock, RW_READER);
414eda14cbcSMatt Macy error = zfs_zget(zfsvfs, parent, &zp);
415eda14cbcSMatt Macy if (error == 0)
416eda14cbcSMatt Macy *zpp = zp;
417eda14cbcSMatt Macy rw_exit(&dzp->z_parent_lock);
418eda14cbcSMatt Macy } else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) {
4197a7741afSMartin Matuska if (ZTOZSB(dzp)->z_show_ctldir == ZFS_SNAPDIR_DISABLED) {
4207a7741afSMartin Matuska return (SET_ERROR(ENOENT));
4217a7741afSMartin Matuska }
422eda14cbcSMatt Macy ip = zfsctl_root(dzp);
423eda14cbcSMatt Macy *zpp = ITOZ(ip);
424eda14cbcSMatt Macy } else {
425eda14cbcSMatt Macy int zf;
426eda14cbcSMatt Macy
427eda14cbcSMatt Macy zf = ZEXISTS | ZSHARED;
428eda14cbcSMatt Macy if (flags & FIGNORECASE)
429eda14cbcSMatt Macy zf |= ZCILOOK;
430eda14cbcSMatt Macy
431eda14cbcSMatt Macy error = zfs_dirent_lock(&dl, dzp, name, &zp, zf, deflg, rpnp);
432eda14cbcSMatt Macy if (error == 0) {
433eda14cbcSMatt Macy *zpp = zp;
434eda14cbcSMatt Macy zfs_dirent_unlock(dl);
435eda14cbcSMatt Macy dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */
436eda14cbcSMatt Macy }
437eda14cbcSMatt Macy rpnp = NULL;
438eda14cbcSMatt Macy }
439eda14cbcSMatt Macy
440eda14cbcSMatt Macy if ((flags & FIGNORECASE) && rpnp && !error)
441eda14cbcSMatt Macy (void) strlcpy(rpnp->pn_buf, name, rpnp->pn_bufsize);
442eda14cbcSMatt Macy
443eda14cbcSMatt Macy return (error);
444eda14cbcSMatt Macy }
445eda14cbcSMatt Macy
446eda14cbcSMatt Macy /*
447eda14cbcSMatt Macy * unlinked Set (formerly known as the "delete queue") Error Handling
448eda14cbcSMatt Macy *
449eda14cbcSMatt Macy * When dealing with the unlinked set, we dmu_tx_hold_zap(), but we
450eda14cbcSMatt Macy * don't specify the name of the entry that we will be manipulating. We
451eda14cbcSMatt Macy * also fib and say that we won't be adding any new entries to the
452eda14cbcSMatt Macy * unlinked set, even though we might (this is to lower the minimum file
453eda14cbcSMatt Macy * size that can be deleted in a full filesystem). So on the small
454eda14cbcSMatt Macy * chance that the nlink list is using a fat zap (ie. has more than
455eda14cbcSMatt Macy * 2000 entries), we *may* not pre-read a block that's needed.
456eda14cbcSMatt Macy * Therefore it is remotely possible for some of the assertions
457eda14cbcSMatt Macy * regarding the unlinked set below to fail due to i/o error. On a
458eda14cbcSMatt Macy * nondebug system, this will result in the space being leaked.
459eda14cbcSMatt Macy */
460eda14cbcSMatt Macy void
zfs_unlinked_add(znode_t * zp,dmu_tx_t * tx)461eda14cbcSMatt Macy zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
462eda14cbcSMatt Macy {
463eda14cbcSMatt Macy zfsvfs_t *zfsvfs = ZTOZSB(zp);
464eda14cbcSMatt Macy
465eda14cbcSMatt Macy ASSERT(zp->z_unlinked);
466*d0abb9a6SMartin Matuska ASSERT0(ZTOI(zp)->i_nlink);
467eda14cbcSMatt Macy
468eda14cbcSMatt Macy VERIFY3U(0, ==,
469eda14cbcSMatt Macy zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
470eda14cbcSMatt Macy
471eda14cbcSMatt Macy dataset_kstats_update_nunlinks_kstat(&zfsvfs->z_kstat, 1);
472eda14cbcSMatt Macy }
473eda14cbcSMatt Macy
474eda14cbcSMatt Macy /*
475eda14cbcSMatt Macy * Clean up any znodes that had no links when we either crashed or
476eda14cbcSMatt Macy * (force) umounted the file system.
477eda14cbcSMatt Macy */
478eda14cbcSMatt Macy static void
zfs_unlinked_drain_task(void * arg)479eda14cbcSMatt Macy zfs_unlinked_drain_task(void *arg)
480eda14cbcSMatt Macy {
481eda14cbcSMatt Macy zfsvfs_t *zfsvfs = arg;
482eda14cbcSMatt Macy zap_cursor_t zc;
4837a7741afSMartin Matuska zap_attribute_t *zap = zap_attribute_alloc();
484eda14cbcSMatt Macy dmu_object_info_t doi;
485eda14cbcSMatt Macy znode_t *zp;
486eda14cbcSMatt Macy int error;
487eda14cbcSMatt Macy
488eda14cbcSMatt Macy ASSERT3B(zfsvfs->z_draining, ==, B_TRUE);
489eda14cbcSMatt Macy
490eda14cbcSMatt Macy /*
491eda14cbcSMatt Macy * Iterate over the contents of the unlinked set.
492eda14cbcSMatt Macy */
493eda14cbcSMatt Macy for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
4947a7741afSMartin Matuska zap_cursor_retrieve(&zc, zap) == 0 && !zfsvfs->z_drain_cancel;
495eda14cbcSMatt Macy zap_cursor_advance(&zc)) {
496eda14cbcSMatt Macy
497eda14cbcSMatt Macy /*
498eda14cbcSMatt Macy * See what kind of object we have in list
499eda14cbcSMatt Macy */
500eda14cbcSMatt Macy
501eda14cbcSMatt Macy error = dmu_object_info(zfsvfs->z_os,
5027a7741afSMartin Matuska zap->za_first_integer, &doi);
503eda14cbcSMatt Macy if (error != 0)
504eda14cbcSMatt Macy continue;
505eda14cbcSMatt Macy
506eda14cbcSMatt Macy ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
507eda14cbcSMatt Macy (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));
508eda14cbcSMatt Macy /*
509eda14cbcSMatt Macy * We need to re-mark these list entries for deletion,
510eda14cbcSMatt Macy * so we pull them back into core and set zp->z_unlinked.
511eda14cbcSMatt Macy */
5127a7741afSMartin Matuska error = zfs_zget(zfsvfs, zap->za_first_integer, &zp);
513eda14cbcSMatt Macy
514eda14cbcSMatt Macy /*
515eda14cbcSMatt Macy * We may pick up znodes that are already marked for deletion.
516eda14cbcSMatt Macy * This could happen during the purge of an extended attribute
517eda14cbcSMatt Macy * directory. All we need to do is skip over them, since they
518eda14cbcSMatt Macy * are already in the system marked z_unlinked.
519eda14cbcSMatt Macy */
520eda14cbcSMatt Macy if (error != 0)
521eda14cbcSMatt Macy continue;
522eda14cbcSMatt Macy
523eda14cbcSMatt Macy zp->z_unlinked = B_TRUE;
524eda14cbcSMatt Macy
525eda14cbcSMatt Macy /*
526eda14cbcSMatt Macy * zrele() decrements the znode's ref count and may cause
527eda14cbcSMatt Macy * it to be synchronously freed. We interrupt freeing
528eda14cbcSMatt Macy * of this znode by checking the return value of
529eda14cbcSMatt Macy * dmu_objset_zfs_unmounting() in dmu_free_long_range()
530eda14cbcSMatt Macy * when an unmount is requested.
531eda14cbcSMatt Macy */
532eda14cbcSMatt Macy zrele(zp);
533eda14cbcSMatt Macy ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
534eda14cbcSMatt Macy }
535eda14cbcSMatt Macy zap_cursor_fini(&zc);
536eda14cbcSMatt Macy
537eda14cbcSMatt Macy zfsvfs->z_draining = B_FALSE;
538eda14cbcSMatt Macy zfsvfs->z_drain_task = TASKQID_INVALID;
5397a7741afSMartin Matuska zap_attribute_free(zap);
540eda14cbcSMatt Macy }
541eda14cbcSMatt Macy
542eda14cbcSMatt Macy /*
543eda14cbcSMatt Macy * Sets z_draining then tries to dispatch async unlinked drain.
544eda14cbcSMatt Macy * If that fails executes synchronous unlinked drain.
545eda14cbcSMatt Macy */
546eda14cbcSMatt Macy void
zfs_unlinked_drain(zfsvfs_t * zfsvfs)547eda14cbcSMatt Macy zfs_unlinked_drain(zfsvfs_t *zfsvfs)
548eda14cbcSMatt Macy {
549eda14cbcSMatt Macy ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
550eda14cbcSMatt Macy ASSERT3B(zfsvfs->z_draining, ==, B_FALSE);
551eda14cbcSMatt Macy
552eda14cbcSMatt Macy zfsvfs->z_draining = B_TRUE;
553eda14cbcSMatt Macy zfsvfs->z_drain_cancel = B_FALSE;
554eda14cbcSMatt Macy
555eda14cbcSMatt Macy zfsvfs->z_drain_task = taskq_dispatch(
556eda14cbcSMatt Macy dsl_pool_unlinked_drain_taskq(dmu_objset_pool(zfsvfs->z_os)),
557eda14cbcSMatt Macy zfs_unlinked_drain_task, zfsvfs, TQ_SLEEP);
558eda14cbcSMatt Macy if (zfsvfs->z_drain_task == TASKQID_INVALID) {
559eda14cbcSMatt Macy zfs_dbgmsg("async zfs_unlinked_drain dispatch failed");
560eda14cbcSMatt Macy zfs_unlinked_drain_task(zfsvfs);
561eda14cbcSMatt Macy }
562eda14cbcSMatt Macy }
563eda14cbcSMatt Macy
564eda14cbcSMatt Macy /*
565eda14cbcSMatt Macy * Wait for the unlinked drain taskq task to stop. This will interrupt the
566eda14cbcSMatt Macy * unlinked set processing if it is in progress.
567eda14cbcSMatt Macy */
568eda14cbcSMatt Macy void
zfs_unlinked_drain_stop_wait(zfsvfs_t * zfsvfs)569eda14cbcSMatt Macy zfs_unlinked_drain_stop_wait(zfsvfs_t *zfsvfs)
570eda14cbcSMatt Macy {
571eda14cbcSMatt Macy ASSERT3B(zfsvfs->z_unmounted, ==, B_FALSE);
572eda14cbcSMatt Macy
573eda14cbcSMatt Macy if (zfsvfs->z_draining) {
574eda14cbcSMatt Macy zfsvfs->z_drain_cancel = B_TRUE;
575eda14cbcSMatt Macy taskq_cancel_id(dsl_pool_unlinked_drain_taskq(
576eda14cbcSMatt Macy dmu_objset_pool(zfsvfs->z_os)), zfsvfs->z_drain_task);
577eda14cbcSMatt Macy zfsvfs->z_drain_task = TASKQID_INVALID;
578eda14cbcSMatt Macy zfsvfs->z_draining = B_FALSE;
579eda14cbcSMatt Macy }
580eda14cbcSMatt Macy }
581eda14cbcSMatt Macy
582eda14cbcSMatt Macy /*
583eda14cbcSMatt Macy * Delete the entire contents of a directory. Return a count
584eda14cbcSMatt Macy * of the number of entries that could not be deleted. If we encounter
585eda14cbcSMatt Macy * an error, return a count of at least one so that the directory stays
586eda14cbcSMatt Macy * in the unlinked set.
587eda14cbcSMatt Macy *
588eda14cbcSMatt Macy * NOTE: this function assumes that the directory is inactive,
589eda14cbcSMatt Macy * so there is no need to lock its entries before deletion.
590eda14cbcSMatt Macy * Also, it assumes the directory contents is *only* regular
591eda14cbcSMatt Macy * files.
592eda14cbcSMatt Macy */
593eda14cbcSMatt Macy static int
zfs_purgedir(znode_t * dzp)594eda14cbcSMatt Macy zfs_purgedir(znode_t *dzp)
595eda14cbcSMatt Macy {
596eda14cbcSMatt Macy zap_cursor_t zc;
5977a7741afSMartin Matuska zap_attribute_t *zap = zap_attribute_alloc();
598eda14cbcSMatt Macy znode_t *xzp;
599eda14cbcSMatt Macy dmu_tx_t *tx;
600eda14cbcSMatt Macy zfsvfs_t *zfsvfs = ZTOZSB(dzp);
601eda14cbcSMatt Macy zfs_dirlock_t dl;
602eda14cbcSMatt Macy int skipped = 0;
603eda14cbcSMatt Macy int error;
604eda14cbcSMatt Macy
605eda14cbcSMatt Macy for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
6067a7741afSMartin Matuska (error = zap_cursor_retrieve(&zc, zap)) == 0;
607eda14cbcSMatt Macy zap_cursor_advance(&zc)) {
608eda14cbcSMatt Macy error = zfs_zget(zfsvfs,
6097a7741afSMartin Matuska ZFS_DIRENT_OBJ(zap->za_first_integer), &xzp);
610eda14cbcSMatt Macy if (error) {
611eda14cbcSMatt Macy skipped += 1;
612eda14cbcSMatt Macy continue;
613eda14cbcSMatt Macy }
614eda14cbcSMatt Macy
615eda14cbcSMatt Macy ASSERT(S_ISREG(ZTOI(xzp)->i_mode) ||
616eda14cbcSMatt Macy S_ISLNK(ZTOI(xzp)->i_mode));
617eda14cbcSMatt Macy
618eda14cbcSMatt Macy tx = dmu_tx_create(zfsvfs->z_os);
619eda14cbcSMatt Macy dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
6207a7741afSMartin Matuska dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap->za_name);
621eda14cbcSMatt Macy dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
622eda14cbcSMatt Macy dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
623eda14cbcSMatt Macy /* Is this really needed ? */
624eda14cbcSMatt Macy zfs_sa_upgrade_txholds(tx, xzp);
625eda14cbcSMatt Macy dmu_tx_mark_netfree(tx);
62661145dc2SMartin Matuska error = dmu_tx_assign(tx, DMU_TX_WAIT);
627eda14cbcSMatt Macy if (error) {
628eda14cbcSMatt Macy dmu_tx_abort(tx);
629eda14cbcSMatt Macy zfs_zrele_async(xzp);
630eda14cbcSMatt Macy skipped += 1;
631eda14cbcSMatt Macy continue;
632eda14cbcSMatt Macy }
633da5137abSMartin Matuska memset(&dl, 0, sizeof (dl));
634eda14cbcSMatt Macy dl.dl_dzp = dzp;
6357a7741afSMartin Matuska dl.dl_name = zap->za_name;
636eda14cbcSMatt Macy
637eda14cbcSMatt Macy error = zfs_link_destroy(&dl, xzp, tx, 0, NULL);
638eda14cbcSMatt Macy if (error)
639eda14cbcSMatt Macy skipped += 1;
640eda14cbcSMatt Macy dmu_tx_commit(tx);
641eda14cbcSMatt Macy
642eda14cbcSMatt Macy zfs_zrele_async(xzp);
643eda14cbcSMatt Macy }
644eda14cbcSMatt Macy zap_cursor_fini(&zc);
6457a7741afSMartin Matuska zap_attribute_free(zap);
646eda14cbcSMatt Macy if (error != ENOENT)
647eda14cbcSMatt Macy skipped += 1;
648eda14cbcSMatt Macy return (skipped);
649eda14cbcSMatt Macy }
650eda14cbcSMatt Macy
651eda14cbcSMatt Macy void
zfs_rmnode(znode_t * zp)652eda14cbcSMatt Macy zfs_rmnode(znode_t *zp)
653eda14cbcSMatt Macy {
654eda14cbcSMatt Macy zfsvfs_t *zfsvfs = ZTOZSB(zp);
655eda14cbcSMatt Macy objset_t *os = zfsvfs->z_os;
656eda14cbcSMatt Macy znode_t *xzp = NULL;
657eda14cbcSMatt Macy dmu_tx_t *tx;
65815f0b8c3SMartin Matuska znode_hold_t *zh;
65915f0b8c3SMartin Matuska uint64_t z_id = zp->z_id;
660eda14cbcSMatt Macy uint64_t acl_obj;
661eda14cbcSMatt Macy uint64_t xattr_obj;
662eda14cbcSMatt Macy uint64_t links;
663eda14cbcSMatt Macy int error;
664eda14cbcSMatt Macy
665*d0abb9a6SMartin Matuska ASSERT0(ZTOI(zp)->i_nlink);
666*d0abb9a6SMartin Matuska ASSERT0(atomic_read(&ZTOI(zp)->i_count));
667eda14cbcSMatt Macy
668eda14cbcSMatt Macy /*
669eda14cbcSMatt Macy * If this is an attribute directory, purge its contents.
670eda14cbcSMatt Macy */
671eda14cbcSMatt Macy if (S_ISDIR(ZTOI(zp)->i_mode) && (zp->z_pflags & ZFS_XATTR)) {
672eda14cbcSMatt Macy if (zfs_purgedir(zp) != 0) {
673eda14cbcSMatt Macy /*
674eda14cbcSMatt Macy * Not enough space to delete some xattrs.
675eda14cbcSMatt Macy * Leave it in the unlinked set.
676eda14cbcSMatt Macy */
67715f0b8c3SMartin Matuska zh = zfs_znode_hold_enter(zfsvfs, z_id);
678eda14cbcSMatt Macy zfs_znode_dmu_fini(zp);
67915f0b8c3SMartin Matuska zfs_znode_hold_exit(zfsvfs, zh);
680eda14cbcSMatt Macy return;
681eda14cbcSMatt Macy }
682eda14cbcSMatt Macy }
683eda14cbcSMatt Macy
684eda14cbcSMatt Macy /*
685eda14cbcSMatt Macy * Free up all the data in the file. We don't do this for directories
686eda14cbcSMatt Macy * because we need truncate and remove to be in the same tx, like in
687eda14cbcSMatt Macy * zfs_znode_delete(). Otherwise, if we crash here we'll end up with
688eda14cbcSMatt Macy * an inconsistent truncated zap object in the delete queue. Note a
689eda14cbcSMatt Macy * truncated file is harmless since it only contains user data.
690eda14cbcSMatt Macy */
691eda14cbcSMatt Macy if (S_ISREG(ZTOI(zp)->i_mode)) {
692eda14cbcSMatt Macy error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
693eda14cbcSMatt Macy if (error) {
694eda14cbcSMatt Macy /*
695eda14cbcSMatt Macy * Not enough space or we were interrupted by unmount.
696eda14cbcSMatt Macy * Leave the file in the unlinked set.
697eda14cbcSMatt Macy */
69815f0b8c3SMartin Matuska zh = zfs_znode_hold_enter(zfsvfs, z_id);
699eda14cbcSMatt Macy zfs_znode_dmu_fini(zp);
70015f0b8c3SMartin Matuska zfs_znode_hold_exit(zfsvfs, zh);
701eda14cbcSMatt Macy return;
702eda14cbcSMatt Macy }
703eda14cbcSMatt Macy }
704eda14cbcSMatt Macy
705eda14cbcSMatt Macy /*
706eda14cbcSMatt Macy * If the file has extended attributes, we're going to unlink
707eda14cbcSMatt Macy * the xattr dir.
708eda14cbcSMatt Macy */
709eda14cbcSMatt Macy error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
710eda14cbcSMatt Macy &xattr_obj, sizeof (xattr_obj));
711eda14cbcSMatt Macy if (error == 0 && xattr_obj) {
712eda14cbcSMatt Macy error = zfs_zget(zfsvfs, xattr_obj, &xzp);
713*d0abb9a6SMartin Matuska ASSERT0(error);
714eda14cbcSMatt Macy }
715eda14cbcSMatt Macy
716eda14cbcSMatt Macy acl_obj = zfs_external_acl(zp);
717eda14cbcSMatt Macy
718eda14cbcSMatt Macy /*
719eda14cbcSMatt Macy * Set up the final transaction.
720eda14cbcSMatt Macy */
721eda14cbcSMatt Macy tx = dmu_tx_create(os);
722eda14cbcSMatt Macy dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
723eda14cbcSMatt Macy dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
724eda14cbcSMatt Macy if (xzp) {
725eda14cbcSMatt Macy dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
726eda14cbcSMatt Macy dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
727eda14cbcSMatt Macy }
728eda14cbcSMatt Macy if (acl_obj)
729eda14cbcSMatt Macy dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
730eda14cbcSMatt Macy
731eda14cbcSMatt Macy zfs_sa_upgrade_txholds(tx, zp);
73261145dc2SMartin Matuska error = dmu_tx_assign(tx, DMU_TX_WAIT);
733eda14cbcSMatt Macy if (error) {
734eda14cbcSMatt Macy /*
735eda14cbcSMatt Macy * Not enough space to delete the file. Leave it in the
736eda14cbcSMatt Macy * unlinked set, leaking it until the fs is remounted (at
737eda14cbcSMatt Macy * which point we'll call zfs_unlinked_drain() to process it).
738eda14cbcSMatt Macy */
739eda14cbcSMatt Macy dmu_tx_abort(tx);
74015f0b8c3SMartin Matuska zh = zfs_znode_hold_enter(zfsvfs, z_id);
741eda14cbcSMatt Macy zfs_znode_dmu_fini(zp);
74215f0b8c3SMartin Matuska zfs_znode_hold_exit(zfsvfs, zh);
743eda14cbcSMatt Macy goto out;
744eda14cbcSMatt Macy }
745eda14cbcSMatt Macy
746eda14cbcSMatt Macy if (xzp) {
747*d0abb9a6SMartin Matuska ASSERT0(error);
748eda14cbcSMatt Macy mutex_enter(&xzp->z_lock);
749eda14cbcSMatt Macy xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */
750eda14cbcSMatt Macy clear_nlink(ZTOI(xzp)); /* no more links to it */
751eda14cbcSMatt Macy links = 0;
752*d0abb9a6SMartin Matuska VERIFY0(sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
753eda14cbcSMatt Macy &links, sizeof (links), tx));
754eda14cbcSMatt Macy mutex_exit(&xzp->z_lock);
755eda14cbcSMatt Macy zfs_unlinked_add(xzp, tx);
756eda14cbcSMatt Macy }
757eda14cbcSMatt Macy
758eda14cbcSMatt Macy mutex_enter(&os->os_dsl_dataset->ds_dir->dd_activity_lock);
759eda14cbcSMatt Macy
760eda14cbcSMatt Macy /*
761eda14cbcSMatt Macy * Remove this znode from the unlinked set. If a has rollback has
762eda14cbcSMatt Macy * occurred while a file is open and unlinked. Then when the file
763eda14cbcSMatt Macy * is closed post rollback it will not exist in the rolled back
764eda14cbcSMatt Macy * version of the unlinked object.
765eda14cbcSMatt Macy */
766eda14cbcSMatt Macy error = zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
767eda14cbcSMatt Macy zp->z_id, tx);
768eda14cbcSMatt Macy VERIFY(error == 0 || error == ENOENT);
769eda14cbcSMatt Macy
770eda14cbcSMatt Macy uint64_t count;
771eda14cbcSMatt Macy if (zap_count(os, zfsvfs->z_unlinkedobj, &count) == 0 && count == 0) {
772eda14cbcSMatt Macy cv_broadcast(&os->os_dsl_dataset->ds_dir->dd_activity_cv);
773eda14cbcSMatt Macy }
774eda14cbcSMatt Macy
775eda14cbcSMatt Macy mutex_exit(&os->os_dsl_dataset->ds_dir->dd_activity_lock);
776eda14cbcSMatt Macy
777eda14cbcSMatt Macy dataset_kstats_update_nunlinked_kstat(&zfsvfs->z_kstat, 1);
778eda14cbcSMatt Macy
779eda14cbcSMatt Macy zfs_znode_delete(zp, tx);
780eda14cbcSMatt Macy
781eda14cbcSMatt Macy dmu_tx_commit(tx);
782eda14cbcSMatt Macy out:
783eda14cbcSMatt Macy if (xzp)
784eda14cbcSMatt Macy zfs_zrele_async(xzp);
785eda14cbcSMatt Macy }
786eda14cbcSMatt Macy
787eda14cbcSMatt Macy static uint64_t
zfs_dirent(znode_t * zp,uint64_t mode)788eda14cbcSMatt Macy zfs_dirent(znode_t *zp, uint64_t mode)
789eda14cbcSMatt Macy {
790eda14cbcSMatt Macy uint64_t de = zp->z_id;
791eda14cbcSMatt Macy
792eda14cbcSMatt Macy if (ZTOZSB(zp)->z_version >= ZPL_VERSION_DIRENT_TYPE)
793eda14cbcSMatt Macy de |= IFTODT(mode) << 60;
794eda14cbcSMatt Macy return (de);
795eda14cbcSMatt Macy }
796eda14cbcSMatt Macy
797eda14cbcSMatt Macy /*
798eda14cbcSMatt Macy * Link zp into dl. Can fail in the following cases :
799eda14cbcSMatt Macy * - if zp has been unlinked.
800eda14cbcSMatt Macy * - if the number of entries with the same hash (aka. colliding entries)
801eda14cbcSMatt Macy * exceed the capacity of a leaf-block of fatzap and splitting of the
802eda14cbcSMatt Macy * leaf-block does not help.
803eda14cbcSMatt Macy */
804eda14cbcSMatt Macy int
zfs_link_create(zfs_dirlock_t * dl,znode_t * zp,dmu_tx_t * tx,int flag)805eda14cbcSMatt Macy zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
806eda14cbcSMatt Macy {
807eda14cbcSMatt Macy znode_t *dzp = dl->dl_dzp;
808eda14cbcSMatt Macy zfsvfs_t *zfsvfs = ZTOZSB(zp);
809eda14cbcSMatt Macy uint64_t value;
810eda14cbcSMatt Macy int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
811eda14cbcSMatt Macy sa_bulk_attr_t bulk[5];
812eda14cbcSMatt Macy uint64_t mtime[2], ctime[2];
813eda14cbcSMatt Macy uint64_t links;
814eda14cbcSMatt Macy int count = 0;
815eda14cbcSMatt Macy int error;
816eda14cbcSMatt Macy
817eda14cbcSMatt Macy mutex_enter(&zp->z_lock);
818eda14cbcSMatt Macy
819eda14cbcSMatt Macy if (!(flag & ZRENAMING)) {
820eda14cbcSMatt Macy if (zp->z_unlinked) { /* no new links to unlinked zp */
821eda14cbcSMatt Macy ASSERT(!(flag & (ZNEW | ZEXISTS)));
822eda14cbcSMatt Macy mutex_exit(&zp->z_lock);
823eda14cbcSMatt Macy return (SET_ERROR(ENOENT));
824eda14cbcSMatt Macy }
825eda14cbcSMatt Macy if (!(flag & ZNEW)) {
826eda14cbcSMatt Macy /*
827eda14cbcSMatt Macy * ZNEW nodes come from zfs_mknode() where the link
828eda14cbcSMatt Macy * count has already been initialised
829eda14cbcSMatt Macy */
830eda14cbcSMatt Macy inc_nlink(ZTOI(zp));
831eda14cbcSMatt Macy links = ZTOI(zp)->i_nlink;
832eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
833eda14cbcSMatt Macy NULL, &links, sizeof (links));
834eda14cbcSMatt Macy }
835eda14cbcSMatt Macy }
836eda14cbcSMatt Macy
837eda14cbcSMatt Macy value = zfs_dirent(zp, zp->z_mode);
838eda14cbcSMatt Macy error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, 8, 1,
839eda14cbcSMatt Macy &value, tx);
840eda14cbcSMatt Macy
841eda14cbcSMatt Macy /*
842eda14cbcSMatt Macy * zap_add could fail to add the entry if it exceeds the capacity of the
843eda14cbcSMatt Macy * leaf-block and zap_leaf_split() failed to help.
844eda14cbcSMatt Macy * The caller of this routine is responsible for failing the transaction
845eda14cbcSMatt Macy * which will rollback the SA updates done above.
846eda14cbcSMatt Macy */
847eda14cbcSMatt Macy if (error != 0) {
848eda14cbcSMatt Macy if (!(flag & ZRENAMING) && !(flag & ZNEW))
849eda14cbcSMatt Macy drop_nlink(ZTOI(zp));
850eda14cbcSMatt Macy mutex_exit(&zp->z_lock);
851eda14cbcSMatt Macy return (error);
852eda14cbcSMatt Macy }
853eda14cbcSMatt Macy
8547a7741afSMartin Matuska /*
8557a7741afSMartin Matuska * If we added a longname activate the SPA_FEATURE_LONGNAME.
8567a7741afSMartin Matuska */
8577a7741afSMartin Matuska if (strlen(dl->dl_name) >= ZAP_MAXNAMELEN) {
8587a7741afSMartin Matuska dsl_dataset_t *ds = dmu_objset_ds(zfsvfs->z_os);
8597a7741afSMartin Matuska ds->ds_feature_activation[SPA_FEATURE_LONGNAME] =
8607a7741afSMartin Matuska (void *)B_TRUE;
8617a7741afSMartin Matuska }
8627a7741afSMartin Matuska
863eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
864eda14cbcSMatt Macy &dzp->z_id, sizeof (dzp->z_id));
865eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
866eda14cbcSMatt Macy &zp->z_pflags, sizeof (zp->z_pflags));
867eda14cbcSMatt Macy
868eda14cbcSMatt Macy if (!(flag & ZNEW)) {
869eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
870eda14cbcSMatt Macy ctime, sizeof (ctime));
871eda14cbcSMatt Macy zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
872eda14cbcSMatt Macy ctime);
873eda14cbcSMatt Macy }
874eda14cbcSMatt Macy error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
875*d0abb9a6SMartin Matuska ASSERT0(error);
876eda14cbcSMatt Macy
877eda14cbcSMatt Macy mutex_exit(&zp->z_lock);
878eda14cbcSMatt Macy
879eda14cbcSMatt Macy mutex_enter(&dzp->z_lock);
880eda14cbcSMatt Macy dzp->z_size++;
881eda14cbcSMatt Macy if (zp_is_dir)
882eda14cbcSMatt Macy inc_nlink(ZTOI(dzp));
883eda14cbcSMatt Macy links = ZTOI(dzp)->i_nlink;
884eda14cbcSMatt Macy count = 0;
885eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
886eda14cbcSMatt Macy &dzp->z_size, sizeof (dzp->z_size));
887eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
888eda14cbcSMatt Macy &links, sizeof (links));
889eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
890eda14cbcSMatt Macy mtime, sizeof (mtime));
891eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
892eda14cbcSMatt Macy ctime, sizeof (ctime));
893eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
894eda14cbcSMatt Macy &dzp->z_pflags, sizeof (dzp->z_pflags));
895eda14cbcSMatt Macy zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime);
896eda14cbcSMatt Macy error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
897*d0abb9a6SMartin Matuska ASSERT0(error);
898eda14cbcSMatt Macy mutex_exit(&dzp->z_lock);
899eda14cbcSMatt Macy
900eda14cbcSMatt Macy return (0);
901eda14cbcSMatt Macy }
902eda14cbcSMatt Macy
903eda14cbcSMatt Macy /*
904eda14cbcSMatt Macy * The match type in the code for this function should conform to:
905eda14cbcSMatt Macy *
906eda14cbcSMatt Macy * ------------------------------------------------------------------------
907eda14cbcSMatt Macy * fs type | z_norm | lookup type | match type
908eda14cbcSMatt Macy * ---------|-------------|-------------|----------------------------------
909eda14cbcSMatt Macy * CS !norm | 0 | 0 | 0 (exact)
910eda14cbcSMatt Macy * CS norm | formX | 0 | MT_NORMALIZE
911eda14cbcSMatt Macy * CI !norm | upper | !ZCIEXACT | MT_NORMALIZE
912eda14cbcSMatt Macy * CI !norm | upper | ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE
913eda14cbcSMatt Macy * CI norm | upper|formX | !ZCIEXACT | MT_NORMALIZE
914eda14cbcSMatt Macy * CI norm | upper|formX | ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE
915eda14cbcSMatt Macy * CM !norm | upper | !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE
916eda14cbcSMatt Macy * CM !norm | upper | ZCILOOK | MT_NORMALIZE
917eda14cbcSMatt Macy * CM norm | upper|formX | !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE
918eda14cbcSMatt Macy * CM norm | upper|formX | ZCILOOK | MT_NORMALIZE
919eda14cbcSMatt Macy *
920eda14cbcSMatt Macy * Abbreviations:
921eda14cbcSMatt Macy * CS = Case Sensitive, CI = Case Insensitive, CM = Case Mixed
922eda14cbcSMatt Macy * upper = case folding set by fs type on creation (U8_TEXTPREP_TOUPPER)
923eda14cbcSMatt Macy * formX = unicode normalization form set on fs creation
924eda14cbcSMatt Macy */
925eda14cbcSMatt Macy static int
zfs_dropname(zfs_dirlock_t * dl,znode_t * zp,znode_t * dzp,dmu_tx_t * tx,int flag)926eda14cbcSMatt Macy zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx,
927eda14cbcSMatt Macy int flag)
928eda14cbcSMatt Macy {
929eda14cbcSMatt Macy int error;
930eda14cbcSMatt Macy
931eda14cbcSMatt Macy if (ZTOZSB(zp)->z_norm) {
932eda14cbcSMatt Macy matchtype_t mt = MT_NORMALIZE;
933eda14cbcSMatt Macy
934eda14cbcSMatt Macy if ((ZTOZSB(zp)->z_case == ZFS_CASE_INSENSITIVE &&
935eda14cbcSMatt Macy (flag & ZCIEXACT)) ||
936eda14cbcSMatt Macy (ZTOZSB(zp)->z_case == ZFS_CASE_MIXED &&
937eda14cbcSMatt Macy !(flag & ZCILOOK))) {
938eda14cbcSMatt Macy mt |= MT_MATCH_CASE;
939eda14cbcSMatt Macy }
940eda14cbcSMatt Macy
941eda14cbcSMatt Macy error = zap_remove_norm(ZTOZSB(zp)->z_os, dzp->z_id,
942eda14cbcSMatt Macy dl->dl_name, mt, tx);
943eda14cbcSMatt Macy } else {
944eda14cbcSMatt Macy error = zap_remove(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name,
945eda14cbcSMatt Macy tx);
946eda14cbcSMatt Macy }
947eda14cbcSMatt Macy
948eda14cbcSMatt Macy return (error);
949eda14cbcSMatt Macy }
950eda14cbcSMatt Macy
951dbd5678dSMartin Matuska static int
zfs_drop_nlink_locked(znode_t * zp,dmu_tx_t * tx,boolean_t * unlinkedp)952dbd5678dSMartin Matuska zfs_drop_nlink_locked(znode_t *zp, dmu_tx_t *tx, boolean_t *unlinkedp)
953dbd5678dSMartin Matuska {
954dbd5678dSMartin Matuska zfsvfs_t *zfsvfs = ZTOZSB(zp);
955dbd5678dSMartin Matuska int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
956dbd5678dSMartin Matuska boolean_t unlinked = B_FALSE;
957dbd5678dSMartin Matuska sa_bulk_attr_t bulk[3];
958dbd5678dSMartin Matuska uint64_t mtime[2], ctime[2];
959dbd5678dSMartin Matuska uint64_t links;
960dbd5678dSMartin Matuska int count = 0;
961dbd5678dSMartin Matuska int error;
962dbd5678dSMartin Matuska
963dbd5678dSMartin Matuska if (zp_is_dir && !zfs_dirempty(zp))
964dbd5678dSMartin Matuska return (SET_ERROR(ENOTEMPTY));
965dbd5678dSMartin Matuska
966dbd5678dSMartin Matuska if (ZTOI(zp)->i_nlink <= zp_is_dir) {
967dbd5678dSMartin Matuska zfs_panic_recover("zfs: link count on %lu is %u, "
968dbd5678dSMartin Matuska "should be at least %u", zp->z_id,
969dbd5678dSMartin Matuska (int)ZTOI(zp)->i_nlink, zp_is_dir + 1);
970dbd5678dSMartin Matuska set_nlink(ZTOI(zp), zp_is_dir + 1);
971dbd5678dSMartin Matuska }
972dbd5678dSMartin Matuska drop_nlink(ZTOI(zp));
973dbd5678dSMartin Matuska if (ZTOI(zp)->i_nlink == zp_is_dir) {
974dbd5678dSMartin Matuska zp->z_unlinked = B_TRUE;
975dbd5678dSMartin Matuska clear_nlink(ZTOI(zp));
976dbd5678dSMartin Matuska unlinked = B_TRUE;
977dbd5678dSMartin Matuska } else {
978dbd5678dSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
979dbd5678dSMartin Matuska NULL, &ctime, sizeof (ctime));
980dbd5678dSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
981dbd5678dSMartin Matuska NULL, &zp->z_pflags, sizeof (zp->z_pflags));
982dbd5678dSMartin Matuska zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
983dbd5678dSMartin Matuska ctime);
984dbd5678dSMartin Matuska }
985dbd5678dSMartin Matuska links = ZTOI(zp)->i_nlink;
986dbd5678dSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
987dbd5678dSMartin Matuska NULL, &links, sizeof (links));
988dbd5678dSMartin Matuska error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
989*d0abb9a6SMartin Matuska ASSERT0(error);
990dbd5678dSMartin Matuska
991dbd5678dSMartin Matuska if (unlinkedp != NULL)
992dbd5678dSMartin Matuska *unlinkedp = unlinked;
993dbd5678dSMartin Matuska else if (unlinked)
994dbd5678dSMartin Matuska zfs_unlinked_add(zp, tx);
995dbd5678dSMartin Matuska
996dbd5678dSMartin Matuska return (0);
997dbd5678dSMartin Matuska }
998dbd5678dSMartin Matuska
999dbd5678dSMartin Matuska /*
1000dbd5678dSMartin Matuska * Forcefully drop an nlink reference from (zp) and mark it for deletion if it
1001dbd5678dSMartin Matuska * was the last link. This *must* only be done to znodes which have already
1002dbd5678dSMartin Matuska * been zfs_link_destroy()'d with ZRENAMING. This is explicitly only used in
1003dbd5678dSMartin Matuska * the error path of zfs_rename(), where we have to correct the nlink count if
1004dbd5678dSMartin Matuska * we failed to link the target as well as failing to re-link the original
1005dbd5678dSMartin Matuska * znodes.
1006dbd5678dSMartin Matuska */
1007dbd5678dSMartin Matuska int
zfs_drop_nlink(znode_t * zp,dmu_tx_t * tx,boolean_t * unlinkedp)1008dbd5678dSMartin Matuska zfs_drop_nlink(znode_t *zp, dmu_tx_t *tx, boolean_t *unlinkedp)
1009dbd5678dSMartin Matuska {
1010dbd5678dSMartin Matuska int error;
1011dbd5678dSMartin Matuska
1012dbd5678dSMartin Matuska mutex_enter(&zp->z_lock);
1013dbd5678dSMartin Matuska error = zfs_drop_nlink_locked(zp, tx, unlinkedp);
1014dbd5678dSMartin Matuska mutex_exit(&zp->z_lock);
1015dbd5678dSMartin Matuska
1016dbd5678dSMartin Matuska return (error);
1017dbd5678dSMartin Matuska }
1018dbd5678dSMartin Matuska
1019eda14cbcSMatt Macy /*
1020eda14cbcSMatt Macy * Unlink zp from dl, and mark zp for deletion if this was the last link. Can
1021eda14cbcSMatt Macy * fail if zp is a mount point (EBUSY) or a non-empty directory (ENOTEMPTY).
1022eda14cbcSMatt Macy * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list.
1023eda14cbcSMatt Macy * If it's non-NULL, we use it to indicate whether the znode needs deletion,
1024eda14cbcSMatt Macy * and it's the caller's job to do it.
1025eda14cbcSMatt Macy */
1026eda14cbcSMatt Macy int
zfs_link_destroy(zfs_dirlock_t * dl,znode_t * zp,dmu_tx_t * tx,int flag,boolean_t * unlinkedp)1027eda14cbcSMatt Macy zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
1028eda14cbcSMatt Macy boolean_t *unlinkedp)
1029eda14cbcSMatt Macy {
1030eda14cbcSMatt Macy znode_t *dzp = dl->dl_dzp;
1031eda14cbcSMatt Macy zfsvfs_t *zfsvfs = ZTOZSB(dzp);
1032eda14cbcSMatt Macy int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
1033eda14cbcSMatt Macy boolean_t unlinked = B_FALSE;
1034eda14cbcSMatt Macy sa_bulk_attr_t bulk[5];
1035eda14cbcSMatt Macy uint64_t mtime[2], ctime[2];
1036eda14cbcSMatt Macy uint64_t links;
1037eda14cbcSMatt Macy int count = 0;
1038eda14cbcSMatt Macy int error;
1039eda14cbcSMatt Macy
1040eda14cbcSMatt Macy if (!(flag & ZRENAMING)) {
1041eda14cbcSMatt Macy mutex_enter(&zp->z_lock);
1042eda14cbcSMatt Macy
1043eda14cbcSMatt Macy if (zp_is_dir && !zfs_dirempty(zp)) {
1044eda14cbcSMatt Macy mutex_exit(&zp->z_lock);
1045eda14cbcSMatt Macy return (SET_ERROR(ENOTEMPTY));
1046eda14cbcSMatt Macy }
1047eda14cbcSMatt Macy
1048eda14cbcSMatt Macy /*
1049eda14cbcSMatt Macy * If we get here, we are going to try to remove the object.
1050eda14cbcSMatt Macy * First try removing the name from the directory; if that
1051eda14cbcSMatt Macy * fails, return the error.
1052eda14cbcSMatt Macy */
1053eda14cbcSMatt Macy error = zfs_dropname(dl, zp, dzp, tx, flag);
1054eda14cbcSMatt Macy if (error != 0) {
1055eda14cbcSMatt Macy mutex_exit(&zp->z_lock);
1056eda14cbcSMatt Macy return (error);
1057eda14cbcSMatt Macy }
1058eda14cbcSMatt Macy
1059dbd5678dSMartin Matuska /* The only error is !zfs_dirempty() and we checked earlier. */
1060dbd5678dSMartin Matuska error = zfs_drop_nlink_locked(zp, tx, &unlinked);
1061*d0abb9a6SMartin Matuska ASSERT0(error);
1062eda14cbcSMatt Macy mutex_exit(&zp->z_lock);
1063eda14cbcSMatt Macy } else {
1064eda14cbcSMatt Macy error = zfs_dropname(dl, zp, dzp, tx, flag);
1065eda14cbcSMatt Macy if (error != 0)
1066eda14cbcSMatt Macy return (error);
1067eda14cbcSMatt Macy }
1068eda14cbcSMatt Macy
1069eda14cbcSMatt Macy mutex_enter(&dzp->z_lock);
1070eda14cbcSMatt Macy dzp->z_size--; /* one dirent removed */
1071eda14cbcSMatt Macy if (zp_is_dir)
1072eda14cbcSMatt Macy drop_nlink(ZTOI(dzp)); /* ".." link from zp */
1073eda14cbcSMatt Macy links = ZTOI(dzp)->i_nlink;
1074eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
1075eda14cbcSMatt Macy NULL, &links, sizeof (links));
1076eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
1077eda14cbcSMatt Macy NULL, &dzp->z_size, sizeof (dzp->z_size));
1078eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
1079eda14cbcSMatt Macy NULL, ctime, sizeof (ctime));
1080eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
1081eda14cbcSMatt Macy NULL, mtime, sizeof (mtime));
1082eda14cbcSMatt Macy SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
1083eda14cbcSMatt Macy NULL, &dzp->z_pflags, sizeof (dzp->z_pflags));
1084eda14cbcSMatt Macy zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime);
1085eda14cbcSMatt Macy error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
1086*d0abb9a6SMartin Matuska ASSERT0(error);
1087eda14cbcSMatt Macy mutex_exit(&dzp->z_lock);
1088eda14cbcSMatt Macy
1089eda14cbcSMatt Macy if (unlinkedp != NULL)
1090eda14cbcSMatt Macy *unlinkedp = unlinked;
1091eda14cbcSMatt Macy else if (unlinked)
1092eda14cbcSMatt Macy zfs_unlinked_add(zp, tx);
1093eda14cbcSMatt Macy
1094eda14cbcSMatt Macy return (0);
1095eda14cbcSMatt Macy }
1096eda14cbcSMatt Macy
1097eda14cbcSMatt Macy /*
1098eda14cbcSMatt Macy * Indicate whether the directory is empty. Works with or without z_lock
1099eda14cbcSMatt Macy * held, but can only be consider a hint in the latter case. Returns true
1100eda14cbcSMatt Macy * if only "." and ".." remain and there's no work in progress.
1101eda14cbcSMatt Macy *
1102eda14cbcSMatt Macy * The internal ZAP size, rather than zp->z_size, needs to be checked since
1103eda14cbcSMatt Macy * some consumers (Lustre) do not strictly maintain an accurate SA_ZPL_SIZE.
1104eda14cbcSMatt Macy */
1105eda14cbcSMatt Macy boolean_t
zfs_dirempty(znode_t * dzp)1106eda14cbcSMatt Macy zfs_dirempty(znode_t *dzp)
1107eda14cbcSMatt Macy {
1108eda14cbcSMatt Macy zfsvfs_t *zfsvfs = ZTOZSB(dzp);
1109eda14cbcSMatt Macy uint64_t count;
1110eda14cbcSMatt Macy int error;
1111eda14cbcSMatt Macy
1112eda14cbcSMatt Macy if (dzp->z_dirlocks != NULL)
1113eda14cbcSMatt Macy return (B_FALSE);
1114eda14cbcSMatt Macy
1115eda14cbcSMatt Macy error = zap_count(zfsvfs->z_os, dzp->z_id, &count);
1116eda14cbcSMatt Macy if (error != 0 || count != 0)
1117eda14cbcSMatt Macy return (B_FALSE);
1118eda14cbcSMatt Macy
1119eda14cbcSMatt Macy return (B_TRUE);
1120eda14cbcSMatt Macy }
1121eda14cbcSMatt Macy
1122eda14cbcSMatt Macy int
zfs_make_xattrdir(znode_t * zp,vattr_t * vap,znode_t ** xzpp,cred_t * cr)1123eda14cbcSMatt Macy zfs_make_xattrdir(znode_t *zp, vattr_t *vap, znode_t **xzpp, cred_t *cr)
1124eda14cbcSMatt Macy {
1125eda14cbcSMatt Macy zfsvfs_t *zfsvfs = ZTOZSB(zp);
1126eda14cbcSMatt Macy znode_t *xzp;
1127eda14cbcSMatt Macy dmu_tx_t *tx;
1128eda14cbcSMatt Macy int error;
1129eda14cbcSMatt Macy zfs_acl_ids_t acl_ids;
1130eda14cbcSMatt Macy boolean_t fuid_dirtied;
1131eda14cbcSMatt Macy #ifdef ZFS_DEBUG
1132eda14cbcSMatt Macy uint64_t parent;
1133eda14cbcSMatt Macy #endif
1134eda14cbcSMatt Macy
1135eda14cbcSMatt Macy *xzpp = NULL;
1136eda14cbcSMatt Macy
1137eda14cbcSMatt Macy if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
1138d411c1d6SMartin Matuska &acl_ids, zfs_init_idmap)) != 0)
1139eda14cbcSMatt Macy return (error);
1140eda14cbcSMatt Macy if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zp->z_projid)) {
1141eda14cbcSMatt Macy zfs_acl_ids_free(&acl_ids);
1142eda14cbcSMatt Macy return (SET_ERROR(EDQUOT));
1143eda14cbcSMatt Macy }
1144eda14cbcSMatt Macy
1145eda14cbcSMatt Macy tx = dmu_tx_create(zfsvfs->z_os);
1146eda14cbcSMatt Macy dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1147eda14cbcSMatt Macy ZFS_SA_BASE_ATTR_SIZE);
1148eda14cbcSMatt Macy dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
1149eda14cbcSMatt Macy dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
1150eda14cbcSMatt Macy fuid_dirtied = zfsvfs->z_fuid_dirty;
1151eda14cbcSMatt Macy if (fuid_dirtied)
1152eda14cbcSMatt Macy zfs_fuid_txhold(zfsvfs, tx);
115361145dc2SMartin Matuska error = dmu_tx_assign(tx, DMU_TX_WAIT);
1154eda14cbcSMatt Macy if (error) {
1155eda14cbcSMatt Macy zfs_acl_ids_free(&acl_ids);
1156eda14cbcSMatt Macy dmu_tx_abort(tx);
1157eda14cbcSMatt Macy return (error);
1158eda14cbcSMatt Macy }
1159eda14cbcSMatt Macy zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids);
1160eda14cbcSMatt Macy
1161eda14cbcSMatt Macy if (fuid_dirtied)
1162eda14cbcSMatt Macy zfs_fuid_sync(zfsvfs, tx);
1163eda14cbcSMatt Macy
1164eda14cbcSMatt Macy #ifdef ZFS_DEBUG
1165eda14cbcSMatt Macy error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
1166eda14cbcSMatt Macy &parent, sizeof (parent));
1167eda14cbcSMatt Macy ASSERT(error == 0 && parent == zp->z_id);
1168eda14cbcSMatt Macy #endif
1169eda14cbcSMatt Macy
1170*d0abb9a6SMartin Matuska VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id,
1171eda14cbcSMatt Macy sizeof (xzp->z_id), tx));
1172eda14cbcSMatt Macy
1173eda14cbcSMatt Macy if (!zp->z_unlinked)
117416038816SMartin Matuska zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp, xzp, "", NULL,
117516038816SMartin Matuska acl_ids.z_fuidp, vap);
1176eda14cbcSMatt Macy
1177eda14cbcSMatt Macy zfs_acl_ids_free(&acl_ids);
1178eda14cbcSMatt Macy dmu_tx_commit(tx);
1179eda14cbcSMatt Macy
1180eda14cbcSMatt Macy *xzpp = xzp;
1181eda14cbcSMatt Macy
1182eda14cbcSMatt Macy return (0);
1183eda14cbcSMatt Macy }
1184eda14cbcSMatt Macy
1185eda14cbcSMatt Macy /*
1186eda14cbcSMatt Macy * Return a znode for the extended attribute directory for zp.
1187eda14cbcSMatt Macy * ** If the directory does not already exist, it is created **
1188eda14cbcSMatt Macy *
1189eda14cbcSMatt Macy * IN: zp - znode to obtain attribute directory from
1190eda14cbcSMatt Macy * cr - credentials of caller
1191eda14cbcSMatt Macy * flags - flags from the VOP_LOOKUP call
1192eda14cbcSMatt Macy *
1193eda14cbcSMatt Macy * OUT: xipp - pointer to extended attribute znode
1194eda14cbcSMatt Macy *
1195eda14cbcSMatt Macy * RETURN: 0 on success
1196eda14cbcSMatt Macy * error number on failure
1197eda14cbcSMatt Macy */
1198eda14cbcSMatt Macy int
zfs_get_xattrdir(znode_t * zp,znode_t ** xzpp,cred_t * cr,int flags)1199eda14cbcSMatt Macy zfs_get_xattrdir(znode_t *zp, znode_t **xzpp, cred_t *cr, int flags)
1200eda14cbcSMatt Macy {
1201eda14cbcSMatt Macy zfsvfs_t *zfsvfs = ZTOZSB(zp);
1202eda14cbcSMatt Macy znode_t *xzp;
1203eda14cbcSMatt Macy zfs_dirlock_t *dl;
1204eda14cbcSMatt Macy vattr_t va;
1205eda14cbcSMatt Macy int error;
1206eda14cbcSMatt Macy top:
1207eda14cbcSMatt Macy error = zfs_dirent_lock(&dl, zp, "", &xzp, ZXATTR, NULL, NULL);
1208eda14cbcSMatt Macy if (error)
1209eda14cbcSMatt Macy return (error);
1210eda14cbcSMatt Macy
1211eda14cbcSMatt Macy if (xzp != NULL) {
1212eda14cbcSMatt Macy *xzpp = xzp;
1213eda14cbcSMatt Macy zfs_dirent_unlock(dl);
1214eda14cbcSMatt Macy return (0);
1215eda14cbcSMatt Macy }
1216eda14cbcSMatt Macy
1217eda14cbcSMatt Macy if (!(flags & CREATE_XATTR_DIR)) {
1218eda14cbcSMatt Macy zfs_dirent_unlock(dl);
1219eda14cbcSMatt Macy return (SET_ERROR(ENOENT));
1220eda14cbcSMatt Macy }
1221eda14cbcSMatt Macy
1222eda14cbcSMatt Macy if (zfs_is_readonly(zfsvfs)) {
1223eda14cbcSMatt Macy zfs_dirent_unlock(dl);
1224eda14cbcSMatt Macy return (SET_ERROR(EROFS));
1225eda14cbcSMatt Macy }
1226eda14cbcSMatt Macy
1227eda14cbcSMatt Macy /*
1228eda14cbcSMatt Macy * The ability to 'create' files in an attribute
1229eda14cbcSMatt Macy * directory comes from the write_xattr permission on the base file.
1230eda14cbcSMatt Macy *
1231eda14cbcSMatt Macy * The ability to 'search' an attribute directory requires
1232eda14cbcSMatt Macy * read_xattr permission on the base file.
1233eda14cbcSMatt Macy *
1234eda14cbcSMatt Macy * Once in a directory the ability to read/write attributes
1235eda14cbcSMatt Macy * is controlled by the permissions on the attribute file.
1236eda14cbcSMatt Macy */
1237eda14cbcSMatt Macy va.va_mask = ATTR_MODE | ATTR_UID | ATTR_GID;
1238eda14cbcSMatt Macy va.va_mode = S_IFDIR | S_ISVTX | 0777;
1239eda14cbcSMatt Macy zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid);
1240eda14cbcSMatt Macy
1241eda14cbcSMatt Macy va.va_dentry = NULL;
1242eda14cbcSMatt Macy error = zfs_make_xattrdir(zp, &va, xzpp, cr);
1243eda14cbcSMatt Macy zfs_dirent_unlock(dl);
1244eda14cbcSMatt Macy
1245eda14cbcSMatt Macy if (error == ERESTART) {
1246eda14cbcSMatt Macy /* NB: we already did dmu_tx_wait() if necessary */
1247eda14cbcSMatt Macy goto top;
1248eda14cbcSMatt Macy }
1249eda14cbcSMatt Macy
1250eda14cbcSMatt Macy return (error);
1251eda14cbcSMatt Macy }
1252eda14cbcSMatt Macy
1253eda14cbcSMatt Macy /*
1254eda14cbcSMatt Macy * Decide whether it is okay to remove within a sticky directory.
1255eda14cbcSMatt Macy *
1256eda14cbcSMatt Macy * In sticky directories, write access is not sufficient;
1257eda14cbcSMatt Macy * you can remove entries from a directory only if:
1258eda14cbcSMatt Macy *
1259eda14cbcSMatt Macy * you own the directory,
1260eda14cbcSMatt Macy * you own the entry,
1261eda14cbcSMatt Macy * you have write access to the entry,
1262eda14cbcSMatt Macy * or you are privileged (checked in secpolicy...).
1263eda14cbcSMatt Macy *
1264eda14cbcSMatt Macy * The function returns 0 if remove access is granted.
1265eda14cbcSMatt Macy */
1266eda14cbcSMatt Macy int
zfs_sticky_remove_access(znode_t * zdp,znode_t * zp,cred_t * cr)1267eda14cbcSMatt Macy zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
1268eda14cbcSMatt Macy {
1269eda14cbcSMatt Macy uid_t uid;
1270eda14cbcSMatt Macy uid_t downer;
1271eda14cbcSMatt Macy uid_t fowner;
1272eda14cbcSMatt Macy zfsvfs_t *zfsvfs = ZTOZSB(zdp);
1273eda14cbcSMatt Macy
1274eda14cbcSMatt Macy if (zfsvfs->z_replay)
1275eda14cbcSMatt Macy return (0);
1276eda14cbcSMatt Macy
1277eda14cbcSMatt Macy if ((zdp->z_mode & S_ISVTX) == 0)
1278eda14cbcSMatt Macy return (0);
1279eda14cbcSMatt Macy
1280eda14cbcSMatt Macy downer = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(ZTOI(zdp)->i_uid),
1281eda14cbcSMatt Macy cr, ZFS_OWNER);
1282eda14cbcSMatt Macy fowner = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(ZTOI(zp)->i_uid),
1283eda14cbcSMatt Macy cr, ZFS_OWNER);
1284eda14cbcSMatt Macy
1285eda14cbcSMatt Macy if ((uid = crgetuid(cr)) == downer || uid == fowner ||
1286dbd5678dSMartin Matuska zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr,
1287d411c1d6SMartin Matuska zfs_init_idmap) == 0)
1288eda14cbcSMatt Macy return (0);
1289eda14cbcSMatt Macy else
1290eda14cbcSMatt Macy return (secpolicy_vnode_remove(cr));
1291eda14cbcSMatt Macy }
1292