xref: /freebsd/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c (revision dd21556857e8d40f66bf5ad54754d9d52669ebf7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
25  * Copyright (c) 2014 Integros [integros.com]
26  * Copyright 2017 Nexenta Systems, Inc.
27  */
28 
29 /* Portions Copyright 2007 Jeremy Teo */
30 /* Portions Copyright 2010 Robert Milkowski */
31 
32 #include <sys/param.h>
33 #include <sys/time.h>
34 #include <sys/systm.h>
35 #include <sys/sysmacros.h>
36 #include <sys/resource.h>
37 #include <security/mac/mac_framework.h>
38 #include <sys/vfs.h>
39 #include <sys/endian.h>
40 #include <sys/vm.h>
41 #include <sys/vnode.h>
42 #include <sys/smr.h>
43 #include <sys/dirent.h>
44 #include <sys/file.h>
45 #include <sys/stat.h>
46 #include <sys/kmem.h>
47 #include <sys/taskq.h>
48 #include <sys/uio.h>
49 #include <sys/atomic.h>
50 #include <sys/namei.h>
51 #include <sys/mman.h>
52 #include <sys/cmn_err.h>
53 #include <sys/kdb.h>
54 #include <sys/sysproto.h>
55 #include <sys/errno.h>
56 #include <sys/unistd.h>
57 #include <sys/zfs_dir.h>
58 #include <sys/zfs_ioctl.h>
59 #include <sys/fs/zfs.h>
60 #include <sys/dmu.h>
61 #include <sys/dmu_objset.h>
62 #include <sys/spa.h>
63 #include <sys/txg.h>
64 #include <sys/dbuf.h>
65 #include <sys/zap.h>
66 #include <sys/sa.h>
67 #include <sys/policy.h>
68 #include <sys/sunddi.h>
69 #include <sys/filio.h>
70 #include <sys/sid.h>
71 #include <sys/zfs_ctldir.h>
72 #include <sys/zfs_fuid.h>
73 #include <sys/zfs_quota.h>
74 #include <sys/zfs_sa.h>
75 #include <sys/zfs_rlock.h>
76 #include <sys/bio.h>
77 #include <sys/buf.h>
78 #include <sys/sched.h>
79 #include <sys/acl.h>
80 #include <sys/vmmeter.h>
81 #include <vm/vm_param.h>
82 #include <sys/zil.h>
83 #include <sys/zfs_vnops.h>
84 #include <sys/module.h>
85 #include <sys/sysent.h>
86 #include <sys/dmu_impl.h>
87 #include <sys/brt.h>
88 #include <sys/zfeature.h>
89 
90 #include <vm/vm_object.h>
91 
92 #include <sys/extattr.h>
93 #include <sys/priv.h>
94 
95 #ifndef VN_OPEN_INVFS
96 #define	VN_OPEN_INVFS	0x0
97 #endif
98 
99 VFS_SMR_DECLARE;
100 
101 #ifdef DEBUG_VFS_LOCKS
102 #define	VNCHECKREF(vp)				  \
103 	VNASSERT((vp)->v_holdcnt > 0 && (vp)->v_usecount > 0, vp,	\
104 	    ("%s: wrong ref counts", __func__));
105 #else
106 #define	VNCHECKREF(vp)
107 #endif
108 
109 #if __FreeBSD_version >= 1400045
110 typedef uint64_t cookie_t;
111 #else
112 typedef ulong_t cookie_t;
113 #endif
114 
115 /*
116  * Programming rules.
117  *
118  * Each vnode op performs some logical unit of work.  To do this, the ZPL must
119  * properly lock its in-core state, create a DMU transaction, do the work,
120  * record this work in the intent log (ZIL), commit the DMU transaction,
121  * and wait for the intent log to commit if it is a synchronous operation.
122  * Moreover, the vnode ops must work in both normal and log replay context.
123  * The ordering of events is important to avoid deadlocks and references
124  * to freed memory.  The example below illustrates the following Big Rules:
125  *
126  *  (1)	A check must be made in each zfs thread for a mounted file system.
127  *	This is done while avoiding races by using zfs_enter(zfsvfs).
128  *	A zfs_exit(zfsvfs) is needed before all returns.  Any znodes
129  *	must be checked with zfs_verify_zp(zp).  Both of these macros
130  *	can return EIO from the calling function.
131  *
132  *  (2)	VN_RELE() should always be the last thing except for zil_commit()
133  *	(if necessary) and zfs_exit(). This is for 3 reasons:
134  *	First, if it's the last reference, the vnode/znode
135  *	can be freed, so the zp may point to freed memory.  Second, the last
136  *	reference will call zfs_zinactive(), which may induce a lot of work --
137  *	pushing cached pages (which acquires range locks) and syncing out
138  *	cached atime changes.  Third, zfs_zinactive() may require a new tx,
139  *	which could deadlock the system if you were already holding one.
140  *	If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
141  *
142  *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
143  *	as they can span dmu_tx_assign() calls.
144  *
145  *  (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to
146  *      dmu_tx_assign().  This is critical because we don't want to block
147  *      while holding locks.
148  *
149  *	If no ZPL locks are held (aside from zfs_enter()), use TXG_WAIT.  This
150  *	reduces lock contention and CPU usage when we must wait (note that if
151  *	throughput is constrained by the storage, nearly every transaction
152  *	must wait).
153  *
154  *      Note, in particular, that if a lock is sometimes acquired before
155  *      the tx assigns, and sometimes after (e.g. z_lock), then failing
156  *      to use a non-blocking assign can deadlock the system.  The scenario:
157  *
158  *	Thread A has grabbed a lock before calling dmu_tx_assign().
159  *	Thread B is in an already-assigned tx, and blocks for this lock.
160  *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
161  *	forever, because the previous txg can't quiesce until B's tx commits.
162  *
163  *	If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
164  *	then drop all locks, call dmu_tx_wait(), and try again.  On subsequent
165  *	calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
166  *	to indicate that this operation has already called dmu_tx_wait().
167  *	This will ensure that we don't retry forever, waiting a short bit
168  *	each time.
169  *
170  *  (5)	If the operation succeeded, generate the intent log entry for it
171  *	before dropping locks.  This ensures that the ordering of events
172  *	in the intent log matches the order in which they actually occurred.
173  *	During ZIL replay the zfs_log_* functions will update the sequence
174  *	number to indicate the zil transaction has replayed.
175  *
176  *  (6)	At the end of each vnode op, the DMU tx must always commit,
177  *	regardless of whether there were any errors.
178  *
179  *  (7)	After dropping all locks, invoke zil_commit(zilog, foid)
180  *	to ensure that synchronous semantics are provided when necessary.
181  *
182  * In general, this is how things should be ordered in each vnode op:
183  *
184  *	zfs_enter(zfsvfs);		// exit if unmounted
185  * top:
186  *	zfs_dirent_lookup(&dl, ...)	// lock directory entry (may VN_HOLD())
187  *	rw_enter(...);			// grab any other locks you need
188  *	tx = dmu_tx_create(...);	// get DMU tx
189  *	dmu_tx_hold_*();		// hold each object you might modify
190  *	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
191  *	if (error) {
192  *		rw_exit(...);		// drop locks
193  *		zfs_dirent_unlock(dl);	// unlock directory entry
194  *		VN_RELE(...);		// release held vnodes
195  *		if (error == ERESTART) {
196  *			waited = B_TRUE;
197  *			dmu_tx_wait(tx);
198  *			dmu_tx_abort(tx);
199  *			goto top;
200  *		}
201  *		dmu_tx_abort(tx);	// abort DMU tx
202  *		zfs_exit(zfsvfs);	// finished in zfs
203  *		return (error);		// really out of space
204  *	}
205  *	error = do_real_work();		// do whatever this VOP does
206  *	if (error == 0)
207  *		zfs_log_*(...);		// on success, make ZIL entry
208  *	dmu_tx_commit(tx);		// commit DMU tx -- error or not
209  *	rw_exit(...);			// drop locks
210  *	zfs_dirent_unlock(dl);		// unlock directory entry
211  *	VN_RELE(...);			// release held vnodes
212  *	zil_commit(zilog, foid);	// synchronous when necessary
213  *	zfs_exit(zfsvfs);		// finished in zfs
214  *	return (error);			// done, report error
215  */
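
/*
 * Illustrative sketch: the ops in this file typically call dmu_tx_assign()
 * with TXG_WAIT and simply bail out on failure, rather than using the
 * TXG_NOWAIT retry loop shown above, roughly:
 *
 *	tx = dmu_tx_create(zfsvfs->z_os);
 *	dmu_tx_hold_*();			// hold each object to modify
 *	error = dmu_tx_assign(tx, TXG_WAIT);
 *	if (error) {
 *		dmu_tx_abort(tx);
 *		zfs_exit(zfsvfs, FTAG);
 *		return (error);
 *	}
 *	... do the work, zfs_log_*() on success ...
 *	dmu_tx_commit(tx);
 *
 * See zfs_create(), zfs_remove_(), and zfs_mkdir() below for concrete uses.
 */
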
216 static int
217 zfs_open(vnode_t **vpp, int flag, cred_t *cr)
218 {
219 	(void) cr;
220 	znode_t	*zp = VTOZ(*vpp);
221 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
222 	int error;
223 
224 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
225 		return (error);
226 
227 	if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
228 	    ((flag & FAPPEND) == 0)) {
229 		zfs_exit(zfsvfs, FTAG);
230 		return (SET_ERROR(EPERM));
231 	}
232 
233 	/*
234 	 * Keep a count of the synchronous opens in the znode.  On first
235 	 * synchronous open we must convert all previous async transactions
236 	 * into sync to keep correct ordering.
237 	 */
238 	if (flag & O_SYNC) {
239 		if (atomic_inc_32_nv(&zp->z_sync_cnt) == 1)
240 			zil_async_to_sync(zfsvfs->z_log, zp->z_id);
241 	}
242 
243 	zfs_exit(zfsvfs, FTAG);
244 	return (0);
245 }
246 
247 static int
248 zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
249 {
250 	(void) offset, (void) cr;
251 	znode_t	*zp = VTOZ(vp);
252 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
253 	int error;
254 
255 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
256 		return (error);
257 
258 	/* Decrement the synchronous opens in the znode */
259 	if ((flag & O_SYNC) && (count == 1))
260 		atomic_dec_32(&zp->z_sync_cnt);
261 
262 	zfs_exit(zfsvfs, FTAG);
263 	return (0);
264 }
265 
266 static int
267 zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, cred_t *cred,
268     int *rvalp)
269 {
270 	(void) flag, (void) cred, (void) rvalp;
271 	loff_t off;
272 	int error;
273 
274 	switch (com) {
275 	case _FIOFFS:
276 	{
277 		return (0);
278 
279 		/*
280 		 * The following two ioctls are used by bfu.  Faking them out is
281 		 * necessary to avoid bfu errors.
282 		 */
283 	}
284 	case _FIOGDIO:
285 	case _FIOSDIO:
286 	{
287 		return (0);
288 	}
289 
290 	case F_SEEK_DATA:
291 	case F_SEEK_HOLE:
292 	{
293 		off = *(offset_t *)data;
294 		error = vn_lock(vp, LK_SHARED);
295 		if (error)
296 			return (error);
297 		/* offset parameter is in/out */
298 		error = zfs_holey(VTOZ(vp), com, &off);
299 		VOP_UNLOCK(vp);
300 		if (error)
301 			return (error);
302 		*(offset_t *)data = off;
303 		return (0);
304 	}
305 	}
306 	return (SET_ERROR(ENOTTY));
307 }
308 
309 static vm_page_t
310 page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
311 {
312 	vm_object_t obj;
313 	vm_page_t pp;
314 	int64_t end;
315 
316 	/*
317 	 * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE
318 	 * aligned boundaries, if the range is not aligned.  As a result a
319 	 * DEV_BSIZE subrange with partially dirty data may get marked as clean.
320 	 * It may happen that all DEV_BSIZE subranges are marked clean and thus
321 	 * the whole page would be considered clean despite having some
322 	 * dirty data.
323 	 * For this reason we should shrink the range to DEV_BSIZE aligned
324 	 * boundaries before calling vm_page_clear_dirty.
325 	 */
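	/*
	 * Worked example (illustrative, assuming DEV_BSIZE == 512): with
	 * off = 100 and nbytes = 1000, end = rounddown2(1100, 512) = 1024
	 * and off = roundup2(100, 512) = 512, so only the fully covered
	 * subrange [512, 1024) (nbytes = 512) is cleared below.
	 */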
326 	end = rounddown2(off + nbytes, DEV_BSIZE);
327 	off = roundup2(off, DEV_BSIZE);
328 	nbytes = end - off;
329 
330 	obj = vp->v_object;
331 	vm_page_grab_valid_unlocked(&pp, obj, OFF_TO_IDX(start),
332 	    VM_ALLOC_NOCREAT | VM_ALLOC_SBUSY | VM_ALLOC_NORMAL |
333 	    VM_ALLOC_IGN_SBUSY);
334 	if (pp != NULL) {
335 		ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
336 		vm_object_pip_add(obj, 1);
337 		pmap_remove_write(pp);
338 		if (nbytes != 0)
339 			vm_page_clear_dirty(pp, off, nbytes);
340 	}
341 	return (pp);
342 }
343 
344 static void
345 page_unbusy(vm_page_t pp)
346 {
347 
348 	vm_page_sunbusy(pp);
349 	vm_object_pip_wakeup(pp->object);
350 }
351 
352 static vm_page_t
353 page_hold(vnode_t *vp, int64_t start)
354 {
355 	vm_object_t obj;
356 	vm_page_t m;
357 
358 	obj = vp->v_object;
359 	vm_page_grab_valid_unlocked(&m, obj, OFF_TO_IDX(start),
360 	    VM_ALLOC_NOCREAT | VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY |
361 	    VM_ALLOC_NOBUSY);
362 	return (m);
363 }
364 
365 static void
366 page_unhold(vm_page_t pp)
367 {
368 	vm_page_unwire(pp, PQ_ACTIVE);
369 }
370 
371 /*
372  * When a file is memory mapped, we must keep the IO data synchronized
373  * between the DMU cache and the memory mapped pages.  What this means:
374  *
375  * On Write:	If we find a memory mapped page, we write to *both*
376  *		the page and the dmu buffer.
377  */
378 void
379 update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
380 {
381 	vm_object_t obj;
382 	struct sf_buf *sf;
383 	vnode_t *vp = ZTOV(zp);
384 	caddr_t va;
385 	int off;
386 
387 	ASSERT3P(vp->v_mount, !=, NULL);
388 	obj = vp->v_object;
389 	ASSERT3P(obj, !=, NULL);
390 
391 	off = start & PAGEOFFSET;
392 	vm_object_pip_add(obj, 1);
393 	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
394 		vm_page_t pp;
395 		int nbytes = imin(PAGESIZE - off, len);
396 
397 		if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
398 			va = zfs_map_page(pp, &sf);
399 			(void) dmu_read(os, zp->z_id, start + off, nbytes,
400 			    va + off, DMU_READ_PREFETCH);
401 			zfs_unmap_page(sf);
402 			page_unbusy(pp);
403 		}
404 		len -= nbytes;
405 		off = 0;
406 	}
407 	vm_object_pip_wakeup(obj);
408 }
409 
410 /*
411  * Read with UIO_NOCOPY flag means that sendfile(2) requests
412  * ZFS to populate a range of page cache pages with data.
413  *
414  * NOTE: this function could be optimized to pre-allocate
415  * all pages in advance, drain exclusive busy on all of them,
416  * map them into contiguous KVA region and populate them
417  * in one single dmu_read() call.
418  */
419 int
420 mappedread_sf(znode_t *zp, int nbytes, zfs_uio_t *uio)
421 {
422 	vnode_t *vp = ZTOV(zp);
423 	objset_t *os = zp->z_zfsvfs->z_os;
424 	struct sf_buf *sf;
425 	vm_object_t obj;
426 	vm_page_t pp;
427 	int64_t start;
428 	caddr_t va;
429 	int len = nbytes;
430 	int error = 0;
431 
432 	ASSERT3U(zfs_uio_segflg(uio), ==, UIO_NOCOPY);
433 	ASSERT3P(vp->v_mount, !=, NULL);
434 	obj = vp->v_object;
435 	ASSERT3P(obj, !=, NULL);
436 	ASSERT0(zfs_uio_offset(uio) & PAGEOFFSET);
437 
438 	for (start = zfs_uio_offset(uio); len > 0; start += PAGESIZE) {
439 		int bytes = MIN(PAGESIZE, len);
440 
441 		pp = vm_page_grab_unlocked(obj, OFF_TO_IDX(start),
442 		    VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
443 		if (vm_page_none_valid(pp)) {
444 			va = zfs_map_page(pp, &sf);
445 			error = dmu_read(os, zp->z_id, start, bytes, va,
446 			    DMU_READ_PREFETCH);
447 			if (bytes != PAGESIZE && error == 0)
448 				memset(va + bytes, 0, PAGESIZE - bytes);
449 			zfs_unmap_page(sf);
450 			if (error == 0) {
451 				vm_page_valid(pp);
452 				vm_page_activate(pp);
453 				vm_page_sunbusy(pp);
454 			} else {
455 				zfs_vmobject_wlock(obj);
456 				if (!vm_page_wired(pp) && pp->valid == 0 &&
457 				    vm_page_busy_tryupgrade(pp))
458 					vm_page_free(pp);
459 				else {
460 					vm_page_deactivate_noreuse(pp);
461 					vm_page_sunbusy(pp);
462 				}
463 				zfs_vmobject_wunlock(obj);
464 			}
465 		} else {
466 			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
467 			vm_page_sunbusy(pp);
468 		}
469 		if (error)
470 			break;
471 		zfs_uio_advance(uio, bytes);
472 		len -= bytes;
473 	}
474 	return (error);
475 }
476 
477 /*
478  * When a file is memory mapped, we must keep the IO data synchronized
479  * between the DMU cache and the memory mapped pages.  What this means:
480  *
481  * On Read:	We "read" preferentially from memory mapped pages,
482  *		else we fall back to the dmu buffer.
483  *
484  * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
485  *	 the file is memory mapped.
486  */
487 int
488 mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
489 {
490 	vnode_t *vp = ZTOV(zp);
491 	vm_object_t obj;
492 	int64_t start;
493 	int len = nbytes;
494 	int off;
495 	int error = 0;
496 
497 	ASSERT3P(vp->v_mount, !=, NULL);
498 	obj = vp->v_object;
499 	ASSERT3P(obj, !=, NULL);
500 
501 	start = zfs_uio_offset(uio);
502 	off = start & PAGEOFFSET;
503 	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
504 		vm_page_t pp;
505 		uint64_t bytes = MIN(PAGESIZE - off, len);
506 
507 		if ((pp = page_hold(vp, start))) {
508 			struct sf_buf *sf;
509 			caddr_t va;
510 
511 			va = zfs_map_page(pp, &sf);
512 			error = vn_io_fault_uiomove(va + off, bytes,
513 			    GET_UIO_STRUCT(uio));
514 			zfs_unmap_page(sf);
515 			page_unhold(pp);
516 		} else {
517 			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
518 			    uio, bytes);
519 		}
520 		len -= bytes;
521 		off = 0;
522 		if (error)
523 			break;
524 	}
525 	return (error);
526 }
527 
528 int
529 zfs_write_simple(znode_t *zp, const void *data, size_t len,
530     loff_t pos, size_t *presid)
531 {
532 	int error = 0;
533 	ssize_t resid;
534 
535 	error = vn_rdwr(UIO_WRITE, ZTOV(zp), __DECONST(void *, data), len, pos,
536 	    UIO_SYSSPACE, IO_SYNC, kcred, NOCRED, &resid, curthread);
537 
538 	if (error) {
539 		return (SET_ERROR(error));
540 	} else if (presid == NULL) {
541 		if (resid != 0) {
542 			error = SET_ERROR(EIO);
543 		}
544 	} else {
545 		*presid = resid;
546 	}
547 	return (error);
548 }
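
/*
 * Illustrative usage sketch: a caller that wants a fully synchronous write
 * and treats any short write as an error can pass a NULL presid, e.g.
 *
 *	error = zfs_write_simple(zp, buf, buflen, offset, NULL);
 *
 * where buf, buflen, and offset are hypothetical caller variables.  With a
 * NULL presid a non-zero residual is converted to EIO above; otherwise the
 * residual byte count is returned through *presid.
 */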
549 
550 void
551 zfs_zrele_async(znode_t *zp)
552 {
553 	vnode_t *vp = ZTOV(zp);
554 	objset_t *os = ITOZSB(vp)->z_os;
555 
556 	VN_RELE_ASYNC(vp, dsl_pool_zrele_taskq(dmu_objset_pool(os)));
557 }
558 
559 static int
560 zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
561 {
562 	int error;
563 
564 	*vpp = arg;
565 	error = vn_lock(*vpp, lkflags);
566 	if (error != 0)
567 		vrele(*vpp);
568 	return (error);
569 }
570 
571 static int
572 zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags)
573 {
574 	znode_t *zdp = VTOZ(dvp);
575 	zfsvfs_t *zfsvfs __unused = zdp->z_zfsvfs;
576 	int error;
577 	int ltype;
578 
579 	if (zfsvfs->z_replay == B_FALSE)
580 		ASSERT_VOP_LOCKED(dvp, __func__);
581 
582 	if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
583 		ASSERT3P(dvp, ==, vp);
584 		vref(dvp);
585 		ltype = lkflags & LK_TYPE_MASK;
586 		if (ltype != VOP_ISLOCKED(dvp)) {
587 			if (ltype == LK_EXCLUSIVE)
588 				vn_lock(dvp, LK_UPGRADE | LK_RETRY);
589 			else /* if (ltype == LK_SHARED) */
590 				vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);
591 
592 			/*
593 			 * Relocking for the "." case could leave us with
594 			 * a reclaimed vnode.
595 			 */
596 			if (VN_IS_DOOMED(dvp)) {
597 				vrele(dvp);
598 				return (SET_ERROR(ENOENT));
599 			}
600 		}
601 		return (0);
602 	} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
603 		/*
604 		 * Note that in this case, dvp is the child vnode, and we
605 		 * are looking up the parent vnode - exactly reverse from
606 		 * normal operation.  Unlocking dvp requires some rather
607 		 * tricky unlock/relock dance to prevent mp from being freed;
608 		 * use vn_vget_ino_gen() which takes care of all that.
609 		 *
610 		 * XXX Note that there is a time window when both vnodes are
611 		 * unlocked.  It is possible, although highly unlikely, that
612 		 * during that window the parent-child relationship between
613 		 * the vnodes may change, for example, get reversed.
614 		 * In that case we would have a wrong lock order for the vnodes.
615 		 * All other filesystems seem to ignore this problem, so we
616 		 * do the same here.
617 		 * A potential solution could be implemented as follows:
618 		 * - using LK_NOWAIT when locking the second vnode and retrying
619 		 *   if necessary
620 		 * - checking that the parent-child relationship still holds
621 		 *   after locking both vnodes and retrying if it doesn't
622 		 */
623 		error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp);
624 		return (error);
625 	} else {
626 		error = vn_lock(vp, lkflags);
627 		if (error != 0)
628 			vrele(vp);
629 		return (error);
630 	}
631 }
632 
633 /*
634  * Lookup an entry in a directory, or an extended attribute directory.
635  * If it exists, return a held vnode reference for it.
636  *
637  *	IN:	dvp	- vnode of directory to search.
638  *		nm	- name of entry to lookup.
639  *		pnp	- full pathname to lookup [UNUSED].
640  *		flags	- LOOKUP_XATTR set if looking for an attribute.
641  *		rdir	- root directory vnode [UNUSED].
642  *		cr	- credentials of caller.
643  *		ct	- caller context
644  *
645  *	OUT:	vpp	- vnode of located entry, NULL if not found.
646  *
647  *	RETURN:	0 on success, error code on failure.
648  *
649  * Timestamps:
650  *	NA
651  */
652 static int
653 zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
654     struct componentname *cnp, int nameiop, cred_t *cr, int flags,
655     boolean_t cached)
656 {
657 	znode_t *zdp = VTOZ(dvp);
658 	znode_t *zp;
659 	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
660 	seqc_t dvp_seqc;
661 	int	error = 0;
662 
663 	/*
664 	 * Fast path lookup; however, we must skip DNLC lookup
665 	 * for case folding or normalizing lookups because the
666 	 * DNLC code only stores the passed in name.  This means
667 	 * creating 'a' and removing 'A' on a case insensitive
668 	 * file system would work, but DNLC still thinks 'a'
669 	 * exists and won't let you create it again on the next
670 	 * pass through fast path.
671 	 */
672 	if (!(flags & LOOKUP_XATTR)) {
673 		if (dvp->v_type != VDIR) {
674 			return (SET_ERROR(ENOTDIR));
675 		} else if (zdp->z_sa_hdl == NULL) {
676 			return (SET_ERROR(EIO));
677 		}
678 	}
679 
680 	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp,
681 	    const char *, nm);
682 
683 	if ((error = zfs_enter_verify_zp(zfsvfs, zdp, FTAG)) != 0)
684 		return (error);
685 
686 	dvp_seqc = vn_seqc_read_notmodify(dvp);
687 
688 	*vpp = NULL;
689 
690 	if (flags & LOOKUP_XATTR) {
691 		/*
692 		 * If the xattr property is off, refuse the lookup request.
693 		 */
694 		if (!(zfsvfs->z_flags & ZSB_XATTR)) {
695 			zfs_exit(zfsvfs, FTAG);
696 			return (SET_ERROR(EOPNOTSUPP));
697 		}
698 
699 		/*
700 		 * We don't allow recursive attributes.
701 		 * Maybe someday we will.
702 		 */
703 		if (zdp->z_pflags & ZFS_XATTR) {
704 			zfs_exit(zfsvfs, FTAG);
705 			return (SET_ERROR(EINVAL));
706 		}
707 
708 		if ((error = zfs_get_xattrdir(VTOZ(dvp), &zp, cr, flags))) {
709 			zfs_exit(zfsvfs, FTAG);
710 			return (error);
711 		}
712 		*vpp = ZTOV(zp);
713 
714 		/*
715 		 * Do we have permission to get into attribute directory?
716 		 */
717 		error = zfs_zaccess(zp, ACE_EXECUTE, 0, B_FALSE, cr, NULL);
718 		if (error) {
719 			vrele(ZTOV(zp));
720 		}
721 
722 		zfs_exit(zfsvfs, FTAG);
723 		return (error);
724 	}
725 
726 	/*
727 	 * Check accessibility of directory if we're not coming in via
728 	 * VOP_CACHEDLOOKUP.
729 	 */
730 	if (!cached) {
731 #ifdef NOEXECCHECK
732 		if ((cnp->cn_flags & NOEXECCHECK) != 0) {
733 			cnp->cn_flags &= ~NOEXECCHECK;
734 		} else
735 #endif
736 		if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr,
737 		    NULL))) {
738 			zfs_exit(zfsvfs, FTAG);
739 			return (error);
740 		}
741 	}
742 
743 	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
744 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
745 		zfs_exit(zfsvfs, FTAG);
746 		return (SET_ERROR(EILSEQ));
747 	}
748 
749 
750 	/*
751 	 * First handle the special cases.
752 	 */
753 	if ((cnp->cn_flags & ISDOTDOT) != 0) {
754 		/*
755 		 * If we are a snapshot mounted under .zfs, return
756 		 * the vp for the snapshot directory.
757 		 */
758 		if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
759 			struct componentname cn;
760 			vnode_t *zfsctl_vp;
761 			int ltype;
762 
763 			zfs_exit(zfsvfs, FTAG);
764 			ltype = VOP_ISLOCKED(dvp);
765 			VOP_UNLOCK(dvp);
766 			error = zfsctl_root(zfsvfs->z_parent, LK_SHARED,
767 			    &zfsctl_vp);
768 			if (error == 0) {
769 				cn.cn_nameptr = "snapshot";
770 				cn.cn_namelen = strlen(cn.cn_nameptr);
771 				cn.cn_nameiop = cnp->cn_nameiop;
772 				cn.cn_flags = cnp->cn_flags & ~ISDOTDOT;
773 				cn.cn_lkflags = cnp->cn_lkflags;
774 				error = VOP_LOOKUP(zfsctl_vp, vpp, &cn);
775 				vput(zfsctl_vp);
776 			}
777 			vn_lock(dvp, ltype | LK_RETRY);
778 			return (error);
779 		}
780 	}
781 	if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
782 		zfs_exit(zfsvfs, FTAG);
783 		if (zfsvfs->z_show_ctldir == ZFS_SNAPDIR_DISABLED)
784 			return (SET_ERROR(ENOENT));
785 		if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
786 			return (SET_ERROR(ENOTSUP));
787 		error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp);
788 		return (error);
789 	}
790 
791 	/*
792 	 * The loop retries the lookup if the parent-child relationship
793 	 * changes during the dot-dot locking complexities.
794 	 */
795 	for (;;) {
796 		uint64_t parent;
797 
798 		error = zfs_dirlook(zdp, nm, &zp);
799 		if (error == 0)
800 			*vpp = ZTOV(zp);
801 
802 		zfs_exit(zfsvfs, FTAG);
803 		if (error != 0)
804 			break;
805 
806 		error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags);
807 		if (error != 0) {
808 			/*
809 			 * If we've got a locking error, then the vnode
810 			 * got reclaimed because of a force unmount.
811 			 * We never enter doomed vnodes into the name cache.
812 			 */
813 			*vpp = NULL;
814 			return (error);
815 		}
816 
817 		if ((cnp->cn_flags & ISDOTDOT) == 0)
818 			break;
819 
820 		if ((error = zfs_enter(zfsvfs, FTAG)) != 0) {
821 			vput(ZTOV(zp));
822 			*vpp = NULL;
823 			return (error);
824 		}
825 		if (zdp->z_sa_hdl == NULL) {
826 			error = SET_ERROR(EIO);
827 		} else {
828 			error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
829 			    &parent, sizeof (parent));
830 		}
831 		if (error != 0) {
832 			zfs_exit(zfsvfs, FTAG);
833 			vput(ZTOV(zp));
834 			break;
835 		}
836 		if (zp->z_id == parent) {
837 			zfs_exit(zfsvfs, FTAG);
838 			break;
839 		}
840 		vput(ZTOV(zp));
841 	}
842 
843 	if (error != 0)
844 		*vpp = NULL;
845 
846 	/* Translate errors and add SAVENAME when needed. */
847 	if (cnp->cn_flags & ISLASTCN) {
848 		switch (nameiop) {
849 		case CREATE:
850 		case RENAME:
851 			if (error == ENOENT) {
852 				error = EJUSTRETURN;
853 #if __FreeBSD_version < 1400068
854 				cnp->cn_flags |= SAVENAME;
855 #endif
856 				break;
857 			}
858 			zfs_fallthrough;
859 		case DELETE:
860 #if __FreeBSD_version < 1400068
861 			if (error == 0)
862 				cnp->cn_flags |= SAVENAME;
863 #endif
864 			break;
865 		}
866 	}
867 
868 	if ((cnp->cn_flags & ISDOTDOT) != 0) {
869 		/*
870 		 * FIXME: zfs_lookup_lock relocks vnodes and does nothing to
871 		 * handle races. In particular different callers may end up
872 		 * with different vnodes and will try to add conflicting
873 		 * entries to the namecache.
874 		 *
875 		 * While finding a different result may be acceptable in the face
876 		 * of concurrent modification, adding conflicting entries
877 		 * trips over an assert in the namecache.
878 		 *
879 		 * Ultimately let an entry through once everything settles.
880 		 */
881 		if (!vn_seqc_consistent(dvp, dvp_seqc)) {
882 			cnp->cn_flags &= ~MAKEENTRY;
883 		}
884 	}
885 
886 	/* Insert name into cache (as non-existent) if appropriate. */
887 	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
888 	    error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0)
889 		cache_enter(dvp, NULL, cnp);
890 
891 	/* Insert name into cache if appropriate. */
892 	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
893 	    error == 0 && (cnp->cn_flags & MAKEENTRY)) {
894 		if (!(cnp->cn_flags & ISLASTCN) ||
895 		    (nameiop != DELETE && nameiop != RENAME)) {
896 			cache_enter(dvp, *vpp, cnp);
897 		}
898 	}
899 
900 	return (error);
901 }
902 
903 static inline bool
904 is_nametoolong(zfsvfs_t *zfsvfs, const char *name)
905 {
906 	size_t dlen = strlen(name);
907 	return ((!zfsvfs->z_longname && dlen >= ZAP_MAXNAMELEN) ||
908 	    dlen >= ZAP_MAXNAMELEN_NEW);
909 }
910 
911 /*
912  * Attempt to create a new entry in a directory.  If the entry
913  * already exists, truncate the file if permissible, else return
914  * an error.  Return the vp of the created or trunc'd file.
915  *
916  *	IN:	dvp	- vnode of directory to put new file entry in.
917  *		name	- name of new file entry.
918  *		vap	- attributes of new file.
919  *		excl	- flag indicating exclusive or non-exclusive mode.
920  *		mode	- mode to open file with.
921  *		cr	- credentials of caller.
922  *		flag	- large file flag [UNUSED].
923  *		ct	- caller context
924  *		vsecp	- ACL to be set
925  *		mnt_ns	- Unused on FreeBSD
926  *
927  *	OUT:	vpp	- vnode of created or trunc'd entry.
928  *
929  *	RETURN:	0 on success, error code on failure.
930  *
931  * Timestamps:
932  *	dvp - ctime|mtime updated if new entry created
933  *	 vp - ctime|mtime always, atime if new
934  */
935 int
936 zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
937     znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp, zidmap_t *mnt_ns)
938 {
939 	(void) excl, (void) mode, (void) flag;
940 	znode_t		*zp;
941 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
942 	zilog_t		*zilog;
943 	objset_t	*os;
944 	dmu_tx_t	*tx;
945 	int		error;
946 	uid_t		uid = crgetuid(cr);
947 	gid_t		gid = crgetgid(cr);
948 	uint64_t	projid = ZFS_DEFAULT_PROJID;
949 	zfs_acl_ids_t   acl_ids;
950 	boolean_t	fuid_dirtied;
951 	uint64_t	txtype;
952 #ifdef DEBUG_VFS_LOCKS
953 	vnode_t	*dvp = ZTOV(dzp);
954 #endif
955 
956 	if (is_nametoolong(zfsvfs, name))
957 		return (SET_ERROR(ENAMETOOLONG));
958 
959 	/*
960 	 * If we have an ephemeral id, ACL, or XVATTR then
961 	 * make sure file system is at proper version
962 	 */
963 	if (zfsvfs->z_use_fuids == B_FALSE &&
964 	    (vsecp || (vap->va_mask & AT_XVATTR) ||
965 	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
966 		return (SET_ERROR(EINVAL));
967 
968 	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
969 		return (error);
970 	os = zfsvfs->z_os;
971 	zilog = zfsvfs->z_log;
972 
973 	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
974 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
975 		zfs_exit(zfsvfs, FTAG);
976 		return (SET_ERROR(EILSEQ));
977 	}
978 
979 	if (vap->va_mask & AT_XVATTR) {
980 		if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
981 		    crgetuid(cr), cr, vap->va_type)) != 0) {
982 			zfs_exit(zfsvfs, FTAG);
983 			return (error);
984 		}
985 	}
986 
987 	*zpp = NULL;
988 
989 	if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
990 		vap->va_mode &= ~S_ISVTX;
991 
992 	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
993 	if (error) {
994 		zfs_exit(zfsvfs, FTAG);
995 		return (error);
996 	}
997 	ASSERT3P(zp, ==, NULL);
998 
999 	/*
1000 	 * Create a new file object and update the directory
1001 	 * to reference it.
1002 	 */
1003 	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
1004 		goto out;
1005 	}
1006 
1007 	/*
1008 	 * We only support the creation of regular files in
1009 	 * extended attribute directories.
1010 	 */
1011 
1012 	if ((dzp->z_pflags & ZFS_XATTR) &&
1013 	    (vap->va_type != VREG)) {
1014 		error = SET_ERROR(EINVAL);
1015 		goto out;
1016 	}
1017 
1018 	if ((error = zfs_acl_ids_create(dzp, 0, vap,
1019 	    cr, vsecp, &acl_ids, NULL)) != 0)
1020 		goto out;
1021 
1022 	if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
1023 		projid = zfs_inherit_projid(dzp);
1024 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
1025 		zfs_acl_ids_free(&acl_ids);
1026 		error = SET_ERROR(EDQUOT);
1027 		goto out;
1028 	}
1029 
1030 	getnewvnode_reserve();
1031 
1032 	tx = dmu_tx_create(os);
1033 
1034 	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1035 	    ZFS_SA_BASE_ATTR_SIZE);
1036 
1037 	fuid_dirtied = zfsvfs->z_fuid_dirty;
1038 	if (fuid_dirtied)
1039 		zfs_fuid_txhold(zfsvfs, tx);
1040 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
1041 	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
1042 	if (!zfsvfs->z_use_sa &&
1043 	    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1044 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
1045 		    0, acl_ids.z_aclp->z_acl_bytes);
1046 	}
1047 	error = dmu_tx_assign(tx, TXG_WAIT);
1048 	if (error) {
1049 		zfs_acl_ids_free(&acl_ids);
1050 		dmu_tx_abort(tx);
1051 		getnewvnode_drop_reserve();
1052 		zfs_exit(zfsvfs, FTAG);
1053 		return (error);
1054 	}
1055 	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1056 
1057 	error = zfs_link_create(dzp, name, zp, tx, ZNEW);
1058 	if (error != 0) {
1059 		/*
1060 		 * Since we failed to add the directory entry for it,
1061 		 * delete the newly created dnode.
1062 		 */
1063 		zfs_znode_delete(zp, tx);
1064 		VOP_UNLOCK(ZTOV(zp));
1065 		zrele(zp);
1066 		zfs_acl_ids_free(&acl_ids);
1067 		dmu_tx_commit(tx);
1068 		getnewvnode_drop_reserve();
1069 		goto out;
1070 	}
1071 
1072 	if (fuid_dirtied)
1073 		zfs_fuid_sync(zfsvfs, tx);
1074 
1075 	txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
1076 	zfs_log_create(zilog, tx, txtype, dzp, zp, name,
1077 	    vsecp, acl_ids.z_fuidp, vap);
1078 	zfs_acl_ids_free(&acl_ids);
1079 	dmu_tx_commit(tx);
1080 
1081 	getnewvnode_drop_reserve();
1082 
1083 out:
1084 	VNCHECKREF(dvp);
1085 	if (error == 0) {
1086 		*zpp = zp;
1087 	}
1088 
1089 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1090 		zil_commit(zilog, 0);
1091 
1092 	zfs_exit(zfsvfs, FTAG);
1093 	return (error);
1094 }
1095 
1096 /*
1097  * Remove an entry from a directory.
1098  *
1099  *	IN:	dvp	- vnode of directory to remove entry from.
1100  *		name	- name of entry to remove.
1101  *		cr	- credentials of caller.
1102  *		ct	- caller context
1103  *		flags	- case flags
1104  *
1105  *	RETURN:	0 on success, error code on failure.
1106  *
1107  * Timestamps:
1108  *	dvp - ctime|mtime
1109  *	 vp - ctime (if nlink > 0)
1110  */
1111 static int
1112 zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
1113 {
1114 	znode_t		*dzp = VTOZ(dvp);
1115 	znode_t		*zp;
1116 	znode_t		*xzp;
1117 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
1118 	zilog_t		*zilog;
1119 	uint64_t	xattr_obj;
1120 	uint64_t	obj = 0;
1121 	dmu_tx_t	*tx;
1122 	boolean_t	unlinked;
1123 	uint64_t	txtype;
1124 	int		error;
1125 
1126 
1127 	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
1128 		return (error);
1129 	zp = VTOZ(vp);
1130 	if ((error = zfs_verify_zp(zp)) != 0) {
1131 		zfs_exit(zfsvfs, FTAG);
1132 		return (error);
1133 	}
1134 	zilog = zfsvfs->z_log;
1135 
1136 	xattr_obj = 0;
1137 	xzp = NULL;
1138 
1139 	if ((error = zfs_zaccess_delete(dzp, zp, cr, NULL))) {
1140 		goto out;
1141 	}
1142 
1143 	/*
1144 	 * Need to use rmdir for removing directories.
1145 	 */
1146 	if (vp->v_type == VDIR) {
1147 		error = SET_ERROR(EPERM);
1148 		goto out;
1149 	}
1150 
1151 	vnevent_remove(vp, dvp, name, ct);
1152 
1153 	obj = zp->z_id;
1154 
1155 	/* are there any extended attributes? */
1156 	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
1157 	    &xattr_obj, sizeof (xattr_obj));
1158 	if (error == 0 && xattr_obj) {
1159 		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
1160 		ASSERT0(error);
1161 	}
1162 
1163 	/*
1164 	 * We may delete the znode now, or we may put it in the unlinked set;
1165 	 * it depends on whether we're the last link, and on whether there are
1166 	 * other holds on the vnode.  So we dmu_tx_hold() the right things to
1167 	 * allow for either case.
1168 	 */
1169 	tx = dmu_tx_create(zfsvfs->z_os);
1170 	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
1171 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1172 	zfs_sa_upgrade_txholds(tx, zp);
1173 	zfs_sa_upgrade_txholds(tx, dzp);
1174 
1175 	if (xzp) {
1176 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
1177 		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
1178 	}
1179 
1180 	/* charge as an update -- would be nice not to charge at all */
1181 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1182 
1183 	/*
1184 	 * Mark this transaction as typically resulting in a net free of space
1185 	 */
1186 	dmu_tx_mark_netfree(tx);
1187 
1188 	error = dmu_tx_assign(tx, TXG_WAIT);
1189 	if (error) {
1190 		dmu_tx_abort(tx);
1191 		zfs_exit(zfsvfs, FTAG);
1192 		return (error);
1193 	}
1194 
1195 	/*
1196 	 * Remove the directory entry.
1197 	 */
1198 	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked);
1199 
1200 	if (error) {
1201 		dmu_tx_commit(tx);
1202 		goto out;
1203 	}
1204 
1205 	if (unlinked) {
1206 		zfs_unlinked_add(zp, tx);
1207 		vp->v_vflag |= VV_NOSYNC;
1208 	}
1209 	/* XXX check changes to linux vnops */
1210 	txtype = TX_REMOVE;
1211 	zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked);
1212 
1213 	dmu_tx_commit(tx);
1214 out:
1215 
1216 	if (xzp)
1217 		vrele(ZTOV(xzp));
1218 
1219 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1220 		zil_commit(zilog, 0);
1221 
1222 
1223 	zfs_exit(zfsvfs, FTAG);
1224 	return (error);
1225 }
1226 
1227 
1228 static int
1229 zfs_lookup_internal(znode_t *dzp, const char *name, vnode_t **vpp,
1230     struct componentname *cnp, int nameiop)
1231 {
1232 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
1233 	int error;
1234 
1235 	cnp->cn_nameptr = __DECONST(char *, name);
1236 	cnp->cn_namelen = strlen(name);
1237 	cnp->cn_nameiop = nameiop;
1238 	cnp->cn_flags = ISLASTCN;
1239 #if __FreeBSD_version < 1400068
1240 	cnp->cn_flags |= SAVENAME;
1241 #endif
1242 	cnp->cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
1243 	cnp->cn_cred = kcred;
1244 #if __FreeBSD_version < 1400037
1245 	cnp->cn_thread = curthread;
1246 #endif
1247 
1248 	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay) {
1249 		struct vop_lookup_args a;
1250 
1251 		a.a_gen.a_desc = &vop_lookup_desc;
1252 		a.a_dvp = ZTOV(dzp);
1253 		a.a_vpp = vpp;
1254 		a.a_cnp = cnp;
1255 		error = vfs_cache_lookup(&a);
1256 	} else {
1257 		error = zfs_lookup(ZTOV(dzp), name, vpp, cnp, nameiop, kcred, 0,
1258 		    B_FALSE);
1259 	}
1260 #ifdef ZFS_DEBUG
1261 	if (error) {
1262 		printf("got error %d on name %s on op %d\n", error, name,
1263 		    nameiop);
1264 		kdb_backtrace();
1265 	}
1266 #endif
1267 	return (error);
1268 }
1269 
1270 int
1271 zfs_remove(znode_t *dzp, const char *name, cred_t *cr, int flags)
1272 {
1273 	vnode_t *vp;
1274 	int error;
1275 	struct componentname cn;
1276 
1277 	if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
1278 		return (error);
1279 
1280 	error = zfs_remove_(ZTOV(dzp), vp, name, cr);
1281 	vput(vp);
1282 	return (error);
1283 }
1284 /*
1285  * Create a new directory and insert it into dvp using the name
1286  * provided.  Return a pointer to the inserted directory.
1287  *
1288  *	IN:	dvp	- vnode of directory to add subdir to.
1289  *		dirname	- name of new directory.
1290  *		vap	- attributes of new directory.
1291  *		cr	- credentials of caller.
1292  *		ct	- caller context
1293  *		flags	- case flags
1294  *		vsecp	- ACL to be set
1295  *		mnt_ns	- Unused on FreeBSD
1296  *
1297  *	OUT:	vpp	- vnode of created directory.
1298  *
1299  *	RETURN:	0 on success, error code on failure.
1300  *
1301  * Timestamps:
1302  *	dvp - ctime|mtime updated
1303  *	 vp - ctime|mtime|atime updated
1304  */
1305 int
1306 zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
1307     cred_t *cr, int flags, vsecattr_t *vsecp, zidmap_t *mnt_ns)
1308 {
1309 	(void) flags, (void) vsecp;
1310 	znode_t		*zp;
1311 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
1312 	zilog_t		*zilog;
1313 	uint64_t	txtype;
1314 	dmu_tx_t	*tx;
1315 	int		error;
1316 	uid_t		uid = crgetuid(cr);
1317 	gid_t		gid = crgetgid(cr);
1318 	zfs_acl_ids_t   acl_ids;
1319 	boolean_t	fuid_dirtied;
1320 
1321 	ASSERT3U(vap->va_type, ==, VDIR);
1322 
1323 	if (is_nametoolong(zfsvfs, dirname))
1324 		return (SET_ERROR(ENAMETOOLONG));
1325 
1326 	/*
1327 	 * If we have an ephemeral id, ACL, or XVATTR then
1328 	 * make sure file system is at proper version
1329 	 */
1330 	if (zfsvfs->z_use_fuids == B_FALSE &&
1331 	    ((vap->va_mask & AT_XVATTR) ||
1332 	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
1333 		return (SET_ERROR(EINVAL));
1334 
1335 	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
1336 		return (error);
1337 	zilog = zfsvfs->z_log;
1338 
1339 	if (dzp->z_pflags & ZFS_XATTR) {
1340 		zfs_exit(zfsvfs, FTAG);
1341 		return (SET_ERROR(EINVAL));
1342 	}
1343 
1344 	if (zfsvfs->z_utf8 && u8_validate(dirname,
1345 	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
1346 		zfs_exit(zfsvfs, FTAG);
1347 		return (SET_ERROR(EILSEQ));
1348 	}
1349 
1350 	if (vap->va_mask & AT_XVATTR) {
1351 		if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
1352 		    crgetuid(cr), cr, vap->va_type)) != 0) {
1353 			zfs_exit(zfsvfs, FTAG);
1354 			return (error);
1355 		}
1356 	}
1357 
1358 	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
1359 	    NULL, &acl_ids, NULL)) != 0) {
1360 		zfs_exit(zfsvfs, FTAG);
1361 		return (error);
1362 	}
1363 
1364 	/*
1365 	 * First make sure the new directory doesn't exist.
1366 	 *
1367 	 * Existence is checked first to make sure we don't return
1368 	 * EACCES instead of EEXIST which can cause some applications
1369 	 * to fail.
1370 	 */
1371 	*zpp = NULL;
1372 
1373 	if ((error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW))) {
1374 		zfs_acl_ids_free(&acl_ids);
1375 		zfs_exit(zfsvfs, FTAG);
1376 		return (error);
1377 	}
1378 	ASSERT3P(zp, ==, NULL);
1379 
1380 	if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr,
1381 	    mnt_ns))) {
1382 		zfs_acl_ids_free(&acl_ids);
1383 		zfs_exit(zfsvfs, FTAG);
1384 		return (error);
1385 	}
1386 
1387 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
1388 		zfs_acl_ids_free(&acl_ids);
1389 		zfs_exit(zfsvfs, FTAG);
1390 		return (SET_ERROR(EDQUOT));
1391 	}
1392 
1393 	/*
1394 	 * Add a new entry to the directory.
1395 	 */
1396 	getnewvnode_reserve();
1397 	tx = dmu_tx_create(zfsvfs->z_os);
1398 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
1399 	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
1400 	fuid_dirtied = zfsvfs->z_fuid_dirty;
1401 	if (fuid_dirtied)
1402 		zfs_fuid_txhold(zfsvfs, tx);
1403 	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1404 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
1405 		    acl_ids.z_aclp->z_acl_bytes);
1406 	}
1407 
1408 	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1409 	    ZFS_SA_BASE_ATTR_SIZE);
1410 
1411 	error = dmu_tx_assign(tx, TXG_WAIT);
1412 	if (error) {
1413 		zfs_acl_ids_free(&acl_ids);
1414 		dmu_tx_abort(tx);
1415 		getnewvnode_drop_reserve();
1416 		zfs_exit(zfsvfs, FTAG);
1417 		return (error);
1418 	}
1419 
1420 	/*
1421 	 * Create new node.
1422 	 */
1423 	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1424 
1425 	/*
1426 	 * Now put new name in parent dir.
1427 	 */
1428 	error = zfs_link_create(dzp, dirname, zp, tx, ZNEW);
1429 	if (error != 0) {
1430 		zfs_znode_delete(zp, tx);
1431 		VOP_UNLOCK(ZTOV(zp));
1432 		zrele(zp);
1433 		goto out;
1434 	}
1435 
1436 	if (fuid_dirtied)
1437 		zfs_fuid_sync(zfsvfs, tx);
1438 
1439 	*zpp = zp;
1440 
1441 	txtype = zfs_log_create_txtype(Z_DIR, NULL, vap);
1442 	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL,
1443 	    acl_ids.z_fuidp, vap);
1444 
1445 out:
1446 	zfs_acl_ids_free(&acl_ids);
1447 
1448 	dmu_tx_commit(tx);
1449 
1450 	getnewvnode_drop_reserve();
1451 
1452 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1453 		zil_commit(zilog, 0);
1454 
1455 	zfs_exit(zfsvfs, FTAG);
1456 	return (error);
1457 }
1458 
1459 /*
1460  * Remove a directory subdir entry.  If the current working
1461  * directory is the same as the subdir to be removed, the
1462  * remove will fail.
1463  *
1464  *	IN:	dvp	- vnode of directory to remove from.
1465  *		name	- name of directory to be removed.
1466  *		cwd	- vnode of current working directory.
1467  *		cr	- credentials of caller.
1468  *		ct	- caller context
1469  *		flags	- case flags
1470  *
1471  *	RETURN:	0 on success, error code on failure.
1472  *
1473  * Timestamps:
1474  *	dvp - ctime|mtime updated
1475  */
1476 static int
1477 zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
1478 {
1479 	znode_t		*dzp = VTOZ(dvp);
1480 	znode_t		*zp = VTOZ(vp);
1481 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
1482 	zilog_t		*zilog;
1483 	dmu_tx_t	*tx;
1484 	int		error;
1485 
1486 	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
1487 		return (error);
1488 	if ((error = zfs_verify_zp(zp)) != 0) {
1489 		zfs_exit(zfsvfs, FTAG);
1490 		return (error);
1491 	}
1492 	zilog = zfsvfs->z_log;
1493 
1494 
1495 	if ((error = zfs_zaccess_delete(dzp, zp, cr, NULL))) {
1496 		goto out;
1497 	}
1498 
1499 	if (vp->v_type != VDIR) {
1500 		error = SET_ERROR(ENOTDIR);
1501 		goto out;
1502 	}
1503 
1504 	vnevent_rmdir(vp, dvp, name, ct);
1505 
1506 	tx = dmu_tx_create(zfsvfs->z_os);
1507 	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
1508 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1509 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1510 	zfs_sa_upgrade_txholds(tx, zp);
1511 	zfs_sa_upgrade_txholds(tx, dzp);
1512 	dmu_tx_mark_netfree(tx);
1513 	error = dmu_tx_assign(tx, TXG_WAIT);
1514 	if (error) {
1515 		dmu_tx_abort(tx);
1516 		zfs_exit(zfsvfs, FTAG);
1517 		return (error);
1518 	}
1519 
1520 	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL);
1521 
1522 	if (error == 0) {
1523 		uint64_t txtype = TX_RMDIR;
1524 		zfs_log_remove(zilog, tx, txtype, dzp, name,
1525 		    ZFS_NO_OBJECT, B_FALSE);
1526 	}
1527 
1528 	dmu_tx_commit(tx);
1529 
1530 	if (zfsvfs->z_use_namecache)
1531 		cache_vop_rmdir(dvp, vp);
1532 out:
1533 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1534 		zil_commit(zilog, 0);
1535 
1536 	zfs_exit(zfsvfs, FTAG);
1537 	return (error);
1538 }
1539 
1540 int
1541 zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd, cred_t *cr, int flags)
1542 {
1543 	struct componentname cn;
1544 	vnode_t *vp;
1545 	int error;
1546 
1547 	if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
1548 		return (error);
1549 
1550 	error = zfs_rmdir_(ZTOV(dzp), vp, name, cr);
1551 	vput(vp);
1552 	return (error);
1553 }
1554 
1555 /*
1556  * Read as many directory entries as will fit into the provided
1557  * buffer from the given directory cursor position (specified in
1558  * the uio structure).
1559  *
1560  *	IN:	vp	- vnode of directory to read.
1561  *		uio	- structure supplying read location, range info,
1562  *			  and return buffer.
1563  *		cr	- credentials of caller.
1564  *		ct	- caller context
1565  *
1566  *	OUT:	uio	- updated offset and range, buffer filled.
1567  *		eofp	- set to true if end-of-file detected.
1568  *		ncookies- number of entries in cookies
1569  *		cookies	- offsets to directory entries
1570  *
1571  *	RETURN:	0 on success, error code on failure.
1572  *
1573  * Timestamps:
1574  *	vp - atime updated
1575  *
1576  * Note that the low 4 bits of the cookie returned by zap are always zero.
1577  * This allows us to use the low range for "special" directory entries:
1578  * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
1579  * we use the offset 2 for the '.zfs' directory.
1580  */
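/*
 * Illustrative summary of how the cursor offset is interpreted below:
 * offset 0 emits ".", offset 1 emits "..", offset 2 emits ".zfs" when
 * zfs_show_ctldir() is true, and larger offsets are serialized ZAP cursor
 * positions (hence the "offset <= 3" check when initializing the iterator).
 */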
1581 static int
1582 zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
1583     int *ncookies, cookie_t **cookies)
1584 {
1585 	znode_t		*zp = VTOZ(vp);
1586 	iovec_t		*iovp;
1587 	dirent64_t	*odp;
1588 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
1589 	objset_t	*os;
1590 	caddr_t		outbuf;
1591 	size_t		bufsize;
1592 	zap_cursor_t	zc;
1593 	zap_attribute_t	*zap;
1594 	uint_t		bytes_wanted;
1595 	uint64_t	offset; /* must be unsigned; checks for < 1 */
1596 	uint64_t	parent;
1597 	int		local_eof;
1598 	int		outcount;
1599 	int		error;
1600 	uint8_t		prefetch;
1601 	uint8_t		type;
1602 	int		ncooks;
1603 	cookie_t	*cooks = NULL;
1604 
1605 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
1606 		return (error);
1607 
1608 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
1609 	    &parent, sizeof (parent))) != 0) {
1610 		zfs_exit(zfsvfs, FTAG);
1611 		return (error);
1612 	}
1613 
1614 	/*
1615 	 * If we are not given an eof variable,
1616 	 * use a local one.
1617 	 */
1618 	if (eofp == NULL)
1619 		eofp = &local_eof;
1620 
1621 	/*
1622 	 * Check for valid iov_len.
1623 	 */
1624 	if (GET_UIO_STRUCT(uio)->uio_iov->iov_len <= 0) {
1625 		zfs_exit(zfsvfs, FTAG);
1626 		return (SET_ERROR(EINVAL));
1627 	}
1628 
1629 	/*
1630 	 * Quit if directory has been removed (posix)
1631 	 */
1632 	if ((*eofp = zp->z_unlinked) != 0) {
1633 		zfs_exit(zfsvfs, FTAG);
1634 		return (0);
1635 	}
1636 
1637 	error = 0;
1638 	os = zfsvfs->z_os;
1639 	offset = zfs_uio_offset(uio);
1640 	prefetch = zp->z_zn_prefetch;
1641 	zap = zap_attribute_long_alloc();
1642 
1643 	/*
1644 	 * Initialize the iterator cursor.
1645 	 */
1646 	if (offset <= 3) {
1647 		/*
1648 		 * Start iteration from the beginning of the directory.
1649 		 */
1650 		zap_cursor_init(&zc, os, zp->z_id);
1651 	} else {
1652 		/*
1653 		 * The offset is a serialized cursor.
1654 		 */
1655 		zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
1656 	}
1657 
1658 	/*
1659 	 * Get space to change directory entries into fs independent format.
1660 	 */
1661 	iovp = GET_UIO_STRUCT(uio)->uio_iov;
1662 	bytes_wanted = iovp->iov_len;
1663 	if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1) {
1664 		bufsize = bytes_wanted;
1665 		outbuf = kmem_alloc(bufsize, KM_SLEEP);
1666 		odp = (struct dirent64 *)outbuf;
1667 	} else {
1668 		bufsize = bytes_wanted;
1669 		outbuf = NULL;
1670 		odp = (struct dirent64 *)iovp->iov_base;
1671 	}
1672 
1673 	if (ncookies != NULL) {
1674 		/*
1675 		 * Minimum entry size is dirent size and 1 byte for a file name.
1676 		 */
1677 		ncooks = zfs_uio_resid(uio) / (sizeof (struct dirent) -
1678 		    sizeof (((struct dirent *)NULL)->d_name) + 1);
1679 		cooks = malloc(ncooks * sizeof (*cooks), M_TEMP, M_WAITOK);
1680 		*cookies = cooks;
1681 		*ncookies = ncooks;
1682 	}
1683 
1684 	/*
1685 	 * Transform to file-system independent format
1686 	 */
1687 	outcount = 0;
1688 	while (outcount < bytes_wanted) {
1689 		ino64_t objnum;
1690 		ushort_t reclen;
1691 		off64_t *next = NULL;
1692 
1693 		/*
1694 		 * Special case `.', `..', and `.zfs'.
1695 		 */
1696 		if (offset == 0) {
1697 			(void) strcpy(zap->za_name, ".");
1698 			zap->za_normalization_conflict = 0;
1699 			objnum = zp->z_id;
1700 			type = DT_DIR;
1701 		} else if (offset == 1) {
1702 			(void) strcpy(zap->za_name, "..");
1703 			zap->za_normalization_conflict = 0;
1704 			objnum = parent;
1705 			type = DT_DIR;
1706 		} else if (offset == 2 && zfs_show_ctldir(zp)) {
1707 			(void) strcpy(zap->za_name, ZFS_CTLDIR_NAME);
1708 			zap->za_normalization_conflict = 0;
1709 			objnum = ZFSCTL_INO_ROOT;
1710 			type = DT_DIR;
1711 		} else {
1712 			/*
1713 			 * Grab next entry.
1714 			 */
1715 			if ((error = zap_cursor_retrieve(&zc, zap))) {
1716 				if ((*eofp = (error == ENOENT)) != 0)
1717 					break;
1718 				else
1719 					goto update;
1720 			}
1721 
1722 			if (zap->za_integer_length != 8 ||
1723 			    zap->za_num_integers != 1) {
1724 				cmn_err(CE_WARN, "zap_readdir: bad directory "
1725 				    "entry, obj = %lld, offset = %lld\n",
1726 				    (u_longlong_t)zp->z_id,
1727 				    (u_longlong_t)offset);
1728 				error = SET_ERROR(ENXIO);
1729 				goto update;
1730 			}
1731 
1732 			objnum = ZFS_DIRENT_OBJ(zap->za_first_integer);
1733 			/*
1734 			 * MacOS X can extract the object type here such as:
1735 			 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
1736 			 */
1737 			type = ZFS_DIRENT_TYPE(zap->za_first_integer);
1738 		}
1739 
1740 		reclen = DIRENT64_RECLEN(strlen(zap->za_name));
1741 
1742 		/*
1743 		 * Will this entry fit in the buffer?
1744 		 */
1745 		if (outcount + reclen > bufsize) {
1746 			/*
1747 			 * Did we manage to fit anything in the buffer?
1748 			 */
1749 			if (!outcount) {
1750 				error = SET_ERROR(EINVAL);
1751 				goto update;
1752 			}
1753 			break;
1754 		}
1755 		/*
1756 		 * Add normal entry:
1757 		 */
1758 		odp->d_ino = objnum;
1759 		odp->d_reclen = reclen;
1760 		odp->d_namlen = strlen(zap->za_name);
1761 		/* NOTE: d_off is the offset for the *next* entry. */
1762 		next = &odp->d_off;
1763 		strlcpy(odp->d_name, zap->za_name, odp->d_namlen + 1);
1764 		odp->d_type = type;
1765 		dirent_terminate(odp);
1766 		odp = (dirent64_t *)((intptr_t)odp + reclen);
1767 
1768 		outcount += reclen;
1769 
1770 		ASSERT3S(outcount, <=, bufsize);
1771 
1772 		if (prefetch)
1773 			dmu_prefetch_dnode(os, objnum, ZIO_PRIORITY_SYNC_READ);
1774 
1775 		/*
1776 		 * Move to the next entry, fill in the previous offset.
1777 		 */
1778 		if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
1779 			zap_cursor_advance(&zc);
1780 			offset = zap_cursor_serialize(&zc);
1781 		} else {
1782 			offset += 1;
1783 		}
1784 
1785 		/* Fill the offset right after advancing the cursor. */
1786 		if (next != NULL)
1787 			*next = offset;
1788 		if (cooks != NULL) {
1789 			*cooks++ = offset;
1790 			ncooks--;
1791 			KASSERT(ncooks >= 0, ("ncookies=%d", ncooks));
1792 		}
1793 	}
1794 	zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */
1795 
1796 	/* Subtract unused cookies */
1797 	if (ncookies != NULL)
1798 		*ncookies -= ncooks;
1799 
1800 	if (zfs_uio_segflg(uio) == UIO_SYSSPACE && zfs_uio_iovcnt(uio) == 1) {
1801 		iovp->iov_base += outcount;
1802 		iovp->iov_len -= outcount;
1803 		zfs_uio_resid(uio) -= outcount;
1804 	} else if ((error =
1805 	    zfs_uiomove(outbuf, (long)outcount, UIO_READ, uio))) {
1806 		/*
1807 		 * Reset the pointer.
1808 		 */
1809 		offset = zfs_uio_offset(uio);
1810 	}
1811 
1812 update:
1813 	zap_cursor_fini(&zc);
1814 	zap_attribute_free(zap);
1815 	if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1)
1816 		kmem_free(outbuf, bufsize);
1817 
1818 	if (error == ENOENT)
1819 		error = 0;
1820 
1821 	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
1822 
1823 	zfs_uio_setoffset(uio, offset);
1824 	zfs_exit(zfsvfs, FTAG);
1825 	if (error != 0 && cookies != NULL) {
1826 		free(*cookies, M_TEMP);
1827 		*cookies = NULL;
1828 		*ncookies = 0;
1829 	}
1830 	return (error);
1831 }
1832 
1833 /*
1834  * Get the requested file attributes and place them in the provided
1835  * vattr structure.
1836  *
1837  *	IN:	vp	- vnode of file.
1838  *		vap	- va_mask identifies requested attributes.
1839  *			  If AT_XVATTR set, then optional attrs are requested
1840  *		flags	- ATTR_NOACLCHECK (CIFS server context)
1841  *		cr	- credentials of caller.
1842  *
1843  *	OUT:	vap	- attribute values.
1844  *
1845  *	RETURN:	0 (always succeeds).
1846  */
1847 static int
1848 zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
1849 {
1850 	znode_t *zp = VTOZ(vp);
1851 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1852 	int	error = 0;
1853 	uint32_t blksize;
1854 	u_longlong_t nblocks;
1855 	uint64_t mtime[2], ctime[2], crtime[2], rdev;
1856 	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
1857 	xoptattr_t *xoap = NULL;
1858 	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
1859 	sa_bulk_attr_t bulk[4];
1860 	int count = 0;
1861 
1862 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
1863 		return (error);
1864 
1865 	zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
1866 
1867 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
1868 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
1869 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
1870 	if (vp->v_type == VBLK || vp->v_type == VCHR)
1871 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL,
1872 		    &rdev, 8);
1873 
1874 	if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
1875 		zfs_exit(zfsvfs, FTAG);
1876 		return (error);
1877 	}
1878 
1879 	/*
1880 	 * If the ACL is trivial, don't bother looking for ACE_READ_ATTRIBUTES.
1881 	 * Also, if we are the owner, don't bother, since the owner should
1882 	 * always be allowed to read the basic attributes of the file.
1883 	 */
1884 	if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
1885 	    (vap->va_uid != crgetuid(cr))) {
1886 		if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
1887 		    skipaclchk, cr, NULL))) {
1888 			zfs_exit(zfsvfs, FTAG);
1889 			return (error);
1890 		}
1891 	}
1892 
1893 	/*
1894 	 * Return all attributes.  It's cheaper to provide the answer
1895 	 * than to determine whether we were asked the question.
1896 	 */
1897 
1898 	vap->va_type = IFTOVT(zp->z_mode);
1899 	vap->va_mode = zp->z_mode & ~S_IFMT;
1900 	vn_fsid(vp, vap);
1901 	vap->va_nodeid = zp->z_id;
1902 	vap->va_nlink = zp->z_links;
1903 	if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp) &&
1904 	    zp->z_links < ZFS_LINK_MAX)
1905 		vap->va_nlink++;
1906 	vap->va_size = zp->z_size;
1907 	if (vp->v_type == VBLK || vp->v_type == VCHR)
1908 		vap->va_rdev = zfs_cmpldev(rdev);
1909 	else
1910 		vap->va_rdev = 0;
1911 	vap->va_gen = zp->z_gen;
1912 	vap->va_flags = 0;	/* FreeBSD: Reset chflags(2) flags. */
1913 	vap->va_filerev = zp->z_seq;
1914 
1915 	/*
1916 	 * Add in any requested optional attributes and the create time.
1917 	 * Also set the corresponding bits in the returned attribute bitmap.
1918 	 */
1919 	if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
1920 		if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
1921 			xoap->xoa_archive =
1922 			    ((zp->z_pflags & ZFS_ARCHIVE) != 0);
1923 			XVA_SET_RTN(xvap, XAT_ARCHIVE);
1924 		}
1925 
1926 		if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
1927 			xoap->xoa_readonly =
1928 			    ((zp->z_pflags & ZFS_READONLY) != 0);
1929 			XVA_SET_RTN(xvap, XAT_READONLY);
1930 		}
1931 
1932 		if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
1933 			xoap->xoa_system =
1934 			    ((zp->z_pflags & ZFS_SYSTEM) != 0);
1935 			XVA_SET_RTN(xvap, XAT_SYSTEM);
1936 		}
1937 
1938 		if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
1939 			xoap->xoa_hidden =
1940 			    ((zp->z_pflags & ZFS_HIDDEN) != 0);
1941 			XVA_SET_RTN(xvap, XAT_HIDDEN);
1942 		}
1943 
1944 		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
1945 			xoap->xoa_nounlink =
1946 			    ((zp->z_pflags & ZFS_NOUNLINK) != 0);
1947 			XVA_SET_RTN(xvap, XAT_NOUNLINK);
1948 		}
1949 
1950 		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
1951 			xoap->xoa_immutable =
1952 			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
1953 			XVA_SET_RTN(xvap, XAT_IMMUTABLE);
1954 		}
1955 
1956 		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
1957 			xoap->xoa_appendonly =
1958 			    ((zp->z_pflags & ZFS_APPENDONLY) != 0);
1959 			XVA_SET_RTN(xvap, XAT_APPENDONLY);
1960 		}
1961 
1962 		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
1963 			xoap->xoa_nodump =
1964 			    ((zp->z_pflags & ZFS_NODUMP) != 0);
1965 			XVA_SET_RTN(xvap, XAT_NODUMP);
1966 		}
1967 
1968 		if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
1969 			xoap->xoa_opaque =
1970 			    ((zp->z_pflags & ZFS_OPAQUE) != 0);
1971 			XVA_SET_RTN(xvap, XAT_OPAQUE);
1972 		}
1973 
1974 		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
1975 			xoap->xoa_av_quarantined =
1976 			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
1977 			XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
1978 		}
1979 
1980 		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
1981 			xoap->xoa_av_modified =
1982 			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
1983 			XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
1984 		}
1985 
1986 		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
1987 		    vp->v_type == VREG) {
1988 			zfs_sa_get_scanstamp(zp, xvap);
1989 		}
1990 
1991 		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
1992 			xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
1993 			XVA_SET_RTN(xvap, XAT_REPARSE);
1994 		}
1995 		if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
1996 			xoap->xoa_generation = zp->z_gen;
1997 			XVA_SET_RTN(xvap, XAT_GEN);
1998 		}
1999 
2000 		if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
2001 			xoap->xoa_offline =
2002 			    ((zp->z_pflags & ZFS_OFFLINE) != 0);
2003 			XVA_SET_RTN(xvap, XAT_OFFLINE);
2004 		}
2005 
2006 		if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
2007 			xoap->xoa_sparse =
2008 			    ((zp->z_pflags & ZFS_SPARSE) != 0);
2009 			XVA_SET_RTN(xvap, XAT_SPARSE);
2010 		}
2011 
2012 		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
2013 			xoap->xoa_projinherit =
2014 			    ((zp->z_pflags & ZFS_PROJINHERIT) != 0);
2015 			XVA_SET_RTN(xvap, XAT_PROJINHERIT);
2016 		}
2017 
2018 		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
2019 			xoap->xoa_projid = zp->z_projid;
2020 			XVA_SET_RTN(xvap, XAT_PROJID);
2021 		}
2022 	}
2023 
2024 	ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime);
2025 	ZFS_TIME_DECODE(&vap->va_mtime, mtime);
2026 	ZFS_TIME_DECODE(&vap->va_ctime, ctime);
2027 	ZFS_TIME_DECODE(&vap->va_birthtime, crtime);
2028 
2029 
2030 	sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
2031 	vap->va_blksize = blksize;
2032 	vap->va_bytes = nblocks << 9;	/* nblocks * 512 */
2033 
2034 	if (zp->z_blksz == 0) {
2035 		/*
2036 		 * Block size hasn't been set; suggest maximal I/O transfers.
2037 		 */
2038 		vap->va_blksize = zfsvfs->z_max_blksz;
2039 	}
2040 
2041 	zfs_exit(zfsvfs, FTAG);
2042 	return (0);
2043 }
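/*
 * For illustration only: a rough sketch of how the attributes filled in
 * above surface through stat(2) on FreeBSD.  The mapping is approximate
 * and not a specification of the syscall layer.
 *
 *	struct stat sb;
 *	if (lstat(path, &sb) == 0) {
 *		// sb.st_mode, st_uid, st_gid  <- va_mode, va_uid, va_gid
 *		// sb.st_blksize               <- va_blksize
 *		// sb.st_blocks                <- va_bytes / 512 (nblocks)
 *		// sb.st_birthtim              <- va_birthtime (crtime)
 *	}
 */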
2044 
2045 /*
2046  * Set the file attributes to the values contained in the
2047  * vattr structure.
2048  *
2049  *	IN:	zp	- znode of file to be modified.
2050  *		vap	- new attribute values.
2051  *			  If AT_XVATTR set, then optional attrs are being set
2052  *		flags	- ATTR_UTIME set if non-default time values provided.
2053  *			- ATTR_NOACLCHECK (CIFS context only).
2054  *		cr	- credentials of caller.
2055  *		mnt_ns	- Unused on FreeBSD
2056  *
2057  *	RETURN:	0 on success, error code on failure.
2058  *
2059  * Timestamps:
2060  *	vp - ctime updated, mtime updated if size changed.
2061  */
2062 int
2063 zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns)
2064 {
2065 	vnode_t		*vp = ZTOV(zp);
2066 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
2067 	objset_t	*os;
2068 	zilog_t		*zilog;
2069 	dmu_tx_t	*tx;
2070 	vattr_t		oldva;
2071 	xvattr_t	tmpxvattr;
2072 	uint_t		mask = vap->va_mask;
2073 	uint_t		saved_mask = 0;
2074 	uint64_t	saved_mode;
2075 	int		trim_mask = 0;
2076 	uint64_t	new_mode;
2077 	uint64_t	new_uid, new_gid;
2078 	uint64_t	xattr_obj;
2079 	uint64_t	mtime[2], ctime[2];
2080 	uint64_t	projid = ZFS_INVALID_PROJID;
2081 	znode_t		*attrzp;
2082 	int		need_policy = FALSE;
2083 	int		err, err2;
2084 	zfs_fuid_info_t *fuidp = NULL;
2085 	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
2086 	xoptattr_t	*xoap;
2087 	zfs_acl_t	*aclp;
2088 	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
2089 	boolean_t	fuid_dirtied = B_FALSE;
2090 	sa_bulk_attr_t	bulk[7], xattr_bulk[7];
2091 	int		count = 0, xattr_count = 0;
2092 
2093 	if (mask == 0)
2094 		return (0);
2095 
2096 	if (mask & AT_NOSET)
2097 		return (SET_ERROR(EINVAL));
2098 
2099 	if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
2100 		return (err);
2101 
2102 	os = zfsvfs->z_os;
2103 	zilog = zfsvfs->z_log;
2104 
2105 	/*
2106 	 * Make sure that if an ephemeral uid/gid or an xvattr is specified,
2107 	 * the file system is at the proper version level.
2108 	 */
2109 
2110 	if (zfsvfs->z_use_fuids == B_FALSE &&
2111 	    (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
2112 	    ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
2113 	    (mask & AT_XVATTR))) {
2114 		zfs_exit(zfsvfs, FTAG);
2115 		return (SET_ERROR(EINVAL));
2116 	}
2117 
2118 	if (mask & AT_SIZE && vp->v_type == VDIR) {
2119 		zfs_exit(zfsvfs, FTAG);
2120 		return (SET_ERROR(EISDIR));
2121 	}
2122 
2123 	if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
2124 		zfs_exit(zfsvfs, FTAG);
2125 		return (SET_ERROR(EINVAL));
2126 	}
2127 
2128 	/*
2129 	 * If this is an xvattr_t, then get a pointer to the structure of
2130 	 * optional attributes.  If this is NULL, then we have a vattr_t.
2131 	 */
2132 	xoap = xva_getxoptattr(xvap);
2133 
2134 	xva_init(&tmpxvattr);
2135 
2136 	/*
2137 	 * For immutable files, only the immutable bit and atime may be altered.
2138 	 */
2139 	if ((zp->z_pflags & ZFS_IMMUTABLE) &&
2140 	    ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
2141 	    ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
2142 		zfs_exit(zfsvfs, FTAG);
2143 		return (SET_ERROR(EPERM));
2144 	}
2145 
2146 	/*
2147 	 * Note: ZFS_READONLY is handled in zfs_zaccess_common.
2148 	 */
2149 
2150 	/*
2151 	 * Verify that the timestamps don't overflow 32 bits.
2152 	 * ZFS can handle large timestamps, but 32-bit syscalls can't
2153 	 * handle times beyond January 2038.  This check should be removed
2154 	 * once large timestamps are fully supported.
2155 	 */
2156 	if (mask & (AT_ATIME | AT_MTIME)) {
2157 		if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
2158 		    ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
2159 			zfs_exit(zfsvfs, FTAG);
2160 			return (SET_ERROR(EOVERFLOW));
2161 		}
2162 	}
2163 	if (xoap != NULL && (mask & AT_XVATTR)) {
2164 		if (XVA_ISSET_REQ(xvap, XAT_CREATETIME) &&
2165 		    TIMESPEC_OVERFLOW(&vap->va_birthtime)) {
2166 			zfs_exit(zfsvfs, FTAG);
2167 			return (SET_ERROR(EOVERFLOW));
2168 		}
2169 
2170 		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
2171 			if (!dmu_objset_projectquota_enabled(os) ||
2172 			    (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode))) {
2173 				zfs_exit(zfsvfs, FTAG);
2174 				return (SET_ERROR(EOPNOTSUPP));
2175 			}
2176 
2177 			projid = xoap->xoa_projid;
2178 			if (unlikely(projid == ZFS_INVALID_PROJID)) {
2179 				zfs_exit(zfsvfs, FTAG);
2180 				return (SET_ERROR(EINVAL));
2181 			}
2182 
2183 			if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID)
2184 				projid = ZFS_INVALID_PROJID;
2185 			else
2186 				need_policy = TRUE;
2187 		}
2188 
2189 		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) &&
2190 		    (xoap->xoa_projinherit !=
2191 		    ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) &&
2192 		    (!dmu_objset_projectquota_enabled(os) ||
2193 		    (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode)))) {
2194 			zfs_exit(zfsvfs, FTAG);
2195 			return (SET_ERROR(EOPNOTSUPP));
2196 		}
2197 	}
2198 
2199 	attrzp = NULL;
2200 	aclp = NULL;
2201 
2202 	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
2203 		zfs_exit(zfsvfs, FTAG);
2204 		return (SET_ERROR(EROFS));
2205 	}
2206 
2207 	/*
2208 	 * First validate permissions
2209 	 */
2210 
2211 	if (mask & AT_SIZE) {
2212 		/*
2213 		 * XXX - Note, we are not providing any open
2214 		 * mode flags here (like FNDELAY), so we may
2215 		 * block if there are locks present... this
2216 		 * should be addressed in openat().
2217 		 */
2218 		/* XXX - would it be OK to generate a log record here? */
2219 		err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
2220 		if (err) {
2221 			zfs_exit(zfsvfs, FTAG);
2222 			return (err);
2223 		}
2224 	}
2225 
2226 	if (mask & (AT_ATIME|AT_MTIME) ||
2227 	    ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
2228 	    XVA_ISSET_REQ(xvap, XAT_READONLY) ||
2229 	    XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
2230 	    XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
2231 	    XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
2232 	    XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
2233 	    XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
2234 		need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
2235 		    skipaclchk, cr, mnt_ns);
2236 	}
2237 
2238 	if (mask & (AT_UID|AT_GID)) {
2239 		int	idmask = (mask & (AT_UID|AT_GID));
2240 		int	take_owner;
2241 		int	take_group;
2242 
2243 		/*
2244 		 * NOTE: even if a new mode is being set,
2245 		 * we may clear S_ISUID/S_ISGID bits.
2246 		 */
2247 
2248 		if (!(mask & AT_MODE))
2249 			vap->va_mode = zp->z_mode;
2250 
2251 		/*
2252 		 * Take ownership or chgrp to group we are a member of
2253 		 */
2254 
2255 		take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
2256 		take_group = (mask & AT_GID) &&
2257 		    zfs_groupmember(zfsvfs, vap->va_gid, cr);
2258 
2259 		/*
2260 		 * If both AT_UID and AT_GID are set then take_owner and
2261 		 * take_group must both be set in order to allow taking
2262 		 * ownership.
2263 		 *
2264 		 * Otherwise, send the check through secpolicy_vnode_setattr()
2265 		 *
2266 		 */
2267 
2268 		if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
2269 		    ((idmask == AT_UID) && take_owner) ||
2270 		    ((idmask == AT_GID) && take_group)) {
2271 			if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
2272 			    skipaclchk, cr, mnt_ns) == 0) {
2273 				/*
2274 				 * Remove setuid/setgid for non-privileged users
2275 				 */
2276 				secpolicy_setid_clear(vap, vp, cr);
2277 				trim_mask = (mask & (AT_UID|AT_GID));
2278 			} else {
2279 				need_policy =  TRUE;
2280 			}
2281 		} else {
2282 			need_policy =  TRUE;
2283 		}
2284 	}
2285 
2286 	oldva.va_mode = zp->z_mode;
2287 	zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
2288 	if (mask & AT_XVATTR) {
2289 		/*
2290 		 * Update xvattr mask to include only those attributes
2291 		 * that are actually changing.
2292 		 *
2293 		 * The bits will be restored prior to actually setting
2294 		 * the attributes so the caller thinks they were set.
2295 		 */
2296 		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
2297 			if (xoap->xoa_appendonly !=
2298 			    ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
2299 				need_policy = TRUE;
2300 			} else {
2301 				XVA_CLR_REQ(xvap, XAT_APPENDONLY);
2302 				XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY);
2303 			}
2304 		}
2305 
2306 		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
2307 			if (xoap->xoa_projinherit !=
2308 			    ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) {
2309 				need_policy = TRUE;
2310 			} else {
2311 				XVA_CLR_REQ(xvap, XAT_PROJINHERIT);
2312 				XVA_SET_REQ(&tmpxvattr, XAT_PROJINHERIT);
2313 			}
2314 		}
2315 
2316 		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
2317 			if (xoap->xoa_nounlink !=
2318 			    ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
2319 				need_policy = TRUE;
2320 			} else {
2321 				XVA_CLR_REQ(xvap, XAT_NOUNLINK);
2322 				XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK);
2323 			}
2324 		}
2325 
2326 		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
2327 			if (xoap->xoa_immutable !=
2328 			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
2329 				need_policy = TRUE;
2330 			} else {
2331 				XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
2332 				XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE);
2333 			}
2334 		}
2335 
2336 		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
2337 			if (xoap->xoa_nodump !=
2338 			    ((zp->z_pflags & ZFS_NODUMP) != 0)) {
2339 				need_policy = TRUE;
2340 			} else {
2341 				XVA_CLR_REQ(xvap, XAT_NODUMP);
2342 				XVA_SET_REQ(&tmpxvattr, XAT_NODUMP);
2343 			}
2344 		}
2345 
2346 		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
2347 			if (xoap->xoa_av_modified !=
2348 			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
2349 				need_policy = TRUE;
2350 			} else {
2351 				XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
2352 				XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED);
2353 			}
2354 		}
2355 
2356 		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
2357 			if ((vp->v_type != VREG &&
2358 			    xoap->xoa_av_quarantined) ||
2359 			    xoap->xoa_av_quarantined !=
2360 			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
2361 				need_policy = TRUE;
2362 			} else {
2363 				XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
2364 				XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED);
2365 			}
2366 		}
2367 
2368 		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
2369 			zfs_exit(zfsvfs, FTAG);
2370 			return (SET_ERROR(EPERM));
2371 		}
2372 
2373 		if (need_policy == FALSE &&
2374 		    (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
2375 		    XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
2376 			need_policy = TRUE;
2377 		}
2378 	}
2379 
2380 	if (mask & AT_MODE) {
2381 		if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr,
2382 		    mnt_ns) == 0) {
2383 			err = secpolicy_setid_setsticky_clear(vp, vap,
2384 			    &oldva, cr);
2385 			if (err) {
2386 				zfs_exit(zfsvfs, FTAG);
2387 				return (err);
2388 			}
2389 			trim_mask |= AT_MODE;
2390 		} else {
2391 			need_policy = TRUE;
2392 		}
2393 	}
2394 
2395 	if (need_policy) {
2396 		/*
2397 		 * If trim_mask is set then take ownership
2398 		 * has been granted or write_acl is present and user
2399 		 * has the ability to modify mode.  In that case remove
2400 		 * UID|GID and/or MODE from the mask so that
2401 		 * secpolicy_vnode_setattr() doesn't revoke it.
2402 		 */
2403 
2404 		if (trim_mask) {
2405 			saved_mask = vap->va_mask;
2406 			vap->va_mask &= ~trim_mask;
2407 			if (trim_mask & AT_MODE) {
2408 				/*
2409 				 * Save the mode, as secpolicy_vnode_setattr()
2410 				 * will overwrite it with ova.va_mode.
2411 				 */
2412 				saved_mode = vap->va_mode;
2413 			}
2414 		}
2415 		err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
2416 		    (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
2417 		if (err) {
2418 			zfs_exit(zfsvfs, FTAG);
2419 			return (err);
2420 		}
2421 
2422 		if (trim_mask) {
2423 			vap->va_mask |= saved_mask;
2424 			if (trim_mask & AT_MODE) {
2425 				/*
2426 				 * Recover the mode after
2427 				 * secpolicy_vnode_setattr().
2428 				 */
2429 				vap->va_mode = saved_mode;
2430 			}
2431 		}
2432 	}
2433 
2434 	/*
2435 	 * secpolicy_vnode_setattr() or taking ownership may have
2436 	 * changed va_mask.
2437 	 */
2438 	mask = vap->va_mask;
2439 
2440 	if ((mask & (AT_UID | AT_GID)) || projid != ZFS_INVALID_PROJID) {
2441 		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
2442 		    &xattr_obj, sizeof (xattr_obj));
2443 
2444 		if (err == 0 && xattr_obj) {
2445 			err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp);
2446 			if (err == 0) {
2447 				err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE);
2448 				if (err != 0)
2449 					vrele(ZTOV(attrzp));
2450 			}
2451 			if (err)
2452 				goto out2;
2453 		}
2454 		if (mask & AT_UID) {
2455 			new_uid = zfs_fuid_create(zfsvfs,
2456 			    (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
2457 			if (new_uid != zp->z_uid &&
2458 			    zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT,
2459 			    new_uid)) {
2460 				if (attrzp)
2461 					vput(ZTOV(attrzp));
2462 				err = SET_ERROR(EDQUOT);
2463 				goto out2;
2464 			}
2465 		}
2466 
2467 		if (mask & AT_GID) {
2468 			new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
2469 			    cr, ZFS_GROUP, &fuidp);
2470 			if (new_gid != zp->z_gid &&
2471 			    zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT,
2472 			    new_gid)) {
2473 				if (attrzp)
2474 					vput(ZTOV(attrzp));
2475 				err = SET_ERROR(EDQUOT);
2476 				goto out2;
2477 			}
2478 		}
2479 
2480 		if (projid != ZFS_INVALID_PROJID &&
2481 		    zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, projid)) {
2482 			if (attrzp)
2483 				vput(ZTOV(attrzp));
2484 			err = SET_ERROR(EDQUOT);
2485 			goto out2;
2486 		}
2487 	}
2488 	tx = dmu_tx_create(os);
2489 
2490 	if (mask & AT_MODE) {
2491 		uint64_t pmode = zp->z_mode;
2492 		uint64_t acl_obj;
2493 		new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
2494 
2495 		if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED &&
2496 		    !(zp->z_pflags & ZFS_ACL_TRIVIAL)) {
2497 			err = SET_ERROR(EPERM);
2498 			goto out;
2499 		}
2500 
2501 		if ((err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)))
2502 			goto out;
2503 
2504 		if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
2505 			/*
2506 			 * Are we upgrading ACL from old V0 format
2507 			 * to V1 format?
2508 			 */
2509 			if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
2510 			    zfs_znode_acl_version(zp) ==
2511 			    ZFS_ACL_VERSION_INITIAL) {
2512 				dmu_tx_hold_free(tx, acl_obj, 0,
2513 				    DMU_OBJECT_END);
2514 				dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
2515 				    0, aclp->z_acl_bytes);
2516 			} else {
2517 				dmu_tx_hold_write(tx, acl_obj, 0,
2518 				    aclp->z_acl_bytes);
2519 			}
2520 		} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
2521 			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
2522 			    0, aclp->z_acl_bytes);
2523 		}
2524 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
2525 	} else {
2526 		if (((mask & AT_XVATTR) &&
2527 		    XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ||
2528 		    (projid != ZFS_INVALID_PROJID &&
2529 		    !(zp->z_pflags & ZFS_PROJID)))
2530 			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
2531 		else
2532 			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
2533 	}
2534 
2535 	if (attrzp) {
2536 		dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
2537 	}
2538 
2539 	fuid_dirtied = zfsvfs->z_fuid_dirty;
2540 	if (fuid_dirtied)
2541 		zfs_fuid_txhold(zfsvfs, tx);
2542 
2543 	zfs_sa_upgrade_txholds(tx, zp);
2544 
2545 	err = dmu_tx_assign(tx, TXG_WAIT);
2546 	if (err)
2547 		goto out;
2548 
2549 	count = 0;
2550 	/*
2551 	 * Set each attribute requested.
2552 	 * We group settings according to the locks they need to acquire.
2553 	 *
2554 	 * Note: you cannot set ctime directly, although it will be
2555 	 * updated as a side-effect of calling this function.
2556 	 */
2557 
2558 	if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) {
2559 		/*
2560 		 * An existing object upgraded from an old system has no slot
2561 		 * for the project ID attribute in its on-disk layout, but the
2562 		 * quota accounting logic needs to access the related slots by
2563 		 * offset directly.  Adjust the old object's layout so that the
2564 		 * project ID sits at a unified, fixed offset.
2565 		 */
2566 		if (attrzp)
2567 			err = sa_add_projid(attrzp->z_sa_hdl, tx, projid);
2568 		if (err == 0)
2569 			err = sa_add_projid(zp->z_sa_hdl, tx, projid);
2570 
2571 		if (unlikely(err == EEXIST))
2572 			err = 0;
2573 		else if (err != 0)
2574 			goto out;
2575 		else
2576 			projid = ZFS_INVALID_PROJID;
2577 	}
2578 
2579 	if (mask & (AT_UID|AT_GID|AT_MODE))
2580 		mutex_enter(&zp->z_acl_lock);
2581 
2582 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
2583 	    &zp->z_pflags, sizeof (zp->z_pflags));
2584 
2585 	if (attrzp) {
2586 		if (mask & (AT_UID|AT_GID|AT_MODE))
2587 			mutex_enter(&attrzp->z_acl_lock);
2588 		SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2589 		    SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
2590 		    sizeof (attrzp->z_pflags));
2591 		if (projid != ZFS_INVALID_PROJID) {
2592 			attrzp->z_projid = projid;
2593 			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2594 			    SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid,
2595 			    sizeof (attrzp->z_projid));
2596 		}
2597 	}
2598 
2599 	if (mask & (AT_UID|AT_GID)) {
2600 
2601 		if (mask & AT_UID) {
2602 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
2603 			    &new_uid, sizeof (new_uid));
2604 			zp->z_uid = new_uid;
2605 			if (attrzp) {
2606 				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2607 				    SA_ZPL_UID(zfsvfs), NULL, &new_uid,
2608 				    sizeof (new_uid));
2609 				attrzp->z_uid = new_uid;
2610 			}
2611 		}
2612 
2613 		if (mask & AT_GID) {
2614 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
2615 			    NULL, &new_gid, sizeof (new_gid));
2616 			zp->z_gid = new_gid;
2617 			if (attrzp) {
2618 				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2619 				    SA_ZPL_GID(zfsvfs), NULL, &new_gid,
2620 				    sizeof (new_gid));
2621 				attrzp->z_gid = new_gid;
2622 			}
2623 		}
2624 		if (!(mask & AT_MODE)) {
2625 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
2626 			    NULL, &new_mode, sizeof (new_mode));
2627 			new_mode = zp->z_mode;
2628 		}
2629 		err = zfs_acl_chown_setattr(zp);
2630 		ASSERT0(err);
2631 		if (attrzp) {
2632 			vn_seqc_write_begin(ZTOV(attrzp));
2633 			err = zfs_acl_chown_setattr(attrzp);
2634 			vn_seqc_write_end(ZTOV(attrzp));
2635 			ASSERT0(err);
2636 		}
2637 	}
2638 
2639 	if (mask & AT_MODE) {
2640 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
2641 		    &new_mode, sizeof (new_mode));
2642 		zp->z_mode = new_mode;
2643 		ASSERT3P(aclp, !=, NULL);
2644 		err = zfs_aclset_common(zp, aclp, cr, tx);
2645 		ASSERT0(err);
2646 		if (zp->z_acl_cached)
2647 			zfs_acl_free(zp->z_acl_cached);
2648 		zp->z_acl_cached = aclp;
2649 		aclp = NULL;
2650 	}
2651 
2652 
2653 	if (mask & AT_ATIME) {
2654 		ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime);
2655 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
2656 		    &zp->z_atime, sizeof (zp->z_atime));
2657 	}
2658 
2659 	if (mask & AT_MTIME) {
2660 		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
2661 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
2662 		    mtime, sizeof (mtime));
2663 	}
2664 
2665 	if (projid != ZFS_INVALID_PROJID) {
2666 		zp->z_projid = projid;
2667 		SA_ADD_BULK_ATTR(bulk, count,
2668 		    SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid,
2669 		    sizeof (zp->z_projid));
2670 	}
2671 
2672 	/* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
2673 	if (mask & AT_SIZE && !(mask & AT_MTIME)) {
2674 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
2675 		    NULL, mtime, sizeof (mtime));
2676 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
2677 		    &ctime, sizeof (ctime));
2678 		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
2679 	} else if (mask != 0) {
2680 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
2681 		    &ctime, sizeof (ctime));
2682 		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime);
2683 		if (attrzp) {
2684 			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
2685 			    SA_ZPL_CTIME(zfsvfs), NULL,
2686 			    &ctime, sizeof (ctime));
2687 			zfs_tstamp_update_setup(attrzp, STATE_CHANGED,
2688 			    mtime, ctime);
2689 		}
2690 	}
2691 
2692 	/*
2693 	 * Do this after setting timestamps to prevent timestamp
2694 	 * update from toggling the bit.
2695 	 */
2696 
2697 	if (xoap && (mask & AT_XVATTR)) {
2698 
2699 		if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
2700 			xoap->xoa_createtime = vap->va_birthtime;
2701 		/*
2702 		 * Restore the trimmed-off masks so that the return
2703 		 * masks can be set for the caller.
2704 		 */
2705 
2706 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) {
2707 			XVA_SET_REQ(xvap, XAT_APPENDONLY);
2708 		}
2709 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) {
2710 			XVA_SET_REQ(xvap, XAT_NOUNLINK);
2711 		}
2712 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) {
2713 			XVA_SET_REQ(xvap, XAT_IMMUTABLE);
2714 		}
2715 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) {
2716 			XVA_SET_REQ(xvap, XAT_NODUMP);
2717 		}
2718 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) {
2719 			XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
2720 		}
2721 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) {
2722 			XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
2723 		}
2724 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_PROJINHERIT)) {
2725 			XVA_SET_REQ(xvap, XAT_PROJINHERIT);
2726 		}
2727 
2728 		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
2729 			ASSERT3S(vp->v_type, ==, VREG);
2730 
2731 		zfs_xvattr_set(zp, xvap, tx);
2732 	}
2733 
2734 	if (fuid_dirtied)
2735 		zfs_fuid_sync(zfsvfs, tx);
2736 
2737 	if (mask != 0)
2738 		zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
2739 
2740 	if (mask & (AT_UID|AT_GID|AT_MODE))
2741 		mutex_exit(&zp->z_acl_lock);
2742 
2743 	if (attrzp) {
2744 		if (mask & (AT_UID|AT_GID|AT_MODE))
2745 			mutex_exit(&attrzp->z_acl_lock);
2746 	}
2747 out:
2748 	if (err == 0 && attrzp) {
2749 		err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
2750 		    xattr_count, tx);
2751 		ASSERT0(err2);
2752 	}
2753 
2754 	if (attrzp)
2755 		vput(ZTOV(attrzp));
2756 
2757 	if (aclp)
2758 		zfs_acl_free(aclp);
2759 
2760 	if (fuidp) {
2761 		zfs_fuid_info_free(fuidp);
2762 		fuidp = NULL;
2763 	}
2764 
2765 	if (err) {
2766 		dmu_tx_abort(tx);
2767 	} else {
2768 		err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
2769 		dmu_tx_commit(tx);
2770 	}
2771 
2772 out2:
2773 	if (os->os_sync == ZFS_SYNC_ALWAYS)
2774 		zil_commit(zilog, 0);
2775 
2776 	zfs_exit(zfsvfs, FTAG);
2777 	return (err);
2778 }
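/*
 * Illustrative sketch of the userland operations that typically land in
 * zfs_setattr() and the va_mask bits they set; the mapping is
 * approximate and intended only to make the mask handling above
 * concrete.
 *
 *	chmod(path, 0644);			// AT_MODE
 *	chown(path, uid, gid);			// AT_UID | AT_GID
 *	truncate(path, length);			// AT_SIZE
 *	utimensat(AT_FDCWD, path, times, 0);	// AT_ATIME | AT_MTIME
 *	chflags(path, UF_IMMUTABLE);		// AT_XVATTR (XAT_IMMUTABLE)
 */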
2779 
2780 /*
2781  * Look up the directory entries corresponding to the source and target
2782  * directory/name pairs.
2783  */
2784 static int
2785 zfs_rename_relock_lookup(znode_t *sdzp, const struct componentname *scnp,
2786     znode_t **szpp, znode_t *tdzp, const struct componentname *tcnp,
2787     znode_t **tzpp)
2788 {
2789 	zfsvfs_t *zfsvfs;
2790 	znode_t *szp, *tzp;
2791 	int error;
2792 
2793 	/*
2794 	 * Before using sdzp and tdzp we must ensure that they are live.
2795 	 * As a porting legacy from illumos we have two things to worry
2796 	 * about.  One is typical for FreeBSD: the vnode must not be
2797 	 * reclaimed (doomed).  The other is that the znode must be live.
2798 	 * The current code can invalidate the znode without acquiring the
2799 	 * corresponding vnode lock if the object represented by the znode
2800 	 * and vnode is no longer valid after a rollback or receive operation.
2801 	 * z_teardown_lock hidden behind zfs_enter and zfs_exit is the lock
2802 	 * that protects the znodes from the invalidation.
2803 	 */
2804 	zfsvfs = sdzp->z_zfsvfs;
2805 	ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
2806 	if ((error = zfs_enter_verify_zp(zfsvfs, sdzp, FTAG)) != 0)
2807 		return (error);
2808 	if ((error = zfs_verify_zp(tdzp)) != 0) {
2809 		zfs_exit(zfsvfs, FTAG);
2810 		return (error);
2811 	}
2812 
2813 	/*
2814 	 * Re-resolve svp to be certain it still exists and fetch the
2815 	 * correct vnode.
2816 	 */
2817 	error = zfs_dirent_lookup(sdzp, scnp->cn_nameptr, &szp, ZEXISTS);
2818 	if (error != 0) {
2819 		/* Source entry invalid or not there. */
2820 		if ((scnp->cn_flags & ISDOTDOT) != 0 ||
2821 		    (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.'))
2822 			error = SET_ERROR(EINVAL);
2823 		goto out;
2824 	}
2825 	*szpp = szp;
2826 
2827 	/*
2828 	 * Re-resolve tvp; if it disappeared, we just carry on.
2829 	 */
2830 	error = zfs_dirent_lookup(tdzp, tcnp->cn_nameptr, &tzp, 0);
2831 	if (error != 0) {
2832 		vrele(ZTOV(szp));
2833 		if ((tcnp->cn_flags & ISDOTDOT) != 0)
2834 			error = SET_ERROR(EINVAL);
2835 		goto out;
2836 	}
2837 	*tzpp = tzp;
2838 out:
2839 	zfs_exit(zfsvfs, FTAG);
2840 	return (error);
2841 }
2842 
2843 /*
2844  * We acquire all but the sdvp lock using non-blocking acquisitions.  If we
2845  * fail to acquire any lock in the path we will drop all held locks,
2846  * acquire the new lock in a blocking fashion, and then release it and
2847  * restart the rename.  This acquire/release step ensures that we do not
2848  * spin on a lock waiting for release.  On error release all vnode locks
2849  * and decrement references the way tmpfs_rename() would do.
2850  */
2851 static int
2852 zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
2853     struct vnode *tdvp, struct vnode **tvpp,
2854     const struct componentname *scnp, const struct componentname *tcnp)
2855 {
2856 	struct vnode	*nvp, *svp, *tvp;
2857 	znode_t		*sdzp, *tdzp, *szp, *tzp;
2858 	int		error;
2859 
2860 	VOP_UNLOCK(tdvp);
2861 	if (*tvpp != NULL && *tvpp != tdvp)
2862 		VOP_UNLOCK(*tvpp);
2863 
2864 relock:
2865 	error = vn_lock(sdvp, LK_EXCLUSIVE);
2866 	if (error)
2867 		goto out;
2868 	error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
2869 	if (error != 0) {
2870 		VOP_UNLOCK(sdvp);
2871 		if (error != EBUSY)
2872 			goto out;
2873 		error = vn_lock(tdvp, LK_EXCLUSIVE);
2874 		if (error)
2875 			goto out;
2876 		VOP_UNLOCK(tdvp);
2877 		goto relock;
2878 	}
2879 	tdzp = VTOZ(tdvp);
2880 	sdzp = VTOZ(sdvp);
2881 
2882 	error = zfs_rename_relock_lookup(sdzp, scnp, &szp, tdzp, tcnp, &tzp);
2883 	if (error != 0) {
2884 		VOP_UNLOCK(sdvp);
2885 		VOP_UNLOCK(tdvp);
2886 		goto out;
2887 	}
2888 	svp = ZTOV(szp);
2889 	tvp = tzp != NULL ? ZTOV(tzp) : NULL;
2890 
2891 	/*
2892 	 * Now try acquire locks on svp and tvp.
2893 	 */
2894 	nvp = svp;
2895 	error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
2896 	if (error != 0) {
2897 		VOP_UNLOCK(sdvp);
2898 		VOP_UNLOCK(tdvp);
2899 		if (tvp != NULL)
2900 			vrele(tvp);
2901 		if (error != EBUSY) {
2902 			vrele(nvp);
2903 			goto out;
2904 		}
2905 		error = vn_lock(nvp, LK_EXCLUSIVE);
2906 		if (error != 0) {
2907 			vrele(nvp);
2908 			goto out;
2909 		}
2910 		VOP_UNLOCK(nvp);
2911 		/*
2912 		 * Concurrent rename race.
2913 		 * XXX ?
2914 		 */
2915 		if (nvp == tdvp) {
2916 			vrele(nvp);
2917 			error = SET_ERROR(EINVAL);
2918 			goto out;
2919 		}
2920 		vrele(*svpp);
2921 		*svpp = nvp;
2922 		goto relock;
2923 	}
2924 	vrele(*svpp);
2925 	*svpp = nvp;
2926 
2927 	if (*tvpp != NULL)
2928 		vrele(*tvpp);
2929 	*tvpp = NULL;
2930 	if (tvp != NULL) {
2931 		nvp = tvp;
2932 		error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
2933 		if (error != 0) {
2934 			VOP_UNLOCK(sdvp);
2935 			VOP_UNLOCK(tdvp);
2936 			VOP_UNLOCK(*svpp);
2937 			if (error != EBUSY) {
2938 				vrele(nvp);
2939 				goto out;
2940 			}
2941 			error = vn_lock(nvp, LK_EXCLUSIVE);
2942 			if (error != 0) {
2943 				vrele(nvp);
2944 				goto out;
2945 			}
2946 			vput(nvp);
2947 			goto relock;
2948 		}
2949 		*tvpp = nvp;
2950 	}
2951 
2952 	return (0);
2953 
2954 out:
2955 	return (error);
2956 }
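/*
 * A minimal sketch (error handling omitted) of the try-lock/back-off
 * pattern implemented above: lock "a" unconditionally, try-lock "b",
 * and on contention drop everything, wait for "b" by taking and
 * releasing it, then restart so the lock order is never violated.
 *
 *	for (;;) {
 *		vn_lock(a, LK_EXCLUSIVE | LK_RETRY);
 *		if (vn_lock(b, LK_EXCLUSIVE | LK_NOWAIT) == 0)
 *			break;				// both locks held
 *		VOP_UNLOCK(a);
 *		vn_lock(b, LK_EXCLUSIVE | LK_RETRY);	// let the holder finish
 *		VOP_UNLOCK(b);				// then retry from scratch
 *	}
 */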
2957 
2958 /*
2959  * Note that we must use VRELE_ASYNC in this function as it walks
2960  * up the directory tree and vrele may need to acquire an exclusive
2961  * lock if a last reference to a vnode is dropped.
2962  */
2963 static int
2964 zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp)
2965 {
2966 	zfsvfs_t	*zfsvfs;
2967 	znode_t		*zp, *zp1;
2968 	uint64_t	parent;
2969 	int		error;
2970 
2971 	zfsvfs = tdzp->z_zfsvfs;
2972 	if (tdzp == szp)
2973 		return (SET_ERROR(EINVAL));
2974 	if (tdzp == sdzp)
2975 		return (0);
2976 	if (tdzp->z_id == zfsvfs->z_root)
2977 		return (0);
2978 	zp = tdzp;
2979 	for (;;) {
2980 		ASSERT(!zp->z_unlinked);
2981 		if ((error = sa_lookup(zp->z_sa_hdl,
2982 		    SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
2983 			break;
2984 
2985 		if (parent == szp->z_id) {
2986 			error = SET_ERROR(EINVAL);
2987 			break;
2988 		}
2989 		if (parent == zfsvfs->z_root)
2990 			break;
2991 		if (parent == sdzp->z_id)
2992 			break;
2993 
2994 		error = zfs_zget(zfsvfs, parent, &zp1);
2995 		if (error != 0)
2996 			break;
2997 
2998 		if (zp != tdzp)
2999 			VN_RELE_ASYNC(ZTOV(zp),
3000 			    dsl_pool_zrele_taskq(
3001 			    dmu_objset_pool(zfsvfs->z_os)));
3002 		zp = zp1;
3003 	}
3004 
3005 	if (error == ENOTDIR)
3006 		panic("checkpath: .. not a directory\n");
3007 	if (zp != tdzp)
3008 		VN_RELE_ASYNC(ZTOV(zp),
3009 		    dsl_pool_zrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
3010 	return (error);
3011 }
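/*
 * Conceptually, zfs_rename_check() rejects moves of a directory into
 * its own subtree (e.g. "mv /usr/a/b /usr/a/b/c/d") by walking the
 * target directory's parent chain through the SA_ZPL_PARENT attribute:
 *
 *	tdzp -> parent -> parent -> ... -> z_root
 *
 * If szp->z_id shows up anywhere on that chain, the rename would make
 * the source directory an ancestor of itself, so EINVAL is returned.
 */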
3012 
3013 static int
3014 zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3015     vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3016     cred_t *cr);
3017 
3018 /*
3019  * Move an entry from the provided source directory to the target
3020  * directory.  Change the entry name as indicated.
3021  *
3022  *	IN:	sdvp	- Source directory containing the "old entry".
3023  *		scnp	- Old entry name.
3024  *		tdvp	- Target directory to contain the "new entry".
3025  *		tcnp	- New entry name.
3026  *		cr	- credentials of caller.
3027  *	INOUT:	svpp	- Source file
3028  *		tvpp	- Target file, may point to NULL initially
3029  *
3030  *	RETURN:	0 on success, error code on failure.
3031  *
3032  * Timestamps:
3033  *	sdvp,tdvp - ctime|mtime updated
3034  */
3035 static int
3036 zfs_do_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3037     vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3038     cred_t *cr)
3039 {
3040 	int	error;
3041 
3042 	ASSERT_VOP_ELOCKED(tdvp, __func__);
3043 	if (*tvpp != NULL)
3044 		ASSERT_VOP_ELOCKED(*tvpp, __func__);
3045 
3046 	/* Reject renames across filesystems. */
3047 	if ((*svpp)->v_mount != tdvp->v_mount ||
3048 	    ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) {
3049 		error = SET_ERROR(EXDEV);
3050 		goto out;
3051 	}
3052 
3053 	if (zfsctl_is_node(tdvp)) {
3054 		error = SET_ERROR(EXDEV);
3055 		goto out;
3056 	}
3057 
3058 	/*
3059 	 * Lock all four vnodes to ensure safety and semantics of renaming.
3060 	 */
3061 	error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp);
3062 	if (error != 0) {
3063 		/* no vnodes are locked in the case of error here */
3064 		return (error);
3065 	}
3066 
3067 	error = zfs_do_rename_impl(sdvp, svpp, scnp, tdvp, tvpp, tcnp, cr);
3068 	VOP_UNLOCK(sdvp);
3069 	VOP_UNLOCK(*svpp);
3070 out:
3071 	if (*tvpp != NULL)
3072 		VOP_UNLOCK(*tvpp);
3073 	if (tdvp != *tvpp)
3074 		VOP_UNLOCK(tdvp);
3075 
3076 	return (error);
3077 }
3078 
3079 static int
3080 zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3081     vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3082     cred_t *cr)
3083 {
3084 	dmu_tx_t	*tx;
3085 	zfsvfs_t	*zfsvfs;
3086 	zilog_t		*zilog;
3087 	znode_t		*tdzp, *sdzp, *tzp, *szp;
3088 	const char	*snm = scnp->cn_nameptr;
3089 	const char	*tnm = tcnp->cn_nameptr;
3090 	int		error;
3091 
3092 	tdzp = VTOZ(tdvp);
3093 	sdzp = VTOZ(sdvp);
3094 	zfsvfs = tdzp->z_zfsvfs;
3095 
3096 	if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0)
3097 		return (error);
3098 	if ((error = zfs_verify_zp(sdzp)) != 0) {
3099 		zfs_exit(zfsvfs, FTAG);
3100 		return (error);
3101 	}
3102 	zilog = zfsvfs->z_log;
3103 
3104 	if (zfsvfs->z_utf8 && u8_validate(tnm,
3105 	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3106 		error = SET_ERROR(EILSEQ);
3107 		goto out;
3108 	}
3109 
3110 	/* If source and target are the same file, there is nothing to do. */
3111 	if ((*svpp) == (*tvpp)) {
3112 		error = 0;
3113 		goto out;
3114 	}
3115 
3116 	if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) ||
3117 	    ((*tvpp) != NULL && (*tvpp)->v_type == VDIR &&
3118 	    (*tvpp)->v_mountedhere != NULL)) {
3119 		error = SET_ERROR(EXDEV);
3120 		goto out;
3121 	}
3122 
3123 	szp = VTOZ(*svpp);
3124 	if ((error = zfs_verify_zp(szp)) != 0) {
3125 		zfs_exit(zfsvfs, FTAG);
3126 		return (error);
3127 	}
3128 	tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp);
3129 	if (tzp != NULL) {
3130 		if ((error = zfs_verify_zp(tzp)) != 0) {
3131 			zfs_exit(zfsvfs, FTAG);
3132 			return (error);
3133 		}
3134 	}
3135 
3136 	/*
3137 	 * This is to prevent the creation of links into attribute space
3138 	 * by renaming a linked file into or out of an attribute directory.
3139 	 * See the comment in zfs_link() for why this is considered bad.
3140 	 */
3141 	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
3142 		error = SET_ERROR(EINVAL);
3143 		goto out;
3144 	}
3145 
3146 	/*
3147 	 * If we are using project inheritance, meaning the directory has
3148 	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
3149 	 * not only the project ID but also the ZFS_PROJINHERIT flag.  In
3150 	 * that case, we only allow renames into our tree when the project
3151 	 * IDs are the same.
3152 	 */
3153 	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
3154 	    tdzp->z_projid != szp->z_projid) {
3155 		error = SET_ERROR(EXDEV);
3156 		goto out;
3157 	}
3158 
3159 	/*
3160 	 * Must have write access at the source to remove the old entry
3161 	 * and write access at the target to create the new entry.
3162 	 * Note that if target and source are the same, this can be
3163 	 * done in a single check.
3164 	 */
3165 	if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr, NULL)))
3166 		goto out;
3167 
3168 	if ((*svpp)->v_type == VDIR) {
3169 		/*
3170 		 * Avoid ".", "..", and aliases of "." for obvious reasons.
3171 		 */
3172 		if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') ||
3173 		    sdzp == szp ||
3174 		    (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
3175 			error = EINVAL;
3176 			goto out;
3177 		}
3178 
3179 		/*
3180 		 * Check to make sure rename is valid.
3181 		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
3182 		 */
3183 		if ((error = zfs_rename_check(szp, sdzp, tdzp)))
3184 			goto out;
3185 	}
3186 
3187 	/*
3188 	 * Does target exist?
3189 	 */
3190 	if (tzp) {
3191 		/*
3192 		 * Source and target must be the same type.
3193 		 */
3194 		if ((*svpp)->v_type == VDIR) {
3195 			if ((*tvpp)->v_type != VDIR) {
3196 				error = SET_ERROR(ENOTDIR);
3197 				goto out;
3198 			} else {
3199 				cache_purge(tdvp);
3200 				if (sdvp != tdvp)
3201 					cache_purge(sdvp);
3202 			}
3203 		} else {
3204 			if ((*tvpp)->v_type == VDIR) {
3205 				error = SET_ERROR(EISDIR);
3206 				goto out;
3207 			}
3208 		}
3209 	}
3210 
3211 	vn_seqc_write_begin(*svpp);
3212 	vn_seqc_write_begin(sdvp);
3213 	if (*tvpp != NULL)
3214 		vn_seqc_write_begin(*tvpp);
3215 	if (tdvp != *tvpp)
3216 		vn_seqc_write_begin(tdvp);
3217 
3218 	vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct);
3219 	if (tzp)
3220 		vnevent_rename_dest(*tvpp, tdvp, tnm, ct);
3221 
3222 	/*
3223 	 * Notify the target directory if it is not the same
3224 	 * as the source directory.
3225 	 */
3226 	if (tdvp != sdvp) {
3227 		vnevent_rename_dest_dir(tdvp, ct);
3228 	}
3229 
3230 	tx = dmu_tx_create(zfsvfs->z_os);
3231 	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3232 	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
3233 	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
3234 	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
3235 	if (sdzp != tdzp) {
3236 		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
3237 		zfs_sa_upgrade_txholds(tx, tdzp);
3238 	}
3239 	if (tzp) {
3240 		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
3241 		zfs_sa_upgrade_txholds(tx, tzp);
3242 	}
3243 
3244 	zfs_sa_upgrade_txholds(tx, szp);
3245 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
3246 	error = dmu_tx_assign(tx, TXG_WAIT);
3247 	if (error) {
3248 		dmu_tx_abort(tx);
3249 		goto out_seq;
3250 	}
3251 
3252 	if (tzp)	/* Attempt to remove the existing target */
3253 		error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL);
3254 
3255 	if (error == 0) {
3256 		error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING);
3257 		if (error == 0) {
3258 			szp->z_pflags |= ZFS_AV_MODIFIED;
3259 
3260 			error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
3261 			    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
3262 			ASSERT0(error);
3263 
3264 			error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING,
3265 			    NULL);
3266 			if (error == 0) {
3267 				zfs_log_rename(zilog, tx, TX_RENAME, sdzp,
3268 				    snm, tdzp, tnm, szp);
3269 			} else {
3270 				/*
3271 				 * At this point, we have successfully created
3272 				 * the target name, but have failed to remove
3273 				 * the source name.  Since the create was done
3274 				 * with the ZRENAMING flag, there are
3275 				 * complications; for one, the link count is
3276 				 * wrong.  The easiest way to deal with this
3277 				 * is to remove the newly created target, and
3278 				 * return the original error.  This must
3279 				 * succeed; fortunately, it is very unlikely to
3280 				 * fail, since we just created it.
3281 				 */
3282 				VERIFY0(zfs_link_destroy(tdzp, tnm, szp, tx,
3283 				    ZRENAMING, NULL));
3284 			}
3285 		}
3286 		if (error == 0) {
3287 			cache_vop_rename(sdvp, *svpp, tdvp, *tvpp, scnp, tcnp);
3288 		}
3289 	}
3290 
3291 	dmu_tx_commit(tx);
3292 
3293 out_seq:
3294 	vn_seqc_write_end(*svpp);
3295 	vn_seqc_write_end(sdvp);
3296 	if (*tvpp != NULL)
3297 		vn_seqc_write_end(*tvpp);
3298 	if (tdvp != *tvpp)
3299 		vn_seqc_write_end(tdvp);
3300 
3301 out:
3302 	if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3303 		zil_commit(zilog, 0);
3304 	zfs_exit(zfsvfs, FTAG);
3305 
3306 	return (error);
3307 }
3308 
3309 int
3310 zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname,
3311     cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap, zidmap_t *mnt_ns)
3312 {
3313 	struct componentname scn, tcn;
3314 	vnode_t *sdvp, *tdvp;
3315 	vnode_t *svp, *tvp;
3316 	int error;
3317 	svp = tvp = NULL;
3318 
3319 	if (is_nametoolong(tdzp->z_zfsvfs, tname))
3320 		return (SET_ERROR(ENAMETOOLONG));
3321 
3322 	if (rflags != 0 || wo_vap != NULL)
3323 		return (SET_ERROR(EINVAL));
3324 
3325 	sdvp = ZTOV(sdzp);
3326 	tdvp = ZTOV(tdzp);
3327 	error = zfs_lookup_internal(sdzp, sname, &svp, &scn, DELETE);
3328 	if (sdzp->z_zfsvfs->z_replay == B_FALSE)
3329 		VOP_UNLOCK(sdvp);
3330 	if (error != 0)
3331 		goto fail;
3332 	VOP_UNLOCK(svp);
3333 
3334 	vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
3335 	error = zfs_lookup_internal(tdzp, tname, &tvp, &tcn, RENAME);
3336 	if (error == EJUSTRETURN)
3337 		tvp = NULL;
3338 	else if (error != 0) {
3339 		VOP_UNLOCK(tdvp);
3340 		goto fail;
3341 	}
3342 
3343 	error = zfs_do_rename(sdvp, &svp, &scn, tdvp, &tvp, &tcn, cr);
3344 fail:
3345 	if (svp != NULL)
3346 		vrele(svp);
3347 	if (tvp != NULL)
3348 		vrele(tvp);
3349 
3350 	return (error);
3351 }
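/*
 * Illustration only: the EXDEV cases above (rename across mounts, into
 * the .zfs control directory, or across project IDs) are what userland
 * sees from rename(2), and callers such as mv(1) fall back to
 * copy-and-unlink:
 *
 *	if (rename(from, to) == -1 && errno == EXDEV) {
 *		// copy the file, then unlink the source
 *	}
 */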
3352 
3353 /*
3354  * Insert the indicated symbolic reference entry into the directory.
3355  *
3356  *	IN:	dvp	- Directory to contain new symbolic link.
3357  *		link	- Name for new symlink entry.
3358  *		vap	- Attributes of new entry.
3359  *		cr	- credentials of caller.
3360  *		ct	- caller context
3361  *		flags	- case flags
3362  *		mnt_ns	- Unused on FreeBSD
3363  *
3364  *	RETURN:	0 on success, error code on failure.
3365  *
3366  * Timestamps:
3367  *	dvp - ctime|mtime updated
3368  */
3369 int
3370 zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
3371     const char *link, znode_t **zpp, cred_t *cr, int flags, zidmap_t *mnt_ns)
3372 {
3373 	(void) flags;
3374 	znode_t		*zp;
3375 	dmu_tx_t	*tx;
3376 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
3377 	zilog_t		*zilog;
3378 	uint64_t	len = strlen(link);
3379 	int		error;
3380 	zfs_acl_ids_t	acl_ids;
3381 	boolean_t	fuid_dirtied;
3382 	uint64_t	txtype = TX_SYMLINK;
3383 
3384 	ASSERT3S(vap->va_type, ==, VLNK);
3385 
3386 	if (is_nametoolong(zfsvfs, name))
3387 		return (SET_ERROR(ENAMETOOLONG));
3388 
3389 	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
3390 		return (error);
3391 	zilog = zfsvfs->z_log;
3392 
3393 	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
3394 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3395 		zfs_exit(zfsvfs, FTAG);
3396 		return (SET_ERROR(EILSEQ));
3397 	}
3398 
3399 	if (len > MAXPATHLEN) {
3400 		zfs_exit(zfsvfs, FTAG);
3401 		return (SET_ERROR(ENAMETOOLONG));
3402 	}
3403 
3404 	if ((error = zfs_acl_ids_create(dzp, 0,
3405 	    vap, cr, NULL, &acl_ids, NULL)) != 0) {
3406 		zfs_exit(zfsvfs, FTAG);
3407 		return (error);
3408 	}
3409 
3410 	/*
3411 	 * Attempt to lock directory; fail if entry already exists.
3412 	 */
3413 	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
3414 	if (error) {
3415 		zfs_acl_ids_free(&acl_ids);
3416 		zfs_exit(zfsvfs, FTAG);
3417 		return (error);
3418 	}
3419 
3420 	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
3421 		zfs_acl_ids_free(&acl_ids);
3422 		zfs_exit(zfsvfs, FTAG);
3423 		return (error);
3424 	}
3425 
3426 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids,
3427 	    0 /* projid */)) {
3428 		zfs_acl_ids_free(&acl_ids);
3429 		zfs_exit(zfsvfs, FTAG);
3430 		return (SET_ERROR(EDQUOT));
3431 	}
3432 
3433 	getnewvnode_reserve();
3434 	tx = dmu_tx_create(zfsvfs->z_os);
3435 	fuid_dirtied = zfsvfs->z_fuid_dirty;
3436 	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
3437 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
3438 	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
3439 	    ZFS_SA_BASE_ATTR_SIZE + len);
3440 	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
3441 	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
3442 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
3443 		    acl_ids.z_aclp->z_acl_bytes);
3444 	}
3445 	if (fuid_dirtied)
3446 		zfs_fuid_txhold(zfsvfs, tx);
3447 	error = dmu_tx_assign(tx, TXG_WAIT);
3448 	if (error) {
3449 		zfs_acl_ids_free(&acl_ids);
3450 		dmu_tx_abort(tx);
3451 		getnewvnode_drop_reserve();
3452 		zfs_exit(zfsvfs, FTAG);
3453 		return (error);
3454 	}
3455 
3456 	/*
3457 	 * Create a new object for the symlink.
3458 	 * For version 4 ZPL datasets, the symlink will be an SA attribute.
3459 	 */
3460 	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
3461 
3462 	if (fuid_dirtied)
3463 		zfs_fuid_sync(zfsvfs, tx);
3464 
3465 	if (zp->z_is_sa)
3466 		error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
3467 		    __DECONST(void *, link), len, tx);
3468 	else
3469 		zfs_sa_symlink(zp, __DECONST(char *, link), len, tx);
3470 
3471 	zp->z_size = len;
3472 	(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
3473 	    &zp->z_size, sizeof (zp->z_size), tx);
3474 	/*
3475 	 * Insert the new object into the directory.
3476 	 */
3477 	error = zfs_link_create(dzp, name, zp, tx, ZNEW);
3478 	if (error != 0) {
3479 		zfs_znode_delete(zp, tx);
3480 		VOP_UNLOCK(ZTOV(zp));
3481 		zrele(zp);
3482 	} else {
3483 		zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
3484 	}
3485 
3486 	zfs_acl_ids_free(&acl_ids);
3487 
3488 	dmu_tx_commit(tx);
3489 
3490 	getnewvnode_drop_reserve();
3491 
3492 	if (error == 0) {
3493 		*zpp = zp;
3494 
3495 		if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3496 			zil_commit(zilog, 0);
3497 	}
3498 
3499 	zfs_exit(zfsvfs, FTAG);
3500 	return (error);
3501 }
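/*
 * Userland-level sketch of the length checks above: the new entry name
 * is subject to the usual name-length limit, while the link target is
 * limited to MAXPATHLEN, so an overlong target fails with ENAMETOOLONG.
 *
 *	char target[MAXPATHLEN + 2];
 *	memset(target, 'x', sizeof (target) - 1);
 *	target[sizeof (target) - 1] = '\0';
 *	if (symlink(target, "link") == -1)
 *		assert(errno == ENAMETOOLONG);
 */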
3502 
3503 /*
3504  * Return, in the buffer contained in the provided uio structure,
3505  * the symbolic path referred to by vp.
3506  *
3507  *	IN:	vp	- vnode of symbolic link.
3508  *		uio	- structure to contain the link path.
3509  *		cr	- credentials of caller.
3510  *		ct	- caller context
3511  *
3512  *	OUT:	uio	- structure containing the link path.
3513  *
3514  *	RETURN:	0 on success, error code on failure.
3515  *
3516  * Timestamps:
3517  *	vp - atime updated
3518  */
3519 static int
3520 zfs_readlink(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, caller_context_t *ct)
3521 {
3522 	(void) cr, (void) ct;
3523 	znode_t		*zp = VTOZ(vp);
3524 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
3525 	int		error;
3526 
3527 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3528 		return (error);
3529 
3530 	if (zp->z_is_sa)
3531 		error = sa_lookup_uio(zp->z_sa_hdl,
3532 		    SA_ZPL_SYMLINK(zfsvfs), uio);
3533 	else
3534 		error = zfs_sa_readlink(zp, uio);
3535 
3536 	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
3537 
3538 	zfs_exit(zfsvfs, FTAG);
3539 	return (error);
3540 }
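/*
 * Userland counterpart, for illustration: readlink(2) receives the
 * stored target through the uio above and does not NUL-terminate the
 * buffer, so the returned length must be honored.
 *
 *	char buf[PATH_MAX];
 *	ssize_t n = readlink(path, buf, sizeof (buf) - 1);
 *	if (n >= 0)
 *		buf[n] = '\0';
 */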
3541 
3542 /*
3543  * Insert a new entry into directory tdvp referencing svp.
3544  *
3545  *	IN:	tdvp	- Directory to contain new entry.
3546  *		svp	- vnode of new entry.
3547  *		name	- name of new entry.
3548  *		cr	- credentials of caller.
3549  *
3550  *	RETURN:	0 on success, error code on failure.
3551  *
3552  * Timestamps:
3553  *	tdvp - ctime|mtime updated
3554  *	 svp - ctime updated
3555  */
3556 int
3557 zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
3558     int flags)
3559 {
3560 	(void) flags;
3561 	znode_t		*tzp;
3562 	zfsvfs_t	*zfsvfs = tdzp->z_zfsvfs;
3563 	zilog_t		*zilog;
3564 	dmu_tx_t	*tx;
3565 	int		error;
3566 	uint64_t	parent;
3567 	uid_t		owner;
3568 
3569 	ASSERT3S(ZTOV(tdzp)->v_type, ==, VDIR);
3570 
3571 	if (is_nametoolong(zfsvfs, name))
3572 		return (SET_ERROR(ENAMETOOLONG));
3573 
3574 	if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0)
3575 		return (error);
3576 	zilog = zfsvfs->z_log;
3577 
3578 	/*
3579 	 * POSIX dictates that we return EPERM here.
3580 	 * Better choices include ENOTSUP or EISDIR.
3581 	 */
3582 	if (ZTOV(szp)->v_type == VDIR) {
3583 		zfs_exit(zfsvfs, FTAG);
3584 		return (SET_ERROR(EPERM));
3585 	}
3586 
3587 	if ((error = zfs_verify_zp(szp)) != 0) {
3588 		zfs_exit(zfsvfs, FTAG);
3589 		return (error);
3590 	}
3591 
3592 	/*
3593 	 * If we are using project inheritance, meaning the directory has
3594 	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
3595 	 * not only the project ID but also the ZFS_PROJINHERIT flag.  In
3596 	 * that case, we only allow hard link creation in our tree when the
3597 	 * project IDs are the same.
3598 	 */
3599 	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
3600 	    tdzp->z_projid != szp->z_projid) {
3601 		zfs_exit(zfsvfs, FTAG);
3602 		return (SET_ERROR(EXDEV));
3603 	}
3604 
3605 	if (szp->z_pflags & (ZFS_APPENDONLY |
3606 	    ZFS_IMMUTABLE | ZFS_READONLY)) {
3607 		zfs_exit(zfsvfs, FTAG);
3608 		return (SET_ERROR(EPERM));
3609 	}
3610 
3611 	/* Prevent links to .zfs/shares files */
3612 
3613 	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
3614 	    &parent, sizeof (uint64_t))) != 0) {
3615 		zfs_exit(zfsvfs, FTAG);
3616 		return (error);
3617 	}
3618 	if (parent == zfsvfs->z_shares_dir) {
3619 		zfs_exit(zfsvfs, FTAG);
3620 		return (SET_ERROR(EPERM));
3621 	}
3622 
3623 	if (zfsvfs->z_utf8 && u8_validate(name,
3624 	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3625 		zfs_exit(zfsvfs, FTAG);
3626 		return (SET_ERROR(EILSEQ));
3627 	}
3628 
3629 	/*
3630 	 * We do not support links between attributes and non-attributes
3631 	 * because of the potential security risk of creating links
3632 	 * into "normal" file space in order to circumvent restrictions
3633 	 * imposed in attribute space.
3634 	 */
3635 	if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
3636 		zfs_exit(zfsvfs, FTAG);
3637 		return (SET_ERROR(EINVAL));
3638 	}
3639 
3640 
3641 	owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
3642 	if (owner != crgetuid(cr) && secpolicy_basic_link(ZTOV(szp), cr) != 0) {
3643 		zfs_exit(zfsvfs, FTAG);
3644 		return (SET_ERROR(EPERM));
3645 	}
3646 
3647 	if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr, NULL))) {
3648 		zfs_exit(zfsvfs, FTAG);
3649 		return (error);
3650 	}
3651 
3652 	/*
3653 	 * Attempt to lock directory; fail if entry already exists.
3654 	 */
3655 	error = zfs_dirent_lookup(tdzp, name, &tzp, ZNEW);
3656 	if (error) {
3657 		zfs_exit(zfsvfs, FTAG);
3658 		return (error);
3659 	}
3660 
3661 	tx = dmu_tx_create(zfsvfs->z_os);
3662 	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3663 	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name);
3664 	zfs_sa_upgrade_txholds(tx, szp);
3665 	zfs_sa_upgrade_txholds(tx, tdzp);
3666 	error = dmu_tx_assign(tx, TXG_WAIT);
3667 	if (error) {
3668 		dmu_tx_abort(tx);
3669 		zfs_exit(zfsvfs, FTAG);
3670 		return (error);
3671 	}
3672 
3673 	error = zfs_link_create(tdzp, name, szp, tx, 0);
3674 
3675 	if (error == 0) {
3676 		uint64_t txtype = TX_LINK;
3677 		zfs_log_link(zilog, tx, txtype, tdzp, szp, name);
3678 	}
3679 
3680 	dmu_tx_commit(tx);
3681 
3682 	if (error == 0) {
3683 		vnevent_link(ZTOV(szp), ct);
3684 	}
3685 
3686 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3687 		zil_commit(zilog, 0);
3688 
3689 	zfs_exit(zfsvfs, FTAG);
3690 	return (error);
3691 }
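/*
 * Userland view of the checks above, for illustration: link(2) on a
 * directory fails with EPERM (as the POSIX comment notes), as does
 * linking an immutable, append-only, or read-only flagged file.
 *
 *	if (link("somedir", "alias") == -1)
 *		assert(errno == EPERM);
 */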
3692 
3693 /*
3694  * Free or allocate space in a file.  Currently, this function only
3695  * supports the `F_FREESP' command.  However, this command is somewhat
3696  * misnamed, as its functionality includes the ability to allocate as
3697  * well as free space.
3698  *
3699  *	IN:	ip	- inode of file to free data in.
3700  *		cmd	- action to take (only F_FREESP supported).
3701  *		bfp	- section of file to free/alloc.
3702  *		flag	- current file open mode flags.
3703  *		offset	- current file offset.
3704  *		cr	- credentials of caller.
3705  *
3706  *	RETURN:	0 on success, error code on failure.
3707  *
3708  * Timestamps:
3709  *	zp - ctime|mtime updated
3710  */
3711 int
3712 zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
3713     offset_t offset, cred_t *cr)
3714 {
3715 	(void) offset;
3716 	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
3717 	uint64_t	off, len;
3718 	int		error;
3719 
3720 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3721 		return (error);
3722 
3723 	if (cmd != F_FREESP) {
3724 		zfs_exit(zfsvfs, FTAG);
3725 		return (SET_ERROR(EINVAL));
3726 	}
3727 
3728 	/*
3729 	 * Callers might not be able to detect properly that we are read-only,
3730 	 * so check it explicitly here.
3731 	 */
3732 	if (zfs_is_readonly(zfsvfs)) {
3733 		zfs_exit(zfsvfs, FTAG);
3734 		return (SET_ERROR(EROFS));
3735 	}
3736 
3737 	if (bfp->l_len < 0) {
3738 		zfs_exit(zfsvfs, FTAG);
3739 		return (SET_ERROR(EINVAL));
3740 	}
3741 
3742 	/*
3743 	 * Permissions aren't checked on Solaris because on this OS
3744 	 * zfs_space() can only be called with an opened file handle.
3745 	 * On Linux we can get here through truncate_range() which
3746 	 * operates directly on inodes, so we need to check access rights.
3747 	 */
3748 	if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr, NULL))) {
3749 		zfs_exit(zfsvfs, FTAG);
3750 		return (error);
3751 	}
3752 
3753 	off = bfp->l_start;
3754 	len = bfp->l_len; /* 0 means from off to end of file */
3755 
3756 	error = zfs_freesp(zp, off, len, flag, TRUE);
3757 
3758 	zfs_exit(zfsvfs, FTAG);
3759 	return (error);
3760 }
3761 
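/*
 * VOP_INACTIVE handler: recycle the vnode right away if the znode is
 * already gone (forced unmount, suspend/resume) or unlinked; otherwise
 * write any dirty atime back to the znode's SA before returning.
 */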
3762 static void
3763 zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
3764 {
3765 	(void) cr, (void) ct;
3766 	znode_t	*zp = VTOZ(vp);
3767 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3768 	int error;
3769 
3770 	ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
3771 	if (zp->z_sa_hdl == NULL) {
3772 		/*
3773 		 * The fs has been unmounted, or we did a
3774 		 * suspend/resume and this file no longer exists.
3775 		 */
3776 		ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3777 		vrecycle(vp);
3778 		return;
3779 	}
3780 
3781 	if (zp->z_unlinked) {
3782 		/*
3783 		 * Fast path to recycle a vnode of a removed file.
3784 		 */
3785 		ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3786 		vrecycle(vp);
3787 		return;
3788 	}
3789 
3790 	if (zp->z_atime_dirty && zp->z_unlinked == 0) {
3791 		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
3792 
3793 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
3794 		zfs_sa_upgrade_txholds(tx, zp);
3795 		error = dmu_tx_assign(tx, TXG_WAIT);
3796 		if (error) {
3797 			dmu_tx_abort(tx);
3798 		} else {
3799 			(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
3800 			    (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
3801 			zp->z_atime_dirty = 0;
3802 			dmu_tx_commit(tx);
3803 		}
3804 	}
3805 	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
3806 }
3807 
3808 
3809 _Static_assert(sizeof (struct zfid_short) <= sizeof (struct fid),
3810 	"struct zfid_short bigger than struct fid");
3811 _Static_assert(sizeof (struct zfid_long) <= sizeof (struct fid),
3812 	"struct zfid_long bigger than struct fid");
3813 
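/*
 * Build a file ID (as used in NFS file handles) from the object number
 * and generation.  Datasets that are not their own parent (e.g. snapshots
 * under .zfs) use the long form, which also encodes the objset ID.
 */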
3814 static int
3815 zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
3816 {
3817 	(void) ct;
3818 	znode_t		*zp = VTOZ(vp);
3819 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
3820 	uint32_t	gen;
3821 	uint64_t	gen64;
3822 	uint64_t	object = zp->z_id;
3823 	zfid_short_t	*zfid;
3824 	int		size, i, error;
3825 
3826 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3827 		return (error);
3828 
3829 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
3830 	    &gen64, sizeof (uint64_t))) != 0) {
3831 		zfs_exit(zfsvfs, FTAG);
3832 		return (error);
3833 	}
3834 
3835 	gen = (uint32_t)gen64;
3836 
3837 	size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
3838 	fidp->fid_len = size;
3839 
3840 	zfid = (zfid_short_t *)fidp;
3841 
3842 	zfid->zf_len = size;
3843 
3844 	for (i = 0; i < sizeof (zfid->zf_object); i++)
3845 		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
3846 
3847 	/* Must have a non-zero generation number to distinguish from .zfs */
3848 	if (gen == 0)
3849 		gen = 1;
3850 	for (i = 0; i < sizeof (zfid->zf_gen); i++)
3851 		zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
3852 
3853 	if (size == LONG_FID_LEN) {
3854 		uint64_t	objsetid = dmu_objset_id(zfsvfs->z_os);
3855 		zfid_long_t	*zlfid;
3856 
3857 		zlfid = (zfid_long_t *)fidp;
3858 
3859 		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
3860 			zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
3861 
3862 		/* XXX - this should be the generation number for the objset */
3863 		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
3864 			zlfid->zf_setgen[i] = 0;
3865 	}
3866 
3867 	zfs_exit(zfsvfs, FTAG);
3868 	return (0);
3869 }
3870 
3871 static int
3872 zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
3873     caller_context_t *ct)
3874 {
3875 	znode_t *zp;
3876 	zfsvfs_t *zfsvfs;
3877 	int error;
3878 
3879 	switch (cmd) {
3880 	case _PC_LINK_MAX:
3881 		*valp = MIN(LONG_MAX, ZFS_LINK_MAX);
3882 		return (0);
3883 
3884 	case _PC_FILESIZEBITS:
3885 		*valp = 64;
3886 		return (0);
3887 	case _PC_MIN_HOLE_SIZE:
3888 		*valp = (int)SPA_MINBLOCKSIZE;
3889 		return (0);
3890 	case _PC_ACL_EXTENDED:
3891 #if 0		/* POSIX ACLs are not implemented for ZFS on FreeBSD yet. */
3892 		zp = VTOZ(vp);
3893 		zfsvfs = zp->z_zfsvfs;
3894 		if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3895 			return (error);
3896 		*valp = zfsvfs->z_acl_type == ZFSACLTYPE_POSIX ? 1 : 0;
3897 		zfs_exit(zfsvfs, FTAG);
3898 #else
3899 		*valp = 0;
3900 #endif
3901 		return (0);
3902 
3903 	case _PC_ACL_NFS4:
3904 		zp = VTOZ(vp);
3905 		zfsvfs = zp->z_zfsvfs;
3906 		if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3907 			return (error);
3908 		*valp = zfsvfs->z_acl_type == ZFS_ACLTYPE_NFSV4 ? 1 : 0;
3909 		zfs_exit(zfsvfs, FTAG);
3910 		return (0);
3911 
3912 	case _PC_ACL_PATH_MAX:
3913 		*valp = ACL_MAX_ENTRIES;
3914 		return (0);
3915 
3916 	default:
3917 		return (EOPNOTSUPP);
3918 	}
3919 }
3920 
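/*
 * Page-in handler: take a read range lock covering the requested pages,
 * retrying if the block size grows underneath us, then fill the pages
 * (plus any read-behind/read-ahead) from the DMU via dmu_read_pages().
 */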
3921 static int
3922 zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
3923     int *rahead)
3924 {
3925 	znode_t *zp = VTOZ(vp);
3926 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3927 	zfs_locked_range_t *lr;
3928 	vm_object_t object;
3929 	off_t start, end, obj_size;
3930 	uint_t blksz;
3931 	int pgsin_b, pgsin_a;
3932 	int error;
3933 
3934 	if (zfs_enter_verify_zp(zfsvfs, zp, FTAG) != 0)
3935 		return (zfs_vm_pagerret_error);
3936 
3937 	object = ma[0]->object;
3938 	start = IDX_TO_OFF(ma[0]->pindex);
3939 	end = IDX_TO_OFF(ma[count - 1]->pindex + 1);
3940 
3941 	/*
3942 	 * Lock a range covering all required and optional pages.
3943 	 * Note that we need to handle the case of the block size growing.
3944 	 */
3945 	for (;;) {
3946 		uint64_t len;
3947 
3948 		blksz = zp->z_blksz;
3949 		len = roundup(end, blksz) - rounddown(start, blksz);
3950 
3951 		lr = zfs_rangelock_tryenter(&zp->z_rangelock,
3952 		    rounddown(start, blksz), len, RL_READER);
3953 		if (lr == NULL) {
3954 			/*
3955 			 * Avoid a deadlock with update_pages().  We need to
3956 			 * hold the range lock when copying from the DMU, so
3957 			 * give up the busy lock to allow update_pages() to
3958 			 * proceed.  We might need to allocate new pages, which
3959 			 * isn't quite right since this allocation isn't subject
3960 			 * to the page fault handler's OOM logic, but this is
3961 			 * the best we can do for now.
3962 			 */
3963 			for (int i = 0; i < count; i++)
3964 				vm_page_xunbusy(ma[i]);
3965 
3966 			lr = zfs_rangelock_enter(&zp->z_rangelock,
3967 			    rounddown(start, blksz), len, RL_READER);
3968 
3969 			zfs_vmobject_wlock(object);
3970 			(void) vm_page_grab_pages(object, OFF_TO_IDX(start),
3971 			    VM_ALLOC_NORMAL | VM_ALLOC_WAITOK | VM_ALLOC_ZERO,
3972 			    ma, count);
3973 			zfs_vmobject_wunlock(object);
3974 		}
3975 		if (blksz == zp->z_blksz)
3976 			break;
3977 		zfs_rangelock_exit(lr);
3978 	}
3979 
3980 	zfs_vmobject_wlock(object);
3981 	obj_size = object->un_pager.vnp.vnp_size;
3982 	zfs_vmobject_wunlock(object);
3983 	if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
3984 		zfs_rangelock_exit(lr);
3985 		zfs_exit(zfsvfs, FTAG);
3986 		return (zfs_vm_pagerret_bad);
3987 	}
3988 
3989 	pgsin_b = 0;
3990 	if (rbehind != NULL) {
3991 		pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz));
3992 		pgsin_b = MIN(*rbehind, pgsin_b);
3993 	}
3994 
3995 	pgsin_a = 0;
3996 	if (rahead != NULL) {
3997 		pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end);
3998 		if (end + IDX_TO_OFF(pgsin_a) >= obj_size)
3999 			pgsin_a = OFF_TO_IDX(round_page(obj_size) - end);
4000 		pgsin_a = MIN(*rahead, pgsin_a);
4001 	}
4002 
4003 	/*
4004 	 * NB: we need to pass the exact byte size of the data that we expect
4005 	 * to read after accounting for the file size.  This is required because
4006 	 * ZFS will panic if we request DMU to read beyond the end of the last
4007 	 * allocated block.
4008 	 */
4009 	for (int i = 0; i < count; i++) {
4010 		int dummypgsin, count1, j, last_size;
4011 
4012 		if (vm_page_any_valid(ma[i])) {
4013 			ASSERT(vm_page_all_valid(ma[i]));
4014 			continue;
4015 		}
4016 		for (j = i + 1; j < count; j++) {
4017 			if (vm_page_any_valid(ma[j])) {
4018 				ASSERT(vm_page_all_valid(ma[j]));
4019 				break;
4020 			}
4021 		}
4022 		count1 = j - i;
4023 		dummypgsin = 0;
4024 		last_size = j == count ?
4025 		    MIN(end, obj_size) - (end - PAGE_SIZE) : PAGE_SIZE;
4026 		error = dmu_read_pages(zfsvfs->z_os, zp->z_id, &ma[i], count1,
4027 		    i == 0 ? &pgsin_b : &dummypgsin,
4028 		    j == count ? &pgsin_a : &dummypgsin,
4029 		    last_size);
4030 		if (error != 0)
4031 			break;
4032 		i += count1 - 1;
4033 	}
4034 
4035 	zfs_rangelock_exit(lr);
4036 	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
4037 
4038 	dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, count*PAGE_SIZE);
4039 
4040 	zfs_exit(zfsvfs, FTAG);
4041 
4042 	if (error != 0)
4043 		return (zfs_vm_pagerret_error);
4044 
4045 	VM_CNT_INC(v_vnodein);
4046 	VM_CNT_ADD(v_vnodepgsin, count + pgsin_b + pgsin_a);
4047 	if (rbehind != NULL)
4048 		*rbehind = pgsin_b;
4049 	if (rahead != NULL)
4050 		*rahead = pgsin_a;
4051 	return (zfs_vm_pagerret_ok);
4052 }
4053 
4054 #ifndef _SYS_SYSPROTO_H_
4055 struct vop_getpages_args {
4056 	struct vnode *a_vp;
4057 	vm_page_t *a_m;
4058 	int a_count;
4059 	int *a_rbehind;
4060 	int *a_rahead;
4061 };
4062 #endif
4063 
4064 static int
4065 zfs_freebsd_getpages(struct vop_getpages_args *ap)
4066 {
4067 
4068 	return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind,
4069 	    ap->a_rahead));
4070 }
4071 
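/*
 * Page-out handler: clamp the request to the vnode size, then copy the
 * dirty pages into a DMU transaction under a write range lock, update
 * the timestamps, log the write and commit the ZIL when the request is
 * synchronous or the dataset is sync=always.
 */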
4072 static int
4073 zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
4074     int *rtvals)
4075 {
4076 	znode_t		*zp = VTOZ(vp);
4077 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
4078 	zfs_locked_range_t		*lr;
4079 	dmu_tx_t	*tx;
4080 	struct sf_buf	*sf;
4081 	vm_object_t	object;
4082 	vm_page_t	m;
4083 	caddr_t		va;
4084 	size_t		tocopy;
4085 	size_t		lo_len;
4086 	vm_ooffset_t	lo_off;
4087 	vm_ooffset_t	off;
4088 	uint_t		blksz;
4089 	int		ncount;
4090 	int		pcount;
4091 	int		err;
4092 	int		i;
4093 
4094 	object = vp->v_object;
4095 	KASSERT(ma[0]->object == object, ("mismatching object"));
4096 	KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length"));
4097 
4098 	pcount = btoc(len);
4099 	ncount = pcount;
4100 	for (i = 0; i < pcount; i++)
4101 		rtvals[i] = zfs_vm_pagerret_error;
4102 
4103 	if (zfs_enter_verify_zp(zfsvfs, zp, FTAG) != 0)
4104 		return (zfs_vm_pagerret_error);
4105 
4106 	off = IDX_TO_OFF(ma[0]->pindex);
4107 	blksz = zp->z_blksz;
4108 	lo_off = rounddown(off, blksz);
4109 	lo_len = roundup(len + (off - lo_off), blksz);
4110 	lr = zfs_rangelock_enter(&zp->z_rangelock, lo_off, lo_len, RL_WRITER);
4111 
4112 	zfs_vmobject_wlock(object);
4113 	if (len + off > object->un_pager.vnp.vnp_size) {
4114 		if (object->un_pager.vnp.vnp_size > off) {
4115 			int pgoff;
4116 
4117 			len = object->un_pager.vnp.vnp_size - off;
4118 			ncount = btoc(len);
4119 			if ((pgoff = (int)len & PAGE_MASK) != 0) {
4120 				/*
4121 				 * If the object is locked and the following
4122 				 * conditions hold, then the page's dirty
4123 				 * field cannot be concurrently changed by a
4124 				 * pmap operation.
4125 				 */
4126 				m = ma[ncount - 1];
4127 				vm_page_assert_sbusied(m);
4128 				KASSERT(!pmap_page_is_write_mapped(m),
4129 				    ("zfs_putpages: page %p is not read-only",
4130 				    m));
4131 				vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
4132 				    pgoff);
4133 			}
4134 		} else {
4135 			len = 0;
4136 			ncount = 0;
4137 		}
4138 		if (ncount < pcount) {
4139 			for (i = ncount; i < pcount; i++) {
4140 				rtvals[i] = zfs_vm_pagerret_bad;
4141 			}
4142 		}
4143 	}
4144 	zfs_vmobject_wunlock(object);
4145 
4146 	boolean_t commit = (flags & (zfs_vm_pagerput_sync |
4147 	    zfs_vm_pagerput_inval)) != 0 ||
4148 	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS;
4149 
4150 	if (ncount == 0)
4151 		goto out;
4152 
4153 	if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, zp->z_uid) ||
4154 	    zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, zp->z_gid) ||
4155 	    (zp->z_projid != ZFS_DEFAULT_PROJID &&
4156 	    zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
4157 	    zp->z_projid))) {
4158 		goto out;
4159 	}
4160 
4161 	tx = dmu_tx_create(zfsvfs->z_os);
4162 	dmu_tx_hold_write(tx, zp->z_id, off, len);
4163 
4164 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
4165 	zfs_sa_upgrade_txholds(tx, zp);
4166 	err = dmu_tx_assign(tx, TXG_WAIT);
4167 	if (err != 0) {
4168 		dmu_tx_abort(tx);
4169 		goto out;
4170 	}
4171 
4172 	if (zp->z_blksz < PAGE_SIZE) {
4173 		for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) {
4174 			tocopy = len > PAGE_SIZE ? PAGE_SIZE : len;
4175 			va = zfs_map_page(ma[i], &sf);
4176 			dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx);
4177 			zfs_unmap_page(sf);
4178 		}
4179 	} else {
4180 		err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx);
4181 	}
4182 
4183 	if (err == 0) {
4184 		uint64_t mtime[2], ctime[2];
4185 		sa_bulk_attr_t bulk[3];
4186 		int count = 0;
4187 
4188 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
4189 		    &mtime, 16);
4190 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
4191 		    &ctime, 16);
4192 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
4193 		    &zp->z_pflags, 8);
4194 		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
4195 		err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
4196 		ASSERT0(err);
4197 		/*
4198 		 * XXX we should be passing a callback to undirty
4199 		 * but that would make the locking messier
4200 		 */
4201 		zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off,
4202 		    len, commit, B_FALSE, NULL, NULL);
4203 
4204 		zfs_vmobject_wlock(object);
4205 		for (i = 0; i < ncount; i++) {
4206 			rtvals[i] = zfs_vm_pagerret_ok;
4207 			vm_page_undirty(ma[i]);
4208 		}
4209 		zfs_vmobject_wunlock(object);
4210 		VM_CNT_INC(v_vnodeout);
4211 		VM_CNT_ADD(v_vnodepgsout, ncount);
4212 	}
4213 	dmu_tx_commit(tx);
4214 
4215 out:
4216 	zfs_rangelock_exit(lr);
4217 	if (commit)
4218 		zil_commit(zfsvfs->z_log, zp->z_id);
4219 
4220 	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, len);
4221 
4222 	zfs_exit(zfsvfs, FTAG);
4223 	return (rtvals[0]);
4224 }
4225 
4226 #ifndef _SYS_SYSPROTO_H_
4227 struct vop_putpages_args {
4228 	struct vnode *a_vp;
4229 	vm_page_t *a_m;
4230 	int a_count;
4231 	int a_sync;
4232 	int *a_rtvals;
4233 };
4234 #endif
4235 
4236 static int
4237 zfs_freebsd_putpages(struct vop_putpages_args *ap)
4238 {
4239 
4240 	return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync,
4241 	    ap->a_rtvals));
4242 }
4243 
4244 #ifndef _SYS_SYSPROTO_H_
4245 struct vop_bmap_args {
4246 	struct vnode *a_vp;
4247 	daddr_t  a_bn;
4248 	struct bufobj **a_bop;
4249 	daddr_t *a_bnp;
4250 	int *a_runp;
4251 	int *a_runb;
4252 };
4253 #endif
4254 
4255 static int
4256 zfs_freebsd_bmap(struct vop_bmap_args *ap)
4257 {
4258 
4259 	if (ap->a_bop != NULL)
4260 		*ap->a_bop = &ap->a_vp->v_bufobj;
4261 	if (ap->a_bnp != NULL)
4262 		*ap->a_bnp = ap->a_bn;
4263 	if (ap->a_runp != NULL)
4264 		*ap->a_runp = 0;
4265 	if (ap->a_runb != NULL)
4266 		*ap->a_runb = 0;
4267 
4268 	return (0);
4269 }
4270 
4271 #ifndef _SYS_SYSPROTO_H_
4272 struct vop_open_args {
4273 	struct vnode *a_vp;
4274 	int a_mode;
4275 	struct ucred *a_cred;
4276 	struct thread *a_td;
4277 };
4278 #endif
4279 
4280 static int
4281 zfs_freebsd_open(struct vop_open_args *ap)
4282 {
4283 	vnode_t	*vp = ap->a_vp;
4284 	znode_t *zp = VTOZ(vp);
4285 	int error;
4286 
4287 	error = zfs_open(&vp, ap->a_mode, ap->a_cred);
4288 	if (error == 0)
4289 		vnode_create_vobject(vp, zp->z_size, ap->a_td);
4290 	return (error);
4291 }
4292 
4293 #ifndef _SYS_SYSPROTO_H_
4294 struct vop_close_args {
4295 	struct vnode *a_vp;
4296 	int  a_fflag;
4297 	struct ucred *a_cred;
4298 	struct thread *a_td;
4299 };
4300 #endif
4301 
4302 static int
4303 zfs_freebsd_close(struct vop_close_args *ap)
4304 {
4305 
4306 	return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred));
4307 }
4308 
4309 #ifndef _SYS_SYSPROTO_H_
4310 struct vop_ioctl_args {
4311 	struct vnode *a_vp;
4312 	ulong_t a_command;
4313 	caddr_t a_data;
4314 	int a_fflag;
4315 	struct ucred *cred;
4316 	struct thread *td;
4317 };
4318 #endif
4319 
4320 static int
4321 zfs_freebsd_ioctl(struct vop_ioctl_args *ap)
4322 {
4323 
4324 	return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data,
4325 	    ap->a_fflag, ap->a_cred, NULL));
4326 }
4327 
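/*
 * Translate FreeBSD ioflag bits (IO_*) into the O_* style flags expected
 * by the common ZFS read/write code.
 */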
4328 static int
4329 ioflags(int ioflags)
4330 {
4331 	int flags = 0;
4332 
4333 	if (ioflags & IO_APPEND)
4334 		flags |= O_APPEND;
4335 	if (ioflags & IO_NDELAY)
4336 		flags |= O_NONBLOCK;
4337 	if (ioflags & IO_DIRECT)
4338 		flags |= O_DIRECT;
4339 	if (ioflags & IO_SYNC)
4340 		flags |= O_SYNC;
4341 
4342 	return (flags);
4343 }
4344 
4345 #ifndef _SYS_SYSPROTO_H_
4346 struct vop_read_args {
4347 	struct vnode *a_vp;
4348 	struct uio *a_uio;
4349 	int a_ioflag;
4350 	struct ucred *a_cred;
4351 };
4352 #endif
4353 
4354 static int
4355 zfs_freebsd_read(struct vop_read_args *ap)
4356 {
4357 	zfs_uio_t uio;
4358 	int error = 0;
4359 	zfs_uio_init(&uio, ap->a_uio);
4360 	error = zfs_read(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
4361 	    ap->a_cred);
4362 	/*
4363 	 * XXX We occasionally get an EFAULT for Direct I/O reads on
4364 	 * FreeBSD 13. This still needs to be resolved. The EFAULT comes
4365 	 * from:
4366 	 * zfs_uio_get__dio_pages_alloc() ->
4367 	 * zfs_uio_get_dio_pages_impl() ->
4368 	 * zfs_uio_iov_step() ->
4369 	 * zfs_uio_get_user_pages().
4370 	 * We return EFAULT from zfs_uio_iov_step(). When a Direct I/O
4371 	 * read fails to map in the user pages (returning EFAULT) the
4372 	 * Direct I/O request is broken up into two separate IO requests
4373 	 * and issued separately using Direct I/O.
4374 	 */
4375 #ifdef ZFS_DEBUG
4376 	if (error == EFAULT && uio.uio_extflg & UIO_DIRECT) {
4377 #if 0
4378 		printf("%s(%d): Direct I/O read returning EFAULT "
4379 		    "uio = %p, zfs_uio_offset(uio) = %lu "
4380 		    "zfs_uio_resid(uio) = %lu\n",
4381 		    __FUNCTION__, __LINE__, &uio, zfs_uio_offset(&uio),
4382 		    zfs_uio_resid(&uio));
4383 #endif
4384 	}
4385 
4386 #endif
4387 	return (error);
4388 }
4389 
4390 #ifndef _SYS_SYSPROTO_H_
4391 struct vop_write_args {
4392 	struct vnode *a_vp;
4393 	struct uio *a_uio;
4394 	int a_ioflag;
4395 	struct ucred *a_cred;
4396 };
4397 #endif
4398 
4399 static int
4400 zfs_freebsd_write(struct vop_write_args *ap)
4401 {
4402 	zfs_uio_t uio;
4403 	zfs_uio_init(&uio, ap->a_uio);
4404 	return (zfs_write(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
4405 	    ap->a_cred));
4406 }
4407 
4408 /*
4409  * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
4410  * the comment above cache_fplookup for details.
4411  */
4412 static int
4413 zfs_freebsd_fplookup_vexec(struct vop_fplookup_vexec_args *v)
4414 {
4415 	vnode_t *vp;
4416 	znode_t *zp;
4417 	uint64_t pflags;
4418 
4419 	vp = v->a_vp;
4420 	zp = VTOZ_SMR(vp);
4421 	if (__predict_false(zp == NULL))
4422 		return (EAGAIN);
4423 	pflags = atomic_load_64(&zp->z_pflags);
4424 	if (pflags & ZFS_AV_QUARANTINED)
4425 		return (EAGAIN);
4426 	if (pflags & ZFS_XATTR)
4427 		return (EAGAIN);
4428 	if ((pflags & ZFS_NO_EXECS_DENIED) == 0)
4429 		return (EAGAIN);
4430 	return (0);
4431 }
4432 
4433 static int
4434 zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args *v)
4435 {
4436 	vnode_t *vp;
4437 	znode_t *zp;
4438 	char *target;
4439 
4440 	vp = v->a_vp;
4441 	zp = VTOZ_SMR(vp);
4442 	if (__predict_false(zp == NULL)) {
4443 		return (EAGAIN);
4444 	}
4445 
4446 	target = atomic_load_consume_ptr(&zp->z_cached_symlink);
4447 	if (target == NULL) {
4448 		return (EAGAIN);
4449 	}
4450 	return (cache_symlink_resolve(v->a_fpl, target, strlen(target)));
4451 }
4452 
4453 #ifndef _SYS_SYSPROTO_H_
4454 struct vop_access_args {
4455 	struct vnode *a_vp;
4456 	accmode_t a_accmode;
4457 	struct ucred *a_cred;
4458 	struct thread *a_td;
4459 };
4460 #endif
4461 
4462 static int
4463 zfs_freebsd_access(struct vop_access_args *ap)
4464 {
4465 	vnode_t *vp = ap->a_vp;
4466 	znode_t *zp = VTOZ(vp);
4467 	accmode_t accmode;
4468 	int error = 0;
4469 
4470 
4471 	if (ap->a_accmode == VEXEC) {
4472 		if (zfs_fastaccesschk_execute(zp, ap->a_cred) == 0)
4473 			return (0);
4474 	}
4475 
4476 	/*
4477 	 * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND,
4478 	 * ZFS itself only knows about VREAD, VWRITE, VEXEC and VAPPEND.
4479 	accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND);
4480 	if (accmode != 0)
4481 		error = zfs_access(zp, accmode, 0, ap->a_cred);
4482 
4483 	/*
4484 	 * VADMIN has to be handled by vaccess().
4485 	 */
4486 	if (error == 0) {
4487 		accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND);
4488 		if (accmode != 0) {
4489 			error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
4490 			    zp->z_gid, accmode, ap->a_cred);
4491 		}
4492 	}
4493 
4494 	/*
4495 	 * For VEXEC, ensure that at least one execute bit is set for
4496 	 * non-directories.
4497 	 */
4498 	if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR &&
4499 	    (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
4500 		error = EACCES;
4501 	}
4502 
4503 	return (error);
4504 }
4505 
4506 #ifndef _SYS_SYSPROTO_H_
4507 struct vop_lookup_args {
4508 	struct vnode *a_dvp;
4509 	struct vnode **a_vpp;
4510 	struct componentname *a_cnp;
4511 };
4512 #endif
4513 
4514 static int
4515 zfs_freebsd_lookup(struct vop_lookup_args *ap, boolean_t cached)
4516 {
4517 	struct componentname *cnp = ap->a_cnp;
4518 	char nm[NAME_MAX + 1];
4519 
4520 	ASSERT3U(cnp->cn_namelen, <, sizeof (nm));
4521 	strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof (nm)));
4522 
4523 	return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop,
4524 	    cnp->cn_cred, 0, cached));
4525 }
4526 
4527 static int
4528 zfs_freebsd_cachedlookup(struct vop_cachedlookup_args *ap)
4529 {
4530 
4531 	return (zfs_freebsd_lookup((struct vop_lookup_args *)ap, B_TRUE));
4532 }
4533 
4534 #ifndef _SYS_SYSPROTO_H_
4535 struct vop_lookup_args {
4536 	struct vnode *a_dvp;
4537 	struct vnode **a_vpp;
4538 	struct componentname *a_cnp;
4539 };
4540 #endif
4541 
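/*
 * Route lookups through the VFS name cache only when it is enabled for
 * this dataset; otherwise do an uncached zfs_freebsd_lookup().
 */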
4542 static int
4543 zfs_cache_lookup(struct vop_lookup_args *ap)
4544 {
4545 	zfsvfs_t *zfsvfs;
4546 
4547 	zfsvfs = ap->a_dvp->v_mount->mnt_data;
4548 	if (zfsvfs->z_use_namecache)
4549 		return (vfs_cache_lookup(ap));
4550 	else
4551 		return (zfs_freebsd_lookup(ap, B_FALSE));
4552 }
4553 
4554 #ifndef _SYS_SYSPROTO_H_
4555 struct vop_create_args {
4556 	struct vnode *a_dvp;
4557 	struct vnode **a_vpp;
4558 	struct componentname *a_cnp;
4559 	struct vattr *a_vap;
4560 };
4561 #endif
4562 
4563 static int
4564 zfs_freebsd_create(struct vop_create_args *ap)
4565 {
4566 	zfsvfs_t *zfsvfs;
4567 	struct componentname *cnp = ap->a_cnp;
4568 	vattr_t *vap = ap->a_vap;
4569 	znode_t *zp = NULL;
4570 	int rc, mode;
4571 
4572 #if __FreeBSD_version < 1400068
4573 	ASSERT(cnp->cn_flags & SAVENAME);
4574 #endif
4575 
4576 	vattr_init_mask(vap);
4577 	mode = vap->va_mode & ALLPERMS;
4578 	zfsvfs = ap->a_dvp->v_mount->mnt_data;
4579 	*ap->a_vpp = NULL;
4580 
4581 	rc = zfs_create(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, 0, mode,
4582 	    &zp, cnp->cn_cred, 0 /* flag */, NULL /* vsecattr */, NULL);
4583 	if (rc == 0)
4584 		*ap->a_vpp = ZTOV(zp);
4585 	if (zfsvfs->z_use_namecache &&
4586 	    rc == 0 && (cnp->cn_flags & MAKEENTRY) != 0)
4587 		cache_enter(ap->a_dvp, *ap->a_vpp, cnp);
4588 
4589 	return (rc);
4590 }
4591 
4592 #ifndef _SYS_SYSPROTO_H_
4593 struct vop_remove_args {
4594 	struct vnode *a_dvp;
4595 	struct vnode *a_vp;
4596 	struct componentname *a_cnp;
4597 };
4598 #endif
4599 
4600 static int
4601 zfs_freebsd_remove(struct vop_remove_args *ap)
4602 {
4603 
4604 #if __FreeBSD_version < 1400068
4605 	ASSERT(ap->a_cnp->cn_flags & SAVENAME);
4606 #endif
4607 
4608 	return (zfs_remove_(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr,
4609 	    ap->a_cnp->cn_cred));
4610 }
4611 
4612 #ifndef _SYS_SYSPROTO_H_
4613 struct vop_mkdir_args {
4614 	struct vnode *a_dvp;
4615 	struct vnode **a_vpp;
4616 	struct componentname *a_cnp;
4617 	struct vattr *a_vap;
4618 };
4619 #endif
4620 
4621 static int
4622 zfs_freebsd_mkdir(struct vop_mkdir_args *ap)
4623 {
4624 	vattr_t *vap = ap->a_vap;
4625 	znode_t *zp = NULL;
4626 	int rc;
4627 
4628 #if __FreeBSD_version < 1400068
4629 	ASSERT(ap->a_cnp->cn_flags & SAVENAME);
4630 #endif
4631 
4632 	vattr_init_mask(vap);
4633 	*ap->a_vpp = NULL;
4634 
4635 	rc = zfs_mkdir(VTOZ(ap->a_dvp), ap->a_cnp->cn_nameptr, vap, &zp,
4636 	    ap->a_cnp->cn_cred, 0, NULL, NULL);
4637 
4638 	if (rc == 0)
4639 		*ap->a_vpp = ZTOV(zp);
4640 	return (rc);
4641 }
4642 
4643 #ifndef _SYS_SYSPROTO_H_
4644 struct vop_rmdir_args {
4645 	struct vnode *a_dvp;
4646 	struct vnode *a_vp;
4647 	struct componentname *a_cnp;
4648 };
4649 #endif
4650 
4651 static int
4652 zfs_freebsd_rmdir(struct vop_rmdir_args *ap)
4653 {
4654 	struct componentname *cnp = ap->a_cnp;
4655 
4656 #if __FreeBSD_version < 1400068
4657 	ASSERT(cnp->cn_flags & SAVENAME);
4658 #endif
4659 
4660 	return (zfs_rmdir_(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred));
4661 }
4662 
4663 #ifndef _SYS_SYSPROTO_H_
4664 struct vop_readdir_args {
4665 	struct vnode *a_vp;
4666 	struct uio *a_uio;
4667 	struct ucred *a_cred;
4668 	int *a_eofflag;
4669 	int *a_ncookies;
4670 	cookie_t **a_cookies;
4671 };
4672 #endif
4673 
4674 static int
4675 zfs_freebsd_readdir(struct vop_readdir_args *ap)
4676 {
4677 	zfs_uio_t uio;
4678 	zfs_uio_init(&uio, ap->a_uio);
4679 	return (zfs_readdir(ap->a_vp, &uio, ap->a_cred, ap->a_eofflag,
4680 	    ap->a_ncookies, ap->a_cookies));
4681 }
4682 
4683 #ifndef _SYS_SYSPROTO_H_
4684 struct vop_fsync_args {
4685 	struct vnode *a_vp;
4686 	int a_waitfor;
4687 	struct thread *a_td;
4688 };
4689 #endif
4690 
4691 static int
4692 zfs_freebsd_fsync(struct vop_fsync_args *ap)
4693 {
4694 
4695 	return (zfs_fsync(VTOZ(ap->a_vp), 0, ap->a_td->td_ucred));
4696 }
4697 
4698 #ifndef _SYS_SYSPROTO_H_
4699 struct vop_getattr_args {
4700 	struct vnode *a_vp;
4701 	struct vattr *a_vap;
4702 	struct ucred *a_cred;
4703 };
4704 #endif
4705 
4706 static int
4707 zfs_freebsd_getattr(struct vop_getattr_args *ap)
4708 {
4709 	vattr_t *vap = ap->a_vap;
4710 	xvattr_t xvap;
4711 	ulong_t fflags = 0;
4712 	int error;
4713 
4714 	xva_init(&xvap);
4715 	xvap.xva_vattr = *vap;
4716 	xvap.xva_vattr.va_mask |= AT_XVATTR;
4717 
4718 	/* Convert chflags into ZFS-type flags. */
4719 	/* XXX: what about SF_SETTABLE? */
4720 	XVA_SET_REQ(&xvap, XAT_IMMUTABLE);
4721 	XVA_SET_REQ(&xvap, XAT_APPENDONLY);
4722 	XVA_SET_REQ(&xvap, XAT_NOUNLINK);
4723 	XVA_SET_REQ(&xvap, XAT_NODUMP);
4724 	XVA_SET_REQ(&xvap, XAT_READONLY);
4725 	XVA_SET_REQ(&xvap, XAT_ARCHIVE);
4726 	XVA_SET_REQ(&xvap, XAT_SYSTEM);
4727 	XVA_SET_REQ(&xvap, XAT_HIDDEN);
4728 	XVA_SET_REQ(&xvap, XAT_REPARSE);
4729 	XVA_SET_REQ(&xvap, XAT_OFFLINE);
4730 	XVA_SET_REQ(&xvap, XAT_SPARSE);
4731 
4732 	error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred);
4733 	if (error != 0)
4734 		return (error);
4735 
4736 	/* Convert ZFS xattr into chflags. */
4737 #define	FLAG_CHECK(fflag, xflag, xfield)	do {			\
4738 	if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0)		\
4739 		fflags |= (fflag);					\
4740 } while (0)
4741 	FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE,
4742 	    xvap.xva_xoptattrs.xoa_immutable);
4743 	FLAG_CHECK(SF_APPEND, XAT_APPENDONLY,
4744 	    xvap.xva_xoptattrs.xoa_appendonly);
4745 	FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK,
4746 	    xvap.xva_xoptattrs.xoa_nounlink);
4747 	FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE,
4748 	    xvap.xva_xoptattrs.xoa_archive);
4749 	FLAG_CHECK(UF_NODUMP, XAT_NODUMP,
4750 	    xvap.xva_xoptattrs.xoa_nodump);
4751 	FLAG_CHECK(UF_READONLY, XAT_READONLY,
4752 	    xvap.xva_xoptattrs.xoa_readonly);
4753 	FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM,
4754 	    xvap.xva_xoptattrs.xoa_system);
4755 	FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN,
4756 	    xvap.xva_xoptattrs.xoa_hidden);
4757 	FLAG_CHECK(UF_REPARSE, XAT_REPARSE,
4758 	    xvap.xva_xoptattrs.xoa_reparse);
4759 	FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE,
4760 	    xvap.xva_xoptattrs.xoa_offline);
4761 	FLAG_CHECK(UF_SPARSE, XAT_SPARSE,
4762 	    xvap.xva_xoptattrs.xoa_sparse);
4763 
4764 #undef	FLAG_CHECK
4765 	*vap = xvap.xva_vattr;
4766 	vap->va_flags = fflags;
4767 	return (0);
4768 }
4769 
4770 #ifndef _SYS_SYSPROTO_H_
4771 struct vop_setattr_args {
4772 	struct vnode *a_vp;
4773 	struct vattr *a_vap;
4774 	struct ucred *a_cred;
4775 };
4776 #endif
4777 
4778 static int
4779 zfs_freebsd_setattr(struct vop_setattr_args *ap)
4780 {
4781 	vnode_t *vp = ap->a_vp;
4782 	vattr_t *vap = ap->a_vap;
4783 	cred_t *cred = ap->a_cred;
4784 	xvattr_t xvap;
4785 	ulong_t fflags;
4786 	uint64_t zflags;
4787 
4788 	vattr_init_mask(vap);
4789 	vap->va_mask &= ~AT_NOSET;
4790 
4791 	xva_init(&xvap);
4792 	xvap.xva_vattr = *vap;
4793 
4794 	zflags = VTOZ(vp)->z_pflags;
4795 
4796 	if (vap->va_flags != VNOVAL) {
4797 		zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs;
4798 		int error;
4799 
4800 		if (zfsvfs->z_use_fuids == B_FALSE)
4801 			return (EOPNOTSUPP);
4802 
4803 		fflags = vap->va_flags;
4804 		/*
4805 		 * XXX KDM
4806 		 * We need to figure out whether it makes sense to allow
4807 		 * UF_REPARSE through, since we don't really have other
4808 		 * facilities to handle reparse points and zfs_setattr()
4809 		 * doesn't currently allow setting that attribute anyway.
4810 		 */
4811 		if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE|
4812 		    UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE|
4813 		    UF_OFFLINE|UF_SPARSE)) != 0)
4814 			return (EOPNOTSUPP);
4815 		/*
4816 		 * Unprivileged processes are not permitted to unset system
4817 		 * flags, or modify flags if any system flags are set.
4818 		 * Privileged non-jail processes may not modify system flags
4819 		 * if securelevel > 0 and any existing system flags are set.
4820 		 * Privileged jail processes behave like privileged non-jail
4821 		 * processes if the PR_ALLOW_CHFLAGS permission bit is set;
4822 		 * otherwise, they behave like unprivileged processes.
4823 		 */
4824 		if (secpolicy_fs_owner(vp->v_mount, cred) == 0 ||
4825 		    priv_check_cred(cred, PRIV_VFS_SYSFLAGS) == 0) {
4826 			if (zflags &
4827 			    (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
4828 				error = securelevel_gt(cred, 0);
4829 				if (error != 0)
4830 					return (error);
4831 			}
4832 		} else {
4833 			/*
4834 			 * Callers may only modify the file flags on
4835 			 * objects they have VADMIN rights for.
4836 			 */
4837 			if ((error = VOP_ACCESS(vp, VADMIN, cred,
4838 			    curthread)) != 0)
4839 				return (error);
4840 			if (zflags &
4841 			    (ZFS_IMMUTABLE | ZFS_APPENDONLY |
4842 			    ZFS_NOUNLINK)) {
4843 				return (EPERM);
4844 			}
4845 			if (fflags &
4846 			    (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) {
4847 				return (EPERM);
4848 			}
4849 		}
4850 
4851 #define	FLAG_CHANGE(fflag, zflag, xflag, xfield)	do {		\
4852 	if (((fflags & (fflag)) && !(zflags & (zflag))) ||		\
4853 	    ((zflags & (zflag)) && !(fflags & (fflag)))) {		\
4854 		XVA_SET_REQ(&xvap, (xflag));				\
4855 		(xfield) = ((fflags & (fflag)) != 0);			\
4856 	}								\
4857 } while (0)
4858 		/* Convert chflags into ZFS-type flags. */
4859 		/* XXX: what about SF_SETTABLE? */
4860 		FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
4861 		    xvap.xva_xoptattrs.xoa_immutable);
4862 		FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
4863 		    xvap.xva_xoptattrs.xoa_appendonly);
4864 		FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK,
4865 		    xvap.xva_xoptattrs.xoa_nounlink);
4866 		FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE,
4867 		    xvap.xva_xoptattrs.xoa_archive);
4868 		FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP,
4869 		    xvap.xva_xoptattrs.xoa_nodump);
4870 		FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY,
4871 		    xvap.xva_xoptattrs.xoa_readonly);
4872 		FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM,
4873 		    xvap.xva_xoptattrs.xoa_system);
4874 		FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN,
4875 		    xvap.xva_xoptattrs.xoa_hidden);
4876 		FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE,
4877 		    xvap.xva_xoptattrs.xoa_reparse);
4878 		FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE,
4879 		    xvap.xva_xoptattrs.xoa_offline);
4880 		FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE,
4881 		    xvap.xva_xoptattrs.xoa_sparse);
4882 #undef	FLAG_CHANGE
4883 	}
4884 	if (vap->va_birthtime.tv_sec != VNOVAL) {
4885 		xvap.xva_vattr.va_mask |= AT_XVATTR;
4886 		XVA_SET_REQ(&xvap, XAT_CREATETIME);
4887 	}
4888 	return (zfs_setattr(VTOZ(vp), (vattr_t *)&xvap, 0, cred, NULL));
4889 }
4890 
4891 #ifndef _SYS_SYSPROTO_H_
4892 struct vop_rename_args {
4893 	struct vnode *a_fdvp;
4894 	struct vnode *a_fvp;
4895 	struct componentname *a_fcnp;
4896 	struct vnode *a_tdvp;
4897 	struct vnode *a_tvp;
4898 	struct componentname *a_tcnp;
4899 };
4900 #endif
4901 
4902 static int
4903 zfs_freebsd_rename(struct vop_rename_args *ap)
4904 {
4905 	vnode_t *fdvp = ap->a_fdvp;
4906 	vnode_t *fvp = ap->a_fvp;
4907 	vnode_t *tdvp = ap->a_tdvp;
4908 	vnode_t *tvp = ap->a_tvp;
4909 	int error;
4910 
4911 #if __FreeBSD_version < 1400068
4912 	ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
4913 	ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));
4914 #endif
4915 
4916 	error = zfs_do_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
4917 	    ap->a_tcnp, ap->a_fcnp->cn_cred);
4918 
4919 	vrele(fdvp);
4920 	vrele(fvp);
4921 	vrele(tdvp);
4922 	if (tvp != NULL)
4923 		vrele(tvp);
4924 
4925 	return (error);
4926 }
4927 
4928 #ifndef _SYS_SYSPROTO_H_
4929 struct vop_symlink_args {
4930 	struct vnode *a_dvp;
4931 	struct vnode **a_vpp;
4932 	struct componentname *a_cnp;
4933 	struct vattr *a_vap;
4934 	char *a_target;
4935 };
4936 #endif
4937 
4938 static int
4939 zfs_freebsd_symlink(struct vop_symlink_args *ap)
4940 {
4941 	struct componentname *cnp = ap->a_cnp;
4942 	vattr_t *vap = ap->a_vap;
4943 	znode_t *zp = NULL;
4944 	char *symlink;
4945 	size_t symlink_len;
4946 	int rc;
4947 
4948 #if __FreeBSD_version < 1400068
4949 	ASSERT(cnp->cn_flags & SAVENAME);
4950 #endif
4951 
4952 	vap->va_type = VLNK;	/* FreeBSD: Syscall only sets va_mode. */
4953 	vattr_init_mask(vap);
4954 	*ap->a_vpp = NULL;
4955 
4956 	rc = zfs_symlink(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap,
4957 	    ap->a_target, &zp, cnp->cn_cred, 0 /* flags */, NULL);
4958 	if (rc == 0) {
4959 		*ap->a_vpp = ZTOV(zp);
4960 		ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
4961 		MPASS(zp->z_cached_symlink == NULL);
4962 		symlink_len = strlen(ap->a_target);
4963 		symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
4964 		if (symlink != NULL) {
4965 			memcpy(symlink, ap->a_target, symlink_len);
4966 			symlink[symlink_len] = '\0';
4967 			atomic_store_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
4968 			    (uintptr_t)symlink);
4969 		}
4970 	}
4971 	return (rc);
4972 }
4973 
4974 #ifndef _SYS_SYSPROTO_H_
4975 struct vop_readlink_args {
4976 	struct vnode *a_vp;
4977 	struct uio *a_uio;
4978 	struct ucred *a_cred;
4979 };
4980 #endif
4981 
4982 static int
4983 zfs_freebsd_readlink(struct vop_readlink_args *ap)
4984 {
4985 	zfs_uio_t uio;
4986 	int error;
4987 	znode_t	*zp = VTOZ(ap->a_vp);
4988 	char *symlink, *base;
4989 	size_t symlink_len;
4990 	bool trycache;
4991 
4992 	zfs_uio_init(&uio, ap->a_uio);
4993 	trycache = false;
4994 	if (zfs_uio_segflg(&uio) == UIO_SYSSPACE &&
4995 	    zfs_uio_iovcnt(&uio) == 1) {
4996 		base = zfs_uio_iovbase(&uio, 0);
4997 		symlink_len = zfs_uio_iovlen(&uio, 0);
4998 		trycache = true;
4999 	}
5000 	error = zfs_readlink(ap->a_vp, &uio, ap->a_cred, NULL);
5001 	if (atomic_load_ptr(&zp->z_cached_symlink) != NULL ||
5002 	    error != 0 || !trycache) {
5003 		return (error);
5004 	}
5005 	symlink_len -= zfs_uio_resid(&uio);
5006 	symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
5007 	if (symlink != NULL) {
5008 		memcpy(symlink, base, symlink_len);
5009 		symlink[symlink_len] = '\0';
5010 		if (!atomic_cmpset_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
5011 		    (uintptr_t)NULL, (uintptr_t)symlink)) {
5012 			cache_symlink_free(symlink, symlink_len + 1);
5013 		}
5014 	}
5015 	return (error);
5016 }
5017 
5018 #ifndef _SYS_SYSPROTO_H_
5019 struct vop_link_args {
5020 	struct vnode *a_tdvp;
5021 	struct vnode *a_vp;
5022 	struct componentname *a_cnp;
5023 };
5024 #endif
5025 
5026 static int
5027 zfs_freebsd_link(struct vop_link_args *ap)
5028 {
5029 	struct componentname *cnp = ap->a_cnp;
5030 	vnode_t *vp = ap->a_vp;
5031 	vnode_t *tdvp = ap->a_tdvp;
5032 
5033 	if (tdvp->v_mount != vp->v_mount)
5034 		return (EXDEV);
5035 
5036 #if __FreeBSD_version < 1400068
5037 	ASSERT(cnp->cn_flags & SAVENAME);
5038 #endif
5039 
5040 	return (zfs_link(VTOZ(tdvp), VTOZ(vp),
5041 	    cnp->cn_nameptr, cnp->cn_cred, 0));
5042 }
5043 
5044 #ifndef _SYS_SYSPROTO_H_
5045 struct vop_inactive_args {
5046 	struct vnode *a_vp;
5047 	struct thread *a_td;
5048 };
5049 #endif
5050 
5051 static int
5052 zfs_freebsd_inactive(struct vop_inactive_args *ap)
5053 {
5054 	vnode_t *vp = ap->a_vp;
5055 
5056 	zfs_inactive(vp, curthread->td_ucred, NULL);
5057 	return (0);
5058 }
5059 
5060 #ifndef _SYS_SYSPROTO_H_
5061 struct vop_need_inactive_args {
5062 	struct vnode *a_vp;
5063 	struct thread *a_td;
5064 };
5065 #endif
5066 
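/*
 * Report, without blocking, whether zfs_inactive() has any work to do,
 * answering "yes" conservatively when the teardown lock cannot be taken.
 */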
5067 static int
5068 zfs_freebsd_need_inactive(struct vop_need_inactive_args *ap)
5069 {
5070 	vnode_t *vp = ap->a_vp;
5071 	znode_t	*zp = VTOZ(vp);
5072 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
5073 	int need;
5074 
5075 	if (vn_need_pageq_flush(vp))
5076 		return (1);
5077 
5078 	if (!ZFS_TEARDOWN_INACTIVE_TRY_ENTER_READ(zfsvfs))
5079 		return (1);
5080 	need = (zp->z_sa_hdl == NULL || zp->z_unlinked || zp->z_atime_dirty);
5081 	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
5082 
5083 	return (need);
5084 }
5085 
5086 #ifndef _SYS_SYSPROTO_H_
5087 struct vop_reclaim_args {
5088 	struct vnode *a_vp;
5089 	struct thread *a_td;
5090 };
5091 #endif
5092 
5093 static int
5094 zfs_freebsd_reclaim(struct vop_reclaim_args *ap)
5095 {
5096 	vnode_t	*vp = ap->a_vp;
5097 	znode_t	*zp = VTOZ(vp);
5098 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
5099 
5100 	ASSERT3P(zp, !=, NULL);
5101 
5102 	/*
5103 	 * z_teardown_inactive_lock protects from a race with
5104 	 * zfs_znode_dmu_fini in zfsvfs_teardown during
5105 	 * force unmount.
5106 	 */
5107 	ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
5108 	if (zp->z_sa_hdl == NULL)
5109 		zfs_znode_free(zp);
5110 	else
5111 		zfs_zinactive(zp);
5112 	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
5113 
5114 	vp->v_data = NULL;
5115 	return (0);
5116 }
5117 
5118 #ifndef _SYS_SYSPROTO_H_
5119 struct vop_fid_args {
5120 	struct vnode *a_vp;
5121 	struct fid *a_fid;
5122 };
5123 #endif
5124 
5125 static int
5126 zfs_freebsd_fid(struct vop_fid_args *ap)
5127 {
5128 
5129 	return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL));
5130 }
5131 
5132 
5133 #ifndef _SYS_SYSPROTO_H_
5134 struct vop_pathconf_args {
5135 	struct vnode *a_vp;
5136 	int a_name;
5137 	register_t *a_retval;
5138 } *ap;
5139 #endif
5140 
5141 static int
5142 zfs_freebsd_pathconf(struct vop_pathconf_args *ap)
5143 {
5144 	ulong_t val;
5145 	int error;
5146 
5147 	error = zfs_pathconf(ap->a_vp, ap->a_name, &val,
5148 	    curthread->td_ucred, NULL);
5149 	if (error == 0) {
5150 		*ap->a_retval = val;
5151 		return (error);
5152 	}
5153 	if (error != EOPNOTSUPP)
5154 		return (error);
5155 
5156 	switch (ap->a_name) {
5157 	case _PC_NAME_MAX:
5158 		*ap->a_retval = NAME_MAX;
5159 		return (0);
5160 #if __FreeBSD_version >= 1400032
5161 	case _PC_DEALLOC_PRESENT:
5162 		*ap->a_retval = 1;
5163 		return (0);
5164 #endif
5165 	case _PC_PIPE_BUF:
5166 		if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) {
5167 			*ap->a_retval = PIPE_BUF;
5168 			return (0);
5169 		}
5170 		return (EINVAL);
5171 	default:
5172 		return (vop_stdpathconf(ap));
5173 	}
5174 }
5175 
5176 static int zfs_xattr_compat = 1;
5177 
5178 static int
5179 zfs_check_attrname(const char *name)
5180 {
5181 	/* We don't allow '/' character in attribute name. */
5182 	if (strchr(name, '/') != NULL)
5183 		return (SET_ERROR(EINVAL));
5184 	/* We don't allow attribute names that start with a namespace prefix. */
5185 	if (ZFS_XA_NS_PREFIX_FORBIDDEN(name))
5186 		return (SET_ERROR(EINVAL));
5187 	return (0);
5188 }
5189 
5190 /*
5191  * FreeBSD's extended attribute namespaces determine the file name prefix
5192  * used for the underlying ZFS extended attribute name:
5193  *
5194  *	NAMESPACE	XATTR_COMPAT	PREFIX
5195  *	system		*		freebsd:system:
5196  *	user		1		(none, can be used to access ZFS
5197  *					fsattr(5) attributes created on Solaris)
5198  *	user		0		user.
5199  */
5200 static int
5201 zfs_create_attrname(int attrnamespace, const char *name, char *attrname,
5202     size_t size, boolean_t compat)
5203 {
5204 	const char *namespace, *prefix, *suffix;
5205 
5206 	memset(attrname, 0, size);
5207 
5208 	switch (attrnamespace) {
5209 	case EXTATTR_NAMESPACE_USER:
5210 		if (compat) {
5211 			/*
5212 			 * This is the default namespace by which we can access
5213 			 * all attributes created on Solaris.
5214 			 */
5215 			prefix = namespace = suffix = "";
5216 		} else {
5217 			/*
5218 			 * This is compatible with the user namespace encoding
5219 			 * on Linux prior to xattr_compat, but nothing
5220 			 * else.
5221 			 */
5222 			prefix = "";
5223 			namespace = "user";
5224 			suffix = ".";
5225 		}
5226 		break;
5227 	case EXTATTR_NAMESPACE_SYSTEM:
5228 		prefix = "freebsd:";
5229 		namespace = EXTATTR_NAMESPACE_SYSTEM_STRING;
5230 		suffix = ":";
5231 		break;
5232 	case EXTATTR_NAMESPACE_EMPTY:
5233 	default:
5234 		return (SET_ERROR(EINVAL));
5235 	}
5236 	if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix,
5237 	    name) >= size) {
5238 		return (SET_ERROR(ENAMETOOLONG));
5239 	}
5240 	return (0);
5241 }
5242 
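/*
 * Make sure the SA xattr nvlist is cached on the znode, temporarily
 * taking the xattr lock as writer if the nvlist still has to be fetched.
 */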
5243 static int
5244 zfs_ensure_xattr_cached(znode_t *zp)
5245 {
5246 	int error = 0;
5247 
5248 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
5249 
5250 	if (zp->z_xattr_cached != NULL)
5251 		return (0);
5252 
5253 	if (rw_write_held(&zp->z_xattr_lock))
5254 		return (zfs_sa_get_xattr(zp));
5255 
5256 	if (!rw_tryupgrade(&zp->z_xattr_lock)) {
5257 		rw_exit(&zp->z_xattr_lock);
5258 		rw_enter(&zp->z_xattr_lock, RW_WRITER);
5259 	}
5260 	if (zp->z_xattr_cached == NULL)
5261 		error = zfs_sa_get_xattr(zp);
5262 	rw_downgrade(&zp->z_xattr_lock);
5263 	return (error);
5264 }
5265 
5266 #ifndef _SYS_SYSPROTO_H_
5267 struct vop_getextattr {
5268 	IN struct vnode *a_vp;
5269 	IN int a_attrnamespace;
5270 	IN const char *a_name;
5271 	INOUT struct uio *a_uio;
5272 	OUT size_t *a_size;
5273 	IN struct ucred *a_cred;
5274 	IN struct thread *a_td;
5275 };
5276 #endif
5277 
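/*
 * Read an extended attribute stored as a file in the hidden xattr
 * directory, returning its size and/or contents.
 */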
5278 static int
5279 zfs_getextattr_dir(struct vop_getextattr_args *ap, const char *attrname)
5280 {
5281 	struct thread *td = ap->a_td;
5282 	struct nameidata nd;
5283 	struct vattr va;
5284 	vnode_t *xvp = NULL, *vp;
5285 	int error, flags;
5286 
5287 	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5288 	    LOOKUP_XATTR, B_FALSE);
5289 	if (error != 0)
5290 		return (error);
5291 
5292 	flags = FREAD;
5293 #if __FreeBSD_version < 1400043
5294 	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname,
5295 	    xvp, td);
5296 #else
5297 	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
5298 #endif
5299 	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_INVFS, ap->a_cred, NULL);
5300 	if (error != 0)
5301 		return (SET_ERROR(error));
5302 	vp = nd.ni_vp;
5303 	NDFREE_PNBUF(&nd);
5304 
5305 	if (ap->a_size != NULL) {
5306 		error = VOP_GETATTR(vp, &va, ap->a_cred);
5307 		if (error == 0)
5308 			*ap->a_size = (size_t)va.va_size;
5309 	} else if (ap->a_uio != NULL)
5310 		error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred);
5311 
5312 	VOP_UNLOCK(vp);
5313 	vn_close(vp, flags, ap->a_cred, td);
5314 	return (error);
5315 }
5316 
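/*
 * Read an extended attribute stored in the znode's SA (system attribute)
 * nvlist, returning its size and/or contents.
 */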
5317 static int
5318 zfs_getextattr_sa(struct vop_getextattr_args *ap, const char *attrname)
5319 {
5320 	znode_t *zp = VTOZ(ap->a_vp);
5321 	uchar_t *nv_value;
5322 	uint_t nv_size;
5323 	int error;
5324 
5325 	error = zfs_ensure_xattr_cached(zp);
5326 	if (error != 0)
5327 		return (error);
5328 
5329 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
5330 	ASSERT3P(zp->z_xattr_cached, !=, NULL);
5331 
5332 	error = nvlist_lookup_byte_array(zp->z_xattr_cached, attrname,
5333 	    &nv_value, &nv_size);
5334 	if (error != 0)
5335 		return (SET_ERROR(error));
5336 
5337 	if (ap->a_size != NULL)
5338 		*ap->a_size = nv_size;
5339 	else if (ap->a_uio != NULL)
5340 		error = uiomove(nv_value, nv_size, ap->a_uio);
5341 	if (error != 0)
5342 		return (SET_ERROR(error));
5343 
5344 	return (0);
5345 }
5346 
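/*
 * Look up the attribute under the given namespace encoding, preferring
 * SA-based storage and falling back to the xattr directory on ENOENT.
 */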
5347 static int
5348 zfs_getextattr_impl(struct vop_getextattr_args *ap, boolean_t compat)
5349 {
5350 	znode_t *zp = VTOZ(ap->a_vp);
5351 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
5352 	char attrname[EXTATTR_MAXNAMELEN+1];
5353 	int error;
5354 
5355 	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5356 	    sizeof (attrname), compat);
5357 	if (error != 0)
5358 		return (error);
5359 
5360 	error = ENOENT;
5361 	if (zfsvfs->z_use_sa && zp->z_is_sa)
5362 		error = zfs_getextattr_sa(ap, attrname);
5363 	if (error == ENOENT)
5364 		error = zfs_getextattr_dir(ap, attrname);
5365 	return (error);
5366 }
5367 
5368 /*
5369  * Vnode operation to retrieve a named extended attribute.
5370  */
5371 static int
5372 zfs_getextattr(struct vop_getextattr_args *ap)
5373 {
5374 	znode_t *zp = VTOZ(ap->a_vp);
5375 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
5376 	int error;
5377 
5378 	/*
5379 	 * If the xattr property is off, refuse the request.
5380 	 */
5381 	if (!(zfsvfs->z_flags & ZSB_XATTR))
5382 		return (SET_ERROR(EOPNOTSUPP));
5383 
5384 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5385 	    ap->a_cred, ap->a_td, VREAD);
5386 	if (error != 0)
5387 		return (SET_ERROR(error));
5388 
5389 	error = zfs_check_attrname(ap->a_name);
5390 	if (error != 0)
5391 		return (error);
5392 
5393 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5394 		return (error);
5395 	error = ENOENT;
5396 	rw_enter(&zp->z_xattr_lock, RW_READER);
5397 
5398 	error = zfs_getextattr_impl(ap, zfs_xattr_compat);
5399 	if ((error == ENOENT || error == ENOATTR) &&
5400 	    ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
5401 		/*
5402 		 * Fall back to the alternate namespace format if we failed to
5403 		 * find a user xattr.
5404 		 */
5405 		error = zfs_getextattr_impl(ap, !zfs_xattr_compat);
5406 	}
5407 
5408 	rw_exit(&zp->z_xattr_lock);
5409 	zfs_exit(zfsvfs, FTAG);
5410 	if (error == ENOENT)
5411 		error = SET_ERROR(ENOATTR);
5412 	return (error);
5413 }
5414 
5415 #ifndef _SYS_SYSPROTO_H_
5416 struct vop_deleteextattr {
5417 	IN struct vnode *a_vp;
5418 	IN int a_attrnamespace;
5419 	IN const char *a_name;
5420 	IN struct ucred *a_cred;
5421 	IN struct thread *a_td;
5422 };
5423 #endif
5424 
5425 static int
5426 zfs_deleteextattr_dir(struct vop_deleteextattr_args *ap, const char *attrname)
5427 {
5428 	struct nameidata nd;
5429 	vnode_t *xvp = NULL, *vp;
5430 	int error;
5431 
5432 	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5433 	    LOOKUP_XATTR, B_FALSE);
5434 	if (error != 0)
5435 		return (error);
5436 
5437 #if __FreeBSD_version < 1400043
5438 	NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
5439 	    UIO_SYSSPACE, attrname, xvp, ap->a_td);
5440 #else
5441 	NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
5442 	    UIO_SYSSPACE, attrname, xvp);
5443 #endif
5444 	error = namei(&nd);
5445 	if (error != 0)
5446 		return (SET_ERROR(error));
5447 
5448 	vp = nd.ni_vp;
5449 	error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
5450 	NDFREE_PNBUF(&nd);
5451 
5452 	vput(nd.ni_dvp);
5453 	if (vp == nd.ni_dvp)
5454 		vrele(vp);
5455 	else
5456 		vput(vp);
5457 
5458 	return (error);
5459 }
5460 
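/*
 * Remove an extended attribute from the znode's SA nvlist and write the
 * updated nvlist back to disk.
 */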
5461 static int
5462 zfs_deleteextattr_sa(struct vop_deleteextattr_args *ap, const char *attrname)
5463 {
5464 	znode_t *zp = VTOZ(ap->a_vp);
5465 	nvlist_t *nvl;
5466 	int error;
5467 
5468 	error = zfs_ensure_xattr_cached(zp);
5469 	if (error != 0)
5470 		return (error);
5471 
5472 	ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
5473 	ASSERT3P(zp->z_xattr_cached, !=, NULL);
5474 
5475 	nvl = zp->z_xattr_cached;
5476 	error = nvlist_remove(nvl, attrname, DATA_TYPE_BYTE_ARRAY);
5477 	if (error != 0)
5478 		error = SET_ERROR(error);
5479 	else
5480 		error = zfs_sa_set_xattr(zp, attrname, NULL, 0);
5481 	if (error != 0) {
5482 		zp->z_xattr_cached = NULL;
5483 		nvlist_free(nvl);
5484 	}
5485 	return (error);
5486 }
5487 
5488 static int
5489 zfs_deleteextattr_impl(struct vop_deleteextattr_args *ap, boolean_t compat)
5490 {
5491 	znode_t *zp = VTOZ(ap->a_vp);
5492 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
5493 	char attrname[EXTATTR_MAXNAMELEN+1];
5494 	int error;
5495 
5496 	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5497 	    sizeof (attrname), compat);
5498 	if (error != 0)
5499 		return (error);
5500 
5501 	error = ENOENT;
5502 	if (zfsvfs->z_use_sa && zp->z_is_sa)
5503 		error = zfs_deleteextattr_sa(ap, attrname);
5504 	if (error == ENOENT)
5505 		error = zfs_deleteextattr_dir(ap, attrname);
5506 	return (error);
5507 }
5508 
5509 /*
5510  * Vnode operation to remove a named attribute.
5511  */
5512 static int
5513 zfs_deleteextattr(struct vop_deleteextattr_args *ap)
5514 {
5515 	znode_t *zp = VTOZ(ap->a_vp);
5516 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
5517 	int error;
5518 
5519 	/*
5520 	 * If the xattr property is off, refuse the request.
5521 	 */
5522 	if (!(zfsvfs->z_flags & ZSB_XATTR))
5523 		return (SET_ERROR(EOPNOTSUPP));
5524 
5525 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5526 	    ap->a_cred, ap->a_td, VWRITE);
5527 	if (error != 0)
5528 		return (SET_ERROR(error));
5529 
5530 	error = zfs_check_attrname(ap->a_name);
5531 	if (error != 0)
5532 		return (error);
5533 
5534 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5535 		return (error);
5536 	rw_enter(&zp->z_xattr_lock, RW_WRITER);
5537 
5538 	error = zfs_deleteextattr_impl(ap, zfs_xattr_compat);
5539 	if ((error == ENOENT || error == ENOATTR) &&
5540 	    ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
5541 		/*
5542 		 * Fall back to the alternate namespace format if we failed to
5543 		 * find a user xattr.
5544 		 */
5545 		error = zfs_deleteextattr_impl(ap, !zfs_xattr_compat);
5546 	}
5547 
5548 	rw_exit(&zp->z_xattr_lock);
5549 	zfs_exit(zfsvfs, FTAG);
5550 	if (error == ENOENT)
5551 		error = SET_ERROR(ENOATTR);
5552 	return (error);
5553 }
5554 
5555 #ifndef _SYS_SYSPROTO_H_
5556 struct vop_setextattr {
5557 	IN struct vnode *a_vp;
5558 	IN int a_attrnamespace;
5559 	IN const char *a_name;
5560 	INOUT struct uio *a_uio;
5561 	IN struct ucred *a_cred;
5562 	IN struct thread *a_td;
5563 };
5564 #endif
5565 
5566 static int
5567 zfs_setextattr_dir(struct vop_setextattr_args *ap, const char *attrname)
5568 {
5569 	struct thread *td = ap->a_td;
5570 	struct nameidata nd;
5571 	struct vattr va;
5572 	vnode_t *xvp = NULL, *vp;
5573 	int error, flags;
5574 
5575 	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5576 	    LOOKUP_XATTR | CREATE_XATTR_DIR, B_FALSE);
5577 	if (error != 0)
5578 		return (error);
5579 
5580 	flags = FFLAGS(O_WRONLY | O_CREAT);
5581 #if __FreeBSD_version < 1400043
5582 	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp, td);
5583 #else
5584 	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
5585 #endif
5586 	error = vn_open_cred(&nd, &flags, 0600, VN_OPEN_INVFS, ap->a_cred,
5587 	    NULL);
5588 	if (error != 0)
5589 		return (SET_ERROR(error));
5590 	vp = nd.ni_vp;
5591 	NDFREE_PNBUF(&nd);
5592 
5593 	VATTR_NULL(&va);
5594 	va.va_size = 0;
5595 	error = VOP_SETATTR(vp, &va, ap->a_cred);
5596 	if (error == 0)
5597 		VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred);
5598 
5599 	VOP_UNLOCK(vp);
5600 	vn_close(vp, flags, ap->a_cred, td);
5601 	return (error);
5602 }
5603 
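/*
 * Store an extended attribute in the znode's SA nvlist, enforcing the
 * per-entry and total SA size limits before writing it back.
 */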
5604 static int
5605 zfs_setextattr_sa(struct vop_setextattr_args *ap, const char *attrname)
5606 {
5607 	znode_t *zp = VTOZ(ap->a_vp);
5608 	nvlist_t *nvl;
5609 	size_t sa_size;
5610 	int error;
5611 
5612 	error = zfs_ensure_xattr_cached(zp);
5613 	if (error != 0)
5614 		return (error);
5615 
5616 	ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
5617 	ASSERT3P(zp->z_xattr_cached, !=, NULL);
5618 
5619 	nvl = zp->z_xattr_cached;
5620 	size_t entry_size = ap->a_uio->uio_resid;
5621 	if (entry_size > DXATTR_MAX_ENTRY_SIZE)
5622 		return (SET_ERROR(EFBIG));
5623 	error = nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
5624 	if (error != 0)
5625 		return (SET_ERROR(error));
5626 	if (sa_size > DXATTR_MAX_SA_SIZE)
5627 		return (SET_ERROR(EFBIG));
5628 	uchar_t *buf = kmem_alloc(entry_size, KM_SLEEP);
5629 	error = uiomove(buf, entry_size, ap->a_uio);
5630 	if (error != 0) {
5631 		error = SET_ERROR(error);
5632 	} else {
5633 		error = nvlist_add_byte_array(nvl, attrname, buf, entry_size);
5634 		if (error != 0)
5635 			error = SET_ERROR(error);
5636 	}
5637 	if (error == 0)
5638 		error = zfs_sa_set_xattr(zp, attrname, buf, entry_size);
5639 	kmem_free(buf, entry_size);
5640 	if (error != 0) {
5641 		zp->z_xattr_cached = NULL;
5642 		nvlist_free(nvl);
5643 	}
5644 	return (error);
5645 }
5646 
5647 static int
5648 zfs_setextattr_impl(struct vop_setextattr_args *ap, boolean_t compat)
5649 {
5650 	znode_t *zp = VTOZ(ap->a_vp);
5651 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
5652 	char attrname[EXTATTR_MAXNAMELEN+1];
5653 	int error;
5654 
5655 	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5656 	    sizeof (attrname), compat);
5657 	if (error != 0)
5658 		return (error);
5659 
5660 	struct vop_deleteextattr_args vda = {
5661 		.a_vp = ap->a_vp,
5662 		.a_attrnamespace = ap->a_attrnamespace,
5663 		.a_name = ap->a_name,
5664 		.a_cred = ap->a_cred,
5665 		.a_td = ap->a_td,
5666 	};
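	/*
	 * Prefer storing the xattr as a System Attribute; if that is not
	 * possible it goes into the hidden xattr directory instead, and any
	 * stale copy left in the other location is removed below.
	 */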
5667 	error = ENOENT;
5668 	if (zfsvfs->z_use_sa && zp->z_is_sa && zfsvfs->z_xattr_sa) {
5669 		error = zfs_setextattr_sa(ap, attrname);
5670 		if (error == 0) {
5671 			/*
5672 			 * Successfully put into SA; we need to clear the one
5673 			 * in dir if present.
5674 			 */
5675 			zfs_deleteextattr_dir(&vda, attrname);
5676 		}
5677 	}
5678 	if (error != 0) {
5679 		error = zfs_setextattr_dir(ap, attrname);
5680 		if (error == 0 && zp->z_is_sa) {
5681 			/*
5682 			 * Successfully put into dir; we need to clear the one
5683 			 * in SA if present.
5684 			 */
5685 			zfs_deleteextattr_sa(&vda, attrname);
5686 		}
5687 	}
5688 	if (error == 0 && ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
5689 		/*
5690 		 * Also clear all versions of the alternate compat name.
5691 		 */
5692 		zfs_deleteextattr_impl(&vda, !compat);
5693 	}
5694 	return (error);
5695 }
5696 
5697 /*
5698  * Vnode operation to set a named attribute.
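 * Reached from userland through the extattr_set_file(2) family of system
 * calls.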
5699  */
5700 static int
5701 zfs_setextattr(struct vop_setextattr_args *ap)
5702 {
5703 	znode_t *zp = VTOZ(ap->a_vp);
5704 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
5705 	int error;
5706 
5707 	/*
5708 	 * If the xattr property is off, refuse the request.
5709 	 */
5710 	if (!(zfsvfs->z_flags & ZSB_XATTR))
5711 		return (SET_ERROR(EOPNOTSUPP));
5712 
5713 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5714 	    ap->a_cred, ap->a_td, VWRITE);
5715 	if (error != 0)
5716 		return (SET_ERROR(error));
5717 
5718 	error = zfs_check_attrname(ap->a_name);
5719 	if (error != 0)
5720 		return (error);
5721 
5722 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5723 		return (error);
5724 	rw_enter(&zp->z_xattr_lock, RW_WRITER);
5725 
5726 	error = zfs_setextattr_impl(ap, zfs_xattr_compat);
5727 
5728 	rw_exit(&zp->z_xattr_lock);
5729 	zfs_exit(zfsvfs, FTAG);
5730 	return (error);
5731 }
5732 
5733 #ifndef _SYS_SYSPROTO_H_
5734 struct vop_listextattr {
5735 	IN struct vnode *a_vp;
5736 	IN int a_attrnamespace;
5737 	INOUT struct uio *a_uio;
5738 	OUT size_t *a_size;
5739 	IN struct ucred *a_cred;
5740 	IN struct thread *a_td;
5741 };
5742 #endif
5743 
5744 static int
5745 zfs_listextattr_dir(struct vop_listextattr_args *ap, const char *attrprefix)
5746 {
5747 	struct thread *td = ap->a_td;
5748 	struct nameidata nd;
5749 	uint8_t dirbuf[sizeof (struct dirent)];
5750 	struct iovec aiov;
5751 	struct uio auio;
5752 	vnode_t *xvp = NULL, *vp;
5753 	int error, eof;
5754 
5755 	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5756 	    LOOKUP_XATTR, B_FALSE);
5757 	if (error != 0) {
5758 		/*
5759 		 * ENOATTR means that the EA directory does not yet exist,
5760 		 * i.e. there are no extended attributes there.
5761 		 */
5762 		if (error == ENOATTR)
5763 			error = 0;
5764 		return (error);
5765 	}
5766 
5767 #if __FreeBSD_version < 1400043
5768 	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
5769 	    UIO_SYSSPACE, ".", xvp, td);
5770 #else
5771 	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
5772 	    UIO_SYSSPACE, ".", xvp);
5773 #endif
5774 	error = namei(&nd);
5775 	if (error != 0)
5776 		return (SET_ERROR(error));
5777 	vp = nd.ni_vp;
5778 	NDFREE_PNBUF(&nd);
5779 
5780 	auio.uio_iov = &aiov;
5781 	auio.uio_iovcnt = 1;
5782 	auio.uio_segflg = UIO_SYSSPACE;
5783 	auio.uio_td = td;
5784 	auio.uio_rw = UIO_READ;
5785 	auio.uio_offset = 0;
5786 
5787 	size_t plen = strlen(attrprefix);
5788 
5789 	do {
5790 		aiov.iov_base = (void *)dirbuf;
5791 		aiov.iov_len = sizeof (dirbuf);
5792 		auio.uio_resid = sizeof (dirbuf);
5793 		error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL);
5794 		if (error != 0)
5795 			break;
5796 		int done = sizeof (dirbuf) - auio.uio_resid;
5797 		for (int pos = 0; pos < done; ) {
5798 			struct dirent *dp = (struct dirent *)(dirbuf + pos);
5799 			pos += dp->d_reclen;
5800 			/*
5801 			 * XXX: Temporarily we also accept DT_UNKNOWN, as this is
5802 			 * what we get when the attribute was created on Solaris.
5803 			 */
5804 			if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN)
5805 				continue;
5806 			else if (plen == 0 &&
5807 			    ZFS_XA_NS_PREFIX_FORBIDDEN(dp->d_name))
5808 				continue;
5809 			else if (strncmp(dp->d_name, attrprefix, plen) != 0)
5810 				continue;
5811 			uint8_t nlen = dp->d_namlen - plen;
5812 			if (ap->a_size != NULL) {
5813 				*ap->a_size += 1 + nlen;
5814 			} else if (ap->a_uio != NULL) {
5815 				/*
5816 				 * Format of extattr name entry is one byte for
5817 				 * length and the rest for name.
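				 * For example, the attribute "foo" is emitted
				 * as a length byte of 3 followed by the three
				 * name bytes, with no terminating NUL.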
5818 				 */
5819 				error = uiomove(&nlen, 1, ap->a_uio);
5820 				if (error == 0) {
5821 					char *namep = dp->d_name + plen;
5822 					error = uiomove(namep, nlen, ap->a_uio);
5823 				}
5824 				if (error != 0) {
5825 					error = SET_ERROR(error);
5826 					break;
5827 				}
5828 			}
5829 		}
5830 	} while (!eof && error == 0);
5831 
5832 	vput(vp);
5833 	return (error);
5834 }
5835 
5836 static int
5837 zfs_listextattr_sa(struct vop_listextattr_args *ap, const char *attrprefix)
5838 {
5839 	znode_t *zp = VTOZ(ap->a_vp);
5840 	int error;
5841 
5842 	error = zfs_ensure_xattr_cached(zp);
5843 	if (error != 0)
5844 		return (error);
5845 
5846 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
5847 	ASSERT3P(zp->z_xattr_cached, !=, NULL);
5848 
5849 	size_t plen = strlen(attrprefix);
5850 	nvpair_t *nvp = NULL;
5851 	while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
5852 		ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);
5853 
5854 		const char *name = nvpair_name(nvp);
5855 		if (plen == 0 && ZFS_XA_NS_PREFIX_FORBIDDEN(name))
5856 			continue;
5857 		else if (strncmp(name, attrprefix, plen) != 0)
5858 			continue;
5859 		uint8_t nlen = strlen(name) - plen;
5860 		if (ap->a_size != NULL) {
5861 			*ap->a_size += 1 + nlen;
5862 		} else if (ap->a_uio != NULL) {
5863 			/*
5864 			 * Format of extattr name entry is one byte for
5865 			 * length and the rest for name.
5866 			 */
5867 			error = uiomove(&nlen, 1, ap->a_uio);
5868 			if (error == 0) {
5869 				char *namep = __DECONST(char *, name) + plen;
5870 				error = uiomove(namep, nlen, ap->a_uio);
5871 			}
5872 			if (error != 0) {
5873 				error = SET_ERROR(error);
5874 				break;
5875 			}
5876 		}
5877 	}
5878 
5879 	return (error);
5880 }
5881 
5882 static int
5883 zfs_listextattr_impl(struct vop_listextattr_args *ap, boolean_t compat)
5884 {
5885 	znode_t *zp = VTOZ(ap->a_vp);
5886 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
5887 	char attrprefix[16];
5888 	int error;
5889 
5890 	error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix,
5891 	    sizeof (attrprefix), compat);
5892 	if (error != 0)
5893 		return (error);
5894 
5895 	if (zfsvfs->z_use_sa && zp->z_is_sa)
5896 		error = zfs_listextattr_sa(ap, attrprefix);
5897 	if (error == 0)
5898 		error = zfs_listextattr_dir(ap, attrprefix);
5899 	return (error);
5900 }
5901 
5902 /*
5903  * Vnode operation to list the names of extended attributes on a vnode.
5904  */
5905 static int
5906 zfs_listextattr(struct vop_listextattr_args *ap)
5907 {
5908 	znode_t *zp = VTOZ(ap->a_vp);
5909 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
5910 	int error;
5911 
5912 	if (ap->a_size != NULL)
5913 		*ap->a_size = 0;
5914 
5915 	/*
5916 	 * If the xattr property is off, refuse the request.
5917 	 */
5918 	if (!(zfsvfs->z_flags & ZSB_XATTR))
5919 		return (SET_ERROR(EOPNOTSUPP));
5920 
5921 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5922 	    ap->a_cred, ap->a_td, VREAD);
5923 	if (error != 0)
5924 		return (SET_ERROR(error));
5925 
5926 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5927 		return (error);
5928 	rw_enter(&zp->z_xattr_lock, RW_READER);
5929 
5930 	error = zfs_listextattr_impl(ap, zfs_xattr_compat);
5931 	if (error == 0 && ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
5932 		/* Also list user xattrs with the alternate format. */
5933 		error = zfs_listextattr_impl(ap, !zfs_xattr_compat);
5934 	}
5935 
5936 	rw_exit(&zp->z_xattr_lock);
5937 	zfs_exit(zfsvfs, FTAG);
5938 	return (error);
5939 }
5940 
5941 #ifndef _SYS_SYSPROTO_H_
5942 struct vop_getacl_args {
5943 	struct vnode *vp;
5944 	acl_type_t type;
5945 	struct acl *aclp;
5946 	struct ucred *cred;
5947 	struct thread *td;
5948 };
5949 #endif
5950 
5951 static int
5952 zfs_freebsd_getacl(struct vop_getacl_args *ap)
5953 {
5954 	int		error;
5955 	vsecattr_t	vsecattr;
5956 
5957 	if (ap->a_type != ACL_TYPE_NFS4)
5958 		return (EINVAL);
5959 
5960 	vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT;
5961 	if ((error = zfs_getsecattr(VTOZ(ap->a_vp),
5962 	    &vsecattr, 0, ap->a_cred)))
5963 		return (error);
5964 
5965 	error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp,
5966 	    vsecattr.vsa_aclcnt);
5967 	if (vsecattr.vsa_aclentp != NULL)
5968 		kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz);
5969 
5970 	return (error);
5971 }
5972 
5973 #ifndef _SYS_SYSPROTO_H_
5974 struct vop_setacl_args {
5975 	struct vnode *vp;
5976 	acl_type_t type;
5977 	struct acl *aclp;
5978 	struct ucred *cred;
5979 	struct thread *td;
5980 };
5981 #endif
5982 
5983 static int
5984 zfs_freebsd_setacl(struct vop_setacl_args *ap)
5985 {
5986 	int		error;
5987 	vsecattr_t vsecattr;
5988 	int		aclbsize;	/* size of acl list in bytes */
5989 	aclent_t	*aaclp;
5990 
5991 	if (ap->a_type != ACL_TYPE_NFS4)
5992 		return (EINVAL);
5993 
5994 	if (ap->a_aclp == NULL)
5995 		return (EINVAL);
5996 
5997 	if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES)
5998 		return (EINVAL);
5999 
6000 	/*
6001 	 * With NFSv4 ACLs, chmod(2) may need to add additional entries,
6002 	 * splitting every entry into two and appending "canonical six"
6003 	 * entries at the end.  Don't allow for setting an ACL that would
6004 	 * cause chmod(2) to run out of ACL entries.
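	 * In the worst case an ACL with acl_cnt entries can thus grow to
	 * 2 * acl_cnt + 6 entries, which is what the check below enforces.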
6005 	 */
6006 	if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES)
6007 		return (ENOSPC);
6008 
6009 	error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR);
6010 	if (error != 0)
6011 		return (error);
6012 
6013 	vsecattr.vsa_mask = VSA_ACE;
6014 	aclbsize = ap->a_aclp->acl_cnt * sizeof (ace_t);
6015 	vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP);
6016 	aaclp = vsecattr.vsa_aclentp;
6017 	vsecattr.vsa_aclentsz = aclbsize;
6018 
6019 	aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp);
6020 	error = zfs_setsecattr(VTOZ(ap->a_vp), &vsecattr, 0, ap->a_cred);
6021 	kmem_free(aaclp, aclbsize);
6022 
6023 	return (error);
6024 }
6025 
6026 #ifndef _SYS_SYSPROTO_H_
6027 struct vop_aclcheck_args {
6028 	struct vnode *vp;
6029 	acl_type_t type;
6030 	struct acl *aclp;
6031 	struct ucred *cred;
6032 	struct thread *td;
6033 };
6034 #endif
6035 
6036 static int
6037 zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap)
6038 {
6039 
6040 	return (EOPNOTSUPP);
6041 }
6042 
6043 static int
6044 zfs_vptocnp(struct vop_vptocnp_args *ap)
6045 {
6046 	vnode_t *covered_vp;
6047 	vnode_t *vp = ap->a_vp;
6048 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
6049 	znode_t *zp = VTOZ(vp);
6050 	int ltype;
6051 	int error;
6052 
6053 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
6054 		return (error);
6055 
6056 	/*
6057 	 * If we are a snapshot mounted under .zfs, run the operation
6058 	 * on the covered vnode.
6059 	 */
6060 	if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) {
6061 		char name[MAXNAMLEN + 1];
6062 		znode_t *dzp;
6063 		size_t len;
6064 
6065 		error = zfs_znode_parent_and_name(zp, &dzp, name,
6066 		    sizeof (name));
6067 		if (error == 0) {
6068 			len = strlen(name);
6069 			if (*ap->a_buflen < len)
6070 				error = SET_ERROR(ENOMEM);
6071 		}
6072 		if (error == 0) {
6073 			*ap->a_buflen -= len;
6074 			memcpy(ap->a_buf + *ap->a_buflen, name, len);
6075 			*ap->a_vpp = ZTOV(dzp);
6076 		}
6077 		zfs_exit(zfsvfs, FTAG);
6078 		return (error);
6079 	}
6080 	zfs_exit(zfsvfs, FTAG);
6081 
6082 	covered_vp = vp->v_mount->mnt_vnodecovered;
6083 	enum vgetstate vs = vget_prep(covered_vp);
6084 	ltype = VOP_ISLOCKED(vp);
6085 	VOP_UNLOCK(vp);
6086 	error = vget_finish(covered_vp, LK_SHARED, vs);
6087 	if (error == 0) {
6088 		error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_buf,
6089 		    ap->a_buflen);
6090 		vput(covered_vp);
6091 	}
6092 	vn_lock(vp, ltype | LK_RETRY);
6093 	if (VN_IS_DOOMED(vp))
6094 		error = SET_ERROR(ENOENT);
6095 	return (error);
6096 }
6097 
6098 #if __FreeBSD_version >= 1400032
6099 static int
6100 zfs_deallocate(struct vop_deallocate_args *ap)
6101 {
6102 	znode_t *zp = VTOZ(ap->a_vp);
6103 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
6104 	zilog_t *zilog;
6105 	off_t off, len, file_sz;
6106 	int error;
6107 
6108 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
6109 		return (error);
6110 
6111 	/*
6112 	 * Callers might not be able to detect properly that we are read-only,
6113 	 * so check it explicitly here.
6114 	 */
6115 	if (zfs_is_readonly(zfsvfs)) {
6116 		zfs_exit(zfsvfs, FTAG);
6117 		return (SET_ERROR(EROFS));
6118 	}
6119 
6120 	zilog = zfsvfs->z_log;
6121 	off = *ap->a_offset;
6122 	len = *ap->a_len;
6123 	file_sz = zp->z_size;
6124 	if (off + len > file_sz)
6125 		len = file_sz - off;
6126 	/* Fast path for out-of-range request. */
6127 	if (len <= 0) {
6128 		*ap->a_len = 0;
6129 		zfs_exit(zfsvfs, FTAG);
6130 		return (0);
6131 	}
6132 
6133 	error = zfs_freesp(zp, off, len, O_RDWR, TRUE);
6134 	if (error == 0) {
6135 		if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS ||
6136 		    (ap->a_ioflag & IO_SYNC) != 0)
6137 			zil_commit(zilog, zp->z_id);
6138 		*ap->a_offset = off + len;
6139 		*ap->a_len = 0;
6140 	}
6141 
6142 	zfs_exit(zfsvfs, FTAG);
6143 	return (error);
6144 }
6145 #endif
6146 
6147 #ifndef _SYS_SYSPROTO_H_
6148 struct vop_copy_file_range_args {
6149 	struct vnode *a_invp;
6150 	off_t *a_inoffp;
6151 	struct vnode *a_outvp;
6152 	off_t *a_outoffp;
6153 	size_t *a_lenp;
6154 	unsigned int a_flags;
6155 	struct ucred *a_incred;
6156 	struct ucred *a_outcred;
6157 	struct thread *a_fsizetd;
6158 };
6159 #endif
6160 /*
6161  * TODO: FreeBSD will only call the file system-specific copy_file_range() if
6162  * both files reside under the same mountpoint. In the case of ZFS we want to
6163  * be called even if the files are in different datasets (but on the same
6164  * pool, which we need to check ourselves).
6165  */
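/*
 * Illustrative userland path into this VOP (sketch only): a plain
 *
 *	copy_file_range(infd, NULL, outfd, NULL, len, 0);
 *
 * call ends up here when both descriptors refer to files under the same ZFS
 * mountpoint; we then attempt block cloning via zfs_clone_range() and
 * otherwise fall back to an ordinary copy.
 */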
6166 static int
6167 zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap)
6168 {
6169 	zfsvfs_t *outzfsvfs;
6170 	struct vnode *invp = ap->a_invp;
6171 	struct vnode *outvp = ap->a_outvp;
6172 	struct mount *mp;
6173 	int error;
6174 	uint64_t len = *ap->a_lenp;
6175 
6176 	if (!zfs_bclone_enabled) {
6177 		mp = NULL;
6178 		goto bad_write_fallback;
6179 	}
6180 
6181 	/*
6182 	 * TODO: If offset/length is not aligned to recordsize, use
6183 	 * vn_generic_copy_file_range() on this fragment.
6184 	 * It would be better to do this after we lock the vnodes, but then we
6185 	 * need something else than vn_generic_copy_file_range().
6186 	 */
6187 
6188 	vn_start_write(outvp, &mp, V_WAIT);
6189 	if (__predict_true(mp == outvp->v_mount)) {
6190 		outzfsvfs = (zfsvfs_t *)mp->mnt_data;
6191 		if (!spa_feature_is_enabled(dmu_objset_spa(outzfsvfs->z_os),
6192 		    SPA_FEATURE_BLOCK_CLONING)) {
6193 			goto bad_write_fallback;
6194 		}
6195 	}
6196 	if (invp == outvp) {
6197 		if (vn_lock(outvp, LK_EXCLUSIVE) != 0) {
6198 			goto bad_write_fallback;
6199 		}
6200 	} else {
6201 #if (__FreeBSD_version >= 1302506 && __FreeBSD_version < 1400000) || \
6202 	__FreeBSD_version >= 1400086
6203 		vn_lock_pair(invp, false, LK_SHARED, outvp, false,
6204 		    LK_EXCLUSIVE);
6205 #else
6206 		vn_lock_pair(invp, false, outvp, false);
6207 #endif
6208 		if (VN_IS_DOOMED(invp) || VN_IS_DOOMED(outvp)) {
6209 			goto bad_locked_fallback;
6210 		}
6211 	}
6212 
6213 #ifdef MAC
6214 	error = mac_vnode_check_write(curthread->td_ucred, ap->a_outcred,
6215 	    outvp);
6216 	if (error != 0)
6217 		goto out_locked;
6218 #endif
6219 
6220 	error = zfs_clone_range(VTOZ(invp), ap->a_inoffp, VTOZ(outvp),
6221 	    ap->a_outoffp, &len, ap->a_outcred);
6222 	if (error == EXDEV || error == EAGAIN || error == EINVAL ||
6223 	    error == EOPNOTSUPP)
6224 		goto bad_locked_fallback;
6225 	*ap->a_lenp = (size_t)len;
6226 #ifdef MAC
6227 out_locked:
6228 #endif
6229 	if (invp != outvp)
6230 		VOP_UNLOCK(invp);
6231 	VOP_UNLOCK(outvp);
6232 	if (mp != NULL)
6233 		vn_finished_write(mp);
6234 	return (error);
6235 
6236 bad_locked_fallback:
6237 	if (invp != outvp)
6238 		VOP_UNLOCK(invp);
6239 	VOP_UNLOCK(outvp);
6240 bad_write_fallback:
6241 	if (mp != NULL)
6242 		vn_finished_write(mp);
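	/*
	 * ENOSYS tells the VFS layer to fall back to
	 * vn_generic_copy_file_range(), i.e. an ordinary byte copy.
	 */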
6243 	error = ENOSYS;
6244 	return (error);
6245 }
6246 
6247 struct vop_vector zfs_vnodeops;
6248 struct vop_vector zfs_fifoops;
6249 struct vop_vector zfs_shareops;
6250 
6251 struct vop_vector zfs_vnodeops = {
6252 	.vop_default =		&default_vnodeops,
6253 	.vop_inactive =		zfs_freebsd_inactive,
6254 	.vop_need_inactive =	zfs_freebsd_need_inactive,
6255 	.vop_reclaim =		zfs_freebsd_reclaim,
6256 	.vop_fplookup_vexec = zfs_freebsd_fplookup_vexec,
6257 	.vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
6258 	.vop_access =		zfs_freebsd_access,
6259 	.vop_allocate =		VOP_EOPNOTSUPP,
6260 #if __FreeBSD_version >= 1400032
6261 	.vop_deallocate =	zfs_deallocate,
6262 #endif
6263 	.vop_lookup =		zfs_cache_lookup,
6264 	.vop_cachedlookup =	zfs_freebsd_cachedlookup,
6265 	.vop_getattr =		zfs_freebsd_getattr,
6266 	.vop_setattr =		zfs_freebsd_setattr,
6267 	.vop_create =		zfs_freebsd_create,
6268 	.vop_mknod =		(vop_mknod_t *)zfs_freebsd_create,
6269 	.vop_mkdir =		zfs_freebsd_mkdir,
6270 	.vop_readdir =		zfs_freebsd_readdir,
6271 	.vop_fsync =		zfs_freebsd_fsync,
6272 	.vop_open =		zfs_freebsd_open,
6273 	.vop_close =		zfs_freebsd_close,
6274 	.vop_rmdir =		zfs_freebsd_rmdir,
6275 	.vop_ioctl =		zfs_freebsd_ioctl,
6276 	.vop_link =		zfs_freebsd_link,
6277 	.vop_symlink =		zfs_freebsd_symlink,
6278 	.vop_readlink =		zfs_freebsd_readlink,
6279 	.vop_read =		zfs_freebsd_read,
6280 	.vop_write =		zfs_freebsd_write,
6281 	.vop_remove =		zfs_freebsd_remove,
6282 	.vop_rename =		zfs_freebsd_rename,
6283 	.vop_pathconf =		zfs_freebsd_pathconf,
6284 	.vop_bmap =		zfs_freebsd_bmap,
6285 	.vop_fid =		zfs_freebsd_fid,
6286 	.vop_getextattr =	zfs_getextattr,
6287 	.vop_deleteextattr =	zfs_deleteextattr,
6288 	.vop_setextattr =	zfs_setextattr,
6289 	.vop_listextattr =	zfs_listextattr,
6290 	.vop_getacl =		zfs_freebsd_getacl,
6291 	.vop_setacl =		zfs_freebsd_setacl,
6292 	.vop_aclcheck =		zfs_freebsd_aclcheck,
6293 	.vop_getpages =		zfs_freebsd_getpages,
6294 	.vop_putpages =		zfs_freebsd_putpages,
6295 	.vop_vptocnp =		zfs_vptocnp,
6296 	.vop_lock1 =		vop_lock,
6297 	.vop_unlock =		vop_unlock,
6298 	.vop_islocked =		vop_islocked,
6299 #if __FreeBSD_version >= 1400043
6300 	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
6301 #endif
6302 	.vop_copy_file_range =	zfs_freebsd_copy_file_range,
6303 };
6304 VFS_VOP_VECTOR_REGISTER(zfs_vnodeops);
6305 
6306 struct vop_vector zfs_fifoops = {
6307 	.vop_default =		&fifo_specops,
6308 	.vop_fsync =		zfs_freebsd_fsync,
6309 	.vop_fplookup_vexec =	zfs_freebsd_fplookup_vexec,
6310 	.vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
6311 	.vop_access =		zfs_freebsd_access,
6312 	.vop_getattr =		zfs_freebsd_getattr,
6313 	.vop_inactive =		zfs_freebsd_inactive,
6314 	.vop_read =		VOP_PANIC,
6315 	.vop_reclaim =		zfs_freebsd_reclaim,
6316 	.vop_setattr =		zfs_freebsd_setattr,
6317 	.vop_write =		VOP_PANIC,
6318 	.vop_pathconf = 	zfs_freebsd_pathconf,
6319 	.vop_fid =		zfs_freebsd_fid,
6320 	.vop_getacl =		zfs_freebsd_getacl,
6321 	.vop_setacl =		zfs_freebsd_setacl,
6322 	.vop_aclcheck =		zfs_freebsd_aclcheck,
6323 #if __FreeBSD_version >= 1400043
6324 	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
6325 #endif
6326 };
6327 VFS_VOP_VECTOR_REGISTER(zfs_fifoops);
6328 
6329 /*
6330  * special share hidden files vnode operations template
6331  */
6332 struct vop_vector zfs_shareops = {
6333 	.vop_default =		&default_vnodeops,
6334 	.vop_fplookup_vexec =	VOP_EAGAIN,
6335 	.vop_fplookup_symlink =	VOP_EAGAIN,
6336 	.vop_access =		zfs_freebsd_access,
6337 	.vop_inactive =		zfs_freebsd_inactive,
6338 	.vop_reclaim =		zfs_freebsd_reclaim,
6339 	.vop_fid =		zfs_freebsd_fid,
6340 	.vop_pathconf =		zfs_freebsd_pathconf,
6341 #if __FreeBSD_version >= 1400043
6342 	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
6343 #endif
6344 };
6345 VFS_VOP_VECTOR_REGISTER(zfs_shareops);
6346 
6347 ZFS_MODULE_PARAM(zfs, zfs_, xattr_compat, INT, ZMOD_RW,
6348 	"Use legacy ZFS xattr naming for writing new user namespace xattrs");
6349