1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or https://opensource.org/licenses/CDDL-1.0.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22
23 /*
24 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
26 * Copyright (c) 2014 Integros [integros.com]
27 * Copyright 2017 Nexenta Systems, Inc.
28 */
29
30 /* Portions Copyright 2007 Jeremy Teo */
31 /* Portions Copyright 2010 Robert Milkowski */
32
33 #include <sys/param.h>
34 #include <sys/time.h>
35 #include <sys/systm.h>
36 #include <sys/sysmacros.h>
37 #include <sys/resource.h>
38 #include <security/mac/mac_framework.h>
39 #include <sys/vfs.h>
40 #include <sys/endian.h>
41 #include <sys/vm.h>
42 #include <sys/vnode.h>
43 #include <sys/smr.h>
44 #include <sys/dirent.h>
45 #include <sys/file.h>
46 #include <sys/stat.h>
47 #include <sys/kmem.h>
48 #include <sys/taskq.h>
49 #include <sys/uio.h>
50 #include <sys/atomic.h>
51 #include <sys/namei.h>
52 #include <sys/mman.h>
53 #include <sys/cmn_err.h>
54 #include <sys/kdb.h>
55 #include <sys/sysproto.h>
56 #include <sys/errno.h>
57 #include <sys/unistd.h>
58 #include <sys/zfs_dir.h>
59 #include <sys/zfs_ioctl.h>
60 #include <sys/fs/zfs.h>
61 #include <sys/dmu.h>
62 #include <sys/dmu_objset.h>
63 #include <sys/spa.h>
64 #include <sys/txg.h>
65 #include <sys/dbuf.h>
66 #include <sys/zap.h>
67 #include <sys/sa.h>
68 #include <sys/policy.h>
69 #include <sys/sunddi.h>
70 #include <sys/filio.h>
71 #include <sys/sid.h>
72 #include <sys/zfs_ctldir.h>
73 #include <sys/zfs_fuid.h>
74 #include <sys/zfs_quota.h>
75 #include <sys/zfs_sa.h>
76 #include <sys/zfs_rlock.h>
77 #include <sys/zfs_project.h>
78 #include <sys/bio.h>
79 #include <sys/buf.h>
80 #include <sys/sched.h>
81 #include <sys/acl.h>
82 #include <sys/vmmeter.h>
83 #include <vm/vm_param.h>
84 #include <sys/zil.h>
85 #include <sys/zfs_vnops.h>
86 #include <sys/module.h>
87 #include <sys/sysent.h>
88 #include <sys/dmu_impl.h>
89 #include <sys/brt.h>
90 #include <sys/zfeature.h>
91
92 #include <vm/vm_object.h>
93
94 #include <sys/extattr.h>
95 #include <sys/priv.h>
96
97 #ifndef VN_OPEN_INVFS
98 #define VN_OPEN_INVFS 0x0
99 #endif
100
101 VFS_SMR_DECLARE;
102
103 #ifdef DEBUG_VFS_LOCKS
104 #define VNCHECKREF(vp) \
105 VNASSERT((vp)->v_holdcnt > 0 && (vp)->v_usecount > 0, vp, \
106 ("%s: wrong ref counts", __func__));
107 #else
108 #define VNCHECKREF(vp)
109 #endif
110
111 #if __FreeBSD_version >= 1400045
112 typedef uint64_t cookie_t;
113 #else
114 typedef ulong_t cookie_t;
115 #endif
116
117 /*
118 * Programming rules.
119 *
120 * Each vnode op performs some logical unit of work. To do this, the ZPL must
121 * properly lock its in-core state, create a DMU transaction, do the work,
122 * record this work in the intent log (ZIL), commit the DMU transaction,
123 * and wait for the intent log to commit if it is a synchronous operation.
124 * Moreover, the vnode ops must work in both normal and log replay context.
125 * The ordering of events is important to avoid deadlocks and references
126 * to freed memory. The example below illustrates the following Big Rules:
127 *
128 * (1) A check must be made in each zfs thread for a mounted file system.
129 * This is done avoiding races using zfs_enter(zfsvfs).
130 * A zfs_exit(zfsvfs) is needed before all returns. Any znodes
131 * must be checked with zfs_verify_zp(zp). Both of these macros
132 * can return EIO from the calling function.
133 *
134 * (2) VN_RELE() should always be the last thing except for zil_commit()
135 * (if necessary) and zfs_exit(). This is for 3 reasons:
136 * First, if it's the last reference, the vnode/znode
137 * can be freed, so the zp may point to freed memory. Second, the last
138 * reference will call zfs_zinactive(), which may induce a lot of work --
139 * pushing cached pages (which acquires range locks) and syncing out
140 * cached atime changes. Third, zfs_zinactive() may require a new tx,
141 * which could deadlock the system if you were already holding one.
142 * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
143 *
144 * (3) All range locks must be grabbed before calling dmu_tx_assign(),
145 * as they can span dmu_tx_assign() calls.
146 *
147 * (4) If ZPL locks are held, pass DMU_TX_NOWAIT as the second argument to
148 * dmu_tx_assign(). This is critical because we don't want to block
149 * while holding locks.
150 *
151 * If no ZPL locks are held (aside from zfs_enter()), use DMU_TX_WAIT.
152 * This reduces lock contention and CPU usage when we must wait (note
153 * that if throughput is constrained by the storage, nearly every
154 * transaction must wait).
155 *
156 * Note, in particular, that if a lock is sometimes acquired before
157 * the tx assigns, and sometimes after (e.g. z_lock), then failing
158 * to use a non-blocking assign can deadlock the system. The scenario:
159 *
160 * Thread A has grabbed a lock before calling dmu_tx_assign().
161 * Thread B is in an already-assigned tx, and blocks for this lock.
162 * Thread A calls dmu_tx_assign(DMU_TX_WAIT) and blocks in
163 * txg_wait_open() forever, because the previous txg can't quiesce
164 * until B's tx commits.
165 *
166 * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is
167 * DMU_TX_NOWAIT, then drop all locks, call dmu_tx_wait(), and try
168 * again. On subsequent calls to dmu_tx_assign(), pass
169 * DMU_TX_NOTHROTTLE in addition to DMU_TX_NOWAIT, to indicate that
170 * this operation has already called dmu_tx_wait(). This will ensure
171 * that we don't retry forever, waiting a short bit each time.
172 *
173 * (5) If the operation succeeded, generate the intent log entry for it
174 * before dropping locks. This ensures that the ordering of events
175 * in the intent log matches the order in which they actually occurred.
176 * During ZIL replay the zfs_log_* functions will update the sequence
177 * number to indicate the zil transaction has replayed.
178 *
179 * (6) At the end of each vnode op, the DMU tx must always commit,
180 * regardless of whether there were any errors.
181 *
182 * (7) After dropping all locks, invoke zil_commit(zilog, foid)
183 * to ensure that synchronous semantics are provided when necessary.
184 *
185 * In general, this is how things should be ordered in each vnode op:
186 *
187 * zfs_enter(zfsvfs); // exit if unmounted
188 * top:
189 * zfs_dirent_lookup(&dl, ...) // lock directory entry (may VN_HOLD())
190 * rw_enter(...); // grab any other locks you need
191 * tx = dmu_tx_create(...); // get DMU tx
192 * dmu_tx_hold_*(); // hold each object you might modify
193 * error = dmu_tx_assign(tx,
194 * (waited ? DMU_TX_NOTHROTTLE : 0) | DMU_TX_NOWAIT);
195 * if (error) {
196 * rw_exit(...); // drop locks
197 * zfs_dirent_unlock(dl); // unlock directory entry
198 * VN_RELE(...); // release held vnodes
199 * if (error == ERESTART) {
200 * waited = B_TRUE;
201 * dmu_tx_wait(tx);
202 * dmu_tx_abort(tx);
203 * goto top;
204 * }
205 * dmu_tx_abort(tx); // abort DMU tx
206 * zfs_exit(zfsvfs); // finished in zfs
207 * return (error); // really out of space
208 * }
209 * error = do_real_work(); // do whatever this VOP does
210 * if (error == 0)
211 * zfs_log_*(...); // on success, make ZIL entry
212 * dmu_tx_commit(tx); // commit DMU tx -- error or not
213 * rw_exit(...); // drop locks
214 * zfs_dirent_unlock(dl); // unlock directory entry
215 * VN_RELE(...); // release held vnodes
216 * zil_commit(zilog, foid); // synchronous when necessary
217 * zfs_exit(zfsvfs); // finished in zfs
218 * return (error); // done, report error
219 */
220 static int
zfs_open(vnode_t ** vpp,int flag,cred_t * cr)221 zfs_open(vnode_t **vpp, int flag, cred_t *cr)
222 {
223 (void) cr;
224 znode_t *zp = VTOZ(*vpp);
225 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
226 int error;
227
228 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
229 return (error);
230
231 if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
232 ((flag & FAPPEND) == 0)) {
233 zfs_exit(zfsvfs, FTAG);
234 return (SET_ERROR(EPERM));
235 }
236
237 /*
238 * Keep a count of the synchronous opens in the znode. On first
239 * synchronous open we must convert all previous async transactions
240 * into sync to keep correct ordering.
241 */
242 if (flag & O_SYNC) {
243 if (atomic_inc_32_nv(&zp->z_sync_cnt) == 1)
244 zil_async_to_sync(zfsvfs->z_log, zp->z_id);
245 }
246
247 zfs_exit(zfsvfs, FTAG);
248 return (0);
249 }
250
251 static int
zfs_close(vnode_t * vp,int flag,int count,offset_t offset,cred_t * cr)252 zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
253 {
254 (void) offset, (void) cr;
255 znode_t *zp = VTOZ(vp);
256 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
257 int error;
258
259 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
260 return (error);
261
262 /* Decrement the synchronous opens in the znode */
263 if ((flag & O_SYNC) && (count == 1))
264 atomic_dec_32(&zp->z_sync_cnt);
265
266 zfs_exit(zfsvfs, FTAG);
267 return (0);
268 }
269
270 static int
zfs_ioctl_getxattr(vnode_t * vp,zfsxattr_t * fsx)271 zfs_ioctl_getxattr(vnode_t *vp, zfsxattr_t *fsx)
272 {
273 znode_t *zp = VTOZ(vp);
274
275 memset(fsx, 0, sizeof (*fsx));
276 fsx->fsx_xflags = (zp->z_pflags & ZFS_PROJINHERIT) ?
277 ZFS_PROJINHERIT_FL : 0;
278 fsx->fsx_projid = zp->z_projid;
279
280 return (0);
281 }
282
/*
 * Translate FS_IOC-style attribute flags (currently only ZFS_PROJINHERIT_FL)
 * into an xvattr_t request describing which ZFS flags must change relative
 * to the vnode's current z_pflags.  Returns EOPNOTSUPP for any flag we do
 * not support.
 */
static int
zfs_ioctl_setflags(vnode_t *vp, uint32_t ioctl_flags, xvattr_t *xva)
{
	uint64_t zfs_flags = VTOZ(vp)->z_pflags;
	xoptattr_t *xoap;

	/* Reject anything but the one flag we know how to set. */
	if (ioctl_flags & ~(ZFS_PROJINHERIT_FL))
		return (SET_ERROR(EOPNOTSUPP));

	xva_init(xva);
	xoap = xva_getxoptattr(xva);

/*
 * Request a change for 'xflag' only when the requested ioctl flag and the
 * current znode flag disagree; 'xfield' records the desired new state.
 */
#define	FLAG_CHANGE(iflag, zflag, xflag, xfield)	do {		\
	if (((ioctl_flags & (iflag)) && !(zfs_flags & (zflag))) ||	\
	    ((zfs_flags & (zflag)) && !(ioctl_flags & (iflag)))) {	\
		XVA_SET_REQ(xva, (xflag));				\
		(xfield) = ((ioctl_flags & (iflag)) != 0);		\
	}								\
} while (0)

	FLAG_CHANGE(ZFS_PROJINHERIT_FL, ZFS_PROJINHERIT, XAT_PROJINHERIT,
	    xoap->xoa_projinherit);

#undef	FLAG_CHANGE

	return (0);
}
310
311 static int
zfs_ioctl_setxattr(vnode_t * vp,zfsxattr_t * fsx,cred_t * cr)312 zfs_ioctl_setxattr(vnode_t *vp, zfsxattr_t *fsx, cred_t *cr)
313 {
314 znode_t *zp = VTOZ(vp);
315 xvattr_t xva;
316 xoptattr_t *xoap;
317 int err;
318
319 if (!zpl_is_valid_projid(fsx->fsx_projid))
320 return (SET_ERROR(EINVAL));
321
322 err = zfs_ioctl_setflags(vp, fsx->fsx_xflags, &xva);
323 if (err)
324 return (err);
325
326 xoap = xva_getxoptattr(&xva);
327 XVA_SET_REQ(&xva, XAT_PROJID);
328 xoap->xoa_projid = fsx->fsx_projid;
329
330 err = zfs_setattr(zp, (vattr_t *)&xva, 0, cr, NULL);
331
332 return (err);
333 }
334
/*
 * VOP ioctl dispatch for ZFS vnodes.  Handles hole/data seeking, the
 * project-quota xattr get/set ioctls, and ZFS_IOC_REWRITE; anything
 * else returns ENOTTY.  Each case takes and drops the vnode lock itself.
 */
static int
zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, cred_t *cred,
    int *rvalp)
{
	(void) flag, (void) cred, (void) rvalp;
	loff_t off;
	int error;

	switch (com) {
	case _FIOFFS:
	{
		return (0);

		/*
		 * The following two ioctls are used by bfu.  Faking out,
		 * necessary to avoid bfu errors.
		 */
	}
	case _FIOGDIO:
	case _FIOSDIO:
	{
		return (0);
	}

	case F_SEEK_DATA:
	case F_SEEK_HOLE:
	{
		off = *(offset_t *)data;
		/* A shared lock suffices for reading hole/data layout. */
		error = vn_lock(vp, LK_SHARED);
		if (error)
			return (error);
		/* offset parameter is in/out */
		error = zfs_holey(VTOZ(vp), com, &off);
		VOP_UNLOCK(vp);
		if (error)
			return (error);
		*(offset_t *)data = off;
		return (0);
	}
	case ZFS_IOC_FSGETXATTR: {
		zfsxattr_t *fsx = (zfsxattr_t *)data;
		error = vn_lock(vp, LK_SHARED);
		if (error)
			return (error);
		error = zfs_ioctl_getxattr(vp, fsx);
		VOP_UNLOCK(vp);
		return (error);
	}
	case ZFS_IOC_FSSETXATTR: {
		zfsxattr_t *fsx = (zfsxattr_t *)data;
		/* Attribute updates modify the znode: exclusive lock. */
		error = vn_lock(vp, LK_EXCLUSIVE);
		if (error)
			return (error);
		error = zfs_ioctl_setxattr(vp, fsx, cred);
		VOP_UNLOCK(vp);
		return (error);
	}
	case ZFS_IOC_REWRITE: {
		zfs_rewrite_args_t *args = (zfs_rewrite_args_t *)data;
		/* Rewriting requires the descriptor be open for writing. */
		if ((flag & FWRITE) == 0)
			return (SET_ERROR(EBADF));
		error = vn_lock(vp, LK_SHARED);
		if (error)
			return (error);
		error = zfs_rewrite(VTOZ(vp), args->off, args->len,
		    args->flags, args->arg);
		VOP_UNLOCK(vp);
		return (error);
	}
	}
	return (SET_ERROR(ENOTTY));
}
407
/*
 * Shared-busy an existing resident, fully-valid page at byte offset 'start'
 * of the vnode's VM object, write-protect it, and mark the DEV_BSIZE-aligned
 * interior of [off, off + nbytes) clean.  Also takes a paging-in-progress
 * reference on the object, released in page_unbusy().  Returns NULL when no
 * such page is resident.
 */
static vm_page_t
page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
{
	vm_object_t obj;
	vm_page_t pp;
	int64_t end;

	/*
	 * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE
	 * aligned boundaries, if the range is not aligned.  As a result a
	 * DEV_BSIZE subrange with partially dirty data may get marked as clean.
	 * It may happen that all DEV_BSIZE subranges are marked clean and thus
	 * the whole page would be considered clean despite having some
	 * dirty data.
	 * For this reason we should shrink the range to DEV_BSIZE aligned
	 * boundaries before calling vm_page_clear_dirty.
	 */
	end = rounddown2(off + nbytes, DEV_BSIZE);
	off = roundup2(off, DEV_BSIZE);
	nbytes = end - off;

	obj = vp->v_object;
	/* Do not create a page; only grab one that is already valid. */
	vm_page_grab_valid_unlocked(&pp, obj, OFF_TO_IDX(start),
	    VM_ALLOC_NOCREAT | VM_ALLOC_SBUSY | VM_ALLOC_NORMAL |
	    VM_ALLOC_IGN_SBUSY);
	if (pp != NULL) {
		ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
		/* Paired with vm_object_pip_wakeup() in page_unbusy(). */
		vm_object_pip_add(obj, 1);
		/* Prevent new stores through existing mappings. */
		pmap_remove_write(pp);
		if (nbytes != 0)
			vm_page_clear_dirty(pp, off, nbytes);
	}
	return (pp);
}
442
443 static void
page_unbusy(vm_page_t pp)444 page_unbusy(vm_page_t pp)
445 {
446
447 vm_page_sunbusy(pp);
448 vm_object_pip_wakeup(pp->object);
449 }
450
451 static vm_page_t
page_hold(vnode_t * vp,int64_t start)452 page_hold(vnode_t *vp, int64_t start)
453 {
454 vm_object_t obj;
455 vm_page_t m;
456
457 obj = vp->v_object;
458 vm_page_grab_valid_unlocked(&m, obj, OFF_TO_IDX(start),
459 VM_ALLOC_NOCREAT | VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY |
460 VM_ALLOC_NOBUSY);
461 return (m);
462 }
463
464 static void
page_unhold(vm_page_t pp)465 page_unhold(vm_page_t pp)
466 {
467 vm_page_unwire(pp, PQ_ACTIVE);
468 }
469
470 /*
471 * When a file is memory mapped, we must keep the IO data synchronized
472 * between the DMU cache and the memory mapped pages. What this means:
473 *
474 * On Write: If we find a memory mapped page, we write to *both*
475 * the page and the dmu buffer.
476 */
/*
 * Propagate data that was just written to the DMU at [start, start + len)
 * of object zp->z_id into any resident page-cache pages covering that
 * range, so mapped readers observe the new contents.  Pages not resident
 * are skipped.  The object's pip reference pins it for the duration.
 */
void
update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
{
	vm_object_t obj;
	struct sf_buf *sf;
	vnode_t *vp = ZTOV(zp);
	caddr_t va;
	int off;

	ASSERT3P(vp->v_mount, !=, NULL);
	obj = vp->v_object;
	ASSERT3P(obj, !=, NULL);

	/* 'off' is the offset within the first page; zero afterwards. */
	off = start & PAGEOFFSET;
	vm_object_pip_add(obj, 1);
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		vm_page_t pp;
		int nbytes = imin(PAGESIZE - off, len);

		if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
			/* Copy the freshly written bytes from the DMU. */
			va = zfs_map_page(pp, &sf);
			(void) dmu_read(os, zp->z_id, start + off, nbytes,
			    va + off, DMU_READ_PREFETCH);
			zfs_unmap_page(sf);
			page_unbusy(pp);
		}
		len -= nbytes;
		off = 0;
	}
	vm_object_pip_wakeup(obj);
}
508
509 /*
510 * Read with UIO_NOCOPY flag means that sendfile(2) requests
511 * ZFS to populate a range of page cache pages with data.
512 *
513 * NOTE: this function could be optimized to pre-allocate
514 * all pages in advance, drain exclusive busy on all of them,
515 * map them into contiguous KVA region and populate them
516 * in one single dmu_read() call.
517 */
/*
 * UIO_NOCOPY read (sendfile(2) path): populate page-cache pages covering
 * [uio offset, +nbytes) with file data from the DMU.  The starting offset
 * must be page aligned.  Pages that are already valid are left untouched;
 * newly grabbed pages are filled, zero-padded past 'bytes', validated and
 * activated.  On a read error a freshly allocated page is freed if possible.
 */
int
mappedread_sf(znode_t *zp, int nbytes, zfs_uio_t *uio)
{
	vnode_t *vp = ZTOV(zp);
	objset_t *os = zp->z_zfsvfs->z_os;
	struct sf_buf *sf;
	vm_object_t obj;
	vm_page_t pp;
	int64_t start;
	caddr_t va;
	int len = nbytes;
	int error = 0;

	ASSERT3U(zfs_uio_segflg(uio), ==, UIO_NOCOPY);
	ASSERT3P(vp->v_mount, !=, NULL);
	obj = vp->v_object;
	ASSERT3P(obj, !=, NULL);
	/* Caller must provide a page-aligned starting offset. */
	ASSERT0(zfs_uio_offset(uio) & PAGEOFFSET);

	for (start = zfs_uio_offset(uio); len > 0; start += PAGESIZE) {
		int bytes = MIN(PAGESIZE, len);

		pp = vm_page_grab_unlocked(obj, OFF_TO_IDX(start),
		    VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
		if (vm_page_none_valid(pp)) {
			/* New or invalid page: fill it from the DMU. */
			va = zfs_map_page(pp, &sf);
			error = dmu_read(os, zp->z_id, start, bytes, va,
			    DMU_READ_PREFETCH);
			/* Zero the tail of a short final page. */
			if (bytes != PAGESIZE && error == 0)
				memset(va + bytes, 0, PAGESIZE - bytes);
			zfs_unmap_page(sf);
			if (error == 0) {
				vm_page_valid(pp);
				vm_page_activate(pp);
				vm_page_sunbusy(pp);
			} else {
				/*
				 * Read failed: free the page if nobody else
				 * holds it, otherwise just deactivate it so
				 * it is reclaimed soon.
				 */
				zfs_vmobject_wlock(obj);
				if (!vm_page_wired(pp) && pp->valid == 0 &&
				    vm_page_busy_tryupgrade(pp))
					vm_page_free(pp);
				else {
					vm_page_deactivate_noreuse(pp);
					vm_page_sunbusy(pp);
				}
				zfs_vmobject_wunlock(obj);
			}
		} else {
			/* Page already has valid data; nothing to do. */
			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
			vm_page_sunbusy(pp);
		}
		if (error)
			break;
		zfs_uio_advance(uio, bytes);
		len -= bytes;
	}
	return (error);
}
575
576 /*
577 * When a file is memory mapped, we must keep the IO data synchronized
578 * between the DMU cache and the memory mapped pages. What this means:
579 *
580 * On Read: We "read" preferentially from memory mapped pages,
581 * else we default from the dmu buffer.
582 *
583 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
584 * the file is memory mapped.
585 */
/*
 * Read 'nbytes' at the uio's offset, preferring resident page-cache pages
 * (which may be newer than the DMU copy for a mapped file) and falling
 * back to the DMU for pages that are not resident.  Advances the uio and
 * stops on the first error.
 */
int
mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
{
	vnode_t *vp = ZTOV(zp);
	vm_object_t obj;
	int64_t start;
	int len = nbytes;
	int off;
	int error = 0;

	ASSERT3P(vp->v_mount, !=, NULL);
	obj = vp->v_object;
	ASSERT3P(obj, !=, NULL);

	/* 'off' is the offset within the first page; zero afterwards. */
	start = zfs_uio_offset(uio);
	off = start & PAGEOFFSET;
	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
		vm_page_t pp;
		uint64_t bytes = MIN(PAGESIZE - off, len);

		if ((pp = page_hold(vp, start))) {
			/* Resident page: copy directly from the page cache. */
			struct sf_buf *sf;
			caddr_t va;

			va = zfs_map_page(pp, &sf);
			error = vn_io_fault_uiomove(va + off, bytes,
			    GET_UIO_STRUCT(uio));
			zfs_unmap_page(sf);
			page_unhold(pp);
		} else {
			/* Not resident: read this chunk from the DMU. */
			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
			    uio, bytes, DMU_READ_PREFETCH);
		}
		len -= bytes;
		off = 0;
		if (error)
			break;
	}
	return (error);
}
626
627 int
zfs_write_simple(znode_t * zp,const void * data,size_t len,loff_t pos,size_t * presid)628 zfs_write_simple(znode_t *zp, const void *data, size_t len,
629 loff_t pos, size_t *presid)
630 {
631 int error = 0;
632 ssize_t resid;
633
634 error = vn_rdwr(UIO_WRITE, ZTOV(zp), __DECONST(void *, data), len, pos,
635 UIO_SYSSPACE, IO_SYNC, kcred, NOCRED, &resid, curthread);
636
637 if (error) {
638 return (SET_ERROR(error));
639 } else if (presid == NULL) {
640 if (resid != 0) {
641 error = SET_ERROR(EIO);
642 }
643 } else {
644 *presid = resid;
645 }
646 return (error);
647 }
648
649 void
zfs_zrele_async(znode_t * zp)650 zfs_zrele_async(znode_t *zp)
651 {
652 vnode_t *vp = ZTOV(zp);
653 objset_t *os = ITOZSB(vp)->z_os;
654
655 VN_RELE_ASYNC(vp, dsl_pool_zrele_taskq(dmu_objset_pool(os)));
656 }
657
/*
 * vn_vget_ino_gen() callback for ".." lookups: lock the vnode passed in
 * via 'arg', dropping its reference on failure.
 */
static int
zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
{
	int error;

	*vpp = arg;
	error = vn_lock(*vpp, lkflags);
	if (error != 0) {
		vrele(*vpp);
	}
	return (error);
}
669
/*
 * Lock the vnode produced by a lookup of 'name' in directory 'dvp' with
 * 'lkflags', handling the three cases: "." (same vnode, possibly needing
 * a lock up/downgrade), ".." (lock-order reversal handled through
 * vn_vget_ino_gen()), and an ordinary child.  On failure the reference
 * on 'vp' is dropped.
 */
static int
zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags)
{
	znode_t *zdp = VTOZ(dvp);
	zfsvfs_t *zfsvfs __unused = zdp->z_zfsvfs;
	int error;
	int ltype;

	/* Outside of ZIL replay the directory must already be locked. */
	if (zfsvfs->z_replay == B_FALSE)
		ASSERT_VOP_LOCKED(dvp, __func__);

	if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
		/* "." — the result is the directory itself. */
		ASSERT3P(dvp, ==, vp);
		vref(dvp);
		ltype = lkflags & LK_TYPE_MASK;
		if (ltype != VOP_ISLOCKED(dvp)) {
			if (ltype == LK_EXCLUSIVE)
				vn_lock(dvp, LK_UPGRADE | LK_RETRY);
			else /* if (ltype == LK_SHARED) */
				vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);

			/*
			 * Relock for the "." case could leave us with
			 * reclaimed vnode.
			 */
			if (VN_IS_DOOMED(dvp)) {
				vrele(dvp);
				return (SET_ERROR(ENOENT));
			}
		}
		return (0);
	} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
		/*
		 * Note that in this case, dvp is the child vnode, and we
		 * are looking up the parent vnode - exactly reverse from
		 * normal operation.  Unlocking dvp requires some rather
		 * tricky unlock/relock dance to prevent mp from being freed;
		 * use vn_vget_ino_gen() which takes care of all that.
		 *
		 * XXX Note that there is a time window when both vnodes are
		 * unlocked.  It is possible, although highly unlikely, that
		 * during that window the parent-child relationship between
		 * the vnodes may change, for example, get reversed.
		 * In that case we would have a wrong lock order for the vnodes.
		 * All other filesystems seem to ignore this problem, so we
		 * do the same here.
		 * A potential solution could be implemented as follows:
		 * - using LK_NOWAIT when locking the second vnode and retrying
		 *   if necessary
		 * - checking that the parent-child relationship still holds
		 *   after locking both vnodes and retrying if it doesn't
		 */
		error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp);
		return (error);
	} else {
		/* Ordinary child: just lock it. */
		error = vn_lock(vp, lkflags);
		if (error != 0)
			vrele(vp);
		return (error);
	}
}
731
732 /*
733 * Lookup an entry in a directory, or an extended attribute directory.
734 * If it exists, return a held vnode reference for it.
735 *
736 * IN: dvp - vnode of directory to search.
737 * nm - name of entry to lookup.
738 * pnp - full pathname to lookup [UNUSED].
739 * flags - LOOKUP_XATTR set if looking for an attribute.
740 * rdir - root directory vnode [UNUSED].
741 * cr - credentials of caller.
742 * ct - caller context
743 *
744 * OUT: vpp - vnode of located entry, NULL if not found.
745 *
746 * RETURN: 0 on success, error code on failure.
747 *
748 * Timestamps:
749 * NA
750 */
static int
zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
    struct componentname *cnp, int nameiop, cred_t *cr, int flags,
    boolean_t cached)
{
	znode_t *zdp = VTOZ(dvp);
	znode_t *zp;
	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
	seqc_t dvp_seqc;
	int error = 0;

	/*
	 * Fast path lookup, however we must skip DNLC lookup
	 * for case folding or normalizing lookups because the
	 * DNLC code only stores the passed in name.  This means
	 * creating 'a' and removing 'A' on a case insensitive
	 * file system would work, but DNLC still thinks 'a'
	 * exists and won't let you create it again on the next
	 * pass through fast path.
	 */
	if (!(flags & LOOKUP_XATTR)) {
		if (dvp->v_type != VDIR) {
			return (SET_ERROR(ENOTDIR));
		} else if (zdp->z_sa_hdl == NULL) {
			/* znode has lost its SA handle; treat as I/O error. */
			return (SET_ERROR(EIO));
		}
	}

	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp,
	    const char *, nm);

	if ((error = zfs_enter_verify_zp(zfsvfs, zdp, FTAG)) != 0)
		return (error);

	/* Sampled now, checked at the end to detect concurrent changes. */
	dvp_seqc = vn_seqc_read_notmodify(dvp);

	*vpp = NULL;

	if (flags & LOOKUP_XATTR) {
		/*
		 * If the xattr property is off, refuse the lookup request.
		 */
		if (!(zfsvfs->z_flags & ZSB_XATTR)) {
			zfs_exit(zfsvfs, FTAG);
			return (SET_ERROR(EOPNOTSUPP));
		}

		/*
		 * We don't allow recursive attributes..
		 * Maybe someday we will.
		 */
		if (zdp->z_pflags & ZFS_XATTR) {
			zfs_exit(zfsvfs, FTAG);
			return (SET_ERROR(EINVAL));
		}

		if ((error = zfs_get_xattrdir(VTOZ(dvp), &zp, cr, flags))) {
			zfs_exit(zfsvfs, FTAG);
			return (error);
		}
		*vpp = ZTOV(zp);

		/*
		 * Do we have permission to get into attribute directory?
		 */
		error = zfs_zaccess(zp, ACE_EXECUTE, 0, B_FALSE, cr, NULL);
		if (error) {
			vrele(ZTOV(zp));
		}

		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	/*
	 * Check accessibility of directory if we're not coming in via
	 * VOP_CACHEDLOOKUP.
	 */
	if (!cached) {
#ifdef NOEXECCHECK
		if ((cnp->cn_flags & NOEXECCHECK) != 0) {
			cnp->cn_flags &= ~NOEXECCHECK;
		} else
#endif
		if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr,
		    NULL))) {
			zfs_exit(zfsvfs, FTAG);
			return (error);
		}
	}

	/* On UTF-8-only datasets, reject names that are not valid UTF-8. */
	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EILSEQ));
	}


	/*
	 * First handle the special cases.
	 */
	if ((cnp->cn_flags & ISDOTDOT) != 0) {
		/*
		 * If we are a snapshot mounted under .zfs, return
		 * the vp for the snapshot directory.
		 */
		if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
			struct componentname cn;
			vnode_t *zfsctl_vp;
			int ltype;

			zfs_exit(zfsvfs, FTAG);
			ltype = VOP_ISLOCKED(dvp);
			VOP_UNLOCK(dvp);
			error = zfsctl_root(zfsvfs->z_parent, LK_SHARED,
			    &zfsctl_vp);
			if (error == 0) {
				/* Look up "snapshot" in the control dir. */
				cn.cn_nameptr = "snapshot";
				cn.cn_namelen = strlen(cn.cn_nameptr);
				cn.cn_nameiop = cnp->cn_nameiop;
				cn.cn_flags = cnp->cn_flags & ~ISDOTDOT;
				cn.cn_lkflags = cnp->cn_lkflags;
				error = VOP_LOOKUP(zfsctl_vp, vpp, &cn);
				vput(zfsctl_vp);
			}
			vn_lock(dvp, ltype | LK_RETRY);
			return (error);
		}
	}
	if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
		zfs_exit(zfsvfs, FTAG);
		if (zfsvfs->z_show_ctldir == ZFS_SNAPDIR_DISABLED)
			return (SET_ERROR(ENOENT));
		/* The .zfs directory only supports plain lookups. */
		if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
			return (SET_ERROR(ENOTSUP));
		error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp);
		return (error);
	}

	/*
	 * The loop retries the lookup if the parent-child relationship
	 * changes during the dot-dot locking complexities.
	 */
	for (;;) {
		uint64_t parent;

		error = zfs_dirlook(zdp, nm, &zp);
		if (error == 0)
			*vpp = ZTOV(zp);

		zfs_exit(zfsvfs, FTAG);
		if (error != 0)
			break;

		error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags);
		if (error != 0) {
			/*
			 * If we've got a locking error, then the vnode
			 * got reclaimed because of a force unmount.
			 * We never enter doomed vnodes into the name cache.
			 */
			*vpp = NULL;
			return (error);
		}

		/* Only dot-dot lookups need the consistency re-check. */
		if ((cnp->cn_flags & ISDOTDOT) == 0)
			break;

		if ((error = zfs_enter(zfsvfs, FTAG)) != 0) {
			vput(ZTOV(zp));
			*vpp = NULL;
			return (error);
		}
		if (zdp->z_sa_hdl == NULL) {
			error = SET_ERROR(EIO);
		} else {
			/* Re-read the parent id to confirm zp is still it. */
			error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
			    &parent, sizeof (parent));
		}
		if (error != 0) {
			zfs_exit(zfsvfs, FTAG);
			vput(ZTOV(zp));
			break;
		}
		if (zp->z_id == parent) {
			zfs_exit(zfsvfs, FTAG);
			break;
		}
		/* Relationship changed under us: drop and retry. */
		vput(ZTOV(zp));
	}

	if (error != 0)
		*vpp = NULL;

	/* Translate errors and add SAVENAME when needed. */
	if (cnp->cn_flags & ISLASTCN) {
		switch (nameiop) {
		case CREATE:
		case RENAME:
			if (error == ENOENT) {
				/* Missing target is OK for create/rename. */
				error = EJUSTRETURN;
#if __FreeBSD_version < 1400068
				cnp->cn_flags |= SAVENAME;
#endif
				break;
			}
			zfs_fallthrough;
		case DELETE:
#if __FreeBSD_version < 1400068
			if (error == 0)
				cnp->cn_flags |= SAVENAME;
#endif
			break;
		}
	}

	if ((cnp->cn_flags & ISDOTDOT) != 0) {
		/*
		 * FIXME: zfs_lookup_lock relocks vnodes and does nothing to
		 * handle races.  In particular different callers may end up
		 * with different vnodes and will try to add conflicting
		 * entries to the namecache.
		 *
		 * While finding different result may be acceptable in face
		 * of concurrent modification, adding conflicting entries
		 * trips over an assert in the namecache.
		 *
		 * Ultimately let an entry through once everything settles.
		 */
		if (!vn_seqc_consistent(dvp, dvp_seqc)) {
			cnp->cn_flags &= ~MAKEENTRY;
		}
	}

	/* Insert name into cache (as non-existent) if appropriate. */
	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
	    error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0)
		cache_enter(dvp, NULL, cnp);

	/* Insert name into cache if appropriate. */
	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
	    error == 0 && (cnp->cn_flags & MAKEENTRY)) {
		if (!(cnp->cn_flags & ISLASTCN) ||
		    (nameiop != DELETE && nameiop != RENAME)) {
			cache_enter(dvp, *vpp, cnp);
		}
	}

	return (error);
}
1001
1002 static inline bool
is_nametoolong(zfsvfs_t * zfsvfs,const char * name)1003 is_nametoolong(zfsvfs_t *zfsvfs, const char *name)
1004 {
1005 size_t dlen = strlen(name);
1006 return ((!zfsvfs->z_longname && dlen >= ZAP_MAXNAMELEN) ||
1007 dlen >= ZAP_MAXNAMELEN_NEW);
1008 }
1009
1010 /*
1011 * Attempt to create a new entry in a directory. If the entry
1012 * already exists, truncate the file if permissible, else return
1013 * an error. Return the vp of the created or trunc'd file.
1014 *
1015 * IN: dvp - vnode of directory to put new file entry in.
1016 * name - name of new file entry.
1017 * vap - attributes of new file.
1018 * excl - flag indicating exclusive or non-exclusive mode.
1019 * mode - mode to open file with.
1020 * cr - credentials of caller.
1021 * flag - large file flag [UNUSED].
1022 * ct - caller context
1023 * vsecp - ACL to be set
1024 * mnt_ns - Unused on FreeBSD
1025 *
1026 * OUT: vpp - vnode of created or trunc'd entry.
1027 *
1028 * RETURN: 0 on success, error code on failure.
1029 *
1030 * Timestamps:
1031 * dvp - ctime|mtime updated if new entry created
1032 * vp - ctime|mtime always, atime if new
1033 */
1034 int
zfs_create(znode_t * dzp,const char * name,vattr_t * vap,int excl,int mode,znode_t ** zpp,cred_t * cr,int flag,vsecattr_t * vsecp,zidmap_t * mnt_ns)1035 zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
1036 znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp, zidmap_t *mnt_ns)
1037 {
1038 (void) excl, (void) mode, (void) flag;
1039 znode_t *zp;
1040 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1041 zilog_t *zilog;
1042 objset_t *os;
1043 dmu_tx_t *tx;
1044 int error;
1045 uid_t uid = crgetuid(cr);
1046 gid_t gid = crgetgid(cr);
1047 uint64_t projid = ZFS_DEFAULT_PROJID;
1048 zfs_acl_ids_t acl_ids;
1049 boolean_t fuid_dirtied;
1050 uint64_t txtype;
1051 #ifdef DEBUG_VFS_LOCKS
1052 vnode_t *dvp = ZTOV(dzp);
1053 #endif
1054
1055 if (is_nametoolong(zfsvfs, name))
1056 return (SET_ERROR(ENAMETOOLONG));
1057
1058 /*
1059 * If we have an ephemeral id, ACL, or XVATTR then
1060 * make sure file system is at proper version
1061 */
1062 if (zfsvfs->z_use_fuids == B_FALSE &&
1063 (vsecp || (vap->va_mask & AT_XVATTR) ||
1064 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
1065 return (SET_ERROR(EINVAL));
1066
1067 if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
1068 return (error);
1069 os = zfsvfs->z_os;
1070 zilog = zfsvfs->z_log;
1071
1072 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
1073 NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
1074 zfs_exit(zfsvfs, FTAG);
1075 return (SET_ERROR(EILSEQ));
1076 }
1077
1078 if (vap->va_mask & AT_XVATTR) {
1079 if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
1080 crgetuid(cr), cr, vap->va_type)) != 0) {
1081 zfs_exit(zfsvfs, FTAG);
1082 return (error);
1083 }
1084 }
1085
1086 *zpp = NULL;
1087
1088 if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
1089 vap->va_mode &= ~S_ISVTX;
1090
1091 error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
1092 if (error) {
1093 zfs_exit(zfsvfs, FTAG);
1094 return (error);
1095 }
1096 ASSERT3P(zp, ==, NULL);
1097
1098 /*
1099 * Create a new file object and update the directory
1100 * to reference it.
1101 */
1102 if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
1103 goto out;
1104 }
1105
1106 /*
1107 * We only support the creation of regular files in
1108 * extended attribute directories.
1109 */
1110
1111 if ((dzp->z_pflags & ZFS_XATTR) &&
1112 (vap->va_type != VREG)) {
1113 error = SET_ERROR(EINVAL);
1114 goto out;
1115 }
1116
1117 if ((error = zfs_acl_ids_create(dzp, 0, vap,
1118 cr, vsecp, &acl_ids, NULL)) != 0)
1119 goto out;
1120
1121 if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
1122 projid = zfs_inherit_projid(dzp);
1123 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
1124 zfs_acl_ids_free(&acl_ids);
1125 error = SET_ERROR(EDQUOT);
1126 goto out;
1127 }
1128
1129 getnewvnode_reserve();
1130
1131 tx = dmu_tx_create(os);
1132
1133 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1134 ZFS_SA_BASE_ATTR_SIZE);
1135
1136 fuid_dirtied = zfsvfs->z_fuid_dirty;
1137 if (fuid_dirtied)
1138 zfs_fuid_txhold(zfsvfs, tx);
1139 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
1140 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
1141 if (!zfsvfs->z_use_sa &&
1142 acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1143 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
1144 0, acl_ids.z_aclp->z_acl_bytes);
1145 }
1146 error = dmu_tx_assign(tx, DMU_TX_WAIT);
1147 if (error) {
1148 zfs_acl_ids_free(&acl_ids);
1149 dmu_tx_abort(tx);
1150 getnewvnode_drop_reserve();
1151 zfs_exit(zfsvfs, FTAG);
1152 return (error);
1153 }
1154 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1155
1156 error = zfs_link_create(dzp, name, zp, tx, ZNEW);
1157 if (error != 0) {
1158 /*
1159 * Since, we failed to add the directory entry for it,
1160 * delete the newly created dnode.
1161 */
1162 zfs_znode_delete(zp, tx);
1163 VOP_UNLOCK(ZTOV(zp));
1164 zrele(zp);
1165 zfs_acl_ids_free(&acl_ids);
1166 dmu_tx_commit(tx);
1167 getnewvnode_drop_reserve();
1168 goto out;
1169 }
1170
1171 if (fuid_dirtied)
1172 zfs_fuid_sync(zfsvfs, tx);
1173
1174 txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
1175 zfs_log_create(zilog, tx, txtype, dzp, zp, name,
1176 vsecp, acl_ids.z_fuidp, vap);
1177 zfs_acl_ids_free(&acl_ids);
1178 dmu_tx_commit(tx);
1179
1180 getnewvnode_drop_reserve();
1181
1182 out:
1183 VNCHECKREF(dvp);
1184 if (error == 0) {
1185 *zpp = zp;
1186 }
1187
1188 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1189 zil_commit(zilog, 0);
1190
1191 zfs_exit(zfsvfs, FTAG);
1192 return (error);
1193 }
1194
1195 /*
1196 * Remove an entry from a directory.
1197 *
1198 * IN: dvp - vnode of directory to remove entry from.
1199 * name - name of entry to remove.
1200 * cr - credentials of caller.
1201 * ct - caller context
1202 * flags - case flags
1203 *
1204 * RETURN: 0 on success, error code on failure.
1205 *
1206 * Timestamps:
1207 * dvp - ctime|mtime
1208 * vp - ctime (if nlink > 0)
1209 */
1210 static int
zfs_remove_(vnode_t * dvp,vnode_t * vp,const char * name,cred_t * cr)1211 zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
1212 {
1213 znode_t *dzp = VTOZ(dvp);
1214 znode_t *zp;
1215 znode_t *xzp;
1216 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1217 zilog_t *zilog;
1218 uint64_t xattr_obj;
1219 uint64_t obj = 0;
1220 dmu_tx_t *tx;
1221 boolean_t unlinked;
1222 uint64_t txtype;
1223 int error;
1224
1225
1226 if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
1227 return (error);
1228 zp = VTOZ(vp);
1229 if ((error = zfs_verify_zp(zp)) != 0) {
1230 zfs_exit(zfsvfs, FTAG);
1231 return (error);
1232 }
1233 zilog = zfsvfs->z_log;
1234
1235 xattr_obj = 0;
1236 xzp = NULL;
1237
1238 if ((error = zfs_zaccess_delete(dzp, zp, cr, NULL))) {
1239 goto out;
1240 }
1241
1242 /*
1243 * Need to use rmdir for removing directories.
1244 */
1245 if (vp->v_type == VDIR) {
1246 error = SET_ERROR(EPERM);
1247 goto out;
1248 }
1249
1250 vnevent_remove(vp, dvp, name, ct);
1251
1252 obj = zp->z_id;
1253
1254 /* are there any extended attributes? */
1255 error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
1256 &xattr_obj, sizeof (xattr_obj));
1257 if (error == 0 && xattr_obj) {
1258 error = zfs_zget(zfsvfs, xattr_obj, &xzp);
1259 ASSERT0(error);
1260 }
1261
1262 /*
1263 * We may delete the znode now, or we may put it in the unlinked set;
1264 * it depends on whether we're the last link, and on whether there are
1265 * other holds on the vnode. So we dmu_tx_hold() the right things to
1266 * allow for either case.
1267 */
1268 tx = dmu_tx_create(zfsvfs->z_os);
1269 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
1270 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1271 zfs_sa_upgrade_txholds(tx, zp);
1272 zfs_sa_upgrade_txholds(tx, dzp);
1273
1274 if (xzp) {
1275 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
1276 dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
1277 }
1278
1279 /* charge as an update -- would be nice not to charge at all */
1280 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1281
1282 /*
1283 * Mark this transaction as typically resulting in a net free of space
1284 */
1285 dmu_tx_mark_netfree(tx);
1286
1287 error = dmu_tx_assign(tx, DMU_TX_WAIT);
1288 if (error) {
1289 dmu_tx_abort(tx);
1290 zfs_exit(zfsvfs, FTAG);
1291 return (error);
1292 }
1293
1294 /*
1295 * Remove the directory entry.
1296 */
1297 error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked);
1298
1299 if (error) {
1300 dmu_tx_commit(tx);
1301 goto out;
1302 }
1303
1304 if (unlinked) {
1305 zfs_unlinked_add(zp, tx);
1306 vp->v_vflag |= VV_NOSYNC;
1307 }
1308 /* XXX check changes to linux vnops */
1309 txtype = TX_REMOVE;
1310 zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked);
1311
1312 dmu_tx_commit(tx);
1313 out:
1314
1315 if (xzp)
1316 vrele(ZTOV(xzp));
1317
1318 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1319 zil_commit(zilog, 0);
1320
1321
1322 zfs_exit(zfsvfs, FTAG);
1323 return (error);
1324 }
1325
1326
1327 static int
zfs_lookup_internal(znode_t * dzp,const char * name,vnode_t ** vpp,struct componentname * cnp,int nameiop)1328 zfs_lookup_internal(znode_t *dzp, const char *name, vnode_t **vpp,
1329 struct componentname *cnp, int nameiop)
1330 {
1331 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1332 int error;
1333
1334 cnp->cn_nameptr = __DECONST(char *, name);
1335 cnp->cn_namelen = strlen(name);
1336 cnp->cn_nameiop = nameiop;
1337 cnp->cn_flags = ISLASTCN;
1338 #if __FreeBSD_version < 1400068
1339 cnp->cn_flags |= SAVENAME;
1340 #endif
1341 cnp->cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
1342 cnp->cn_cred = kcred;
1343 #if __FreeBSD_version < 1400037
1344 cnp->cn_thread = curthread;
1345 #endif
1346
1347 if (zfsvfs->z_use_namecache && !zfsvfs->z_replay) {
1348 struct vop_lookup_args a;
1349
1350 a.a_gen.a_desc = &vop_lookup_desc;
1351 a.a_dvp = ZTOV(dzp);
1352 a.a_vpp = vpp;
1353 a.a_cnp = cnp;
1354 error = vfs_cache_lookup(&a);
1355 } else {
1356 error = zfs_lookup(ZTOV(dzp), name, vpp, cnp, nameiop, kcred, 0,
1357 B_FALSE);
1358 }
1359 #ifdef ZFS_DEBUG
1360 if (error) {
1361 printf("got error %d on name %s on op %d\n", error, name,
1362 nameiop);
1363 kdb_backtrace();
1364 }
1365 #endif
1366 return (error);
1367 }
1368
1369 int
zfs_remove(znode_t * dzp,const char * name,cred_t * cr,int flags)1370 zfs_remove(znode_t *dzp, const char *name, cred_t *cr, int flags)
1371 {
1372 vnode_t *vp;
1373 int error;
1374 struct componentname cn;
1375
1376 if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
1377 return (error);
1378
1379 error = zfs_remove_(ZTOV(dzp), vp, name, cr);
1380 vput(vp);
1381 return (error);
1382 }
1383 /*
1384 * Create a new directory and insert it into dvp using the name
1385 * provided. Return a pointer to the inserted directory.
1386 *
1387 * IN: dvp - vnode of directory to add subdir to.
1388 * dirname - name of new directory.
1389 * vap - attributes of new directory.
1390 * cr - credentials of caller.
1391 * ct - caller context
1392 * flags - case flags
1393 * vsecp - ACL to be set
1394 * mnt_ns - Unused on FreeBSD
1395 *
1396 * OUT: vpp - vnode of created directory.
1397 *
1398 * RETURN: 0 on success, error code on failure.
1399 *
1400 * Timestamps:
1401 * dvp - ctime|mtime updated
1402 * vp - ctime|mtime|atime updated
1403 */
1404 int
zfs_mkdir(znode_t * dzp,const char * dirname,vattr_t * vap,znode_t ** zpp,cred_t * cr,int flags,vsecattr_t * vsecp,zidmap_t * mnt_ns)1405 zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
1406 cred_t *cr, int flags, vsecattr_t *vsecp, zidmap_t *mnt_ns)
1407 {
1408 (void) flags, (void) vsecp;
1409 znode_t *zp;
1410 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1411 zilog_t *zilog;
1412 uint64_t txtype;
1413 dmu_tx_t *tx;
1414 int error;
1415 uid_t uid = crgetuid(cr);
1416 gid_t gid = crgetgid(cr);
1417 zfs_acl_ids_t acl_ids;
1418 boolean_t fuid_dirtied;
1419
1420 ASSERT3U(vap->va_type, ==, VDIR);
1421
1422 if (is_nametoolong(zfsvfs, dirname))
1423 return (SET_ERROR(ENAMETOOLONG));
1424
1425 /*
1426 * If we have an ephemeral id, ACL, or XVATTR then
1427 * make sure file system is at proper version
1428 */
1429 if (zfsvfs->z_use_fuids == B_FALSE &&
1430 ((vap->va_mask & AT_XVATTR) ||
1431 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
1432 return (SET_ERROR(EINVAL));
1433
1434 if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
1435 return (error);
1436 zilog = zfsvfs->z_log;
1437
1438 if (dzp->z_pflags & ZFS_XATTR) {
1439 zfs_exit(zfsvfs, FTAG);
1440 return (SET_ERROR(EINVAL));
1441 }
1442
1443 if (zfsvfs->z_utf8 && u8_validate(dirname,
1444 strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
1445 zfs_exit(zfsvfs, FTAG);
1446 return (SET_ERROR(EILSEQ));
1447 }
1448
1449 if (vap->va_mask & AT_XVATTR) {
1450 if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
1451 crgetuid(cr), cr, vap->va_type)) != 0) {
1452 zfs_exit(zfsvfs, FTAG);
1453 return (error);
1454 }
1455 }
1456
1457 if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
1458 NULL, &acl_ids, NULL)) != 0) {
1459 zfs_exit(zfsvfs, FTAG);
1460 return (error);
1461 }
1462
1463 /*
1464 * First make sure the new directory doesn't exist.
1465 *
1466 * Existence is checked first to make sure we don't return
1467 * EACCES instead of EEXIST which can cause some applications
1468 * to fail.
1469 */
1470 *zpp = NULL;
1471
1472 if ((error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW))) {
1473 zfs_acl_ids_free(&acl_ids);
1474 zfs_exit(zfsvfs, FTAG);
1475 return (error);
1476 }
1477 ASSERT3P(zp, ==, NULL);
1478
1479 if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr,
1480 mnt_ns))) {
1481 zfs_acl_ids_free(&acl_ids);
1482 zfs_exit(zfsvfs, FTAG);
1483 return (error);
1484 }
1485
1486 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
1487 zfs_acl_ids_free(&acl_ids);
1488 zfs_exit(zfsvfs, FTAG);
1489 return (SET_ERROR(EDQUOT));
1490 }
1491
1492 /*
1493 * Add a new entry to the directory.
1494 */
1495 getnewvnode_reserve();
1496 tx = dmu_tx_create(zfsvfs->z_os);
1497 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
1498 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
1499 fuid_dirtied = zfsvfs->z_fuid_dirty;
1500 if (fuid_dirtied)
1501 zfs_fuid_txhold(zfsvfs, tx);
1502 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1503 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
1504 acl_ids.z_aclp->z_acl_bytes);
1505 }
1506
1507 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1508 ZFS_SA_BASE_ATTR_SIZE);
1509
1510 error = dmu_tx_assign(tx, DMU_TX_WAIT);
1511 if (error) {
1512 zfs_acl_ids_free(&acl_ids);
1513 dmu_tx_abort(tx);
1514 getnewvnode_drop_reserve();
1515 zfs_exit(zfsvfs, FTAG);
1516 return (error);
1517 }
1518
1519 /*
1520 * Create new node.
1521 */
1522 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1523
1524 /*
1525 * Now put new name in parent dir.
1526 */
1527 error = zfs_link_create(dzp, dirname, zp, tx, ZNEW);
1528 if (error != 0) {
1529 zfs_znode_delete(zp, tx);
1530 VOP_UNLOCK(ZTOV(zp));
1531 zrele(zp);
1532 goto out;
1533 }
1534
1535 if (fuid_dirtied)
1536 zfs_fuid_sync(zfsvfs, tx);
1537
1538 *zpp = zp;
1539
1540 txtype = zfs_log_create_txtype(Z_DIR, NULL, vap);
1541 zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL,
1542 acl_ids.z_fuidp, vap);
1543
1544 out:
1545 zfs_acl_ids_free(&acl_ids);
1546
1547 dmu_tx_commit(tx);
1548
1549 getnewvnode_drop_reserve();
1550
1551 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1552 zil_commit(zilog, 0);
1553
1554 zfs_exit(zfsvfs, FTAG);
1555 return (error);
1556 }
1557
1558 /*
1559 * Remove a directory subdir entry. If the current working
1560 * directory is the same as the subdir to be removed, the
1561 * remove will fail.
1562 *
1563 * IN: dvp - vnode of directory to remove from.
1564 * name - name of directory to be removed.
1565 * cwd - vnode of current working directory.
1566 * cr - credentials of caller.
1567 * ct - caller context
1568 * flags - case flags
1569 *
1570 * RETURN: 0 on success, error code on failure.
1571 *
1572 * Timestamps:
1573 * dvp - ctime|mtime updated
1574 */
1575 static int
zfs_rmdir_(vnode_t * dvp,vnode_t * vp,const char * name,cred_t * cr)1576 zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
1577 {
1578 znode_t *dzp = VTOZ(dvp);
1579 znode_t *zp = VTOZ(vp);
1580 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1581 zilog_t *zilog;
1582 dmu_tx_t *tx;
1583 int error;
1584
1585 if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
1586 return (error);
1587 if ((error = zfs_verify_zp(zp)) != 0) {
1588 zfs_exit(zfsvfs, FTAG);
1589 return (error);
1590 }
1591 zilog = zfsvfs->z_log;
1592
1593
1594 if ((error = zfs_zaccess_delete(dzp, zp, cr, NULL))) {
1595 goto out;
1596 }
1597
1598 if (vp->v_type != VDIR) {
1599 error = SET_ERROR(ENOTDIR);
1600 goto out;
1601 }
1602
1603 vnevent_rmdir(vp, dvp, name, ct);
1604
1605 tx = dmu_tx_create(zfsvfs->z_os);
1606 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
1607 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1608 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1609 zfs_sa_upgrade_txholds(tx, zp);
1610 zfs_sa_upgrade_txholds(tx, dzp);
1611 dmu_tx_mark_netfree(tx);
1612 error = dmu_tx_assign(tx, DMU_TX_WAIT);
1613 if (error) {
1614 dmu_tx_abort(tx);
1615 zfs_exit(zfsvfs, FTAG);
1616 return (error);
1617 }
1618
1619 error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL);
1620
1621 if (error == 0) {
1622 uint64_t txtype = TX_RMDIR;
1623 zfs_log_remove(zilog, tx, txtype, dzp, name,
1624 ZFS_NO_OBJECT, B_FALSE);
1625 }
1626
1627 dmu_tx_commit(tx);
1628
1629 if (zfsvfs->z_use_namecache)
1630 cache_vop_rmdir(dvp, vp);
1631 out:
1632 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1633 zil_commit(zilog, 0);
1634
1635 zfs_exit(zfsvfs, FTAG);
1636 return (error);
1637 }
1638
1639 int
zfs_rmdir(znode_t * dzp,const char * name,znode_t * cwd,cred_t * cr,int flags)1640 zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd, cred_t *cr, int flags)
1641 {
1642 struct componentname cn;
1643 vnode_t *vp;
1644 int error;
1645
1646 if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
1647 return (error);
1648
1649 error = zfs_rmdir_(ZTOV(dzp), vp, name, cr);
1650 vput(vp);
1651 return (error);
1652 }
1653
1654 /*
1655 * Read as many directory entries as will fit into the provided
1656 * buffer from the given directory cursor position (specified in
1657 * the uio structure).
1658 *
1659 * IN: vp - vnode of directory to read.
1660 * uio - structure supplying read location, range info,
1661 * and return buffer.
1662 * cr - credentials of caller.
1663 * ct - caller context
1664 *
1665 * OUT: uio - updated offset and range, buffer filled.
1666 * eofp - set to true if end-of-file detected.
1667 * ncookies- number of entries in cookies
1668 * cookies - offsets to directory entries
1669 *
1670 * RETURN: 0 on success, error code on failure.
1671 *
1672 * Timestamps:
1673 * vp - atime updated
1674 *
1675 * Note that the low 4 bits of the cookie returned by zap is always zero.
1676 * This allows us to use the low range for "special" directory entries:
1677 * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem,
1678 * we use the offset 2 for the '.zfs' directory.
1679 */
1680 static int
zfs_readdir(vnode_t * vp,zfs_uio_t * uio,cred_t * cr,int * eofp,int * ncookies,cookie_t ** cookies)1681 zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
1682 int *ncookies, cookie_t **cookies)
1683 {
1684 znode_t *zp = VTOZ(vp);
1685 iovec_t *iovp;
1686 dirent64_t *odp;
1687 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1688 objset_t *os;
1689 caddr_t outbuf;
1690 size_t bufsize;
1691 zap_cursor_t zc;
1692 zap_attribute_t *zap;
1693 uint_t bytes_wanted;
1694 uint64_t offset; /* must be unsigned; checks for < 1 */
1695 uint64_t parent;
1696 int local_eof;
1697 int outcount;
1698 int error;
1699 uint8_t prefetch;
1700 uint8_t type;
1701 int ncooks;
1702 cookie_t *cooks = NULL;
1703
1704 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
1705 return (error);
1706
1707 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
1708 &parent, sizeof (parent))) != 0) {
1709 zfs_exit(zfsvfs, FTAG);
1710 return (error);
1711 }
1712
1713 /*
1714 * If we are not given an eof variable,
1715 * use a local one.
1716 */
1717 if (eofp == NULL)
1718 eofp = &local_eof;
1719
1720 /*
1721 * Check for valid iov_len.
1722 */
1723 if (GET_UIO_STRUCT(uio)->uio_iov->iov_len <= 0) {
1724 zfs_exit(zfsvfs, FTAG);
1725 return (SET_ERROR(EINVAL));
1726 }
1727
1728 /*
1729 * Quit if directory has been removed (posix)
1730 */
1731 if ((*eofp = zp->z_unlinked) != 0) {
1732 zfs_exit(zfsvfs, FTAG);
1733 return (0);
1734 }
1735
1736 error = 0;
1737 os = zfsvfs->z_os;
1738 offset = zfs_uio_offset(uio);
1739 prefetch = zp->z_zn_prefetch;
1740 zap = zap_attribute_long_alloc();
1741
1742 /*
1743 * Initialize the iterator cursor.
1744 */
1745 if (offset <= 3) {
1746 /*
1747 * Start iteration from the beginning of the directory.
1748 */
1749 zap_cursor_init(&zc, os, zp->z_id);
1750 } else {
1751 /*
1752 * The offset is a serialized cursor.
1753 */
1754 zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
1755 }
1756
1757 /*
1758 * Get space to change directory entries into fs independent format.
1759 */
1760 iovp = GET_UIO_STRUCT(uio)->uio_iov;
1761 bytes_wanted = iovp->iov_len;
1762 if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1) {
1763 bufsize = bytes_wanted;
1764 outbuf = kmem_alloc(bufsize, KM_SLEEP);
1765 odp = (struct dirent64 *)outbuf;
1766 } else {
1767 bufsize = bytes_wanted;
1768 outbuf = NULL;
1769 odp = (struct dirent64 *)iovp->iov_base;
1770 }
1771
1772 if (ncookies != NULL) {
1773 /*
1774 * Minimum entry size is dirent size and 1 byte for a file name.
1775 */
1776 ncooks = zfs_uio_resid(uio) / (sizeof (struct dirent) -
1777 sizeof (((struct dirent *)NULL)->d_name) + 1);
1778 cooks = malloc(ncooks * sizeof (*cooks), M_TEMP, M_WAITOK);
1779 *cookies = cooks;
1780 *ncookies = ncooks;
1781 }
1782
1783 /*
1784 * Transform to file-system independent format
1785 */
1786 outcount = 0;
1787 while (outcount < bytes_wanted) {
1788 ino64_t objnum;
1789 ushort_t reclen;
1790 off64_t *next = NULL;
1791
1792 /*
1793 * Special case `.', `..', and `.zfs'.
1794 */
1795 if (offset == 0) {
1796 (void) strcpy(zap->za_name, ".");
1797 zap->za_normalization_conflict = 0;
1798 objnum = zp->z_id;
1799 type = DT_DIR;
1800 } else if (offset == 1) {
1801 (void) strcpy(zap->za_name, "..");
1802 zap->za_normalization_conflict = 0;
1803 objnum = parent;
1804 type = DT_DIR;
1805 } else if (offset == 2 && zfs_show_ctldir(zp)) {
1806 (void) strcpy(zap->za_name, ZFS_CTLDIR_NAME);
1807 zap->za_normalization_conflict = 0;
1808 objnum = ZFSCTL_INO_ROOT;
1809 type = DT_DIR;
1810 } else {
1811 /*
1812 * Grab next entry.
1813 */
1814 if ((error = zap_cursor_retrieve(&zc, zap))) {
1815 if ((*eofp = (error == ENOENT)) != 0)
1816 break;
1817 else
1818 goto update;
1819 }
1820
1821 if (zap->za_integer_length != 8 ||
1822 zap->za_num_integers != 1) {
1823 cmn_err(CE_WARN, "zap_readdir: bad directory "
1824 "entry, obj = %lld, offset = %lld\n",
1825 (u_longlong_t)zp->z_id,
1826 (u_longlong_t)offset);
1827 error = SET_ERROR(ENXIO);
1828 goto update;
1829 }
1830
1831 objnum = ZFS_DIRENT_OBJ(zap->za_first_integer);
1832 /*
1833 * MacOS X can extract the object type here such as:
1834 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
1835 */
1836 type = ZFS_DIRENT_TYPE(zap->za_first_integer);
1837 }
1838
1839 reclen = DIRENT64_RECLEN(strlen(zap->za_name));
1840
1841 /*
1842 * Will this entry fit in the buffer?
1843 */
1844 if (outcount + reclen > bufsize) {
1845 /*
1846 * Did we manage to fit anything in the buffer?
1847 */
1848 if (!outcount) {
1849 error = SET_ERROR(EINVAL);
1850 goto update;
1851 }
1852 break;
1853 }
1854 /*
1855 * Add normal entry:
1856 */
1857 odp->d_ino = objnum;
1858 odp->d_reclen = reclen;
1859 odp->d_namlen = strlen(zap->za_name);
1860 /* NOTE: d_off is the offset for the *next* entry. */
1861 next = &odp->d_off;
1862 strlcpy(odp->d_name, zap->za_name, odp->d_namlen + 1);
1863 odp->d_type = type;
1864 dirent_terminate(odp);
1865 odp = (dirent64_t *)((intptr_t)odp + reclen);
1866
1867 outcount += reclen;
1868
1869 ASSERT3S(outcount, <=, bufsize);
1870
1871 if (prefetch)
1872 dmu_prefetch_dnode(os, objnum, ZIO_PRIORITY_SYNC_READ);
1873
1874 /*
1875 * Move to the next entry, fill in the previous offset.
1876 */
1877 if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
1878 zap_cursor_advance(&zc);
1879 offset = zap_cursor_serialize(&zc);
1880 } else {
1881 offset += 1;
1882 }
1883
1884 /* Fill the offset right after advancing the cursor. */
1885 if (next != NULL)
1886 *next = offset;
1887 if (cooks != NULL) {
1888 *cooks++ = offset;
1889 ncooks--;
1890 KASSERT(ncooks >= 0, ("ncookies=%d", ncooks));
1891 }
1892 }
1893 zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */
1894
1895 /* Subtract unused cookies */
1896 if (ncookies != NULL)
1897 *ncookies -= ncooks;
1898
1899 if (zfs_uio_segflg(uio) == UIO_SYSSPACE && zfs_uio_iovcnt(uio) == 1) {
1900 iovp->iov_base += outcount;
1901 iovp->iov_len -= outcount;
1902 zfs_uio_resid(uio) -= outcount;
1903 } else if ((error =
1904 zfs_uiomove(outbuf, (long)outcount, UIO_READ, uio))) {
1905 /*
1906 * Reset the pointer.
1907 */
1908 offset = zfs_uio_offset(uio);
1909 }
1910
1911 update:
1912 zap_cursor_fini(&zc);
1913 zap_attribute_free(zap);
1914 if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1)
1915 kmem_free(outbuf, bufsize);
1916
1917 if (error == ENOENT)
1918 error = 0;
1919
1920 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
1921
1922 zfs_uio_setoffset(uio, offset);
1923 zfs_exit(zfsvfs, FTAG);
1924 if (error != 0 && cookies != NULL) {
1925 free(*cookies, M_TEMP);
1926 *cookies = NULL;
1927 *ncookies = 0;
1928 }
1929 return (error);
1930 }
1931
1932 /*
1933 * Get the requested file attributes and place them in the provided
1934 * vattr structure.
1935 *
1936 * IN: vp - vnode of file.
1937 * vap - va_mask identifies requested attributes.
1938 * If AT_XVATTR set, then optional attrs are requested
1939 * flags - ATTR_NOACLCHECK (CIFS server context)
1940 * cr - credentials of caller.
1941 *
1942 * OUT: vap - attribute values.
1943 *
1944 * RETURN: 0 (always succeeds).
1945 */
1946 static int
zfs_getattr(vnode_t * vp,vattr_t * vap,int flags,cred_t * cr)1947 zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
1948 {
1949 znode_t *zp = VTOZ(vp);
1950 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1951 int error = 0;
1952 uint32_t blksize;
1953 u_longlong_t nblocks;
1954 uint64_t mtime[2], ctime[2], crtime[2], rdev;
1955 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */
1956 xoptattr_t *xoap = NULL;
1957 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
1958 sa_bulk_attr_t bulk[4];
1959 int count = 0;
1960
1961 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
1962 return (error);
1963
1964 zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
1965
1966 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
1967 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
1968 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
1969 if (vp->v_type == VBLK || vp->v_type == VCHR)
1970 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL,
1971 &rdev, 8);
1972
1973 if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
1974 zfs_exit(zfsvfs, FTAG);
1975 return (error);
1976 }
1977
1978 /*
1979 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
1980 * Also, if we are the owner don't bother, since owner should
1981 * always be allowed to read basic attributes of file.
1982 */
1983 if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
1984 (vap->va_uid != crgetuid(cr))) {
1985 if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
1986 skipaclchk, cr, NULL))) {
1987 zfs_exit(zfsvfs, FTAG);
1988 return (error);
1989 }
1990 }
1991
1992 /*
1993 * Return all attributes. It's cheaper to provide the answer
1994 * than to determine whether we were asked the question.
1995 */
1996
1997 vap->va_type = IFTOVT(zp->z_mode);
1998 vap->va_mode = zp->z_mode & ~S_IFMT;
1999 vn_fsid(vp, vap);
2000 vap->va_nodeid = zp->z_id;
2001 vap->va_nlink = zp->z_links;
2002 if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp) &&
2003 zp->z_links < ZFS_LINK_MAX)
2004 vap->va_nlink++;
2005 vap->va_size = zp->z_size;
2006 if (vp->v_type == VBLK || vp->v_type == VCHR)
2007 vap->va_rdev = zfs_cmpldev(rdev);
2008 else
2009 vap->va_rdev = 0;
2010 vap->va_gen = zp->z_gen;
2011 vap->va_flags = 0; /* FreeBSD: Reset chflags(2) flags. */
2012 vap->va_filerev = zp->z_seq;
2013
2014 /*
2015 * Add in any requested optional attributes and the create time.
2016 * Also set the corresponding bits in the returned attribute bitmap.
2017 */
2018 if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
2019 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
2020 xoap->xoa_archive =
2021 ((zp->z_pflags & ZFS_ARCHIVE) != 0);
2022 XVA_SET_RTN(xvap, XAT_ARCHIVE);
2023 }
2024
2025 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
2026 xoap->xoa_readonly =
2027 ((zp->z_pflags & ZFS_READONLY) != 0);
2028 XVA_SET_RTN(xvap, XAT_READONLY);
2029 }
2030
2031 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
2032 xoap->xoa_system =
2033 ((zp->z_pflags & ZFS_SYSTEM) != 0);
2034 XVA_SET_RTN(xvap, XAT_SYSTEM);
2035 }
2036
2037 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
2038 xoap->xoa_hidden =
2039 ((zp->z_pflags & ZFS_HIDDEN) != 0);
2040 XVA_SET_RTN(xvap, XAT_HIDDEN);
2041 }
2042
2043 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
2044 xoap->xoa_nounlink =
2045 ((zp->z_pflags & ZFS_NOUNLINK) != 0);
2046 XVA_SET_RTN(xvap, XAT_NOUNLINK);
2047 }
2048
2049 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
2050 xoap->xoa_immutable =
2051 ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
2052 XVA_SET_RTN(xvap, XAT_IMMUTABLE);
2053 }
2054
2055 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
2056 xoap->xoa_appendonly =
2057 ((zp->z_pflags & ZFS_APPENDONLY) != 0);
2058 XVA_SET_RTN(xvap, XAT_APPENDONLY);
2059 }
2060
2061 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
2062 xoap->xoa_nodump =
2063 ((zp->z_pflags & ZFS_NODUMP) != 0);
2064 XVA_SET_RTN(xvap, XAT_NODUMP);
2065 }
2066
2067 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
2068 xoap->xoa_opaque =
2069 ((zp->z_pflags & ZFS_OPAQUE) != 0);
2070 XVA_SET_RTN(xvap, XAT_OPAQUE);
2071 }
2072
2073 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
2074 xoap->xoa_av_quarantined =
2075 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
2076 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
2077 }
2078
2079 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
2080 xoap->xoa_av_modified =
2081 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
2082 XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
2083 }
2084
2085 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
2086 vp->v_type == VREG) {
2087 zfs_sa_get_scanstamp(zp, xvap);
2088 }
2089
2090 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
2091 xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
2092 XVA_SET_RTN(xvap, XAT_REPARSE);
2093 }
2094 if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
2095 xoap->xoa_generation = zp->z_gen;
2096 XVA_SET_RTN(xvap, XAT_GEN);
2097 }
2098
2099 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
2100 xoap->xoa_offline =
2101 ((zp->z_pflags & ZFS_OFFLINE) != 0);
2102 XVA_SET_RTN(xvap, XAT_OFFLINE);
2103 }
2104
2105 if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
2106 xoap->xoa_sparse =
2107 ((zp->z_pflags & ZFS_SPARSE) != 0);
2108 XVA_SET_RTN(xvap, XAT_SPARSE);
2109 }
2110
2111 if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
2112 xoap->xoa_projinherit =
2113 ((zp->z_pflags & ZFS_PROJINHERIT) != 0);
2114 XVA_SET_RTN(xvap, XAT_PROJINHERIT);
2115 }
2116
2117 if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
2118 xoap->xoa_projid = zp->z_projid;
2119 XVA_SET_RTN(xvap, XAT_PROJID);
2120 }
2121 }
2122
2123 ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime);
2124 ZFS_TIME_DECODE(&vap->va_mtime, mtime);
2125 ZFS_TIME_DECODE(&vap->va_ctime, ctime);
2126 ZFS_TIME_DECODE(&vap->va_birthtime, crtime);
2127
2128
2129 sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
2130 vap->va_blksize = blksize;
2131 vap->va_bytes = nblocks << 9; /* nblocks * 512 */
2132
2133 if (zp->z_blksz == 0) {
2134 /*
2135 * Block size hasn't been set; suggest maximal I/O transfers.
2136 */
2137 vap->va_blksize = zfsvfs->z_max_blksz;
2138 }
2139
2140 zfs_exit(zfsvfs, FTAG);
2141 return (0);
2142 }
2143
2144 /*
2145 * For the operation of changing file's user/group/project, we need to
2146 * handle not only the main object that is assigned to the file directly,
2147 * but also the ones that are used by the file via hidden xattr directory.
2148 *
2149 * Because the xattr directory may contains many EA entries, as to it may
2150 * be impossible to change all of them via the transaction of changing the
2151 * main object's user/group/project attributes. Then we have to change them
2152 * via other multiple independent transactions one by one. It may be not good
2153 * solution, but we have no better idea yet.
2154 */
2155 static int
zfs_setattr_dir(znode_t * dzp)2156 zfs_setattr_dir(znode_t *dzp)
2157 {
2158 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
2159 objset_t *os = zfsvfs->z_os;
2160 zap_cursor_t zc;
2161 zap_attribute_t *zap;
2162 znode_t *zp = NULL;
2163 dmu_tx_t *tx = NULL;
2164 uint64_t uid, gid;
2165 sa_bulk_attr_t bulk[4];
2166 int count;
2167 int err;
2168
2169 zap = zap_attribute_alloc();
2170 zap_cursor_init(&zc, os, dzp->z_id);
2171 while ((err = zap_cursor_retrieve(&zc, zap)) == 0) {
2172 count = 0;
2173 if (zap->za_integer_length != 8 || zap->za_num_integers != 1) {
2174 err = ENXIO;
2175 break;
2176 }
2177
2178 err = zfs_dirent_lookup(dzp, zap->za_name, &zp, ZEXISTS);
2179 if (err == ENOENT)
2180 goto next;
2181 if (err)
2182 break;
2183
2184 if (zp->z_uid == dzp->z_uid &&
2185 zp->z_gid == dzp->z_gid &&
2186 zp->z_projid == dzp->z_projid)
2187 goto next;
2188
2189 tx = dmu_tx_create(os);
2190 if (!(zp->z_pflags & ZFS_PROJID))
2191 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
2192 else
2193 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
2194
2195 err = dmu_tx_assign(tx, DMU_TX_WAIT);
2196 if (err)
2197 break;
2198
2199 mutex_enter(&dzp->z_lock);
2200
2201 if (zp->z_uid != dzp->z_uid) {
2202 uid = dzp->z_uid;
2203 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
2204 &uid, sizeof (uid));
2205 zp->z_uid = uid;
2206 }
2207
2208 if (zp->z_gid != dzp->z_gid) {
2209 gid = dzp->z_gid;
2210 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
2211 &gid, sizeof (gid));
2212 zp->z_gid = gid;
2213 }
2214
2215 uint64_t projid = dzp->z_projid;
2216 if (zp->z_projid != projid) {
2217 if (!(zp->z_pflags & ZFS_PROJID)) {
2218 err = sa_add_projid(zp->z_sa_hdl, tx, projid);
2219 if (unlikely(err == EEXIST)) {
2220 err = 0;
2221 } else if (err != 0) {
2222 goto sa_add_projid_err;
2223 } else {
2224 projid = ZFS_INVALID_PROJID;
2225 }
2226 }
2227
2228 if (projid != ZFS_INVALID_PROJID) {
2229 zp->z_projid = projid;
2230 SA_ADD_BULK_ATTR(bulk, count,
2231 SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid,
2232 sizeof (zp->z_projid));
2233 }
2234 }
2235
2236 sa_add_projid_err:
2237 mutex_exit(&dzp->z_lock);
2238
2239 if (likely(count > 0)) {
2240 err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
2241 dmu_tx_commit(tx);
2242 } else if (projid == ZFS_INVALID_PROJID) {
2243 dmu_tx_commit(tx);
2244 } else {
2245 dmu_tx_abort(tx);
2246 }
2247 tx = NULL;
2248 if (err != 0 && err != ENOENT)
2249 break;
2250
2251 next:
2252 if (zp) {
2253 zrele(zp);
2254 zp = NULL;
2255 }
2256 zap_cursor_advance(&zc);
2257 }
2258
2259 if (tx)
2260 dmu_tx_abort(tx);
2261 if (zp) {
2262 zrele(zp);
2263 }
2264 zap_cursor_fini(&zc);
2265 zap_attribute_free(zap);
2266
2267 return (err == ENOENT ? 0 : err);
2268 }
2269
2270 /*
2271 * Set the file attributes to the values contained in the
2272 * vattr structure.
2273 *
2274 * IN: zp - znode of file to be modified.
2275 * vap - new attribute values.
2276 * If AT_XVATTR set, then optional attrs are being set
2277 * flags - ATTR_UTIME set if non-default time values provided.
2278 * - ATTR_NOACLCHECK (CIFS context only).
2279 * cr - credentials of caller.
2280 * mnt_ns - Unused on FreeBSD
2281 *
2282 * RETURN: 0 on success, error code on failure.
2283 *
2284 * Timestamps:
2285 * vp - ctime updated, mtime updated if size changed.
2286 */
int
zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns)
{
	vnode_t *vp = ZTOV(zp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	objset_t *os;
	zilog_t *zilog;
	dmu_tx_t *tx;
	vattr_t oldva;
	xvattr_t tmpxvattr;
	uint_t mask = vap->va_mask;
	uint_t saved_mask = 0;
	uint64_t saved_mode;
	int trim_mask = 0;
	uint64_t new_mode;
	uint64_t new_uid, new_gid;
	uint64_t xattr_obj;
	uint64_t mtime[2], ctime[2];
	uint64_t projid = ZFS_INVALID_PROJID;
	znode_t *attrzp;
	int need_policy = FALSE;
	int err, err2;
	zfs_fuid_info_t *fuidp = NULL;
	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
	xoptattr_t *xoap;
	zfs_acl_t *aclp;
	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
	boolean_t fuid_dirtied = B_FALSE;
	boolean_t handle_eadir = B_FALSE;
	sa_bulk_attr_t bulk[7], xattr_bulk[7];
	int count = 0, xattr_count = 0;

	if (mask == 0)
		return (0);

	if (mask & AT_NOSET)
		return (SET_ERROR(EINVAL));

	if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
		return (err);

	os = zfsvfs->z_os;
	zilog = zfsvfs->z_log;

	/*
	 * Make sure that if we have ephemeral uid/gid or xvattr specified
	 * that file system is at proper version level
	 */

	if (zfsvfs->z_use_fuids == B_FALSE &&
	    (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
	    ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
	    (mask & AT_XVATTR))) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EINVAL));
	}

	if (mask & AT_SIZE && vp->v_type == VDIR) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EISDIR));
	}

	if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * If this is an xvattr_t, then get a pointer to the structure of
	 * optional attributes.  If this is NULL, then we have a vattr_t.
	 */
	xoap = xva_getxoptattr(xvap);

	xva_init(&tmpxvattr);

	/*
	 * Immutable files can only alter immutable bit and atime
	 */
	if ((zp->z_pflags & ZFS_IMMUTABLE) &&
	    ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
	    ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EPERM));
	}

	/*
	 * Note: ZFS_READONLY is handled in zfs_zaccess_common.
	 */

	/*
	 * Verify timestamps doesn't overflow 32 bits.
	 * ZFS can handle large timestamps, but 32bit syscalls can't
	 * handle times greater than 2039.  This check should be removed
	 * once large timestamps are fully supported.
	 */
	if (mask & (AT_ATIME | AT_MTIME)) {
		if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
		    ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
			zfs_exit(zfsvfs, FTAG);
			return (SET_ERROR(EOVERFLOW));
		}
	}
	if (xoap != NULL && (mask & AT_XVATTR)) {
		if (XVA_ISSET_REQ(xvap, XAT_CREATETIME) &&
		    TIMESPEC_OVERFLOW(&vap->va_birthtime)) {
			zfs_exit(zfsvfs, FTAG);
			return (SET_ERROR(EOVERFLOW));
		}

		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
			/*
			 * Project IDs only apply to regular files and
			 * directories on project-quota-enabled datasets.
			 */
			if (!dmu_objset_projectquota_enabled(os) ||
			    (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode))) {
				zfs_exit(zfsvfs, FTAG);
				return (SET_ERROR(EOPNOTSUPP));
			}

			projid = xoap->xoa_projid;
			if (unlikely(projid == ZFS_INVALID_PROJID)) {
				zfs_exit(zfsvfs, FTAG);
				return (SET_ERROR(EINVAL));
			}

			/*
			 * projid == ZFS_INVALID_PROJID from here on means
			 * "no project ID change required".
			 */
			if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID)
				projid = ZFS_INVALID_PROJID;
			else
				need_policy = TRUE;
		}

		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) &&
		    (xoap->xoa_projinherit !=
		    ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) &&
		    (!dmu_objset_projectquota_enabled(os) ||
		    (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode)))) {
			zfs_exit(zfsvfs, FTAG);
			return (SET_ERROR(EOPNOTSUPP));
		}
	}

	attrzp = NULL;
	aclp = NULL;

	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EROFS));
	}

	/*
	 * First validate permissions
	 */

	if (mask & AT_SIZE) {
		/*
		 * XXX - Note, we are not providing any open
		 * mode flags here (like FNDELAY), so we may
		 * block if there are locks present... this
		 * should be addressed in openat().
		 */
		/* XXX - would it be OK to generate a log record here? */
		err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
		if (err) {
			zfs_exit(zfsvfs, FTAG);
			return (err);
		}
	}

	if (mask & (AT_ATIME|AT_MTIME) ||
	    ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
	    XVA_ISSET_REQ(xvap, XAT_READONLY) ||
	    XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
	    XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
	    XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
	    XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
	    XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
		need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
		    skipaclchk, cr, mnt_ns);
	}

	if (mask & (AT_UID|AT_GID)) {
		int idmask = (mask & (AT_UID|AT_GID));
		int take_owner;
		int take_group;

		/*
		 * NOTE: even if a new mode is being set,
		 * we may clear S_ISUID/S_ISGID bits.
		 */

		if (!(mask & AT_MODE))
			vap->va_mode = zp->z_mode;

		/*
		 * Take ownership or chgrp to group we are a member of
		 */

		take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
		take_group = (mask & AT_GID) &&
		    zfs_groupmember(zfsvfs, vap->va_gid, cr);

		/*
		 * If both AT_UID and AT_GID are set then take_owner and
		 * take_group must both be set in order to allow taking
		 * ownership.
		 *
		 * Otherwise, send the check through secpolicy_vnode_setattr()
		 *
		 */

		if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
		    ((idmask == AT_UID) && take_owner) ||
		    ((idmask == AT_GID) && take_group)) {
			if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
			    skipaclchk, cr, mnt_ns) == 0) {
				/*
				 * Remove setuid/setgid for non-privileged users
				 */
				secpolicy_setid_clear(vap, vp, cr);
				trim_mask = (mask & (AT_UID|AT_GID));
			} else {
				need_policy = TRUE;
			}
		} else {
			need_policy = TRUE;
		}
	}

	oldva.va_mode = zp->z_mode;
	zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
	if (mask & AT_XVATTR) {
		/*
		 * Update xvattr mask to include only those attributes
		 * that are actually changing.
		 *
		 * the bits will be restored prior to actually setting
		 * the attributes so the caller thinks they were set.
		 */
		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
			if (xoap->xoa_appendonly !=
			    ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_APPENDONLY);
				XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
			if (xoap->xoa_projinherit !=
			    ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_PROJINHERIT);
				XVA_SET_REQ(&tmpxvattr, XAT_PROJINHERIT);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
			if (xoap->xoa_nounlink !=
			    ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_NOUNLINK);
				XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
			if (xoap->xoa_immutable !=
			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
				XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
			if (xoap->xoa_nodump !=
			    ((zp->z_pflags & ZFS_NODUMP) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_NODUMP);
				XVA_SET_REQ(&tmpxvattr, XAT_NODUMP);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
			if (xoap->xoa_av_modified !=
			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
				XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
			if ((vp->v_type != VREG &&
			    xoap->xoa_av_quarantined) ||
			    xoap->xoa_av_quarantined !=
			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
				need_policy = TRUE;
			} else {
				XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
				XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED);
			}
		}

		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
			zfs_exit(zfsvfs, FTAG);
			return (SET_ERROR(EPERM));
		}

		if (need_policy == FALSE &&
		    (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
		    XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
			need_policy = TRUE;
		}
	}

	if (mask & AT_MODE) {
		if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr,
		    mnt_ns) == 0) {
			err = secpolicy_setid_setsticky_clear(vp, vap,
			    &oldva, cr);
			if (err) {
				zfs_exit(zfsvfs, FTAG);
				return (err);
			}
			trim_mask |= AT_MODE;
		} else {
			need_policy = TRUE;
		}
	}

	if (need_policy) {
		/*
		 * If trim_mask is set then take ownership
		 * has been granted or write_acl is present and user
		 * has the ability to modify mode.  In that case remove
		 * UID|GID and or MODE from mask so that
		 * secpolicy_vnode_setattr() doesn't revoke it.
		 */

		if (trim_mask) {
			saved_mask = vap->va_mask;
			vap->va_mask &= ~trim_mask;
			if (trim_mask & AT_MODE) {
				/*
				 * Save the mode, as secpolicy_vnode_setattr()
				 * will overwrite it with ova.va_mode.
				 */
				saved_mode = vap->va_mode;
			}
		}
		err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
		    (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
		if (err) {
			zfs_exit(zfsvfs, FTAG);
			return (err);
		}

		if (trim_mask) {
			vap->va_mask |= saved_mask;
			if (trim_mask & AT_MODE) {
				/*
				 * Recover the mode after
				 * secpolicy_vnode_setattr().
				 */
				vap->va_mode = saved_mode;
			}
		}
	}

	/*
	 * secpolicy_vnode_setattr, or take ownership may have
	 * changed va_mask
	 */
	mask = vap->va_mask;

	if ((mask & (AT_UID | AT_GID)) || projid != ZFS_INVALID_PROJID) {
		/*
		 * An ownership/project change must also be propagated to
		 * the hidden xattr directory (attrzp) and, after commit,
		 * to all of its entries via zfs_setattr_dir().
		 */
		handle_eadir = B_TRUE;
		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
		    &xattr_obj, sizeof (xattr_obj));

		if (err == 0 && xattr_obj) {
			err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp);
			if (err == 0) {
				err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE);
				if (err != 0)
					vrele(ZTOV(attrzp));
			}
			if (err)
				goto out2;
		}
		if (mask & AT_UID) {
			new_uid = zfs_fuid_create(zfsvfs,
			    (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
			if (new_uid != zp->z_uid &&
			    zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT,
			    new_uid)) {
				if (attrzp)
					vput(ZTOV(attrzp));
				err = SET_ERROR(EDQUOT);
				goto out2;
			}
		}

		if (mask & AT_GID) {
			new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
			    cr, ZFS_GROUP, &fuidp);
			if (new_gid != zp->z_gid &&
			    zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT,
			    new_gid)) {
				if (attrzp)
					vput(ZTOV(attrzp));
				err = SET_ERROR(EDQUOT);
				goto out2;
			}
		}

		if (projid != ZFS_INVALID_PROJID &&
		    zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, projid)) {
			if (attrzp)
				vput(ZTOV(attrzp));
			err = SET_ERROR(EDQUOT);
			goto out2;
		}
	}
	tx = dmu_tx_create(os);

	if (mask & AT_MODE) {
		uint64_t pmode = zp->z_mode;
		uint64_t acl_obj;
		new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);

		if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED &&
		    !(zp->z_pflags & ZFS_ACL_TRIVIAL)) {
			err = SET_ERROR(EPERM);
			goto out;
		}

		if ((err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)))
			goto out;

		if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
			/*
			 * Are we upgrading ACL from old V0 format
			 * to V1 format?
			 */
			if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
			    zfs_znode_acl_version(zp) ==
			    ZFS_ACL_VERSION_INITIAL) {
				dmu_tx_hold_free(tx, acl_obj, 0,
				    DMU_OBJECT_END);
				dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
				    0, aclp->z_acl_bytes);
			} else {
				dmu_tx_hold_write(tx, acl_obj, 0,
				    aclp->z_acl_bytes);
			}
		} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
			    0, aclp->z_acl_bytes);
		}
		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
	} else {
		/*
		 * SA layout may grow if a scanstamp is being set or a
		 * project ID slot must be added to an old-layout object.
		 */
		if (((mask & AT_XVATTR) &&
		    XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ||
		    (projid != ZFS_INVALID_PROJID &&
		    !(zp->z_pflags & ZFS_PROJID)))
			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
		else
			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	}

	if (attrzp) {
		dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
	}

	fuid_dirtied = zfsvfs->z_fuid_dirty;
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);

	zfs_sa_upgrade_txholds(tx, zp);

	err = dmu_tx_assign(tx, DMU_TX_WAIT);
	if (err)
		goto out;

	count = 0;
	/*
	 * Set each attribute requested.
	 * We group settings according to the locks they need to acquire.
	 *
	 * Note: you cannot set ctime directly, although it will be
	 * updated as a side-effect of calling this function.
	 */

	if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) {
		/*
		 * For the existed object that is upgraded from old system,
		 * its on-disk layout has no slot for the project ID attribute.
		 * But quota accounting logic needs to access related slots by
		 * offset directly.  So we need to adjust old objects' layout
		 * to make the project ID to some unified and fixed offset.
		 */
		if (attrzp)
			err = sa_add_projid(attrzp->z_sa_hdl, tx, projid);
		if (err == 0)
			err = sa_add_projid(zp->z_sa_hdl, tx, projid);

		if (unlikely(err == EEXIST))
			err = 0;
		else if (err != 0)
			goto out;
		else
			/* projid was recorded by sa_add_projid(). */
			projid = ZFS_INVALID_PROJID;
	}

	if (mask & (AT_UID|AT_GID|AT_MODE))
		mutex_enter(&zp->z_acl_lock);

	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, sizeof (zp->z_pflags));

	if (attrzp) {
		if (mask & (AT_UID|AT_GID|AT_MODE))
			mutex_enter(&attrzp->z_acl_lock);
		SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
		    SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
		    sizeof (attrzp->z_pflags));
		if (projid != ZFS_INVALID_PROJID) {
			attrzp->z_projid = projid;
			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
			    SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid,
			    sizeof (attrzp->z_projid));
		}
	}

	if (mask & (AT_UID|AT_GID)) {

		if (mask & AT_UID) {
			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
			    &new_uid, sizeof (new_uid));
			zp->z_uid = new_uid;
			if (attrzp) {
				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
				    SA_ZPL_UID(zfsvfs), NULL, &new_uid,
				    sizeof (new_uid));
				attrzp->z_uid = new_uid;
			}
		}

		if (mask & AT_GID) {
			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
			    NULL, &new_gid, sizeof (new_gid));
			zp->z_gid = new_gid;
			if (attrzp) {
				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
				    SA_ZPL_GID(zfsvfs), NULL, &new_gid,
				    sizeof (new_gid));
				attrzp->z_gid = new_gid;
			}
		}
		if (!(mask & AT_MODE)) {
			/*
			 * The bulk entry records &new_mode; its value is
			 * read later at sa_bulk_update(), so assigning it
			 * after SA_ADD_BULK_ATTR is fine.
			 */
			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
			    NULL, &new_mode, sizeof (new_mode));
			new_mode = zp->z_mode;
		}
		err = zfs_acl_chown_setattr(zp);
		ASSERT0(err);
		if (attrzp) {
			vn_seqc_write_begin(ZTOV(attrzp));
			err = zfs_acl_chown_setattr(attrzp);
			vn_seqc_write_end(ZTOV(attrzp));
			ASSERT0(err);
		}
	}

	if (mask & AT_MODE) {
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
		    &new_mode, sizeof (new_mode));
		zp->z_mode = new_mode;
		ASSERT3P(aclp, !=, NULL);
		err = zfs_aclset_common(zp, aclp, cr, tx);
		ASSERT0(err);
		if (zp->z_acl_cached)
			zfs_acl_free(zp->z_acl_cached);
		zp->z_acl_cached = aclp;
		aclp = NULL;	/* ownership transferred to znode cache */
	}

	if (mask & AT_ATIME) {
		ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime);
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
		    &zp->z_atime, sizeof (zp->z_atime));
	}

	if (mask & AT_MTIME) {
		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
		    mtime, sizeof (mtime));
	}

	if (projid != ZFS_INVALID_PROJID) {
		zp->z_projid = projid;
		SA_ADD_BULK_ATTR(bulk, count,
		    SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid,
		    sizeof (zp->z_projid));
	}

	/* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
	if (mask & AT_SIZE && !(mask & AT_MTIME)) {
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
		    NULL, mtime, sizeof (mtime));
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
		    &ctime, sizeof (ctime));
		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
	} else if (mask != 0) {
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
		    &ctime, sizeof (ctime));
		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime);
		if (attrzp) {
			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
			    SA_ZPL_CTIME(zfsvfs), NULL,
			    &ctime, sizeof (ctime));
			zfs_tstamp_update_setup(attrzp, STATE_CHANGED,
			    mtime, ctime);
		}
	}

	/*
	 * Do this after setting timestamps to prevent timestamp
	 * update from toggling bit
	 */

	if (xoap && (mask & AT_XVATTR)) {

		if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
			xoap->xoa_createtime = vap->va_birthtime;
		/*
		 * restore trimmed off masks
		 * so that return masks can be set for caller.
		 */

		if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) {
			XVA_SET_REQ(xvap, XAT_APPENDONLY);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) {
			XVA_SET_REQ(xvap, XAT_NOUNLINK);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) {
			XVA_SET_REQ(xvap, XAT_IMMUTABLE);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) {
			XVA_SET_REQ(xvap, XAT_NODUMP);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) {
			XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) {
			XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
		}
		if (XVA_ISSET_REQ(&tmpxvattr, XAT_PROJINHERIT)) {
			XVA_SET_REQ(xvap, XAT_PROJINHERIT);
		}

		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
			ASSERT3S(vp->v_type, ==, VREG);

		zfs_xvattr_set(zp, xvap, tx);
	}

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	if (mask != 0)
		zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);

	if (mask & (AT_UID|AT_GID|AT_MODE))
		mutex_exit(&zp->z_acl_lock);

	if (attrzp) {
		if (mask & (AT_UID|AT_GID|AT_MODE))
			mutex_exit(&attrzp->z_acl_lock);
	}
out:
	/* Flush the xattr directory's pending attrs before committing. */
	if (err == 0 && attrzp) {
		err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
		    xattr_count, tx);
		ASSERT0(err2);
	}

	if (attrzp)
		vput(ZTOV(attrzp));

	if (aclp)
		zfs_acl_free(aclp);

	if (fuidp) {
		zfs_fuid_info_free(fuidp);
		fuidp = NULL;
	}

	if (err) {
		dmu_tx_abort(tx);
	} else {
		err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
		dmu_tx_commit(tx);
		if (attrzp) {
			/*
			 * Propagate uid/gid/projid to the entries of the
			 * hidden xattr directory (separate transactions).
			 */
			if (err2 == 0 && handle_eadir)
				err = zfs_setattr_dir(attrzp);
		}
	}

out2:
	if (os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	zfs_exit(zfsvfs, FTAG);
	return (err);
}
3010
3011 /*
3012 * Look up the directory entries corresponding to the source and target
3013 * directory/name pairs.
3014 */
static int
zfs_rename_relock_lookup(znode_t *sdzp, const struct componentname *scnp,
    znode_t **szpp, znode_t *tdzp, const struct componentname *tcnp,
    znode_t **tzpp)
{
	zfsvfs_t *zfsvfs;
	znode_t *szp, *tzp;
	int error;

	/*
	 * Before using sdzp and tdzp we must ensure that they are live.
	 * As a porting legacy from illumos we have two things to worry
	 * about.  One is typical for FreeBSD and it is that the vnode is
	 * not reclaimed (doomed).  The other is that the znode is live.
	 * The current code can invalidate the znode without acquiring the
	 * corresponding vnode lock if the object represented by the znode
	 * and vnode is no longer valid after a rollback or receive operation.
	 * z_teardown_lock hidden behind zfs_enter and zfs_exit is the lock
	 * that protects the znodes from the invalidation.
	 */
	zfsvfs = sdzp->z_zfsvfs;
	ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
	if ((error = zfs_enter_verify_zp(zfsvfs, sdzp, FTAG)) != 0)
		return (error);
	if ((error = zfs_verify_zp(tdzp)) != 0) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	/*
	 * Re-resolve svp to be certain it still exists and fetch the
	 * correct vnode.
	 */
	error = zfs_dirent_lookup(sdzp, scnp->cn_nameptr, &szp, ZEXISTS);
	if (error != 0) {
		/* Source entry invalid or not there. */
		if ((scnp->cn_flags & ISDOTDOT) != 0 ||
		    (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.'))
			error = SET_ERROR(EINVAL);	/* renaming "." / ".." */
		goto out;
	}
	*szpp = szp;

	/*
	 * Re-resolve tvp, if it disappeared we just carry on.
	 */
	error = zfs_dirent_lookup(tdzp, tcnp->cn_nameptr, &tzp, 0);
	if (error != 0) {
		/* Drop the source reference taken above before failing. */
		vrele(ZTOV(szp));
		if ((tcnp->cn_flags & ISDOTDOT) != 0)
			error = SET_ERROR(EINVAL);
		goto out;
	}
	*tzpp = tzp;
out:
	zfs_exit(zfsvfs, FTAG);
	return (error);
}
3073
3074 /*
3075 * We acquire all but fdvp locks using non-blocking acquisitions. If we
3076 * fail to acquire any lock in the path we will drop all held locks,
3077 * acquire the new lock in a blocking fashion, and then release it and
3078 * restart the rename. This acquire/release step ensures that we do not
3079 * spin on a lock waiting for release. On error release all vnode locks
3080 * and decrement references the way tmpfs_rename() would do.
3081 */
static int
zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
    struct vnode *tdvp, struct vnode **tvpp,
    const struct componentname *scnp, const struct componentname *tcnp)
{
	struct vnode *nvp, *svp, *tvp;
	znode_t *sdzp, *tdzp, *szp, *tzp;
	int error;

	/* Start from a clean slate: only references, no locks, are held. */
	VOP_UNLOCK(tdvp);
	if (*tvpp != NULL && *tvpp != tdvp)
		VOP_UNLOCK(*tvpp);

relock:
	/* sdvp is the only vnode acquired with a blocking lock up front. */
	error = vn_lock(sdvp, LK_EXCLUSIVE);
	if (error)
		goto out;
	error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
	if (error != 0) {
		VOP_UNLOCK(sdvp);
		if (error != EBUSY)
			goto out;
		/*
		 * Contended: take the lock blocking, then immediately
		 * release it and restart so the lock order stays safe.
		 */
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (error)
			goto out;
		VOP_UNLOCK(tdvp);
		goto relock;
	}
	tdzp = VTOZ(tdvp);
	sdzp = VTOZ(sdvp);

	error = zfs_rename_relock_lookup(sdzp, scnp, &szp, tdzp, tcnp, &tzp);
	if (error != 0) {
		VOP_UNLOCK(sdvp);
		VOP_UNLOCK(tdvp);
		goto out;
	}
	svp = ZTOV(szp);
	tvp = tzp != NULL ? ZTOV(tzp) : NULL;

	/*
	 * Now try acquire locks on svp and tvp.
	 */
	nvp = svp;
	error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
	if (error != 0) {
		VOP_UNLOCK(sdvp);
		VOP_UNLOCK(tdvp);
		if (tvp != NULL)
			vrele(tvp);
		if (error != EBUSY) {
			vrele(nvp);
			goto out;
		}
		/* Same acquire/release/restart dance as for tdvp above. */
		error = vn_lock(nvp, LK_EXCLUSIVE);
		if (error != 0) {
			vrele(nvp);
			goto out;
		}
		VOP_UNLOCK(nvp);
		/*
		 * Concurrent rename race.
		 * XXX ?
		 */
		if (nvp == tdvp) {
			vrele(nvp);
			error = SET_ERROR(EINVAL);
			goto out;
		}
		vrele(*svpp);
		*svpp = nvp;
		goto relock;
	}
	/* Swap the caller's stale source vnode for the re-resolved one. */
	vrele(*svpp);
	*svpp = nvp;

	if (*tvpp != NULL)
		vrele(*tvpp);
	*tvpp = NULL;
	if (tvp != NULL) {
		nvp = tvp;
		error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
		if (error != 0) {
			VOP_UNLOCK(sdvp);
			VOP_UNLOCK(tdvp);
			VOP_UNLOCK(*svpp);
			if (error != EBUSY) {
				vrele(nvp);
				goto out;
			}
			error = vn_lock(nvp, LK_EXCLUSIVE);
			if (error != 0) {
				vrele(nvp);
				goto out;
			}
			vput(nvp);
			goto relock;
		}
		*tvpp = nvp;
	}

	/* Success: sdvp, tdvp, *svpp and (if present) *tvpp are all locked. */
	return (0);

out:
	return (error);
}
3188
3189 /*
3190 * Note that we must use VRELE_ASYNC in this function as it walks
3191 * up the directory tree and vrele may need to acquire an exclusive
3192 * lock if a last reference to a vnode is dropped.
3193 */
static int
zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp)
{
	zfsvfs_t *zfsvfs;
	znode_t *zp, *zp1;
	uint64_t parent;
	int error;

	zfsvfs = tdzp->z_zfsvfs;
	/* Renaming a directory into itself is never allowed. */
	if (tdzp == szp)
		return (SET_ERROR(EINVAL));
	/* Same directory, or target is the root: szp cannot be an ancestor. */
	if (tdzp == sdzp)
		return (0);
	if (tdzp->z_id == zfsvfs->z_root)
		return (0);
	/*
	 * Walk up from tdzp to the root; if we meet szp on the way, the
	 * rename would create a cycle (moving a directory under itself).
	 */
	zp = tdzp;
	for (;;) {
		ASSERT(!zp->z_unlinked);
		if ((error = sa_lookup(zp->z_sa_hdl,
		    SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
			break;

		if (parent == szp->z_id) {
			error = SET_ERROR(EINVAL);
			break;
		}
		if (parent == zfsvfs->z_root)
			break;
		if (parent == sdzp->z_id)
			break;

		error = zfs_zget(zfsvfs, parent, &zp1);
		if (error != 0)
			break;

		/* Release intermediate znodes asynchronously (see above). */
		if (zp != tdzp)
			VN_RELE_ASYNC(ZTOV(zp),
			    dsl_pool_zrele_taskq(
			    dmu_objset_pool(zfsvfs->z_os)));
		zp = zp1;
	}

	if (error == ENOTDIR)
		panic("checkpath: .. not a directory\n");
	if (zp != tdzp)
		VN_RELE_ASYNC(ZTOV(zp),
		    dsl_pool_zrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
	return (error);
}
3243
3244 static int
3245 zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
3246 vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
3247 cred_t *cr);
3248
3249 /*
3250 * Move an entry from the provided source directory to the target
3251 * directory. Change the entry name as indicated.
3252 *
3253 * IN: sdvp - Source directory containing the "old entry".
3254 * scnp - Old entry name.
3255 * tdvp - Target directory to contain the "new entry".
3256 * tcnp - New entry name.
3257 * cr - credentials of caller.
3258 * INOUT: svpp - Source file
3259 * tvpp - Target file, may point to NULL initially
3260 *
3261 * RETURN: 0 on success, error code on failure.
3262 *
3263 * Timestamps:
3264 * sdvp,tdvp - ctime|mtime updated
3265 */
static int
zfs_do_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
    vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
    cred_t *cr)
{
	int error;

	/* On entry the target directory (and target file, if any) is locked. */
	ASSERT_VOP_ELOCKED(tdvp, __func__);
	if (*tvpp != NULL)
		ASSERT_VOP_ELOCKED(*tvpp, __func__);

	/* Reject renames across filesystems. */
	if ((*svpp)->v_mount != tdvp->v_mount ||
	    ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) {
		error = SET_ERROR(EXDEV);
		goto out;
	}

	/* .zfs control nodes cannot be rename targets. */
	if (zfsctl_is_node(tdvp)) {
		error = SET_ERROR(EXDEV);
		goto out;
	}

	/*
	 * Lock all four vnodes to ensure safety and semantics of renaming.
	 */
	error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp);
	if (error != 0) {
		/* no vnodes are locked in the case of error here */
		return (error);
	}

	error = zfs_do_rename_impl(sdvp, svpp, scnp, tdvp, tvpp, tcnp, cr);
	VOP_UNLOCK(sdvp);
	VOP_UNLOCK(*svpp);
out:
	/* tdvp/*tvpp remain locked on the early-error paths; release them. */
	if (*tvpp != NULL)
		VOP_UNLOCK(*tvpp);
	if (tdvp != *tvpp)
		VOP_UNLOCK(tdvp);

	return (error);
}
3309
/*
 * Second half of a rename: all four vnodes (source/target directory and
 * source/target file) are exclusively locked by the caller and stay locked
 * on return; zfs_do_rename() is responsible for unlocking them.
 */
static int
zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
    vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
    cred_t *cr)
{
	dmu_tx_t *tx;
	zfsvfs_t *zfsvfs;
	zilog_t *zilog;
	znode_t *tdzp, *sdzp, *tzp, *szp;
	const char *snm = scnp->cn_nameptr;
	const char *tnm = tcnp->cn_nameptr;
	int error;

	tdzp = VTOZ(tdvp);
	sdzp = VTOZ(sdvp);
	zfsvfs = tdzp->z_zfsvfs;

	/* Busy the filesystem and validate the directory znodes. */
	if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0)
		return (error);
	if ((error = zfs_verify_zp(sdzp)) != 0) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}
	zilog = zfsvfs->z_log;

	/* Reject a target name that is not valid UTF-8, if required. */
	if (zfsvfs->z_utf8 && u8_validate(tnm,
	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		error = SET_ERROR(EILSEQ);
		goto out;
	}

	/* If source and target are the same file, there is nothing to do. */
	if ((*svpp) == (*tvpp)) {
		error = 0;
		goto out;
	}

	/* Renaming over (or of) a mount point is not allowed. */
	if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) ||
	    ((*tvpp) != NULL && (*tvpp)->v_type == VDIR &&
	    (*tvpp)->v_mountedhere != NULL)) {
		error = SET_ERROR(EXDEV);
		goto out;
	}

	szp = VTOZ(*svpp);
	if ((error = zfs_verify_zp(szp)) != 0) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}
	tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp);
	if (tzp != NULL) {
		if ((error = zfs_verify_zp(tzp)) != 0) {
			zfs_exit(zfsvfs, FTAG);
			return (error);
		}
	}

	/*
	 * This is to prevent the creation of links into attribute space
	 * by renaming a linked file into/outof an attribute directory.
	 * See the comment in zfs_link() for why this is considered bad.
	 */
	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
		error = SET_ERROR(EINVAL);
		goto out;
	}

	/*
	 * If we are using project inheritance, means if the directory has
	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
	 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
	 * such case, we only allow renames into our tree when the project
	 * IDs are the same.
	 */
	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
	    tdzp->z_projid != szp->z_projid) {
		error = SET_ERROR(EXDEV);
		goto out;
	}

	/*
	 * Must have write access at the source to remove the old entry
	 * and write access at the target to create the new entry.
	 * Note that if target and source are the same, this can be
	 * done in a single check.
	 */
	if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr, NULL)))
		goto out;

	if ((*svpp)->v_type == VDIR) {
		/*
		 * Avoid ".", "..", and aliases of "." for obvious reasons.
		 */
		if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') ||
		    sdzp == szp ||
		    (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
			error = EINVAL;
			goto out;
		}

		/*
		 * Check to make sure rename is valid.
		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
		 */
		if ((error = zfs_rename_check(szp, sdzp, tdzp)))
			goto out;
	}

	/*
	 * Does target exist?
	 */
	if (tzp) {
		/*
		 * Source and target must be the same type.
		 */
		if ((*svpp)->v_type == VDIR) {
			if ((*tvpp)->v_type != VDIR) {
				error = SET_ERROR(ENOTDIR);
				goto out;
			} else {
				/* Directory over directory: drop name cache. */
				cache_purge(tdvp);
				if (sdvp != tdvp)
					cache_purge(sdvp);
			}
		} else {
			if ((*tvpp)->v_type == VDIR) {
				error = SET_ERROR(EISDIR);
				goto out;
			}
		}
	}

	/*
	 * Open a vn_seqc write section on every vnode we are about to
	 * modify; ended at out_seq below on both success and failure.
	 */
	vn_seqc_write_begin(*svpp);
	vn_seqc_write_begin(sdvp);
	if (*tvpp != NULL)
		vn_seqc_write_begin(*tvpp);
	if (tdvp != *tvpp)
		vn_seqc_write_begin(tdvp);

	vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct);
	if (tzp)
		vnevent_rename_dest(*tvpp, tdvp, tnm, ct);

	/*
	 * notify the target directory if it is not the same
	 * as source directory.
	 */
	if (tdvp != sdvp) {
		vnevent_rename_dest_dir(tdvp, ct);
	}

	/* Hold everything the rename may touch in one transaction. */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
	if (sdzp != tdzp) {
		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, tdzp);
	}
	if (tzp) {
		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, tzp);
	}

	zfs_sa_upgrade_txholds(tx, szp);
	/* Unlinking tzp may put it on the unlinked set. */
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	error = dmu_tx_assign(tx, DMU_TX_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		goto out_seq;
	}

	if (tzp)	/* Attempt to remove the existing target */
		error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL);

	if (error == 0) {
		error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING);
		if (error == 0) {
			szp->z_pflags |= ZFS_AV_MODIFIED;

			error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
			    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
			ASSERT0(error);

			error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING,
			    NULL);
			if (error == 0) {
				zfs_log_rename(zilog, tx, TX_RENAME, sdzp,
				    snm, tdzp, tnm, szp);
			} else {
				/*
				 * At this point, we have successfully created
				 * the target name, but have failed to remove
				 * the source name.  Since the create was done
				 * with the ZRENAMING flag, there are
				 * complications; for one, the link count is
				 * wrong.  The easiest way to deal with this
				 * is to remove the newly created target, and
				 * return the original error.  This must
				 * succeed; fortunately, it is very unlikely to
				 * fail, since we just created it.
				 */
				VERIFY0(zfs_link_destroy(tdzp, tnm, szp, tx,
				    ZRENAMING, NULL));
			}
		}
		if (error == 0) {
			cache_vop_rename(sdvp, *svpp, tdvp, *tvpp, scnp, tcnp);
		}
	}

	dmu_tx_commit(tx);

out_seq:
	vn_seqc_write_end(*svpp);
	vn_seqc_write_end(sdvp);
	if (*tvpp != NULL)
		vn_seqc_write_end(*tvpp);
	if (tdvp != *tvpp)
		vn_seqc_write_end(tdvp);

out:
	if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);
	zfs_exit(zfsvfs, FTAG);

	return (error);
}
3539
/*
 * Rename directory entry sname in sdzp to entry tname in tdzp.
 *
 * IN:	sdzp	- Source directory.
 *	sname	- Name of entry to move.
 *	tdzp	- Target directory.
 *	tname	- New name for the entry.
 *	cr	- credentials of caller.
 *	flags	- case flags (unused here).
 *	rflags	- renameat2-style flags; must be 0.
 *	wo_vap	- whiteout attributes; must be NULL.
 *	mnt_ns	- Unused on FreeBSD.
 *
 * RETURN:	0 on success, error code on failure.
 */
int
zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname,
    cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap, zidmap_t *mnt_ns)
{
	struct componentname scn, tcn;
	vnode_t *sdvp, *tdvp;
	vnode_t *svp, *tvp;
	int error;
	svp = tvp = NULL;

	if (is_nametoolong(tdzp->z_zfsvfs, tname))
		return (SET_ERROR(ENAMETOOLONG));

	/* Extended rename flags (RENAME_EXCHANGE etc.) are not supported. */
	if (rflags != 0 || wo_vap != NULL)
		return (SET_ERROR(EINVAL));

	sdvp = ZTOV(sdzp);
	tdvp = ZTOV(tdzp);
	/* Resolve the source entry; on success svp is held and locked. */
	error = zfs_lookup_internal(sdzp, sname, &svp, &scn, DELETE);
	/* NOTE(review): during ZIL replay sdvp appears to stay locked here. */
	if (sdzp->z_zfsvfs->z_replay == B_FALSE)
		VOP_UNLOCK(sdvp);
	if (error != 0)
		goto fail;
	VOP_UNLOCK(svp);

	/*
	 * Resolve the target with tdvp locked.  EJUSTRETURN indicates the
	 * target name does not exist yet, which is acceptable for rename.
	 */
	vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
	error = zfs_lookup_internal(tdzp, tname, &tvp, &tcn, RENAME);
	if (error == EJUSTRETURN)
		tvp = NULL;
	else if (error != 0) {
		VOP_UNLOCK(tdvp);
		goto fail;
	}

	/* zfs_do_rename() unlocks everything; we just drop references. */
	error = zfs_do_rename(sdvp, &svp, &scn, tdvp, &tvp, &tcn, cr);
fail:
	if (svp != NULL)
		vrele(svp);
	if (tvp != NULL)
		vrele(tvp);

	return (error);
}
3583
/*
 * Insert the indicated symbolic reference entry into the directory.
 *
 * IN:	dzp	- Directory to contain the new symbolic link.
 *	name	- Name of the directory entry for the new symlink.
 *	vap	- Attributes of new entry.
 *	link	- Target path the symlink will point to.
 *	cr	- credentials of caller.
 *	flags	- case flags (unused on FreeBSD).
 *	mnt_ns	- Unused on FreeBSD
 *
 * OUT:	zpp	- znode of the newly created symlink (on success).
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dzp - ctime|mtime updated
 */
int
zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
    const char *link, znode_t **zpp, cred_t *cr, int flags, zidmap_t *mnt_ns)
{
	(void) flags;
	znode_t *zp;
	dmu_tx_t *tx;
	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
	zilog_t *zilog;
	uint64_t len = strlen(link);	/* length of the link target path */
	int error;
	zfs_acl_ids_t acl_ids;
	boolean_t fuid_dirtied;
	uint64_t txtype = TX_SYMLINK;

	ASSERT3S(vap->va_type, ==, VLNK);

	if (is_nametoolong(zfsvfs, name))
		return (SET_ERROR(ENAMETOOLONG));

	if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
		return (error);
	zilog = zfsvfs->z_log;

	/* Reject entry names that are not valid UTF-8, if required. */
	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EILSEQ));
	}

	/* The link target must fit within MAXPATHLEN. */
	if (len > MAXPATHLEN) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(ENAMETOOLONG));
	}

	if ((error = zfs_acl_ids_create(dzp, 0,
	    vap, cr, NULL, &acl_ids, NULL)) != 0) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
		zfs_acl_ids_free(&acl_ids);
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, ZFS_DEFAULT_PROJID)) {
		zfs_acl_ids_free(&acl_ids);
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EDQUOT));
	}

	/*
	 * NOTE(review): a vnode is reserved before the tx is assigned,
	 * presumably so vnode allocation cannot block while the tx is
	 * open -- confirm against getnewvnode_reserve(9) semantics.
	 */
	getnewvnode_reserve();
	tx = dmu_tx_create(zfsvfs->z_os);
	fuid_dirtied = zfsvfs->z_fuid_dirty;
	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE + len);
	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
		    acl_ids.z_aclp->z_acl_bytes);
	}
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	error = dmu_tx_assign(tx, DMU_TX_WAIT);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		getnewvnode_drop_reserve();
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	/*
	 * Create a new object for the symlink.
	 * for version 4 ZPL datasets the symlink will be an SA attribute
	 */
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	/* Store the target either as an SA attribute or in the data blocks. */
	if (zp->z_is_sa)
		error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
		    __DECONST(void *, link), len, tx);
	else
		zfs_sa_symlink(zp, __DECONST(char *, link), len, tx);

	zp->z_size = len;
	(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
	    &zp->z_size, sizeof (zp->z_size), tx);
	/*
	 * Insert the new object into the directory.
	 */
	error = zfs_link_create(dzp, name, zp, tx, ZNEW);
	if (error != 0) {
		/* Undo zfs_mknode(): destroy the znode in the same tx. */
		zfs_znode_delete(zp, tx);
		VOP_UNLOCK(ZTOV(zp));
		zrele(zp);
	} else {
		zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
	}

	zfs_acl_ids_free(&acl_ids);

	dmu_tx_commit(tx);

	getnewvnode_drop_reserve();

	if (error == 0) {
		*zpp = zp;

		/* Honor sync=always by committing the log record now. */
		if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
			zil_commit(zilog, 0);
	}

	zfs_exit(zfsvfs, FTAG);
	return (error);
}
3732
3733 /*
3734 * Return, in the buffer contained in the provided uio structure,
3735 * the symbolic path referred to by vp.
3736 *
3737 * IN: vp - vnode of symbolic link.
3738 * uio - structure to contain the link path.
3739 * cr - credentials of caller.
3740 * ct - caller context
3741 *
3742 * OUT: uio - structure containing the link path.
3743 *
3744 * RETURN: 0 on success, error code on failure.
3745 *
3746 * Timestamps:
3747 * vp - atime updated
3748 */
3749 static int
zfs_readlink(vnode_t * vp,zfs_uio_t * uio,cred_t * cr,caller_context_t * ct)3750 zfs_readlink(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, caller_context_t *ct)
3751 {
3752 (void) cr, (void) ct;
3753 znode_t *zp = VTOZ(vp);
3754 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3755 int error;
3756
3757 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3758 return (error);
3759
3760 if (zp->z_is_sa)
3761 error = sa_lookup_uio(zp->z_sa_hdl,
3762 SA_ZPL_SYMLINK(zfsvfs), uio);
3763 else
3764 error = zfs_sa_readlink(zp, uio);
3765
3766 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
3767
3768 zfs_exit(zfsvfs, FTAG);
3769 return (error);
3770 }
3771
/*
 * Insert a new entry into directory tdzp referencing szp.
 *
 * IN:	tdzp	- Directory to contain new entry.
 *	szp	- znode of new entry.
 *	name	- name of new entry.
 *	cr	- credentials of caller.
 *	flags	- Unused on FreeBSD.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	tdzp - ctime|mtime updated
 *	szp - ctime updated
 */
int
zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
    int flags)
{
	(void) flags;
	znode_t *tzp;
	zfsvfs_t *zfsvfs = tdzp->z_zfsvfs;
	zilog_t *zilog;
	dmu_tx_t *tx;
	int error;
	uint64_t parent;
	uid_t owner;

	ASSERT3S(ZTOV(tdzp)->v_type, ==, VDIR);

	if (is_nametoolong(zfsvfs, name))
		return (SET_ERROR(ENAMETOOLONG));

	if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0)
		return (error);
	zilog = zfsvfs->z_log;

	/*
	 * POSIX dictates that we return EPERM here.
	 * Better choices include ENOTSUP or EISDIR.
	 */
	if (ZTOV(szp)->v_type == VDIR) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EPERM));
	}

	if ((error = zfs_verify_zp(szp)) != 0) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	/*
	 * If we are using project inheritance, means if the directory has
	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
	 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
	 * such case, we only allow hard link creation in our tree when the
	 * project IDs are the same.
	 */
	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
	    tdzp->z_projid != szp->z_projid) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EXDEV));
	}

	/* Files flagged append-only, immutable or read-only can't be linked. */
	if (szp->z_pflags & (ZFS_APPENDONLY |
	    ZFS_IMMUTABLE | ZFS_READONLY)) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EPERM));
	}

	/* Prevent links to .zfs/shares files */

	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
	    &parent, sizeof (uint64_t))) != 0) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}
	if (parent == zfsvfs->z_shares_dir) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EPERM));
	}

	/* Reject link names that are not valid UTF-8, if required. */
	if (zfsvfs->z_utf8 && u8_validate(name,
	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EILSEQ));
	}

	/*
	 * We do not support links between attributes and non-attributes
	 * because of the potential security risk of creating links
	 * into "normal" file space in order to circumvent restrictions
	 * imposed in attribute space.
	 */
	if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EINVAL));
	}


	/* Only the owner (or a suitably privileged caller) may hard-link. */
	owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
	if (owner != crgetuid(cr) && secpolicy_basic_link(ZTOV(szp), cr) != 0) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EPERM));
	}

	if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr, NULL))) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lookup(tdzp, name, &tzp, ZNEW);
	if (error) {
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	/* Hold the source SA and the target directory ZAP in one tx. */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name);
	zfs_sa_upgrade_txholds(tx, szp);
	zfs_sa_upgrade_txholds(tx, tdzp);
	error = dmu_tx_assign(tx, DMU_TX_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}

	error = zfs_link_create(tdzp, name, szp, tx, 0);

	if (error == 0) {
		uint64_t txtype = TX_LINK;
		zfs_log_link(zilog, tx, txtype, tdzp, szp, name);
	}

	dmu_tx_commit(tx);

	if (error == 0) {
		vnevent_link(ZTOV(szp), ct);
	}

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	zfs_exit(zfsvfs, FTAG);
	return (error);
}
3922
/*
 * Free or allocate space in a file.  Currently, this function only
 * supports the `F_FREESP' command.  However, this command is somewhat
 * misnamed, as its functionality includes the ability to allocate as
 * well as free space.
 *
 * IN:	zp	- znode of file to free data in.
 *	cmd	- action to take (only F_FREESP supported).
 *	bfp	- section of file to free/alloc.
 *	flag	- current file open mode flags.
 *	offset	- current file offset (unused).
 *	cr	- credentials of caller.
 *
 * RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	zp - ctime|mtime updated
 */
3941 int
zfs_space(znode_t * zp,int cmd,flock64_t * bfp,int flag,offset_t offset,cred_t * cr)3942 zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
3943 offset_t offset, cred_t *cr)
3944 {
3945 (void) offset;
3946 zfsvfs_t *zfsvfs = ZTOZSB(zp);
3947 uint64_t off, len;
3948 int error;
3949
3950 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
3951 return (error);
3952
3953 if (cmd != F_FREESP) {
3954 zfs_exit(zfsvfs, FTAG);
3955 return (SET_ERROR(EINVAL));
3956 }
3957
3958 /*
3959 * Callers might not be able to detect properly that we are read-only,
3960 * so check it explicitly here.
3961 */
3962 if (zfs_is_readonly(zfsvfs)) {
3963 zfs_exit(zfsvfs, FTAG);
3964 return (SET_ERROR(EROFS));
3965 }
3966
3967 if (bfp->l_len < 0) {
3968 zfs_exit(zfsvfs, FTAG);
3969 return (SET_ERROR(EINVAL));
3970 }
3971
3972 /*
3973 * Permissions aren't checked on Solaris because on this OS
3974 * zfs_space() can only be called with an opened file handle.
3975 * On Linux we can get here through truncate_range() which
3976 * operates directly on inodes, so we need to check access rights.
3977 */
3978 if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr, NULL))) {
3979 zfs_exit(zfsvfs, FTAG);
3980 return (error);
3981 }
3982
3983 off = bfp->l_start;
3984 len = bfp->l_len; /* 0 means from off to end of file */
3985
3986 error = zfs_freesp(zp, off, len, flag, TRUE);
3987
3988 zfs_exit(zfsvfs, FTAG);
3989 return (error);
3990 }
3991
/*
 * VOP_INACTIVE handling: recycle the vnode immediately if the file is
 * gone (filesystem torn down, or file unlinked); otherwise flush a
 * dirty atime back to the SA.  Runs under the teardown-inactive read
 * lock so it cannot race with unmount teardown.
 */
static void
zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	(void) cr, (void) ct;
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int error;

	ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
	if (zp->z_sa_hdl == NULL) {
		/*
		 * The fs has been unmounted, or we did a
		 * suspend/resume and this file no longer exists.
		 */
		ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
		vrecycle(vp);
		return;
	}

	if (zp->z_unlinked) {
		/*
		 * Fast path to recycle a vnode of a removed file.
		 */
		ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
		vrecycle(vp);
		return;
	}

	/* z_unlinked is already known to be 0 here; the recheck is benign. */
	if (zp->z_atime_dirty && zp->z_unlinked == 0) {
		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);

		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, zp);
		error = dmu_tx_assign(tx, DMU_TX_WAIT);
		if (error) {
			dmu_tx_abort(tx);
		} else {
			/* Best-effort atime flush; failure is ignored. */
			(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
			    (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
			zp->z_atime_dirty = 0;
			dmu_tx_commit(tx);
		}
	}
	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
}
4037
4038
/*
 * Both ZFS file-id layouts must fit inside the generic struct fid that
 * the VFS passes to VOP_FID(); zfs_fid() casts fidp to these types.
 */
_Static_assert(sizeof (struct zfid_short) <= sizeof (struct fid),
    "struct zfid_short bigger than struct fid");
_Static_assert(sizeof (struct zfid_long) <= sizeof (struct fid),
    "struct zfid_long bigger than struct fid");
4043
4044 static int
zfs_fid(vnode_t * vp,fid_t * fidp,caller_context_t * ct)4045 zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
4046 {
4047 (void) ct;
4048 znode_t *zp = VTOZ(vp);
4049 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4050 uint32_t gen;
4051 uint64_t gen64;
4052 uint64_t object = zp->z_id;
4053 zfid_short_t *zfid;
4054 int size, i, error;
4055
4056 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
4057 return (error);
4058
4059 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
4060 &gen64, sizeof (uint64_t))) != 0) {
4061 zfs_exit(zfsvfs, FTAG);
4062 return (error);
4063 }
4064
4065 gen = (uint32_t)gen64;
4066
4067 size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
4068 fidp->fid_len = size;
4069
4070 zfid = (zfid_short_t *)fidp;
4071
4072 zfid->zf_len = size;
4073
4074 for (i = 0; i < sizeof (zfid->zf_object); i++)
4075 zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
4076
4077 /* Must have a non-zero generation number to distinguish from .zfs */
4078 if (gen == 0)
4079 gen = 1;
4080 for (i = 0; i < sizeof (zfid->zf_gen); i++)
4081 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
4082
4083 if (size == LONG_FID_LEN) {
4084 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os);
4085 zfid_long_t *zlfid;
4086
4087 zlfid = (zfid_long_t *)fidp;
4088
4089 for (i = 0; i < sizeof (zlfid->zf_setid); i++)
4090 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
4091
4092 /* XXX - this should be the generation number for the objset */
4093 for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
4094 zlfid->zf_setgen[i] = 0;
4095 }
4096
4097 zfs_exit(zfsvfs, FTAG);
4098 return (0);
4099 }
4100
4101 static int
zfs_pathconf(vnode_t * vp,int cmd,ulong_t * valp,cred_t * cr,caller_context_t * ct)4102 zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
4103 caller_context_t *ct)
4104 {
4105 znode_t *zp;
4106 zfsvfs_t *zfsvfs;
4107 int error;
4108
4109 switch (cmd) {
4110 case _PC_LINK_MAX:
4111 *valp = MIN(LONG_MAX, ZFS_LINK_MAX);
4112 return (0);
4113
4114 case _PC_FILESIZEBITS:
4115 *valp = 64;
4116 return (0);
4117 case _PC_MIN_HOLE_SIZE:
4118 *valp = (int)SPA_MINBLOCKSIZE;
4119 return (0);
4120 case _PC_ACL_EXTENDED:
4121 #if 0 /* POSIX ACLs are not implemented for ZFS on FreeBSD yet. */
4122 zp = VTOZ(vp);
4123 zfsvfs = zp->z_zfsvfs;
4124 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
4125 return (error);
4126 *valp = zfsvfs->z_acl_type == ZFSACLTYPE_POSIX ? 1 : 0;
4127 zfs_exit(zfsvfs, FTAG);
4128 #else
4129 *valp = 0;
4130 #endif
4131 return (0);
4132
4133 case _PC_ACL_NFS4:
4134 zp = VTOZ(vp);
4135 zfsvfs = zp->z_zfsvfs;
4136 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
4137 return (error);
4138 *valp = zfsvfs->z_acl_type == ZFS_ACLTYPE_NFSV4 ? 1 : 0;
4139 zfs_exit(zfsvfs, FTAG);
4140 return (0);
4141
4142 case _PC_ACL_PATH_MAX:
4143 *valp = ACL_MAX_ENTRIES;
4144 return (0);
4145
4146 default:
4147 return (EOPNOTSUPP);
4148 }
4149 }
4150
/*
 * Pager get-pages: fill the busied pages ma[0..count-1] (plus up to
 * *rbehind/*rahead optional pages) from the DMU, holding the znode's
 * range lock so the data cannot change while it is being copied.
 */
static int
zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
    int *rahead)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	zfs_locked_range_t *lr;
	vm_object_t object;
	off_t start, end, obj_size;
	uint_t blksz;
	int pgsin_b, pgsin_a;
	int error;

	if (zfs_enter_verify_zp(zfsvfs, zp, FTAG) != 0)
		return (zfs_vm_pagerret_error);

	object = ma[0]->object;
	start = IDX_TO_OFF(ma[0]->pindex);
	end = IDX_TO_OFF(ma[count - 1]->pindex + 1);

	/*
	 * Lock a range covering all required and optional pages.
	 * Note that we need to handle the case of the block size growing.
	 */
	for (;;) {
		uint64_t len;

		blksz = zp->z_blksz;
		len = roundup(end, blksz) - rounddown(start, blksz);

		lr = zfs_rangelock_tryenter(&zp->z_rangelock,
		    rounddown(start, blksz), len, RL_READER);
		if (lr == NULL) {
			/*
			 * Avoid a deadlock with update_pages().  We need to
			 * hold the range lock when copying from the DMU, so
			 * give up the busy lock to allow update_pages() to
			 * proceed.  We might need to allocate new pages, which
			 * isn't quite right since this allocation isn't subject
			 * to the page fault handler's OOM logic, but this is
			 * the best we can do for now.
			 */
			for (int i = 0; i < count; i++)
				vm_page_xunbusy(ma[i]);

			lr = zfs_rangelock_enter(&zp->z_rangelock,
			    rounddown(start, blksz), len, RL_READER);

			/* Re-busy the pages that were released above. */
			zfs_vmobject_wlock(object);
			(void) vm_page_grab_pages(object, OFF_TO_IDX(start),
			    VM_ALLOC_NORMAL | VM_ALLOC_WAITOK | VM_ALLOC_ZERO,
			    ma, count);
			zfs_vmobject_wunlock(object);
		}
		/* Retry if the block size changed while unlocked. */
		if (blksz == zp->z_blksz)
			break;
		zfs_rangelock_exit(lr);
	}

	zfs_vmobject_wlock(object);
	obj_size = object->un_pager.vnp.vnp_size;
	zfs_vmobject_wunlock(object);
	/* The last requested page must still fall inside the file. */
	if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
		zfs_rangelock_exit(lr);
		zfs_exit(zfsvfs, FTAG);
		return (zfs_vm_pagerret_bad);
	}

	/* Clamp read-behind to the start of the first block. */
	pgsin_b = 0;
	if (rbehind != NULL) {
		pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz));
		pgsin_b = MIN(*rbehind, pgsin_b);
	}

	/* Clamp read-ahead to the last block and to the end of the file. */
	pgsin_a = 0;
	if (rahead != NULL) {
		pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end);
		if (end + IDX_TO_OFF(pgsin_a) >= obj_size)
			pgsin_a = OFF_TO_IDX(round_page(obj_size) - end);
		pgsin_a = MIN(*rahead, pgsin_a);
	}

	/*
	 * NB: we need to pass the exact byte size of the data that we expect
	 * to read after accounting for the file size. This is required because
	 * ZFS will panic if we request DMU to read beyond the end of the last
	 * allocated block.
	 */
	for (int i = 0; i < count; i++) {
		int dummypgsin, count1, j, last_size;

		/* Skip over runs of pages that are already valid. */
		if (vm_page_any_valid(ma[i])) {
			ASSERT(vm_page_all_valid(ma[i]));
			continue;
		}
		for (j = i + 1; j < count; j++) {
			if (vm_page_any_valid(ma[j])) {
				ASSERT(vm_page_all_valid(ma[j]));
				break;
			}
		}
		count1 = j - i;
		dummypgsin = 0;
		/* Trim the final page of the final run to the file size. */
		last_size = j == count ?
		    MIN(end, obj_size) - (end - PAGE_SIZE) : PAGE_SIZE;
		error = dmu_read_pages(zfsvfs->z_os, zp->z_id, &ma[i], count1,
		    i == 0 ? &pgsin_b : &dummypgsin,
		    j == count ? &pgsin_a : &dummypgsin,
		    last_size);
		if (error != 0)
			break;
		i += count1 - 1;
	}

	zfs_rangelock_exit(lr);
	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);

	dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, count*PAGE_SIZE);

	zfs_exit(zfsvfs, FTAG);

	if (error != 0)
		return (zfs_vm_pagerret_error);

	VM_CNT_INC(v_vnodein);
	VM_CNT_ADD(v_vnodepgsin, count + pgsin_b + pgsin_a);
	if (rbehind != NULL)
		*rbehind = pgsin_b;
	if (rahead != NULL)
		*rahead = pgsin_a;
	return (zfs_vm_pagerret_ok);
}
4283
#ifndef _SYS_SYSPROTO_H_
/* Argument layout for VOP_GETPAGES(9); normally supplied by sysproto. */
struct vop_getpages_args {
	struct vnode *a_vp;
	vm_page_t *a_m;
	int a_count;
	int *a_rbehind;
	int *a_rahead;
};
#endif
4293
4294 static int
zfs_freebsd_getpages(struct vop_getpages_args * ap)4295 zfs_freebsd_getpages(struct vop_getpages_args *ap)
4296 {
4297
4298 return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind,
4299 ap->a_rahead));
4300 }
4301
/*
 * Pager put-pages: write the dirty pages in ma[] back through the DMU
 * in a single transaction, clipping the request at EOF, and record a
 * TX_WRITE entry in the ZIL.  Per-page status is returned in rtvals[].
 */
static int
zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
    int *rtvals)
{
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	zfs_locked_range_t *lr;
	dmu_tx_t *tx;
	struct sf_buf *sf;
	vm_object_t object;
	vm_page_t m;
	caddr_t va;
	size_t tocopy;
	size_t lo_len;
	vm_ooffset_t lo_off;
	vm_ooffset_t off;
	uint_t blksz;
	int ncount;
	int pcount;
	int err;
	int i;

	object = vp->v_object;
	KASSERT(ma[0]->object == object, ("mismatching object"));
	KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length"));

	/* Default every page to "error" until it is actually written. */
	pcount = btoc(len);
	ncount = pcount;
	for (i = 0; i < pcount; i++)
		rtvals[i] = zfs_vm_pagerret_error;

	if (zfs_enter_verify_zp(zfsvfs, zp, FTAG) != 0)
		return (zfs_vm_pagerret_error);

	/* Write-lock the block-aligned range covering the request. */
	off = IDX_TO_OFF(ma[0]->pindex);
	blksz = zp->z_blksz;
	lo_off = rounddown(off, blksz);
	lo_len = roundup(len + (off - lo_off), blksz);
	lr = zfs_rangelock_enter(&zp->z_rangelock, lo_off, lo_len, RL_WRITER);

	/* Clip the request at EOF; pages entirely past EOF get "bad". */
	zfs_vmobject_wlock(object);
	if (len + off > object->un_pager.vnp.vnp_size) {
		if (object->un_pager.vnp.vnp_size > off) {
			int pgoff;

			len = object->un_pager.vnp.vnp_size - off;
			ncount = btoc(len);
			if ((pgoff = (int)len & PAGE_MASK) != 0) {
				/*
				 * If the object is locked and the following
				 * conditions hold, then the page's dirty
				 * field cannot be concurrently changed by a
				 * pmap operation.
				 */
				m = ma[ncount - 1];
				vm_page_assert_sbusied(m);
				KASSERT(!pmap_page_is_write_mapped(m),
				    ("zfs_putpages: page %p is not read-only",
				    m));
				vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
				    pgoff);
			}
		} else {
			len = 0;
			ncount = 0;
		}
		if (ncount < pcount) {
			for (i = ncount; i < pcount; i++) {
				rtvals[i] = zfs_vm_pagerret_bad;
			}
		}
	}
	zfs_vmobject_wunlock(object);

	/* Synchronous/invalidating putpages (and sync=always) force a commit. */
	boolean_t commit = (flags & (zfs_vm_pagerput_sync |
	    zfs_vm_pagerput_inval)) != 0 ||
	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS;

	if (ncount == 0)
		goto out;

	/* Refuse the write if any applicable block quota is exhausted. */
	if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, zp->z_uid) ||
	    zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, zp->z_gid) ||
	    (zp->z_projid != ZFS_DEFAULT_PROJID &&
	    zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
	    zp->z_projid))) {
		goto out;
	}

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_write(tx, zp->z_id, off, len);

	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
	zfs_sa_upgrade_txholds(tx, zp);
	err = dmu_tx_assign(tx, DMU_TX_WAIT);
	if (err != 0) {
		dmu_tx_abort(tx);
		goto out;
	}

	/* Sub-page block sizes require a page-at-a-time mapped copy. */
	if (zp->z_blksz < PAGE_SIZE) {
		for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) {
			tocopy = len > PAGE_SIZE ? PAGE_SIZE : len;
			va = zfs_map_page(ma[i], &sf);
			dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx);
			zfs_unmap_page(sf);
		}
	} else {
		err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx);
	}

	if (err == 0) {
		uint64_t mtime[2], ctime[2];
		sa_bulk_attr_t bulk[3];
		int count = 0;

		/* Update mtime/ctime/flags in the same tx as the data. */
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
		    &mtime, 16);
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
		    &ctime, 16);
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
		    &zp->z_pflags, 8);
		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
		err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
		ASSERT0(err);
		/*
		 * XXX we should be passing a callback to undirty
		 * but that would make the locking messier
		 */
		zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off,
		    len, commit, B_FALSE, NULL, NULL);

		zfs_vmobject_wlock(object);
		for (i = 0; i < ncount; i++) {
			rtvals[i] = zfs_vm_pagerret_ok;
			vm_page_undirty(ma[i]);
		}
		zfs_vmobject_wunlock(object);
		VM_CNT_INC(v_vnodeout);
		VM_CNT_ADD(v_vnodepgsout, ncount);
	}
	dmu_tx_commit(tx);

out:
	zfs_rangelock_exit(lr);
	if (commit)
		zil_commit(zfsvfs->z_log, zp->z_id);

	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, len);

	zfs_exit(zfsvfs, FTAG);
	return (rtvals[0]);
}
4455
4456 #ifndef _SYS_SYSPROTO_H_
4457 struct vop_putpages_args {
4458 struct vnode *a_vp;
4459 vm_page_t *a_m;
4460 int a_count;
4461 int a_sync;
4462 int *a_rtvals;
4463 };
4464 #endif
4465
4466 static int
zfs_freebsd_putpages(struct vop_putpages_args * ap)4467 zfs_freebsd_putpages(struct vop_putpages_args *ap)
4468 {
4469
4470 return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync,
4471 ap->a_rtvals));
4472 }
4473
4474 #ifndef _SYS_SYSPROTO_H_
4475 struct vop_bmap_args {
4476 struct vnode *a_vp;
4477 daddr_t a_bn;
4478 struct bufobj **a_bop;
4479 daddr_t *a_bnp;
4480 int *a_runp;
4481 int *a_runb;
4482 };
4483 #endif
4484
4485 static int
zfs_freebsd_bmap(struct vop_bmap_args * ap)4486 zfs_freebsd_bmap(struct vop_bmap_args *ap)
4487 {
4488
4489 if (ap->a_bop != NULL)
4490 *ap->a_bop = &ap->a_vp->v_bufobj;
4491 if (ap->a_bnp != NULL)
4492 *ap->a_bnp = ap->a_bn;
4493 if (ap->a_runp != NULL)
4494 *ap->a_runp = 0;
4495 if (ap->a_runb != NULL)
4496 *ap->a_runb = 0;
4497
4498 return (0);
4499 }
4500
4501 #ifndef _SYS_SYSPROTO_H_
4502 struct vop_open_args {
4503 struct vnode *a_vp;
4504 int a_mode;
4505 struct ucred *a_cred;
4506 struct thread *a_td;
4507 };
4508 #endif
4509
4510 static int
zfs_freebsd_open(struct vop_open_args * ap)4511 zfs_freebsd_open(struct vop_open_args *ap)
4512 {
4513 vnode_t *vp = ap->a_vp;
4514 znode_t *zp = VTOZ(vp);
4515 int error;
4516
4517 error = zfs_open(&vp, ap->a_mode, ap->a_cred);
4518 if (error == 0)
4519 vnode_create_vobject(vp, zp->z_size, ap->a_td);
4520 return (error);
4521 }
4522
#ifndef _SYS_SYSPROTO_H_
struct vop_close_args {
	struct vnode *a_vp;
	int a_fflag;
	struct ucred *a_cred;
	struct thread *a_td;
};
#endif

/* VOP_CLOSE: delegate directly to the common ZFS close code. */
static int
zfs_freebsd_close(struct vop_close_args *ap)
{
	return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred));
}
4538
4539 #ifndef _SYS_SYSPROTO_H_
4540 struct vop_ioctl_args {
4541 struct vnode *a_vp;
4542 ulong_t a_command;
4543 caddr_t a_data;
4544 int a_fflag;
4545 struct ucred *cred;
4546 struct thread *td;
4547 };
4548 #endif
4549
4550 static int
zfs_freebsd_ioctl(struct vop_ioctl_args * ap)4551 zfs_freebsd_ioctl(struct vop_ioctl_args *ap)
4552 {
4553
4554 return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data,
4555 ap->a_fflag, ap->a_cred, NULL));
4556 }
4557
4558 static int
ioflags(int ioflags)4559 ioflags(int ioflags)
4560 {
4561 int flags = 0;
4562
4563 if (ioflags & IO_APPEND)
4564 flags |= O_APPEND;
4565 if (ioflags & IO_NDELAY)
4566 flags |= O_NONBLOCK;
4567 if (ioflags & IO_DIRECT)
4568 flags |= O_DIRECT;
4569 if (ioflags & IO_SYNC)
4570 flags |= O_SYNC;
4571
4572 return (flags);
4573 }
4574
#ifndef _SYS_SYSPROTO_H_
struct vop_read_args {
	struct vnode *a_vp;
	struct uio *a_uio;
	int a_ioflag;
	struct ucred *a_cred;
};
#endif

/*
 * VOP_READ: wrap the FreeBSD uio in a zfs_uio_t, translate the ioflag
 * bits, and hand off to the common zfs_read() code.
 */
static int
zfs_freebsd_read(struct vop_read_args *ap)
{
	zfs_uio_t uio;
	int error = 0;
	zfs_uio_init(&uio, ap->a_uio);
	error = zfs_read(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
	    ap->a_cred);
	/*
	 * XXX We occasionally get an EFAULT for Direct I/O reads on
	 * FreeBSD 13. This still needs to be resolved. The EFAULT comes
	 * from:
	 * zfs_uio_get__dio_pages_alloc() ->
	 * zfs_uio_get_dio_pages_impl() ->
	 * zfs_uio_iov_step() ->
	 * zfs_uio_get_user_pages().
	 * We return EFAULT from zfs_uio_iov_step(). When a Direct I/O
	 * read fails to map in the user pages (returning EFAULT) the
	 * Direct I/O request is broken up into two separate IO requests
	 * and issued separately using Direct I/O.
	 */
#ifdef ZFS_DEBUG
	/* Debug hook for the EFAULT case above; the printf is disabled. */
	if (error == EFAULT && uio.uio_extflg & UIO_DIRECT) {
#if 0
		printf("%s(%d): Direct I/O read returning EFAULT "
		    "uio = %p, zfs_uio_offset(uio) = %lu "
		    "zfs_uio_resid(uio) = %lu\n",
		    __FUNCTION__, __LINE__, &uio, zfs_uio_offset(&uio),
		    zfs_uio_resid(&uio));
#endif
	}

#endif
	return (error);
}
4619
4620 #ifndef _SYS_SYSPROTO_H_
4621 struct vop_write_args {
4622 struct vnode *a_vp;
4623 struct uio *a_uio;
4624 int a_ioflag;
4625 struct ucred *a_cred;
4626 };
4627 #endif
4628
4629 static int
zfs_freebsd_write(struct vop_write_args * ap)4630 zfs_freebsd_write(struct vop_write_args *ap)
4631 {
4632 zfs_uio_t uio;
4633 zfs_uio_init(&uio, ap->a_uio);
4634 return (zfs_write(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
4635 ap->a_cred));
4636 }
4637
4638 /*
4639 * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
4640 * the comment above cache_fplookup for details.
4641 */
4642 static int
zfs_freebsd_fplookup_vexec(struct vop_fplookup_vexec_args * v)4643 zfs_freebsd_fplookup_vexec(struct vop_fplookup_vexec_args *v)
4644 {
4645 vnode_t *vp;
4646 znode_t *zp;
4647 uint64_t pflags;
4648
4649 vp = v->a_vp;
4650 zp = VTOZ_SMR(vp);
4651 if (__predict_false(zp == NULL))
4652 return (EAGAIN);
4653 pflags = atomic_load_64(&zp->z_pflags);
4654 if (pflags & ZFS_AV_QUARANTINED)
4655 return (EAGAIN);
4656 if (pflags & ZFS_XATTR)
4657 return (EAGAIN);
4658 if ((pflags & ZFS_NO_EXECS_DENIED) == 0)
4659 return (EAGAIN);
4660 return (0);
4661 }
4662
4663 static int
zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args * v)4664 zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args *v)
4665 {
4666 vnode_t *vp;
4667 znode_t *zp;
4668 char *target;
4669
4670 vp = v->a_vp;
4671 zp = VTOZ_SMR(vp);
4672 if (__predict_false(zp == NULL)) {
4673 return (EAGAIN);
4674 }
4675
4676 target = atomic_load_consume_ptr(&zp->z_cached_symlink);
4677 if (target == NULL) {
4678 return (EAGAIN);
4679 }
4680 return (cache_symlink_resolve(v->a_fpl, target, strlen(target)));
4681 }
4682
#ifndef _SYS_SYSPROTO_H_
struct vop_access_args {
	struct vnode *a_vp;
	accmode_t a_accmode;
	struct ucred *a_cred;
	struct thread *a_td;
};
#endif

/*
 * VOP_ACCESS: check whether the caller may access the vnode with the
 * requested mode.  ZFS handles VREAD/VWRITE/VEXEC/VAPPEND itself; the
 * remaining FreeBSD-specific bits (e.g. VADMIN) go through vaccess().
 */
static int
zfs_freebsd_access(struct vop_access_args *ap)
{
	vnode_t *vp = ap->a_vp;
	znode_t *zp = VTOZ(vp);
	accmode_t accmode;
	int error = 0;


	/* Fast path for a pure execute check (common during path lookup). */
	if (ap->a_accmode == VEXEC) {
		if (zfs_fastaccesschk_execute(zp, ap->a_cred) == 0)
			return (0);
	}

	/*
	 * ZFS itself only knows about VREAD, VWRITE, VEXEC and VAPPEND,
	 */
	accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND);
	if (accmode != 0)
		error = zfs_access(zp, accmode, 0, ap->a_cred);

	/*
	 * VADMIN has to be handled by vaccess().
	 */
	if (error == 0) {
		accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND);
		if (accmode != 0) {
			error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
			    zp->z_gid, accmode, ap->a_cred);
		}
	}

	/*
	 * For VEXEC, ensure that at least one execute bit is set for
	 * non-directories.
	 */
	if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR &&
	    (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
		error = EACCES;
	}

	return (error);
}
4735
4736 #ifndef _SYS_SYSPROTO_H_
4737 struct vop_lookup_args {
4738 struct vnode *a_dvp;
4739 struct vnode **a_vpp;
4740 struct componentname *a_cnp;
4741 };
4742 #endif
4743
4744 static int
zfs_freebsd_lookup(struct vop_lookup_args * ap,boolean_t cached)4745 zfs_freebsd_lookup(struct vop_lookup_args *ap, boolean_t cached)
4746 {
4747 struct componentname *cnp = ap->a_cnp;
4748 char nm[NAME_MAX + 1];
4749
4750 ASSERT3U(cnp->cn_namelen, <, sizeof (nm));
4751 strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof (nm)));
4752
4753 return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop,
4754 cnp->cn_cred, 0, cached));
4755 }
4756
4757 static int
zfs_freebsd_cachedlookup(struct vop_cachedlookup_args * ap)4758 zfs_freebsd_cachedlookup(struct vop_cachedlookup_args *ap)
4759 {
4760
4761 return (zfs_freebsd_lookup((struct vop_lookup_args *)ap, B_TRUE));
4762 }
4763
4764 #ifndef _SYS_SYSPROTO_H_
4765 struct vop_lookup_args {
4766 struct vnode *a_dvp;
4767 struct vnode **a_vpp;
4768 struct componentname *a_cnp;
4769 };
4770 #endif
4771
4772 static int
zfs_cache_lookup(struct vop_lookup_args * ap)4773 zfs_cache_lookup(struct vop_lookup_args *ap)
4774 {
4775 zfsvfs_t *zfsvfs;
4776
4777 zfsvfs = ap->a_dvp->v_mount->mnt_data;
4778 if (zfsvfs->z_use_namecache)
4779 return (vfs_cache_lookup(ap));
4780 else
4781 return (zfs_freebsd_lookup(ap, B_FALSE));
4782 }
4783
4784 #ifndef _SYS_SYSPROTO_H_
4785 struct vop_create_args {
4786 struct vnode *a_dvp;
4787 struct vnode **a_vpp;
4788 struct componentname *a_cnp;
4789 struct vattr *a_vap;
4790 };
4791 #endif
4792
4793 static int
zfs_freebsd_create(struct vop_create_args * ap)4794 zfs_freebsd_create(struct vop_create_args *ap)
4795 {
4796 zfsvfs_t *zfsvfs;
4797 struct componentname *cnp = ap->a_cnp;
4798 vattr_t *vap = ap->a_vap;
4799 znode_t *zp = NULL;
4800 int rc, mode;
4801
4802 #if __FreeBSD_version < 1400068
4803 ASSERT(cnp->cn_flags & SAVENAME);
4804 #endif
4805
4806 vattr_init_mask(vap);
4807 mode = vap->va_mode & ALLPERMS;
4808 zfsvfs = ap->a_dvp->v_mount->mnt_data;
4809 *ap->a_vpp = NULL;
4810
4811 rc = zfs_create(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, 0, mode,
4812 &zp, cnp->cn_cred, 0 /* flag */, NULL /* vsecattr */, NULL);
4813 if (rc == 0)
4814 *ap->a_vpp = ZTOV(zp);
4815 if (zfsvfs->z_use_namecache &&
4816 rc == 0 && (cnp->cn_flags & MAKEENTRY) != 0)
4817 cache_enter(ap->a_dvp, *ap->a_vpp, cnp);
4818
4819 return (rc);
4820 }
4821
4822 #ifndef _SYS_SYSPROTO_H_
4823 struct vop_remove_args {
4824 struct vnode *a_dvp;
4825 struct vnode *a_vp;
4826 struct componentname *a_cnp;
4827 };
4828 #endif
4829
4830 static int
zfs_freebsd_remove(struct vop_remove_args * ap)4831 zfs_freebsd_remove(struct vop_remove_args *ap)
4832 {
4833
4834 #if __FreeBSD_version < 1400068
4835 ASSERT(ap->a_cnp->cn_flags & SAVENAME);
4836 #endif
4837
4838 return (zfs_remove_(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr,
4839 ap->a_cnp->cn_cred));
4840 }
4841
4842 #ifndef _SYS_SYSPROTO_H_
4843 struct vop_mkdir_args {
4844 struct vnode *a_dvp;
4845 struct vnode **a_vpp;
4846 struct componentname *a_cnp;
4847 struct vattr *a_vap;
4848 };
4849 #endif
4850
4851 static int
zfs_freebsd_mkdir(struct vop_mkdir_args * ap)4852 zfs_freebsd_mkdir(struct vop_mkdir_args *ap)
4853 {
4854 vattr_t *vap = ap->a_vap;
4855 znode_t *zp = NULL;
4856 int rc;
4857
4858 #if __FreeBSD_version < 1400068
4859 ASSERT(ap->a_cnp->cn_flags & SAVENAME);
4860 #endif
4861
4862 vattr_init_mask(vap);
4863 *ap->a_vpp = NULL;
4864
4865 rc = zfs_mkdir(VTOZ(ap->a_dvp), ap->a_cnp->cn_nameptr, vap, &zp,
4866 ap->a_cnp->cn_cred, 0, NULL, NULL);
4867
4868 if (rc == 0)
4869 *ap->a_vpp = ZTOV(zp);
4870 return (rc);
4871 }
4872
4873 #ifndef _SYS_SYSPROTO_H_
4874 struct vop_rmdir_args {
4875 struct vnode *a_dvp;
4876 struct vnode *a_vp;
4877 struct componentname *a_cnp;
4878 };
4879 #endif
4880
4881 static int
zfs_freebsd_rmdir(struct vop_rmdir_args * ap)4882 zfs_freebsd_rmdir(struct vop_rmdir_args *ap)
4883 {
4884 struct componentname *cnp = ap->a_cnp;
4885
4886 #if __FreeBSD_version < 1400068
4887 ASSERT(cnp->cn_flags & SAVENAME);
4888 #endif
4889
4890 return (zfs_rmdir_(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred));
4891 }
4892
4893 #ifndef _SYS_SYSPROTO_H_
4894 struct vop_readdir_args {
4895 struct vnode *a_vp;
4896 struct uio *a_uio;
4897 struct ucred *a_cred;
4898 int *a_eofflag;
4899 int *a_ncookies;
4900 cookie_t **a_cookies;
4901 };
4902 #endif
4903
4904 static int
zfs_freebsd_readdir(struct vop_readdir_args * ap)4905 zfs_freebsd_readdir(struct vop_readdir_args *ap)
4906 {
4907 zfs_uio_t uio;
4908 zfs_uio_init(&uio, ap->a_uio);
4909 return (zfs_readdir(ap->a_vp, &uio, ap->a_cred, ap->a_eofflag,
4910 ap->a_ncookies, ap->a_cookies));
4911 }
4912
4913 #ifndef _SYS_SYSPROTO_H_
4914 struct vop_fsync_args {
4915 struct vnode *a_vp;
4916 int a_waitfor;
4917 struct thread *a_td;
4918 };
4919 #endif
4920
4921 static int
zfs_freebsd_fsync(struct vop_fsync_args * ap)4922 zfs_freebsd_fsync(struct vop_fsync_args *ap)
4923 {
4924
4925 return (zfs_fsync(VTOZ(ap->a_vp), 0, ap->a_td->td_ucred));
4926 }
4927
#ifndef _SYS_SYSPROTO_H_
struct vop_getattr_args {
	struct vnode *a_vp;
	struct vattr *a_vap;
	struct ucred *a_cred;
};
#endif

/*
 * VOP_GETATTR: fetch attributes.  The request is widened to an xvattr_t
 * so the ZFS-specific extended flags can be retrieved in the same call,
 * then the extended flags are translated into FreeBSD chflags(2) bits
 * and returned in va_flags.
 */
static int
zfs_freebsd_getattr(struct vop_getattr_args *ap)
{
	vattr_t *vap = ap->a_vap;
	xvattr_t xvap;
	ulong_t fflags = 0;
	int error;

	xva_init(&xvap);
	xvap.xva_vattr = *vap;
	xvap.xva_vattr.va_mask |= AT_XVATTR;

	/* Convert chflags into ZFS-type flags. */
	/* XXX: what about SF_SETTABLE?. */
	XVA_SET_REQ(&xvap, XAT_IMMUTABLE);
	XVA_SET_REQ(&xvap, XAT_APPENDONLY);
	XVA_SET_REQ(&xvap, XAT_NOUNLINK);
	XVA_SET_REQ(&xvap, XAT_NODUMP);
	XVA_SET_REQ(&xvap, XAT_READONLY);
	XVA_SET_REQ(&xvap, XAT_ARCHIVE);
	XVA_SET_REQ(&xvap, XAT_SYSTEM);
	XVA_SET_REQ(&xvap, XAT_HIDDEN);
	XVA_SET_REQ(&xvap, XAT_REPARSE);
	XVA_SET_REQ(&xvap, XAT_OFFLINE);
	XVA_SET_REQ(&xvap, XAT_SPARSE);

	error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred);
	if (error != 0)
		return (error);

	/* Convert ZFS xattr into chflags. */
#define	FLAG_CHECK(fflag, xflag, xfield)	do {			\
	if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0)		\
		fflags |= (fflag);					\
} while (0)
	FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE,
	    xvap.xva_xoptattrs.xoa_immutable);
	FLAG_CHECK(SF_APPEND, XAT_APPENDONLY,
	    xvap.xva_xoptattrs.xoa_appendonly);
	FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK,
	    xvap.xva_xoptattrs.xoa_nounlink);
	FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE,
	    xvap.xva_xoptattrs.xoa_archive);
	FLAG_CHECK(UF_NODUMP, XAT_NODUMP,
	    xvap.xva_xoptattrs.xoa_nodump);
	FLAG_CHECK(UF_READONLY, XAT_READONLY,
	    xvap.xva_xoptattrs.xoa_readonly);
	FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM,
	    xvap.xva_xoptattrs.xoa_system);
	FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN,
	    xvap.xva_xoptattrs.xoa_hidden);
	FLAG_CHECK(UF_REPARSE, XAT_REPARSE,
	    xvap.xva_xoptattrs.xoa_reparse);
	FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE,
	    xvap.xva_xoptattrs.xoa_offline);
	FLAG_CHECK(UF_SPARSE, XAT_SPARSE,
	    xvap.xva_xoptattrs.xoa_sparse);

#undef	FLAG_CHECK
	*vap = xvap.xva_vattr;
	vap->va_flags = fflags;
	return (0);
}
4999
#ifndef _SYS_SYSPROTO_H_
struct vop_setattr_args {
	struct vnode *a_vp;
	struct vattr *a_vap;
	struct ucred *a_cred;
};
#endif

/*
 * VOP_SETATTR: set attributes.  FreeBSD file flags (chflags(2) SF_ and
 * UF_ bits in va_flags) are permission-checked here and translated into
 * ZFS xvattr flags; everything else is passed through to zfs_setattr().
 */
static int
zfs_freebsd_setattr(struct vop_setattr_args *ap)
{
	vnode_t *vp = ap->a_vp;
	vattr_t *vap = ap->a_vap;
	cred_t *cred = ap->a_cred;
	xvattr_t xvap;
	ulong_t fflags;
	uint64_t zflags;

	vattr_init_mask(vap);
	vap->va_mask &= ~AT_NOSET;

	xva_init(&xvap);
	xvap.xva_vattr = *vap;

	zflags = VTOZ(vp)->z_pflags;

	if (vap->va_flags != VNOVAL) {
		zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs;
		int error;

		if (zfsvfs->z_use_fuids == B_FALSE)
			return (EOPNOTSUPP);

		fflags = vap->va_flags;
		/*
		 * XXX KDM
		 * We need to figure out whether it makes sense to allow
		 * UF_REPARSE through, since we don't really have other
		 * facilities to handle reparse points and zfs_setattr()
		 * doesn't currently allow setting that attribute anyway.
		 */
		/* Reject any flag bits this filesystem cannot represent. */
		if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE|
		    UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE|
		    UF_OFFLINE|UF_SPARSE)) != 0)
			return (EOPNOTSUPP);
		/*
		 * Unprivileged processes are not permitted to unset system
		 * flags, or modify flags if any system flags are set.
		 * Privileged non-jail processes may not modify system flags
		 * if securelevel > 0 and any existing system flags are set.
		 * Privileged jail processes behave like privileged non-jail
		 * processes if the PR_ALLOW_CHFLAGS permission bit is set;
		 * otherwise, they behave like unprivileged processes.
		 */
		if (secpolicy_fs_owner(vp->v_mount, cred) == 0 ||
		    priv_check_cred(cred, PRIV_VFS_SYSFLAGS) == 0) {
			if (zflags &
			    (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
				error = securelevel_gt(cred, 0);
				if (error != 0)
					return (error);
			}
		} else {
			/*
			 * Callers may only modify the file flags on
			 * objects they have VADMIN rights for.
			 */
			if ((error = VOP_ACCESS(vp, VADMIN, cred,
			    curthread)) != 0)
				return (error);
			if (zflags &
			    (ZFS_IMMUTABLE | ZFS_APPENDONLY |
			    ZFS_NOUNLINK)) {
				return (EPERM);
			}
			if (fflags &
			    (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) {
				return (EPERM);
			}
		}

/* Request an xvattr update only for flags that actually change state. */
#define	FLAG_CHANGE(fflag, zflag, xflag, xfield)	do {		\
	if (((fflags & (fflag)) && !(zflags & (zflag))) ||		\
	    ((zflags & (zflag)) && !(fflags & (fflag)))) {		\
		XVA_SET_REQ(&xvap, (xflag));				\
		(xfield) = ((fflags & (fflag)) != 0);			\
	}								\
} while (0)
		/* Convert chflags into ZFS-type flags. */
		/* XXX: what about SF_SETTABLE?. */
		FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
		    xvap.xva_xoptattrs.xoa_immutable);
		FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
		    xvap.xva_xoptattrs.xoa_appendonly);
		FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK,
		    xvap.xva_xoptattrs.xoa_nounlink);
		FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE,
		    xvap.xva_xoptattrs.xoa_archive);
		FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP,
		    xvap.xva_xoptattrs.xoa_nodump);
		FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY,
		    xvap.xva_xoptattrs.xoa_readonly);
		FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM,
		    xvap.xva_xoptattrs.xoa_system);
		FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN,
		    xvap.xva_xoptattrs.xoa_hidden);
		FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE,
		    xvap.xva_xoptattrs.xoa_reparse);
		FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE,
		    xvap.xva_xoptattrs.xoa_offline);
		FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE,
		    xvap.xva_xoptattrs.xoa_sparse);
#undef	FLAG_CHANGE
	}
	if (vap->va_birthtime.tv_sec != VNOVAL) {
		xvap.xva_vattr.va_mask |= AT_XVATTR;
		XVA_SET_REQ(&xvap, XAT_CREATETIME);
	}
	return (zfs_setattr(VTOZ(vp), (vattr_t *)&xvap, 0, cred, NULL));
}
5120
#ifndef _SYS_SYSPROTO_H_
struct vop_rename_args {
	struct vnode *a_fdvp;
	struct vnode *a_fvp;
	struct componentname *a_fcnp;
	struct vnode *a_tdvp;
	struct vnode *a_tvp;
	struct componentname *a_tcnp;
};
#endif

/*
 * VOP_RENAME: delegate to zfs_do_rename(), then unconditionally drop the
 * references on fdvp, fvp, tdvp and (if the target exists) tvp — the
 * VOP_RENAME contract leaves releasing them to the filesystem.
 */
static int
zfs_freebsd_rename(struct vop_rename_args *ap)
{
	vnode_t *fdvp = ap->a_fdvp;
	vnode_t *fvp = ap->a_fvp;
	vnode_t *tdvp = ap->a_tdvp;
	vnode_t *tvp = ap->a_tvp;
	int error;

#if __FreeBSD_version < 1400068
	ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
	ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));
#endif

	error = zfs_do_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
	    ap->a_tcnp, ap->a_fcnp->cn_cred);

	vrele(fdvp);
	vrele(fvp);
	vrele(tdvp);
	if (tvp != NULL)
		vrele(tvp);

	return (error);
}
5157
#ifndef _SYS_SYSPROTO_H_
struct vop_symlink_args {
	struct vnode *a_dvp;
	struct vnode **a_vpp;
	struct componentname *a_cnp;
	struct vattr *a_vap;
	char *a_target;
};
#endif

/*
 * VOP_SYMLINK: create a symlink via zfs_symlink() and, on success, stash
 * a copy of the target in z_cached_symlink so the lockless fast-path
 * lookup (zfs_freebsd_fplookup_symlink) can resolve it without ZFS calls.
 */
static int
zfs_freebsd_symlink(struct vop_symlink_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	vattr_t *vap = ap->a_vap;
	znode_t *zp = NULL;
	char *symlink;
	size_t symlink_len;
	int rc;

#if __FreeBSD_version < 1400068
	ASSERT(cnp->cn_flags & SAVENAME);
#endif

	vap->va_type = VLNK;	/* FreeBSD: Syscall only sets va_mode. */
	vattr_init_mask(vap);
	*ap->a_vpp = NULL;

	rc = zfs_symlink(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap,
	    ap->a_target, &zp, cnp->cn_cred, 0 /* flags */, NULL);
	if (rc == 0) {
		*ap->a_vpp = ZTOV(zp);
		ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
		MPASS(zp->z_cached_symlink == NULL);
		symlink_len = strlen(ap->a_target);
		/* Caching is best-effort: skip it if allocation declines. */
		symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
		if (symlink != NULL) {
			memcpy(symlink, ap->a_target, symlink_len);
			symlink[symlink_len] = '\0';
			/* Release-store so readers see the bytes first. */
			atomic_store_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
			    (uintptr_t)symlink);
		}
	}
	return (rc);
}
5203
#ifndef _SYS_SYSPROTO_H_
struct vop_readlink_args {
	struct vnode *a_vp;
	struct uio *a_uio;
	struct ucred *a_cred;
};
#endif

/*
 * VOP_READLINK: read the symlink target via zfs_readlink().  As a side
 * effect, opportunistically populate z_cached_symlink (used by the
 * lockless fast-path lookup) when the read landed in a single in-kernel
 * buffer and no cached copy exists yet.
 */
static int
zfs_freebsd_readlink(struct vop_readlink_args *ap)
{
	zfs_uio_t uio;
	int error;
	znode_t *zp = VTOZ(ap->a_vp);
	char *symlink, *base;
	size_t symlink_len;
	bool trycache;

	zfs_uio_init(&uio, ap->a_uio);
	trycache = false;
	/* Only a single SYSSPACE iovec can be captured for the cache. */
	if (zfs_uio_segflg(&uio) == UIO_SYSSPACE &&
	    zfs_uio_iovcnt(&uio) == 1) {
		base = zfs_uio_iovbase(&uio, 0);
		symlink_len = zfs_uio_iovlen(&uio, 0);
		trycache = true;
	}
	error = zfs_readlink(ap->a_vp, &uio, ap->a_cred, NULL);
	if (atomic_load_ptr(&zp->z_cached_symlink) != NULL ||
	    error != 0 || !trycache) {
		return (error);
	}
	/* Bytes actually read = original iovec length minus the residue. */
	symlink_len -= zfs_uio_resid(&uio);
	symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
	if (symlink != NULL) {
		memcpy(symlink, base, symlink_len);
		symlink[symlink_len] = '\0';
		/* Publish with release semantics; lose the race -> free. */
		if (!atomic_cmpset_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
		    (uintptr_t)NULL, (uintptr_t)symlink)) {
			cache_symlink_free(symlink, symlink_len + 1);
		}
	}
	return (error);
}
5247
5248 #ifndef _SYS_SYSPROTO_H_
5249 struct vop_link_args {
5250 struct vnode *a_tdvp;
5251 struct vnode *a_vp;
5252 struct componentname *a_cnp;
5253 };
5254 #endif
5255
5256 static int
zfs_freebsd_link(struct vop_link_args * ap)5257 zfs_freebsd_link(struct vop_link_args *ap)
5258 {
5259 struct componentname *cnp = ap->a_cnp;
5260 vnode_t *vp = ap->a_vp;
5261 vnode_t *tdvp = ap->a_tdvp;
5262
5263 if (tdvp->v_mount != vp->v_mount)
5264 return (EXDEV);
5265
5266 #if __FreeBSD_version < 1400068
5267 ASSERT(cnp->cn_flags & SAVENAME);
5268 #endif
5269
5270 return (zfs_link(VTOZ(tdvp), VTOZ(vp),
5271 cnp->cn_nameptr, cnp->cn_cred, 0));
5272 }
5273
5274 #ifndef _SYS_SYSPROTO_H_
5275 struct vop_inactive_args {
5276 struct vnode *a_vp;
5277 struct thread *a_td;
5278 };
5279 #endif
5280
5281 static int
zfs_freebsd_inactive(struct vop_inactive_args * ap)5282 zfs_freebsd_inactive(struct vop_inactive_args *ap)
5283 {
5284 vnode_t *vp = ap->a_vp;
5285
5286 zfs_inactive(vp, curthread->td_ucred, NULL);
5287 return (0);
5288 }
5289
5290 #ifndef _SYS_SYSPROTO_H_
5291 struct vop_need_inactive_args {
5292 struct vnode *a_vp;
5293 struct thread *a_td;
5294 };
5295 #endif
5296
5297 static int
zfs_freebsd_need_inactive(struct vop_need_inactive_args * ap)5298 zfs_freebsd_need_inactive(struct vop_need_inactive_args *ap)
5299 {
5300 vnode_t *vp = ap->a_vp;
5301 znode_t *zp = VTOZ(vp);
5302 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
5303 int need;
5304
5305 if (vn_need_pageq_flush(vp))
5306 return (1);
5307
5308 if (!ZFS_TEARDOWN_INACTIVE_TRY_ENTER_READ(zfsvfs))
5309 return (1);
5310 need = (zp->z_sa_hdl == NULL || zp->z_unlinked || zp->z_atime_dirty);
5311 ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
5312
5313 return (need);
5314 }
5315
#ifndef _SYS_SYSPROTO_H_
struct vop_reclaim_args {
	struct vnode *a_vp;
	struct thread *a_td;
};
#endif

/*
 * VOP_RECLAIM: detach the znode from the vnode so the vnode can be
 * recycled.
 */
static int
zfs_freebsd_reclaim(struct vop_reclaim_args *ap)
{
	vnode_t *vp = ap->a_vp;
	znode_t *zp = VTOZ(vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;

	ASSERT3P(zp, !=, NULL);

	/*
	 * z_teardown_inactive_lock protects from a race with
	 * zfs_znode_dmu_fini in zfsvfs_teardown during
	 * force unmount.
	 */
	ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
	/* With no SA handle left, only free the znode; else inactivate. */
	if (zp->z_sa_hdl == NULL)
		zfs_znode_free(zp);
	else
		zfs_zinactive(zp);
	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);

	vp->v_data = NULL;
	return (0);
}
5347
5348 #ifndef _SYS_SYSPROTO_H_
5349 struct vop_fid_args {
5350 struct vnode *a_vp;
5351 struct fid *a_fid;
5352 };
5353 #endif
5354
5355 static int
zfs_freebsd_fid(struct vop_fid_args * ap)5356 zfs_freebsd_fid(struct vop_fid_args *ap)
5357 {
5358
5359 return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL));
5360 }
5361
5362
5363 #ifndef _SYS_SYSPROTO_H_
5364 struct vop_pathconf_args {
5365 struct vnode *a_vp;
5366 int a_name;
5367 register_t *a_retval;
5368 } *ap;
5369 #endif
5370
5371 static int
zfs_freebsd_pathconf(struct vop_pathconf_args * ap)5372 zfs_freebsd_pathconf(struct vop_pathconf_args *ap)
5373 {
5374 ulong_t val;
5375 int error;
5376
5377 error = zfs_pathconf(ap->a_vp, ap->a_name, &val,
5378 curthread->td_ucred, NULL);
5379 if (error == 0) {
5380 *ap->a_retval = val;
5381 return (error);
5382 }
5383 if (error != EOPNOTSUPP)
5384 return (error);
5385
5386 switch (ap->a_name) {
5387 case _PC_NAME_MAX:
5388 *ap->a_retval = NAME_MAX;
5389 return (0);
5390 #if __FreeBSD_version >= 1400032
5391 case _PC_DEALLOC_PRESENT:
5392 *ap->a_retval = 1;
5393 return (0);
5394 #endif
5395 case _PC_PIPE_BUF:
5396 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) {
5397 *ap->a_retval = PIPE_BUF;
5398 return (0);
5399 }
5400 return (EINVAL);
5401 default:
5402 return (vop_stdpathconf(ap));
5403 }
5404 }
5405
5406 static int zfs_xattr_compat = 1;
5407
5408 static int
zfs_check_attrname(const char * name)5409 zfs_check_attrname(const char *name)
5410 {
5411 /* We don't allow '/' character in attribute name. */
5412 if (strchr(name, '/') != NULL)
5413 return (SET_ERROR(EINVAL));
5414 /* We don't allow attribute names that start with a namespace prefix. */
5415 if (ZFS_XA_NS_PREFIX_FORBIDDEN(name))
5416 return (SET_ERROR(EINVAL));
5417 return (0);
5418 }
5419
5420 /*
5421 * FreeBSD's extended attributes namespace defines file name prefix for ZFS'
5422 * extended attribute name:
5423 *
5424 * NAMESPACE XATTR_COMPAT PREFIX
5425 * system * freebsd:system:
5426 * user 1 (none, can be used to access ZFS
5427 * fsattr(5) attributes created on Solaris)
5428 * user 0 user.
5429 */
5430 static int
zfs_create_attrname(int attrnamespace,const char * name,char * attrname,size_t size,boolean_t compat)5431 zfs_create_attrname(int attrnamespace, const char *name, char *attrname,
5432 size_t size, boolean_t compat)
5433 {
5434 const char *namespace, *prefix, *suffix;
5435
5436 memset(attrname, 0, size);
5437
5438 switch (attrnamespace) {
5439 case EXTATTR_NAMESPACE_USER:
5440 if (compat) {
5441 /*
5442 * This is the default namespace by which we can access
5443 * all attributes created on Solaris.
5444 */
5445 prefix = namespace = suffix = "";
5446 } else {
5447 /*
5448 * This is compatible with the user namespace encoding
5449 * on Linux prior to xattr_compat, but nothing
5450 * else.
5451 */
5452 prefix = "";
5453 namespace = "user";
5454 suffix = ".";
5455 }
5456 break;
5457 case EXTATTR_NAMESPACE_SYSTEM:
5458 prefix = "freebsd:";
5459 namespace = EXTATTR_NAMESPACE_SYSTEM_STRING;
5460 suffix = ":";
5461 break;
5462 case EXTATTR_NAMESPACE_EMPTY:
5463 default:
5464 return (SET_ERROR(EINVAL));
5465 }
5466 if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix,
5467 name) >= size) {
5468 return (SET_ERROR(ENAMETOOLONG));
5469 }
5470 return (0);
5471 }
5472
/*
 * Make sure the SA xattr nvlist (zp->z_xattr_cached) is loaded.  Must be
 * called with z_xattr_lock held (reader or writer).  A reader caller may
 * have the lock temporarily dropped and re-taken as writer here, so any
 * state observed before the call must be revalidated by the caller.
 */
static int
zfs_ensure_xattr_cached(znode_t *zp)
{
	int error = 0;

	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));

	/* Already populated - nothing to do. */
	if (zp->z_xattr_cached != NULL)
		return (0);

	/* A writer can fill the cache directly. */
	if (rw_write_held(&zp->z_xattr_lock))
		return (zfs_sa_get_xattr(zp));

	/*
	 * Upgrade to writer; if the upgrade fails we must drop and
	 * re-acquire, so re-check the cache afterwards.
	 */
	if (!rw_tryupgrade(&zp->z_xattr_lock)) {
		rw_exit(&zp->z_xattr_lock);
		rw_enter(&zp->z_xattr_lock, RW_WRITER);
	}
	if (zp->z_xattr_cached == NULL)
		error = zfs_sa_get_xattr(zp);
	rw_downgrade(&zp->z_xattr_lock);
	return (error);
}
5495
#ifndef _SYS_SYSPROTO_H_
struct vop_getextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
#endif

/*
 * Read an extended attribute stored as a file in the hidden xattr
 * directory: look up the xattr dir, open the named attribute file, then
 * either report its size (a_size) or copy out its contents (a_uio).
 */
static int
zfs_getextattr_dir(struct vop_getextattr_args *ap, const char *attrname)
{
	struct thread *td = ap->a_td;
	struct nameidata nd;
	struct vattr va;
	vnode_t *xvp = NULL, *vp;
	int error, flags;

	/* Locate the hidden extended-attribute directory, if any. */
	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
	    LOOKUP_XATTR, B_FALSE);
	if (error != 0)
		return (error);

	flags = FREAD;
#if __FreeBSD_version < 1400043
	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname,
	    xvp, td);
#else
	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
#endif
	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_INVFS, ap->a_cred, NULL);
	if (error != 0)
		return (SET_ERROR(error));
	vp = nd.ni_vp;
	NDFREE_PNBUF(&nd);

	/* Either report the attribute's size or read its data. */
	if (ap->a_size != NULL) {
		error = VOP_GETATTR(vp, &va, ap->a_cred);
		if (error == 0)
			*ap->a_size = (size_t)va.va_size;
	} else if (ap->a_uio != NULL)
		error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred);

	VOP_UNLOCK(vp);
	vn_close(vp, flags, ap->a_cred, td);
	return (error);
}
5546
5547 static int
zfs_getextattr_sa(struct vop_getextattr_args * ap,const char * attrname)5548 zfs_getextattr_sa(struct vop_getextattr_args *ap, const char *attrname)
5549 {
5550 znode_t *zp = VTOZ(ap->a_vp);
5551 uchar_t *nv_value;
5552 uint_t nv_size;
5553 int error;
5554
5555 error = zfs_ensure_xattr_cached(zp);
5556 if (error != 0)
5557 return (error);
5558
5559 ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
5560 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5561
5562 error = nvlist_lookup_byte_array(zp->z_xattr_cached, attrname,
5563 &nv_value, &nv_size);
5564 if (error != 0)
5565 return (SET_ERROR(error));
5566
5567 if (ap->a_size != NULL)
5568 *ap->a_size = nv_size;
5569 else if (ap->a_uio != NULL)
5570 error = uiomove(nv_value, nv_size, ap->a_uio);
5571 if (error != 0)
5572 return (SET_ERROR(error));
5573
5574 return (0);
5575 }
5576
5577 static int
zfs_getextattr_impl(struct vop_getextattr_args * ap,boolean_t compat)5578 zfs_getextattr_impl(struct vop_getextattr_args *ap, boolean_t compat)
5579 {
5580 znode_t *zp = VTOZ(ap->a_vp);
5581 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5582 char attrname[EXTATTR_MAXNAMELEN+1];
5583 int error;
5584
5585 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5586 sizeof (attrname), compat);
5587 if (error != 0)
5588 return (error);
5589
5590 error = ENOENT;
5591 if (zfsvfs->z_use_sa && zp->z_is_sa)
5592 error = zfs_getextattr_sa(ap, attrname);
5593 if (error == ENOENT)
5594 error = zfs_getextattr_dir(ap, attrname);
5595 return (error);
5596 }
5597
5598 /*
5599 * Vnode operation to retrieve a named extended attribute.
5600 */
5601 static int
zfs_getextattr(struct vop_getextattr_args * ap)5602 zfs_getextattr(struct vop_getextattr_args *ap)
5603 {
5604 znode_t *zp = VTOZ(ap->a_vp);
5605 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5606 int error;
5607
5608 /*
5609 * If the xattr property is off, refuse the request.
5610 */
5611 if (!(zfsvfs->z_flags & ZSB_XATTR))
5612 return (SET_ERROR(EOPNOTSUPP));
5613
5614 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5615 ap->a_cred, ap->a_td, VREAD);
5616 if (error != 0)
5617 return (SET_ERROR(error));
5618
5619 error = zfs_check_attrname(ap->a_name);
5620 if (error != 0)
5621 return (error);
5622
5623 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5624 return (error);
5625 error = ENOENT;
5626 rw_enter(&zp->z_xattr_lock, RW_READER);
5627
5628 error = zfs_getextattr_impl(ap, zfs_xattr_compat);
5629 if ((error == ENOENT || error == ENOATTR) &&
5630 ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
5631 /*
5632 * Fall back to the alternate namespace format if we failed to
5633 * find a user xattr.
5634 */
5635 error = zfs_getextattr_impl(ap, !zfs_xattr_compat);
5636 }
5637
5638 rw_exit(&zp->z_xattr_lock);
5639 zfs_exit(zfsvfs, FTAG);
5640 if (error == ENOENT)
5641 error = SET_ERROR(ENOATTR);
5642 return (error);
5643 }
5644
#ifndef _SYS_SYSPROTO_H_
/* Argument layout for VOP_DELETEEXTATTR(9); normally from sys/sysproto.h. */
struct vop_deleteextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
#endif
5654
/*
 * Remove a named extended attribute stored as a file in the hidden
 * xattr directory of ap->a_vp.  Propagates the error from zfs_lookup()
 * (e.g. ENOENT) when the xattr directory does not exist.
 */
static int
zfs_deleteextattr_dir(struct vop_deleteextattr_args *ap, const char *attrname)
{
	struct nameidata nd;
	vnode_t *xvp = NULL, *vp;
	int error;

	/* Look up the xattr directory without creating it. */
	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
	    LOOKUP_XATTR, B_FALSE);
	if (error != 0)
		return (error);

#if __FreeBSD_version < 1400043
	NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
	    UIO_SYSSPACE, attrname, xvp, ap->a_td);
#else
	NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
	    UIO_SYSSPACE, attrname, xvp);
#endif
	error = namei(&nd);
	if (error != 0)
		return (SET_ERROR(error));

	vp = nd.ni_vp;
	error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
	NDFREE_PNBUF(&nd);

	/*
	 * Release both vnodes returned by namei(); drop only one lock
	 * (and an extra reference) when they are the same vnode.
	 */
	vput(nd.ni_dvp);
	if (vp == nd.ni_dvp)
		vrele(vp);
	else
		vput(vp);

	return (error);
}
5690
5691 static int
zfs_deleteextattr_sa(struct vop_deleteextattr_args * ap,const char * attrname)5692 zfs_deleteextattr_sa(struct vop_deleteextattr_args *ap, const char *attrname)
5693 {
5694 znode_t *zp = VTOZ(ap->a_vp);
5695 nvlist_t *nvl;
5696 int error;
5697
5698 error = zfs_ensure_xattr_cached(zp);
5699 if (error != 0)
5700 return (error);
5701
5702 ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
5703 ASSERT3P(zp->z_xattr_cached, !=, NULL);
5704
5705 nvl = zp->z_xattr_cached;
5706 error = nvlist_remove(nvl, attrname, DATA_TYPE_BYTE_ARRAY);
5707 if (error != 0)
5708 error = SET_ERROR(error);
5709 else
5710 error = zfs_sa_set_xattr(zp, attrname, NULL, 0);
5711 if (error != 0) {
5712 zp->z_xattr_cached = NULL;
5713 nvlist_free(nvl);
5714 }
5715 return (error);
5716 }
5717
5718 static int
zfs_deleteextattr_impl(struct vop_deleteextattr_args * ap,boolean_t compat)5719 zfs_deleteextattr_impl(struct vop_deleteextattr_args *ap, boolean_t compat)
5720 {
5721 znode_t *zp = VTOZ(ap->a_vp);
5722 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5723 char attrname[EXTATTR_MAXNAMELEN+1];
5724 int error;
5725
5726 error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
5727 sizeof (attrname), compat);
5728 if (error != 0)
5729 return (error);
5730
5731 error = ENOENT;
5732 if (zfsvfs->z_use_sa && zp->z_is_sa)
5733 error = zfs_deleteextattr_sa(ap, attrname);
5734 if (error == ENOENT)
5735 error = zfs_deleteextattr_dir(ap, attrname);
5736 return (error);
5737 }
5738
5739 /*
5740 * Vnode operation to remove a named attribute.
5741 */
5742 static int
zfs_deleteextattr(struct vop_deleteextattr_args * ap)5743 zfs_deleteextattr(struct vop_deleteextattr_args *ap)
5744 {
5745 znode_t *zp = VTOZ(ap->a_vp);
5746 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5747 int error;
5748
5749 /*
5750 * If the xattr property is off, refuse the request.
5751 */
5752 if (!(zfsvfs->z_flags & ZSB_XATTR))
5753 return (SET_ERROR(EOPNOTSUPP));
5754
5755 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5756 ap->a_cred, ap->a_td, VWRITE);
5757 if (error != 0)
5758 return (SET_ERROR(error));
5759
5760 error = zfs_check_attrname(ap->a_name);
5761 if (error != 0)
5762 return (error);
5763
5764 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5765 return (error);
5766 rw_enter(&zp->z_xattr_lock, RW_WRITER);
5767
5768 error = zfs_deleteextattr_impl(ap, zfs_xattr_compat);
5769 if ((error == ENOENT || error == ENOATTR) &&
5770 ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
5771 /*
5772 * Fall back to the alternate namespace format if we failed to
5773 * find a user xattr.
5774 */
5775 error = zfs_deleteextattr_impl(ap, !zfs_xattr_compat);
5776 }
5777
5778 rw_exit(&zp->z_xattr_lock);
5779 zfs_exit(zfsvfs, FTAG);
5780 if (error == ENOENT)
5781 error = SET_ERROR(ENOATTR);
5782 return (error);
5783 }
5784
#ifndef _SYS_SYSPROTO_H_
/* Argument layout for VOP_SETEXTATTR(9); normally from sys/sysproto.h. */
struct vop_setextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
#endif
5795
5796 static int
zfs_setextattr_dir(struct vop_setextattr_args * ap,const char * attrname)5797 zfs_setextattr_dir(struct vop_setextattr_args *ap, const char *attrname)
5798 {
5799 struct thread *td = ap->a_td;
5800 struct nameidata nd;
5801 struct vattr va;
5802 vnode_t *xvp = NULL, *vp;
5803 int error, flags;
5804
5805 error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
5806 LOOKUP_XATTR | CREATE_XATTR_DIR, B_FALSE);
5807 if (error != 0)
5808 return (error);
5809
5810 flags = FFLAGS(O_WRONLY | O_CREAT);
5811 #if __FreeBSD_version < 1400043
5812 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp, td);
5813 #else
5814 NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
5815 #endif
5816 error = vn_open_cred(&nd, &flags, 0600, VN_OPEN_INVFS, ap->a_cred,
5817 NULL);
5818 if (error != 0)
5819 return (SET_ERROR(error));
5820 vp = nd.ni_vp;
5821 NDFREE_PNBUF(&nd);
5822
5823 VATTR_NULL(&va);
5824 va.va_size = 0;
5825 error = VOP_SETATTR(vp, &va, ap->a_cred);
5826 if (error == 0)
5827 VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred);
5828
5829 VOP_UNLOCK(vp);
5830 vn_close(vp, flags, ap->a_cred, td);
5831 return (error);
5832 }
5833
/*
 * Store a named extended attribute as an entry in the znode's SA-backed
 * xattr nvlist, replacing any prior value, and persist it via
 * zfs_sa_set_xattr().  Caller must hold zp->z_xattr_lock as writer.
 * Returns EFBIG when the value or the resulting nvlist would exceed the
 * SA size limits, so the caller can fall back to directory storage.
 */
static int
zfs_setextattr_sa(struct vop_setextattr_args *ap, const char *attrname)
{
	znode_t *zp = VTOZ(ap->a_vp);
	nvlist_t *nvl;
	size_t sa_size;
	int error;

	error = zfs_ensure_xattr_cached(zp);
	if (error != 0)
		return (error);

	ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
	ASSERT3P(zp->z_xattr_cached, !=, NULL);

	nvl = zp->z_xattr_cached;
	size_t entry_size = ap->a_uio->uio_resid;
	/* Refuse values too large for a single SA entry. */
	if (entry_size > DXATTR_MAX_ENTRY_SIZE)
		return (SET_ERROR(EFBIG));
	error = nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
	if (error != 0)
		return (SET_ERROR(error));
	/* Refuse when the serialized nvlist is already at its cap. */
	if (sa_size > DXATTR_MAX_SA_SIZE)
		return (SET_ERROR(EFBIG));
	uchar_t *buf = kmem_alloc(entry_size, KM_SLEEP);
	error = uiomove(buf, entry_size, ap->a_uio);
	if (error != 0) {
		error = SET_ERROR(error);
	} else {
		error = nvlist_add_byte_array(nvl, attrname, buf, entry_size);
		if (error != 0)
			error = SET_ERROR(error);
	}
	if (error == 0)
		error = zfs_sa_set_xattr(zp, attrname, buf, entry_size);
	kmem_free(buf, entry_size);
	if (error != 0) {
		/*
		 * The cached nvlist may no longer match what is on disk;
		 * drop it so it is rebuilt on the next access.
		 */
		zp->z_xattr_cached = NULL;
		nvlist_free(nvl);
	}
	return (error);
}
5876
/*
 * Set one extended attribute under a single naming convention.  Prefers
 * SA storage when enabled, falling back to the hidden xattr directory
 * (e.g. when the value is too large for an SA entry).  Whichever store
 * receives the value, the stale copy in the other store — and, for user
 * xattrs, any copy under the alternate naming convention — is removed
 * on a best-effort basis so only one version of the attribute survives.
 */
static int
zfs_setextattr_impl(struct vop_setextattr_args *ap, boolean_t compat)
{
	znode_t *zp = VTOZ(ap->a_vp);
	zfsvfs_t *zfsvfs = ZTOZSB(zp);
	char attrname[EXTATTR_MAXNAMELEN+1];
	int error;

	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
	    sizeof (attrname), compat);
	if (error != 0)
		return (error);

	/* Delete-args mirror of ap, for the cross-store cleanup calls. */
	struct vop_deleteextattr_args vda = {
		.a_vp = ap->a_vp,
		.a_attrnamespace = ap->a_attrnamespace,
		.a_name = ap->a_name,
		.a_cred = ap->a_cred,
		.a_td = ap->a_td,
	};
	error = ENOENT;
	if (zfsvfs->z_use_sa && zp->z_is_sa && zfsvfs->z_xattr_sa) {
		error = zfs_setextattr_sa(ap, attrname);
		if (error == 0) {
			/*
			 * Successfully put into SA, we need to clear the one
			 * in dir if present.
			 */
			zfs_deleteextattr_dir(&vda, attrname);
		}
	}
	if (error != 0) {
		error = zfs_setextattr_dir(ap, attrname);
		if (error == 0 && zp->z_is_sa) {
			/*
			 * Successfully put into dir, we need to clear the one
			 * in SA if present.
			 */
			zfs_deleteextattr_sa(&vda, attrname);
		}
	}
	if (error == 0 && ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
		/*
		 * Also clear all versions of the alternate compat name.
		 */
		zfs_deleteextattr_impl(&vda, !compat);
	}
	return (error);
}
5926
5927 /*
5928 * Vnode operation to set a named attribute.
5929 */
5930 static int
zfs_setextattr(struct vop_setextattr_args * ap)5931 zfs_setextattr(struct vop_setextattr_args *ap)
5932 {
5933 znode_t *zp = VTOZ(ap->a_vp);
5934 zfsvfs_t *zfsvfs = ZTOZSB(zp);
5935 int error;
5936
5937 /*
5938 * If the xattr property is off, refuse the request.
5939 */
5940 if (!(zfsvfs->z_flags & ZSB_XATTR))
5941 return (SET_ERROR(EOPNOTSUPP));
5942
5943 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
5944 ap->a_cred, ap->a_td, VWRITE);
5945 if (error != 0)
5946 return (SET_ERROR(error));
5947
5948 error = zfs_check_attrname(ap->a_name);
5949 if (error != 0)
5950 return (error);
5951
5952 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
5953 return (error);
5954 rw_enter(&zp->z_xattr_lock, RW_WRITER);
5955
5956 error = zfs_setextattr_impl(ap, zfs_xattr_compat);
5957
5958 rw_exit(&zp->z_xattr_lock);
5959 zfs_exit(zfsvfs, FTAG);
5960 return (error);
5961 }
5962
#ifndef _SYS_SYSPROTO_H_
/* Argument layout for VOP_LISTEXTATTR(9); normally from sys/sysproto.h. */
struct vop_listextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
#endif
5973
/*
 * Enumerate extended attributes stored as files in the hidden xattr
 * directory, reporting only names that start with attrprefix.  The
 * prefix is stripped; each surviving name either adds to *ap->a_size or
 * is emitted to ap->a_uio as a one-byte length followed by the name
 * (the FreeBSD extattr list format).
 */
static int
zfs_listextattr_dir(struct vop_listextattr_args *ap, const char *attrprefix)
{
	struct thread *td = ap->a_td;
	struct nameidata nd;
	uint8_t dirbuf[sizeof (struct dirent)];
	struct iovec aiov;
	struct uio auio;
	vnode_t *xvp = NULL, *vp;
	int error, eof;

	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
	    LOOKUP_XATTR, B_FALSE);
	if (error != 0) {
		/*
		 * ENOATTR means that the EA directory does not yet exist,
		 * i.e. there are no extended attributes there.
		 */
		if (error == ENOATTR)
			error = 0;
		return (error);
	}

#if __FreeBSD_version < 1400043
	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
	    UIO_SYSSPACE, ".", xvp, td);
#else
	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
	    UIO_SYSSPACE, ".", xvp);
#endif
	error = namei(&nd);
	if (error != 0)
		return (SET_ERROR(error));
	vp = nd.ni_vp;
	NDFREE_PNBUF(&nd);

	/* Kernel-space uio used to read directory entries in chunks. */
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_td = td;
	auio.uio_rw = UIO_READ;
	auio.uio_offset = 0;

	size_t plen = strlen(attrprefix);

	do {
		aiov.iov_base = (void *)dirbuf;
		aiov.iov_len = sizeof (dirbuf);
		auio.uio_resid = sizeof (dirbuf);
		error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL);
		if (error != 0)
			break;
		int done = sizeof (dirbuf) - auio.uio_resid;
		/* Walk the variable-length dirent records just read. */
		for (int pos = 0; pos < done; ) {
			struct dirent *dp = (struct dirent *)(dirbuf + pos);
			pos += dp->d_reclen;
			/*
			 * XXX: Temporarily we also accept DT_UNKNOWN, as this
			 * is what we get when attribute was created on Solaris.
			 */
			if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN)
				continue;
			else if (plen == 0 &&
			    ZFS_XA_NS_PREFIX_FORBIDDEN(dp->d_name))
				continue;
			else if (strncmp(dp->d_name, attrprefix, plen) != 0)
				continue;
			uint8_t nlen = dp->d_namlen - plen;
			if (ap->a_size != NULL) {
				*ap->a_size += 1 + nlen;
			} else if (ap->a_uio != NULL) {
				/*
				 * Format of extattr name entry is one byte for
				 * length and the rest for name.
				 */
				error = uiomove(&nlen, 1, ap->a_uio);
				if (error == 0) {
					char *namep = dp->d_name + plen;
					error = uiomove(namep, nlen, ap->a_uio);
				}
				if (error != 0) {
					error = SET_ERROR(error);
					break;
				}
			}
		}
	} while (!eof && error == 0);

	vput(vp);
	return (error);
}
6065
/*
 * Enumerate extended attributes held in the znode's SA-backed xattr
 * nvlist, reporting only names that start with attrprefix.  Output
 * format matches zfs_listextattr_dir(): accumulate sizes into
 * *ap->a_size, or emit length-prefixed names to ap->a_uio.
 * Caller must hold zp->z_xattr_lock.
 */
static int
zfs_listextattr_sa(struct vop_listextattr_args *ap, const char *attrprefix)
{
	znode_t *zp = VTOZ(ap->a_vp);
	int error;

	error = zfs_ensure_xattr_cached(zp);
	if (error != 0)
		return (error);

	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
	ASSERT3P(zp->z_xattr_cached, !=, NULL);

	size_t plen = strlen(attrprefix);
	nvpair_t *nvp = NULL;
	while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
		ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);

		const char *name = nvpair_name(nvp);
		/* Skip reserved-namespace names when listing unprefixed. */
		if (plen == 0 && ZFS_XA_NS_PREFIX_FORBIDDEN(name))
			continue;
		else if (strncmp(name, attrprefix, plen) != 0)
			continue;
		uint8_t nlen = strlen(name) - plen;
		if (ap->a_size != NULL) {
			*ap->a_size += 1 + nlen;
		} else if (ap->a_uio != NULL) {
			/*
			 * Format of extattr name entry is one byte for
			 * length and the rest for name.
			 */
			error = uiomove(&nlen, 1, ap->a_uio);
			if (error == 0) {
				char *namep = __DECONST(char *, name) + plen;
				error = uiomove(namep, nlen, ap->a_uio);
			}
			if (error != 0) {
				error = SET_ERROR(error);
				break;
			}
		}
	}

	return (error);
}
6111
6112 static int
zfs_listextattr_impl(struct vop_listextattr_args * ap,boolean_t compat)6113 zfs_listextattr_impl(struct vop_listextattr_args *ap, boolean_t compat)
6114 {
6115 znode_t *zp = VTOZ(ap->a_vp);
6116 zfsvfs_t *zfsvfs = ZTOZSB(zp);
6117 char attrprefix[16];
6118 int error;
6119
6120 error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix,
6121 sizeof (attrprefix), compat);
6122 if (error != 0)
6123 return (error);
6124
6125 if (zfsvfs->z_use_sa && zp->z_is_sa)
6126 error = zfs_listextattr_sa(ap, attrprefix);
6127 if (error == 0)
6128 error = zfs_listextattr_dir(ap, attrprefix);
6129 return (error);
6130 }
6131
6132 /*
6133 * Vnode operation to retrieve extended attributes on a vnode.
6134 */
6135 static int
zfs_listextattr(struct vop_listextattr_args * ap)6136 zfs_listextattr(struct vop_listextattr_args *ap)
6137 {
6138 znode_t *zp = VTOZ(ap->a_vp);
6139 zfsvfs_t *zfsvfs = ZTOZSB(zp);
6140 int error;
6141
6142 if (ap->a_size != NULL)
6143 *ap->a_size = 0;
6144
6145 /*
6146 * If the xattr property is off, refuse the request.
6147 */
6148 if (!(zfsvfs->z_flags & ZSB_XATTR))
6149 return (SET_ERROR(EOPNOTSUPP));
6150
6151 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
6152 ap->a_cred, ap->a_td, VREAD);
6153 if (error != 0)
6154 return (SET_ERROR(error));
6155
6156 if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
6157 return (error);
6158 rw_enter(&zp->z_xattr_lock, RW_READER);
6159
6160 error = zfs_listextattr_impl(ap, zfs_xattr_compat);
6161 if (error == 0 && ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
6162 /* Also list user xattrs with the alternate format. */
6163 error = zfs_listextattr_impl(ap, !zfs_xattr_compat);
6164 }
6165
6166 rw_exit(&zp->z_xattr_lock);
6167 zfs_exit(zfsvfs, FTAG);
6168 return (error);
6169 }
6170
#ifndef _SYS_SYSPROTO_H_
/* Argument layout for VOP_GETACL(9); normally from sys/sysproto.h. */
struct vop_getacl_args {
	struct vnode *vp;
	acl_type_t type;
	struct acl *aclp;
	struct ucred *cred;
	struct thread *td;
};
#endif
6180
6181 static int
zfs_freebsd_getacl(struct vop_getacl_args * ap)6182 zfs_freebsd_getacl(struct vop_getacl_args *ap)
6183 {
6184 int error;
6185 vsecattr_t vsecattr;
6186
6187 if (ap->a_type != ACL_TYPE_NFS4)
6188 return (EINVAL);
6189
6190 vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT;
6191 if ((error = zfs_getsecattr(VTOZ(ap->a_vp),
6192 &vsecattr, 0, ap->a_cred)))
6193 return (error);
6194
6195 error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp,
6196 vsecattr.vsa_aclcnt);
6197 if (vsecattr.vsa_aclentp != NULL)
6198 kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz);
6199
6200 return (error);
6201 }
6202
#ifndef _SYS_SYSPROTO_H_
/* Argument layout for VOP_SETACL(9); normally from sys/sysproto.h. */
struct vop_setacl_args {
	struct vnode *vp;
	acl_type_t type;
	struct acl *aclp;
	struct ucred *cred;
	struct thread *td;
};
#endif
6212
6213 static int
zfs_freebsd_setacl(struct vop_setacl_args * ap)6214 zfs_freebsd_setacl(struct vop_setacl_args *ap)
6215 {
6216 int error;
6217 vsecattr_t vsecattr;
6218 int aclbsize; /* size of acl list in bytes */
6219 aclent_t *aaclp;
6220
6221 if (ap->a_type != ACL_TYPE_NFS4)
6222 return (EINVAL);
6223
6224 if (ap->a_aclp == NULL)
6225 return (EINVAL);
6226
6227 if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES)
6228 return (EINVAL);
6229
6230 /*
6231 * With NFSv4 ACLs, chmod(2) may need to add additional entries,
6232 * splitting every entry into two and appending "canonical six"
6233 * entries at the end. Don't allow for setting an ACL that would
6234 * cause chmod(2) to run out of ACL entries.
6235 */
6236 if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES)
6237 return (ENOSPC);
6238
6239 error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR);
6240 if (error != 0)
6241 return (error);
6242
6243 vsecattr.vsa_mask = VSA_ACE;
6244 aclbsize = ap->a_aclp->acl_cnt * sizeof (ace_t);
6245 vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP);
6246 aaclp = vsecattr.vsa_aclentp;
6247 vsecattr.vsa_aclentsz = aclbsize;
6248
6249 aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp);
6250 error = zfs_setsecattr(VTOZ(ap->a_vp), &vsecattr, 0, ap->a_cred);
6251 kmem_free(aaclp, aclbsize);
6252
6253 return (error);
6254 }
6255
#ifndef _SYS_SYSPROTO_H_
/* Argument layout for VOP_ACLCHECK(9); normally from sys/sysproto.h. */
struct vop_aclcheck_args {
	struct vnode *vp;
	acl_type_t type;
	struct acl *aclp;
	struct ucred *cred;
	struct thread *td;
};
#endif
6265
6266 static int
zfs_freebsd_aclcheck(struct vop_aclcheck_args * ap)6267 zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap)
6268 {
6269
6270 return (EOPNOTSUPP);
6271 }
6272
#ifndef _SYS_SYSPROTO_H_
/* Argument layout for VOP_ADVISE(9); normally from sys/sysproto.h. */
struct vop_advise_args {
	struct vnode *a_vp;
	off_t a_start;
	off_t a_end;
	int a_advice;
};
#endif
6281
/*
 * VOP_ADVISE: apply posix_fadvise(2) hints.  POSIX_FADV_WILLNEED
 * triggers an async DMU prefetch of the advised range; the remaining
 * recognized hints are currently accepted but ignored.
 */
static int
zfs_freebsd_advise(struct vop_advise_args *ap)
{
	vnode_t *vp = ap->a_vp;
	off_t start = ap->a_start;
	off_t end = ap->a_end;
	int advice = ap->a_advice;
	off_t len;
	znode_t *zp;
	zfsvfs_t *zfsvfs;
	objset_t *os;
	int error = 0;

	if (end < start)
		return (EINVAL);

	error = vn_lock(vp, LK_SHARED);
	if (error)
		return (error);

	zp = VTOZ(vp);
	zfsvfs = zp->z_zfsvfs;
	os = zp->z_zfsvfs->z_os;

	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
		goto out_unlock;

	/* kern_posix_fadvise points to the last byte, we want one past */
	if (end != OFF_MAX)
		end += 1;
	len = end - start;

	switch (advice) {
	case POSIX_FADV_WILLNEED:
		/*
		 * Pass on the caller's size directly, but note that
		 * dmu_prefetch_max will effectively cap it.  If there really
		 * is a larger sequential access pattern, perhaps dmu_zfetch
		 * will detect it.
		 */
		dmu_prefetch(os, zp->z_id, 0, start, len,
		    ZIO_PRIORITY_ASYNC_READ);
		break;
	case POSIX_FADV_NORMAL:
	case POSIX_FADV_RANDOM:
	case POSIX_FADV_SEQUENTIAL:
	case POSIX_FADV_DONTNEED:
	case POSIX_FADV_NOREUSE:
		/* ignored for now */
		break;
	default:
		error = EINVAL;
		break;
	}

	zfs_exit(zfsvfs, FTAG);

out_unlock:
	VOP_UNLOCK(vp);

	return (error);
}
6344
/*
 * VOP_VPTOCNP: translate a vnode to its (parent vnode, name) component
 * for reverse pathname lookup.  For the root of a snapshot mounted
 * under .zfs, the operation is forwarded to the covered vnode instead.
 */
static int
zfs_vptocnp(struct vop_vptocnp_args *ap)
{
	vnode_t *covered_vp;
	vnode_t *vp = ap->a_vp;
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	znode_t *zp = VTOZ(vp);
	int ltype;
	int error;

	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
		return (error);

	/*
	 * If we are a snapshot mounted under .zfs, run the operation
	 * on the covered vnode.
	 */
	if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) {
		char name[MAXNAMLEN + 1];
		znode_t *dzp;
		size_t len;

		error = zfs_znode_parent_and_name(zp, &dzp, name,
		    sizeof (name));
		if (error == 0) {
			/* The name is written backwards into a_buf. */
			len = strlen(name);
			if (*ap->a_buflen < len)
				error = SET_ERROR(ENOMEM);
		}
		if (error == 0) {
			*ap->a_buflen -= len;
			memcpy(ap->a_buf + *ap->a_buflen, name, len);
			*ap->a_vpp = ZTOV(dzp);
		}
		zfs_exit(zfsvfs, FTAG);
		return (error);
	}
	zfs_exit(zfsvfs, FTAG);

	/*
	 * Snapshot root: delegate to the vnode the snapshot mount covers.
	 * vp's lock must be dropped while the covered vnode is locked and
	 * reacquired afterwards, so recheck that vp is still alive.
	 */
	covered_vp = vp->v_mount->mnt_vnodecovered;
	enum vgetstate vs = vget_prep(covered_vp);
	ltype = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp);
	error = vget_finish(covered_vp, LK_SHARED, vs);
	if (error == 0) {
		error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_buf,
		    ap->a_buflen);
		vput(covered_vp);
	}
	vn_lock(vp, ltype | LK_RETRY);
	if (VN_IS_DOOMED(vp))
		error = SET_ERROR(ENOENT);
	return (error);
}
6399
#if __FreeBSD_version >= 1400032
/*
 * VOP_DEALLOCATE: punch a hole in the file over [*a_offset, *a_offset +
 * *a_len), clamped to the current file size.  On success *a_offset is
 * advanced past the hole and *a_len is set to 0.
 */
static int
zfs_deallocate(struct vop_deallocate_args *ap)
{
	znode_t *zp = VTOZ(ap->a_vp);
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	zilog_t *zilog;
	off_t off, len, file_sz;
	int error;

	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
		return (error);

	/*
	 * Callers might not be able to detect properly that we are read-only,
	 * so check it explicitly here.
	 */
	if (zfs_is_readonly(zfsvfs)) {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EROFS));
	}

	zilog = zfsvfs->z_log;
	off = *ap->a_offset;
	len = *ap->a_len;
	file_sz = zp->z_size;
	/* Clamp the range to the end of the file. */
	if (off + len > file_sz)
		len = file_sz - off;
	/* Fast path for out-of-range request. */
	if (len <= 0) {
		*ap->a_len = 0;
		zfs_exit(zfsvfs, FTAG);
		return (0);
	}

	error = zfs_freesp(zp, off, len, O_RDWR, TRUE);
	if (error == 0) {
		/* Honor sync=always and O_SYNC-style callers. */
		if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS ||
		    (ap->a_ioflag & IO_SYNC) != 0)
			zil_commit(zilog, zp->z_id);
		*ap->a_offset = off + len;
		*ap->a_len = 0;
	}

	zfs_exit(zfsvfs, FTAG);
	return (error);
}
#endif
6448
6449 #ifndef _SYS_SYSPROTO_H_
6450 struct vop_copy_file_range_args {
6451 struct vnode *a_invp;
6452 off_t *a_inoffp;
6453 struct vnode *a_outvp;
6454 off_t *a_outoffp;
6455 size_t *a_lenp;
6456 unsigned int a_flags;
6457 struct ucred *a_incred;
6458 struct ucred *a_outcred;
6459 struct thread *a_fsizetd;
6460 }
6461 #endif
6462 /*
6463 * TODO: FreeBSD will only call file system-specific copy_file_range() if both
6464 * files resides under the same mountpoint. In case of ZFS we want to be called
6465 * even is files are in different datasets (but on the same pools, but we need
6466 * to check that ourselves).
6467 */
/*
 * VOP_COPY_FILE_RANGE: attempt ZFS block cloning between the two files.
 * Whenever cloning is disabled, unsupported by the pool, or rejected by
 * zfs_clone_range(), return ENOSYS so the VFS falls back to
 * vn_generic_copy_file_range().
 */
static int
zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap)
{
	zfsvfs_t *outzfsvfs;
	struct vnode *invp = ap->a_invp;
	struct vnode *outvp = ap->a_outvp;
	struct mount *mp;
	int error;
	uint64_t len = *ap->a_lenp;

	if (!zfs_bclone_enabled) {
		mp = NULL;
		goto bad_write_fallback;
	}

	/*
	 * TODO: If offset/length is not aligned to recordsize, use
	 * vn_generic_copy_file_range() on this fragment.
	 * It would be better to do this after we lock the vnodes, but then we
	 * need something else than vn_generic_copy_file_range().
	 */

	vn_start_write(outvp, &mp, V_WAIT);
	if (__predict_true(mp == outvp->v_mount)) {
		outzfsvfs = (zfsvfs_t *)mp->mnt_data;
		/* The destination pool must support block cloning. */
		if (!spa_feature_is_enabled(dmu_objset_spa(outzfsvfs->z_os),
		    SPA_FEATURE_BLOCK_CLONING)) {
			goto bad_write_fallback;
		}
	}
	/*
	 * Lock both vnodes; a single exclusive lock suffices when cloning
	 * within one file.
	 */
	if (invp == outvp) {
		if (vn_lock(outvp, LK_EXCLUSIVE) != 0) {
			goto bad_write_fallback;
		}
	} else {
#if (__FreeBSD_version >= 1302506 && __FreeBSD_version < 1400000) || \
	__FreeBSD_version >= 1400086
		vn_lock_pair(invp, false, LK_SHARED, outvp, false,
		    LK_EXCLUSIVE);
#else
		vn_lock_pair(invp, false, outvp, false);
#endif
		if (VN_IS_DOOMED(invp) || VN_IS_DOOMED(outvp)) {
			goto bad_locked_fallback;
		}
	}

#ifdef MAC
	error = mac_vnode_check_write(curthread->td_ucred, ap->a_outcred,
	    outvp);
	if (error != 0)
		goto out_locked;
#endif

	error = zfs_clone_range(VTOZ(invp), ap->a_inoffp, VTOZ(outvp),
	    ap->a_outoffp, &len, ap->a_outcred);
	/* These errors mean cloning cannot be done; use the generic copy. */
	if (error == EXDEV || error == EAGAIN || error == EINVAL ||
	    error == EOPNOTSUPP)
		goto bad_locked_fallback;
	*ap->a_lenp = (size_t)len;
#ifdef MAC
out_locked:
#endif
	if (invp != outvp)
		VOP_UNLOCK(invp);
	VOP_UNLOCK(outvp);
	if (mp != NULL)
		vn_finished_write(mp);
	return (error);

bad_locked_fallback:
	if (invp != outvp)
		VOP_UNLOCK(invp);
	VOP_UNLOCK(outvp);
bad_write_fallback:
	if (mp != NULL)
		vn_finished_write(mp);
	error = ENOSYS;
	return (error);
}
6548
6549 struct vop_vector zfs_vnodeops;
6550 struct vop_vector zfs_fifoops;
6551 struct vop_vector zfs_shareops;
6552
/*
 * Vnode operations for regular ZFS files and directories.
 */
struct vop_vector zfs_vnodeops = {
	.vop_default = &default_vnodeops,
	.vop_inactive = zfs_freebsd_inactive,
	.vop_need_inactive = zfs_freebsd_need_inactive,
	.vop_reclaim = zfs_freebsd_reclaim,
	.vop_fplookup_vexec = zfs_freebsd_fplookup_vexec,
	.vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
	.vop_access = zfs_freebsd_access,
	.vop_allocate = VOP_EOPNOTSUPP,
#if __FreeBSD_version >= 1400032
	.vop_deallocate = zfs_deallocate,
#endif
	.vop_lookup = zfs_cache_lookup,
	.vop_cachedlookup = zfs_freebsd_cachedlookup,
	.vop_getattr = zfs_freebsd_getattr,
	.vop_setattr = zfs_freebsd_setattr,
	.vop_create = zfs_freebsd_create,
	.vop_mknod = (vop_mknod_t *)zfs_freebsd_create,
	.vop_mkdir = zfs_freebsd_mkdir,
	.vop_readdir = zfs_freebsd_readdir,
	.vop_fsync = zfs_freebsd_fsync,
	.vop_open = zfs_freebsd_open,
	.vop_close = zfs_freebsd_close,
	.vop_rmdir = zfs_freebsd_rmdir,
	.vop_ioctl = zfs_freebsd_ioctl,
	.vop_link = zfs_freebsd_link,
	.vop_symlink = zfs_freebsd_symlink,
	.vop_readlink = zfs_freebsd_readlink,
	.vop_advise = zfs_freebsd_advise,
	.vop_read = zfs_freebsd_read,
	.vop_write = zfs_freebsd_write,
	.vop_remove = zfs_freebsd_remove,
	.vop_rename = zfs_freebsd_rename,
	.vop_pathconf = zfs_freebsd_pathconf,
	.vop_bmap = zfs_freebsd_bmap,
	.vop_fid = zfs_freebsd_fid,
	.vop_getextattr = zfs_getextattr,
	.vop_deleteextattr = zfs_deleteextattr,
	.vop_setextattr = zfs_setextattr,
	.vop_listextattr = zfs_listextattr,
	.vop_getacl = zfs_freebsd_getacl,
	.vop_setacl = zfs_freebsd_setacl,
	.vop_aclcheck = zfs_freebsd_aclcheck,
	.vop_getpages = zfs_freebsd_getpages,
	.vop_putpages = zfs_freebsd_putpages,
	.vop_vptocnp = zfs_vptocnp,
	.vop_lock1 = vop_lock,
	.vop_unlock = vop_unlock,
	.vop_islocked = vop_islocked,
#if __FreeBSD_version >= 1400043
	.vop_add_writecount = vop_stdadd_writecount_nomsync,
#endif
	.vop_copy_file_range = zfs_freebsd_copy_file_range,
};
VFS_VOP_VECTOR_REGISTER(zfs_vnodeops);
6608
/*
 * Vnode operations for FIFOs on ZFS: data I/O is handled by
 * fifo_specops; ZFS supplies attribute, ACL and lifecycle operations.
 */
struct vop_vector zfs_fifoops = {
	.vop_default = &fifo_specops,
	.vop_fsync = zfs_freebsd_fsync,
	.vop_fplookup_vexec = zfs_freebsd_fplookup_vexec,
	.vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
	.vop_access = zfs_freebsd_access,
	.vop_getattr = zfs_freebsd_getattr,
	.vop_inactive = zfs_freebsd_inactive,
	.vop_read = VOP_PANIC,
	.vop_reclaim = zfs_freebsd_reclaim,
	.vop_setattr = zfs_freebsd_setattr,
	.vop_write = VOP_PANIC,
	.vop_pathconf = zfs_freebsd_pathconf,
	.vop_fid = zfs_freebsd_fid,
	.vop_getacl = zfs_freebsd_getacl,
	.vop_setacl = zfs_freebsd_setacl,
	.vop_aclcheck = zfs_freebsd_aclcheck,
#if __FreeBSD_version >= 1400043
	.vop_add_writecount = vop_stdadd_writecount_nomsync,
#endif
};
VFS_VOP_VECTOR_REGISTER(zfs_fifoops);
6631
6632 /*
6633 * special share hidden files vnode operations template
6634 */
6635 struct vop_vector zfs_shareops = {
6636 .vop_default = &default_vnodeops,
6637 .vop_fplookup_vexec = VOP_EAGAIN,
6638 .vop_fplookup_symlink = VOP_EAGAIN,
6639 .vop_access = zfs_freebsd_access,
6640 .vop_inactive = zfs_freebsd_inactive,
6641 .vop_reclaim = zfs_freebsd_reclaim,
6642 .vop_fid = zfs_freebsd_fid,
6643 .vop_pathconf = zfs_freebsd_pathconf,
6644 #if __FreeBSD_version >= 1400043
6645 .vop_add_writecount = vop_stdadd_writecount_nomsync,
6646 #endif
6647 };
6648 VFS_VOP_VECTOR_REGISTER(zfs_shareops);
6649
6650 ZFS_MODULE_PARAM(zfs, zfs_, xattr_compat, INT, ZMOD_RW,
6651 "Use legacy ZFS xattr naming for writing new user namespace xattrs");
6652