1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5736b9155Smarks * Common Development and Distribution License (the "License"). 6736b9155Smarks * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22d39ee142SMark Shellenbaum * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 239a686fbcSPaul Dagnelie * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 242144b121SMarcel Telka * Copyright 2014 Nexenta Systems, Inc. All rights reserved. 
25c3d26abcSMatthew Ahrens * Copyright (c) 2014 Integros [integros.com] 26fa9e4066Sahrens */ 27fa9e4066Sahrens 2875c76197Speteh /* Portions Copyright 2007 Jeremy Teo */ 2955da60b9SMark J Musante /* Portions Copyright 2010 Robert Milkowski */ 3075c76197Speteh 31fa9e4066Sahrens #include <sys/types.h> 32fa9e4066Sahrens #include <sys/param.h> 33fa9e4066Sahrens #include <sys/time.h> 34fa9e4066Sahrens #include <sys/systm.h> 35fa9e4066Sahrens #include <sys/sysmacros.h> 36fa9e4066Sahrens #include <sys/resource.h> 37fa9e4066Sahrens #include <sys/vfs.h> 38aa59c4cbSrsb #include <sys/vfs_opreg.h> 39fa9e4066Sahrens #include <sys/vnode.h> 40fa9e4066Sahrens #include <sys/file.h> 41fa9e4066Sahrens #include <sys/stat.h> 42fa9e4066Sahrens #include <sys/kmem.h> 43fa9e4066Sahrens #include <sys/taskq.h> 44fa9e4066Sahrens #include <sys/uio.h> 45fa9e4066Sahrens #include <sys/vmsystm.h> 46fa9e4066Sahrens #include <sys/atomic.h> 4744eda4d7Smaybee #include <sys/vm.h> 48fa9e4066Sahrens #include <vm/seg_vn.h> 49fa9e4066Sahrens #include <vm/pvn.h> 50fa9e4066Sahrens #include <vm/as.h> 510fab61baSJonathan W Adams #include <vm/kpm.h> 520fab61baSJonathan W Adams #include <vm/seg_kpm.h> 53fa9e4066Sahrens #include <sys/mman.h> 54fa9e4066Sahrens #include <sys/pathname.h> 55fa9e4066Sahrens #include <sys/cmn_err.h> 56fa9e4066Sahrens #include <sys/errno.h> 57fa9e4066Sahrens #include <sys/unistd.h> 58fa9e4066Sahrens #include <sys/zfs_dir.h> 59fa9e4066Sahrens #include <sys/zfs_acl.h> 60fa9e4066Sahrens #include <sys/zfs_ioctl.h> 61fa9e4066Sahrens #include <sys/fs/zfs.h> 62fa9e4066Sahrens #include <sys/dmu.h> 6355da60b9SMark J Musante #include <sys/dmu_objset.h> 64fa9e4066Sahrens #include <sys/spa.h> 65fa9e4066Sahrens #include <sys/txg.h> 66fa9e4066Sahrens #include <sys/dbuf.h> 67fa9e4066Sahrens #include <sys/zap.h> 680a586ceaSMark Shellenbaum #include <sys/sa.h> 69fa9e4066Sahrens #include <sys/dirent.h> 70fa9e4066Sahrens #include <sys/policy.h> 71fa9e4066Sahrens #include <sys/sunddi.h> 72fa9e4066Sahrens 
#include <sys/filio.h> 73c1ce5987SMark Shellenbaum #include <sys/sid.h> 74fa9e4066Sahrens #include "fs/fs_subr.h" 75fa9e4066Sahrens #include <sys/zfs_ctldir.h> 76da6c28aaSamw #include <sys/zfs_fuid.h> 770a586ceaSMark Shellenbaum #include <sys/zfs_sa.h> 78033f9833Sek110237 #include <sys/dnlc.h> 79104e2ed7Sperrin #include <sys/zfs_rlock.h> 80da6c28aaSamw #include <sys/extdirent.h> 81da6c28aaSamw #include <sys/kidmap.h> 8267dbe2beSCasper H.S. Dik #include <sys/cred.h> 83b38f0970Sck153898 #include <sys/attr.h> 84d78b796cSAndreas Jaekel #include <sys/zfs_events.h> 85e206ace3SAndreas Jaekel #include <sys/fs/zev.h> 86fa9e4066Sahrens 87fa9e4066Sahrens /* 88fa9e4066Sahrens * Programming rules. 89fa9e4066Sahrens * 90fa9e4066Sahrens * Each vnode op performs some logical unit of work. To do this, the ZPL must 91fa9e4066Sahrens * properly lock its in-core state, create a DMU transaction, do the work, 92fa9e4066Sahrens * record this work in the intent log (ZIL), commit the DMU transaction, 93da6c28aaSamw * and wait for the intent log to commit if it is a synchronous operation. 94da6c28aaSamw * Moreover, the vnode ops must work in both normal and log replay context. 95fa9e4066Sahrens * The ordering of events is important to avoid deadlocks and references 96fa9e4066Sahrens * to freed memory. The example below illustrates the following Big Rules: 97fa9e4066Sahrens * 98fa9e4066Sahrens * (1) A check must be made in each zfs thread for a mounted file system. 993cb34c60Sahrens * This is done avoiding races using ZFS_ENTER(zfsvfs). 1003cb34c60Sahrens * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes 1013cb34c60Sahrens * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros 1023cb34c60Sahrens * can return EIO from the calling function. 103fa9e4066Sahrens * 104fa9e4066Sahrens * (2) VN_RELE() should always be the last thing except for zil_commit() 105b19a79ecSperrin * (if necessary) and ZFS_EXIT(). 
This is for 3 reasons:
 *	First, if it's the last reference, the vnode/znode
 *	can be freed, so the zp may point to freed memory. Second, the last
 *	reference will call zfs_zinactive(), which may induce a lot of work --
 *	pushing cached pages (which acquires range locks) and syncing out
 *	cached atime changes. Third, zfs_zinactive() may require a new tx,
 *	which could deadlock the system if you were already holding one.
 *	If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
 *
 *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
 *	as they can span dmu_tx_assign() calls.
 *
 *  (4)	If ZPL locks are held, pass TXG_NOWAIT as the second argument to
 *	dmu_tx_assign(). This is critical because we don't want to block
 *	while holding locks.
 *
 *	If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This
 *	reduces lock contention and CPU usage when we must wait (note that if
 *	throughput is constrained by the storage, nearly every transaction
 *	must wait).
 *
 *	Note, in particular, that if a lock is sometimes acquired before
 *	the tx assigns, and sometimes after (e.g. z_lock), then failing
 *	to use a non-blocking assign can deadlock the system. The scenario:
 *
 *	Thread A has grabbed a lock before calling dmu_tx_assign().
 *	Thread B is in an already-assigned tx, and blocks for this lock.
 *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
 *	forever, because the previous txg can't quiesce until B's tx commits.
 *
 *	If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
 *	then drop all locks, call dmu_tx_wait(), and try again. On subsequent
 *	calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT,
 *	to indicate that this operation has already called dmu_tx_wait().
 *	This will ensure that we don't retry forever, waiting a short bit
 *	each time.
 *
 *  (5)	If the operation succeeded, generate the intent log entry for it
 *	before dropping locks. This ensures that the ordering of events
 *	in the intent log matches the order in which they actually occurred.
 *	During ZIL replay the zfs_log_* functions will update the sequence
 *	number to indicate the zil transaction has replayed.
 *
 *  (6)	At the end of each vnode op, the DMU tx must always commit,
 *	regardless of whether there were any errors.
 *
 *  (7)	After dropping all locks, invoke zil_commit(zilog, foid)
 *	to ensure that synchronous semantics are provided when necessary.
 *
 * In general, this is how things should be ordered in each vnode op:
 *
 *	ZFS_ENTER(zfsvfs);		// exit if unmounted
 * top:
 *	zfs_dirent_lock(&dl, ...)
// lock directory entry (may VN_HOLD()) 159fa9e4066Sahrens * rw_enter(...); // grab any other locks you need 160fa9e4066Sahrens * tx = dmu_tx_create(...); // get DMU tx 161fa9e4066Sahrens * dmu_tx_hold_*(); // hold each object you might modify 16269962b56SMatthew Ahrens * error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 163fa9e4066Sahrens * if (error) { 164fa9e4066Sahrens * rw_exit(...); // drop locks 165fa9e4066Sahrens * zfs_dirent_unlock(dl); // unlock directory entry 166fa9e4066Sahrens * VN_RELE(...); // release held vnodes 1671209a471SNeil Perrin * if (error == ERESTART) { 16869962b56SMatthew Ahrens * waited = B_TRUE; 1698a2f1b91Sahrens * dmu_tx_wait(tx); 1708a2f1b91Sahrens * dmu_tx_abort(tx); 171fa9e4066Sahrens * goto top; 172fa9e4066Sahrens * } 1738a2f1b91Sahrens * dmu_tx_abort(tx); // abort DMU tx 174fa9e4066Sahrens * ZFS_EXIT(zfsvfs); // finished in zfs 175fa9e4066Sahrens * return (error); // really out of space 176fa9e4066Sahrens * } 177fa9e4066Sahrens * error = do_real_work(); // do whatever this VOP does 178fa9e4066Sahrens * if (error == 0) 179b19a79ecSperrin * zfs_log_*(...); // on success, make ZIL entry 180fa9e4066Sahrens * dmu_tx_commit(tx); // commit DMU tx -- error or not 181fa9e4066Sahrens * rw_exit(...); // drop locks 182fa9e4066Sahrens * zfs_dirent_unlock(dl); // unlock directory entry 183fa9e4066Sahrens * VN_RELE(...); // release held vnodes 1845002558fSNeil Perrin * zil_commit(zilog, foid); // synchronous when necessary 185fa9e4066Sahrens * ZFS_EXIT(zfsvfs); // finished in zfs 186fa9e4066Sahrens * return (error); // done, report error 187fa9e4066Sahrens */ 1883cb34c60Sahrens 189fa9e4066Sahrens /* ARGSUSED */ 190fa9e4066Sahrens static int 191da6c28aaSamw zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 192fa9e4066Sahrens { 19367bd71c6Sperrin znode_t *zp = VTOZ(*vpp); 194b614fdaaSMark Shellenbaum zfsvfs_t *zfsvfs = zp->z_zfsvfs; 195b614fdaaSMark Shellenbaum 196b614fdaaSMark Shellenbaum ZFS_ENTER(zfsvfs); 
197b614fdaaSMark Shellenbaum ZFS_VERIFY_ZP(zp); 19867bd71c6Sperrin 1990a586ceaSMark Shellenbaum if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) && 200da6c28aaSamw ((flag & FAPPEND) == 0)) { 201b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 202be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 203da6c28aaSamw } 204da6c28aaSamw 205da6c28aaSamw if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 206da6c28aaSamw ZTOV(zp)->v_type == VREG && 2070a586ceaSMark Shellenbaum !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) { 208b614fdaaSMark Shellenbaum if (fs_vscan(*vpp, cr, 0) != 0) { 209b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 210be6fd75aSMatthew Ahrens return (SET_ERROR(EACCES)); 211b614fdaaSMark Shellenbaum } 212b614fdaaSMark Shellenbaum } 213da6c28aaSamw 21467bd71c6Sperrin /* Keep a count of the synchronous opens in the znode */ 21567bd71c6Sperrin if (flag & (FSYNC | FDSYNC)) 21667bd71c6Sperrin atomic_inc_32(&zp->z_sync_cnt); 217da6c28aaSamw 218b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 219fa9e4066Sahrens return (0); 220fa9e4066Sahrens } 221fa9e4066Sahrens 222fa9e4066Sahrens /* ARGSUSED */ 223fa9e4066Sahrens static int 224da6c28aaSamw zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 225da6c28aaSamw caller_context_t *ct) 226fa9e4066Sahrens { 22767bd71c6Sperrin znode_t *zp = VTOZ(vp); 228b614fdaaSMark Shellenbaum zfsvfs_t *zfsvfs = zp->z_zfsvfs; 229b614fdaaSMark Shellenbaum 230ee8143cbSChris Kirby /* 231ee8143cbSChris Kirby * Clean up any locks held by this process on the vp. 
232ee8143cbSChris Kirby */ 233ee8143cbSChris Kirby cleanlocks(vp, ddi_get_pid(), 0); 234ee8143cbSChris Kirby cleanshares(vp, ddi_get_pid()); 235ee8143cbSChris Kirby 236b614fdaaSMark Shellenbaum ZFS_ENTER(zfsvfs); 237b614fdaaSMark Shellenbaum ZFS_VERIFY_ZP(zp); 23867bd71c6Sperrin 23967bd71c6Sperrin /* Decrement the synchronous opens in the znode */ 240ecb72030Sperrin if ((flag & (FSYNC | FDSYNC)) && (count == 1)) 24167bd71c6Sperrin atomic_dec_32(&zp->z_sync_cnt); 24267bd71c6Sperrin 243da6c28aaSamw if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan && 244da6c28aaSamw ZTOV(zp)->v_type == VREG && 2450a586ceaSMark Shellenbaum !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) 246da6c28aaSamw VERIFY(fs_vscan(vp, cr, 1) == 0); 247da6c28aaSamw 248ce7671ecSAndreas Jaekel if (ZTOV(zp)->v_type == VREG && zp->z_new_content) { 249d78b796cSAndreas Jaekel zp->z_new_content = 0; 250d78b796cSAndreas Jaekel rw_enter(&rz_zev_rwlock, RW_READER); 251d78b796cSAndreas Jaekel if (rz_zev_callbacks && 252d78b796cSAndreas Jaekel rz_zev_callbacks->rz_zev_znode_close_after_update) 253d78b796cSAndreas Jaekel rz_zev_callbacks->rz_zev_znode_close_after_update(zp); 254d78b796cSAndreas Jaekel rw_exit(&rz_zev_rwlock); 255d78b796cSAndreas Jaekel } 256d78b796cSAndreas Jaekel 257b614fdaaSMark Shellenbaum ZFS_EXIT(zfsvfs); 258fa9e4066Sahrens return (0); 259fa9e4066Sahrens } 260fa9e4066Sahrens 261fa9e4066Sahrens /* 262fa9e4066Sahrens * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and 263fa9e4066Sahrens * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter. 
264fa9e4066Sahrens */ 265fa9e4066Sahrens static int 266fa9e4066Sahrens zfs_holey(vnode_t *vp, int cmd, offset_t *off) 267fa9e4066Sahrens { 268fa9e4066Sahrens znode_t *zp = VTOZ(vp); 269fa9e4066Sahrens uint64_t noff = (uint64_t)*off; /* new offset */ 270fa9e4066Sahrens uint64_t file_sz; 271fa9e4066Sahrens int error; 272fa9e4066Sahrens boolean_t hole; 273fa9e4066Sahrens 2740a586ceaSMark Shellenbaum file_sz = zp->z_size; 275fa9e4066Sahrens if (noff >= file_sz) { 276be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO)); 277fa9e4066Sahrens } 278fa9e4066Sahrens 279fa9e4066Sahrens if (cmd == _FIO_SEEK_HOLE) 280fa9e4066Sahrens hole = B_TRUE; 281fa9e4066Sahrens else 282fa9e4066Sahrens hole = B_FALSE; 283fa9e4066Sahrens 284fa9e4066Sahrens error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff); 285fa9e4066Sahrens 2860fbc0cd0SMatthew Ahrens if (error == ESRCH) 287be6fd75aSMatthew Ahrens return (SET_ERROR(ENXIO)); 2880fbc0cd0SMatthew Ahrens 2890fbc0cd0SMatthew Ahrens /* 2900fbc0cd0SMatthew Ahrens * We could find a hole that begins after the logical end-of-file, 2910fbc0cd0SMatthew Ahrens * because dmu_offset_next() only works on whole blocks. If the 2920fbc0cd0SMatthew Ahrens * EOF falls mid-block, then indicate that the "virtual hole" 2930fbc0cd0SMatthew Ahrens * at the end of the file begins at the logical EOF, rather than 2940fbc0cd0SMatthew Ahrens * at the end of the last block. 
2950fbc0cd0SMatthew Ahrens */ 2960fbc0cd0SMatthew Ahrens if (noff > file_sz) { 2970fbc0cd0SMatthew Ahrens ASSERT(hole); 2980fbc0cd0SMatthew Ahrens noff = file_sz; 299fa9e4066Sahrens } 300fa9e4066Sahrens 301fa9e4066Sahrens if (noff < *off) 302fa9e4066Sahrens return (error); 303fa9e4066Sahrens *off = noff; 304fa9e4066Sahrens return (error); 305fa9e4066Sahrens } 306fa9e4066Sahrens 307fa9e4066Sahrens /* ARGSUSED */ 308fa9e4066Sahrens static int 309fa9e4066Sahrens zfs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred, 310da6c28aaSamw int *rvalp, caller_context_t *ct) 311fa9e4066Sahrens { 312fa9e4066Sahrens offset_t off; 3132bcf0248SMax Grossman offset_t ndata; 3142bcf0248SMax Grossman dmu_object_info_t doi; 315fa9e4066Sahrens int error; 316fa9e4066Sahrens zfsvfs_t *zfsvfs; 317f18faf3fSek110237 znode_t *zp; 318fa9e4066Sahrens 319fa9e4066Sahrens switch (com) { 320fa9e4066Sahrens case _FIOFFS: 3212bcf0248SMax Grossman { 322fa9e4066Sahrens return (zfs_sync(vp->v_vfsp, 0, cred)); 323fa9e4066Sahrens 324ea8dc4b6Seschrock /* 325ea8dc4b6Seschrock * The following two ioctls are used by bfu. Faking out, 326ea8dc4b6Seschrock * necessary to avoid bfu errors. 
327ea8dc4b6Seschrock */ 3282bcf0248SMax Grossman } 329ea8dc4b6Seschrock case _FIOGDIO: 330ea8dc4b6Seschrock case _FIOSDIO: 3312bcf0248SMax Grossman { 332ea8dc4b6Seschrock return (0); 3332bcf0248SMax Grossman } 334ea8dc4b6Seschrock 335fa9e4066Sahrens case _FIO_SEEK_DATA: 336fa9e4066Sahrens case _FIO_SEEK_HOLE: 3372bcf0248SMax Grossman { 338fa9e4066Sahrens if (ddi_copyin((void *)data, &off, sizeof (off), flag)) 339be6fd75aSMatthew Ahrens return (SET_ERROR(EFAULT)); 340fa9e4066Sahrens 341f18faf3fSek110237 zp = VTOZ(vp); 342f18faf3fSek110237 zfsvfs = zp->z_zfsvfs; 3433cb34c60Sahrens ZFS_ENTER(zfsvfs); 3443cb34c60Sahrens ZFS_VERIFY_ZP(zp); 345fa9e4066Sahrens 346fa9e4066Sahrens /* offset parameter is in/out */ 347fa9e4066Sahrens error = zfs_holey(vp, com, &off); 348fa9e4066Sahrens ZFS_EXIT(zfsvfs); 349fa9e4066Sahrens if (error) 350fa9e4066Sahrens return (error); 351fa9e4066Sahrens if (ddi_copyout(&off, (void *)data, sizeof (off), flag)) 352be6fd75aSMatthew Ahrens return (SET_ERROR(EFAULT)); 353fa9e4066Sahrens return (0); 354fa9e4066Sahrens } 3552bcf0248SMax Grossman case _FIO_COUNT_FILLED: 3562bcf0248SMax Grossman { 3572bcf0248SMax Grossman /* 3582bcf0248SMax Grossman * _FIO_COUNT_FILLED adds a new ioctl command which 3592bcf0248SMax Grossman * exposes the number of filled blocks in a 3602bcf0248SMax Grossman * ZFS object. 3612bcf0248SMax Grossman */ 3622bcf0248SMax Grossman zp = VTOZ(vp); 3632bcf0248SMax Grossman zfsvfs = zp->z_zfsvfs; 3642bcf0248SMax Grossman ZFS_ENTER(zfsvfs); 3652bcf0248SMax Grossman ZFS_VERIFY_ZP(zp); 3662bcf0248SMax Grossman 3672bcf0248SMax Grossman /* 3682bcf0248SMax Grossman * Wait for all dirty blocks for this object 3692bcf0248SMax Grossman * to get synced out to disk, and the DMU info 3702bcf0248SMax Grossman * updated. 
3712bcf0248SMax Grossman */ 3722bcf0248SMax Grossman error = dmu_object_wait_synced(zfsvfs->z_os, zp->z_id); 3732bcf0248SMax Grossman if (error) { 3742bcf0248SMax Grossman ZFS_EXIT(zfsvfs); 3752bcf0248SMax Grossman return (error); 3762bcf0248SMax Grossman } 3772bcf0248SMax Grossman 3782bcf0248SMax Grossman /* 3792bcf0248SMax Grossman * Retrieve fill count from DMU object. 3802bcf0248SMax Grossman */ 3812bcf0248SMax Grossman error = dmu_object_info(zfsvfs->z_os, zp->z_id, &doi); 3822bcf0248SMax Grossman if (error) { 3832bcf0248SMax Grossman ZFS_EXIT(zfsvfs); 3842bcf0248SMax Grossman return (error); 3852bcf0248SMax Grossman } 3862bcf0248SMax Grossman 3872bcf0248SMax Grossman ndata = doi.doi_fill_count; 3882bcf0248SMax Grossman 3892bcf0248SMax Grossman ZFS_EXIT(zfsvfs); 3902bcf0248SMax Grossman if (ddi_copyout(&ndata, (void *)data, sizeof (ndata), flag)) 3912bcf0248SMax Grossman return (SET_ERROR(EFAULT)); 3922bcf0248SMax Grossman return (0); 3932bcf0248SMax Grossman } 3942bcf0248SMax Grossman } 395be6fd75aSMatthew Ahrens return (SET_ERROR(ENOTTY)); 396fa9e4066Sahrens } 397fa9e4066Sahrens 398fa9e4066Sahrens /* 3990fab61baSJonathan W Adams * Utility functions to map and unmap a single physical page. These 4000fab61baSJonathan W Adams * are used to manage the mappable copies of ZFS file data, and therefore 4010fab61baSJonathan W Adams * do not update ref/mod bits. 4020fab61baSJonathan W Adams */ 4030fab61baSJonathan W Adams caddr_t 4040fab61baSJonathan W Adams zfs_map_page(page_t *pp, enum seg_rw rw) 4050fab61baSJonathan W Adams { 4060fab61baSJonathan W Adams if (kpm_enable) 4070fab61baSJonathan W Adams return (hat_kpm_mapin(pp, 0)); 4080fab61baSJonathan W Adams ASSERT(rw == S_READ || rw == S_WRITE); 4090fab61baSJonathan W Adams return (ppmapin(pp, PROT_READ | ((rw == S_WRITE) ? 
PROT_WRITE : 0), 4100fab61baSJonathan W Adams (caddr_t)-1)); 4110fab61baSJonathan W Adams } 4120fab61baSJonathan W Adams 4130fab61baSJonathan W Adams void 4140fab61baSJonathan W Adams zfs_unmap_page(page_t *pp, caddr_t addr) 4150fab61baSJonathan W Adams { 4160fab61baSJonathan W Adams if (kpm_enable) { 4170fab61baSJonathan W Adams hat_kpm_mapout(pp, 0, addr); 4180fab61baSJonathan W Adams } else { 4190fab61baSJonathan W Adams ppmapout(addr); 4200fab61baSJonathan W Adams } 4210fab61baSJonathan W Adams } 4220fab61baSJonathan W Adams 4230fab61baSJonathan W Adams /* 424fa9e4066Sahrens * When a file is memory mapped, we must keep the IO data synchronized 425fa9e4066Sahrens * between the DMU cache and the memory mapped pages. What this means: 426fa9e4066Sahrens * 427fa9e4066Sahrens * On Write: If we find a memory mapped page, we write to *both* 428fa9e4066Sahrens * the page and the dmu buffer. 429fa9e4066Sahrens */ 430ac05c741SMark Maybee static void 431ac05c741SMark Maybee update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid) 432fa9e4066Sahrens { 433ac05c741SMark Maybee int64_t off; 434fa9e4066Sahrens 435fa9e4066Sahrens off = start & PAGEOFFSET; 436fa9e4066Sahrens for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 437fa9e4066Sahrens page_t *pp; 438ac05c741SMark Maybee uint64_t nbytes = MIN(PAGESIZE - off, len); 439fa9e4066Sahrens 440fa9e4066Sahrens if (pp = page_lookup(vp, start, SE_SHARED)) { 441fa9e4066Sahrens caddr_t va; 442fa9e4066Sahrens 4430fab61baSJonathan W Adams va = zfs_map_page(pp, S_WRITE); 4447bfdf011SNeil Perrin (void) dmu_read(os, oid, start+off, nbytes, va+off, 4457bfdf011SNeil Perrin DMU_READ_PREFETCH); 4460fab61baSJonathan W Adams zfs_unmap_page(pp, va); 447fa9e4066Sahrens page_unlock(pp); 448fa9e4066Sahrens } 449ac05c741SMark Maybee len -= nbytes; 450fa9e4066Sahrens off = 0; 451fa9e4066Sahrens } 452fa9e4066Sahrens } 453fa9e4066Sahrens 454fa9e4066Sahrens /* 455fa9e4066Sahrens * When a file is memory mapped, we must keep the IO 
data synchronized 456fa9e4066Sahrens * between the DMU cache and the memory mapped pages. What this means: 457fa9e4066Sahrens * 458fa9e4066Sahrens * On Read: We "read" preferentially from memory mapped pages, 459fa9e4066Sahrens * else we default from the dmu buffer. 460fa9e4066Sahrens * 461fa9e4066Sahrens * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when 462fa9e4066Sahrens * the file is memory mapped. 463fa9e4066Sahrens */ 464fa9e4066Sahrens static int 465feb08c6bSbillm mappedread(vnode_t *vp, int nbytes, uio_t *uio) 466fa9e4066Sahrens { 467feb08c6bSbillm znode_t *zp = VTOZ(vp); 468feb08c6bSbillm int64_t start, off; 469fa9e4066Sahrens int len = nbytes; 470fa9e4066Sahrens int error = 0; 471fa9e4066Sahrens 472fa9e4066Sahrens start = uio->uio_loffset; 473fa9e4066Sahrens off = start & PAGEOFFSET; 474fa9e4066Sahrens for (start &= PAGEMASK; len > 0; start += PAGESIZE) { 475fa9e4066Sahrens page_t *pp; 476feb08c6bSbillm uint64_t bytes = MIN(PAGESIZE - off, len); 477fa9e4066Sahrens 478fa9e4066Sahrens if (pp = page_lookup(vp, start, SE_SHARED)) { 479fa9e4066Sahrens caddr_t va; 480fa9e4066Sahrens 4810fab61baSJonathan W Adams va = zfs_map_page(pp, S_READ); 482fa9e4066Sahrens error = uiomove(va + off, bytes, UIO_READ, uio); 4830fab61baSJonathan W Adams zfs_unmap_page(pp, va); 484fa9e4066Sahrens page_unlock(pp); 485fa9e4066Sahrens } else { 486f8554bb9SMatthew Ahrens error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 487f8554bb9SMatthew Ahrens uio, bytes); 488fa9e4066Sahrens } 489fa9e4066Sahrens len -= bytes; 490fa9e4066Sahrens off = 0; 491fa9e4066Sahrens if (error) 492fa9e4066Sahrens break; 493fa9e4066Sahrens } 494fa9e4066Sahrens return (error); 495fa9e4066Sahrens } 496fa9e4066Sahrens 497feb08c6bSbillm offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */ 498fa9e4066Sahrens 499fa9e4066Sahrens /* 500fa9e4066Sahrens * Read bytes from specified file into supplied buffer. 501fa9e4066Sahrens * 502fa9e4066Sahrens * IN: vp - vnode of file to be read from. 
503fa9e4066Sahrens * uio - structure supplying read location, range info, 504fa9e4066Sahrens * and return buffer. 505fa9e4066Sahrens * ioflag - SYNC flags; used to provide FRSYNC semantics. 506fa9e4066Sahrens * cr - credentials of caller. 507da6c28aaSamw * ct - caller context 508fa9e4066Sahrens * 509fa9e4066Sahrens * OUT: uio - updated offset and range, buffer filled. 510fa9e4066Sahrens * 511f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 512fa9e4066Sahrens * 513fa9e4066Sahrens * Side Effects: 514fa9e4066Sahrens * vp - atime updated if byte count > 0 515fa9e4066Sahrens */ 516fa9e4066Sahrens /* ARGSUSED */ 517fa9e4066Sahrens static int 518fa9e4066Sahrens zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 519fa9e4066Sahrens { 520fa9e4066Sahrens znode_t *zp = VTOZ(vp); 521fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 522feb08c6bSbillm ssize_t n, nbytes; 523d5285caeSGeorge Wilson int error = 0; 524104e2ed7Sperrin rl_t *rl; 525c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio_t *xuio = NULL; 526fa9e4066Sahrens 5273cb34c60Sahrens ZFS_ENTER(zfsvfs); 5283cb34c60Sahrens ZFS_VERIFY_ZP(zp); 529fa9e4066Sahrens 5300a586ceaSMark Shellenbaum if (zp->z_pflags & ZFS_AV_QUARANTINED) { 5310616c50eSmarks ZFS_EXIT(zfsvfs); 532be6fd75aSMatthew Ahrens return (SET_ERROR(EACCES)); 5330616c50eSmarks } 5340616c50eSmarks 535fa9e4066Sahrens /* 536fa9e4066Sahrens * Validate file offset 537fa9e4066Sahrens */ 538fa9e4066Sahrens if (uio->uio_loffset < (offset_t)0) { 539fa9e4066Sahrens ZFS_EXIT(zfsvfs); 540be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 541fa9e4066Sahrens } 542fa9e4066Sahrens 543fa9e4066Sahrens /* 544fa9e4066Sahrens * Fasttrack empty reads 545fa9e4066Sahrens */ 546fa9e4066Sahrens if (uio->uio_resid == 0) { 547fa9e4066Sahrens ZFS_EXIT(zfsvfs); 548fa9e4066Sahrens return (0); 549fa9e4066Sahrens } 550fa9e4066Sahrens 551fa9e4066Sahrens /* 552104e2ed7Sperrin * Check for mandatory locks 553fa9e4066Sahrens */ 
5540a586ceaSMark Shellenbaum if (MANDMODE(zp->z_mode)) { 555fa9e4066Sahrens if (error = chklock(vp, FREAD, 556fa9e4066Sahrens uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) { 557fa9e4066Sahrens ZFS_EXIT(zfsvfs); 558fa9e4066Sahrens return (error); 559fa9e4066Sahrens } 560fa9e4066Sahrens } 561fa9e4066Sahrens 562fa9e4066Sahrens /* 563fa9e4066Sahrens * If we're in FRSYNC mode, sync out this znode before reading it. 564fa9e4066Sahrens */ 56555da60b9SMark J Musante if (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 5665002558fSNeil Perrin zil_commit(zfsvfs->z_log, zp->z_id); 567fa9e4066Sahrens 568fa9e4066Sahrens /* 569104e2ed7Sperrin * Lock the range against changes. 570fa9e4066Sahrens */ 571104e2ed7Sperrin rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER); 572104e2ed7Sperrin 573fa9e4066Sahrens /* 574fa9e4066Sahrens * If we are reading past end-of-file we can skip 575fa9e4066Sahrens * to the end; but we might still need to set atime. 576fa9e4066Sahrens */ 5770a586ceaSMark Shellenbaum if (uio->uio_loffset >= zp->z_size) { 578fa9e4066Sahrens error = 0; 579fa9e4066Sahrens goto out; 580fa9e4066Sahrens } 581fa9e4066Sahrens 5820a586ceaSMark Shellenbaum ASSERT(uio->uio_loffset < zp->z_size); 5830a586ceaSMark Shellenbaum n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset); 584fa9e4066Sahrens 585c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if ((uio->uio_extflg == UIO_XUIO) && 586c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) { 587c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int nblk; 588c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int blksz = zp->z_blksz; 589c242f9a0Schunli zhang - Sun Microsystems - Irvine United States uint64_t offset = uio->uio_loffset; 590c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 591c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio = (xuio_t *)uio; 
592c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if ((ISP2(blksz))) { 593c242f9a0Schunli zhang - Sun Microsystems - Irvine United States nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset, 594c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz)) / blksz; 595c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else { 596c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(offset + n <= blksz); 597c242f9a0Schunli zhang - Sun Microsystems - Irvine United States nblk = 1; 598c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 599570de38fSSurya Prakki (void) dmu_xuio_init(xuio, nblk); 600c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 601c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (vn_has_cached_data(vp)) { 602c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 603c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * For simplicity, we always allocate a full buffer 604c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * even if we only expect to read a portion of a block. 
605c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 606c242f9a0Schunli zhang - Sun Microsystems - Irvine United States while (--nblk >= 0) { 607570de38fSSurya Prakki (void) dmu_xuio_add(xuio, 6080a586ceaSMark Shellenbaum dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 6090a586ceaSMark Shellenbaum blksz), 0, blksz); 610c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 611c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 612c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 613c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 614feb08c6bSbillm while (n > 0) { 615feb08c6bSbillm nbytes = MIN(n, zfs_read_chunk_size - 616feb08c6bSbillm P2PHASE(uio->uio_loffset, zfs_read_chunk_size)); 617feb08c6bSbillm 618f8554bb9SMatthew Ahrens if (vn_has_cached_data(vp)) { 619feb08c6bSbillm error = mappedread(vp, nbytes, uio); 620f8554bb9SMatthew Ahrens } else { 621f8554bb9SMatthew Ahrens error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), 622f8554bb9SMatthew Ahrens uio, nbytes); 623f8554bb9SMatthew Ahrens } 624b87f3af3Sperrin if (error) { 625b87f3af3Sperrin /* convert checksum errors into IO errors */ 626b87f3af3Sperrin if (error == ECKSUM) 627be6fd75aSMatthew Ahrens error = SET_ERROR(EIO); 628fa9e4066Sahrens break; 629b87f3af3Sperrin } 630feb08c6bSbillm 631feb08c6bSbillm n -= nbytes; 632fa9e4066Sahrens } 633fa9e4066Sahrens out: 634c5c6ffa0Smaybee zfs_range_unlock(rl); 635fa9e4066Sahrens 636fa9e4066Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 637fa9e4066Sahrens ZFS_EXIT(zfsvfs); 638fa9e4066Sahrens return (error); 639fa9e4066Sahrens } 640fa9e4066Sahrens 641fa9e4066Sahrens /* 642fa9e4066Sahrens * Write the bytes to a file. 643fa9e4066Sahrens * 644fa9e4066Sahrens * IN: vp - vnode of file to be written to. 645fa9e4066Sahrens * uio - structure supplying write location, range info, 646fa9e4066Sahrens * and data buffer. 647f7170741SWill Andrews * ioflag - FAPPEND, FSYNC, and/or FDSYNC. 
FAPPEND is 648f7170741SWill Andrews * set if in append mode. 649fa9e4066Sahrens * cr - credentials of caller. 650da6c28aaSamw * ct - caller context (NFS/CIFS fem monitor only) 651fa9e4066Sahrens * 652fa9e4066Sahrens * OUT: uio - updated offset and range. 653fa9e4066Sahrens * 654f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 655fa9e4066Sahrens * 656fa9e4066Sahrens * Timestamps: 657fa9e4066Sahrens * vp - ctime|mtime updated if byte count > 0 658fa9e4066Sahrens */ 6590a586ceaSMark Shellenbaum 660fa9e4066Sahrens /* ARGSUSED */ 661fa9e4066Sahrens static int 662fa9e4066Sahrens zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) 663fa9e4066Sahrens { 664fa9e4066Sahrens znode_t *zp = VTOZ(vp); 665fa9e4066Sahrens rlim64_t limit = uio->uio_llimit; 666fa9e4066Sahrens ssize_t start_resid = uio->uio_resid; 667fa9e4066Sahrens ssize_t tx_bytes; 668fa9e4066Sahrens uint64_t end_size; 669fa9e4066Sahrens dmu_tx_t *tx; 670fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 671f18faf3fSek110237 zilog_t *zilog; 672fa9e4066Sahrens offset_t woff; 673fa9e4066Sahrens ssize_t n, nbytes; 674104e2ed7Sperrin rl_t *rl; 675fa9e4066Sahrens int max_blksz = zfsvfs->z_max_blksz; 676d5285caeSGeorge Wilson int error = 0; 6772fdbea25SAleksandr Guzovskiy arc_buf_t *abuf; 678d5285caeSGeorge Wilson iovec_t *aiov = NULL; 679c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio_t *xuio = NULL; 680c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int i_iov = 0; 681c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int iovcnt = uio->uio_iovcnt; 682c242f9a0Schunli zhang - Sun Microsystems - Irvine United States iovec_t *iovp = uio->uio_iov; 683c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int write_eof; 6840a586ceaSMark Shellenbaum int count = 0; 6850a586ceaSMark Shellenbaum sa_bulk_attr_t bulk[4]; 6860a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 6875e286361SAndreas Jaekel ssize_t lock_off, 
lock_len; 688fa9e4066Sahrens 689fa9e4066Sahrens /* 690fa9e4066Sahrens * Fasttrack empty write 691fa9e4066Sahrens */ 692104e2ed7Sperrin n = start_resid; 693fa9e4066Sahrens if (n == 0) 694fa9e4066Sahrens return (0); 695fa9e4066Sahrens 696104e2ed7Sperrin if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 697104e2ed7Sperrin limit = MAXOFFSET_T; 698104e2ed7Sperrin 6993cb34c60Sahrens ZFS_ENTER(zfsvfs); 7003cb34c60Sahrens ZFS_VERIFY_ZP(zp); 701c09193bfSmarks 7020a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 7030a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 7040a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 7050a586ceaSMark Shellenbaum &zp->z_size, 8); 7060a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 7070a586ceaSMark Shellenbaum &zp->z_pflags, 8); 7080a586ceaSMark Shellenbaum 709c09193bfSmarks /* 7102144b121SMarcel Telka * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. snapshots) our 7112144b121SMarcel Telka * callers might not be able to detect properly that we are read-only, 7122144b121SMarcel Telka * so check it explicitly here. 
7132144b121SMarcel Telka */ 7142144b121SMarcel Telka if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 7152144b121SMarcel Telka ZFS_EXIT(zfsvfs); 7162144b121SMarcel Telka return (SET_ERROR(EROFS)); 7172144b121SMarcel Telka } 7182144b121SMarcel Telka 7192144b121SMarcel Telka /* 720c09193bfSmarks * If immutable or not appending then return EPERM 721c09193bfSmarks */ 7220a586ceaSMark Shellenbaum if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) || 7230a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) && 7240a586ceaSMark Shellenbaum (uio->uio_loffset < zp->z_size))) { 725c09193bfSmarks ZFS_EXIT(zfsvfs); 726be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 727c09193bfSmarks } 728c09193bfSmarks 729f18faf3fSek110237 zilog = zfsvfs->z_log; 730fa9e4066Sahrens 731fa9e4066Sahrens /* 73241865f27SWilliam Gorrell * Validate file offset 73341865f27SWilliam Gorrell */ 7340a586ceaSMark Shellenbaum woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset; 73541865f27SWilliam Gorrell if (woff < 0) { 73641865f27SWilliam Gorrell ZFS_EXIT(zfsvfs); 737be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 73841865f27SWilliam Gorrell } 73941865f27SWilliam Gorrell 74041865f27SWilliam Gorrell /* 74141865f27SWilliam Gorrell * Check for mandatory locks before calling zfs_range_lock() 74241865f27SWilliam Gorrell * in order to prevent a deadlock with locks set via fcntl(). 74341865f27SWilliam Gorrell */ 7440a586ceaSMark Shellenbaum if (MANDMODE((mode_t)zp->z_mode) && 74541865f27SWilliam Gorrell (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) { 74641865f27SWilliam Gorrell ZFS_EXIT(zfsvfs); 74741865f27SWilliam Gorrell return (error); 74841865f27SWilliam Gorrell } 74941865f27SWilliam Gorrell 75041865f27SWilliam Gorrell /* 751c5c6ffa0Smaybee * Pre-fault the pages to ensure slow (eg NFS) pages 752104e2ed7Sperrin * don't hold up txg. 753c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Skip this if uio contains loaned arc_buf. 
754fa9e4066Sahrens */ 755c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if ((uio->uio_extflg == UIO_XUIO) && 756c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) 757c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio = (xuio_t *)uio; 758c242f9a0Schunli zhang - Sun Microsystems - Irvine United States else 759ff866947SSanjeev Bagewadi uio_prefaultpages(MIN(n, max_blksz), uio); 760fa9e4066Sahrens 761fa9e4066Sahrens /* 762fa9e4066Sahrens * If in append mode, set the io offset pointer to eof. 763fa9e4066Sahrens */ 764104e2ed7Sperrin if (ioflag & FAPPEND) { 765104e2ed7Sperrin /* 76641865f27SWilliam Gorrell * Obtain an appending range lock to guarantee file append 76741865f27SWilliam Gorrell * semantics. We reset the write offset once we have the lock. 768104e2ed7Sperrin */ 769104e2ed7Sperrin rl = zfs_range_lock(zp, 0, n, RL_APPEND); 77041865f27SWilliam Gorrell woff = rl->r_off; 771104e2ed7Sperrin if (rl->r_len == UINT64_MAX) { 772fa9e4066Sahrens /* 77341865f27SWilliam Gorrell * We overlocked the file because this write will cause 77441865f27SWilliam Gorrell * the file block size to increase. 77541865f27SWilliam Gorrell * Note that zp_size cannot change with this lock held. 776fa9e4066Sahrens */ 7770a586ceaSMark Shellenbaum woff = zp->z_size; 778fa9e4066Sahrens } 77941865f27SWilliam Gorrell uio->uio_loffset = woff; 78041865f27SWilliam Gorrell } else { 781fa9e4066Sahrens /* 78241865f27SWilliam Gorrell * Note that if the file block size will change as a result of 78341865f27SWilliam Gorrell * this write, then this range lock will lock the entire file 78441865f27SWilliam Gorrell * so that we can re-write the block safely. 785fa9e4066Sahrens */ 7865e286361SAndreas Jaekel 7875e286361SAndreas Jaekel /* 7885e286361SAndreas Jaekel * If in zev mode, lock offsets are quantized to 1MB chunks 7895e286361SAndreas Jaekel * so that we can calculate level 1 checksums later on. 
7905e286361SAndreas Jaekel */ 7915e286361SAndreas Jaekel if (rz_zev_active()) { 7925e286361SAndreas Jaekel /* start of this megabyte */ 7935e286361SAndreas Jaekel lock_off = P2ALIGN(woff, ZEV_L1_SIZE); 7945e286361SAndreas Jaekel /* full megabytes */ 7955e286361SAndreas Jaekel lock_len = n + (woff - lock_off); 7965e286361SAndreas Jaekel lock_len = P2ROUNDUP(lock_len, ZEV_L1_SIZE); 7975e286361SAndreas Jaekel } else { 7985e286361SAndreas Jaekel lock_off = woff; 7995e286361SAndreas Jaekel lock_len = n; 8005e286361SAndreas Jaekel } 8015e286361SAndreas Jaekel 8025e286361SAndreas Jaekel rl = zfs_range_lock(zp, lock_off, lock_len, RL_WRITER); 803fa9e4066Sahrens } 804fa9e4066Sahrens 805fa9e4066Sahrens if (woff >= limit) { 806feb08c6bSbillm zfs_range_unlock(rl); 807feb08c6bSbillm ZFS_EXIT(zfsvfs); 808be6fd75aSMatthew Ahrens return (SET_ERROR(EFBIG)); 809fa9e4066Sahrens } 810fa9e4066Sahrens 811fa9e4066Sahrens if ((woff + n) > limit || woff > (limit - n)) 812fa9e4066Sahrens n = limit - woff; 813fa9e4066Sahrens 814c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* Will this write extend the file length? */ 8150a586ceaSMark Shellenbaum write_eof = (woff + n > zp->z_size); 816c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 8170a586ceaSMark Shellenbaum end_size = MAX(zp->z_size, woff + n); 818feb08c6bSbillm 819feb08c6bSbillm /* 820feb08c6bSbillm * Write the file in reasonable size chunks. Each chunk is written 821feb08c6bSbillm * in a separate transaction; this keeps the intent log records small 822feb08c6bSbillm * and allows us to do more fine-grained space accounting. 
823feb08c6bSbillm */ 824feb08c6bSbillm while (n > 0) { 8252fdbea25SAleksandr Guzovskiy abuf = NULL; 8262fdbea25SAleksandr Guzovskiy woff = uio->uio_loffset; 8270a586ceaSMark Shellenbaum if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 8280a586ceaSMark Shellenbaum zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 8292fdbea25SAleksandr Guzovskiy if (abuf != NULL) 8302fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(abuf); 831be6fd75aSMatthew Ahrens error = SET_ERROR(EDQUOT); 83214843421SMatthew Ahrens break; 83314843421SMatthew Ahrens } 8342fdbea25SAleksandr Guzovskiy 835c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (xuio && abuf == NULL) { 836c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(i_iov < iovcnt); 837c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov = &iovp[i_iov]; 838c242f9a0Schunli zhang - Sun Microsystems - Irvine United States abuf = dmu_xuio_arcbuf(xuio, i_iov); 839c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_xuio_clear(xuio, i_iov); 840c242f9a0Schunli zhang - Sun Microsystems - Irvine United States DTRACE_PROBE3(zfs_cp_write, int, i_iov, 841c242f9a0Schunli zhang - Sun Microsystems - Irvine United States iovec_t *, aiov, arc_buf_t *, abuf); 842c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT((aiov->iov_base == abuf->b_data) || 843c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ((char *)aiov->iov_base - (char *)abuf->b_data + 844c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov->iov_len == arc_buf_size(abuf))); 845c242f9a0Schunli zhang - Sun Microsystems - Irvine United States i_iov++; 846c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else if (abuf == NULL && n >= max_blksz && 8470a586ceaSMark Shellenbaum woff >= zp->z_size && 8482fdbea25SAleksandr Guzovskiy P2PHASE(woff, max_blksz) == 0 && 8492fdbea25SAleksandr Guzovskiy zp->z_blksz == max_blksz) { 850c242f9a0Schunli zhang - Sun Microsystems - Irvine 
United States /* 851c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * This write covers a full block. "Borrow" a buffer 852c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * from the dmu so that we can fill it before we enter 853c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * a transaction. This avoids the possibility of 854c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * holding up the transaction if the data copy hangs 855c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * up on a pagefault (e.g., from an NFS server mapping). 856c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 8572fdbea25SAleksandr Guzovskiy size_t cbytes; 8582fdbea25SAleksandr Guzovskiy 8590a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 8600a586ceaSMark Shellenbaum max_blksz); 8612fdbea25SAleksandr Guzovskiy ASSERT(abuf != NULL); 8622fdbea25SAleksandr Guzovskiy ASSERT(arc_buf_size(abuf) == max_blksz); 8632fdbea25SAleksandr Guzovskiy if (error = uiocopy(abuf->b_data, max_blksz, 8642fdbea25SAleksandr Guzovskiy UIO_WRITE, uio, &cbytes)) { 8652fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(abuf); 8662fdbea25SAleksandr Guzovskiy break; 8672fdbea25SAleksandr Guzovskiy } 8682fdbea25SAleksandr Guzovskiy ASSERT(cbytes == max_blksz); 8692fdbea25SAleksandr Guzovskiy } 8702fdbea25SAleksandr Guzovskiy 8712fdbea25SAleksandr Guzovskiy /* 8722fdbea25SAleksandr Guzovskiy * Start a transaction. 
8732fdbea25SAleksandr Guzovskiy */ 874fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 8750a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 876fa9e4066Sahrens dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); 8770a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 878e722410cSMatthew Ahrens error = dmu_tx_assign(tx, TXG_WAIT); 879fa9e4066Sahrens if (error) { 8808a2f1b91Sahrens dmu_tx_abort(tx); 8812fdbea25SAleksandr Guzovskiy if (abuf != NULL) 8822fdbea25SAleksandr Guzovskiy dmu_return_arcbuf(abuf); 883feb08c6bSbillm break; 884fa9e4066Sahrens } 885fa9e4066Sahrens 886fa9e4066Sahrens /* 887104e2ed7Sperrin * If zfs_range_lock() over-locked we grow the blocksize 888feb08c6bSbillm * and then reduce the lock range. This will only happen 889feb08c6bSbillm * on the first iteration since zfs_range_reduce() will 890feb08c6bSbillm * shrink down r_len to the appropriate size. 891fa9e4066Sahrens */ 892104e2ed7Sperrin if (rl->r_len == UINT64_MAX) { 893104e2ed7Sperrin uint64_t new_blksz; 894104e2ed7Sperrin 895fa9e4066Sahrens if (zp->z_blksz > max_blksz) { 896b5152584SMatthew Ahrens /* 897b5152584SMatthew Ahrens * File's blocksize is already larger than the 898b5152584SMatthew Ahrens * "recordsize" property. Only let it grow to 899b5152584SMatthew Ahrens * the next power of 2. 900b5152584SMatthew Ahrens */ 901fa9e4066Sahrens ASSERT(!ISP2(zp->z_blksz)); 902b5152584SMatthew Ahrens new_blksz = MIN(end_size, 903b5152584SMatthew Ahrens 1 << highbit64(zp->z_blksz)); 904fa9e4066Sahrens } else { 905fa9e4066Sahrens new_blksz = MIN(end_size, max_blksz); 906fa9e4066Sahrens } 907104e2ed7Sperrin zfs_grow_blocksize(zp, new_blksz, tx); 908c5c6ffa0Smaybee zfs_range_reduce(rl, woff, n); 909fa9e4066Sahrens } 910fa9e4066Sahrens 911fa9e4066Sahrens /* 912fa9e4066Sahrens * XXX - should we really limit each write to z_max_blksz? 913fa9e4066Sahrens * Perhaps we should use SPA_MAXBLOCKSIZE chunks? 
914fa9e4066Sahrens */ 915fa9e4066Sahrens nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz)); 916fa9e4066Sahrens 9172fdbea25SAleksandr Guzovskiy if (abuf == NULL) { 918fa9e4066Sahrens tx_bytes = uio->uio_resid; 91994d1a210STim Haley error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), 92094d1a210STim Haley uio, nbytes, tx); 921fa9e4066Sahrens tx_bytes -= uio->uio_resid; 9222fdbea25SAleksandr Guzovskiy } else { 9232fdbea25SAleksandr Guzovskiy tx_bytes = nbytes; 924c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio == NULL || tx_bytes == aiov->iov_len); 925c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 926c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * If this is not a full block write, but we are 927c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * extending the file past EOF and this data starts 928c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * block-aligned, use assign_arcbuf(). Otherwise, 929c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * write via dmu_write(). 
930c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 931c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (tx_bytes < max_blksz && (!write_eof || 932c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov->iov_base != abuf->b_data)) { 933c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio); 934c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_write(zfsvfs->z_os, zp->z_id, woff, 935c242f9a0Schunli zhang - Sun Microsystems - Irvine United States aiov->iov_len, aiov->iov_base, tx); 936c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_return_arcbuf(abuf); 937c242f9a0Schunli zhang - Sun Microsystems - Irvine United States xuio_stat_wbuf_copied(); 938c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else { 939c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio || tx_bytes == max_blksz); 9400a586ceaSMark Shellenbaum dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), 9410a586ceaSMark Shellenbaum woff, abuf, tx); 942c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 9432fdbea25SAleksandr Guzovskiy ASSERT(tx_bytes <= uio->uio_resid); 9442fdbea25SAleksandr Guzovskiy uioskip(uio, tx_bytes); 9452fdbea25SAleksandr Guzovskiy } 9462fdbea25SAleksandr Guzovskiy if (tx_bytes && vn_has_cached_data(vp)) { 947ac05c741SMark Maybee update_pages(vp, woff, 948ac05c741SMark Maybee tx_bytes, zfsvfs->z_os, zp->z_id); 9492fdbea25SAleksandr Guzovskiy } 950fa9e4066Sahrens 951feb08c6bSbillm /* 952feb08c6bSbillm * If we made no progress, we're done. If we made even 953feb08c6bSbillm * partial progress, update the znode and ZIL accordingly. 
954feb08c6bSbillm */ 955feb08c6bSbillm if (tx_bytes == 0) { 9560a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs), 9570a586ceaSMark Shellenbaum (void *)&zp->z_size, sizeof (uint64_t), tx); 958af2c4821Smaybee dmu_tx_commit(tx); 959feb08c6bSbillm ASSERT(error != 0); 960fa9e4066Sahrens break; 961fa9e4066Sahrens } 962fa9e4066Sahrens 963169cdae2Smarks /* 964169cdae2Smarks * Clear Set-UID/Set-GID bits on successful write if not 965169cdae2Smarks * privileged and at least one of the excute bits is set. 966169cdae2Smarks * 967169cdae2Smarks * It would be nice to to this after all writes have 968169cdae2Smarks * been done, but that would still expose the ISUID/ISGID 969169cdae2Smarks * to another app after the partial write is committed. 970da6c28aaSamw * 971f1696b23SMark Shellenbaum * Note: we don't call zfs_fuid_map_id() here because 972f1696b23SMark Shellenbaum * user 0 is not an ephemeral uid. 973169cdae2Smarks */ 974169cdae2Smarks mutex_enter(&zp->z_acl_lock); 9750a586ceaSMark Shellenbaum if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | 976169cdae2Smarks (S_IXUSR >> 6))) != 0 && 9770a586ceaSMark Shellenbaum (zp->z_mode & (S_ISUID | S_ISGID)) != 0 && 978169cdae2Smarks secpolicy_vnode_setid_retain(cr, 9790a586ceaSMark Shellenbaum (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) { 9800a586ceaSMark Shellenbaum uint64_t newmode; 9810a586ceaSMark Shellenbaum zp->z_mode &= ~(S_ISUID | S_ISGID); 9820a586ceaSMark Shellenbaum newmode = zp->z_mode; 9830a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), 9840a586ceaSMark Shellenbaum (void *)&newmode, sizeof (uint64_t), tx); 985169cdae2Smarks } 986169cdae2Smarks mutex_exit(&zp->z_acl_lock); 987169cdae2Smarks 9880a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 9890a586ceaSMark Shellenbaum B_TRUE); 990feb08c6bSbillm 991feb08c6bSbillm /* 992feb08c6bSbillm * Update the file size (zp_size) if it has changed; 993feb08c6bSbillm * account for possible 
concurrent updates. 994feb08c6bSbillm */ 9950a586ceaSMark Shellenbaum while ((end_size = zp->z_size) < uio->uio_loffset) { 9960a586ceaSMark Shellenbaum (void) atomic_cas_64(&zp->z_size, end_size, 997feb08c6bSbillm uio->uio_loffset); 9980a586ceaSMark Shellenbaum ASSERT(error == 0); 9990a586ceaSMark Shellenbaum } 1000c0e50c98SNeil Perrin /* 1001c0e50c98SNeil Perrin * If we are replaying and eof is non zero then force 1002c0e50c98SNeil Perrin * the file size to the specified eof. Note, there's no 1003c0e50c98SNeil Perrin * concurrency during replay. 1004c0e50c98SNeil Perrin */ 1005c0e50c98SNeil Perrin if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) 1006c0e50c98SNeil Perrin zp->z_size = zfsvfs->z_replay_eof; 1007c0e50c98SNeil Perrin 10080a586ceaSMark Shellenbaum error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 10090a586ceaSMark Shellenbaum 1010feb08c6bSbillm zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); 1011feb08c6bSbillm dmu_tx_commit(tx); 1012feb08c6bSbillm 1013feb08c6bSbillm if (error != 0) 1014fa9e4066Sahrens break; 1015feb08c6bSbillm ASSERT(tx_bytes == nbytes); 1016feb08c6bSbillm n -= nbytes; 1017ff866947SSanjeev Bagewadi 1018ff866947SSanjeev Bagewadi if (!xuio && n > 0) 1019ff866947SSanjeev Bagewadi uio_prefaultpages(MIN(n, max_blksz), uio); 1020fa9e4066Sahrens } 1021fa9e4066Sahrens 1022c5c6ffa0Smaybee zfs_range_unlock(rl); 1023fa9e4066Sahrens 1024fa9e4066Sahrens /* 1025fa9e4066Sahrens * If we're in replay mode, or we made no progress, return error. 1026fa9e4066Sahrens * Otherwise, it's at least a partial write, so it's successful. 
1027fa9e4066Sahrens */ 10281209a471SNeil Perrin if (zfsvfs->z_replay || uio->uio_resid == start_resid) { 1029fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1030fa9e4066Sahrens return (error); 1031fa9e4066Sahrens } 1032fa9e4066Sahrens 103355da60b9SMark J Musante if (ioflag & (FSYNC | FDSYNC) || 103455da60b9SMark J Musante zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 10355002558fSNeil Perrin zil_commit(zilog, zp->z_id); 1036fa9e4066Sahrens 1037fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1038fa9e4066Sahrens return (0); 1039fa9e4066Sahrens } 1040fa9e4066Sahrens 1041c5c6ffa0Smaybee void 1042b24ab676SJeff Bonwick zfs_get_done(zgd_t *zgd, int error) 1043c5c6ffa0Smaybee { 1044b24ab676SJeff Bonwick znode_t *zp = zgd->zgd_private; 1045b24ab676SJeff Bonwick objset_t *os = zp->z_zfsvfs->z_os; 1046c5c6ffa0Smaybee 1047b24ab676SJeff Bonwick if (zgd->zgd_db) 1048b24ab676SJeff Bonwick dmu_buf_rele(zgd->zgd_db, zgd); 1049b24ab676SJeff Bonwick 1050b24ab676SJeff Bonwick zfs_range_unlock(zgd->zgd_rl); 1051b24ab676SJeff Bonwick 10529d3574bfSNeil Perrin /* 10539d3574bfSNeil Perrin * Release the vnode asynchronously as we currently have the 10549d3574bfSNeil Perrin * txg stopped from syncing. 10559d3574bfSNeil Perrin */ 1056b24ab676SJeff Bonwick VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1057b24ab676SJeff Bonwick 1058b24ab676SJeff Bonwick if (error == 0 && zgd->zgd_bp) 105917f17c2dSbonwick zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); 1060b24ab676SJeff Bonwick 106167bd71c6Sperrin kmem_free(zgd, sizeof (zgd_t)); 1062c5c6ffa0Smaybee } 1063c5c6ffa0Smaybee 1064c87b8fc5SMark J Musante #ifdef DEBUG 1065c87b8fc5SMark J Musante static int zil_fault_io = 0; 1066c87b8fc5SMark J Musante #endif 1067c87b8fc5SMark J Musante 1068fa9e4066Sahrens /* 1069fa9e4066Sahrens * Get data to generate a TX_WRITE intent log record. 
1070fa9e4066Sahrens */ 1071fa9e4066Sahrens int 1072c5c6ffa0Smaybee zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) 1073fa9e4066Sahrens { 1074fa9e4066Sahrens zfsvfs_t *zfsvfs = arg; 1075fa9e4066Sahrens objset_t *os = zfsvfs->z_os; 1076fa9e4066Sahrens znode_t *zp; 1077b24ab676SJeff Bonwick uint64_t object = lr->lr_foid; 1078b24ab676SJeff Bonwick uint64_t offset = lr->lr_offset; 1079b24ab676SJeff Bonwick uint64_t size = lr->lr_length; 1080b24ab676SJeff Bonwick blkptr_t *bp = &lr->lr_blkptr; 1081c5c6ffa0Smaybee dmu_buf_t *db; 108267bd71c6Sperrin zgd_t *zgd; 1083fa9e4066Sahrens int error = 0; 1084fa9e4066Sahrens 1085b24ab676SJeff Bonwick ASSERT(zio != NULL); 1086b24ab676SJeff Bonwick ASSERT(size != 0); 1087fa9e4066Sahrens 1088fa9e4066Sahrens /* 1089104e2ed7Sperrin * Nothing to do if the file has been removed 1090fa9e4066Sahrens */ 1091b24ab676SJeff Bonwick if (zfs_zget(zfsvfs, object, &zp) != 0) 1092be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 1093893a6d32Sahrens if (zp->z_unlinked) { 10949d3574bfSNeil Perrin /* 10959d3574bfSNeil Perrin * Release the vnode asynchronously as we currently have the 10969d3574bfSNeil Perrin * txg stopped from syncing. 10979d3574bfSNeil Perrin */ 10989d3574bfSNeil Perrin VN_RELE_ASYNC(ZTOV(zp), 10999d3574bfSNeil Perrin dsl_pool_vnrele_taskq(dmu_objset_pool(os))); 1100be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 1101fa9e4066Sahrens } 1102fa9e4066Sahrens 1103b24ab676SJeff Bonwick zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP); 1104b24ab676SJeff Bonwick zgd->zgd_zilog = zfsvfs->z_log; 1105b24ab676SJeff Bonwick zgd->zgd_private = zp; 1106b24ab676SJeff Bonwick 1107fa9e4066Sahrens /* 1108fa9e4066Sahrens * Write records come in two flavors: immediate and indirect. 
1109fa9e4066Sahrens * For small writes it's cheaper to store the data with the 1110fa9e4066Sahrens * log record (immediate); for large writes it's cheaper to 1111fa9e4066Sahrens * sync the data and get a pointer to it (indirect) so that 1112fa9e4066Sahrens * we don't have to write the data twice. 1113fa9e4066Sahrens */ 1114104e2ed7Sperrin if (buf != NULL) { /* immediate write */ 1115b24ab676SJeff Bonwick zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER); 1116104e2ed7Sperrin /* test for truncation needs to be done while range locked */ 11170a586ceaSMark Shellenbaum if (offset >= zp->z_size) { 1118be6fd75aSMatthew Ahrens error = SET_ERROR(ENOENT); 1119b24ab676SJeff Bonwick } else { 1120b24ab676SJeff Bonwick error = dmu_read(os, object, offset, size, buf, 1121b24ab676SJeff Bonwick DMU_READ_NO_PREFETCH); 1122104e2ed7Sperrin } 1123b24ab676SJeff Bonwick ASSERT(error == 0 || error == ENOENT); 1124104e2ed7Sperrin } else { /* indirect write */ 1125fa9e4066Sahrens /* 1126104e2ed7Sperrin * Have to lock the whole block to ensure when it's 1127104e2ed7Sperrin * written out and it's checksum is being calculated 1128104e2ed7Sperrin * that no one can change the data. We need to re-check 1129104e2ed7Sperrin * blocksize after we get the lock in case it's changed! 1130fa9e4066Sahrens */ 1131104e2ed7Sperrin for (;;) { 1132b24ab676SJeff Bonwick uint64_t blkoff; 1133b24ab676SJeff Bonwick size = zp->z_blksz; 1134dfe73b3dSJeff Bonwick blkoff = ISP2(size) ? 
P2PHASE(offset, size) : offset; 1135b24ab676SJeff Bonwick offset -= blkoff; 1136b24ab676SJeff Bonwick zgd->zgd_rl = zfs_range_lock(zp, offset, size, 1137b24ab676SJeff Bonwick RL_READER); 1138b24ab676SJeff Bonwick if (zp->z_blksz == size) 1139104e2ed7Sperrin break; 1140b24ab676SJeff Bonwick offset += blkoff; 1141b24ab676SJeff Bonwick zfs_range_unlock(zgd->zgd_rl); 1142104e2ed7Sperrin } 1143104e2ed7Sperrin /* test for truncation needs to be done while range locked */ 11440a586ceaSMark Shellenbaum if (lr->lr_offset >= zp->z_size) 1145be6fd75aSMatthew Ahrens error = SET_ERROR(ENOENT); 1146c87b8fc5SMark J Musante #ifdef DEBUG 1147c87b8fc5SMark J Musante if (zil_fault_io) { 1148be6fd75aSMatthew Ahrens error = SET_ERROR(EIO); 1149c87b8fc5SMark J Musante zil_fault_io = 0; 1150c87b8fc5SMark J Musante } 1151c87b8fc5SMark J Musante #endif 1152b24ab676SJeff Bonwick if (error == 0) 115347cb52daSJeff Bonwick error = dmu_buf_hold(os, object, offset, zgd, &db, 115447cb52daSJeff Bonwick DMU_READ_NO_PREFETCH); 1155c87b8fc5SMark J Musante 1156975c32a0SNeil Perrin if (error == 0) { 115780901aeaSGeorge Wilson blkptr_t *obp = dmu_buf_get_blkptr(db); 115880901aeaSGeorge Wilson if (obp) { 115980901aeaSGeorge Wilson ASSERT(BP_IS_HOLE(bp)); 116080901aeaSGeorge Wilson *bp = *obp; 116180901aeaSGeorge Wilson } 116280901aeaSGeorge Wilson 1163b24ab676SJeff Bonwick zgd->zgd_db = db; 1164b24ab676SJeff Bonwick zgd->zgd_bp = bp; 1165b24ab676SJeff Bonwick 1166b24ab676SJeff Bonwick ASSERT(db->db_offset == offset); 1167b24ab676SJeff Bonwick ASSERT(db->db_size == size); 1168b24ab676SJeff Bonwick 1169b24ab676SJeff Bonwick error = dmu_sync(zio, lr->lr_common.lrc_txg, 1170b24ab676SJeff Bonwick zfs_get_done, zgd); 1171b24ab676SJeff Bonwick ASSERT(error || lr->lr_length <= zp->z_blksz); 1172975c32a0SNeil Perrin 1173c5c6ffa0Smaybee /* 1174b24ab676SJeff Bonwick * On success, we need to wait for the write I/O 1175b24ab676SJeff Bonwick * initiated by dmu_sync() to complete before we can 1176b24ab676SJeff Bonwick 
* release this dbuf. We will finish everything up 1177b24ab676SJeff Bonwick * in the zfs_get_done() callback. 1178c5c6ffa0Smaybee */ 1179b24ab676SJeff Bonwick if (error == 0) 1180c5c6ffa0Smaybee return (0); 1181b24ab676SJeff Bonwick 1182b24ab676SJeff Bonwick if (error == EALREADY) { 1183975c32a0SNeil Perrin lr->lr_common.lrc_txtype = TX_WRITE2; 1184975c32a0SNeil Perrin error = 0; 1185975c32a0SNeil Perrin } 1186fa9e4066Sahrens } 1187b24ab676SJeff Bonwick } 1188b24ab676SJeff Bonwick 1189b24ab676SJeff Bonwick zfs_get_done(zgd, error); 1190b24ab676SJeff Bonwick 1191fa9e4066Sahrens return (error); 1192fa9e4066Sahrens } 1193fa9e4066Sahrens 1194fa9e4066Sahrens /*ARGSUSED*/ 1195fa9e4066Sahrens static int 1196da6c28aaSamw zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr, 1197da6c28aaSamw caller_context_t *ct) 1198fa9e4066Sahrens { 1199fa9e4066Sahrens znode_t *zp = VTOZ(vp); 1200fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 1201fa9e4066Sahrens int error; 1202fa9e4066Sahrens 12033cb34c60Sahrens ZFS_ENTER(zfsvfs); 12043cb34c60Sahrens ZFS_VERIFY_ZP(zp); 1205da6c28aaSamw 1206da6c28aaSamw if (flag & V_ACE_MASK) 1207da6c28aaSamw error = zfs_zaccess(zp, mode, flag, B_FALSE, cr); 1208da6c28aaSamw else 1209da6c28aaSamw error = zfs_zaccess_rwx(zp, mode, flag, cr); 1210da6c28aaSamw 1211fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1212fa9e4066Sahrens return (error); 1213fa9e4066Sahrens } 1214fa9e4066Sahrens 1215fa9e4066Sahrens /* 1216d47621a4STim Haley * If vnode is for a device return a specfs vnode instead. 
1217d47621a4STim Haley */ 1218d47621a4STim Haley static int 1219d47621a4STim Haley specvp_check(vnode_t **vpp, cred_t *cr) 1220d47621a4STim Haley { 1221d47621a4STim Haley int error = 0; 1222d47621a4STim Haley 1223d47621a4STim Haley if (IS_DEVVP(*vpp)) { 1224d47621a4STim Haley struct vnode *svp; 1225d47621a4STim Haley 1226d47621a4STim Haley svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr); 1227d47621a4STim Haley VN_RELE(*vpp); 1228d47621a4STim Haley if (svp == NULL) 1229be6fd75aSMatthew Ahrens error = SET_ERROR(ENOSYS); 1230d47621a4STim Haley *vpp = svp; 1231d47621a4STim Haley } 1232d47621a4STim Haley return (error); 1233d47621a4STim Haley } 1234d47621a4STim Haley 1235d47621a4STim Haley 1236d47621a4STim Haley /* 1237fa9e4066Sahrens * Lookup an entry in a directory, or an extended attribute directory. 1238fa9e4066Sahrens * If it exists, return a held vnode reference for it. 1239fa9e4066Sahrens * 1240fa9e4066Sahrens * IN: dvp - vnode of directory to search. 1241fa9e4066Sahrens * nm - name of entry to lookup. 1242fa9e4066Sahrens * pnp - full pathname to lookup [UNUSED]. 1243fa9e4066Sahrens * flags - LOOKUP_XATTR set if looking for an attribute. 1244fa9e4066Sahrens * rdir - root directory vnode [UNUSED]. 1245fa9e4066Sahrens * cr - credentials of caller. 1246da6c28aaSamw * ct - caller context 1247da6c28aaSamw * direntflags - directory lookup flags 1248da6c28aaSamw * realpnp - returned pathname. 1249fa9e4066Sahrens * 1250fa9e4066Sahrens * OUT: vpp - vnode of located entry, NULL if not found. 1251fa9e4066Sahrens * 1252f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
1253fa9e4066Sahrens * 1254fa9e4066Sahrens * Timestamps: 1255fa9e4066Sahrens * NA 1256fa9e4066Sahrens */ 1257fa9e4066Sahrens /* ARGSUSED */ 1258fa9e4066Sahrens static int 1259fa9e4066Sahrens zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, 1260da6c28aaSamw int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, 1261da6c28aaSamw int *direntflags, pathname_t *realpnp) 1262fa9e4066Sahrens { 1263fa9e4066Sahrens znode_t *zdp = VTOZ(dvp); 1264fa9e4066Sahrens zfsvfs_t *zfsvfs = zdp->z_zfsvfs; 1265d47621a4STim Haley int error = 0; 1266d47621a4STim Haley 1267d47621a4STim Haley /* fast path */ 1268d47621a4STim Haley if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) { 1269d47621a4STim Haley 1270d47621a4STim Haley if (dvp->v_type != VDIR) { 1271be6fd75aSMatthew Ahrens return (SET_ERROR(ENOTDIR)); 12720a586ceaSMark Shellenbaum } else if (zdp->z_sa_hdl == NULL) { 1273be6fd75aSMatthew Ahrens return (SET_ERROR(EIO)); 1274d47621a4STim Haley } 1275d47621a4STim Haley 1276d47621a4STim Haley if (nm[0] == 0 || (nm[0] == '.' 
&& nm[1] == '\0')) { 1277d47621a4STim Haley error = zfs_fastaccesschk_execute(zdp, cr); 1278d47621a4STim Haley if (!error) { 1279d47621a4STim Haley *vpp = dvp; 1280d47621a4STim Haley VN_HOLD(*vpp); 1281d47621a4STim Haley return (0); 1282d47621a4STim Haley } 1283d47621a4STim Haley return (error); 1284d47621a4STim Haley } else { 1285d47621a4STim Haley vnode_t *tvp = dnlc_lookup(dvp, nm); 1286d47621a4STim Haley 1287d47621a4STim Haley if (tvp) { 1288d47621a4STim Haley error = zfs_fastaccesschk_execute(zdp, cr); 1289d47621a4STim Haley if (error) { 1290d47621a4STim Haley VN_RELE(tvp); 1291d47621a4STim Haley return (error); 1292d47621a4STim Haley } 1293d47621a4STim Haley if (tvp == DNLC_NO_VNODE) { 1294d47621a4STim Haley VN_RELE(tvp); 1295be6fd75aSMatthew Ahrens return (SET_ERROR(ENOENT)); 1296d47621a4STim Haley } else { 1297d47621a4STim Haley *vpp = tvp; 1298d47621a4STim Haley return (specvp_check(vpp, cr)); 1299d47621a4STim Haley } 1300d47621a4STim Haley } 1301d47621a4STim Haley } 1302d47621a4STim Haley } 1303d47621a4STim Haley 1304d47621a4STim Haley DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm); 1305fa9e4066Sahrens 13063cb34c60Sahrens ZFS_ENTER(zfsvfs); 13073cb34c60Sahrens ZFS_VERIFY_ZP(zdp); 1308fa9e4066Sahrens 1309fa9e4066Sahrens *vpp = NULL; 1310fa9e4066Sahrens 1311fa9e4066Sahrens if (flags & LOOKUP_XATTR) { 1312fa9e4066Sahrens /* 13137b55fa8eSck153898 * If the xattr property is off, refuse the lookup request. 13147b55fa8eSck153898 */ 13157b55fa8eSck153898 if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) { 13167b55fa8eSck153898 ZFS_EXIT(zfsvfs); 1317be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 13187b55fa8eSck153898 } 13197b55fa8eSck153898 13207b55fa8eSck153898 /* 1321fa9e4066Sahrens * We don't allow recursive attributes.. 1322fa9e4066Sahrens * Maybe someday we will. 
1323fa9e4066Sahrens */ 13240a586ceaSMark Shellenbaum if (zdp->z_pflags & ZFS_XATTR) { 1325fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1326be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 1327fa9e4066Sahrens } 1328fa9e4066Sahrens 13293f063a9dSck153898 if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) { 1330fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1331fa9e4066Sahrens return (error); 1332fa9e4066Sahrens } 1333fa9e4066Sahrens 1334fa9e4066Sahrens /* 1335fa9e4066Sahrens * Do we have permission to get into attribute directory? 1336fa9e4066Sahrens */ 1337fa9e4066Sahrens 1338da6c28aaSamw if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0, 1339da6c28aaSamw B_FALSE, cr)) { 1340fa9e4066Sahrens VN_RELE(*vpp); 1341da6c28aaSamw *vpp = NULL; 1342fa9e4066Sahrens } 1343fa9e4066Sahrens 1344fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1345fa9e4066Sahrens return (error); 1346fa9e4066Sahrens } 1347fa9e4066Sahrens 13480f2dc02eSek110237 if (dvp->v_type != VDIR) { 13490f2dc02eSek110237 ZFS_EXIT(zfsvfs); 1350be6fd75aSMatthew Ahrens return (SET_ERROR(ENOTDIR)); 13510f2dc02eSek110237 } 1352736b9155Smarks 1353fa9e4066Sahrens /* 1354fa9e4066Sahrens * Check accessibility of directory. 
1355fa9e4066Sahrens */ 1356fa9e4066Sahrens 1357da6c28aaSamw if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) { 1358fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1359fa9e4066Sahrens return (error); 1360fa9e4066Sahrens } 1361fa9e4066Sahrens 1362de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), 1363da6c28aaSamw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1364da6c28aaSamw ZFS_EXIT(zfsvfs); 1365be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 1366da6c28aaSamw } 1367fa9e4066Sahrens 1368da6c28aaSamw error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp); 1369d47621a4STim Haley if (error == 0) 1370d47621a4STim Haley error = specvp_check(vpp, cr); 1371fa9e4066Sahrens 1372fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1373fa9e4066Sahrens return (error); 1374fa9e4066Sahrens } 1375fa9e4066Sahrens 1376fa9e4066Sahrens /* 1377fa9e4066Sahrens * Attempt to create a new entry in a directory. If the entry 1378fa9e4066Sahrens * already exists, truncate the file if permissible, else return 1379fa9e4066Sahrens * an error. Return the vp of the created or trunc'd file. 1380fa9e4066Sahrens * 1381fa9e4066Sahrens * IN: dvp - vnode of directory to put new file entry in. 1382fa9e4066Sahrens * name - name of new file entry. 1383fa9e4066Sahrens * vap - attributes of new file. 1384fa9e4066Sahrens * excl - flag indicating exclusive or non-exclusive mode. 1385fa9e4066Sahrens * mode - mode to open file with. 1386fa9e4066Sahrens * cr - credentials of caller. 1387fa9e4066Sahrens * flag - large file flag [UNUSED]. 1388da6c28aaSamw * ct - caller context 1389da6c28aaSamw * vsecp - ACL to be set 1390fa9e4066Sahrens * 1391fa9e4066Sahrens * OUT: vpp - vnode of created or trunc'd entry. 1392fa9e4066Sahrens * 1393f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
1394fa9e4066Sahrens * 1395fa9e4066Sahrens * Timestamps: 1396fa9e4066Sahrens * dvp - ctime|mtime updated if new entry created 1397fa9e4066Sahrens * vp - ctime|mtime always, atime if new 1398fa9e4066Sahrens */ 1399da6c28aaSamw 1400fa9e4066Sahrens /* ARGSUSED */ 1401fa9e4066Sahrens static int 1402fa9e4066Sahrens zfs_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl, 1403da6c28aaSamw int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct, 1404da6c28aaSamw vsecattr_t *vsecp) 1405fa9e4066Sahrens { 1406fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 1407fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1408f18faf3fSek110237 zilog_t *zilog; 1409f18faf3fSek110237 objset_t *os; 1410fa9e4066Sahrens zfs_dirlock_t *dl; 1411fa9e4066Sahrens dmu_tx_t *tx; 1412fa9e4066Sahrens int error; 1413c1ce5987SMark Shellenbaum ksid_t *ksid; 1414c1ce5987SMark Shellenbaum uid_t uid; 1415c1ce5987SMark Shellenbaum gid_t gid = crgetgid(cr); 141689459e17SMark Shellenbaum zfs_acl_ids_t acl_ids; 141789459e17SMark Shellenbaum boolean_t fuid_dirtied; 1418c8c24165SMark Shellenbaum boolean_t have_acl = B_FALSE; 141969962b56SMatthew Ahrens boolean_t waited = B_FALSE; 1420da6c28aaSamw 1421da6c28aaSamw /* 1422da6c28aaSamw * If we have an ephemeral id, ACL, or XVATTR then 1423da6c28aaSamw * make sure file system is at proper version 1424da6c28aaSamw */ 1425da6c28aaSamw 1426c1ce5987SMark Shellenbaum ksid = crgetsid(cr, KSID_OWNER); 1427c1ce5987SMark Shellenbaum if (ksid) 1428c1ce5987SMark Shellenbaum uid = ksid_getid(ksid); 1429c1ce5987SMark Shellenbaum else 1430c1ce5987SMark Shellenbaum uid = crgetuid(cr); 1431c1ce5987SMark Shellenbaum 1432da6c28aaSamw if (zfsvfs->z_use_fuids == B_FALSE && 1433da6c28aaSamw (vsecp || (vap->va_mask & AT_XVATTR) || 1434c1ce5987SMark Shellenbaum IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1435be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 1436fa9e4066Sahrens 14373cb34c60Sahrens ZFS_ENTER(zfsvfs); 14383cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 
1439f18faf3fSek110237 os = zfsvfs->z_os; 1440f18faf3fSek110237 zilog = zfsvfs->z_log; 1441fa9e4066Sahrens 1442de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(name, strlen(name), 1443da6c28aaSamw NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1444da6c28aaSamw ZFS_EXIT(zfsvfs); 1445be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 1446da6c28aaSamw } 1447da6c28aaSamw 1448da6c28aaSamw if (vap->va_mask & AT_XVATTR) { 1449da6c28aaSamw if ((error = secpolicy_xvattr((xvattr_t *)vap, 1450da6c28aaSamw crgetuid(cr), cr, vap->va_type)) != 0) { 1451da6c28aaSamw ZFS_EXIT(zfsvfs); 1452da6c28aaSamw return (error); 1453da6c28aaSamw } 1454da6c28aaSamw } 1455fa9e4066Sahrens top: 1456fa9e4066Sahrens *vpp = NULL; 1457fa9e4066Sahrens 1458fa9e4066Sahrens if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr)) 1459fa9e4066Sahrens vap->va_mode &= ~VSVTX; 1460fa9e4066Sahrens 1461fa9e4066Sahrens if (*name == '\0') { 1462fa9e4066Sahrens /* 1463fa9e4066Sahrens * Null component name refers to the directory itself. 
1464fa9e4066Sahrens */ 1465fa9e4066Sahrens VN_HOLD(dvp); 1466fa9e4066Sahrens zp = dzp; 1467fa9e4066Sahrens dl = NULL; 1468fa9e4066Sahrens error = 0; 1469fa9e4066Sahrens } else { 1470fa9e4066Sahrens /* possible VN_HOLD(zp) */ 1471da6c28aaSamw int zflg = 0; 1472da6c28aaSamw 1473da6c28aaSamw if (flag & FIGNORECASE) 1474da6c28aaSamw zflg |= ZCILOOK; 1475da6c28aaSamw 1476da6c28aaSamw error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1477da6c28aaSamw NULL, NULL); 1478da6c28aaSamw if (error) { 14790b2a8171SMark Shellenbaum if (have_acl) 14800b2a8171SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1481fa9e4066Sahrens if (strcmp(name, "..") == 0) 1482be6fd75aSMatthew Ahrens error = SET_ERROR(EISDIR); 1483fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1484fa9e4066Sahrens return (error); 1485fa9e4066Sahrens } 1486fa9e4066Sahrens } 14870a586ceaSMark Shellenbaum 1488fa9e4066Sahrens if (zp == NULL) { 1489da6c28aaSamw uint64_t txtype; 1490da6c28aaSamw 1491fa9e4066Sahrens /* 1492fa9e4066Sahrens * Create a new file object and update the directory 1493fa9e4066Sahrens * to reference it. 1494fa9e4066Sahrens */ 1495da6c28aaSamw if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) { 14960b2a8171SMark Shellenbaum if (have_acl) 14970b2a8171SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1498fa9e4066Sahrens goto out; 1499fa9e4066Sahrens } 1500fa9e4066Sahrens 1501fa9e4066Sahrens /* 1502fa9e4066Sahrens * We only support the creation of regular files in 1503fa9e4066Sahrens * extended attribute directories. 
1504fa9e4066Sahrens */ 15050a586ceaSMark Shellenbaum 15060a586ceaSMark Shellenbaum if ((dzp->z_pflags & ZFS_XATTR) && 1507fa9e4066Sahrens (vap->va_type != VREG)) { 15080b2a8171SMark Shellenbaum if (have_acl) 15090b2a8171SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1510be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 1511fa9e4066Sahrens goto out; 1512fa9e4066Sahrens } 1513fa9e4066Sahrens 1514c8c24165SMark Shellenbaum if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap, 1515c8c24165SMark Shellenbaum cr, vsecp, &acl_ids)) != 0) 151689459e17SMark Shellenbaum goto out; 1517c8c24165SMark Shellenbaum have_acl = B_TRUE; 1518c8c24165SMark Shellenbaum 151914843421SMatthew Ahrens if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 15204929fd5eSTim Haley zfs_acl_ids_free(&acl_ids); 1521be6fd75aSMatthew Ahrens error = SET_ERROR(EDQUOT); 152214843421SMatthew Ahrens goto out; 152314843421SMatthew Ahrens } 152489459e17SMark Shellenbaum 1525fa9e4066Sahrens tx = dmu_tx_create(os); 15260a586ceaSMark Shellenbaum 15270a586ceaSMark Shellenbaum dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 15280a586ceaSMark Shellenbaum ZFS_SA_BASE_ATTR_SIZE); 15290a586ceaSMark Shellenbaum 153089459e17SMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 153114843421SMatthew Ahrens if (fuid_dirtied) 153214843421SMatthew Ahrens zfs_fuid_txhold(zfsvfs, tx); 1533ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name); 15340a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE); 15350a586ceaSMark Shellenbaum if (!zfsvfs->z_use_sa && 15360a586ceaSMark Shellenbaum acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 1537fa9e4066Sahrens dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 15380a586ceaSMark Shellenbaum 0, acl_ids.z_aclp->z_acl_bytes); 1539da6c28aaSamw } 154069962b56SMatthew Ahrens error = dmu_tx_assign(tx, waited ? 
TXG_WAITED : TXG_NOWAIT); 1541fa9e4066Sahrens if (error) { 1542fa9e4066Sahrens zfs_dirent_unlock(dl); 15431209a471SNeil Perrin if (error == ERESTART) { 154469962b56SMatthew Ahrens waited = B_TRUE; 15458a2f1b91Sahrens dmu_tx_wait(tx); 15468a2f1b91Sahrens dmu_tx_abort(tx); 1547fa9e4066Sahrens goto top; 1548fa9e4066Sahrens } 1549c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 15508a2f1b91Sahrens dmu_tx_abort(tx); 1551fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1552fa9e4066Sahrens return (error); 1553fa9e4066Sahrens } 15540a586ceaSMark Shellenbaum zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 155589459e17SMark Shellenbaum 155689459e17SMark Shellenbaum if (fuid_dirtied) 155789459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 155889459e17SMark Shellenbaum 1559fa9e4066Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 1560da6c28aaSamw txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap); 1561da6c28aaSamw if (flag & FIGNORECASE) 1562da6c28aaSamw txtype |= TX_CI; 1563da6c28aaSamw zfs_log_create(zilog, tx, txtype, dzp, zp, name, 156489459e17SMark Shellenbaum vsecp, acl_ids.z_fuidp, vap); 156589459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1566fa9e4066Sahrens dmu_tx_commit(tx); 1567fa9e4066Sahrens } else { 1568da6c28aaSamw int aflags = (flag & FAPPEND) ? V_APPEND : 0; 1569da6c28aaSamw 15700b2a8171SMark Shellenbaum if (have_acl) 15710b2a8171SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 15720b2a8171SMark Shellenbaum have_acl = B_FALSE; 15730b2a8171SMark Shellenbaum 1574fa9e4066Sahrens /* 1575fa9e4066Sahrens * A directory entry already exists for this name. 1576fa9e4066Sahrens */ 1577fa9e4066Sahrens /* 1578fa9e4066Sahrens * Can't truncate an existing file if in exclusive mode. 1579fa9e4066Sahrens */ 1580fa9e4066Sahrens if (excl == EXCL) { 1581be6fd75aSMatthew Ahrens error = SET_ERROR(EEXIST); 1582fa9e4066Sahrens goto out; 1583fa9e4066Sahrens } 1584fa9e4066Sahrens /* 1585fa9e4066Sahrens * Can't open a directory for writing. 
1586fa9e4066Sahrens */ 1587fa9e4066Sahrens if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) { 1588be6fd75aSMatthew Ahrens error = SET_ERROR(EISDIR); 1589fa9e4066Sahrens goto out; 1590fa9e4066Sahrens } 1591fa9e4066Sahrens /* 1592fa9e4066Sahrens * Verify requested access to file. 1593fa9e4066Sahrens */ 1594da6c28aaSamw if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) { 1595fa9e4066Sahrens goto out; 1596fa9e4066Sahrens } 1597fa9e4066Sahrens 1598fa9e4066Sahrens mutex_enter(&dzp->z_lock); 1599fa9e4066Sahrens dzp->z_seq++; 1600fa9e4066Sahrens mutex_exit(&dzp->z_lock); 1601fa9e4066Sahrens 1602fa9e4066Sahrens /* 16035730cc9aSmaybee * Truncate regular files if requested. 1604fa9e4066Sahrens */ 16055730cc9aSmaybee if ((ZTOV(zp)->v_type == VREG) && 16065730cc9aSmaybee (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) { 1607cdb0ab79Smaybee /* we can't hold any locks when calling zfs_freesp() */ 16085730cc9aSmaybee zfs_dirent_unlock(dl); 1609cdb0ab79Smaybee dl = NULL; 1610cdb0ab79Smaybee error = zfs_freesp(zp, 0, 0, mode, TRUE); 1611df2381bfSpraks if (error == 0) { 1612da6c28aaSamw vnevent_create(ZTOV(zp), ct); 1613df2381bfSpraks } 1614fa9e4066Sahrens } 1615fa9e4066Sahrens } 1616fa9e4066Sahrens out: 1617fa9e4066Sahrens 1618fa9e4066Sahrens if (dl) 1619fa9e4066Sahrens zfs_dirent_unlock(dl); 1620fa9e4066Sahrens 1621fa9e4066Sahrens if (error) { 1622fa9e4066Sahrens if (zp) 1623fa9e4066Sahrens VN_RELE(ZTOV(zp)); 1624fa9e4066Sahrens } else { 1625fa9e4066Sahrens *vpp = ZTOV(zp); 1626d47621a4STim Haley error = specvp_check(vpp, cr); 1627fa9e4066Sahrens } 1628fa9e4066Sahrens 162955da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 16305002558fSNeil Perrin zil_commit(zilog, 0); 163155da60b9SMark J Musante 1632fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1633fa9e4066Sahrens return (error); 1634fa9e4066Sahrens } 1635fa9e4066Sahrens 1636fa9e4066Sahrens /* 1637fa9e4066Sahrens * Remove an entry from a directory. 
1638fa9e4066Sahrens * 1639fa9e4066Sahrens * IN: dvp - vnode of directory to remove entry from. 1640fa9e4066Sahrens * name - name of entry to remove. 1641fa9e4066Sahrens * cr - credentials of caller. 1642da6c28aaSamw * ct - caller context 1643da6c28aaSamw * flags - case flags 1644fa9e4066Sahrens * 1645f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 1646fa9e4066Sahrens * 1647fa9e4066Sahrens * Timestamps: 1648fa9e4066Sahrens * dvp - ctime|mtime 1649fa9e4066Sahrens * vp - ctime (if nlink > 0) 1650fa9e4066Sahrens */ 16510a586ceaSMark Shellenbaum 16520a586ceaSMark Shellenbaum uint64_t null_xattr = 0; 16530a586ceaSMark Shellenbaum 1654da6c28aaSamw /*ARGSUSED*/ 1655fa9e4066Sahrens static int 1656da6c28aaSamw zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct, 1657da6c28aaSamw int flags) 1658fa9e4066Sahrens { 1659fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 16600b2a8171SMark Shellenbaum znode_t *xzp; 1661fa9e4066Sahrens vnode_t *vp; 1662fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1663f18faf3fSek110237 zilog_t *zilog; 16640b2a8171SMark Shellenbaum uint64_t acl_obj, xattr_obj; 16650a586ceaSMark Shellenbaum uint64_t xattr_obj_unlinked = 0; 166651bd2f97SNeil Perrin uint64_t obj = 0; 1667fa9e4066Sahrens zfs_dirlock_t *dl; 1668fa9e4066Sahrens dmu_tx_t *tx; 1669893a6d32Sahrens boolean_t may_delete_now, delete_now = FALSE; 1670cdb0ab79Smaybee boolean_t unlinked, toobig = FALSE; 1671da6c28aaSamw uint64_t txtype; 1672da6c28aaSamw pathname_t *realnmp = NULL; 1673da6c28aaSamw pathname_t realnm; 1674fa9e4066Sahrens int error; 1675da6c28aaSamw int zflg = ZEXISTS; 167669962b56SMatthew Ahrens boolean_t waited = B_FALSE; 1677fa9e4066Sahrens 16783cb34c60Sahrens ZFS_ENTER(zfsvfs); 16793cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 1680f18faf3fSek110237 zilog = zfsvfs->z_log; 1681fa9e4066Sahrens 1682da6c28aaSamw if (flags & FIGNORECASE) { 1683da6c28aaSamw zflg |= ZCILOOK; 1684da6c28aaSamw pn_alloc(&realnm); 1685da6c28aaSamw realnmp = &realnm; 
1686da6c28aaSamw } 1687da6c28aaSamw 1688fa9e4066Sahrens top: 16890b2a8171SMark Shellenbaum xattr_obj = 0; 16900b2a8171SMark Shellenbaum xzp = NULL; 1691fa9e4066Sahrens /* 1692fa9e4066Sahrens * Attempt to lock directory; fail if entry doesn't exist. 1693fa9e4066Sahrens */ 1694da6c28aaSamw if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 1695da6c28aaSamw NULL, realnmp)) { 1696da6c28aaSamw if (realnmp) 1697da6c28aaSamw pn_free(realnmp); 1698fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1699fa9e4066Sahrens return (error); 1700fa9e4066Sahrens } 1701fa9e4066Sahrens 1702fa9e4066Sahrens vp = ZTOV(zp); 1703fa9e4066Sahrens 1704fa9e4066Sahrens if (error = zfs_zaccess_delete(dzp, zp, cr)) { 1705fa9e4066Sahrens goto out; 1706fa9e4066Sahrens } 1707fa9e4066Sahrens 1708fa9e4066Sahrens /* 1709fa9e4066Sahrens * Need to use rmdir for removing directories. 1710fa9e4066Sahrens */ 1711fa9e4066Sahrens if (vp->v_type == VDIR) { 1712be6fd75aSMatthew Ahrens error = SET_ERROR(EPERM); 1713fa9e4066Sahrens goto out; 1714fa9e4066Sahrens } 1715fa9e4066Sahrens 1716da6c28aaSamw vnevent_remove(vp, dvp, name, ct); 1717fa9e4066Sahrens 1718da6c28aaSamw if (realnmp) 1719ab04eb8eStimh dnlc_remove(dvp, realnmp->pn_buf); 1720da6c28aaSamw else 1721033f9833Sek110237 dnlc_remove(dvp, name); 1722033f9833Sek110237 1723fa9e4066Sahrens mutex_enter(&vp->v_lock); 1724fa9e4066Sahrens may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp); 1725fa9e4066Sahrens mutex_exit(&vp->v_lock); 1726fa9e4066Sahrens 1727fa9e4066Sahrens /* 1728893a6d32Sahrens * We may delete the znode now, or we may put it in the unlinked set; 1729fa9e4066Sahrens * it depends on whether we're the last link, and on whether there are 1730fa9e4066Sahrens * other holds on the vnode. So we dmu_tx_hold() the right things to 1731fa9e4066Sahrens * allow for either case. 
1732fa9e4066Sahrens */ 173351bd2f97SNeil Perrin obj = zp->z_id; 1734fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 1735ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 17360a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 17370a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 17380a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, dzp); 1739cdb0ab79Smaybee if (may_delete_now) { 1740cdb0ab79Smaybee toobig = 17410a586ceaSMark Shellenbaum zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT; 1742cdb0ab79Smaybee /* if the file is too big, only hold_free a token amount */ 1743cdb0ab79Smaybee dmu_tx_hold_free(tx, zp->z_id, 0, 1744cdb0ab79Smaybee (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END)); 1745cdb0ab79Smaybee } 1746fa9e4066Sahrens 1747fa9e4066Sahrens /* are there any extended attributes? */ 17480a586ceaSMark Shellenbaum error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 17490a586ceaSMark Shellenbaum &xattr_obj, sizeof (xattr_obj)); 17500b2a8171SMark Shellenbaum if (error == 0 && xattr_obj) { 17510a586ceaSMark Shellenbaum error = zfs_zget(zfsvfs, xattr_obj, &xzp); 1752fb09f5aaSMadhav Suresh ASSERT0(error); 17530a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 17540a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE); 1755fa9e4066Sahrens } 1756fa9e4066Sahrens 17571412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 17581412a1a2SMark Shellenbaum if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now) 1759fa9e4066Sahrens dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); 17601412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 1761fa9e4066Sahrens 1762fa9e4066Sahrens /* charge as an update -- would be nice not to charge at all */ 1763893a6d32Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 1764fa9e4066Sahrens 17654bb73804SMatthew Ahrens /* 17666575bca0SSimon Klinkert * Mark this transaction as typically resulting in a net free of space 17674bb73804SMatthew Ahrens */ 
17684bb73804SMatthew Ahrens dmu_tx_mark_netfree(tx); 17694bb73804SMatthew Ahrens 177069962b56SMatthew Ahrens error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT); 1771fa9e4066Sahrens if (error) { 1772fa9e4066Sahrens zfs_dirent_unlock(dl); 1773fa9e4066Sahrens VN_RELE(vp); 17740b2a8171SMark Shellenbaum if (xzp) 17750b2a8171SMark Shellenbaum VN_RELE(ZTOV(xzp)); 17761209a471SNeil Perrin if (error == ERESTART) { 177769962b56SMatthew Ahrens waited = B_TRUE; 17788a2f1b91Sahrens dmu_tx_wait(tx); 17798a2f1b91Sahrens dmu_tx_abort(tx); 1780fa9e4066Sahrens goto top; 1781fa9e4066Sahrens } 1782da6c28aaSamw if (realnmp) 1783da6c28aaSamw pn_free(realnmp); 17848a2f1b91Sahrens dmu_tx_abort(tx); 1785fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1786fa9e4066Sahrens return (error); 1787fa9e4066Sahrens } 1788fa9e4066Sahrens 1789fa9e4066Sahrens /* 1790fa9e4066Sahrens * Remove the directory entry. 1791fa9e4066Sahrens */ 1792da6c28aaSamw error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked); 1793fa9e4066Sahrens 1794fa9e4066Sahrens if (error) { 1795fa9e4066Sahrens dmu_tx_commit(tx); 1796fa9e4066Sahrens goto out; 1797fa9e4066Sahrens } 1798fa9e4066Sahrens 1799893a6d32Sahrens if (unlinked) { 18001412a1a2SMark Shellenbaum /* 18011412a1a2SMark Shellenbaum * Hold z_lock so that we can make sure that the ACL obj 18021412a1a2SMark Shellenbaum * hasn't changed. Could have been deleted due to 18031412a1a2SMark Shellenbaum * zfs_sa_upgrade(). 
18041412a1a2SMark Shellenbaum */ 18051412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 1806fa9e4066Sahrens mutex_enter(&vp->v_lock); 18070a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 18080a586ceaSMark Shellenbaum &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); 1809cdb0ab79Smaybee delete_now = may_delete_now && !toobig && 1810fa9e4066Sahrens vp->v_count == 1 && !vn_has_cached_data(vp) && 18111412a1a2SMark Shellenbaum xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) == 18120a586ceaSMark Shellenbaum acl_obj; 1813fa9e4066Sahrens mutex_exit(&vp->v_lock); 1814fa9e4066Sahrens } 1815fa9e4066Sahrens 18166db5d4ecSAndreas Jaekel txtype = TX_REMOVE; 18176db5d4ecSAndreas Jaekel if (flags & FIGNORECASE) 18186db5d4ecSAndreas Jaekel txtype |= TX_CI; 18196db5d4ecSAndreas Jaekel rw_enter(&rz_zev_rwlock, RW_READER); 18206db5d4ecSAndreas Jaekel if (rz_zev_callbacks && rz_zev_callbacks->rz_zev_znode_remove) 1821e206ace3SAndreas Jaekel rz_zev_callbacks->rz_zev_znode_remove(dzp, zp, tx, 1822e206ace3SAndreas Jaekel name, txtype); 18236db5d4ecSAndreas Jaekel rw_exit(&rz_zev_rwlock); 18246db5d4ecSAndreas Jaekel 1825fa9e4066Sahrens if (delete_now) { 18260a586ceaSMark Shellenbaum if (xattr_obj_unlinked) { 18270a586ceaSMark Shellenbaum ASSERT3U(xzp->z_links, ==, 2); 1828fa9e4066Sahrens mutex_enter(&xzp->z_lock); 1829893a6d32Sahrens xzp->z_unlinked = 1; 18300a586ceaSMark Shellenbaum xzp->z_links = 0; 18310a586ceaSMark Shellenbaum error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs), 18320a586ceaSMark Shellenbaum &xzp->z_links, sizeof (xzp->z_links), tx); 18330a586ceaSMark Shellenbaum ASSERT3U(error, ==, 0); 1834fa9e4066Sahrens mutex_exit(&xzp->z_lock); 1835893a6d32Sahrens zfs_unlinked_add(xzp, tx); 18361412a1a2SMark Shellenbaum 18370a586ceaSMark Shellenbaum if (zp->z_is_sa) 18380a586ceaSMark Shellenbaum error = sa_remove(zp->z_sa_hdl, 18390a586ceaSMark Shellenbaum SA_ZPL_XATTR(zfsvfs), tx); 18400a586ceaSMark Shellenbaum else 18410a586ceaSMark 
Shellenbaum error = sa_update(zp->z_sa_hdl, 18420a586ceaSMark Shellenbaum SA_ZPL_XATTR(zfsvfs), &null_xattr, 18430a586ceaSMark Shellenbaum sizeof (uint64_t), tx); 1844fb09f5aaSMadhav Suresh ASSERT0(error); 1845fa9e4066Sahrens } 1846fa9e4066Sahrens mutex_enter(&vp->v_lock); 1847fa9e4066Sahrens vp->v_count--; 1848fb09f5aaSMadhav Suresh ASSERT0(vp->v_count); 1849fa9e4066Sahrens mutex_exit(&vp->v_lock); 1850fa9e4066Sahrens mutex_exit(&zp->z_lock); 1851fa9e4066Sahrens zfs_znode_delete(zp, tx); 1852893a6d32Sahrens } else if (unlinked) { 18531412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 1854893a6d32Sahrens zfs_unlinked_add(zp, tx); 1855fa9e4066Sahrens } 1856fa9e4066Sahrens 1857da6c28aaSamw txtype = TX_REMOVE; 1858da6c28aaSamw if (flags & FIGNORECASE) 1859da6c28aaSamw txtype |= TX_CI; 186051bd2f97SNeil Perrin zfs_log_remove(zilog, tx, txtype, dzp, name, obj); 1861fa9e4066Sahrens 1862fa9e4066Sahrens dmu_tx_commit(tx); 1863fa9e4066Sahrens out: 1864da6c28aaSamw if (realnmp) 1865da6c28aaSamw pn_free(realnmp); 1866da6c28aaSamw 1867fa9e4066Sahrens zfs_dirent_unlock(dl); 1868fa9e4066Sahrens 186906e0070dSMark Shellenbaum if (!delete_now) 1870fa9e4066Sahrens VN_RELE(vp); 187106e0070dSMark Shellenbaum if (xzp) 1872fa9e4066Sahrens VN_RELE(ZTOV(xzp)); 1873fa9e4066Sahrens 187455da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 18755002558fSNeil Perrin zil_commit(zilog, 0); 187655da60b9SMark J Musante 1877fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1878fa9e4066Sahrens return (error); 1879fa9e4066Sahrens } 1880fa9e4066Sahrens 1881fa9e4066Sahrens /* 1882fa9e4066Sahrens * Create a new directory and insert it into dvp using the name 1883fa9e4066Sahrens * provided. Return a pointer to the inserted directory. 1884fa9e4066Sahrens * 1885fa9e4066Sahrens * IN: dvp - vnode of directory to add subdir to. 1886fa9e4066Sahrens * dirname - name of new directory. 1887fa9e4066Sahrens * vap - attributes of new directory. 1888fa9e4066Sahrens * cr - credentials of caller. 
1889da6c28aaSamw * ct - caller context 1890f7170741SWill Andrews * flags - case flags 1891da6c28aaSamw * vsecp - ACL to be set 1892fa9e4066Sahrens * 1893fa9e4066Sahrens * OUT: vpp - vnode of created directory. 1894fa9e4066Sahrens * 1895f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 1896fa9e4066Sahrens * 1897fa9e4066Sahrens * Timestamps: 1898fa9e4066Sahrens * dvp - ctime|mtime updated 1899fa9e4066Sahrens * vp - ctime|mtime|atime updated 1900fa9e4066Sahrens */ 1901da6c28aaSamw /*ARGSUSED*/ 1902fa9e4066Sahrens static int 1903da6c28aaSamw zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr, 1904da6c28aaSamw caller_context_t *ct, int flags, vsecattr_t *vsecp) 1905fa9e4066Sahrens { 1906fa9e4066Sahrens znode_t *zp, *dzp = VTOZ(dvp); 1907fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 1908f18faf3fSek110237 zilog_t *zilog; 1909fa9e4066Sahrens zfs_dirlock_t *dl; 1910da6c28aaSamw uint64_t txtype; 1911fa9e4066Sahrens dmu_tx_t *tx; 1912fa9e4066Sahrens int error; 1913da6c28aaSamw int zf = ZNEW; 1914c1ce5987SMark Shellenbaum ksid_t *ksid; 1915c1ce5987SMark Shellenbaum uid_t uid; 1916c1ce5987SMark Shellenbaum gid_t gid = crgetgid(cr); 191789459e17SMark Shellenbaum zfs_acl_ids_t acl_ids; 191889459e17SMark Shellenbaum boolean_t fuid_dirtied; 191969962b56SMatthew Ahrens boolean_t waited = B_FALSE; 1920fa9e4066Sahrens 1921fa9e4066Sahrens ASSERT(vap->va_type == VDIR); 1922fa9e4066Sahrens 1923da6c28aaSamw /* 1924da6c28aaSamw * If we have an ephemeral id, ACL, or XVATTR then 1925da6c28aaSamw * make sure file system is at proper version 1926da6c28aaSamw */ 1927da6c28aaSamw 1928c1ce5987SMark Shellenbaum ksid = crgetsid(cr, KSID_OWNER); 1929c1ce5987SMark Shellenbaum if (ksid) 1930c1ce5987SMark Shellenbaum uid = ksid_getid(ksid); 1931c1ce5987SMark Shellenbaum else 1932c1ce5987SMark Shellenbaum uid = crgetuid(cr); 1933da6c28aaSamw if (zfsvfs->z_use_fuids == B_FALSE && 1934c1ce5987SMark Shellenbaum (vsecp || (vap->va_mask & AT_XVATTR) || 
1935756962ecSMark Shellenbaum IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid))) 1936be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 1937da6c28aaSamw 19383cb34c60Sahrens ZFS_ENTER(zfsvfs); 19393cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 1940f18faf3fSek110237 zilog = zfsvfs->z_log; 1941fa9e4066Sahrens 19420a586ceaSMark Shellenbaum if (dzp->z_pflags & ZFS_XATTR) { 1943fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1944be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 1945fa9e4066Sahrens } 1946fa9e4066Sahrens 1947de8267e0Stimh if (zfsvfs->z_utf8 && u8_validate(dirname, 1948da6c28aaSamw strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { 1949da6c28aaSamw ZFS_EXIT(zfsvfs); 1950be6fd75aSMatthew Ahrens return (SET_ERROR(EILSEQ)); 1951da6c28aaSamw } 1952da6c28aaSamw if (flags & FIGNORECASE) 1953da6c28aaSamw zf |= ZCILOOK; 1954da6c28aaSamw 1955c8c24165SMark Shellenbaum if (vap->va_mask & AT_XVATTR) { 1956da6c28aaSamw if ((error = secpolicy_xvattr((xvattr_t *)vap, 1957da6c28aaSamw crgetuid(cr), cr, vap->va_type)) != 0) { 1958fa9e4066Sahrens ZFS_EXIT(zfsvfs); 1959fa9e4066Sahrens return (error); 1960fa9e4066Sahrens } 1961c8c24165SMark Shellenbaum } 1962fa9e4066Sahrens 1963c8c24165SMark Shellenbaum if ((error = zfs_acl_ids_create(dzp, 0, vap, cr, 1964c8c24165SMark Shellenbaum vsecp, &acl_ids)) != 0) { 1965c8c24165SMark Shellenbaum ZFS_EXIT(zfsvfs); 1966c8c24165SMark Shellenbaum return (error); 1967c8c24165SMark Shellenbaum } 1968da6c28aaSamw /* 1969da6c28aaSamw * First make sure the new directory doesn't exist. 1970c8c24165SMark Shellenbaum * 1971c8c24165SMark Shellenbaum * Existence is checked first to make sure we don't return 1972c8c24165SMark Shellenbaum * EACCES instead of EEXIST which can cause some applications 1973c8c24165SMark Shellenbaum * to fail. 
1974da6c28aaSamw */ 1975da6c28aaSamw top: 1976da6c28aaSamw *vpp = NULL; 1977da6c28aaSamw 1978da6c28aaSamw if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf, 1979da6c28aaSamw NULL, NULL)) { 1980c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1981da6c28aaSamw ZFS_EXIT(zfsvfs); 1982da6c28aaSamw return (error); 1983da6c28aaSamw } 1984da6c28aaSamw 1985da6c28aaSamw if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) { 1986c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 1987d2443e76Smarks zfs_dirent_unlock(dl); 1988d2443e76Smarks ZFS_EXIT(zfsvfs); 1989d2443e76Smarks return (error); 1990d2443e76Smarks } 1991d2443e76Smarks 199214843421SMatthew Ahrens if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) { 19934929fd5eSTim Haley zfs_acl_ids_free(&acl_ids); 199414843421SMatthew Ahrens zfs_dirent_unlock(dl); 199514843421SMatthew Ahrens ZFS_EXIT(zfsvfs); 1996be6fd75aSMatthew Ahrens return (SET_ERROR(EDQUOT)); 199714843421SMatthew Ahrens } 199889459e17SMark Shellenbaum 1999fa9e4066Sahrens /* 2000fa9e4066Sahrens * Add a new entry to the directory. 2001fa9e4066Sahrens */ 2002fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 2003ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); 2004ea8dc4b6Seschrock dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 200589459e17SMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 200614843421SMatthew Ahrens if (fuid_dirtied) 200714843421SMatthew Ahrens zfs_fuid_txhold(zfsvfs, tx); 20080a586ceaSMark Shellenbaum if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) { 20090a586ceaSMark Shellenbaum dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, 20100a586ceaSMark Shellenbaum acl_ids.z_aclp->z_acl_bytes); 20110a586ceaSMark Shellenbaum } 20120a586ceaSMark Shellenbaum 20130a586ceaSMark Shellenbaum dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + 20140a586ceaSMark Shellenbaum ZFS_SA_BASE_ATTR_SIZE); 20150a586ceaSMark Shellenbaum 201669962b56SMatthew Ahrens error = dmu_tx_assign(tx, waited ? 
TXG_WAITED : TXG_NOWAIT); 2017fa9e4066Sahrens if (error) { 2018fa9e4066Sahrens zfs_dirent_unlock(dl); 20191209a471SNeil Perrin if (error == ERESTART) { 202069962b56SMatthew Ahrens waited = B_TRUE; 20218a2f1b91Sahrens dmu_tx_wait(tx); 20228a2f1b91Sahrens dmu_tx_abort(tx); 2023fa9e4066Sahrens goto top; 2024fa9e4066Sahrens } 2025c8c24165SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 20268a2f1b91Sahrens dmu_tx_abort(tx); 2027fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2028fa9e4066Sahrens return (error); 2029fa9e4066Sahrens } 2030fa9e4066Sahrens 2031fa9e4066Sahrens /* 2032fa9e4066Sahrens * Create new node. 2033fa9e4066Sahrens */ 20340a586ceaSMark Shellenbaum zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids); 2035da6c28aaSamw 203689459e17SMark Shellenbaum if (fuid_dirtied) 203789459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 20380a586ceaSMark Shellenbaum 2039fa9e4066Sahrens /* 2040fa9e4066Sahrens * Now put new name in parent dir. 2041fa9e4066Sahrens */ 2042fa9e4066Sahrens (void) zfs_link_create(dl, zp, tx, ZNEW); 2043fa9e4066Sahrens 2044fa9e4066Sahrens *vpp = ZTOV(zp); 2045fa9e4066Sahrens 2046da6c28aaSamw txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap); 2047da6c28aaSamw if (flags & FIGNORECASE) 2048da6c28aaSamw txtype |= TX_CI; 204989459e17SMark Shellenbaum zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp, 205089459e17SMark Shellenbaum acl_ids.z_fuidp, vap); 2051da6c28aaSamw 205289459e17SMark Shellenbaum zfs_acl_ids_free(&acl_ids); 20530a586ceaSMark Shellenbaum 2054fa9e4066Sahrens dmu_tx_commit(tx); 2055fa9e4066Sahrens 2056fa9e4066Sahrens zfs_dirent_unlock(dl); 2057fa9e4066Sahrens 205855da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 20595002558fSNeil Perrin zil_commit(zilog, 0); 206055da60b9SMark J Musante 2061fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2062fa9e4066Sahrens return (0); 2063fa9e4066Sahrens } 2064fa9e4066Sahrens 2065fa9e4066Sahrens /* 2066fa9e4066Sahrens * Remove a directory subdir entry. 
If the current working 2067fa9e4066Sahrens * directory is the same as the subdir to be removed, the 2068fa9e4066Sahrens * remove will fail. 2069fa9e4066Sahrens * 2070fa9e4066Sahrens * IN: dvp - vnode of directory to remove from. 2071fa9e4066Sahrens * name - name of directory to be removed. 2072fa9e4066Sahrens * cwd - vnode of current working directory. 2073fa9e4066Sahrens * cr - credentials of caller. 2074da6c28aaSamw * ct - caller context 2075da6c28aaSamw * flags - case flags 2076fa9e4066Sahrens * 2077f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 2078fa9e4066Sahrens * 2079fa9e4066Sahrens * Timestamps: 2080fa9e4066Sahrens * dvp - ctime|mtime updated 2081fa9e4066Sahrens */ 2082da6c28aaSamw /*ARGSUSED*/ 2083fa9e4066Sahrens static int 2084da6c28aaSamw zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr, 2085da6c28aaSamw caller_context_t *ct, int flags) 2086fa9e4066Sahrens { 2087fa9e4066Sahrens znode_t *dzp = VTOZ(dvp); 2088fa9e4066Sahrens znode_t *zp; 2089fa9e4066Sahrens vnode_t *vp; 2090fa9e4066Sahrens zfsvfs_t *zfsvfs = dzp->z_zfsvfs; 2091f18faf3fSek110237 zilog_t *zilog; 2092fa9e4066Sahrens zfs_dirlock_t *dl; 2093fa9e4066Sahrens dmu_tx_t *tx; 2094fa9e4066Sahrens int error; 2095da6c28aaSamw int zflg = ZEXISTS; 209669962b56SMatthew Ahrens boolean_t waited = B_FALSE; 2097fa9e4066Sahrens 20983cb34c60Sahrens ZFS_ENTER(zfsvfs); 20993cb34c60Sahrens ZFS_VERIFY_ZP(dzp); 2100f18faf3fSek110237 zilog = zfsvfs->z_log; 2101fa9e4066Sahrens 2102da6c28aaSamw if (flags & FIGNORECASE) 2103da6c28aaSamw zflg |= ZCILOOK; 2104fa9e4066Sahrens top: 2105fa9e4066Sahrens zp = NULL; 2106fa9e4066Sahrens 2107fa9e4066Sahrens /* 2108fa9e4066Sahrens * Attempt to lock directory; fail if entry doesn't exist. 
2109fa9e4066Sahrens */ 2110da6c28aaSamw if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, 2111da6c28aaSamw NULL, NULL)) { 2112fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2113fa9e4066Sahrens return (error); 2114fa9e4066Sahrens } 2115fa9e4066Sahrens 2116fa9e4066Sahrens vp = ZTOV(zp); 2117fa9e4066Sahrens 2118fa9e4066Sahrens if (error = zfs_zaccess_delete(dzp, zp, cr)) { 2119fa9e4066Sahrens goto out; 2120fa9e4066Sahrens } 2121fa9e4066Sahrens 2122fa9e4066Sahrens if (vp->v_type != VDIR) { 2123be6fd75aSMatthew Ahrens error = SET_ERROR(ENOTDIR); 2124fa9e4066Sahrens goto out; 2125fa9e4066Sahrens } 2126fa9e4066Sahrens 2127fa9e4066Sahrens if (vp == cwd) { 2128be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 2129fa9e4066Sahrens goto out; 2130fa9e4066Sahrens } 2131fa9e4066Sahrens 2132da6c28aaSamw vnevent_rmdir(vp, dvp, name, ct); 2133fa9e4066Sahrens 2134fa9e4066Sahrens /* 2135af2c4821Smaybee * Grab a lock on the directory to make sure that noone is 2136af2c4821Smaybee * trying to add (or lookup) entries while we are removing it. 2137af2c4821Smaybee */ 2138af2c4821Smaybee rw_enter(&zp->z_name_lock, RW_WRITER); 2139af2c4821Smaybee 2140af2c4821Smaybee /* 2141af2c4821Smaybee * Grab a lock on the parent pointer to make sure we play well 2142fa9e4066Sahrens * with the treewalk and directory rename code. 2143fa9e4066Sahrens */ 2144fa9e4066Sahrens rw_enter(&zp->z_parent_lock, RW_WRITER); 2145fa9e4066Sahrens 2146fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 2147ea8dc4b6Seschrock dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name); 21480a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 2149893a6d32Sahrens dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL); 21500a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 21510a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, dzp); 2152*572b6878SSimon Klinkert dmu_tx_mark_netfree(tx); 215369962b56SMatthew Ahrens error = dmu_tx_assign(tx, waited ? 
TXG_WAITED : TXG_NOWAIT); 2154fa9e4066Sahrens if (error) { 2155fa9e4066Sahrens rw_exit(&zp->z_parent_lock); 2156af2c4821Smaybee rw_exit(&zp->z_name_lock); 2157fa9e4066Sahrens zfs_dirent_unlock(dl); 2158fa9e4066Sahrens VN_RELE(vp); 21591209a471SNeil Perrin if (error == ERESTART) { 216069962b56SMatthew Ahrens waited = B_TRUE; 21618a2f1b91Sahrens dmu_tx_wait(tx); 21628a2f1b91Sahrens dmu_tx_abort(tx); 2163fa9e4066Sahrens goto top; 2164fa9e4066Sahrens } 21658a2f1b91Sahrens dmu_tx_abort(tx); 2166fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2167fa9e4066Sahrens return (error); 2168fa9e4066Sahrens } 2169fa9e4066Sahrens 2170da6c28aaSamw error = zfs_link_destroy(dl, zp, tx, zflg, NULL); 2171fa9e4066Sahrens 2172da6c28aaSamw if (error == 0) { 2173da6c28aaSamw uint64_t txtype = TX_RMDIR; 2174da6c28aaSamw if (flags & FIGNORECASE) 2175da6c28aaSamw txtype |= TX_CI; 21766db5d4ecSAndreas Jaekel 21776db5d4ecSAndreas Jaekel rw_enter(&rz_zev_rwlock, RW_READER); 21786db5d4ecSAndreas Jaekel if (rz_zev_callbacks && rz_zev_callbacks->rz_zev_znode_remove) 2179e206ace3SAndreas Jaekel rz_zev_callbacks->rz_zev_znode_remove(dzp, zp, tx, 21806db5d4ecSAndreas Jaekel name, txtype); 21816db5d4ecSAndreas Jaekel rw_exit(&rz_zev_rwlock); 21826db5d4ecSAndreas Jaekel 21835002558fSNeil Perrin zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT); 2184da6c28aaSamw } 2185fa9e4066Sahrens 2186fa9e4066Sahrens dmu_tx_commit(tx); 2187fa9e4066Sahrens 2188fa9e4066Sahrens rw_exit(&zp->z_parent_lock); 2189af2c4821Smaybee rw_exit(&zp->z_name_lock); 2190fa9e4066Sahrens out: 2191fa9e4066Sahrens zfs_dirent_unlock(dl); 2192fa9e4066Sahrens 2193fa9e4066Sahrens VN_RELE(vp); 2194fa9e4066Sahrens 219555da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 21965002558fSNeil Perrin zil_commit(zilog, 0); 219755da60b9SMark J Musante 2198fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2199fa9e4066Sahrens return (error); 2200fa9e4066Sahrens } 2201fa9e4066Sahrens 2202fa9e4066Sahrens /* 2203fa9e4066Sahrens * Read as many directory entries 
as will fit into the provided 2204fa9e4066Sahrens * buffer from the given directory cursor position (specified in 2205f7170741SWill Andrews * the uio structure). 2206fa9e4066Sahrens * 2207fa9e4066Sahrens * IN: vp - vnode of directory to read. 2208fa9e4066Sahrens * uio - structure supplying read location, range info, 2209fa9e4066Sahrens * and return buffer. 2210fa9e4066Sahrens * cr - credentials of caller. 2211da6c28aaSamw * ct - caller context 2212da6c28aaSamw * flags - case flags 2213fa9e4066Sahrens * 2214fa9e4066Sahrens * OUT: uio - updated offset and range, buffer filled. 2215fa9e4066Sahrens * eofp - set to true if end-of-file detected. 2216fa9e4066Sahrens * 2217f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 2218fa9e4066Sahrens * 2219fa9e4066Sahrens * Timestamps: 2220fa9e4066Sahrens * vp - atime updated 2221fa9e4066Sahrens * 2222fa9e4066Sahrens * Note that the low 4 bits of the cookie returned by zap is always zero. 2223fa9e4066Sahrens * This allows us to use the low range for "special" directory entries: 2224fa9e4066Sahrens * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, 2225fa9e4066Sahrens * we use the offset 2 for the '.zfs' directory. 
2226fa9e4066Sahrens */ 2227fa9e4066Sahrens /* ARGSUSED */ 2228fa9e4066Sahrens static int 2229da6c28aaSamw zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp, 2230da6c28aaSamw caller_context_t *ct, int flags) 2231fa9e4066Sahrens { 2232fa9e4066Sahrens znode_t *zp = VTOZ(vp); 2233fa9e4066Sahrens iovec_t *iovp; 2234da6c28aaSamw edirent_t *eodp; 2235fa9e4066Sahrens dirent64_t *odp; 2236fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 22377f6e3e7dSperrin objset_t *os; 2238fa9e4066Sahrens caddr_t outbuf; 2239fa9e4066Sahrens size_t bufsize; 2240fa9e4066Sahrens zap_cursor_t zc; 2241fa9e4066Sahrens zap_attribute_t zap; 2242fa9e4066Sahrens uint_t bytes_wanted; 2243fa9e4066Sahrens uint64_t offset; /* must be unsigned; checks for < 1 */ 22440a586ceaSMark Shellenbaum uint64_t parent; 2245fa9e4066Sahrens int local_eof; 22467f6e3e7dSperrin int outcount; 22477f6e3e7dSperrin int error; 22487f6e3e7dSperrin uint8_t prefetch; 2249b38f0970Sck153898 boolean_t check_sysattrs; 2250fa9e4066Sahrens 22513cb34c60Sahrens ZFS_ENTER(zfsvfs); 22523cb34c60Sahrens ZFS_VERIFY_ZP(zp); 2253fa9e4066Sahrens 22540a586ceaSMark Shellenbaum if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs), 22550a586ceaSMark Shellenbaum &parent, sizeof (parent))) != 0) { 22560a586ceaSMark Shellenbaum ZFS_EXIT(zfsvfs); 22570a586ceaSMark Shellenbaum return (error); 22580a586ceaSMark Shellenbaum } 22590a586ceaSMark Shellenbaum 2260fa9e4066Sahrens /* 2261fa9e4066Sahrens * If we are not given an eof variable, 2262fa9e4066Sahrens * use a local one. 2263fa9e4066Sahrens */ 2264fa9e4066Sahrens if (eofp == NULL) 2265fa9e4066Sahrens eofp = &local_eof; 2266fa9e4066Sahrens 2267fa9e4066Sahrens /* 2268fa9e4066Sahrens * Check for valid iov_len. 
2269fa9e4066Sahrens */ 2270fa9e4066Sahrens if (uio->uio_iov->iov_len <= 0) { 2271fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2272be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 2273fa9e4066Sahrens } 2274fa9e4066Sahrens 2275fa9e4066Sahrens /* 2276fa9e4066Sahrens * Quit if directory has been removed (posix) 2277fa9e4066Sahrens */ 2278893a6d32Sahrens if ((*eofp = zp->z_unlinked) != 0) { 2279fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2280fa9e4066Sahrens return (0); 2281fa9e4066Sahrens } 2282fa9e4066Sahrens 22837f6e3e7dSperrin error = 0; 22847f6e3e7dSperrin os = zfsvfs->z_os; 22857f6e3e7dSperrin offset = uio->uio_loffset; 22867f6e3e7dSperrin prefetch = zp->z_zn_prefetch; 22877f6e3e7dSperrin 2288fa9e4066Sahrens /* 2289fa9e4066Sahrens * Initialize the iterator cursor. 2290fa9e4066Sahrens */ 2291fa9e4066Sahrens if (offset <= 3) { 2292fa9e4066Sahrens /* 2293fa9e4066Sahrens * Start iteration from the beginning of the directory. 2294fa9e4066Sahrens */ 22957f6e3e7dSperrin zap_cursor_init(&zc, os, zp->z_id); 2296fa9e4066Sahrens } else { 2297fa9e4066Sahrens /* 2298fa9e4066Sahrens * The offset is a serialized cursor. 2299fa9e4066Sahrens */ 23007f6e3e7dSperrin zap_cursor_init_serialized(&zc, os, zp->z_id, offset); 2301fa9e4066Sahrens } 2302fa9e4066Sahrens 2303fa9e4066Sahrens /* 2304fa9e4066Sahrens * Get space to change directory entries into fs independent format. 
2305fa9e4066Sahrens */ 2306fa9e4066Sahrens iovp = uio->uio_iov; 2307fa9e4066Sahrens bytes_wanted = iovp->iov_len; 2308fa9e4066Sahrens if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) { 2309fa9e4066Sahrens bufsize = bytes_wanted; 2310fa9e4066Sahrens outbuf = kmem_alloc(bufsize, KM_SLEEP); 2311fa9e4066Sahrens odp = (struct dirent64 *)outbuf; 2312fa9e4066Sahrens } else { 2313fa9e4066Sahrens bufsize = bytes_wanted; 2314d5285caeSGeorge Wilson outbuf = NULL; 2315fa9e4066Sahrens odp = (struct dirent64 *)iovp->iov_base; 2316fa9e4066Sahrens } 2317da6c28aaSamw eodp = (struct edirent *)odp; 2318fa9e4066Sahrens 2319fa9e4066Sahrens /* 23209660e5cbSJanice Chang * If this VFS supports the system attribute view interface; and 23219660e5cbSJanice Chang * we're looking at an extended attribute directory; and we care 23229660e5cbSJanice Chang * about normalization conflicts on this vfs; then we must check 23239660e5cbSJanice Chang * for normalization conflicts with the sysattr name space. 2324b38f0970Sck153898 */ 23259660e5cbSJanice Chang check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2326b38f0970Sck153898 (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm && 2327b38f0970Sck153898 (flags & V_RDDIR_ENTFLAGS); 2328b38f0970Sck153898 2329b38f0970Sck153898 /* 2330fa9e4066Sahrens * Transform to file-system independent format 2331fa9e4066Sahrens */ 2332fa9e4066Sahrens outcount = 0; 2333fa9e4066Sahrens while (outcount < bytes_wanted) { 2334b1b8ab34Slling ino64_t objnum; 2335b1b8ab34Slling ushort_t reclen; 233697f85387STim Haley off64_t *next = NULL; 2337b1b8ab34Slling 2338fa9e4066Sahrens /* 2339fa9e4066Sahrens * Special case `.', `..', and `.zfs'. 
2340fa9e4066Sahrens */ 2341fa9e4066Sahrens if (offset == 0) { 2342fa9e4066Sahrens (void) strcpy(zap.za_name, "."); 2343da6c28aaSamw zap.za_normalization_conflict = 0; 2344b1b8ab34Slling objnum = zp->z_id; 2345fa9e4066Sahrens } else if (offset == 1) { 2346fa9e4066Sahrens (void) strcpy(zap.za_name, ".."); 2347da6c28aaSamw zap.za_normalization_conflict = 0; 23480a586ceaSMark Shellenbaum objnum = parent; 2349fa9e4066Sahrens } else if (offset == 2 && zfs_show_ctldir(zp)) { 2350fa9e4066Sahrens (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME); 2351da6c28aaSamw zap.za_normalization_conflict = 0; 2352b1b8ab34Slling objnum = ZFSCTL_INO_ROOT; 2353fa9e4066Sahrens } else { 2354fa9e4066Sahrens /* 2355fa9e4066Sahrens * Grab next entry. 2356fa9e4066Sahrens */ 2357fa9e4066Sahrens if (error = zap_cursor_retrieve(&zc, &zap)) { 2358fa9e4066Sahrens if ((*eofp = (error == ENOENT)) != 0) 2359fa9e4066Sahrens break; 2360fa9e4066Sahrens else 2361fa9e4066Sahrens goto update; 2362fa9e4066Sahrens } 2363fa9e4066Sahrens 2364fa9e4066Sahrens if (zap.za_integer_length != 8 || 2365fa9e4066Sahrens zap.za_num_integers != 1) { 2366fa9e4066Sahrens cmn_err(CE_WARN, "zap_readdir: bad directory " 2367fa9e4066Sahrens "entry, obj = %lld, offset = %lld\n", 2368fa9e4066Sahrens (u_longlong_t)zp->z_id, 2369fa9e4066Sahrens (u_longlong_t)offset); 2370be6fd75aSMatthew Ahrens error = SET_ERROR(ENXIO); 2371fa9e4066Sahrens goto update; 2372fa9e4066Sahrens } 2373b1b8ab34Slling 2374b1b8ab34Slling objnum = ZFS_DIRENT_OBJ(zap.za_first_integer); 2375b1b8ab34Slling /* 2376b1b8ab34Slling * MacOS X can extract the object type here such as: 2377b1b8ab34Slling * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer); 2378b1b8ab34Slling */ 2379b38f0970Sck153898 2380b38f0970Sck153898 if (check_sysattrs && !zap.za_normalization_conflict) { 2381b38f0970Sck153898 zap.za_normalization_conflict = 2382b38f0970Sck153898 xattr_sysattr_casechk(zap.za_name); 2383b38f0970Sck153898 } 2384fa9e4066Sahrens } 2385da6c28aaSamw 2386e802abbdSTim Haley if 
(flags & V_RDDIR_ACCFILTER) { 2387e802abbdSTim Haley /* 2388e802abbdSTim Haley * If we have no access at all, don't include 2389e802abbdSTim Haley * this entry in the returned information 2390e802abbdSTim Haley */ 2391e802abbdSTim Haley znode_t *ezp; 2392e802abbdSTim Haley if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0) 2393e802abbdSTim Haley goto skip_entry; 2394e802abbdSTim Haley if (!zfs_has_access(ezp, cr)) { 2395e802abbdSTim Haley VN_RELE(ZTOV(ezp)); 2396e802abbdSTim Haley goto skip_entry; 2397e802abbdSTim Haley } 2398e802abbdSTim Haley VN_RELE(ZTOV(ezp)); 2399e802abbdSTim Haley } 2400e802abbdSTim Haley 2401da6c28aaSamw if (flags & V_RDDIR_ENTFLAGS) 2402da6c28aaSamw reclen = EDIRENT_RECLEN(strlen(zap.za_name)); 2403da6c28aaSamw else 2404b1b8ab34Slling reclen = DIRENT64_RECLEN(strlen(zap.za_name)); 2405fa9e4066Sahrens 2406fa9e4066Sahrens /* 2407fa9e4066Sahrens * Will this entry fit in the buffer? 2408fa9e4066Sahrens */ 2409b1b8ab34Slling if (outcount + reclen > bufsize) { 2410fa9e4066Sahrens /* 2411fa9e4066Sahrens * Did we manage to fit anything in the buffer? 2412fa9e4066Sahrens */ 2413fa9e4066Sahrens if (!outcount) { 2414be6fd75aSMatthew Ahrens error = SET_ERROR(EINVAL); 2415fa9e4066Sahrens goto update; 2416fa9e4066Sahrens } 2417fa9e4066Sahrens break; 2418fa9e4066Sahrens } 2419da6c28aaSamw if (flags & V_RDDIR_ENTFLAGS) { 2420fa9e4066Sahrens /* 2421da6c28aaSamw * Add extended flag entry: 2422da6c28aaSamw */ 2423da6c28aaSamw eodp->ed_ino = objnum; 2424da6c28aaSamw eodp->ed_reclen = reclen; 2425da6c28aaSamw /* NOTE: ed_off is the offset for the *next* entry */ 2426da6c28aaSamw next = &(eodp->ed_off); 2427da6c28aaSamw eodp->ed_eflags = zap.za_normalization_conflict ? 
2428da6c28aaSamw ED_CASE_CONFLICT : 0; 2429da6c28aaSamw (void) strncpy(eodp->ed_name, zap.za_name, 2430da6c28aaSamw EDIRENT_NAMELEN(reclen)); 2431da6c28aaSamw eodp = (edirent_t *)((intptr_t)eodp + reclen); 2432da6c28aaSamw } else { 2433da6c28aaSamw /* 2434da6c28aaSamw * Add normal entry: 2435fa9e4066Sahrens */ 2436b1b8ab34Slling odp->d_ino = objnum; 2437b1b8ab34Slling odp->d_reclen = reclen; 2438fa9e4066Sahrens /* NOTE: d_off is the offset for the *next* entry */ 2439fa9e4066Sahrens next = &(odp->d_off); 2440fa9e4066Sahrens (void) strncpy(odp->d_name, zap.za_name, 2441b1b8ab34Slling DIRENT64_NAMELEN(reclen)); 2442b1b8ab34Slling odp = (dirent64_t *)((intptr_t)odp + reclen); 2443da6c28aaSamw } 2444da6c28aaSamw outcount += reclen; 2445fa9e4066Sahrens 2446fa9e4066Sahrens ASSERT(outcount <= bufsize); 2447fa9e4066Sahrens 2448fa9e4066Sahrens /* Prefetch znode */ 24497f6e3e7dSperrin if (prefetch) 2450a2cdcdd2SPaul Dagnelie dmu_prefetch(os, objnum, 0, 0, 0, 2451a2cdcdd2SPaul Dagnelie ZIO_PRIORITY_SYNC_READ); 2452fa9e4066Sahrens 2453e802abbdSTim Haley skip_entry: 2454fa9e4066Sahrens /* 2455fa9e4066Sahrens * Move to the next entry, fill in the previous offset. 
2456fa9e4066Sahrens */ 2457fa9e4066Sahrens if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) { 2458fa9e4066Sahrens zap_cursor_advance(&zc); 2459fa9e4066Sahrens offset = zap_cursor_serialize(&zc); 2460fa9e4066Sahrens } else { 2461fa9e4066Sahrens offset += 1; 2462fa9e4066Sahrens } 246397f85387STim Haley if (next) 2464fa9e4066Sahrens *next = offset; 2465fa9e4066Sahrens } 24667f6e3e7dSperrin zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */ 2467fa9e4066Sahrens 2468fa9e4066Sahrens if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) { 2469fa9e4066Sahrens iovp->iov_base += outcount; 2470fa9e4066Sahrens iovp->iov_len -= outcount; 2471fa9e4066Sahrens uio->uio_resid -= outcount; 2472fa9e4066Sahrens } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) { 2473fa9e4066Sahrens /* 2474fa9e4066Sahrens * Reset the pointer. 2475fa9e4066Sahrens */ 2476fa9e4066Sahrens offset = uio->uio_loffset; 2477fa9e4066Sahrens } 2478fa9e4066Sahrens 2479fa9e4066Sahrens update: 248087e5029aSahrens zap_cursor_fini(&zc); 2481fa9e4066Sahrens if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) 2482fa9e4066Sahrens kmem_free(outbuf, bufsize); 2483fa9e4066Sahrens 2484fa9e4066Sahrens if (error == ENOENT) 2485fa9e4066Sahrens error = 0; 2486fa9e4066Sahrens 2487fa9e4066Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 2488fa9e4066Sahrens 2489fa9e4066Sahrens uio->uio_loffset = offset; 2490fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2491fa9e4066Sahrens return (error); 2492fa9e4066Sahrens } 2493fa9e4066Sahrens 2494ec533521Sfr157268 ulong_t zfs_fsync_sync_cnt = 4; 2495ec533521Sfr157268 2496fa9e4066Sahrens static int 2497da6c28aaSamw zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 2498fa9e4066Sahrens { 2499fa9e4066Sahrens znode_t *zp = VTOZ(vp); 2500fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2501fa9e4066Sahrens 2502b468a217Seschrock /* 2503b468a217Seschrock * Regardless of whether this is required for standards conformance, 2504b468a217Seschrock 
* this is the logical behavior when fsync() is called on a file with 2505b468a217Seschrock * dirty pages. We use B_ASYNC since the ZIL transactions are already 2506b468a217Seschrock * going to be pushed out as part of the zil_commit(). 2507b468a217Seschrock */ 2508b468a217Seschrock if (vn_has_cached_data(vp) && !(syncflag & FNODSYNC) && 2509b468a217Seschrock (vp->v_type == VREG) && !(IS_SWAPVP(vp))) 2510da6c28aaSamw (void) VOP_PUTPAGE(vp, (offset_t)0, (size_t)0, B_ASYNC, cr, ct); 2511b468a217Seschrock 2512ec533521Sfr157268 (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt); 2513ec533521Sfr157268 251455da60b9SMark J Musante if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) { 25153cb34c60Sahrens ZFS_ENTER(zfsvfs); 25163cb34c60Sahrens ZFS_VERIFY_ZP(zp); 25175002558fSNeil Perrin zil_commit(zfsvfs->z_log, zp->z_id); 2518fa9e4066Sahrens ZFS_EXIT(zfsvfs); 251955da60b9SMark J Musante } 2520fa9e4066Sahrens return (0); 2521fa9e4066Sahrens } 2522fa9e4066Sahrens 2523da6c28aaSamw 2524fa9e4066Sahrens /* 2525fa9e4066Sahrens * Get the requested file attributes and place them in the provided 2526fa9e4066Sahrens * vattr structure. 2527fa9e4066Sahrens * 2528fa9e4066Sahrens * IN: vp - vnode of file. 2529fa9e4066Sahrens * vap - va_mask identifies requested attributes. 2530da6c28aaSamw * If AT_XVATTR set, then optional attrs are requested 2531da6c28aaSamw * flags - ATTR_NOACLCHECK (CIFS server context) 2532fa9e4066Sahrens * cr - credentials of caller. 2533da6c28aaSamw * ct - caller context 2534fa9e4066Sahrens * 2535fa9e4066Sahrens * OUT: vap - attribute values. 2536fa9e4066Sahrens * 2537f7170741SWill Andrews * RETURN: 0 (always succeeds). 
2538fa9e4066Sahrens */ 2539fa9e4066Sahrens /* ARGSUSED */ 2540fa9e4066Sahrens static int 2541da6c28aaSamw zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2542da6c28aaSamw caller_context_t *ct) 2543fa9e4066Sahrens { 2544fa9e4066Sahrens znode_t *zp = VTOZ(vp); 2545fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2546da6c28aaSamw int error = 0; 2547ecd6cf80Smarks uint64_t links; 25480a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 2549da6c28aaSamw xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2550da6c28aaSamw xoptattr_t *xoap = NULL; 2551da6c28aaSamw boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 25520a586ceaSMark Shellenbaum sa_bulk_attr_t bulk[2]; 25530a586ceaSMark Shellenbaum int count = 0; 2554fa9e4066Sahrens 25553cb34c60Sahrens ZFS_ENTER(zfsvfs); 25563cb34c60Sahrens ZFS_VERIFY_ZP(zp); 25570a586ceaSMark Shellenbaum 2558f1696b23SMark Shellenbaum zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid); 2559f1696b23SMark Shellenbaum 25600a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 25610a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 25620a586ceaSMark Shellenbaum 25630a586ceaSMark Shellenbaum if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) { 25640a586ceaSMark Shellenbaum ZFS_EXIT(zfsvfs); 25650a586ceaSMark Shellenbaum return (error); 25660a586ceaSMark Shellenbaum } 2567fa9e4066Sahrens 2568da6c28aaSamw /* 2569da6c28aaSamw * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES. 2570da6c28aaSamw * Also, if we are the owner don't bother, since owner should 2571da6c28aaSamw * always be allowed to read basic attributes of file. 
2572da6c28aaSamw */ 2573f1696b23SMark Shellenbaum if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) && 2574f1696b23SMark Shellenbaum (vap->va_uid != crgetuid(cr))) { 2575da6c28aaSamw if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0, 2576da6c28aaSamw skipaclchk, cr)) { 2577da6c28aaSamw ZFS_EXIT(zfsvfs); 2578da6c28aaSamw return (error); 2579da6c28aaSamw } 2580da6c28aaSamw } 2581da6c28aaSamw 2582fa9e4066Sahrens /* 2583fa9e4066Sahrens * Return all attributes. It's cheaper to provide the answer 2584fa9e4066Sahrens * than to determine whether we were asked the question. 2585fa9e4066Sahrens */ 2586fa9e4066Sahrens 258734f345efSRay Hassan mutex_enter(&zp->z_lock); 2588fa9e4066Sahrens vap->va_type = vp->v_type; 25890a586ceaSMark Shellenbaum vap->va_mode = zp->z_mode & MODEMASK; 2590fa9e4066Sahrens vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev; 2591fa9e4066Sahrens vap->va_nodeid = zp->z_id; 2592ecd6cf80Smarks if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp)) 25930a586ceaSMark Shellenbaum links = zp->z_links + 1; 2594ecd6cf80Smarks else 25950a586ceaSMark Shellenbaum links = zp->z_links; 2596ecd6cf80Smarks vap->va_nlink = MIN(links, UINT32_MAX); /* nlink_t limit! */ 25970a586ceaSMark Shellenbaum vap->va_size = zp->z_size; 259872fc53bcSmarks vap->va_rdev = vp->v_rdev; 2599fa9e4066Sahrens vap->va_seq = zp->z_seq; 2600fa9e4066Sahrens 2601da6c28aaSamw /* 2602da6c28aaSamw * Add in any requested optional attributes and the create time. 2603da6c28aaSamw * Also set the corresponding bits in the returned attribute bitmap. 
2604da6c28aaSamw */ 2605da6c28aaSamw if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) { 2606da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 2607da6c28aaSamw xoap->xoa_archive = 26080a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_ARCHIVE) != 0); 2609da6c28aaSamw XVA_SET_RTN(xvap, XAT_ARCHIVE); 2610da6c28aaSamw } 2611da6c28aaSamw 2612da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 2613da6c28aaSamw xoap->xoa_readonly = 26140a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_READONLY) != 0); 2615da6c28aaSamw XVA_SET_RTN(xvap, XAT_READONLY); 2616da6c28aaSamw } 2617da6c28aaSamw 2618da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 2619da6c28aaSamw xoap->xoa_system = 26200a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_SYSTEM) != 0); 2621da6c28aaSamw XVA_SET_RTN(xvap, XAT_SYSTEM); 2622da6c28aaSamw } 2623da6c28aaSamw 2624da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 2625da6c28aaSamw xoap->xoa_hidden = 26260a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_HIDDEN) != 0); 2627da6c28aaSamw XVA_SET_RTN(xvap, XAT_HIDDEN); 2628da6c28aaSamw } 2629da6c28aaSamw 2630da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2631da6c28aaSamw xoap->xoa_nounlink = 26320a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_NOUNLINK) != 0); 2633da6c28aaSamw XVA_SET_RTN(xvap, XAT_NOUNLINK); 2634da6c28aaSamw } 2635da6c28aaSamw 2636da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2637da6c28aaSamw xoap->xoa_immutable = 26380a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_IMMUTABLE) != 0); 2639da6c28aaSamw XVA_SET_RTN(xvap, XAT_IMMUTABLE); 2640da6c28aaSamw } 2641da6c28aaSamw 2642da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2643da6c28aaSamw xoap->xoa_appendonly = 26440a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_APPENDONLY) != 0); 2645da6c28aaSamw XVA_SET_RTN(xvap, XAT_APPENDONLY); 2646da6c28aaSamw } 2647da6c28aaSamw 2648da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2649da6c28aaSamw xoap->xoa_nodump = 26500a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_NODUMP) 
!= 0); 2651da6c28aaSamw XVA_SET_RTN(xvap, XAT_NODUMP); 2652da6c28aaSamw } 2653da6c28aaSamw 2654da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 2655da6c28aaSamw xoap->xoa_opaque = 26560a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_OPAQUE) != 0); 2657da6c28aaSamw XVA_SET_RTN(xvap, XAT_OPAQUE); 2658da6c28aaSamw } 2659da6c28aaSamw 2660da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 2661da6c28aaSamw xoap->xoa_av_quarantined = 26620a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0); 2663da6c28aaSamw XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 2664da6c28aaSamw } 2665da6c28aaSamw 2666da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2667da6c28aaSamw xoap->xoa_av_modified = 26680a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_MODIFIED) != 0); 2669da6c28aaSamw XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 2670da6c28aaSamw } 2671da6c28aaSamw 2672da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) && 26730a586ceaSMark Shellenbaum vp->v_type == VREG) { 26740a586ceaSMark Shellenbaum zfs_sa_get_scanstamp(zp, xvap); 2675da6c28aaSamw } 2676da6c28aaSamw 2677da6c28aaSamw if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 26780a586ceaSMark Shellenbaum uint64_t times[2]; 26790a586ceaSMark Shellenbaum 26800a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), 26810a586ceaSMark Shellenbaum times, sizeof (times)); 26820a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&xoap->xoa_createtime, times); 2683da6c28aaSamw XVA_SET_RTN(xvap, XAT_CREATETIME); 2684da6c28aaSamw } 26857a286c47SDai Ngo 26867a286c47SDai Ngo if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 26870a586ceaSMark Shellenbaum xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0); 26887a286c47SDai Ngo XVA_SET_RTN(xvap, XAT_REPARSE); 26897a286c47SDai Ngo } 269099d5e173STim Haley if (XVA_ISSET_REQ(xvap, XAT_GEN)) { 269199d5e173STim Haley xoap->xoa_generation = zp->z_gen; 269299d5e173STim Haley XVA_SET_RTN(xvap, XAT_GEN); 269399d5e173STim Haley } 2694fd9ee8b5Sjoyce mcintosh 2695fd9ee8b5Sjoyce mcintosh 
if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 2696fd9ee8b5Sjoyce mcintosh xoap->xoa_offline = 2697fd9ee8b5Sjoyce mcintosh ((zp->z_pflags & ZFS_OFFLINE) != 0); 2698fd9ee8b5Sjoyce mcintosh XVA_SET_RTN(xvap, XAT_OFFLINE); 2699fd9ee8b5Sjoyce mcintosh } 2700fd9ee8b5Sjoyce mcintosh 2701fd9ee8b5Sjoyce mcintosh if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 2702fd9ee8b5Sjoyce mcintosh xoap->xoa_sparse = 2703fd9ee8b5Sjoyce mcintosh ((zp->z_pflags & ZFS_SPARSE) != 0); 2704fd9ee8b5Sjoyce mcintosh XVA_SET_RTN(xvap, XAT_SPARSE); 2705fd9ee8b5Sjoyce mcintosh } 2706da6c28aaSamw } 2707da6c28aaSamw 27080a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime); 27090a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&vap->va_mtime, mtime); 27100a586ceaSMark Shellenbaum ZFS_TIME_DECODE(&vap->va_ctime, ctime); 2711fa9e4066Sahrens 2712fa9e4066Sahrens mutex_exit(&zp->z_lock); 2713fa9e4066Sahrens 27140a586ceaSMark Shellenbaum sa_object_size(zp->z_sa_hdl, &vap->va_blksize, &vap->va_nblocks); 2715fa9e4066Sahrens 2716fa9e4066Sahrens if (zp->z_blksz == 0) { 2717fa9e4066Sahrens /* 2718fa9e4066Sahrens * Block size hasn't been set; suggest maximal I/O transfers. 2719fa9e4066Sahrens */ 2720fa9e4066Sahrens vap->va_blksize = zfsvfs->z_max_blksz; 2721fa9e4066Sahrens } 2722fa9e4066Sahrens 2723fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2724fa9e4066Sahrens return (0); 2725fa9e4066Sahrens } 2726fa9e4066Sahrens 2727fa9e4066Sahrens /* 2728fa9e4066Sahrens * Set the file attributes to the values contained in the 2729fa9e4066Sahrens * vattr structure. 2730fa9e4066Sahrens * 2731fa9e4066Sahrens * IN: vp - vnode of file to be modified. 2732fa9e4066Sahrens * vap - new attribute values. 2733da6c28aaSamw * If AT_XVATTR set, then optional attrs are being set 2734fa9e4066Sahrens * flags - ATTR_UTIME set if non-default time values provided. 2735da6c28aaSamw * - ATTR_NOACLCHECK (CIFS context only). 2736fa9e4066Sahrens * cr - credentials of caller. 
2737da6c28aaSamw * ct - caller context 2738fa9e4066Sahrens * 2739f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 2740fa9e4066Sahrens * 2741fa9e4066Sahrens * Timestamps: 2742fa9e4066Sahrens * vp - ctime updated, mtime updated if size changed. 2743fa9e4066Sahrens */ 2744fa9e4066Sahrens /* ARGSUSED */ 2745fa9e4066Sahrens static int 2746fa9e4066Sahrens zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2747fa9e4066Sahrens caller_context_t *ct) 2748fa9e4066Sahrens { 2749f18faf3fSek110237 znode_t *zp = VTOZ(vp); 2750fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 2751f18faf3fSek110237 zilog_t *zilog; 2752fa9e4066Sahrens dmu_tx_t *tx; 2753fa9e4066Sahrens vattr_t oldva; 2754ae4caef8SMark Shellenbaum xvattr_t tmpxvattr; 27555730cc9aSmaybee uint_t mask = vap->va_mask; 2756d5285caeSGeorge Wilson uint_t saved_mask = 0; 2757f92daba9Smarks int trim_mask = 0; 2758fa9e4066Sahrens uint64_t new_mode; 275989459e17SMark Shellenbaum uint64_t new_uid, new_gid; 27600b2a8171SMark Shellenbaum uint64_t xattr_obj; 27610a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 2762d2443e76Smarks znode_t *attrzp; 2763fa9e4066Sahrens int need_policy = FALSE; 27640a586ceaSMark Shellenbaum int err, err2; 2765da6c28aaSamw zfs_fuid_info_t *fuidp = NULL; 2766da6c28aaSamw xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */ 2767da6c28aaSamw xoptattr_t *xoap; 27680b2a8171SMark Shellenbaum zfs_acl_t *aclp; 2769da6c28aaSamw boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 277089459e17SMark Shellenbaum boolean_t fuid_dirtied = B_FALSE; 27710a586ceaSMark Shellenbaum sa_bulk_attr_t bulk[7], xattr_bulk[7]; 27720a586ceaSMark Shellenbaum int count = 0, xattr_count = 0; 2773fa9e4066Sahrens 2774fa9e4066Sahrens if (mask == 0) 2775fa9e4066Sahrens return (0); 2776fa9e4066Sahrens 2777fa9e4066Sahrens if (mask & AT_NOSET) 2778be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 2779fa9e4066Sahrens 27803cb34c60Sahrens ZFS_ENTER(zfsvfs); 27813cb34c60Sahrens ZFS_VERIFY_ZP(zp); 2782da6c28aaSamw 2783da6c28aaSamw zilog = zfsvfs->z_log; 2784da6c28aaSamw 2785da6c28aaSamw /* 2786da6c28aaSamw * Make sure that if we have ephemeral uid/gid or xvattr specified 2787da6c28aaSamw * that file system is at proper version level 2788da6c28aaSamw */ 2789da6c28aaSamw 2790da6c28aaSamw if (zfsvfs->z_use_fuids == B_FALSE && 2791da6c28aaSamw (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) || 2792da6c28aaSamw ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) || 279302dcba3bStimh (mask & AT_XVATTR))) { 279402dcba3bStimh ZFS_EXIT(zfsvfs); 2795be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 279602dcba3bStimh } 2797da6c28aaSamw 279802dcba3bStimh if (mask & AT_SIZE && vp->v_type == VDIR) { 279902dcba3bStimh ZFS_EXIT(zfsvfs); 2800be6fd75aSMatthew Ahrens return (SET_ERROR(EISDIR)); 280102dcba3bStimh } 2802fa9e4066Sahrens 280302dcba3bStimh if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) { 280402dcba3bStimh ZFS_EXIT(zfsvfs); 2805be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 280602dcba3bStimh } 280784c5a155Smarks 2808da6c28aaSamw /* 2809da6c28aaSamw * If this is an xvattr_t, then get a pointer to the structure of 2810da6c28aaSamw * optional attributes. If this is NULL, then we have a vattr_t. 
2811da6c28aaSamw */ 2812da6c28aaSamw xoap = xva_getxoptattr(xvap); 2813da6c28aaSamw 2814ae4caef8SMark Shellenbaum xva_init(&tmpxvattr); 2815ae4caef8SMark Shellenbaum 2816da6c28aaSamw /* 2817da6c28aaSamw * Immutable files can only alter immutable bit and atime 2818da6c28aaSamw */ 28190a586ceaSMark Shellenbaum if ((zp->z_pflags & ZFS_IMMUTABLE) && 2820da6c28aaSamw ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) || 282102dcba3bStimh ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) { 282202dcba3bStimh ZFS_EXIT(zfsvfs); 2823be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 282402dcba3bStimh } 2825da6c28aaSamw 28260a586ceaSMark Shellenbaum if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) { 282702dcba3bStimh ZFS_EXIT(zfsvfs); 2828be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 282902dcba3bStimh } 2830fa9e4066Sahrens 283193129341Smarks /* 283293129341Smarks * Verify timestamps doesn't overflow 32 bits. 283393129341Smarks * ZFS can handle large timestamps, but 32bit syscalls can't 283493129341Smarks * handle times greater than 2039. This check should be removed 283593129341Smarks * once large timestamps are fully supported. 283693129341Smarks */ 283793129341Smarks if (mask & (AT_ATIME | AT_MTIME)) { 283893129341Smarks if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) || 283993129341Smarks ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) { 284093129341Smarks ZFS_EXIT(zfsvfs); 2841be6fd75aSMatthew Ahrens return (SET_ERROR(EOVERFLOW)); 284293129341Smarks } 284393129341Smarks } 284493129341Smarks 2845fa9e4066Sahrens top: 2846d2443e76Smarks attrzp = NULL; 28470b2a8171SMark Shellenbaum aclp = NULL; 2848fa9e4066Sahrens 2849d47621a4STim Haley /* Can this be moved to before the top label? 
*/ 2850fa9e4066Sahrens if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 2851fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2852be6fd75aSMatthew Ahrens return (SET_ERROR(EROFS)); 2853fa9e4066Sahrens } 2854fa9e4066Sahrens 2855fa9e4066Sahrens /* 2856fa9e4066Sahrens * First validate permissions 2857fa9e4066Sahrens */ 2858fa9e4066Sahrens 2859fa9e4066Sahrens if (mask & AT_SIZE) { 2860da6c28aaSamw err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr); 2861fa9e4066Sahrens if (err) { 2862fa9e4066Sahrens ZFS_EXIT(zfsvfs); 2863fa9e4066Sahrens return (err); 2864fa9e4066Sahrens } 28655730cc9aSmaybee /* 28665730cc9aSmaybee * XXX - Note, we are not providing any open 28675730cc9aSmaybee * mode flags here (like FNDELAY), so we may 28685730cc9aSmaybee * block if there are locks present... this 28695730cc9aSmaybee * should be addressed in openat(). 28705730cc9aSmaybee */ 2871cdb0ab79Smaybee /* XXX - would it be OK to generate a log record here? */ 28725730cc9aSmaybee err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE); 28735730cc9aSmaybee if (err) { 28745730cc9aSmaybee ZFS_EXIT(zfsvfs); 28755730cc9aSmaybee return (err); 28765730cc9aSmaybee } 287772102e74SBryan Cantrill 287872102e74SBryan Cantrill if (vap->va_size == 0) 287972102e74SBryan Cantrill vnevent_truncate(ZTOV(zp), ct); 2880fa9e4066Sahrens } 2881fa9e4066Sahrens 2882da6c28aaSamw if (mask & (AT_ATIME|AT_MTIME) || 2883da6c28aaSamw ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) || 2884da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_READONLY) || 2885da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_ARCHIVE) || 2886fd9ee8b5Sjoyce mcintosh XVA_ISSET_REQ(xvap, XAT_OFFLINE) || 2887fd9ee8b5Sjoyce mcintosh XVA_ISSET_REQ(xvap, XAT_SPARSE) || 2888da6c28aaSamw XVA_ISSET_REQ(xvap, XAT_CREATETIME) || 28890a586ceaSMark Shellenbaum XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) { 2890da6c28aaSamw need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0, 2891da6c28aaSamw skipaclchk, cr); 28920a586ceaSMark Shellenbaum } 2893fa9e4066Sahrens 2894fa9e4066Sahrens if (mask & (AT_UID|AT_GID)) { 
2895fa9e4066Sahrens int idmask = (mask & (AT_UID|AT_GID)); 2896fa9e4066Sahrens int take_owner; 2897fa9e4066Sahrens int take_group; 2898fa9e4066Sahrens 2899fa9e4066Sahrens /* 2900a933bc41Smarks * NOTE: even if a new mode is being set, 2901a933bc41Smarks * we may clear S_ISUID/S_ISGID bits. 2902a933bc41Smarks */ 2903a933bc41Smarks 2904a933bc41Smarks if (!(mask & AT_MODE)) 29050a586ceaSMark Shellenbaum vap->va_mode = zp->z_mode; 2906a933bc41Smarks 2907a933bc41Smarks /* 2908fa9e4066Sahrens * Take ownership or chgrp to group we are a member of 2909fa9e4066Sahrens */ 2910fa9e4066Sahrens 2911fa9e4066Sahrens take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr)); 2912da6c28aaSamw take_group = (mask & AT_GID) && 2913da6c28aaSamw zfs_groupmember(zfsvfs, vap->va_gid, cr); 2914fa9e4066Sahrens 2915fa9e4066Sahrens /* 2916fa9e4066Sahrens * If both AT_UID and AT_GID are set then take_owner and 2917fa9e4066Sahrens * take_group must both be set in order to allow taking 2918fa9e4066Sahrens * ownership. 
2919fa9e4066Sahrens * 2920fa9e4066Sahrens * Otherwise, send the check through secpolicy_vnode_setattr() 2921fa9e4066Sahrens * 2922fa9e4066Sahrens */ 2923fa9e4066Sahrens 2924fa9e4066Sahrens if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) || 2925fa9e4066Sahrens ((idmask == AT_UID) && take_owner) || 2926fa9e4066Sahrens ((idmask == AT_GID) && take_group)) { 2927da6c28aaSamw if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0, 2928da6c28aaSamw skipaclchk, cr) == 0) { 2929fa9e4066Sahrens /* 2930fa9e4066Sahrens * Remove setuid/setgid for non-privileged users 2931fa9e4066Sahrens */ 293213f9f30eSmarks secpolicy_setid_clear(vap, cr); 2933f92daba9Smarks trim_mask = (mask & (AT_UID|AT_GID)); 2934fa9e4066Sahrens } else { 2935fa9e4066Sahrens need_policy = TRUE; 2936fa9e4066Sahrens } 2937fa9e4066Sahrens } else { 2938fa9e4066Sahrens need_policy = TRUE; 2939fa9e4066Sahrens } 2940fa9e4066Sahrens } 2941fa9e4066Sahrens 2942fa9e4066Sahrens mutex_enter(&zp->z_lock); 29430a586ceaSMark Shellenbaum oldva.va_mode = zp->z_mode; 2944f1696b23SMark Shellenbaum zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid); 2945da6c28aaSamw if (mask & AT_XVATTR) { 2946ae4caef8SMark Shellenbaum /* 2947ae4caef8SMark Shellenbaum * Update xvattr mask to include only those attributes 2948ae4caef8SMark Shellenbaum * that are actually changing. 2949ae4caef8SMark Shellenbaum * 2950ae4caef8SMark Shellenbaum * the bits will be restored prior to actually setting 2951ae4caef8SMark Shellenbaum * the attributes so the caller thinks they were set. 
2952ae4caef8SMark Shellenbaum */ 2953ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 2954ae4caef8SMark Shellenbaum if (xoap->xoa_appendonly != 29550a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_APPENDONLY) != 0)) { 2956ae4caef8SMark Shellenbaum need_policy = TRUE; 2957ae4caef8SMark Shellenbaum } else { 2958ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_APPENDONLY); 2959ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY); 2960ae4caef8SMark Shellenbaum } 2961ae4caef8SMark Shellenbaum } 2962ae4caef8SMark Shellenbaum 2963ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 2964ae4caef8SMark Shellenbaum if (xoap->xoa_nounlink != 29650a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_NOUNLINK) != 0)) { 2966ae4caef8SMark Shellenbaum need_policy = TRUE; 2967ae4caef8SMark Shellenbaum } else { 2968ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_NOUNLINK); 2969ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK); 2970ae4caef8SMark Shellenbaum } 2971ae4caef8SMark Shellenbaum } 2972ae4caef8SMark Shellenbaum 2973ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 2974ae4caef8SMark Shellenbaum if (xoap->xoa_immutable != 29750a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) { 2976ae4caef8SMark Shellenbaum need_policy = TRUE; 2977ae4caef8SMark Shellenbaum } else { 2978ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_IMMUTABLE); 2979ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE); 2980ae4caef8SMark Shellenbaum } 2981ae4caef8SMark Shellenbaum } 2982ae4caef8SMark Shellenbaum 2983ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 2984ae4caef8SMark Shellenbaum if (xoap->xoa_nodump != 29850a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_NODUMP) != 0)) { 2986ae4caef8SMark Shellenbaum need_policy = TRUE; 2987ae4caef8SMark Shellenbaum } else { 2988ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_NODUMP); 2989ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_NODUMP); 
2990ae4caef8SMark Shellenbaum } 2991ae4caef8SMark Shellenbaum } 2992ae4caef8SMark Shellenbaum 2993ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 2994ae4caef8SMark Shellenbaum if (xoap->xoa_av_modified != 29950a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) { 2996ae4caef8SMark Shellenbaum need_policy = TRUE; 2997ae4caef8SMark Shellenbaum } else { 2998ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_AV_MODIFIED); 2999ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED); 3000ae4caef8SMark Shellenbaum } 3001ae4caef8SMark Shellenbaum } 3002ae4caef8SMark Shellenbaum 3003ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 3004ae4caef8SMark Shellenbaum if ((vp->v_type != VREG && 3005ae4caef8SMark Shellenbaum xoap->xoa_av_quarantined) || 3006da6c28aaSamw xoap->xoa_av_quarantined != 30070a586ceaSMark Shellenbaum ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) { 3008ae4caef8SMark Shellenbaum need_policy = TRUE; 3009ae4caef8SMark Shellenbaum } else { 3010ae4caef8SMark Shellenbaum XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED); 3011ae4caef8SMark Shellenbaum XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED); 3012ae4caef8SMark Shellenbaum } 3013ae4caef8SMark Shellenbaum } 3014ae4caef8SMark Shellenbaum 30157a286c47SDai Ngo if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 30167a286c47SDai Ngo mutex_exit(&zp->z_lock); 30177a286c47SDai Ngo ZFS_EXIT(zfsvfs); 3018be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 30197a286c47SDai Ngo } 30207a286c47SDai Ngo 3021ae4caef8SMark Shellenbaum if (need_policy == FALSE && 3022ae4caef8SMark Shellenbaum (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) || 3023ae4caef8SMark Shellenbaum XVA_ISSET_REQ(xvap, XAT_OPAQUE))) { 3024da6c28aaSamw need_policy = TRUE; 3025da6c28aaSamw } 3026da6c28aaSamw } 3027da6c28aaSamw 3028fa9e4066Sahrens mutex_exit(&zp->z_lock); 302913f9f30eSmarks 3030f92daba9Smarks if (mask & AT_MODE) { 3031da6c28aaSamw if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) { 3032f92daba9Smarks 
err = secpolicy_setid_setsticky_clear(vp, vap, 3033f92daba9Smarks &oldva, cr); 3034f92daba9Smarks if (err) { 3035f92daba9Smarks ZFS_EXIT(zfsvfs); 3036f92daba9Smarks return (err); 3037f92daba9Smarks } 3038f92daba9Smarks trim_mask |= AT_MODE; 3039f92daba9Smarks } else { 3040f92daba9Smarks need_policy = TRUE; 3041f92daba9Smarks } 3042f92daba9Smarks } 3043f92daba9Smarks 3044f92daba9Smarks if (need_policy) { 304513f9f30eSmarks /* 304613f9f30eSmarks * If trim_mask is set then take ownership 3047f92daba9Smarks * has been granted or write_acl is present and user 3048f92daba9Smarks * has the ability to modify mode. In that case remove 3049f92daba9Smarks * UID|GID and or MODE from mask so that 305013f9f30eSmarks * secpolicy_vnode_setattr() doesn't revoke it. 305113f9f30eSmarks */ 305213f9f30eSmarks 3053f92daba9Smarks if (trim_mask) { 3054f92daba9Smarks saved_mask = vap->va_mask; 3055f92daba9Smarks vap->va_mask &= ~trim_mask; 3056f92daba9Smarks } 3057fa9e4066Sahrens err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags, 3058da6c28aaSamw (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp); 3059fa9e4066Sahrens if (err) { 3060fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3061fa9e4066Sahrens return (err); 3062fa9e4066Sahrens } 306313f9f30eSmarks 306413f9f30eSmarks if (trim_mask) 3065f92daba9Smarks vap->va_mask |= saved_mask; 3066fa9e4066Sahrens } 3067fa9e4066Sahrens 3068fa9e4066Sahrens /* 3069fa9e4066Sahrens * secpolicy_vnode_setattr, or take ownership may have 3070fa9e4066Sahrens * changed va_mask 3071fa9e4066Sahrens */ 3072fa9e4066Sahrens mask = vap->va_mask; 3073fa9e4066Sahrens 30740a586ceaSMark Shellenbaum if ((mask & (AT_UID | AT_GID))) { 30750b2a8171SMark Shellenbaum err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), 30760b2a8171SMark Shellenbaum &xattr_obj, sizeof (xattr_obj)); 3077fa9e4066Sahrens 30780b2a8171SMark Shellenbaum if (err == 0 && xattr_obj) { 30790a586ceaSMark Shellenbaum err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp); 308014843421SMatthew Ahrens if (err) 
30810a586ceaSMark Shellenbaum goto out2; 3082d2443e76Smarks } 308389459e17SMark Shellenbaum if (mask & AT_UID) { 308489459e17SMark Shellenbaum new_uid = zfs_fuid_create(zfsvfs, 308589459e17SMark Shellenbaum (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp); 3086f1696b23SMark Shellenbaum if (new_uid != zp->z_uid && 30870a586ceaSMark Shellenbaum zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) { 30880b2a8171SMark Shellenbaum if (attrzp) 30890b2a8171SMark Shellenbaum VN_RELE(ZTOV(attrzp)); 3090be6fd75aSMatthew Ahrens err = SET_ERROR(EDQUOT); 30910a586ceaSMark Shellenbaum goto out2; 309289459e17SMark Shellenbaum } 309314843421SMatthew Ahrens } 309414843421SMatthew Ahrens 309589459e17SMark Shellenbaum if (mask & AT_GID) { 309689459e17SMark Shellenbaum new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, 309789459e17SMark Shellenbaum cr, ZFS_GROUP, &fuidp); 30980a586ceaSMark Shellenbaum if (new_gid != zp->z_gid && 30990a586ceaSMark Shellenbaum zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) { 31000b2a8171SMark Shellenbaum if (attrzp) 31010b2a8171SMark Shellenbaum VN_RELE(ZTOV(attrzp)); 3102be6fd75aSMatthew Ahrens err = SET_ERROR(EDQUOT); 31030a586ceaSMark Shellenbaum goto out2; 31040a586ceaSMark Shellenbaum } 31050a586ceaSMark Shellenbaum } 31060a586ceaSMark Shellenbaum } 31070a586ceaSMark Shellenbaum tx = dmu_tx_create(zfsvfs->z_os); 31080a586ceaSMark Shellenbaum 31090a586ceaSMark Shellenbaum if (mask & AT_MODE) { 31100a586ceaSMark Shellenbaum uint64_t pmode = zp->z_mode; 31111412a1a2SMark Shellenbaum uint64_t acl_obj; 31120a586ceaSMark Shellenbaum new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT); 31130a586ceaSMark Shellenbaum 311471dbfc28SPaul B. Henson if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED && 311571dbfc28SPaul B. Henson !(zp->z_pflags & ZFS_ACL_TRIVIAL)) { 3116be6fd75aSMatthew Ahrens err = SET_ERROR(EPERM); 311771dbfc28SPaul B. Henson goto out; 311871dbfc28SPaul B. Henson } 311971dbfc28SPaul B. 
Henson 3120a3c49ce1SAlbert Lee if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)) 3121a3c49ce1SAlbert Lee goto out; 31220a586ceaSMark Shellenbaum 31231412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 31241412a1a2SMark Shellenbaum if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) { 31250a586ceaSMark Shellenbaum /* 31260a586ceaSMark Shellenbaum * Are we upgrading ACL from old V0 format 31270a586ceaSMark Shellenbaum * to V1 format? 31280a586ceaSMark Shellenbaum */ 31292bd6c4deSMark Shellenbaum if (zfsvfs->z_version >= ZPL_VERSION_FUID && 31301412a1a2SMark Shellenbaum zfs_znode_acl_version(zp) == 31310a586ceaSMark Shellenbaum ZFS_ACL_VERSION_INITIAL) { 31321412a1a2SMark Shellenbaum dmu_tx_hold_free(tx, acl_obj, 0, 31330a586ceaSMark Shellenbaum DMU_OBJECT_END); 31340a586ceaSMark Shellenbaum dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 31350a586ceaSMark Shellenbaum 0, aclp->z_acl_bytes); 313689459e17SMark Shellenbaum } else { 31371412a1a2SMark Shellenbaum dmu_tx_hold_write(tx, acl_obj, 0, 31380a586ceaSMark Shellenbaum aclp->z_acl_bytes); 313989459e17SMark Shellenbaum } 31400a586ceaSMark Shellenbaum } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { 31410a586ceaSMark Shellenbaum dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 31420a586ceaSMark Shellenbaum 0, aclp->z_acl_bytes); 314389459e17SMark Shellenbaum } 31441412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 31450a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 31460a586ceaSMark Shellenbaum } else { 31470a586ceaSMark Shellenbaum if ((mask & AT_XVATTR) && 31480a586ceaSMark Shellenbaum XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 31490a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); 31500a586ceaSMark Shellenbaum else 31510a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 315289459e17SMark Shellenbaum } 3153d2443e76Smarks 31540a586ceaSMark Shellenbaum if (attrzp) { 31550a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE); 31560a586ceaSMark 
Shellenbaum } 31570a586ceaSMark Shellenbaum 31580a586ceaSMark Shellenbaum fuid_dirtied = zfsvfs->z_fuid_dirty; 31590a586ceaSMark Shellenbaum if (fuid_dirtied) 31600a586ceaSMark Shellenbaum zfs_fuid_txhold(zfsvfs, tx); 31610a586ceaSMark Shellenbaum 31620a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 31630a586ceaSMark Shellenbaum 3164e722410cSMatthew Ahrens err = dmu_tx_assign(tx, TXG_WAIT); 3165e722410cSMatthew Ahrens if (err) 316614843421SMatthew Ahrens goto out; 3167fa9e4066Sahrens 31680a586ceaSMark Shellenbaum count = 0; 3169fa9e4066Sahrens /* 3170fa9e4066Sahrens * Set each attribute requested. 3171fa9e4066Sahrens * We group settings according to the locks they need to acquire. 3172fa9e4066Sahrens * 3173fa9e4066Sahrens * Note: you cannot set ctime directly, although it will be 3174fa9e4066Sahrens * updated as a side-effect of calling this function. 3175fa9e4066Sahrens */ 3176fa9e4066Sahrens 31771412a1a2SMark Shellenbaum 31781412a1a2SMark Shellenbaum if (mask & (AT_UID|AT_GID|AT_MODE)) 31791412a1a2SMark Shellenbaum mutex_enter(&zp->z_acl_lock); 3180fa9e4066Sahrens mutex_enter(&zp->z_lock); 3181fa9e4066Sahrens 3182db9986c7SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 3183db9986c7SMark Shellenbaum &zp->z_pflags, sizeof (zp->z_pflags)); 3184db9986c7SMark Shellenbaum 3185db9986c7SMark Shellenbaum if (attrzp) { 31861412a1a2SMark Shellenbaum if (mask & (AT_UID|AT_GID|AT_MODE)) 31871412a1a2SMark Shellenbaum mutex_enter(&attrzp->z_acl_lock); 31880a586ceaSMark Shellenbaum mutex_enter(&attrzp->z_lock); 3189db9986c7SMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 3190db9986c7SMark Shellenbaum SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags, 3191db9986c7SMark Shellenbaum sizeof (attrzp->z_pflags)); 3192db9986c7SMark Shellenbaum } 31930a586ceaSMark Shellenbaum 319427dd1e87SMark Shellenbaum if (mask & (AT_UID|AT_GID)) { 319527dd1e87SMark Shellenbaum 31960a586ceaSMark Shellenbaum if (mask & AT_UID) { 31970a586ceaSMark Shellenbaum 
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 31980a586ceaSMark Shellenbaum &new_uid, sizeof (new_uid)); 3199f1696b23SMark Shellenbaum zp->z_uid = new_uid; 32000a586ceaSMark Shellenbaum if (attrzp) { 32010a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 32020a586ceaSMark Shellenbaum SA_ZPL_UID(zfsvfs), NULL, &new_uid, 32030a586ceaSMark Shellenbaum sizeof (new_uid)); 3204f1696b23SMark Shellenbaum attrzp->z_uid = new_uid; 32050a586ceaSMark Shellenbaum } 32060a586ceaSMark Shellenbaum } 32070a586ceaSMark Shellenbaum 32080a586ceaSMark Shellenbaum if (mask & AT_GID) { 320927dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 321027dd1e87SMark Shellenbaum NULL, &new_gid, sizeof (new_gid)); 3211f1696b23SMark Shellenbaum zp->z_gid = new_gid; 32120a586ceaSMark Shellenbaum if (attrzp) { 32130a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 32140a586ceaSMark Shellenbaum SA_ZPL_GID(zfsvfs), NULL, &new_gid, 32150a586ceaSMark Shellenbaum sizeof (new_gid)); 3216f1696b23SMark Shellenbaum attrzp->z_gid = new_gid; 32170a586ceaSMark Shellenbaum } 32180a586ceaSMark Shellenbaum } 321927dd1e87SMark Shellenbaum if (!(mask & AT_MODE)) { 322027dd1e87SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), 322127dd1e87SMark Shellenbaum NULL, &new_mode, sizeof (new_mode)); 322227dd1e87SMark Shellenbaum new_mode = zp->z_mode; 322327dd1e87SMark Shellenbaum } 322427dd1e87SMark Shellenbaum err = zfs_acl_chown_setattr(zp); 322527dd1e87SMark Shellenbaum ASSERT(err == 0); 322627dd1e87SMark Shellenbaum if (attrzp) { 322727dd1e87SMark Shellenbaum err = zfs_acl_chown_setattr(attrzp); 322827dd1e87SMark Shellenbaum ASSERT(err == 0); 322927dd1e87SMark Shellenbaum } 323027dd1e87SMark Shellenbaum } 32310a586ceaSMark Shellenbaum 3232fa9e4066Sahrens if (mask & AT_MODE) { 32330a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 32340a586ceaSMark Shellenbaum &new_mode, sizeof (new_mode)); 
32350a586ceaSMark Shellenbaum zp->z_mode = new_mode; 323627dd1e87SMark Shellenbaum ASSERT3U((uintptr_t)aclp, !=, NULL); 323789459e17SMark Shellenbaum err = zfs_aclset_common(zp, aclp, cr, tx); 3238fb09f5aaSMadhav Suresh ASSERT0(err); 32390b2a8171SMark Shellenbaum if (zp->z_acl_cached) 32400b2a8171SMark Shellenbaum zfs_acl_free(zp->z_acl_cached); 32414929fd5eSTim Haley zp->z_acl_cached = aclp; 32424929fd5eSTim Haley aclp = NULL; 3243fa9e4066Sahrens } 3244fa9e4066Sahrens 3245fa9e4066Sahrens 32460a586ceaSMark Shellenbaum if (mask & AT_ATIME) { 32470a586ceaSMark Shellenbaum ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime); 32480a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 32490a586ceaSMark Shellenbaum &zp->z_atime, sizeof (zp->z_atime)); 32500a586ceaSMark Shellenbaum } 3251fa9e4066Sahrens 32520a586ceaSMark Shellenbaum if (mask & AT_MTIME) { 32530a586ceaSMark Shellenbaum ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 32540a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 32550a586ceaSMark Shellenbaum mtime, sizeof (mtime)); 32560a586ceaSMark Shellenbaum } 3257fa9e4066Sahrens 3258cdb0ab79Smaybee /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? 
*/ 32590a586ceaSMark Shellenbaum if (mask & AT_SIZE && !(mask & AT_MTIME)) { 32600a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), 32610a586ceaSMark Shellenbaum NULL, mtime, sizeof (mtime)); 32620a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 32630a586ceaSMark Shellenbaum &ctime, sizeof (ctime)); 32640a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 32650a586ceaSMark Shellenbaum B_TRUE); 32660a586ceaSMark Shellenbaum } else if (mask != 0) { 32670a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 32680a586ceaSMark Shellenbaum &ctime, sizeof (ctime)); 32690a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime, 32700a586ceaSMark Shellenbaum B_TRUE); 32710a586ceaSMark Shellenbaum if (attrzp) { 32720a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(xattr_bulk, xattr_count, 32730a586ceaSMark Shellenbaum SA_ZPL_CTIME(zfsvfs), NULL, 32740a586ceaSMark Shellenbaum &ctime, sizeof (ctime)); 32750a586ceaSMark Shellenbaum zfs_tstamp_update_setup(attrzp, STATE_CHANGED, 32760a586ceaSMark Shellenbaum mtime, ctime, B_TRUE); 32770a586ceaSMark Shellenbaum } 32780a586ceaSMark Shellenbaum } 3279da6c28aaSamw /* 3280da6c28aaSamw * Do this after setting timestamps to prevent timestamp 3281da6c28aaSamw * update from toggling bit 3282da6c28aaSamw */ 3283da6c28aaSamw 3284da6c28aaSamw if (xoap && (mask & AT_XVATTR)) { 3285ae4caef8SMark Shellenbaum 3286ae4caef8SMark Shellenbaum /* 3287ae4caef8SMark Shellenbaum * restore trimmed off masks 3288ae4caef8SMark Shellenbaum * so that return masks can be set for caller. 
3289ae4caef8SMark Shellenbaum */ 3290ae4caef8SMark Shellenbaum 3291ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) { 3292ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_APPENDONLY); 3293ae4caef8SMark Shellenbaum } 3294ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) { 3295ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_NOUNLINK); 3296ae4caef8SMark Shellenbaum } 3297ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) { 3298ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_IMMUTABLE); 3299ae4caef8SMark Shellenbaum } 3300ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) { 3301ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_NODUMP); 3302ae4caef8SMark Shellenbaum } 3303ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) { 3304ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_AV_MODIFIED); 3305ae4caef8SMark Shellenbaum } 3306ae4caef8SMark Shellenbaum if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) { 3307ae4caef8SMark Shellenbaum XVA_SET_REQ(xvap, XAT_AV_QUARANTINED); 3308ae4caef8SMark Shellenbaum } 3309ae4caef8SMark Shellenbaum 33100a586ceaSMark Shellenbaum if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 3311da6c28aaSamw ASSERT(vp->v_type == VREG); 3312da6c28aaSamw 33130a586ceaSMark Shellenbaum zfs_xvattr_set(zp, xvap, tx); 3314da6c28aaSamw } 3315fa9e4066Sahrens 331689459e17SMark Shellenbaum if (fuid_dirtied) 331789459e17SMark Shellenbaum zfs_fuid_sync(zfsvfs, tx); 331889459e17SMark Shellenbaum 33195730cc9aSmaybee if (mask != 0) 3320da6c28aaSamw zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp); 3321fa9e4066Sahrens 3322fa9e4066Sahrens mutex_exit(&zp->z_lock); 33231412a1a2SMark Shellenbaum if (mask & (AT_UID|AT_GID|AT_MODE)) 33241412a1a2SMark Shellenbaum mutex_exit(&zp->z_acl_lock); 3325fa9e4066Sahrens 33261412a1a2SMark Shellenbaum if (attrzp) { 33271412a1a2SMark Shellenbaum if (mask & (AT_UID|AT_GID|AT_MODE)) 33281412a1a2SMark Shellenbaum 
mutex_exit(&attrzp->z_acl_lock); 33291412a1a2SMark Shellenbaum mutex_exit(&attrzp->z_lock); 33301412a1a2SMark Shellenbaum } 333114843421SMatthew Ahrens out: 33320a586ceaSMark Shellenbaum if (err == 0 && attrzp) { 33330a586ceaSMark Shellenbaum err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk, 33340a586ceaSMark Shellenbaum xattr_count, tx); 33350a586ceaSMark Shellenbaum ASSERT(err2 == 0); 33360a586ceaSMark Shellenbaum } 33370a586ceaSMark Shellenbaum 3338d2443e76Smarks if (attrzp) 3339d2443e76Smarks VN_RELE(ZTOV(attrzp)); 3340f7170741SWill Andrews 33414929fd5eSTim Haley if (aclp) 33424929fd5eSTim Haley zfs_acl_free(aclp); 33434929fd5eSTim Haley 334414843421SMatthew Ahrens if (fuidp) { 334514843421SMatthew Ahrens zfs_fuid_info_free(fuidp); 334614843421SMatthew Ahrens fuidp = NULL; 334714843421SMatthew Ahrens } 334814843421SMatthew Ahrens 33490a586ceaSMark Shellenbaum if (err) { 335014843421SMatthew Ahrens dmu_tx_abort(tx); 335114843421SMatthew Ahrens if (err == ERESTART) 335214843421SMatthew Ahrens goto top; 33530a586ceaSMark Shellenbaum } else { 33540a586ceaSMark Shellenbaum err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 3355c035b1e8SAndreas Jaekel rw_enter(&rz_zev_rwlock, RW_READER); 3356c035b1e8SAndreas Jaekel if (rz_zev_callbacks && rz_zev_callbacks->rz_zev_znode_setattr) 3357e206ace3SAndreas Jaekel rz_zev_callbacks->rz_zev_znode_setattr(zp, tx); 3358c035b1e8SAndreas Jaekel rw_exit(&rz_zev_rwlock); 3359e206ace3SAndreas Jaekel dmu_tx_commit(tx); 3360e206ace3SAndreas Jaekel } 3361c035b1e8SAndreas Jaekel 33620a586ceaSMark Shellenbaum out2: 336355da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 33645002558fSNeil Perrin zil_commit(zilog, 0); 336555da60b9SMark J Musante 3366fa9e4066Sahrens ZFS_EXIT(zfsvfs); 3367fa9e4066Sahrens return (err); 3368fa9e4066Sahrens } 3369fa9e4066Sahrens 3370fa9e4066Sahrens typedef struct zfs_zlock { 3371fa9e4066Sahrens krwlock_t *zl_rwlock; /* lock we acquired */ 3372fa9e4066Sahrens znode_t *zl_znode; /* znode we 
held */ 3373fa9e4066Sahrens struct zfs_zlock *zl_next; /* next in list */ 3374fa9e4066Sahrens } zfs_zlock_t; 3375fa9e4066Sahrens 3376ff008e00Smaybee /* 3377ff008e00Smaybee * Drop locks and release vnodes that were held by zfs_rename_lock(). 3378ff008e00Smaybee */ 3379ff008e00Smaybee static void 3380ff008e00Smaybee zfs_rename_unlock(zfs_zlock_t **zlpp) 3381ff008e00Smaybee { 3382ff008e00Smaybee zfs_zlock_t *zl; 3383ff008e00Smaybee 3384ff008e00Smaybee while ((zl = *zlpp) != NULL) { 3385ff008e00Smaybee if (zl->zl_znode != NULL) 3386ff008e00Smaybee VN_RELE(ZTOV(zl->zl_znode)); 3387ff008e00Smaybee rw_exit(zl->zl_rwlock); 3388ff008e00Smaybee *zlpp = zl->zl_next; 3389ff008e00Smaybee kmem_free(zl, sizeof (*zl)); 3390ff008e00Smaybee } 3391ff008e00Smaybee } 3392ff008e00Smaybee 3393ff008e00Smaybee /* 3394ff008e00Smaybee * Search back through the directory tree, using the ".." entries. 3395ff008e00Smaybee * Lock each directory in the chain to prevent concurrent renames. 3396ff008e00Smaybee * Fail any attempt to move a directory into one of its own descendants. 3397ff008e00Smaybee * XXX - z_parent_lock can overlap with map or grow locks 3398ff008e00Smaybee */ 3399fa9e4066Sahrens static int 3400fa9e4066Sahrens zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp) 3401fa9e4066Sahrens { 3402fa9e4066Sahrens zfs_zlock_t *zl; 3403fa9e4066Sahrens znode_t *zp = tdzp; 3404fa9e4066Sahrens uint64_t rootid = zp->z_zfsvfs->z_root; 34050a586ceaSMark Shellenbaum uint64_t oidp = zp->z_id; 3406fa9e4066Sahrens krwlock_t *rwlp = &szp->z_parent_lock; 3407fa9e4066Sahrens krw_t rw = RW_WRITER; 3408fa9e4066Sahrens 3409fa9e4066Sahrens /* 3410fa9e4066Sahrens * First pass write-locks szp and compares to zp->z_id. 3411fa9e4066Sahrens * Later passes read-lock zp and compare to zp->z_parent. 3412fa9e4066Sahrens */ 3413fa9e4066Sahrens do { 3414ff008e00Smaybee if (!rw_tryenter(rwlp, rw)) { 3415ff008e00Smaybee /* 3416ff008e00Smaybee * Another thread is renaming in this path. 
3417ff008e00Smaybee * Note that if we are a WRITER, we don't have any 3418ff008e00Smaybee * parent_locks held yet. 3419ff008e00Smaybee */ 3420ff008e00Smaybee if (rw == RW_READER && zp->z_id > szp->z_id) { 3421ff008e00Smaybee /* 3422ff008e00Smaybee * Drop our locks and restart 3423ff008e00Smaybee */ 3424ff008e00Smaybee zfs_rename_unlock(&zl); 3425ff008e00Smaybee *zlpp = NULL; 3426ff008e00Smaybee zp = tdzp; 34270a586ceaSMark Shellenbaum oidp = zp->z_id; 3428ff008e00Smaybee rwlp = &szp->z_parent_lock; 3429ff008e00Smaybee rw = RW_WRITER; 3430ff008e00Smaybee continue; 3431ff008e00Smaybee } else { 3432ff008e00Smaybee /* 3433ff008e00Smaybee * Wait for other thread to drop its locks 3434ff008e00Smaybee */ 3435ff008e00Smaybee rw_enter(rwlp, rw); 3436ff008e00Smaybee } 3437ff008e00Smaybee } 3438ff008e00Smaybee 3439fa9e4066Sahrens zl = kmem_alloc(sizeof (*zl), KM_SLEEP); 3440fa9e4066Sahrens zl->zl_rwlock = rwlp; 3441fa9e4066Sahrens zl->zl_znode = NULL; 3442fa9e4066Sahrens zl->zl_next = *zlpp; 3443fa9e4066Sahrens *zlpp = zl; 3444fa9e4066Sahrens 34450a586ceaSMark Shellenbaum if (oidp == szp->z_id) /* We're a descendant of szp */ 3446be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 3447fa9e4066Sahrens 34480a586ceaSMark Shellenbaum if (oidp == rootid) /* We've hit the top */ 3449fa9e4066Sahrens return (0); 3450fa9e4066Sahrens 3451fa9e4066Sahrens if (rw == RW_READER) { /* i.e. 
not the first pass */ 34520a586ceaSMark Shellenbaum int error = zfs_zget(zp->z_zfsvfs, oidp, &zp); 3453fa9e4066Sahrens if (error) 3454fa9e4066Sahrens return (error); 3455fa9e4066Sahrens zl->zl_znode = zp; 3456fa9e4066Sahrens } 34570a586ceaSMark Shellenbaum (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zp->z_zfsvfs), 34580a586ceaSMark Shellenbaum &oidp, sizeof (oidp)); 3459fa9e4066Sahrens rwlp = &zp->z_parent_lock; 3460fa9e4066Sahrens rw = RW_READER; 3461fa9e4066Sahrens 3462fa9e4066Sahrens } while (zp->z_id != sdzp->z_id); 3463fa9e4066Sahrens 3464fa9e4066Sahrens return (0); 3465fa9e4066Sahrens } 3466fa9e4066Sahrens 3467fa9e4066Sahrens /* 3468fa9e4066Sahrens * Move an entry from the provided source directory to the target 3469fa9e4066Sahrens * directory. Change the entry name as indicated. 3470fa9e4066Sahrens * 3471fa9e4066Sahrens * IN: sdvp - Source directory containing the "old entry". 3472fa9e4066Sahrens * snm - Old entry name. 3473fa9e4066Sahrens * tdvp - Target directory to contain the "new entry". 3474fa9e4066Sahrens * tnm - New entry name. 3475fa9e4066Sahrens * cr - credentials of caller. 3476da6c28aaSamw * ct - caller context 3477da6c28aaSamw * flags - case flags 3478fa9e4066Sahrens * 3479f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
 *
 * Timestamps:
 *	sdvp,tdvp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr,
    caller_context_t *ct, int flags)
{
	znode_t		*tdzp, *szp, *tzp;
	znode_t		*sdzp = VTOZ(sdvp);
	zfsvfs_t	*zfsvfs = sdzp->z_zfsvfs;
	zilog_t		*zilog;
	vnode_t		*realvp;
	zfs_dirlock_t	*sdl, *tdl;
	dmu_tx_t	*tx;
	zfs_zlock_t	*zl;
	int		cmp, serr, terr;
	int		error = 0;
	int		zflg = 0;
	/*
	 * Set once we have slept in dmu_tx_wait(); the retry then assigns
	 * the transaction with TXG_WAITED instead of TXG_NOWAIT.
	 */
	boolean_t	waited = B_FALSE;

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(sdzp);
	zilog = zfsvfs->z_log;

	/*
	 * Make sure we have the real vp for the target directory.
	 */
	if (VOP_REALVP(tdvp, &realvp, ct) == 0)
		tdvp = realvp;

	tdzp = VTOZ(tdvp);
	ZFS_VERIFY_ZP(tdzp);

	/*
	 * We check z_zfsvfs rather than v_vfsp here, because snapshots and the
	 * ctldir appear to have the same v_vfsp.
	 */
	if (tdzp->z_zfsvfs != zfsvfs || zfsctl_is_node(tdvp)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EXDEV));
	}

	/*
	 * Only the target name is validated here; the source name must
	 * already exist in the directory (it is looked up with ZEXISTS below).
	 */
	if (zfsvfs->z_utf8 && u8_validate(tnm,
	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}

	if (flags & FIGNORECASE)
		zflg |= ZCILOOK;

	/*
	 * Retry point: we come back here, with every lock and hold from the
	 * previous attempt released, when dmu_tx_assign() returns ERESTART.
	 */
top:
	szp = NULL;
	tzp = NULL;
	zl = NULL;

	/*
	 * This is to prevent the creation of links into attribute space
	 * by renaming a linked file into/outof an attribute directory.
	 * See the comment in zfs_link() for why this is considered bad.
	 */
	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * Lock source and target directory entries.  To prevent deadlock,
	 * a lock ordering must be defined.  We lock the directory with
	 * the smallest object id first, or if it's a tie, the one with
	 * the lexically first name.
	 */
	if (sdzp->z_id < tdzp->z_id) {
		cmp = -1;
	} else if (sdzp->z_id > tdzp->z_id) {
		cmp = 1;
	} else {
		/*
		 * First compare the two name arguments without
		 * considering any case folding.
		 */
		int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER);

		cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error);
		ASSERT(error == 0 || !zfsvfs->z_utf8);
		if (cmp == 0) {
			/*
			 * POSIX: "If the old argument and the new argument
			 * both refer to links to the same existing file,
			 * the rename() function shall return successfully
			 * and perform no other action."
			 */
			ZFS_EXIT(zfsvfs);
			return (0);
		}
		/*
		 * If the file system is case-folding, then we may
		 * have some more checking to do.  A case-folding file
		 * system is either supporting mixed case sensitivity
		 * access or is completely case-insensitive.  Note
		 * that the file system is always case preserving.
		 *
		 * In mixed sensitivity mode case sensitive behavior
		 * is the default.  FIGNORECASE must be used to
		 * explicitly request case insensitive behavior.
		 *
		 * If the source and target names provided differ only
		 * by case (e.g., a request to rename 'tim' to 'Tim'),
		 * we will treat this as a special case in the
		 * case-insensitive mode: as long as the source name
		 * is an exact match, we will allow this to proceed as
		 * a name-change request.
		 */
		if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
		    (zfsvfs->z_case == ZFS_CASE_MIXED &&
		    flags & FIGNORECASE)) &&
		    u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST,
		    &error) == 0) {
			/*
			 * case preserving rename request, require exact
			 * name matches
			 */
			zflg |= ZCIEXACT;
			zflg &= ~ZCILOOK;
		}
	}

	/*
	 * If the source and destination directories are the same, we should
	 * grab the z_name_lock of that directory only once.
	 */
	if (sdzp == tdzp) {
		zflg |= ZHAVELOCK;
		rw_enter(&sdzp->z_name_lock, RW_READER);
	}

	/*
	 * Take the two directory-entry locks in the order chosen above.
	 * Whichever entry is locked second is locked with ZRENAMING.
	 */
	if (cmp < 0) {
		serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp,
		    ZEXISTS | zflg, NULL, NULL);
		terr = zfs_dirent_lock(&tdl,
		    tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL);
	} else {
		terr = zfs_dirent_lock(&tdl,
		    tdzp, tnm, &tzp, zflg, NULL, NULL);
		serr = zfs_dirent_lock(&sdl,
		    sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg,
		    NULL, NULL);
	}

	if (serr) {
		/*
		 * Source entry invalid or not there.
		 */
		if (!terr) {
			zfs_dirent_unlock(tdl);
			if (tzp)
				VN_RELE(ZTOV(tzp));
		}

		if (sdzp == tdzp)
			rw_exit(&sdzp->z_name_lock);

		/* A source name of ".." is reported as EINVAL. */
		if (strcmp(snm, "..") == 0)
			serr = SET_ERROR(EINVAL);
		ZFS_EXIT(zfsvfs);
		return (serr);
	}
	if (terr) {
		/* Source was locked successfully; undo it before failing. */
		zfs_dirent_unlock(sdl);
		VN_RELE(ZTOV(szp));

		if (sdzp == tdzp)
			rw_exit(&sdzp->z_name_lock);

		/* A target name of ".." is reported as EINVAL. */
		if (strcmp(tnm, "..") == 0)
			terr = SET_ERROR(EINVAL);
		ZFS_EXIT(zfsvfs);
		return (terr);
	}

	/*
	 * Must have write access at the source to remove the old entry
	 * and write access at the target to create the new entry.
	 * Note that if target and source are the same, this can be
	 * done in a single check.
	 */

	if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))
		goto out;

	if (ZTOV(szp)->v_type == VDIR) {
		/*
		 * Check to make sure rename is valid.
		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
		 */
		if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl))
			goto out;
	}

	/*
	 * Does target exist?
	 */
	if (tzp) {
		/*
		 * Source and target must be the same type.
		 */
		if (ZTOV(szp)->v_type == VDIR) {
			if (ZTOV(tzp)->v_type != VDIR) {
				error = SET_ERROR(ENOTDIR);
				goto out;
			}
		} else {
			if (ZTOV(tzp)->v_type == VDIR) {
				error = SET_ERROR(EISDIR);
				goto out;
			}
		}
		/*
		 * POSIX dictates that when the source and target
		 * entries refer to the same file object, rename
		 * must do nothing and exit without error.
		 */
		if (szp->z_id == tzp->z_id) {
			error = 0;
			goto out;
		}
	}

	/*
	 * NOTE(review): these vnevents are posted before the transaction is
	 * assigned, so they are also posted on each ERESTART retry and when
	 * the rename subsequently fails.
	 */
	vnevent_rename_src(ZTOV(szp), sdvp, snm, ct);
	if (tzp)
		vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct);

	/*
	 * notify the target directory if it is not the same
	 * as source directory.
	 */
	if (tdvp != sdvp) {
		vnevent_rename_dest_dir(tdvp, ct);
	}

	/*
	 * Declare everything the transaction may touch: the SA of the
	 * source znode and of both directories, both directory ZAPs, the
	 * existing target (if any) and the unlinked-object set.
	 */
	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
	if (sdzp != tdzp) {
		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, tdzp);
	}
	if (tzp) {
		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
		zfs_sa_upgrade_txholds(tx, tzp);
	}

	zfs_sa_upgrade_txholds(tx, szp);
	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
	error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
	if (error) {
		/*
		 * Release everything acquired since "top" so that the
		 * retry (or the error return) starts from a clean state.
		 */
		if (zl != NULL)
			zfs_rename_unlock(&zl);
		zfs_dirent_unlock(sdl);
		zfs_dirent_unlock(tdl);

		if (sdzp == tdzp)
			rw_exit(&sdzp->z_name_lock);

		VN_RELE(ZTOV(szp));
		if (tzp)
			VN_RELE(ZTOV(tzp));
		if (error == ERESTART) {
			waited = B_TRUE;
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/* error is 0 here, so a missing target falls straight through. */
	if (tzp)	/* Attempt to remove the existing target */
		error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);

	if (error == 0) {
		error = zfs_link_create(tdl, szp, tx, ZRENAMING);
		if (error == 0) {
			szp->z_pflags |= ZFS_AV_MODIFIED;

			error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
			    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
			ASSERT0(error);

			error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
			if (error == 0) {
				zfs_log_rename(zilog, tx, TX_RENAME |
				    (flags & FIGNORECASE ? TX_CI : 0), sdzp,
				    sdl->dl_name, tdzp, tdl->dl_name, szp, tzp);

				/*
				 * Update path information for the target vnode
				 */
				vn_renamepath(tdvp, ZTOV(szp), tnm,
				    strlen(tnm));
			} else {
				/*
				 * At this point, we have successfully created
				 * the target name, but have failed to remove
				 * the source name.  Since the create was done
				 * with the ZRENAMING flag, there are
				 * complications; for one, the link count is
				 * wrong.  The easiest way to deal with this
				 * is to remove the newly created target, and
				 * return the original error.  This must
				 * succeed; fortunately, it is very unlikely to
				 * fail, since we just created it.
				 */
				VERIFY3U(zfs_link_destroy(tdl, szp, tx,
				    ZRENAMING, NULL), ==, 0);
			}
		}
	}

	dmu_tx_commit(tx);
	/*
	 * Common exit for every path that holds both dirent locks: drop the
	 * rename lock (if taken), the dirent locks, the shared name lock and
	 * the vnode holds on the source and target.
	 */
out:
	if (zl != NULL)
		zfs_rename_unlock(&zl);

	zfs_dirent_unlock(sdl);
	zfs_dirent_unlock(tdl);

	if (sdzp == tdzp)
		rw_exit(&sdzp->z_name_lock);


	VN_RELE(ZTOV(szp));
	if (tzp)
		VN_RELE(ZTOV(tzp));

	/* With sync=always the intent log is committed before returning. */
	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Insert the indicated symbolic reference entry into the directory.
 *
 *	IN:	dvp	- Directory to contain new symbolic link.
 *		name	- Name for new symlink entry.
 *		vap	- Attributes of new entry.
 *		link	- Path the new symlink refers to (stored as its
 *			  contents).
 *		cr	- credentials of caller.
 *		ct	- caller context
 *		flags	- case flags
 *
 *	RETURN:	0 on success, error code on failure.
 *
 * Timestamps:
 *	dvp - ctime|mtime updated
 */
/*ARGSUSED*/
static int
zfs_symlink(vnode_t *dvp, char *name, vattr_t *vap, char *link, cred_t *cr,
    caller_context_t *ct, int flags)
{
	znode_t		*zp, *dzp = VTOZ(dvp);
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	/* Length of the link contents, excluding the terminating NUL. */
	uint64_t	len = strlen(link);
	int		error;
	int		zflg = ZNEW;
	zfs_acl_ids_t	acl_ids;
	boolean_t	fuid_dirtied;
	uint64_t	txtype = TX_SYMLINK;
	/*
	 * Set once we have slept in dmu_tx_wait(); the retry then assigns
	 * the transaction with TXG_WAITED instead of TXG_NOWAIT.
	 */
	boolean_t	waited = B_FALSE;

	ASSERT(vap->va_type == VLNK);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}
	if (flags & FIGNORECASE)
		zflg |= ZCILOOK;

	if (len > MAXPATHLEN) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENAMETOOLONG));
	}

	/*
	 * The ACL ids are created once, before the retry label, so they
	 * survive an ERESTART retry.  Every exit path from here on must
	 * release them with zfs_acl_ids_free().
	 */
	if ((error = zfs_acl_ids_create(dzp, 0,
	    vap, cr, NULL, &acl_ids)) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}
top:
	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL);
	if (error) {
		zfs_acl_ids_free(&acl_ids);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
		zfs_acl_ids_free(&acl_ids);
		zfs_dirent_unlock(dl);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
		zfs_acl_ids_free(&acl_ids);
		zfs_dirent_unlock(dl);
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EDQUOT));
	}
	tx = dmu_tx_create(zfsvfs->z_os);
	/*
	 * Sample the FUID dirty state once so that the tx hold below and
	 * the zfs_fuid_sync() after assignment agree with each other.
	 */
	fuid_dirtied = zfsvfs->z_fuid_dirty;
	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
	    ZFS_SA_BASE_ATTR_SIZE + len);
	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
		    acl_ids.z_aclp->z_acl_bytes);
	}
	if (fuid_dirtied)
		zfs_fuid_txhold(zfsvfs, tx);
	error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
	if (error) {
		zfs_dirent_unlock(dl);
		if (error == ERESTART) {
			/* Keep acl_ids: they are reused by the retry. */
			waited = B_TRUE;
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		zfs_acl_ids_free(&acl_ids);
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Create a new object for the symlink.
	 * for version 4 ZPL datasets the symlink will be an SA attribute
	 */
	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);

	if (fuid_dirtied)
		zfs_fuid_sync(zfsvfs, tx);

	/*
	 * Store the link contents.  Only the SA path assigns error; on the
	 * non-SA path error keeps the 0 left by dmu_tx_assign().
	 */
	mutex_enter(&zp->z_lock);
	if (zp->z_is_sa)
		error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
		    link, len, tx);
	else
		zfs_sa_symlink(zp, link, len, tx);
	mutex_exit(&zp->z_lock);

	/*
	 * NOTE(review): the results of this size update and of
	 * zfs_link_create() below are deliberately discarded, so only a
	 * failure of the SA symlink update above is reported to the caller.
	 */
	zp->z_size = len;
	(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
	    &zp->z_size, sizeof (zp->z_size), tx);
	/*
	 * Insert the new object into the directory.
	 */
	(void) zfs_link_create(dl, zp, tx, ZNEW);

	if (flags & FIGNORECASE)
		txtype |= TX_CI;
	zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);

	zfs_acl_ids_free(&acl_ids);

	dmu_tx_commit(tx);

	zfs_dirent_unlock(dl);

	VN_RELE(ZTOV(zp));

	/* With sync=always the intent log is committed before returning. */
	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * Return, in the buffer contained in the provided uio structure,
 * the symbolic path referred to by vp.
 *
 *	IN:	vp	- vnode of symbolic link.
 *		uio	- structure to contain the link path.
 *		cr	- credentials of caller.
 *		ct	- caller context
 *
 *	OUT:	uio	- structure containing the link path.
 *
 *	RETURN:	0 on success, error code on failure.
3992fa9e4066Sahrens * 3993fa9e4066Sahrens * Timestamps: 3994fa9e4066Sahrens * vp - atime updated 3995fa9e4066Sahrens */ 3996fa9e4066Sahrens /* ARGSUSED */ 3997fa9e4066Sahrens static int 3998da6c28aaSamw zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct) 3999fa9e4066Sahrens { 4000fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4001fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4002fa9e4066Sahrens int error; 4003fa9e4066Sahrens 40043cb34c60Sahrens ZFS_ENTER(zfsvfs); 40053cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4006fa9e4066Sahrens 40071412a1a2SMark Shellenbaum mutex_enter(&zp->z_lock); 40080a586ceaSMark Shellenbaum if (zp->z_is_sa) 40090a586ceaSMark Shellenbaum error = sa_lookup_uio(zp->z_sa_hdl, 40100a586ceaSMark Shellenbaum SA_ZPL_SYMLINK(zfsvfs), uio); 40110a586ceaSMark Shellenbaum else 40120a586ceaSMark Shellenbaum error = zfs_sa_readlink(zp, uio); 40131412a1a2SMark Shellenbaum mutex_exit(&zp->z_lock); 4014fa9e4066Sahrens 4015fa9e4066Sahrens ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 40160a586ceaSMark Shellenbaum 4017fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4018fa9e4066Sahrens return (error); 4019fa9e4066Sahrens } 4020fa9e4066Sahrens 4021fa9e4066Sahrens /* 4022fa9e4066Sahrens * Insert a new entry into directory tdvp referencing svp. 4023fa9e4066Sahrens * 4024fa9e4066Sahrens * IN: tdvp - Directory to contain new entry. 4025fa9e4066Sahrens * svp - vnode of new entry. 4026fa9e4066Sahrens * name - name of new entry. 4027fa9e4066Sahrens * cr - credentials of caller. 4028da6c28aaSamw * ct - caller context 4029fa9e4066Sahrens * 4030f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
 *
 *	NOTE:	the function also takes a "flags" argument (case flags);
 *		FIGNORECASE requests a case-insensitive name lookup.
 *
 * Timestamps:
 *	tdvp - ctime|mtime updated
 *	 svp - ctime updated
 */
/* ARGSUSED */
static int
zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr,
    caller_context_t *ct, int flags)
{
	znode_t		*dzp = VTOZ(tdvp);
	znode_t		*tzp, *szp;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	zilog_t		*zilog;
	zfs_dirlock_t	*dl;
	dmu_tx_t	*tx;
	vnode_t		*realvp;
	int		error;
	int		zf = ZNEW;
	uint64_t	parent;
	uid_t		owner;
	/*
	 * Set once we have slept in dmu_tx_wait(); the retry then assigns
	 * the transaction with TXG_WAITED instead of TXG_NOWAIT.
	 */
	boolean_t	waited = B_FALSE;

	ASSERT(tdvp->v_type == VDIR);

	ZFS_ENTER(zfsvfs);
	ZFS_VERIFY_ZP(dzp);
	zilog = zfsvfs->z_log;

	/* Operate on the real vnode behind svp, if there is one. */
	if (VOP_REALVP(svp, &realvp, ct) == 0)
		svp = realvp;

	/*
	 * POSIX dictates that we return EPERM here.
	 * Better choices include ENOTSUP or EISDIR.
	 */
	if (svp->v_type == VDIR) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	szp = VTOZ(svp);
	ZFS_VERIFY_ZP(szp);

	/*
	 * We check z_zfsvfs rather than v_vfsp here, because snapshots and the
	 * ctldir appear to have the same v_vfsp.
	 */
	if (szp->z_zfsvfs != zfsvfs || zfsctl_is_node(svp)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EXDEV));
	}

	/* Prevent links to .zfs/shares files */

	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
	    &parent, sizeof (uint64_t))) != 0) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}
	if (parent == zfsvfs->z_shares_dir) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	if (zfsvfs->z_utf8 && u8_validate(name,
	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EILSEQ));
	}
	if (flags & FIGNORECASE)
		zf |= ZCILOOK;

	/*
	 * We do not support links between attributes and non-attributes
	 * because of the potential security risk of creating links
	 * into "normal" file space in order to circumvent restrictions
	 * imposed in attribute space.
	 */
	if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}


	/*
	 * A caller who is not the file's owner may only create the link
	 * if secpolicy_basic_link() allows it.
	 */
	owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
	if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EPERM));
	}

	if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	/*
	 * Retry point: re-entered, with the dirent lock released, when
	 * dmu_tx_assign() returns ERESTART.
	 */
top:
	/*
	 * Attempt to lock directory; fail if entry already exists.
	 */
	error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL);
	if (error) {
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	tx = dmu_tx_create(zfsvfs->z_os);
	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
	zfs_sa_upgrade_txholds(tx, szp);
	zfs_sa_upgrade_txholds(tx, dzp);
	error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
	if (error) {
		zfs_dirent_unlock(dl);
		if (error == ERESTART) {
			waited = B_TRUE;
			dmu_tx_wait(tx);
			dmu_tx_abort(tx);
			goto top;
		}
		dmu_tx_abort(tx);
		ZFS_EXIT(zfsvfs);
		return (error);
	}

	error = zfs_link_create(dl, szp, tx, 0);

	/* Only a successful link is recorded in the intent log. */
	if (error == 0) {
		uint64_t txtype = TX_LINK;
		if (flags & FIGNORECASE)
			txtype |= TX_CI;
		zfs_log_link(zilog, tx, txtype, dzp, szp, name);
	}

	/* The transaction is committed whether or not the link was made. */
	dmu_tx_commit(tx);

	zfs_dirent_unlock(dl);

	if (error == 0) {
		vnevent_link(svp, ct);
	}

	/* With sync=always the intent log is committed before returning. */
	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}

/*
 * zfs_null_putapage() is used when the file system has been force
 * unmounted. It just drops the pages.
 *
 * The page list is completed with B_INVAL|B_FORCE|B_ERROR and 0 is
 * always returned; offp and lenp are not written.
 */
/* ARGSUSED */
static int
zfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp,
    size_t *lenp, int flags, cred_t *cr)
{
	pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR);
	return (0);
}

/*
 * Push a page out to disk, klustering if possible.
419544eda4d7Smaybee * 419644eda4d7Smaybee * IN: vp - file to push page to. 419744eda4d7Smaybee * pp - page to push. 419844eda4d7Smaybee * flags - additional flags. 419944eda4d7Smaybee * cr - credentials of caller. 420044eda4d7Smaybee * 420144eda4d7Smaybee * OUT: offp - start of range pushed. 420244eda4d7Smaybee * lenp - len of range pushed. 420344eda4d7Smaybee * 4204f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 420544eda4d7Smaybee * 420644eda4d7Smaybee * NOTE: callers must have locked the page to be pushed. On 420744eda4d7Smaybee * exit, the page (and all other pages in the kluster) must be 420844eda4d7Smaybee * unlocked. 420944eda4d7Smaybee */ 4210fa9e4066Sahrens /* ARGSUSED */ 4211fa9e4066Sahrens static int 4212fa9e4066Sahrens zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, 4213fa9e4066Sahrens size_t *lenp, int flags, cred_t *cr) 4214fa9e4066Sahrens { 4215fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4216fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4217fa9e4066Sahrens dmu_tx_t *tx; 421844eda4d7Smaybee u_offset_t off, koff; 421944eda4d7Smaybee size_t len, klen; 4220fa9e4066Sahrens int err; 4221fa9e4066Sahrens 4222fa9e4066Sahrens off = pp->p_offset; 422344eda4d7Smaybee len = PAGESIZE; 422444eda4d7Smaybee /* 422544eda4d7Smaybee * If our blocksize is bigger than the page size, try to kluster 42261209a471SNeil Perrin * multiple pages so that we write a full block (thus avoiding 422744eda4d7Smaybee * a read-modify-write). 422844eda4d7Smaybee */ 42290a586ceaSMark Shellenbaum if (off < zp->z_size && zp->z_blksz > PAGESIZE) { 423044eda4d7Smaybee klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); 4231ac05c741SMark Maybee koff = ISP2(klen) ? 
P2ALIGN(off, (u_offset_t)klen) : 0; 42320a586ceaSMark Shellenbaum ASSERT(koff <= zp->z_size); 42330a586ceaSMark Shellenbaum if (koff + klen > zp->z_size) 42340a586ceaSMark Shellenbaum klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE); 423544eda4d7Smaybee pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags); 423644eda4d7Smaybee } 423744eda4d7Smaybee ASSERT3U(btop(len), ==, btopr(len)); 4238ac05c741SMark Maybee 4239dd6ef538Smaybee /* 4240dd6ef538Smaybee * Can't push pages past end-of-file. 4241dd6ef538Smaybee */ 42420a586ceaSMark Shellenbaum if (off >= zp->z_size) { 4243f4d2e9e6Smaybee /* ignore all pages */ 424444eda4d7Smaybee err = 0; 424544eda4d7Smaybee goto out; 42460a586ceaSMark Shellenbaum } else if (off + len > zp->z_size) { 42470a586ceaSMark Shellenbaum int npages = btopr(zp->z_size - off); 424844eda4d7Smaybee page_t *trunc; 424944eda4d7Smaybee 425044eda4d7Smaybee page_list_break(&pp, &trunc, npages); 4251f4d2e9e6Smaybee /* ignore pages past end of file */ 425244eda4d7Smaybee if (trunc) 4253f4d2e9e6Smaybee pvn_write_done(trunc, flags); 42540a586ceaSMark Shellenbaum len = zp->z_size - off; 4255dd6ef538Smaybee } 425614843421SMatthew Ahrens 42570a586ceaSMark Shellenbaum if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || 42580a586ceaSMark Shellenbaum zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { 4259be6fd75aSMatthew Ahrens err = SET_ERROR(EDQUOT); 426014843421SMatthew Ahrens goto out; 426114843421SMatthew Ahrens } 4262fa9e4066Sahrens tx = dmu_tx_create(zfsvfs->z_os); 4263fa9e4066Sahrens dmu_tx_hold_write(tx, zp->z_id, off, len); 42640a586ceaSMark Shellenbaum 42650a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 42660a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 4267e722410cSMatthew Ahrens err = dmu_tx_assign(tx, TXG_WAIT); 4268fa9e4066Sahrens if (err != 0) { 42698a2f1b91Sahrens dmu_tx_abort(tx); 4270fa9e4066Sahrens goto out; 4271fa9e4066Sahrens } 4272fa9e4066Sahrens 427344eda4d7Smaybee if (zp->z_blksz <= PAGESIZE) { 
42740fab61baSJonathan W Adams caddr_t va = zfs_map_page(pp, S_READ); 427544eda4d7Smaybee ASSERT3U(len, <=, PAGESIZE); 4276fa9e4066Sahrens dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); 42770fab61baSJonathan W Adams zfs_unmap_page(pp, va); 427844eda4d7Smaybee } else { 427944eda4d7Smaybee err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); 428044eda4d7Smaybee } 4281fa9e4066Sahrens 428244eda4d7Smaybee if (err == 0) { 42830a586ceaSMark Shellenbaum uint64_t mtime[2], ctime[2]; 4284db9986c7SMark Shellenbaum sa_bulk_attr_t bulk[3]; 42850a586ceaSMark Shellenbaum int count = 0; 42860a586ceaSMark Shellenbaum 42870a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 42880a586ceaSMark Shellenbaum &mtime, 16); 42890a586ceaSMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 42900a586ceaSMark Shellenbaum &ctime, 16); 4291db9986c7SMark Shellenbaum SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 4292db9986c7SMark Shellenbaum &zp->z_pflags, 8); 42930a586ceaSMark Shellenbaum zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, 42940a586ceaSMark Shellenbaum B_TRUE); 4295ac05c741SMark Maybee zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); 429644eda4d7Smaybee } 429768857716SLin Ling dmu_tx_commit(tx); 4298fa9e4066Sahrens 429944eda4d7Smaybee out: 4300f4d2e9e6Smaybee pvn_write_done(pp, (err ? B_ERROR : 0) | flags); 4301fa9e4066Sahrens if (offp) 4302fa9e4066Sahrens *offp = off; 4303fa9e4066Sahrens if (lenp) 4304fa9e4066Sahrens *lenp = len; 4305fa9e4066Sahrens 4306fa9e4066Sahrens return (err); 4307fa9e4066Sahrens } 4308fa9e4066Sahrens 4309fa9e4066Sahrens /* 4310fa9e4066Sahrens * Copy the portion of the file indicated from pages into the file. 4311fa9e4066Sahrens * The pages are stored in a page list attached to the files vnode. 4312fa9e4066Sahrens * 4313fa9e4066Sahrens * IN: vp - vnode of file to push page data to. 4314fa9e4066Sahrens * off - position in file to put data. 
4315fa9e4066Sahrens * len - amount of data to write. 4316fa9e4066Sahrens * flags - flags to control the operation. 4317fa9e4066Sahrens * cr - credentials of caller. 4318da6c28aaSamw * ct - caller context. 4319fa9e4066Sahrens * 4320f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 4321fa9e4066Sahrens * 4322fa9e4066Sahrens * Timestamps: 4323fa9e4066Sahrens * vp - ctime|mtime updated 4324fa9e4066Sahrens */ 4325da6c28aaSamw /*ARGSUSED*/ 4326fa9e4066Sahrens static int 4327da6c28aaSamw zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr, 4328da6c28aaSamw caller_context_t *ct) 4329fa9e4066Sahrens { 4330fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4331fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4332fa9e4066Sahrens page_t *pp; 4333fa9e4066Sahrens size_t io_len; 4334fa9e4066Sahrens u_offset_t io_off; 4335ac05c741SMark Maybee uint_t blksz; 4336ac05c741SMark Maybee rl_t *rl; 4337fa9e4066Sahrens int error = 0; 4338fa9e4066Sahrens 43393cb34c60Sahrens ZFS_ENTER(zfsvfs); 43403cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4341fa9e4066Sahrens 4342fa9e4066Sahrens /* 4343c4fc6b21SGarrett D'Amore * There's nothing to do if no data is cached. 4344c4fc6b21SGarrett D'Amore */ 4345c4fc6b21SGarrett D'Amore if (!vn_has_cached_data(vp)) { 4346c4fc6b21SGarrett D'Amore ZFS_EXIT(zfsvfs); 4347c4fc6b21SGarrett D'Amore return (0); 4348c4fc6b21SGarrett D'Amore } 4349c4fc6b21SGarrett D'Amore 4350c4fc6b21SGarrett D'Amore /* 4351ac05c741SMark Maybee * Align this request to the file block size in case we kluster. 4352ac05c741SMark Maybee * XXX - this can result in pretty aggresive locking, which can 4353ac05c741SMark Maybee * impact simultanious read/write access. One option might be 4354ac05c741SMark Maybee * to break up long requests (len == 0) into block-by-block 4355ac05c741SMark Maybee * operations to get narrower locking. 
4356fa9e4066Sahrens */ 4357ac05c741SMark Maybee blksz = zp->z_blksz; 4358ac05c741SMark Maybee if (ISP2(blksz)) 4359ac05c741SMark Maybee io_off = P2ALIGN_TYPED(off, blksz, u_offset_t); 4360ac05c741SMark Maybee else 4361ac05c741SMark Maybee io_off = 0; 4362ac05c741SMark Maybee if (len > 0 && ISP2(blksz)) 43635a6f5619SMark Maybee io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t); 4364ac05c741SMark Maybee else 4365ac05c741SMark Maybee io_len = 0; 4366ac05c741SMark Maybee 4367ac05c741SMark Maybee if (io_len == 0) { 4368ac05c741SMark Maybee /* 4369ac05c741SMark Maybee * Search the entire vp list for pages >= io_off. 4370ac05c741SMark Maybee */ 4371ac05c741SMark Maybee rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER); 4372ac05c741SMark Maybee error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr); 4373fe9cf88cSperrin goto out; 4374fa9e4066Sahrens } 4375ac05c741SMark Maybee rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER); 4376fa9e4066Sahrens 43770a586ceaSMark Shellenbaum if (off > zp->z_size) { 4378fa9e4066Sahrens /* past end of file */ 4379ac05c741SMark Maybee zfs_range_unlock(rl); 4380fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4381fa9e4066Sahrens return (0); 4382fa9e4066Sahrens } 4383fa9e4066Sahrens 43840a586ceaSMark Shellenbaum len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off); 4385fa9e4066Sahrens 4386ac05c741SMark Maybee for (off = io_off; io_off < off + len; io_off += io_len) { 4387fa9e4066Sahrens if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 4388fa9e4066Sahrens pp = page_lookup(vp, io_off, 4389ecb72030Sperrin (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED); 4390fa9e4066Sahrens } else { 4391fa9e4066Sahrens pp = page_lookup_nowait(vp, io_off, 4392fa9e4066Sahrens (flags & B_FREE) ? 
SE_EXCL : SE_SHARED); 4393fa9e4066Sahrens } 4394fa9e4066Sahrens 4395fa9e4066Sahrens if (pp != NULL && pvn_getdirty(pp, flags)) { 4396fa9e4066Sahrens int err; 4397fa9e4066Sahrens 4398fa9e4066Sahrens /* 4399fa9e4066Sahrens * Found a dirty page to push 4400fa9e4066Sahrens */ 4401104e2ed7Sperrin err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr); 4402104e2ed7Sperrin if (err) 4403fa9e4066Sahrens error = err; 4404fa9e4066Sahrens } else { 4405fa9e4066Sahrens io_len = PAGESIZE; 4406fa9e4066Sahrens } 4407fa9e4066Sahrens } 4408fe9cf88cSperrin out: 4409ac05c741SMark Maybee zfs_range_unlock(rl); 441055da60b9SMark J Musante if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 44115002558fSNeil Perrin zil_commit(zfsvfs->z_log, zp->z_id); 4412fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4413fa9e4066Sahrens return (error); 4414fa9e4066Sahrens } 4415fa9e4066Sahrens 4416da6c28aaSamw /*ARGSUSED*/ 4417fa9e4066Sahrens void 4418da6c28aaSamw zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 4419fa9e4066Sahrens { 4420fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4421fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4422fa9e4066Sahrens int error; 4423fa9e4066Sahrens 4424f18faf3fSek110237 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); 44250a586ceaSMark Shellenbaum if (zp->z_sa_hdl == NULL) { 44264ccbb6e7Sahrens /* 4427874395d5Smaybee * The fs has been unmounted, or we did a 4428874395d5Smaybee * suspend/resume and this file no longer exists. 
44294ccbb6e7Sahrens */ 4430fa9e4066Sahrens if (vn_has_cached_data(vp)) { 4431fa9e4066Sahrens (void) pvn_vplist_dirty(vp, 0, zfs_null_putapage, 4432fa9e4066Sahrens B_INVAL, cr); 4433fa9e4066Sahrens } 4434fa9e4066Sahrens 4435ea8dc4b6Seschrock mutex_enter(&zp->z_lock); 4436cd2adeceSChris Kirby mutex_enter(&vp->v_lock); 4437cd2adeceSChris Kirby ASSERT(vp->v_count == 1); 4438cd2adeceSChris Kirby vp->v_count = 0; 4439cd2adeceSChris Kirby mutex_exit(&vp->v_lock); 4440ea8dc4b6Seschrock mutex_exit(&zp->z_lock); 4441f18faf3fSek110237 rw_exit(&zfsvfs->z_teardown_inactive_lock); 4442874395d5Smaybee zfs_znode_free(zp); 4443fa9e4066Sahrens return; 4444fa9e4066Sahrens } 4445fa9e4066Sahrens 4446fa9e4066Sahrens /* 4447fa9e4066Sahrens * Attempt to push any data in the page cache. If this fails 4448fa9e4066Sahrens * we will get kicked out later in zfs_zinactive(). 4449fa9e4066Sahrens */ 44508afd4dd6Sperrin if (vn_has_cached_data(vp)) { 44518afd4dd6Sperrin (void) pvn_vplist_dirty(vp, 0, zfs_putapage, B_INVAL|B_ASYNC, 44528afd4dd6Sperrin cr); 44538afd4dd6Sperrin } 4454fa9e4066Sahrens 4455893a6d32Sahrens if (zp->z_atime_dirty && zp->z_unlinked == 0) { 4456fa9e4066Sahrens dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); 4457fa9e4066Sahrens 44580a586ceaSMark Shellenbaum dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 44590a586ceaSMark Shellenbaum zfs_sa_upgrade_txholds(tx, zp); 4460fa9e4066Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 4461fa9e4066Sahrens if (error) { 4462fa9e4066Sahrens dmu_tx_abort(tx); 4463fa9e4066Sahrens } else { 4464fa9e4066Sahrens mutex_enter(&zp->z_lock); 44650a586ceaSMark Shellenbaum (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs), 44660a586ceaSMark Shellenbaum (void *)&zp->z_atime, sizeof (zp->z_atime), tx); 4467fa9e4066Sahrens zp->z_atime_dirty = 0; 4468fa9e4066Sahrens mutex_exit(&zp->z_lock); 4469fa9e4066Sahrens dmu_tx_commit(tx); 4470fa9e4066Sahrens } 4471fa9e4066Sahrens } 4472fa9e4066Sahrens 4473fa9e4066Sahrens zfs_zinactive(zp); 4474f18faf3fSek110237 
rw_exit(&zfsvfs->z_teardown_inactive_lock); 4475fa9e4066Sahrens } 4476fa9e4066Sahrens 4477fa9e4066Sahrens /* 4478fa9e4066Sahrens * Bounds-check the seek operation. 4479fa9e4066Sahrens * 4480fa9e4066Sahrens * IN: vp - vnode seeking within 4481fa9e4066Sahrens * ooff - old file offset 4482fa9e4066Sahrens * noffp - pointer to new file offset 4483da6c28aaSamw * ct - caller context 4484fa9e4066Sahrens * 4485f7170741SWill Andrews * RETURN: 0 on success, EINVAL if new offset invalid. 4486fa9e4066Sahrens */ 4487fa9e4066Sahrens /* ARGSUSED */ 4488fa9e4066Sahrens static int 4489da6c28aaSamw zfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, 4490da6c28aaSamw caller_context_t *ct) 4491fa9e4066Sahrens { 4492fa9e4066Sahrens if (vp->v_type == VDIR) 4493fa9e4066Sahrens return (0); 4494fa9e4066Sahrens return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 4495fa9e4066Sahrens } 4496fa9e4066Sahrens 4497fa9e4066Sahrens /* 4498fa9e4066Sahrens * Pre-filter the generic locking function to trap attempts to place 4499fa9e4066Sahrens * a mandatory lock on a memory mapped file. 4500fa9e4066Sahrens */ 4501fa9e4066Sahrens static int 4502fa9e4066Sahrens zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, 4503da6c28aaSamw flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct) 4504fa9e4066Sahrens { 4505fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4506fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4507fa9e4066Sahrens 45083cb34c60Sahrens ZFS_ENTER(zfsvfs); 45093cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4510fa9e4066Sahrens 4511fa9e4066Sahrens /* 4512ea8dc4b6Seschrock * We are following the UFS semantics with respect to mapcnt 4513ea8dc4b6Seschrock * here: If we see that the file is mapped already, then we will 4514ea8dc4b6Seschrock * return an error, but we don't worry about races between this 4515ea8dc4b6Seschrock * function and zfs_map(). 
4516fa9e4066Sahrens */ 45170a586ceaSMark Shellenbaum if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) { 4518fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4519be6fd75aSMatthew Ahrens return (SET_ERROR(EAGAIN)); 4520fa9e4066Sahrens } 4521fa9e4066Sahrens ZFS_EXIT(zfsvfs); 452204ce3d0bSMark Shellenbaum return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 4523fa9e4066Sahrens } 4524fa9e4066Sahrens 4525fa9e4066Sahrens /* 4526fa9e4066Sahrens * If we can't find a page in the cache, we will create a new page 4527fa9e4066Sahrens * and fill it with file data. For efficiency, we may try to fill 4528ac05c741SMark Maybee * multiple pages at once (klustering) to fill up the supplied page 4529ed886187SMark Maybee * list. Note that the pages to be filled are held with an exclusive 4530ed886187SMark Maybee * lock to prevent access by other threads while they are being filled. 4531fa9e4066Sahrens */ 4532fa9e4066Sahrens static int 4533fa9e4066Sahrens zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, 4534fa9e4066Sahrens caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw) 4535fa9e4066Sahrens { 4536fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4537fa9e4066Sahrens page_t *pp, *cur_pp; 4538fa9e4066Sahrens objset_t *os = zp->z_zfsvfs->z_os; 4539fa9e4066Sahrens u_offset_t io_off, total; 4540fa9e4066Sahrens size_t io_len; 4541fa9e4066Sahrens int err; 4542fa9e4066Sahrens 454344eda4d7Smaybee if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) { 4544ac05c741SMark Maybee /* 4545ac05c741SMark Maybee * We only have a single page, don't bother klustering 4546ac05c741SMark Maybee */ 4547fa9e4066Sahrens io_off = off; 4548fa9e4066Sahrens io_len = PAGESIZE; 4549ed886187SMark Maybee pp = page_create_va(vp, io_off, io_len, 4550ed886187SMark Maybee PG_EXCL | PG_WAIT, seg, addr); 4551fa9e4066Sahrens } else { 4552fa9e4066Sahrens /* 4553ac05c741SMark Maybee * Try to find enough pages to fill the page list 4554fa9e4066Sahrens */ 4555fa9e4066Sahrens pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 
4556ac05c741SMark Maybee &io_len, off, plsz, 0); 4557fa9e4066Sahrens } 4558fa9e4066Sahrens if (pp == NULL) { 4559fa9e4066Sahrens /* 4560ac05c741SMark Maybee * The page already exists, nothing to do here. 4561fa9e4066Sahrens */ 4562fa9e4066Sahrens *pl = NULL; 4563fa9e4066Sahrens return (0); 4564fa9e4066Sahrens } 4565fa9e4066Sahrens 4566fa9e4066Sahrens /* 4567fa9e4066Sahrens * Fill the pages in the kluster. 4568fa9e4066Sahrens */ 4569fa9e4066Sahrens cur_pp = pp; 4570fa9e4066Sahrens for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { 4571ac05c741SMark Maybee caddr_t va; 4572ac05c741SMark Maybee 457344eda4d7Smaybee ASSERT3U(io_off, ==, cur_pp->p_offset); 45740fab61baSJonathan W Adams va = zfs_map_page(cur_pp, S_WRITE); 45757bfdf011SNeil Perrin err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, 45767bfdf011SNeil Perrin DMU_READ_PREFETCH); 45770fab61baSJonathan W Adams zfs_unmap_page(cur_pp, va); 4578fa9e4066Sahrens if (err) { 4579fa9e4066Sahrens /* On error, toss the entire kluster */ 4580fa9e4066Sahrens pvn_read_done(pp, B_ERROR); 4581b87f3af3Sperrin /* convert checksum errors into IO errors */ 4582b87f3af3Sperrin if (err == ECKSUM) 4583be6fd75aSMatthew Ahrens err = SET_ERROR(EIO); 4584fa9e4066Sahrens return (err); 4585fa9e4066Sahrens } 4586fa9e4066Sahrens cur_pp = cur_pp->p_next; 4587fa9e4066Sahrens } 4588ac05c741SMark Maybee 4589fa9e4066Sahrens /* 4590ac05c741SMark Maybee * Fill in the page list array from the kluster starting 4591ac05c741SMark Maybee * from the desired offset `off'. 4592fa9e4066Sahrens * NOTE: the page list will always be null terminated. 
4593fa9e4066Sahrens */ 4594fa9e4066Sahrens pvn_plist_init(pp, pl, plsz, off, io_len, rw); 4595ac05c741SMark Maybee ASSERT(pl == NULL || (*pl)->p_offset == off); 4596fa9e4066Sahrens 4597fa9e4066Sahrens return (0); 4598fa9e4066Sahrens } 4599fa9e4066Sahrens 4600fa9e4066Sahrens /* 4601fa9e4066Sahrens * Return pointers to the pages for the file region [off, off + len] 4602fa9e4066Sahrens * in the pl array. If plsz is greater than len, this function may 4603ac05c741SMark Maybee * also return page pointers from after the specified region 4604ac05c741SMark Maybee * (i.e. the region [off, off + plsz]). These additional pages are 4605ac05c741SMark Maybee * only returned if they are already in the cache, or were created as 4606ac05c741SMark Maybee * part of a klustered read. 4607fa9e4066Sahrens * 4608fa9e4066Sahrens * IN: vp - vnode of file to get data from. 4609fa9e4066Sahrens * off - position in file to get data from. 4610fa9e4066Sahrens * len - amount of data to retrieve. 4611fa9e4066Sahrens * plsz - length of provided page list. 4612fa9e4066Sahrens * seg - segment to obtain pages for. 4613fa9e4066Sahrens * addr - virtual address of fault. 4614fa9e4066Sahrens * rw - mode of created pages. 4615fa9e4066Sahrens * cr - credentials of caller. 4616da6c28aaSamw * ct - caller context. 4617fa9e4066Sahrens * 4618fa9e4066Sahrens * OUT: protp - protection mode of created pages. 4619fa9e4066Sahrens * pl - list of pages created. 4620fa9e4066Sahrens * 4621f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
4622fa9e4066Sahrens * 4623fa9e4066Sahrens * Timestamps: 4624fa9e4066Sahrens * vp - atime updated 4625fa9e4066Sahrens */ 4626fa9e4066Sahrens /* ARGSUSED */ 4627fa9e4066Sahrens static int 4628fa9e4066Sahrens zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, 4629fa9e4066Sahrens page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, 4630da6c28aaSamw enum seg_rw rw, cred_t *cr, caller_context_t *ct) 4631fa9e4066Sahrens { 4632fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4633fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4634ac05c741SMark Maybee page_t **pl0 = pl; 4635ac05c741SMark Maybee int err = 0; 4636ac05c741SMark Maybee 4637ac05c741SMark Maybee /* we do our own caching, faultahead is unnecessary */ 4638ac05c741SMark Maybee if (pl == NULL) 4639ac05c741SMark Maybee return (0); 4640ac05c741SMark Maybee else if (len > plsz) 4641ac05c741SMark Maybee len = plsz; 464227bd165aSMark Maybee else 464327bd165aSMark Maybee len = P2ROUNDUP(len, PAGESIZE); 4644ac05c741SMark Maybee ASSERT(plsz >= len); 4645fa9e4066Sahrens 46463cb34c60Sahrens ZFS_ENTER(zfsvfs); 46473cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4648fa9e4066Sahrens 4649fa9e4066Sahrens if (protp) 4650fa9e4066Sahrens *protp = PROT_ALL; 4651fa9e4066Sahrens 4652fa9e4066Sahrens /* 4653ed886187SMark Maybee * Loop through the requested range [off, off + len) looking 4654fa9e4066Sahrens * for pages. If we don't find a page, we will need to create 4655fa9e4066Sahrens * a new page and fill it with data from the file. 
4656fa9e4066Sahrens */ 4657fa9e4066Sahrens while (len > 0) { 4658ac05c741SMark Maybee if (*pl = page_lookup(vp, off, SE_SHARED)) 4659ac05c741SMark Maybee *(pl+1) = NULL; 4660ac05c741SMark Maybee else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw)) 4661fe2f476aSperrin goto out; 4662fa9e4066Sahrens while (*pl) { 4663ac05c741SMark Maybee ASSERT3U((*pl)->p_offset, ==, off); 4664fa9e4066Sahrens off += PAGESIZE; 4665fa9e4066Sahrens addr += PAGESIZE; 466627bd165aSMark Maybee if (len > 0) { 466727bd165aSMark Maybee ASSERT3U(len, >=, PAGESIZE); 4668fa9e4066Sahrens len -= PAGESIZE; 466927bd165aSMark Maybee } 4670ac05c741SMark Maybee ASSERT3U(plsz, >=, PAGESIZE); 4671ac05c741SMark Maybee plsz -= PAGESIZE; 4672ac05c741SMark Maybee pl++; 4673104e2ed7Sperrin } 4674104e2ed7Sperrin } 4675fa9e4066Sahrens 4676fa9e4066Sahrens /* 4677fa9e4066Sahrens * Fill out the page array with any pages already in the cache. 4678fa9e4066Sahrens */ 4679ac05c741SMark Maybee while (plsz > 0 && 4680ac05c741SMark Maybee (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) { 4681fa9e4066Sahrens off += PAGESIZE; 4682fa9e4066Sahrens plsz -= PAGESIZE; 4683fa9e4066Sahrens } 4684fa9e4066Sahrens out: 4685fe2f476aSperrin if (err) { 4686fe2f476aSperrin /* 4687fe2f476aSperrin * Release any pages we have previously locked. 4688fe2f476aSperrin */ 4689fe2f476aSperrin while (pl > pl0) 4690fe2f476aSperrin page_unlock(*--pl); 4691ac05c741SMark Maybee } else { 4692ac05c741SMark Maybee ZFS_ACCESSTIME_STAMP(zfsvfs, zp); 4693fe2f476aSperrin } 4694fe2f476aSperrin 4695fa9e4066Sahrens *pl = NULL; 4696fa9e4066Sahrens 4697fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4698fa9e4066Sahrens return (err); 4699fa9e4066Sahrens } 4700fa9e4066Sahrens 4701ea8dc4b6Seschrock /* 4702ea8dc4b6Seschrock * Request a memory map for a section of a file. 
This code interacts 4703ea8dc4b6Seschrock * with common code and the VM system as follows: 4704ea8dc4b6Seschrock * 4705f7170741SWill Andrews * - common code calls mmap(), which ends up in smmap_common() 4706f7170741SWill Andrews * - this calls VOP_MAP(), which takes you into (say) zfs 4707f7170741SWill Andrews * - zfs_map() calls as_map(), passing segvn_create() as the callback 4708f7170741SWill Andrews * - segvn_create() creates the new segment and calls VOP_ADDMAP() 4709f7170741SWill Andrews * - zfs_addmap() updates z_mapcnt 4710ea8dc4b6Seschrock */ 4711da6c28aaSamw /*ARGSUSED*/ 4712fa9e4066Sahrens static int 4713fa9e4066Sahrens zfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, 4714da6c28aaSamw size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4715da6c28aaSamw caller_context_t *ct) 4716fa9e4066Sahrens { 4717fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4718fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4719fa9e4066Sahrens segvn_crargs_t vn_a; 4720fa9e4066Sahrens int error; 4721fa9e4066Sahrens 47223cb34c60Sahrens ZFS_ENTER(zfsvfs); 47233cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4724fa9e4066Sahrens 47250a586ceaSMark Shellenbaum if ((prot & PROT_WRITE) && (zp->z_pflags & 47260a586ceaSMark Shellenbaum (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) { 47270616c50eSmarks ZFS_EXIT(zfsvfs); 4728be6fd75aSMatthew Ahrens return (SET_ERROR(EPERM)); 47290616c50eSmarks } 47300616c50eSmarks 47310616c50eSmarks if ((prot & (PROT_READ | PROT_EXEC)) && 47320a586ceaSMark Shellenbaum (zp->z_pflags & ZFS_AV_QUARANTINED)) { 47330616c50eSmarks ZFS_EXIT(zfsvfs); 4734be6fd75aSMatthew Ahrens return (SET_ERROR(EACCES)); 47350616c50eSmarks } 47360616c50eSmarks 4737fa9e4066Sahrens if (vp->v_flag & VNOMAP) { 4738fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4739be6fd75aSMatthew Ahrens return (SET_ERROR(ENOSYS)); 4740fa9e4066Sahrens } 4741fa9e4066Sahrens 4742fa9e4066Sahrens if (off < 0 || len > MAXOFFSET_T - off) { 4743fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4744be6fd75aSMatthew Ahrens 
return (SET_ERROR(ENXIO)); 4745fa9e4066Sahrens } 4746fa9e4066Sahrens 4747fa9e4066Sahrens if (vp->v_type != VREG) { 4748fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4749be6fd75aSMatthew Ahrens return (SET_ERROR(ENODEV)); 4750fa9e4066Sahrens } 4751fa9e4066Sahrens 4752fa9e4066Sahrens /* 4753fa9e4066Sahrens * If file is locked, disallow mapping. 4754fa9e4066Sahrens */ 47550a586ceaSMark Shellenbaum if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) { 4756fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4757be6fd75aSMatthew Ahrens return (SET_ERROR(EAGAIN)); 4758fa9e4066Sahrens } 4759fa9e4066Sahrens 4760fa9e4066Sahrens as_rangelock(as); 476160946fe0Smec error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 476260946fe0Smec if (error != 0) { 4763fa9e4066Sahrens as_rangeunlock(as); 4764fa9e4066Sahrens ZFS_EXIT(zfsvfs); 476560946fe0Smec return (error); 4766fa9e4066Sahrens } 4767fa9e4066Sahrens 4768fa9e4066Sahrens vn_a.vp = vp; 4769fa9e4066Sahrens vn_a.offset = (u_offset_t)off; 4770fa9e4066Sahrens vn_a.type = flags & MAP_TYPE; 4771fa9e4066Sahrens vn_a.prot = prot; 4772fa9e4066Sahrens vn_a.maxprot = maxprot; 4773fa9e4066Sahrens vn_a.cred = cr; 4774fa9e4066Sahrens vn_a.amp = NULL; 4775fa9e4066Sahrens vn_a.flags = flags & ~MAP_TYPE; 47764944b02eSkchow vn_a.szc = 0; 47774944b02eSkchow vn_a.lgrp_mem_policy_flags = 0; 4778fa9e4066Sahrens 4779fa9e4066Sahrens error = as_map(as, *addrp, len, segvn_create, &vn_a); 4780fa9e4066Sahrens 4781fa9e4066Sahrens as_rangeunlock(as); 4782fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4783fa9e4066Sahrens return (error); 4784fa9e4066Sahrens } 4785fa9e4066Sahrens 4786fa9e4066Sahrens /* ARGSUSED */ 4787fa9e4066Sahrens static int 4788fa9e4066Sahrens zfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 4789da6c28aaSamw size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 4790da6c28aaSamw caller_context_t *ct) 4791fa9e4066Sahrens { 4792ea8dc4b6Seschrock uint64_t pages = btopr(len); 4793ea8dc4b6Seschrock 4794ea8dc4b6Seschrock 
atomic_add_64(&VTOZ(vp)->z_mapcnt, pages); 4795fa9e4066Sahrens return (0); 4796fa9e4066Sahrens } 4797fa9e4066Sahrens 4798b468a217Seschrock /* 4799b468a217Seschrock * The reason we push dirty pages as part of zfs_delmap() is so that we get a 4800b468a217Seschrock * more accurate mtime for the associated file. Since we don't have a way of 4801b468a217Seschrock * detecting when the data was actually modified, we have to resort to 4802b468a217Seschrock * heuristics. If an explicit msync() is done, then we mark the mtime when the 4803b468a217Seschrock * last page is pushed. The problem occurs when the msync() call is omitted, 4804b468a217Seschrock * which by far the most common case: 4805b468a217Seschrock * 4806b468a217Seschrock * open() 4807b468a217Seschrock * mmap() 4808b468a217Seschrock * <modify memory> 4809b468a217Seschrock * munmap() 4810b468a217Seschrock * close() 4811b468a217Seschrock * <time lapse> 4812b468a217Seschrock * putpage() via fsflush 4813b468a217Seschrock * 4814b468a217Seschrock * If we wait until fsflush to come along, we can have a modification time that 4815b468a217Seschrock * is some arbitrary point in the future. In order to prevent this in the 4816b468a217Seschrock * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is 4817b468a217Seschrock * torn down. 
4818b468a217Seschrock */ 4819fa9e4066Sahrens /* ARGSUSED */ 4820fa9e4066Sahrens static int 4821fa9e4066Sahrens zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 4822da6c28aaSamw size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr, 4823da6c28aaSamw caller_context_t *ct) 4824fa9e4066Sahrens { 4825ea8dc4b6Seschrock uint64_t pages = btopr(len); 4826ea8dc4b6Seschrock 4827ea8dc4b6Seschrock ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages); 4828ea8dc4b6Seschrock atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages); 4829b468a217Seschrock 4830b468a217Seschrock if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && 4831b468a217Seschrock vn_has_cached_data(vp)) 4832da6c28aaSamw (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct); 4833b468a217Seschrock 4834fa9e4066Sahrens return (0); 4835fa9e4066Sahrens } 4836fa9e4066Sahrens 4837fa9e4066Sahrens /* 4838fa9e4066Sahrens * Free or allocate space in a file. Currently, this function only 4839fa9e4066Sahrens * supports the `F_FREESP' command. However, this command is somewhat 4840fa9e4066Sahrens * misnamed, as its functionality includes the ability to allocate as 4841fa9e4066Sahrens * well as free space. 4842fa9e4066Sahrens * 4843fa9e4066Sahrens * IN: vp - vnode of file to free data in. 4844fa9e4066Sahrens * cmd - action to take (only F_FREESP supported). 4845fa9e4066Sahrens * bfp - section of file to free/alloc. 4846fa9e4066Sahrens * flag - current file open mode flags. 4847fa9e4066Sahrens * offset - current file offset. 4848fa9e4066Sahrens * cr - credentials of caller [UNUSED]. 4849da6c28aaSamw * ct - caller context. 4850fa9e4066Sahrens * 4851f7170741SWill Andrews * RETURN: 0 on success, error code on failure. 
4852fa9e4066Sahrens * 4853fa9e4066Sahrens * Timestamps: 4854fa9e4066Sahrens * vp - ctime|mtime updated 4855fa9e4066Sahrens */ 4856fa9e4066Sahrens /* ARGSUSED */ 4857fa9e4066Sahrens static int 4858fa9e4066Sahrens zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag, 4859fa9e4066Sahrens offset_t offset, cred_t *cr, caller_context_t *ct) 4860fa9e4066Sahrens { 4861fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4862fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4863fa9e4066Sahrens uint64_t off, len; 4864fa9e4066Sahrens int error; 4865fa9e4066Sahrens 48663cb34c60Sahrens ZFS_ENTER(zfsvfs); 48673cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4868fa9e4066Sahrens 4869fa9e4066Sahrens if (cmd != F_FREESP) { 4870fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4871be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 4872fa9e4066Sahrens } 4873fa9e4066Sahrens 48742144b121SMarcel Telka /* 48752144b121SMarcel Telka * In a case vp->v_vfsp != zp->z_zfsvfs->z_vfs (e.g. snapshots) our 48762144b121SMarcel Telka * callers might not be able to detect properly that we are read-only, 48772144b121SMarcel Telka * so check it explicitly here. 
48782144b121SMarcel Telka */ 48792144b121SMarcel Telka if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) { 48802144b121SMarcel Telka ZFS_EXIT(zfsvfs); 48812144b121SMarcel Telka return (SET_ERROR(EROFS)); 48822144b121SMarcel Telka } 48832144b121SMarcel Telka 4884fa9e4066Sahrens if (error = convoff(vp, bfp, 0, offset)) { 4885fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4886fa9e4066Sahrens return (error); 4887fa9e4066Sahrens } 4888fa9e4066Sahrens 4889fa9e4066Sahrens if (bfp->l_len < 0) { 4890fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4891be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 4892fa9e4066Sahrens } 4893fa9e4066Sahrens 4894fa9e4066Sahrens off = bfp->l_start; 4895104e2ed7Sperrin len = bfp->l_len; /* 0 means from off to end of file */ 4896104e2ed7Sperrin 48975730cc9aSmaybee error = zfs_freesp(zp, off, len, flag, TRUE); 4898fa9e4066Sahrens 489972102e74SBryan Cantrill if (error == 0 && off == 0 && len == 0) 490072102e74SBryan Cantrill vnevent_truncate(ZTOV(zp), ct); 490172102e74SBryan Cantrill 4902fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4903fa9e4066Sahrens return (error); 4904fa9e4066Sahrens } 4905fa9e4066Sahrens 4906da6c28aaSamw /*ARGSUSED*/ 4907fa9e4066Sahrens static int 4908da6c28aaSamw zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 4909fa9e4066Sahrens { 4910fa9e4066Sahrens znode_t *zp = VTOZ(vp); 4911fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 4912f18faf3fSek110237 uint32_t gen; 49130a586ceaSMark Shellenbaum uint64_t gen64; 4914fa9e4066Sahrens uint64_t object = zp->z_id; 4915fa9e4066Sahrens zfid_short_t *zfid; 49160a586ceaSMark Shellenbaum int size, i, error; 4917fa9e4066Sahrens 49183cb34c60Sahrens ZFS_ENTER(zfsvfs); 49193cb34c60Sahrens ZFS_VERIFY_ZP(zp); 49200a586ceaSMark Shellenbaum 49210a586ceaSMark Shellenbaum if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), 4922f3e6fb2fSMark Shellenbaum &gen64, sizeof (uint64_t))) != 0) { 4923f3e6fb2fSMark Shellenbaum ZFS_EXIT(zfsvfs); 49240a586ceaSMark Shellenbaum return (error); 4925f3e6fb2fSMark Shellenbaum } 49260a586ceaSMark 
Shellenbaum 49270a586ceaSMark Shellenbaum gen = (uint32_t)gen64; 4928fa9e4066Sahrens 4929fa9e4066Sahrens size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN; 4930fa9e4066Sahrens if (fidp->fid_len < size) { 4931fa9e4066Sahrens fidp->fid_len = size; 49320f2dc02eSek110237 ZFS_EXIT(zfsvfs); 4933be6fd75aSMatthew Ahrens return (SET_ERROR(ENOSPC)); 4934fa9e4066Sahrens } 4935fa9e4066Sahrens 4936fa9e4066Sahrens zfid = (zfid_short_t *)fidp; 4937fa9e4066Sahrens 4938fa9e4066Sahrens zfid->zf_len = size; 4939fa9e4066Sahrens 4940fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 4941fa9e4066Sahrens zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 4942fa9e4066Sahrens 4943fa9e4066Sahrens /* Must have a non-zero generation number to distinguish from .zfs */ 4944fa9e4066Sahrens if (gen == 0) 4945fa9e4066Sahrens gen = 1; 4946fa9e4066Sahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 4947fa9e4066Sahrens zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 4948fa9e4066Sahrens 4949fa9e4066Sahrens if (size == LONG_FID_LEN) { 4950fa9e4066Sahrens uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 4951fa9e4066Sahrens zfid_long_t *zlfid; 4952fa9e4066Sahrens 4953fa9e4066Sahrens zlfid = (zfid_long_t *)fidp; 4954fa9e4066Sahrens 4955fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setid); i++) 4956fa9e4066Sahrens zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 4957fa9e4066Sahrens 4958fa9e4066Sahrens /* XXX - this should be the generation number for the objset */ 4959fa9e4066Sahrens for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 4960fa9e4066Sahrens zlfid->zf_setgen[i] = 0; 4961fa9e4066Sahrens } 4962fa9e4066Sahrens 4963fa9e4066Sahrens ZFS_EXIT(zfsvfs); 4964fa9e4066Sahrens return (0); 4965fa9e4066Sahrens } 4966fa9e4066Sahrens 4967fa9e4066Sahrens static int 4968da6c28aaSamw zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 4969da6c28aaSamw caller_context_t *ct) 4970fa9e4066Sahrens { 4971fa9e4066Sahrens znode_t *zp, *xzp; 4972fa9e4066Sahrens zfsvfs_t *zfsvfs; 
4973fa9e4066Sahrens zfs_dirlock_t *dl; 4974fa9e4066Sahrens int error; 4975fa9e4066Sahrens 4976fa9e4066Sahrens switch (cmd) { 4977fa9e4066Sahrens case _PC_LINK_MAX: 4978fa9e4066Sahrens *valp = ULONG_MAX; 4979fa9e4066Sahrens return (0); 4980fa9e4066Sahrens 4981fa9e4066Sahrens case _PC_FILESIZEBITS: 4982fa9e4066Sahrens *valp = 64; 4983fa9e4066Sahrens return (0); 4984fa9e4066Sahrens 4985fa9e4066Sahrens case _PC_XATTR_EXISTS: 4986fa9e4066Sahrens zp = VTOZ(vp); 4987fa9e4066Sahrens zfsvfs = zp->z_zfsvfs; 49883cb34c60Sahrens ZFS_ENTER(zfsvfs); 49893cb34c60Sahrens ZFS_VERIFY_ZP(zp); 4990fa9e4066Sahrens *valp = 0; 4991fa9e4066Sahrens error = zfs_dirent_lock(&dl, zp, "", &xzp, 4992da6c28aaSamw ZXATTR | ZEXISTS | ZSHARED, NULL, NULL); 4993fa9e4066Sahrens if (error == 0) { 4994fa9e4066Sahrens zfs_dirent_unlock(dl); 4995fa9e4066Sahrens if (!zfs_dirempty(xzp)) 4996fa9e4066Sahrens *valp = 1; 4997fa9e4066Sahrens VN_RELE(ZTOV(xzp)); 4998fa9e4066Sahrens } else if (error == ENOENT) { 4999fa9e4066Sahrens /* 5000fa9e4066Sahrens * If there aren't extended attributes, it's the 5001fa9e4066Sahrens * same as having zero of them. 
5002fa9e4066Sahrens */ 5003fa9e4066Sahrens error = 0; 5004fa9e4066Sahrens } 5005fa9e4066Sahrens ZFS_EXIT(zfsvfs); 5006fa9e4066Sahrens return (error); 5007fa9e4066Sahrens 5008da6c28aaSamw case _PC_SATTR_ENABLED: 5009da6c28aaSamw case _PC_SATTR_EXISTS: 50109660e5cbSJanice Chang *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 5011da6c28aaSamw (vp->v_type == VREG || vp->v_type == VDIR); 5012da6c28aaSamw return (0); 5013da6c28aaSamw 5014e802abbdSTim Haley case _PC_ACCESS_FILTERING: 5015e802abbdSTim Haley *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) && 5016e802abbdSTim Haley vp->v_type == VDIR; 5017e802abbdSTim Haley return (0); 5018e802abbdSTim Haley 5019fa9e4066Sahrens case _PC_ACL_ENABLED: 5020fa9e4066Sahrens *valp = _ACL_ACE_ENABLED; 5021fa9e4066Sahrens return (0); 5022fa9e4066Sahrens 5023fa9e4066Sahrens case _PC_MIN_HOLE_SIZE: 5024fa9e4066Sahrens *valp = (ulong_t)SPA_MINBLOCKSIZE; 5025fa9e4066Sahrens return (0); 5026fa9e4066Sahrens 50273b862e9aSRoger A. Faulkner case _PC_TIMESTAMP_RESOLUTION: 50283b862e9aSRoger A. Faulkner /* nanosecond timestamp resolution */ 50293b862e9aSRoger A. Faulkner *valp = 1L; 50303b862e9aSRoger A. Faulkner return (0); 50313b862e9aSRoger A. Faulkner 5032fa9e4066Sahrens default: 5033da6c28aaSamw return (fs_pathconf(vp, cmd, valp, cr, ct)); 5034fa9e4066Sahrens } 5035fa9e4066Sahrens } 5036fa9e4066Sahrens 5037fa9e4066Sahrens /*ARGSUSED*/ 5038fa9e4066Sahrens static int 5039da6c28aaSamw zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5040da6c28aaSamw caller_context_t *ct) 5041fa9e4066Sahrens { 5042fa9e4066Sahrens znode_t *zp = VTOZ(vp); 5043fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5044fa9e4066Sahrens int error; 5045da6c28aaSamw boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? 
B_TRUE : B_FALSE; 5046fa9e4066Sahrens 50473cb34c60Sahrens ZFS_ENTER(zfsvfs); 50483cb34c60Sahrens ZFS_VERIFY_ZP(zp); 5049da6c28aaSamw error = zfs_getacl(zp, vsecp, skipaclchk, cr); 5050fa9e4066Sahrens ZFS_EXIT(zfsvfs); 5051fa9e4066Sahrens 5052fa9e4066Sahrens return (error); 5053fa9e4066Sahrens } 5054fa9e4066Sahrens 5055fa9e4066Sahrens /*ARGSUSED*/ 5056fa9e4066Sahrens static int 5057da6c28aaSamw zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr, 5058da6c28aaSamw caller_context_t *ct) 5059fa9e4066Sahrens { 5060fa9e4066Sahrens znode_t *zp = VTOZ(vp); 5061fa9e4066Sahrens zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5062fa9e4066Sahrens int error; 5063da6c28aaSamw boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE; 506455da60b9SMark J Musante zilog_t *zilog = zfsvfs->z_log; 5065fa9e4066Sahrens 50663cb34c60Sahrens ZFS_ENTER(zfsvfs); 50673cb34c60Sahrens ZFS_VERIFY_ZP(zp); 506855da60b9SMark J Musante 5069da6c28aaSamw error = zfs_setacl(zp, vsecp, skipaclchk, cr); 507055da60b9SMark J Musante 507155da60b9SMark J Musante if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) 50725002558fSNeil Perrin zil_commit(zilog, 0); 507355da60b9SMark J Musante 5074fa9e4066Sahrens ZFS_EXIT(zfsvfs); 5075fa9e4066Sahrens return (error); 5076fa9e4066Sahrens } 5077fa9e4066Sahrens 5078fa9e4066Sahrens /* 5079f7170741SWill Andrews * The smallest read we may consider to loan out an arcbuf. 5080f7170741SWill Andrews * This must be a power of 2. 5081c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5082c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int zcr_blksz_min = (1 << 10); /* 1K */ 5083f7170741SWill Andrews /* 5084f7170741SWill Andrews * If set to less than the file block size, allow loaning out of an 5085f7170741SWill Andrews * arcbuf for a partial block read. This must be a power of 2. 
5086f7170741SWill Andrews */ 5087c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int zcr_blksz_max = (1 << 17); /* 128K */ 5088c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5089c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /*ARGSUSED*/ 5090c242f9a0Schunli zhang - Sun Microsystems - Irvine United States static int 5091c242f9a0Schunli zhang - Sun Microsystems - Irvine United States zfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr, 5092c242f9a0Schunli zhang - Sun Microsystems - Irvine United States caller_context_t *ct) 5093c242f9a0Schunli zhang - Sun Microsystems - Irvine United States { 5094c242f9a0Schunli zhang - Sun Microsystems - Irvine United States znode_t *zp = VTOZ(vp); 5095c242f9a0Schunli zhang - Sun Microsystems - Irvine United States zfsvfs_t *zfsvfs = zp->z_zfsvfs; 5096c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int max_blksz = zfsvfs->z_max_blksz; 5097c242f9a0Schunli zhang - Sun Microsystems - Irvine United States uio_t *uio = &xuio->xu_uio; 5098c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ssize_t size = uio->uio_resid; 5099c242f9a0Schunli zhang - Sun Microsystems - Irvine United States offset_t offset = uio->uio_loffset; 5100c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int blksz; 5101c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int fullblk, i; 5102c242f9a0Schunli zhang - Sun Microsystems - Irvine United States arc_buf_t *abuf; 5103c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ssize_t maxsize; 5104c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int preamble, postamble; 5105c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5106c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (xuio->xu_type != UIOTYPE_ZEROCOPY) 5107be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5108c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 
5109c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_ENTER(zfsvfs); 5110c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_VERIFY_ZP(zp); 5111c242f9a0Schunli zhang - Sun Microsystems - Irvine United States switch (ioflag) { 5112c242f9a0Schunli zhang - Sun Microsystems - Irvine United States case UIO_WRITE: 5113c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5114c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Loan out an arc_buf for write if write size is bigger than 5115c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * max_blksz, and the file's block size is also max_blksz. 5116c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5117c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = max_blksz; 5118c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (size < blksz || zp->z_blksz != blksz) { 5119c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 5120be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5121c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5122c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5123c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Caller requests buffers for write before knowing where the 5124c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * write offset might be (e.g. NFS TCP write). 
5125c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5126c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (offset == -1) { 5127c242f9a0Schunli zhang - Sun Microsystems - Irvine United States preamble = 0; 5128c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } else { 5129c242f9a0Schunli zhang - Sun Microsystems - Irvine United States preamble = P2PHASE(offset, blksz); 5130c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (preamble) { 5131c242f9a0Schunli zhang - Sun Microsystems - Irvine United States preamble = blksz - preamble; 5132c242f9a0Schunli zhang - Sun Microsystems - Irvine United States size -= preamble; 5133c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5134c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5135c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5136c242f9a0Schunli zhang - Sun Microsystems - Irvine United States postamble = P2PHASE(size, blksz); 5137c242f9a0Schunli zhang - Sun Microsystems - Irvine United States size -= postamble; 5138c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5139c242f9a0Schunli zhang - Sun Microsystems - Irvine United States fullblk = size / blksz; 5140570de38fSSurya Prakki (void) dmu_xuio_init(xuio, 5141c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (preamble != 0) + fullblk + (postamble != 0)); 5142c242f9a0Schunli zhang - Sun Microsystems - Irvine United States DTRACE_PROBE3(zfs_reqzcbuf_align, int, preamble, 5143c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int, postamble, int, 5144c242f9a0Schunli zhang - Sun Microsystems - Irvine United States (preamble != 0) + fullblk + (postamble != 0)); 5145c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5146c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5147c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Have to fix iov base/len for partial buffers. 
They 5148c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * currently represent full arc_buf's. 5149c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5150c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (preamble) { 5151c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* data begins in the middle of the arc_buf */ 51520a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 51530a586ceaSMark Shellenbaum blksz); 5154c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf); 5155570de38fSSurya Prakki (void) dmu_xuio_add(xuio, abuf, 5156570de38fSSurya Prakki blksz - preamble, preamble); 5157c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5158c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5159c242f9a0Schunli zhang - Sun Microsystems - Irvine United States for (i = 0; i < fullblk; i++) { 51600a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 51610a586ceaSMark Shellenbaum blksz); 5162c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf); 5163570de38fSSurya Prakki (void) dmu_xuio_add(xuio, abuf, 0, blksz); 5164c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5165c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5166c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (postamble) { 5167c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* data ends in the middle of the arc_buf */ 51680a586ceaSMark Shellenbaum abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), 51690a586ceaSMark Shellenbaum blksz); 5170c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf); 5171570de38fSSurya Prakki (void) dmu_xuio_add(xuio, abuf, 0, postamble); 5172c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5173c242f9a0Schunli zhang - Sun Microsystems - Irvine United States break; 5174c242f9a0Schunli zhang - Sun 
Microsystems - Irvine United States case UIO_READ: 5175c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5176c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * Loan out an arc_buf for read if the read size is larger than 5177c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * the current file block size. Block alignment is not 5178c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * considered. Partial arc_buf will be loaned out for read. 5179c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5180c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = zp->z_blksz; 5181c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (blksz < zcr_blksz_min) 5182c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = zcr_blksz_min; 5183c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (blksz > zcr_blksz_max) 5184c242f9a0Schunli zhang - Sun Microsystems - Irvine United States blksz = zcr_blksz_max; 5185c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* avoid potential complexity of dealing with it */ 5186c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (blksz > max_blksz) { 5187c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 5188be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5189c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5190c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 51910a586ceaSMark Shellenbaum maxsize = zp->z_size - uio->uio_loffset; 5192c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (size > maxsize) 5193c242f9a0Schunli zhang - Sun Microsystems - Irvine United States size = maxsize; 5194c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5195c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (size < blksz || vn_has_cached_data(vp)) { 5196c242f9a0Schunli zhang - Sun Microsystems - 
Irvine United States ZFS_EXIT(zfsvfs); 5197be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5198c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5199c242f9a0Schunli zhang - Sun Microsystems - Irvine United States break; 5200c242f9a0Schunli zhang - Sun Microsystems - Irvine United States default: 5201c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 5202be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5203c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5204c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5205c242f9a0Schunli zhang - Sun Microsystems - Irvine United States uio->uio_extflg = UIO_XUIO; 5206c242f9a0Schunli zhang - Sun Microsystems - Irvine United States XUIO_XUZC_RW(xuio) = ioflag; 5207c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ZFS_EXIT(zfsvfs); 5208c242f9a0Schunli zhang - Sun Microsystems - Irvine United States return (0); 5209c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5210c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5211c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /*ARGSUSED*/ 5212c242f9a0Schunli zhang - Sun Microsystems - Irvine United States static int 5213c242f9a0Schunli zhang - Sun Microsystems - Irvine United States zfs_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct) 5214c242f9a0Schunli zhang - Sun Microsystems - Irvine United States { 5215c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int i; 5216c242f9a0Schunli zhang - Sun Microsystems - Irvine United States arc_buf_t *abuf; 5217c242f9a0Schunli zhang - Sun Microsystems - Irvine United States int ioflag = XUIO_XUZC_RW(xuio); 5218c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5219c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY); 5220c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5221c242f9a0Schunli zhang 
- Sun Microsystems - Irvine United States i = dmu_xuio_cnt(xuio); 5222c242f9a0Schunli zhang - Sun Microsystems - Irvine United States while (i-- > 0) { 5223c242f9a0Schunli zhang - Sun Microsystems - Irvine United States abuf = dmu_xuio_arcbuf(xuio, i); 5224c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5225c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * if abuf == NULL, it must be a write buffer 5226c242f9a0Schunli zhang - Sun Microsystems - Irvine United States * that has been returned in zfs_write(). 5227c242f9a0Schunli zhang - Sun Microsystems - Irvine United States */ 5228c242f9a0Schunli zhang - Sun Microsystems - Irvine United States if (abuf) 5229c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_return_arcbuf(abuf); 5230c242f9a0Schunli zhang - Sun Microsystems - Irvine United States ASSERT(abuf || ioflag == UIO_WRITE); 5231c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5232c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5233c242f9a0Schunli zhang - Sun Microsystems - Irvine United States dmu_xuio_fini(xuio); 5234c242f9a0Schunli zhang - Sun Microsystems - Irvine United States return (0); 5235c242f9a0Schunli zhang - Sun Microsystems - Irvine United States } 5236c242f9a0Schunli zhang - Sun Microsystems - Irvine United States 5237c242f9a0Schunli zhang - Sun Microsystems - Irvine United States /* 5238fa9e4066Sahrens * Predeclare these here so that the compiler assumes that 5239fa9e4066Sahrens * this is an "old style" function declaration that does 5240fa9e4066Sahrens * not include arguments => we won't get type mismatch errors 5241fa9e4066Sahrens * in the initializations that follow. 
5242fa9e4066Sahrens */ 5243fa9e4066Sahrens static int zfs_inval(); 5244fa9e4066Sahrens static int zfs_isdir(); 5245fa9e4066Sahrens 5246fa9e4066Sahrens static int 5247fa9e4066Sahrens zfs_inval() 5248fa9e4066Sahrens { 5249be6fd75aSMatthew Ahrens return (SET_ERROR(EINVAL)); 5250fa9e4066Sahrens } 5251fa9e4066Sahrens 5252fa9e4066Sahrens static int 5253fa9e4066Sahrens zfs_isdir() 5254fa9e4066Sahrens { 5255be6fd75aSMatthew Ahrens return (SET_ERROR(EISDIR)); 5256fa9e4066Sahrens } 5257fa9e4066Sahrens /* 5258fa9e4066Sahrens * Directory vnode operations template 5259fa9e4066Sahrens */ 5260fa9e4066Sahrens vnodeops_t *zfs_dvnodeops; 5261fa9e4066Sahrens const fs_operation_def_t zfs_dvnodeops_template[] = { 5262aa59c4cbSrsb VOPNAME_OPEN, { .vop_open = zfs_open }, 5263aa59c4cbSrsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 5264aa59c4cbSrsb VOPNAME_READ, { .error = zfs_isdir }, 5265aa59c4cbSrsb VOPNAME_WRITE, { .error = zfs_isdir }, 5266aa59c4cbSrsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5267aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5268aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5269aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5270aa59c4cbSrsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5271aa59c4cbSrsb VOPNAME_CREATE, { .vop_create = zfs_create }, 5272aa59c4cbSrsb VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5273aa59c4cbSrsb VOPNAME_LINK, { .vop_link = zfs_link }, 5274aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5275aa59c4cbSrsb VOPNAME_MKDIR, { .vop_mkdir = zfs_mkdir }, 5276aa59c4cbSrsb VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5277aa59c4cbSrsb VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5278aa59c4cbSrsb VOPNAME_SYMLINK, { .vop_symlink = zfs_symlink }, 5279aa59c4cbSrsb VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5280aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5281aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5282aa59c4cbSrsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 
5283aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5284aa59c4cbSrsb VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5285aa59c4cbSrsb VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5286df2381bfSpraks VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5287fa9e4066Sahrens NULL, NULL 5288fa9e4066Sahrens }; 5289fa9e4066Sahrens 5290fa9e4066Sahrens /* 5291fa9e4066Sahrens * Regular file vnode operations template 5292fa9e4066Sahrens */ 5293fa9e4066Sahrens vnodeops_t *zfs_fvnodeops; 5294fa9e4066Sahrens const fs_operation_def_t zfs_fvnodeops_template[] = { 5295aa59c4cbSrsb VOPNAME_OPEN, { .vop_open = zfs_open }, 5296aa59c4cbSrsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 5297aa59c4cbSrsb VOPNAME_READ, { .vop_read = zfs_read }, 5298aa59c4cbSrsb VOPNAME_WRITE, { .vop_write = zfs_write }, 5299aa59c4cbSrsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5300aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5301aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5302aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5303aa59c4cbSrsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5304aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5305aa59c4cbSrsb VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5306aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5307aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5308aa59c4cbSrsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5309aa59c4cbSrsb VOPNAME_FRLOCK, { .vop_frlock = zfs_frlock }, 5310aa59c4cbSrsb VOPNAME_SPACE, { .vop_space = zfs_space }, 5311aa59c4cbSrsb VOPNAME_GETPAGE, { .vop_getpage = zfs_getpage }, 5312aa59c4cbSrsb VOPNAME_PUTPAGE, { .vop_putpage = zfs_putpage }, 5313aa59c4cbSrsb VOPNAME_MAP, { .vop_map = zfs_map }, 5314aa59c4cbSrsb VOPNAME_ADDMAP, { .vop_addmap = zfs_addmap }, 5315aa59c4cbSrsb VOPNAME_DELMAP, { .vop_delmap = zfs_delmap }, 5316aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5317aa59c4cbSrsb VOPNAME_GETSECATTR, { 
.vop_getsecattr = zfs_getsecattr }, 5318aa59c4cbSrsb VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5319aa59c4cbSrsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5320c242f9a0Schunli zhang - Sun Microsystems - Irvine United States VOPNAME_REQZCBUF, { .vop_reqzcbuf = zfs_reqzcbuf }, 5321c242f9a0Schunli zhang - Sun Microsystems - Irvine United States VOPNAME_RETZCBUF, { .vop_retzcbuf = zfs_retzcbuf }, 5322fa9e4066Sahrens NULL, NULL 5323fa9e4066Sahrens }; 5324fa9e4066Sahrens 5325fa9e4066Sahrens /* 5326fa9e4066Sahrens * Symbolic link vnode operations template 5327fa9e4066Sahrens */ 5328fa9e4066Sahrens vnodeops_t *zfs_symvnodeops; 5329fa9e4066Sahrens const fs_operation_def_t zfs_symvnodeops_template[] = { 5330aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5331aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5332aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5333aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5334aa59c4cbSrsb VOPNAME_READLINK, { .vop_readlink = zfs_readlink }, 5335aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5336aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5337aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5338aa59c4cbSrsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5339fa9e4066Sahrens NULL, NULL 5340fa9e4066Sahrens }; 5341fa9e4066Sahrens 5342fa9e4066Sahrens /* 5343743a77edSAlan Wright * special share hidden files vnode operations template 5344743a77edSAlan Wright */ 5345743a77edSAlan Wright vnodeops_t *zfs_sharevnodeops; 5346743a77edSAlan Wright const fs_operation_def_t zfs_sharevnodeops_template[] = { 5347743a77edSAlan Wright VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5348743a77edSAlan Wright VOPNAME_ACCESS, { .vop_access = zfs_access }, 5349743a77edSAlan Wright VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5350743a77edSAlan Wright VOPNAME_FID, { .vop_fid = zfs_fid }, 5351743a77edSAlan Wright VOPNAME_PATHCONF, { 
.vop_pathconf = zfs_pathconf }, 5352743a77edSAlan Wright VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5353743a77edSAlan Wright VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5354743a77edSAlan Wright VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5355743a77edSAlan Wright NULL, NULL 5356743a77edSAlan Wright }; 5357743a77edSAlan Wright 5358743a77edSAlan Wright /* 5359fa9e4066Sahrens * Extended attribute directory vnode operations template 5360f7170741SWill Andrews * 5361fa9e4066Sahrens * This template is identical to the directory vnodes 5362fa9e4066Sahrens * operation template except for restricted operations: 5363fa9e4066Sahrens * VOP_MKDIR() 5364fa9e4066Sahrens * VOP_SYMLINK() 5365f7170741SWill Andrews * 5366fa9e4066Sahrens * Note that there are other restrictions embedded in: 5367fa9e4066Sahrens * zfs_create() - restrict type to VREG 5368fa9e4066Sahrens * zfs_link() - no links into/out of attribute space 5369fa9e4066Sahrens * zfs_rename() - no moves into/out of attribute space 5370fa9e4066Sahrens */ 5371fa9e4066Sahrens vnodeops_t *zfs_xdvnodeops; 5372fa9e4066Sahrens const fs_operation_def_t zfs_xdvnodeops_template[] = { 5373aa59c4cbSrsb VOPNAME_OPEN, { .vop_open = zfs_open }, 5374aa59c4cbSrsb VOPNAME_CLOSE, { .vop_close = zfs_close }, 5375aa59c4cbSrsb VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl }, 5376aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = zfs_getattr }, 5377aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = zfs_setattr }, 5378aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = zfs_access }, 5379aa59c4cbSrsb VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup }, 5380aa59c4cbSrsb VOPNAME_CREATE, { .vop_create = zfs_create }, 5381aa59c4cbSrsb VOPNAME_REMOVE, { .vop_remove = zfs_remove }, 5382aa59c4cbSrsb VOPNAME_LINK, { .vop_link = zfs_link }, 5383aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = zfs_rename }, 5384aa59c4cbSrsb VOPNAME_MKDIR, { .error = zfs_inval }, 5385aa59c4cbSrsb VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir }, 5386aa59c4cbSrsb 
VOPNAME_READDIR, { .vop_readdir = zfs_readdir }, 5387aa59c4cbSrsb VOPNAME_SYMLINK, { .error = zfs_inval }, 5388aa59c4cbSrsb VOPNAME_FSYNC, { .vop_fsync = zfs_fsync }, 5389aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5390aa59c4cbSrsb VOPNAME_FID, { .vop_fid = zfs_fid }, 5391aa59c4cbSrsb VOPNAME_SEEK, { .vop_seek = zfs_seek }, 5392aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5393aa59c4cbSrsb VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr }, 5394aa59c4cbSrsb VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr }, 5395aa59c4cbSrsb VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 5396fa9e4066Sahrens NULL, NULL 5397fa9e4066Sahrens }; 5398fa9e4066Sahrens 5399fa9e4066Sahrens /* 5400fa9e4066Sahrens * Error vnode operations template 5401fa9e4066Sahrens */ 5402fa9e4066Sahrens vnodeops_t *zfs_evnodeops; 5403fa9e4066Sahrens const fs_operation_def_t zfs_evnodeops_template[] = { 5404aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive }, 5405aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf }, 5406fa9e4066Sahrens NULL, NULL 5407fa9e4066Sahrens }; 5408