160727d8bSWarner Losh /*-
24d846d26SWarner Losh * SPDX-License-Identifier: (BSD-2-Clause AND BSD-3-Clause)
351369649SPedro F. Giffuni *
49080ff25SRobert Watson * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
5e179b40fSPoul-Henning Kamp * All rights reserved.
6e179b40fSPoul-Henning Kamp *
7e179b40fSPoul-Henning Kamp * This software was developed for the FreeBSD Project by Marshall
8e179b40fSPoul-Henning Kamp * Kirk McKusick and Network Associates Laboratories, the Security
9e179b40fSPoul-Henning Kamp * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10e179b40fSPoul-Henning Kamp * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11e179b40fSPoul-Henning Kamp * research program
12e179b40fSPoul-Henning Kamp *
1360c97629SRobert Watson * Redistribution and use in source and binary forms, with or without
1460c97629SRobert Watson * modification, are permitted provided that the following conditions
1560c97629SRobert Watson * are met:
1660c97629SRobert Watson * 1. Redistributions of source code must retain the above copyright
1760c97629SRobert Watson * notice, this list of conditions and the following disclaimer.
1860c97629SRobert Watson * 2. Redistributions in binary form must reproduce the above copyright
1960c97629SRobert Watson * notice, this list of conditions and the following disclaimer in the
2060c97629SRobert Watson * documentation and/or other materials provided with the distribution.
2160c97629SRobert Watson *
2260c97629SRobert Watson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
2360c97629SRobert Watson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2460c97629SRobert Watson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2560c97629SRobert Watson * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2660c97629SRobert Watson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2760c97629SRobert Watson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2860c97629SRobert Watson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2960c97629SRobert Watson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3060c97629SRobert Watson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3160c97629SRobert Watson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3260c97629SRobert Watson * SUCH DAMAGE.
3360c97629SRobert Watson *
34df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1989, 1993
35df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved.
36df8bae1dSRodney W. Grimes *
37df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without
38df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions
39df8bae1dSRodney W. Grimes * are met:
40df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright
41df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer.
42df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright
43df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the
44df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution.
45fbbd9655SWarner Losh * 3. Neither the name of the University nor the names of its contributors
46df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software
47df8bae1dSRodney W. Grimes * without specific prior written permission.
48df8bae1dSRodney W. Grimes *
49df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59df8bae1dSRodney W. Grimes * SUCH DAMAGE.
60f679aa45SBruce Evans * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
61df8bae1dSRodney W. Grimes */
62df8bae1dSRodney W. Grimes
63f4636c59SDavid E. O'Brien #include <sys/cdefs.h>
640281f88eSKonstantin Belousov #include "opt_directio.h"
650281f88eSKonstantin Belousov #include "opt_ffs.h"
6674a3652fSKonstantin Belousov #include "opt_ufs.h"
670281f88eSKonstantin Belousov
68df8bae1dSRodney W. Grimes #include <sys/param.h>
698f767abfSBruce Evans #include <sys/bio.h>
70df8bae1dSRodney W. Grimes #include <sys/systm.h>
718f767abfSBruce Evans #include <sys/buf.h>
728f767abfSBruce Evans #include <sys/conf.h>
730e168822SPoul-Henning Kamp #include <sys/extattr.h>
748f767abfSBruce Evans #include <sys/kernel.h>
75104a9b7eSAlexander Kabaev #include <sys/limits.h>
768f767abfSBruce Evans #include <sys/malloc.h>
778f767abfSBruce Evans #include <sys/mount.h>
78acd3428bSRobert Watson #include <sys/priv.h>
7989f6b863SAttilio Rao #include <sys/rwlock.h>
80df8bae1dSRodney W. Grimes #include <sys/stat.h>
8189521983SKonstantin Belousov #include <sys/sysctl.h>
828f767abfSBruce Evans #include <sys/vmmeter.h>
83df8bae1dSRodney W. Grimes #include <sys/vnode.h>
84df8bae1dSRodney W. Grimes
85df8bae1dSRodney W. Grimes #include <vm/vm.h>
861c771f92SKonstantin Belousov #include <vm/vm_param.h>
87efeaf95aSDavid Greenman #include <vm/vm_extern.h>
888f767abfSBruce Evans #include <vm/vm_object.h>
898f767abfSBruce Evans #include <vm/vm_page.h>
90e179b40fSPoul-Henning Kamp #include <vm/vm_pager.h>
91e179b40fSPoul-Henning Kamp #include <vm/vnode_pager.h>
92df8bae1dSRodney W. Grimes
93a64ed089SRobert Watson #include <ufs/ufs/extattr.h>
94df8bae1dSRodney W. Grimes #include <ufs/ufs/quota.h>
95df8bae1dSRodney W. Grimes #include <ufs/ufs/inode.h>
96df8bae1dSRodney W. Grimes #include <ufs/ufs/ufs_extern.h>
978f767abfSBruce Evans #include <ufs/ufs/ufsmount.h>
9874a3652fSKonstantin Belousov #include <ufs/ufs/dir.h>
9974a3652fSKonstantin Belousov #ifdef UFS_DIRHASH
10074a3652fSKonstantin Belousov #include <ufs/ufs/dirhash.h>
10174a3652fSKonstantin Belousov #endif
102df8bae1dSRodney W. Grimes
103df8bae1dSRodney W. Grimes #include <ufs/ffs/fs.h>
104df8bae1dSRodney W. Grimes #include <ufs/ffs/ffs_extern.h>
105df8bae1dSRodney W. Grimes
/* Evaluates true when ptr satisfies the alignment requirement of type s. */
106675c187cSConrad Meyer #define ALIGNED_TO(ptr, s) \
107675c187cSConrad Meyer (((uintptr_t)(ptr) & (_Alignof(s) - 1)) == 0)
108675c187cSConrad Meyer
1095bbb8060STor Egge #ifdef DIRECTIO
1105bbb8060STor Egge extern int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
1115bbb8060STor Egge #endif
1122f514f92SKonstantin Belousov static vop_fdatasync_t ffs_fdatasync;
11389521983SKonstantin Belousov static vop_fsync_t ffs_fsync;
11489521983SKonstantin Belousov static vop_getpages_t ffs_getpages;
1154775b07eSJason A. Harmening static vop_getpages_async_t ffs_getpages_async;
116d413d210SKonstantin Belousov static vop_lock1_t ffs_lock;
11780663cadSMateusz Guzik #ifdef INVARIANTS
11880663cadSMateusz Guzik static vop_unlock_t ffs_unlock_debug;
11980663cadSMateusz Guzik #endif
1206fde64c7SPoul-Henning Kamp static vop_read_t ffs_read;
1216fde64c7SPoul-Henning Kamp static vop_write_t ffs_write;
1220176455bSPoul-Henning Kamp static int ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
12348f0495dSKirk McKusick static int ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
12448f0495dSKirk McKusick struct ucred *cred);
1256fde64c7SPoul-Henning Kamp static vop_strategy_t ffsext_strategy;
1266fde64c7SPoul-Henning Kamp static vop_closeextattr_t ffs_closeextattr;
1276fde64c7SPoul-Henning Kamp static vop_deleteextattr_t ffs_deleteextattr;
1286fde64c7SPoul-Henning Kamp static vop_getextattr_t ffs_getextattr;
1296fde64c7SPoul-Henning Kamp static vop_listextattr_t ffs_listextattr;
1306fde64c7SPoul-Henning Kamp static vop_openextattr_t ffs_openextattr;
1316fde64c7SPoul-Henning Kamp static vop_setextattr_t ffs_setextattr;
13210bcafe9SPawel Jakub Dawidek static vop_vptofh_t ffs_vptofh;
133f2c9d038SKonstantin Belousov static vop_vput_pair_t ffs_vput_pair;
134e179b40fSPoul-Henning Kamp
135a87c6962SMateusz Guzik vop_fplookup_vexec_t ufs_fplookup_vexec;
136a87c6962SMateusz Guzik
137df8bae1dSRodney W. Grimes /* Global vfs data structures for ufs. */
/*
 * Vnode operations vector without extended-attribute entries;
 * cf. ffs_vnodeops2 below, which additionally wires up the
 * *extattr operations.
 */
13802f2c6a9SPoul-Henning Kamp struct vop_vector ffs_vnodeops1 = {
13902f2c6a9SPoul-Henning Kamp .vop_default = &ufs_vnodeops,
14002f2c6a9SPoul-Henning Kamp .vop_fsync = ffs_fsync,
1412f514f92SKonstantin Belousov .vop_fdatasync = ffs_fdatasync,
14289521983SKonstantin Belousov .vop_getpages = ffs_getpages,
1434775b07eSJason A. Harmening .vop_getpages_async = ffs_getpages_async,
144d413d210SKonstantin Belousov .vop_lock1 = ffs_lock,
14580663cadSMateusz Guzik #ifdef INVARIANTS
14680663cadSMateusz Guzik .vop_unlock = ffs_unlock_debug,
14780663cadSMateusz Guzik #endif
14802f2c6a9SPoul-Henning Kamp .vop_read = ffs_read,
14902f2c6a9SPoul-Henning Kamp .vop_reallocblks = ffs_reallocblks,
15002f2c6a9SPoul-Henning Kamp .vop_write = ffs_write,
15110bcafe9SPawel Jakub Dawidek .vop_vptofh = ffs_vptofh,
152f2c9d038SKonstantin Belousov .vop_vput_pair = ffs_vput_pair,
153a87c6962SMateusz Guzik .vop_fplookup_vexec = ufs_fplookup_vexec,
154f3c81b97SMateusz Guzik .vop_fplookup_symlink = VOP_EAGAIN,
15502f2c6a9SPoul-Henning Kamp };
1566fa079fcSMateusz Guzik VFS_VOP_VECTOR_REGISTER(ffs_vnodeops1);
15702f2c6a9SPoul-Henning Kamp
/*
 * Fifo operations vector: no read/write/getpages entries and no
 * extended-attribute support (cf. ffs_fifoops2).
 */
15802f2c6a9SPoul-Henning Kamp struct vop_vector ffs_fifoops1 = {
15902f2c6a9SPoul-Henning Kamp .vop_default = &ufs_fifoops,
16002f2c6a9SPoul-Henning Kamp .vop_fsync = ffs_fsync,
1612f514f92SKonstantin Belousov .vop_fdatasync = ffs_fdatasync,
1621c521f70SKirk McKusick .vop_lock1 = ffs_lock,
16380663cadSMateusz Guzik #ifdef INVARIANTS
16480663cadSMateusz Guzik .vop_unlock = ffs_unlock_debug,
16580663cadSMateusz Guzik #endif
16610bcafe9SPawel Jakub Dawidek .vop_vptofh = ffs_vptofh,
1674032c388SMateusz Guzik .vop_fplookup_vexec = VOP_EAGAIN,
1684032c388SMateusz Guzik .vop_fplookup_symlink = VOP_EAGAIN,
16902f2c6a9SPoul-Henning Kamp };
1706fa079fcSMateusz Guzik VFS_VOP_VECTOR_REGISTER(ffs_fifoops1);
17102f2c6a9SPoul-Henning Kamp
17202f2c6a9SPoul-Henning Kamp /* Global vfs data structures for ufs. */
/* Like ffs_vnodeops1, plus the extended-attribute operations. */
17302f2c6a9SPoul-Henning Kamp struct vop_vector ffs_vnodeops2 = {
174aec0fb7bSPoul-Henning Kamp .vop_default = &ufs_vnodeops,
175aec0fb7bSPoul-Henning Kamp .vop_fsync = ffs_fsync,
1762f514f92SKonstantin Belousov .vop_fdatasync = ffs_fdatasync,
17789521983SKonstantin Belousov .vop_getpages = ffs_getpages,
1784775b07eSJason A. Harmening .vop_getpages_async = ffs_getpages_async,
179d413d210SKonstantin Belousov .vop_lock1 = ffs_lock,
18080663cadSMateusz Guzik #ifdef INVARIANTS
18180663cadSMateusz Guzik .vop_unlock = ffs_unlock_debug,
18280663cadSMateusz Guzik #endif
183aec0fb7bSPoul-Henning Kamp .vop_read = ffs_read,
184aec0fb7bSPoul-Henning Kamp .vop_reallocblks = ffs_reallocblks,
185aec0fb7bSPoul-Henning Kamp .vop_write = ffs_write,
186aec0fb7bSPoul-Henning Kamp .vop_closeextattr = ffs_closeextattr,
187aec0fb7bSPoul-Henning Kamp .vop_deleteextattr = ffs_deleteextattr,
188aec0fb7bSPoul-Henning Kamp .vop_getextattr = ffs_getextattr,
189aec0fb7bSPoul-Henning Kamp .vop_listextattr = ffs_listextattr,
190aec0fb7bSPoul-Henning Kamp .vop_openextattr = ffs_openextattr,
191aec0fb7bSPoul-Henning Kamp .vop_setextattr = ffs_setextattr,
19210bcafe9SPawel Jakub Dawidek .vop_vptofh = ffs_vptofh,
193f2c9d038SKonstantin Belousov .vop_vput_pair = ffs_vput_pair,
194a87c6962SMateusz Guzik .vop_fplookup_vexec = ufs_fplookup_vexec,
195f3c81b97SMateusz Guzik .vop_fplookup_symlink = VOP_EAGAIN,
196df8bae1dSRodney W. Grimes };
1976fa079fcSMateusz Guzik VFS_VOP_VECTOR_REGISTER(ffs_vnodeops2);
198df8bae1dSRodney W. Grimes
/*
 * Fifo operations vector with extended-attribute support; also routes
 * strategy through ffsext_strategy.
 */
19902f2c6a9SPoul-Henning Kamp struct vop_vector ffs_fifoops2 = {
200aec0fb7bSPoul-Henning Kamp .vop_default = &ufs_fifoops,
201aec0fb7bSPoul-Henning Kamp .vop_fsync = ffs_fsync,
2022f514f92SKonstantin Belousov .vop_fdatasync = ffs_fdatasync,
203d413d210SKonstantin Belousov .vop_lock1 = ffs_lock,
20480663cadSMateusz Guzik #ifdef INVARIANTS
20580663cadSMateusz Guzik .vop_unlock = ffs_unlock_debug,
20680663cadSMateusz Guzik #endif
207aec0fb7bSPoul-Henning Kamp .vop_reallocblks = ffs_reallocblks,
208aec0fb7bSPoul-Henning Kamp .vop_strategy = ffsext_strategy,
209aec0fb7bSPoul-Henning Kamp .vop_closeextattr = ffs_closeextattr,
210aec0fb7bSPoul-Henning Kamp .vop_deleteextattr = ffs_deleteextattr,
211aec0fb7bSPoul-Henning Kamp .vop_getextattr = ffs_getextattr,
212aec0fb7bSPoul-Henning Kamp .vop_listextattr = ffs_listextattr,
213aec0fb7bSPoul-Henning Kamp .vop_openextattr = ffs_openextattr,
214aec0fb7bSPoul-Henning Kamp .vop_setextattr = ffs_setextattr,
21510bcafe9SPawel Jakub Dawidek .vop_vptofh = ffs_vptofh,
2164032c388SMateusz Guzik .vop_fplookup_vexec = VOP_EAGAIN,
2174032c388SMateusz Guzik .vop_fplookup_symlink = VOP_EAGAIN,
218df8bae1dSRodney W. Grimes };
2196fa079fcSMateusz Guzik VFS_VOP_VECTOR_REGISTER(ffs_fifoops2);
220c901836cSGarrett Wollman
221df8bae1dSRodney W. Grimes /*
222df8bae1dSRodney W. Grimes * Synch an open file.
223df8bae1dSRodney W. Grimes */
224df8bae1dSRodney W. Grimes /* ARGSUSED */
22548f0495dSKirk McKusick static int
ffs_fsync(struct vop_fsync_args * ap)22640854ff5SPoul-Henning Kamp ffs_fsync(struct vop_fsync_args *ap)
227df8bae1dSRodney W. Grimes {
228cfba50c0SKonstantin Belousov 	struct vnode *vp;
229cfba50c0SKonstantin Belousov 	struct bufobj *bo;
23040854ff5SPoul-Henning Kamp 	int error;
23140854ff5SPoul-Henning Kamp
232cfba50c0SKonstantin Belousov 	vp = ap->a_vp;
233cfba50c0SKonstantin Belousov 	bo = &vp->v_bufobj;
234cfba50c0SKonstantin Belousov retry:
	/* Flush dirty buffers and (by default) the inode itself. */
23575a58389SKirk McKusick 	error = ffs_syncvnode(vp, ap->a_waitfor, 0);
23688e5b12aSPoul-Henning Kamp 	if (error)
23788e5b12aSPoul-Henning Kamp 		return (error);
238fddf7baeSKirk McKusick 	if (ap->a_waitfor == MNT_WAIT && DOINGSOFTDEP(vp)) {
239cfba50c0SKonstantin Belousov 		error = softdep_fsync(vp);
240cfba50c0SKonstantin Belousov 		if (error)
24140854ff5SPoul-Henning Kamp 			return (error);
242cfba50c0SKonstantin Belousov
243cfba50c0SKonstantin Belousov 		/*
244cfba50c0SKonstantin Belousov 		 * The softdep_fsync() function may drop vp lock,
245cfba50c0SKonstantin Belousov 		 * allowing for dirty buffers to reappear on the
246cfba50c0SKonstantin Belousov 		 * bo_dirty list. Recheck and resync as needed.
247cfba50c0SKonstantin Belousov 		 */
248cfba50c0SKonstantin Belousov 		BO_LOCK(bo);
24946c3d3acSKonstantin Belousov 		if ((vp->v_type == VREG || vp->v_type == VDIR) &&
25046c3d3acSKonstantin Belousov 		    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
251cfba50c0SKonstantin Belousov 			BO_UNLOCK(bo);
252cfba50c0SKonstantin Belousov 			goto retry;
253cfba50c0SKonstantin Belousov 		}
254cfba50c0SKonstantin Belousov 		BO_UNLOCK(bo);
255cfba50c0SKonstantin Belousov 	}
	/*
	 * A nonzero ffs_fsfail_cleanup() result (apparently a failed
	 * filesystem) is reported to the caller as ENXIO.
	 */
256d79ff54bSChuck Silvers 	if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), 0))
257d79ff54bSChuck Silvers 		return (ENXIO);
258cfba50c0SKonstantin Belousov 	return (0);
25940854ff5SPoul-Henning Kamp }
26040854ff5SPoul-Henning Kamp
/*
 * Write out all dirty buffers associated with vp and, unless
 * NO_INO_UPDT is set, update its inode; honors the waitfor policy
 * (MNT_WAIT vs. async) and the DATA_ONLY flag.  Returns ERELOOKUP
 * when the vnode lock was dropped during the sync (see the
 * softdep_sync_buf() ERELOOKUP handling below) and no other error
 * occurred.
 */
26140854ff5SPoul-Henning Kamp int
ffs_syncvnode(struct vnode * vp,int waitfor,int flags)26275a58389SKirk McKusick ffs_syncvnode(struct vnode *vp, int waitfor, int flags)
26340854ff5SPoul-Henning Kamp {
264280e091aSJeff Roberson 	struct inode *ip;
265698b1a66SJeff Roberson 	struct bufobj *bo;
266d79ff54bSChuck Silvers 	struct ufsmount *ump;
2672f514f92SKonstantin Belousov 	struct buf *bp, *nbp;
2681c85e6a3SKirk McKusick 	ufs_lbn_t lbn;
26999aa3b73SKonstantin Belousov 	int error, passes, wflag;
2708a1509e4SKonstantin Belousov 	bool still_dirty, unlocked, wait;
27195e5e988SJohn Dyson
272280e091aSJeff Roberson 	ip = VTOI(vp);
273280e091aSJeff Roberson 	bo = &vp->v_bufobj;
274d79ff54bSChuck Silvers 	ump = VFSTOUFS(vp->v_mount);
	/* Snapshot vnodes take buffer locks without WITNESS checking. */
27599aa3b73SKonstantin Belousov #ifdef WITNESS
27699aa3b73SKonstantin Belousov 	wflag = IS_SNAPSHOT(ip) ? LK_NOWITNESS : 0;
27799aa3b73SKonstantin Belousov #else
27899aa3b73SKonstantin Belousov 	wflag = 0;
27999aa3b73SKonstantin Belousov #endif
280280e091aSJeff Roberson
281280e091aSJeff Roberson 	/*
282280e091aSJeff Roberson 	 * When doing MNT_WAIT we must first flush all dependencies
283280e091aSJeff Roberson 	 * on the inode.
284280e091aSJeff Roberson 	 */
285280e091aSJeff Roberson 	if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT &&
286d79ff54bSChuck Silvers 	    (error = softdep_sync_metadata(vp)) != 0) {
287d79ff54bSChuck Silvers 		if (ffs_fsfail_cleanup(ump, error))
288d79ff54bSChuck Silvers 			error = 0;
289280e091aSJeff Roberson 		return (error);
290d79ff54bSChuck Silvers 	}
291df8bae1dSRodney W. Grimes
292f6b04d2bSDavid Greenman 	/*
293df8bae1dSRodney W. Grimes 	 * Flush all dirty buffers associated with a vnode.
294df8bae1dSRodney W. Grimes 	 */
295280e091aSJeff Roberson 	error = 0;
296280e091aSJeff Roberson 	passes = 0;
2972f514f92SKonstantin Belousov 	wait = false;	/* Always do an async pass first. */
2988a1509e4SKonstantin Belousov 	unlocked = false;
	/*
	 * Highest valid logical block given the current inode size;
	 * buffers beyond it would indicate syncing of truncated data
	 * (checked by the panic below).
	 */
299e1db6897SKonstantin Belousov 	lbn = lblkno(ITOFS(ip), (ip->i_size + ITOFS(ip)->fs_bsize - 1));
300698b1a66SJeff Roberson 	BO_LOCK(bo);
301eef33ce9SKirk McKusick loop:
302698b1a66SJeff Roberson 	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
303767b9a52SJeff Roberson 		bp->b_vflags &= ~BV_SCANNED;
304698b1a66SJeff Roberson 	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
305b1897c19SJulian Elischer 		/*
306cf60e8e4SKirk McKusick 		 * Reasons to skip this buffer: it has already been considered
307280e091aSJeff Roberson 		 * on this pass, the buffer has dependencies that will cause
308cf60e8e4SKirk McKusick 		 * it to be redirtied and it has not already been deferred,
309cf60e8e4SKirk McKusick 		 * or it is already being written.
310b1897c19SJulian Elischer 		 */
311767b9a52SJeff Roberson 		if ((bp->b_vflags & BV_SCANNED) != 0)
312cf60e8e4SKirk McKusick 			continue;
313767b9a52SJeff Roberson 		bp->b_vflags |= BV_SCANNED;
3142f514f92SKonstantin Belousov 		/*
3152f514f92SKonstantin Belousov 		 * Flush indirects in order, if requested.
3162f514f92SKonstantin Belousov 		 *
3172f514f92SKonstantin Belousov 		 * Note that if only datasync is requested, we can
3182f514f92SKonstantin Belousov 		 * skip indirect blocks when softupdates are not
3192f514f92SKonstantin Belousov 		 * active. Otherwise we must flush them with data,
3202f514f92SKonstantin Belousov 		 * since dependencies prevent data block writes.
3212f514f92SKonstantin Belousov 		 */
3221dc349abSEd Maste 		if (waitfor == MNT_WAIT && bp->b_lblkno <= -UFS_NDADDR &&
3232f514f92SKonstantin Belousov 		    (lbn_level(bp->b_lblkno) >= passes ||
3242f514f92SKonstantin Belousov 		    ((flags & DATA_ONLY) != 0 && !DOINGSOFTDEP(vp))))
325cf60e8e4SKirk McKusick 			continue;
326280e091aSJeff Roberson 		if (bp->b_lblkno > lbn)
327280e091aSJeff Roberson 			panic("ffs_syncvnode: syncing truncated data.");
		/*
		 * Try a non-blocking lock first; in a waiting pass fall
		 * back to a sleeping lock, rescanning on failure since
		 * the dirty list may have changed while asleep.
		 */
3284803948fSJeff Roberson 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) {
329698b1a66SJeff Roberson 			BO_UNLOCK(bo);
3302f514f92SKonstantin Belousov 		} else if (wait) {
33199aa3b73SKonstantin Belousov 			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
33299aa3b73SKonstantin Belousov 			    LK_INTERLOCK | wflag, BO_LOCKPTR(bo)) != 0) {
333e3d67595SKonstantin Belousov 				BO_LOCK(bo);
3344803948fSJeff Roberson 				bp->b_vflags &= ~BV_SCANNED;
335e3d67595SKonstantin Belousov 				goto next_locked;
3364803948fSJeff Roberson 			}
3374803948fSJeff Roberson 		} else
3384803948fSJeff Roberson 			continue;
339df8bae1dSRodney W. Grimes 		if ((bp->b_flags & B_DELWRI) == 0)
340df8bae1dSRodney W. Grimes 			panic("ffs_fsync: not dirty");
341b1897c19SJulian Elischer 		/*
342280e091aSJeff Roberson 		 * Check for dependencies and potentially complete them.
343b1897c19SJulian Elischer 		 */
344280e091aSJeff Roberson 		if (!LIST_EMPTY(&bp->b_dep) &&
345280e091aSJeff Roberson 		    (error = softdep_sync_buf(vp, bp,
346280e091aSJeff Roberson 		    wait ? MNT_WAIT : MNT_NOWAIT)) != 0) {
3478a1509e4SKonstantin Belousov 			/*
3488a1509e4SKonstantin Belousov 			 * Lock order conflict, buffer was already unlocked,
3498a1509e4SKonstantin Belousov 			 * and vnode possibly unlocked.
3508a1509e4SKonstantin Belousov 			 */
3518a1509e4SKonstantin Belousov 			if (error == ERELOOKUP) {
				/* Vnode was reclaimed while unlocked. */
3528a1509e4SKonstantin Belousov 				if (vp->v_data == NULL)
3538a1509e4SKonstantin Belousov 					return (EBADF);
3548a1509e4SKonstantin Belousov 				unlocked = true;
3558a1509e4SKonstantin Belousov 				if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT &&
3568a1509e4SKonstantin Belousov 				    (error = softdep_sync_metadata(vp)) != 0) {
3578a1509e4SKonstantin Belousov 					if (ffs_fsfail_cleanup(ump, error))
3588a1509e4SKonstantin Belousov 						error = 0;
3598a1509e4SKonstantin Belousov 					return (unlocked && error == 0 ?
3608a1509e4SKonstantin Belousov 					    ERELOOKUP : error);
3618a1509e4SKonstantin Belousov 				}
3628a1509e4SKonstantin Belousov 				/* Re-evaluate inode size */
3638a1509e4SKonstantin Belousov 				lbn = lblkno(ITOFS(ip), (ip->i_size +
3648a1509e4SKonstantin Belousov 				    ITOFS(ip)->fs_bsize - 1));
3658a1509e4SKonstantin Belousov 				goto next;
3668a1509e4SKonstantin Belousov 			}
367280e091aSJeff Roberson 			/* I/O error. */
368280e091aSJeff Roberson 			if (error != EBUSY) {
369280e091aSJeff Roberson 				BUF_UNLOCK(bp);
370280e091aSJeff Roberson 				return (error);
371280e091aSJeff Roberson 			}
372280e091aSJeff Roberson 			/* If we deferred once, don't defer again. */
373280e091aSJeff Roberson 			if ((bp->b_flags & B_DEFERRED) == 0) {
374280e091aSJeff Roberson 				bp->b_flags |= B_DEFERRED;
375280e091aSJeff Roberson 				BUF_UNLOCK(bp);
376280e091aSJeff Roberson 				goto next;
377280e091aSJeff Roberson 			}
378280e091aSJeff Roberson 		}
		/* Synchronous write in a wait pass, async otherwise. */
379280e091aSJeff Roberson 		if (wait) {
380280e091aSJeff Roberson 			bremfree(bp);
381d79ff54bSChuck Silvers 			error = bwrite(bp);
382d79ff54bSChuck Silvers 			if (ffs_fsfail_cleanup(ump, error))
383d79ff54bSChuck Silvers 				error = 0;
384d79ff54bSChuck Silvers 			if (error != 0)
385280e091aSJeff Roberson 				return (error);
386280e091aSJeff Roberson 		} else if ((bp->b_flags & B_CLUSTEROK)) {
38795e5e988SJohn Dyson 			(void) vfs_bio_awrite(bp);
38895e5e988SJohn Dyson 		} else {
38934f72be5SJohn Dyson 			bremfree(bp);
390df8bae1dSRodney W. Grimes 			(void) bawrite(bp);
39195e5e988SJohn Dyson 		}
392280e091aSJeff Roberson next:
393eef33ce9SKirk McKusick 		/*
394eef33ce9SKirk McKusick 		 * Since we may have slept during the I/O, we need
395eef33ce9SKirk McKusick 		 * to start from a known point.
396eef33ce9SKirk McKusick 		 */
397698b1a66SJeff Roberson 		BO_LOCK(bo);
398e3d67595SKonstantin Belousov next_locked:
399698b1a66SJeff Roberson 		nbp = TAILQ_FIRST(&bo->bo_dirty.bv_hd);
400df8bae1dSRodney W. Grimes 	}
	/* For async syncs, one pass is all we do. */
401280e091aSJeff Roberson 	if (waitfor != MNT_WAIT) {
402698b1a66SJeff Roberson 		BO_UNLOCK(bo);
40375a58389SKirk McKusick 		if ((flags & NO_INO_UPDT) != 0)
4048a1509e4SKonstantin Belousov 			return (unlocked ? ERELOOKUP : 0);
4058a1509e4SKonstantin Belousov 		error = ffs_update(vp, 0);
4068a1509e4SKonstantin Belousov 		if (error == 0 && unlocked)
4078a1509e4SKonstantin Belousov 			error = ERELOOKUP;
4088a1509e4SKonstantin Belousov 		return (error);
409df8bae1dSRodney W. Grimes 	}
410280e091aSJeff Roberson 	/* Drain IO to see if we're done. */
411280e091aSJeff Roberson 	bufobj_wwait(bo, 0, 0);
412280e091aSJeff Roberson 	/*
413280e091aSJeff Roberson 	 * Block devices associated with filesystems may have new I/O
414280e091aSJeff Roberson 	 * requests posted for them even if the vnode is locked, so no
415280e091aSJeff Roberson 	 * amount of trying will get them clean. We make several passes
416280e091aSJeff Roberson 	 * as a best effort.
417280e091aSJeff Roberson 	 *
418280e091aSJeff Roberson 	 * Regular files may need multiple passes to flush all dependency
419280e091aSJeff Roberson 	 * work as it is possible that we must write once per indirect
420280e091aSJeff Roberson 	 * level, once for the leaf, and once for the inode and each of
421280e091aSJeff Roberson 	 * these will be done with one sync and one async pass.
422280e091aSJeff Roberson 	 */
423280e091aSJeff Roberson 	if (bo->bo_dirty.bv_cnt > 0) {
4242f514f92SKonstantin Belousov 		if ((flags & DATA_ONLY) == 0) {
4252f514f92SKonstantin Belousov 			still_dirty = true;
4262f514f92SKonstantin Belousov 		} else {
4272f514f92SKonstantin Belousov 			/*
4282f514f92SKonstantin Belousov 			 * For data-only sync, dirty indirect buffers
4292f514f92SKonstantin Belousov 			 * are ignored.
4302f514f92SKonstantin Belousov 			 */
4312f514f92SKonstantin Belousov 			still_dirty = false;
4322f514f92SKonstantin Belousov 			TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
4331dc349abSEd Maste 				if (bp->b_lblkno > -UFS_NDADDR) {
4342f514f92SKonstantin Belousov 					still_dirty = true;
4352f514f92SKonstantin Belousov 					break;
4362f514f92SKonstantin Belousov 				}
4372f514f92SKonstantin Belousov 			}
4382f514f92SKonstantin Belousov 		}
4392f514f92SKonstantin Belousov
4402f514f92SKonstantin Belousov 		if (still_dirty) {
441280e091aSJeff Roberson 			/* Write the inode after sync passes to flush deps. */
4422f514f92SKonstantin Belousov 			if (wait && DOINGSOFTDEP(vp) &&
4432f514f92SKonstantin Belousov 			    (flags & NO_INO_UPDT) == 0) {
444280e091aSJeff Roberson 				BO_UNLOCK(bo);
445064f517dSKonstantin Belousov 				ffs_update(vp, 1);
446280e091aSJeff Roberson 				BO_LOCK(bo);
447280e091aSJeff Roberson 			}
448280e091aSJeff Roberson 			/* switch between sync/async. */
449280e091aSJeff Roberson 			wait = !wait;
4501dc349abSEd Maste 			if (wait || ++passes < UFS_NIADDR + 2)
451280e091aSJeff Roberson 				goto loop;
452df8bae1dSRodney W. Grimes 		}
4532f514f92SKonstantin Belousov 	}
454698b1a66SJeff Roberson 	BO_UNLOCK(bo);
45535338e60SKirk McKusick 	error = 0;
	/*
	 * Final inode update: always for a full sync (plus journal fsync
	 * under SU+J); for DATA_ONLY only when size or indirect-block
	 * data changed (IN_SIZEMOD | IN_IBLKDATA).
	 */
4562f514f92SKonstantin Belousov 	if ((flags & DATA_ONLY) == 0) {
45775a58389SKirk McKusick 		if ((flags & NO_INO_UPDT) == 0)
458064f517dSKonstantin Belousov 			error = ffs_update(vp, 1);
459280e091aSJeff Roberson 		if (DOINGSUJ(vp))
460280e091aSJeff Roberson 			softdep_journal_fsync(VTOI(vp));
46152488b51SKirk McKusick 	} else if ((ip->i_flags & (IN_SIZEMOD | IN_IBLKDATA)) != 0) {
4627428630bSKonstantin Belousov 		error = ffs_update(vp, 1);
4632f514f92SKonstantin Belousov 	}
4648a1509e4SKonstantin Belousov 	if (error == 0 && unlocked)
4658a1509e4SKonstantin Belousov 		error = ERELOOKUP;
4661de1e2bfSKonstantin Belousov 	if (error == 0)
4671de1e2bfSKonstantin Belousov 		ip->i_flag &= ~IN_NEEDSYNC;
468280e091aSJeff Roberson 	return (error);
469df8bae1dSRodney W. Grimes }
470e179b40fSPoul-Henning Kamp
471d6f622ccSPoul-Henning Kamp static int
ffs_fdatasync(struct vop_fdatasync_args * ap)4722f514f92SKonstantin Belousov ffs_fdatasync(struct vop_fdatasync_args *ap)
4732f514f92SKonstantin Belousov {
4742f514f92SKonstantin Belousov
4752f514f92SKonstantin Belousov return (ffs_syncvnode(ap->a_vp, MNT_WAIT, DATA_ONLY));
4762f514f92SKonstantin Belousov }
4772f514f92SKonstantin Belousov
/*
 * VOP_LOCK1 for FFS vnodes.  Beyond the generic lockmgr behavior, this
 * handles the vnode's lock pointer changing while we slept (a snapshot
 * <-> regular file transition), releasing the stale lock and retrying
 * with the new one.  DIAGNOSTIC builds also maintain per-inode lock
 * bookkeeping (i_lock_gen, ufs_unlock_tracker()).
 */
4782f514f92SKonstantin Belousov static int
ffs_lock(struct vop_lock1_args * ap)479064e6b43SKirk McKusick ffs_lock(
480d413d210SKonstantin Belousov 	struct vop_lock1_args /* {
481d6f622ccSPoul-Henning Kamp 		struct vnode *a_vp;
482d6f622ccSPoul-Henning Kamp 		int a_flags;
4832f6a774bSKip Macy 		char *file;
4842f6a774bSKip Macy 		int line;
485064e6b43SKirk McKusick 	} */ *ap)
486d6f622ccSPoul-Henning Kamp {
48761846fc4SKonstantin Belousov #if !defined(NO_FFS_SNAPSHOT) || defined(DIAGNOSTIC)
48861846fc4SKonstantin Belousov 	struct vnode *vp = ap->a_vp;
48961846fc4SKonstantin Belousov #endif	/* !NO_FFS_SNAPSHOT || DIAGNOSTIC */
49061846fc4SKonstantin Belousov #ifdef DIAGNOSTIC
49161846fc4SKonstantin Belousov 	struct inode *ip;
49261846fc4SKonstantin Belousov #endif	/* DIAGNOSTIC */
49361846fc4SKonstantin Belousov 	int result;
494c7793f61STor Egge #ifndef NO_FFS_SNAPSHOT
4956d94935dSTor Egge 	int flags;
4966d94935dSTor Egge 	struct lock *lkp;
4976d94935dSTor Egge
498e5e10c82SMateusz Guzik 	/*
499e5e10c82SMateusz Guzik 	 * Adaptive spinning mixed with SU leads to trouble. use a giant hammer
500e5e10c82SMateusz Guzik 	 * and only use it when LK_NODDLKTREAT is set. Currently this means it
501e5e10c82SMateusz Guzik 	 * is only used during path lookup.
502e5e10c82SMateusz Guzik 	 */
503e5e10c82SMateusz Guzik 	if ((ap->a_flags & LK_NODDLKTREAT) != 0)
50431ad4050SMateusz Guzik 		ap->a_flags |= LK_ADAPTIVE;
5056d94935dSTor Egge 	switch (ap->a_flags & LK_TYPE_MASK) {
5066d94935dSTor Egge 	case LK_SHARED:
5076d94935dSTor Egge 	case LK_UPGRADE:
5086d94935dSTor Egge 	case LK_EXCLUSIVE:
5096d94935dSTor Egge 		flags = ap->a_flags;
5106d94935dSTor Egge 		for (;;) {
511016f98f9SKonstantin Belousov #ifdef DEBUG_VFS_LOCKS
512f1fa1ba3SMateusz Guzik 			VNPASS(vp->v_holdcnt != 0, vp);
51361846fc4SKonstantin Belousov #endif	/* DEBUG_VFS_LOCKS */
5146d94935dSTor Egge 			lkp = vp->v_vnlock;
5154d51e175SMateusz Guzik 			result = lockmgr_lock_flags(lkp, flags,
5164d51e175SMateusz Guzik 			    &VI_MTX(vp)->lock_object, ap->a_file, ap->a_line);
5176d94935dSTor Egge 			if (lkp == vp->v_vnlock || result != 0)
5186d94935dSTor Egge 				break;
5196d94935dSTor Egge 			/*
5206d94935dSTor Egge 			 * Apparent success, except that the vnode
5216d94935dSTor Egge 			 * mutated between snapshot file vnode and
5226d94935dSTor Egge 			 * regular file vnode while this process
5236d94935dSTor Egge 			 * slept. The lock currently held is not the
5246d94935dSTor Egge 			 * right lock. Release it, and try to get the
5256d94935dSTor Egge 			 * new lock.
5266d94935dSTor Egge 			 */
5274d51e175SMateusz Guzik 			lockmgr_unlock(lkp);
528d04963d0SJeff Roberson 			if ((flags & (LK_INTERLOCK | LK_NOWAIT)) ==
529d04963d0SJeff Roberson 			    (LK_INTERLOCK | LK_NOWAIT))
530d04963d0SJeff Roberson 				return (EBUSY);
			/* An upgrade cannot be retried; take it exclusive. */
5316d94935dSTor Egge 			if ((flags & LK_TYPE_MASK) == LK_UPGRADE)
5326d94935dSTor Egge 				flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE;
5336d94935dSTor Egge 			flags &= ~LK_INTERLOCK;
5346d94935dSTor Egge 		}
53561846fc4SKonstantin Belousov #ifdef DIAGNOSTIC
53661846fc4SKonstantin Belousov 		switch (ap->a_flags & LK_TYPE_MASK) {
53761846fc4SKonstantin Belousov 		case LK_UPGRADE:
53861846fc4SKonstantin Belousov 		case LK_EXCLUSIVE:
53961846fc4SKonstantin Belousov 			if (result == 0 && vp->v_vnlock->lk_recurse == 0) {
54061846fc4SKonstantin Belousov 				ip = VTOI(vp);
54161846fc4SKonstantin Belousov 				if (ip != NULL)
54261846fc4SKonstantin Belousov 					ip->i_lock_gen++;
54361846fc4SKonstantin Belousov 			}
54461846fc4SKonstantin Belousov 		}
54561846fc4SKonstantin Belousov #endif	/* DIAGNOSTIC */
5466d94935dSTor Egge 		break;
5476d94935dSTor Egge 	default:
54861846fc4SKonstantin Belousov #ifdef DIAGNOSTIC
54961846fc4SKonstantin Belousov 		if ((ap->a_flags & LK_TYPE_MASK) == LK_DOWNGRADE) {
55061846fc4SKonstantin Belousov 			ip = VTOI(vp);
55161846fc4SKonstantin Belousov 			if (ip != NULL)
55261846fc4SKonstantin Belousov 				ufs_unlock_tracker(ip);
5536d94935dSTor Egge 		}
55461846fc4SKonstantin Belousov #endif	/* DIAGNOSTIC */
55561846fc4SKonstantin Belousov 		result = VOP_LOCK1_APV(&ufs_vnodeops, ap);
55661846fc4SKonstantin Belousov 		break;
55761846fc4SKonstantin Belousov 	}
55861846fc4SKonstantin Belousov #else	/* NO_FFS_SNAPSHOT */
559e5e10c82SMateusz Guzik 	/*
560e5e10c82SMateusz Guzik 	 * See above for an explanation.
561e5e10c82SMateusz Guzik 	 */
562e5e10c82SMateusz Guzik 	if ((ap->a_flags & LK_NODDLKTREAT) != 0)
56331ad4050SMateusz Guzik 		ap->a_flags |= LK_ADAPTIVE;
56461846fc4SKonstantin Belousov #ifdef DIAGNOSTIC
56561846fc4SKonstantin Belousov 	if ((ap->a_flags & LK_TYPE_MASK) == LK_DOWNGRADE) {
56661846fc4SKonstantin Belousov 		ip = VTOI(vp);
56761846fc4SKonstantin Belousov 		if (ip != NULL)
56861846fc4SKonstantin Belousov 			ufs_unlock_tracker(ip);
56961846fc4SKonstantin Belousov 	}
57061846fc4SKonstantin Belousov #endif	/* DIAGNOSTIC */
57161846fc4SKonstantin Belousov 	result = VOP_LOCK1_APV(&ufs_vnodeops, ap);
57261846fc4SKonstantin Belousov #endif	/* NO_FFS_SNAPSHOT */
57361846fc4SKonstantin Belousov #ifdef DIAGNOSTIC
57461846fc4SKonstantin Belousov 	switch (ap->a_flags & LK_TYPE_MASK) {
57561846fc4SKonstantin Belousov 	case LK_UPGRADE:
57661846fc4SKonstantin Belousov 	case LK_EXCLUSIVE:
57761846fc4SKonstantin Belousov 		if (result == 0 && vp->v_vnlock->lk_recurse == 0) {
57861846fc4SKonstantin Belousov 			ip = VTOI(vp);
57961846fc4SKonstantin Belousov 			if (ip != NULL)
58061846fc4SKonstantin Belousov 				ip->i_lock_gen++;
58161846fc4SKonstantin Belousov 		}
58261846fc4SKonstantin Belousov 	}
58361846fc4SKonstantin Belousov #endif	/* DIAGNOSTIC */
58461846fc4SKonstantin Belousov 	return (result);
585d6f622ccSPoul-Henning Kamp }
586e179b40fSPoul-Henning Kamp
#ifdef INVARIANTS
/*
 * Debug wrapper around the generic ufs VOP_UNLOCK, compiled in only
 * under INVARIANTS.  Before releasing the lock it asserts two
 * invariants that must hold at unlock time:
 *
 *  1. An inode carrying any of the lazily-synced i_flag bits
 *     (UFS_INODE_FLAG_LAZY_MASK_ASSERTABLE) must have its vnode on the
 *     per-mount lazy list (VMP_LAZYLIST).
 *  2. A directory vnode may not be unlocked (past its last recursion)
 *     while IN_ENDOFF is still pending.
 */
static int
ffs_unlock_debug(struct vop_unlock_args *ap)
{
	struct vnode *vp;
	struct inode *ip;

	vp = ap->a_vp;
	ip = VTOI(vp);
	if (ip->i_flag & UFS_INODE_FLAG_LAZY_MASK_ASSERTABLE) {
		if ((vp->v_mflag & VMP_LAZYLIST) == 0) {
			/*
			 * Unlocked check above raced benignly; recheck
			 * under the vnode interlock before asserting.
			 */
			VI_LOCK(vp);
			VNASSERT((vp->v_mflag & VMP_LAZYLIST), vp,
			    ("%s: modified vnode (%x) not on lazy list",
			    __func__, ip->i_flag));
			VI_UNLOCK(vp);
		}
	}
	KASSERT(vp->v_type != VDIR || vp->v_vnlock->lk_recurse != 0 ||
	    (ip->i_flag & IN_ENDOFF) == 0,
	    ("ufs dir vp %p ip %p flags %#x", vp, ip, ip->i_flag));
#ifdef DIAGNOSTIC
	/*
	 * Dropping the last exclusive hold (no remaining recursion).
	 * NOTE(review): ufs_unlock_tracker() is defined elsewhere;
	 * presumably it retires the exclusive-owner record — confirm
	 * against its definition.
	 */
	if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE && ip != NULL &&
	    vp->v_vnlock->lk_recurse == 0)
		ufs_unlock_tracker(ip);
#endif
	return (VOP_UNLOCK_APV(&ufs_vnodeops, ap));
}
#endif
61680663cadSMateusz Guzik
6172ebc8829SKonstantin Belousov static int
ffs_read_hole(struct uio * uio,long xfersize,long * size)6182ebc8829SKonstantin Belousov ffs_read_hole(struct uio *uio, long xfersize, long *size)
6192ebc8829SKonstantin Belousov {
6202ebc8829SKonstantin Belousov ssize_t saved_resid, tlen;
6212ebc8829SKonstantin Belousov int error;
6222ebc8829SKonstantin Belousov
6232ebc8829SKonstantin Belousov while (xfersize > 0) {
6242ebc8829SKonstantin Belousov tlen = min(xfersize, ZERO_REGION_SIZE);
6252ebc8829SKonstantin Belousov saved_resid = uio->uio_resid;
6262ebc8829SKonstantin Belousov error = vn_io_fault_uiomove(__DECONST(void *, zero_region),
6272ebc8829SKonstantin Belousov tlen, uio);
6282ebc8829SKonstantin Belousov if (error != 0)
6292ebc8829SKonstantin Belousov return (error);
6302ebc8829SKonstantin Belousov tlen = saved_resid - uio->uio_resid;
6312ebc8829SKonstantin Belousov xfersize -= tlen;
6322ebc8829SKonstantin Belousov *size -= tlen;
6332ebc8829SKonstantin Belousov }
6342ebc8829SKonstantin Belousov return (0);
6352ebc8829SKonstantin Belousov }
6362ebc8829SKonstantin Belousov
/*
 * Vnode op for reading.
 *
 * Transfers data from the file backing ap->a_vp into ap->a_uio one
 * filesystem block at a time, using clustering or read-ahead when the
 * mount options and access pattern allow it.  Holes are serviced from
 * the zero region (via ffs_read_hole()) rather than from the media.
 */
static int
ffs_read(
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap)
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	ssize_t orig_resid;
	int bflag, error, ioflag, seqcount;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	/* Extended attribute area reads go through ffs_extread() instead. */
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extread(vp, uio, ioflag));
#else
		panic("ffs_read+IO_EXT");
#endif
#ifdef DIRECTIO
	if ((ioflag & IO_DIRECT) != 0) {
		int workdone;

		/*
		 * ffs_rawread() may complete the transfer itself
		 * (workdone != 0); otherwise fall through to the
		 * buffered path below.
		 */
		error = ffs_rawread(vp, uio, &workdone);
		if (error != 0 || workdone != 0)
			return error;
	}
#endif

	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_READ)
		panic("ffs_read: mode");

	if (vp->v_type == VLNK) {
		/* Short symlinks are stored in the inode, not read here. */
		if ((int)ip->i_size < VFSTOUFS(vp->v_mount)->um_maxsymlinklen)
			panic("ffs_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ffs_read: type %d", vp->v_type);
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0"));
	fs = ITOFS(ip);
	/* Reject offsets past the largest file this filesystem supports. */
	if (uio->uio_offset < ip->i_size &&
	    uio->uio_offset >= fs->fs_maxfilesize)
		return (EOVERFLOW);

	/*
	 * GB_NOSPARSE makes the bread variants report holes with
	 * EJUSTRETURN (handled below) instead of allocating; it is
	 * skipped for UIO_NOCOPY since no data would be copied anyway.
	 */
	bflag = GB_UNMAPPED | (uio->uio_segflg == UIO_NOCOPY ? 0 : GB_NOSPARSE);
#ifdef WITNESS
	bflag |= IS_SNAPSHOT(ip) ? GB_NOWITNESS : 0;
#endif
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type ( fragment or full block,
		 * depending ).
		 */
		size = blksize(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ip->i_size) {
			/*
			 * Don't do readahead if this is the end of the file.
			 */
			error = bread_gb(vp, lbn, size, NOCRED, bflag, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			/*
			 * Otherwise if we are allowed to cluster,
			 * grab as much as we can.
			 *
			 * XXX  This may not be a win if we are not
			 * doing sequential access.
			 */
			error = cluster_read(vp, ip->i_size, lbn,
			    size, NOCRED, blkoffset + uio->uio_resid,
			    seqcount, bflag, &bp);
		} else if (seqcount > 1) {
			/*
			 * If we are NOT allowed to cluster, then
			 * if we appear to be acting sequentially,
			 * fire off a request for a readahead
			 * as well as a read. Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = blksize(fs, ip, nextlbn);
			error = breadn_flags(vp, lbn, lbn, size, &nextlbn,
			    &nextsize, 1, NOCRED, bflag, NULL, &bp);
		} else {
			/*
			 * Failing all of the above, just read what the
			 * user asked for. Interestingly, the same as
			 * the first option above.
			 */
			error = bread_gb(vp, lbn, size, NOCRED, bflag, &bp);
		}
		if (error == EJUSTRETURN) {
			/* A hole: supply zeros instead of reading media. */
			error = ffs_read_hole(uio, xfersize, &size);
			if (error == 0)
				continue;
		}
		if (error != 0) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		if (buf_mapped(bp)) {
			error = vn_io_fault_uiomove((char *)bp->b_data +
			    blkoffset, (int)xfersize, uio);
		} else {
			/* Unmapped buffer: copy directly from its pages. */
			error = vn_io_fault_pgmove(bp->b_pages,
			    blkoffset + (bp->b_offset & PAGE_MASK),
			    (int)xfersize, uio);
		}
		if (error)
			break;

		vfs_bio_brelse(bp, ioflag);
	}

	/*
	 * This can only happen in the case of an error
	 * because the loop above resets bp to NULL on each iteration
	 * and on normal completion has not set a new value into it.
	 * so it must have come from a 'break' statement
	 */
	if (bp != NULL)
		vfs_bio_brelse(bp, ioflag);

	/* Flag the inode for an atime update unless the mount forbids it. */
	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0)
		UFS_INODE_SET_FLAG_SHARED(ip, IN_ACCESS);
	return (error);
}
827e179b40fSPoul-Henning Kamp
828e179b40fSPoul-Henning Kamp /*
829e179b40fSPoul-Henning Kamp * Vnode op for writing.
830e179b40fSPoul-Henning Kamp */
83137c84183SPoul-Henning Kamp static int
ffs_write(struct vop_write_args * ap)832064e6b43SKirk McKusick ffs_write(
833e179b40fSPoul-Henning Kamp struct vop_write_args /* {
834e179b40fSPoul-Henning Kamp struct vnode *a_vp;
835e179b40fSPoul-Henning Kamp struct uio *a_uio;
836e179b40fSPoul-Henning Kamp int a_ioflag;
837e179b40fSPoul-Henning Kamp struct ucred *a_cred;
838064e6b43SKirk McKusick } */ *ap)
839e179b40fSPoul-Henning Kamp {
840e179b40fSPoul-Henning Kamp struct vnode *vp;
841e179b40fSPoul-Henning Kamp struct uio *uio;
842e179b40fSPoul-Henning Kamp struct inode *ip;
843e179b40fSPoul-Henning Kamp struct fs *fs;
844e179b40fSPoul-Henning Kamp struct buf *bp;
845e179b40fSPoul-Henning Kamp ufs_lbn_t lbn;
846e179b40fSPoul-Henning Kamp off_t osize;
84787525ef9SKonstantin Belousov ssize_t resid, r;
848e179b40fSPoul-Henning Kamp int seqcount;
849526d0bd5SKonstantin Belousov int blkoffset, error, flags, ioflag, size, xfersize;
850e179b40fSPoul-Henning Kamp
85118280bc6SPoul-Henning Kamp vp = ap->a_vp;
85249831462SKonstantin Belousov if (DOINGSUJ(vp))
85349831462SKonstantin Belousov softdep_prealloc(vp, MNT_WAIT);
85449831462SKonstantin Belousov if (vp->v_data == NULL)
85549831462SKonstantin Belousov return (EBADF);
85649831462SKonstantin Belousov
85718280bc6SPoul-Henning Kamp uio = ap->a_uio;
85818280bc6SPoul-Henning Kamp ioflag = ap->a_ioflag;
859e179b40fSPoul-Henning Kamp if (ap->a_ioflag & IO_EXT)
8600176455bSPoul-Henning Kamp #ifdef notyet
86118280bc6SPoul-Henning Kamp return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
8620176455bSPoul-Henning Kamp #else
863070f8eefSAndrey A. Chernov panic("ffs_write+IO_EXT");
8640176455bSPoul-Henning Kamp #endif
865e179b40fSPoul-Henning Kamp
8666bd39fe9SAlexander Kabaev seqcount = ap->a_ioflag >> IO_SEQSHIFT;
867e179b40fSPoul-Henning Kamp ip = VTOI(vp);
868e179b40fSPoul-Henning Kamp
8691102b89bSDavid E. O'Brien #ifdef INVARIANTS
870e179b40fSPoul-Henning Kamp if (uio->uio_rw != UIO_WRITE)
871070f8eefSAndrey A. Chernov panic("ffs_write: mode");
872e179b40fSPoul-Henning Kamp #endif
873e179b40fSPoul-Henning Kamp
874e179b40fSPoul-Henning Kamp switch (vp->v_type) {
875e179b40fSPoul-Henning Kamp case VREG:
876e179b40fSPoul-Henning Kamp if (ioflag & IO_APPEND)
877e179b40fSPoul-Henning Kamp uio->uio_offset = ip->i_size;
8781723bc36SBruce Evans if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
879e179b40fSPoul-Henning Kamp return (EPERM);
880e179b40fSPoul-Henning Kamp /* FALLTHROUGH */
881e179b40fSPoul-Henning Kamp case VLNK:
882e179b40fSPoul-Henning Kamp break;
883e179b40fSPoul-Henning Kamp case VDIR:
884070f8eefSAndrey A. Chernov panic("ffs_write: dir write");
885e179b40fSPoul-Henning Kamp break;
886e179b40fSPoul-Henning Kamp default:
887070f8eefSAndrey A. Chernov panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type,
888e179b40fSPoul-Henning Kamp (int)uio->uio_offset,
889e179b40fSPoul-Henning Kamp (int)uio->uio_resid
890e179b40fSPoul-Henning Kamp );
891e179b40fSPoul-Henning Kamp }
892e179b40fSPoul-Henning Kamp
893a0036d23SAndrey A. Chernov KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0"));
894a0036d23SAndrey A. Chernov KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0"));
895e1db6897SKonstantin Belousov fs = ITOFS(ip);
89687525ef9SKonstantin Belousov
897e179b40fSPoul-Henning Kamp /*
898e179b40fSPoul-Henning Kamp * Maybe this should be above the vnode op call, but so long as
899e179b40fSPoul-Henning Kamp * file servers have no limits, I don't think it matters.
900e179b40fSPoul-Henning Kamp */
90187525ef9SKonstantin Belousov error = vn_rlimit_fsizex(vp, uio, fs->fs_maxfilesize, &r,
90287525ef9SKonstantin Belousov uio->uio_td);
90387525ef9SKonstantin Belousov if (error != 0) {
90487525ef9SKonstantin Belousov vn_rlimit_fsizex_res(uio, r);
905cc65a412SKonstantin Belousov return (error);
90687525ef9SKonstantin Belousov }
907e179b40fSPoul-Henning Kamp
908e179b40fSPoul-Henning Kamp resid = uio->uio_resid;
909e179b40fSPoul-Henning Kamp osize = ip->i_size;
9101b7e3dafSMatthew Dillon if (seqcount > BA_SEQMAX)
9111b7e3dafSMatthew Dillon flags = BA_SEQMAX << BA_SEQSHIFT;
9121b7e3dafSMatthew Dillon else
9131b7e3dafSMatthew Dillon flags = seqcount << BA_SEQSHIFT;
9140c01bcb9SBruce Evans if (ioflag & IO_SYNC)
9151b7e3dafSMatthew Dillon flags |= IO_SYNC;
91659a01b70SKonstantin Belousov flags |= BA_UNMAPPED;
917e179b40fSPoul-Henning Kamp
918e179b40fSPoul-Henning Kamp for (error = 0; uio->uio_resid > 0;) {
919e179b40fSPoul-Henning Kamp lbn = lblkno(fs, uio->uio_offset);
920e179b40fSPoul-Henning Kamp blkoffset = blkoff(fs, uio->uio_offset);
921e179b40fSPoul-Henning Kamp xfersize = fs->fs_bsize - blkoffset;
922e179b40fSPoul-Henning Kamp if (uio->uio_resid < xfersize)
923e179b40fSPoul-Henning Kamp xfersize = uio->uio_resid;
924e179b40fSPoul-Henning Kamp if (uio->uio_offset + xfersize > ip->i_size)
925e179b40fSPoul-Henning Kamp vnode_pager_setsize(vp, uio->uio_offset + xfersize);
926e179b40fSPoul-Henning Kamp
927e179b40fSPoul-Henning Kamp /*
928e179b40fSPoul-Henning Kamp * We must perform a read-before-write if the transfer size
929e179b40fSPoul-Henning Kamp * does not cover the entire buffer.
930e179b40fSPoul-Henning Kamp */
931e179b40fSPoul-Henning Kamp if (fs->fs_bsize > xfersize)
932e179b40fSPoul-Henning Kamp flags |= BA_CLRBUF;
933e179b40fSPoul-Henning Kamp else
934e179b40fSPoul-Henning Kamp flags &= ~BA_CLRBUF;
935e179b40fSPoul-Henning Kamp /* XXX is uio->uio_offset the right thing here? */
936e179b40fSPoul-Henning Kamp error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
937e179b40fSPoul-Henning Kamp ap->a_cred, flags, &bp);
938b1a4c8e5SKonstantin Belousov if (error != 0) {
939b1a4c8e5SKonstantin Belousov vnode_pager_setsize(vp, ip->i_size);
940e179b40fSPoul-Henning Kamp break;
941b1a4c8e5SKonstantin Belousov }
9424b14cc02SKen Smith if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
9434b14cc02SKen Smith bp->b_flags |= B_NOCACHE;
944e179b40fSPoul-Henning Kamp
945e179b40fSPoul-Henning Kamp if (uio->uio_offset + xfersize > ip->i_size) {
946e179b40fSPoul-Henning Kamp ip->i_size = uio->uio_offset + xfersize;
947b403319bSAlexander Kabaev DIP_SET(ip, i_size, ip->i_size);
94852488b51SKirk McKusick UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE);
949e179b40fSPoul-Henning Kamp }
950e179b40fSPoul-Henning Kamp
951e179b40fSPoul-Henning Kamp size = blksize(fs, ip, lbn) - bp->b_resid;
952e179b40fSPoul-Henning Kamp if (size < xfersize)
953e179b40fSPoul-Henning Kamp xfersize = size;
954e179b40fSPoul-Henning Kamp
955fade8dd7SJeff Roberson if (buf_mapped(bp)) {
95659a01b70SKonstantin Belousov error = vn_io_fault_uiomove((char *)bp->b_data +
95759a01b70SKonstantin Belousov blkoffset, (int)xfersize, uio);
95859a01b70SKonstantin Belousov } else {
95982817f26SChuck Silvers error = vn_io_fault_pgmove(bp->b_pages,
96082817f26SChuck Silvers blkoffset + (bp->b_offset & PAGE_MASK),
961b569050aSKonstantin Belousov (int)xfersize, uio);
96259a01b70SKonstantin Belousov }
9635ecba769SKirk McKusick /*
9645ecba769SKirk McKusick * If the buffer is not already filled and we encounter an
9655ecba769SKirk McKusick * error while trying to fill it, we have to clear out any
9665ecba769SKirk McKusick * garbage data from the pages instantiated for the buffer.
9675ecba769SKirk McKusick * If we do not, a failed uiomove() during a write can leave
9685ecba769SKirk McKusick * the prior contents of the pages exposed to a userland mmap.
9695ecba769SKirk McKusick *
9705ecba769SKirk McKusick * Note that we need only clear buffers with a transfer size
9715ecba769SKirk McKusick * equal to the block size because buffers with a shorter
9725ecba769SKirk McKusick * transfer size were cleared above by the call to UFS_BALLOC()
9735ecba769SKirk McKusick * with the BA_CLRBUF flag set.
9745ecba769SKirk McKusick *
9755ecba769SKirk McKusick * If the source region for uiomove identically mmaps the
9765ecba769SKirk McKusick * buffer, uiomove() performed the NOP copy, and the buffer
9775ecba769SKirk McKusick * content remains valid because the page fault handler
9785ecba769SKirk McKusick * validated the pages.
9795ecba769SKirk McKusick */
9805ecba769SKirk McKusick if (error != 0 && (bp->b_flags & B_CACHE) == 0 &&
981ca39f233SKonstantin Belousov fs->fs_bsize == xfersize) {
982ca39f233SKonstantin Belousov if (error == EFAULT && LIST_EMPTY(&bp->b_dep)) {
983ca39f233SKonstantin Belousov bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
984ca39f233SKonstantin Belousov brelse(bp);
985ca39f233SKonstantin Belousov break;
986ca39f233SKonstantin Belousov } else {
9875ecba769SKirk McKusick vfs_bio_clrbuf(bp);
988ca39f233SKonstantin Belousov }
989ca39f233SKonstantin Belousov }
99099e6e193SMark Johnston
99199e6e193SMark Johnston vfs_bio_set_flags(bp, ioflag);
992e179b40fSPoul-Henning Kamp
993e179b40fSPoul-Henning Kamp /*
994e179b40fSPoul-Henning Kamp * If IO_SYNC each buffer is written synchronously. Otherwise
995e179b40fSPoul-Henning Kamp * if we have a severe page deficiency write the buffer
996e179b40fSPoul-Henning Kamp * asynchronously. Otherwise try to cluster, and if that
997e179b40fSPoul-Henning Kamp * doesn't do it then either do an async write (if O_DIRECT),
998e179b40fSPoul-Henning Kamp * or a delayed write (if not).
999e179b40fSPoul-Henning Kamp */
1000e179b40fSPoul-Henning Kamp if (ioflag & IO_SYNC) {
1001e179b40fSPoul-Henning Kamp (void)bwrite(bp);
1002e179b40fSPoul-Henning Kamp } else if (vm_page_count_severe() ||
1003e179b40fSPoul-Henning Kamp buf_dirty_count_severe() ||
1004e179b40fSPoul-Henning Kamp (ioflag & IO_ASYNC)) {
1005e179b40fSPoul-Henning Kamp bp->b_flags |= B_CLUSTEROK;
1006e179b40fSPoul-Henning Kamp bawrite(bp);
1007e179b40fSPoul-Henning Kamp } else if (xfersize + blkoffset == fs->fs_bsize) {
1008e179b40fSPoul-Henning Kamp if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
1009e179b40fSPoul-Henning Kamp bp->b_flags |= B_CLUSTEROK;
10102bfd8992SKonstantin Belousov cluster_write(vp, &ip->i_clusterw, bp,
10112bfd8992SKonstantin Belousov ip->i_size, seqcount, GB_UNMAPPED);
1012e179b40fSPoul-Henning Kamp } else {
1013e179b40fSPoul-Henning Kamp bawrite(bp);
1014e179b40fSPoul-Henning Kamp }
1015e179b40fSPoul-Henning Kamp } else if (ioflag & IO_DIRECT) {
1016e179b40fSPoul-Henning Kamp bp->b_flags |= B_CLUSTEROK;
1017e179b40fSPoul-Henning Kamp bawrite(bp);
1018e179b40fSPoul-Henning Kamp } else {
1019e179b40fSPoul-Henning Kamp bp->b_flags |= B_CLUSTEROK;
1020e179b40fSPoul-Henning Kamp bdwrite(bp);
1021e179b40fSPoul-Henning Kamp }
1022e179b40fSPoul-Henning Kamp if (error || xfersize == 0)
1023e179b40fSPoul-Henning Kamp break;
1024ac4ec141SMateusz Guzik UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
1025e179b40fSPoul-Henning Kamp }
1026e179b40fSPoul-Henning Kamp /*
1027e179b40fSPoul-Henning Kamp * If we successfully wrote any data, and we are not the superuser
1028e179b40fSPoul-Henning Kamp * we clear the setuid and setgid bits as a precaution against
1029e179b40fSPoul-Henning Kamp * tampering.
1030e179b40fSPoul-Henning Kamp */
1031d8ba45e2SEd Maste if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
10323b2eb461SPawel Jakub Dawidek ap->a_cred) {
1033cc426dd3SMateusz Guzik if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID)) {
10349d5a594fSMateusz Guzik vn_seqc_write_begin(vp);
10359d5a594fSMateusz Guzik UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID));
1036b403319bSAlexander Kabaev DIP_SET(ip, i_mode, ip->i_mode);
10379d5a594fSMateusz Guzik vn_seqc_write_end(vp);
1038e179b40fSPoul-Henning Kamp }
10393b2eb461SPawel Jakub Dawidek }
1040e179b40fSPoul-Henning Kamp if (error) {
1041e179b40fSPoul-Henning Kamp if (ioflag & IO_UNIT) {
1042efd6d980SPoul-Henning Kamp (void)ffs_truncate(vp, osize,
1043c52fd858SEdward Tomasz Napierala IO_NORMAL | (ioflag & IO_SYNC), ap->a_cred);
1044e179b40fSPoul-Henning Kamp uio->uio_offset -= resid - uio->uio_resid;
1045e179b40fSPoul-Henning Kamp uio->uio_resid = resid;
1046e179b40fSPoul-Henning Kamp }
1047d79ff54bSChuck Silvers } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
1048e7347be9SThomas Munro if (!(ioflag & IO_DATASYNC) ||
1049e7347be9SThomas Munro (ip->i_flags & (IN_SIZEMOD | IN_IBLKDATA)))
1050efd6d980SPoul-Henning Kamp error = ffs_update(vp, 1);
1051d79ff54bSChuck Silvers if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), error))
1052d79ff54bSChuck Silvers error = ENXIO;
1053d79ff54bSChuck Silvers }
105487525ef9SKonstantin Belousov vn_rlimit_fsizex_res(uio, r);
1055e179b40fSPoul-Henning Kamp return (error);
1056e179b40fSPoul-Henning Kamp }
1057e179b40fSPoul-Henning Kamp
/*
 * Extended attribute area reading.
 *
 * Like ffs_read(), but transfers from the UFS2 external attribute
 * area (dp->di_extsize bytes), which is addressed with negative
 * logical block numbers (-1 - lbn).  Only valid on UFS2.
 */
static int
ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	ssize_t orig_resid;
	int error;

	ip = VTOI(vp);
	fs = ITOFS(ip);
	dp = ip->i_din2;

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extread: mode");

#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0"));

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type ( fragment or full block,
		 * depending ).
		 */
		size = sblksize(fs, dp->di_extsize, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
			/*
			 * Don't do readahead if this is the end of the info.
			 */
			error = bread(vp, -1 - lbn, size, NOCRED, &bp);
		} else {
			/*
			 * If we have a second block, then
			 * fire off a request for a readahead
			 * as well as a read. Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			int nextsize = sblksize(fs, dp->di_extsize, nextlbn);
			nextlbn = -1 - nextlbn;
			error = breadn(vp, -1 - lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;
		vfs_bio_brelse(bp, ioflag);
	}

	/*
	 * This can only happen in the case of an error
	 * because the loop above resets bp to NULL on each iteration
	 * and on normal completion has not set a new value into it.
	 * so it must have come from a 'break' statement
	 */
	if (bp != NULL)
		vfs_bio_brelse(bp, ioflag);
	return (error);
}
1176e179b40fSPoul-Henning Kamp
/*
 * Extended attribute area writing.
 *
 * Write uio's data into the UFS2 extended attribute area of vp, block by
 * block, allocating backing blocks via UFS_BALLOC as needed.  On error with
 * IO_UNIT set, the extattr area is rolled back to its original size and the
 * uio is rewound so the caller sees an all-or-nothing transfer.
 *
 * vp must be a UFS2 vnode; ucred (may be NOCRED) is used for allocation
 * and for the setuid/setgid-stripping privilege check.
 * Returns 0 or an errno value.
 */
static int
ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn;
	off_t osize;
	ssize_t resid;
	int blkoffset, error, flags, size, xfersize;

	ip = VTOI(vp);
	fs = ITOFS(ip);
	dp = ip->i_din2;

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extwrite: mode");
#endif

	if (ioflag & IO_APPEND)
		uio->uio_offset = dp->di_extsize;
	KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0"));
	KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0"));
	/* The extattr area is limited to UFS_NXADDR direct blocks. */
	if ((uoff_t)uio->uio_offset + uio->uio_resid >
	    UFS_NXADDR * fs->fs_bsize)
		return (EFBIG);

	/* Remember starting state for IO_UNIT rollback on error. */
	resid = uio->uio_resid;
	osize = dp->di_extsize;
	flags = IO_EXT;
	if (ioflag & IO_SYNC)
		flags |= IO_SYNC;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ucred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap(). XXX deal with uiomove() errors a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);

		/* Grow the recorded extattr size if this write extends it. */
		if (uio->uio_offset + xfersize > dp->di_extsize) {
			dp->di_extsize = uio->uio_offset + xfersize;
			UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE);
		}

		/* Clamp the copy to what the buffer actually holds. */
		size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);

		vfs_bio_set_flags(bp, ioflag);

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
			    xfersize + blkoffset == fs->fs_bsize ||
			    (ioflag & (IO_ASYNC | IO_DIRECT)))
			bawrite(bp);
		else
			bdwrite(bp);
		if (error || xfersize == 0)
			break;
		UFS_INODE_SET_FLAG(ip, IN_CHANGE);
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ucred) {
		if (priv_check_cred(ucred, PRIV_VFS_RETAINSUGID)) {
			vn_seqc_write_begin(vp);
			UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID));
			dp->di_mode = ip->i_mode;
			vn_seqc_write_end(vp);
		}
	}
	if (error) {
		if (ioflag & IO_UNIT) {
			/* All-or-nothing: undo the partial transfer. */
			(void)ffs_truncate(vp, osize,
			    IO_EXT | (ioflag&IO_SYNC), ucred);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = ffs_update(vp, 1);
	return (error);
}
1302d6fe88e4SPoul-Henning Kamp
13030176455bSPoul-Henning Kamp /*
13040176455bSPoul-Henning Kamp * Vnode operating to retrieve a named extended attribute.
13050176455bSPoul-Henning Kamp *
13060176455bSPoul-Henning Kamp * Locate a particular EA (nspace:name) in the area (ptr:length), and return
13070176455bSPoul-Henning Kamp * the length of the EA, and possibly the pointer to the entry and to the data.
13080176455bSPoul-Henning Kamp */
13090176455bSPoul-Henning Kamp static int
ffs_findextattr(uint8_t * ptr,uint64_t length,int nspace,const char * name,struct extattr ** eapp,uint8_t ** eac)1310831b1ff7SKirk McKusick ffs_findextattr(uint8_t *ptr, uint64_t length, int nspace, const char *name,
1311831b1ff7SKirk McKusick struct extattr **eapp, uint8_t **eac)
13120176455bSPoul-Henning Kamp {
1313675c187cSConrad Meyer struct extattr *eap, *eaend;
1314675c187cSConrad Meyer size_t nlen;
13150176455bSPoul-Henning Kamp
13160176455bSPoul-Henning Kamp nlen = strlen(name);
1317675c187cSConrad Meyer KASSERT(ALIGNED_TO(ptr, struct extattr), ("unaligned"));
1318675c187cSConrad Meyer eap = (struct extattr *)ptr;
1319675c187cSConrad Meyer eaend = (struct extattr *)(ptr + length);
1320675c187cSConrad Meyer for (; eap < eaend; eap = EXTATTR_NEXT(eap)) {
1321e6790841SConrad Meyer KASSERT(EXTATTR_NEXT(eap) <= eaend,
1322e6790841SConrad Meyer ("extattr next %p beyond %p", EXTATTR_NEXT(eap), eaend));
1323675c187cSConrad Meyer if (eap->ea_namespace != nspace || eap->ea_namelength != nlen
1324675c187cSConrad Meyer || memcmp(eap->ea_name, name, nlen) != 0)
13250176455bSPoul-Henning Kamp continue;
1326675c187cSConrad Meyer if (eapp != NULL)
1327675c187cSConrad Meyer *eapp = eap;
13280176455bSPoul-Henning Kamp if (eac != NULL)
1329675c187cSConrad Meyer *eac = EXTATTR_CONTENT(eap);
1330675c187cSConrad Meyer return (EXTATTR_CONTENT_SIZE(eap));
13310176455bSPoul-Henning Kamp }
1332d0e9b8dbSPoul-Henning Kamp return (-1);
13330176455bSPoul-Henning Kamp }
13340176455bSPoul-Henning Kamp
/*
 * Read the whole extended attribute area of vp into a freshly allocated
 * M_TEMP buffer, validate the on-disk entry chain, and hand the buffer
 * back via *p (ownership transfers to the caller; it also becomes the
 * inode's cached EA area length via ip->i_ea_len).
 *
 * Returns 0 on success, EFBIG if the recorded size exceeds the maximum
 * extattr area, EINTEGRITY on a corrupted entry, or an I/O error from
 * ffs_extread().
 */
static int
ffs_rdextattr(uint8_t **p, struct vnode *vp, struct thread *td)
{
	const struct extattr *eap, *eaend, *eapnext;
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct uio luio;
	struct iovec liovec;
	uint64_t easize;
	int error;
	uint8_t *eae;

	ip = VTOI(vp);
	fs = ITOFS(ip);
	dp = ip->i_din2;
	easize = dp->di_extsize;
	/* Distrust the on-disk size: it must fit the extattr block range. */
	if ((uoff_t)easize > UFS_NXADDR * fs->fs_bsize)
		return (EFBIG);

	eae = malloc(easize, M_TEMP, M_WAITOK);

	/* Build a kernel-space uio covering the whole EA area. */
	liovec.iov_base = eae;
	liovec.iov_len = easize;
	luio.uio_iov = &liovec;
	luio.uio_iovcnt = 1;
	luio.uio_offset = 0;
	luio.uio_resid = easize;
	luio.uio_segflg = UIO_SYSSPACE;
	luio.uio_rw = UIO_READ;
	luio.uio_td = td;

	error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
	if (error) {
		free(eae, M_TEMP);
		return (error);
	}
	/* Validate disk xattrfile contents. */
	for (eap = (void *)eae, eaend = (void *)(eae + easize); eap < eaend;
	    eap = eapnext) {
		/*
		 * Detect zeroed out tail: a too-small length terminates
		 * the chain and the cached size is trimmed to match.
		 * NOTE(review): the ea_length == 0 test is subsumed by the
		 * < sizeof(*eap) check and appears redundant.
		 */
		if (eap->ea_length < sizeof(*eap) || eap->ea_length == 0) {
			easize = (const uint8_t *)eap - eae;
			break;
		}

		eapnext = EXTATTR_NEXT(eap);
		/* Bogusly long entry. */
		if (eapnext > eaend) {
			free(eae, M_TEMP);
			return (EINTEGRITY);
		}
	}
	/* Cache the (possibly trimmed) validated length on the inode. */
	ip->i_ea_len = easize;
	*p = eae;
	return (0);
}
13920176455bSPoul-Henning Kamp
1393e65f5a4eSKonstantin Belousov static void
ffs_lock_ea(struct vnode * vp)1394e65f5a4eSKonstantin Belousov ffs_lock_ea(struct vnode *vp)
1395e65f5a4eSKonstantin Belousov {
1396e65f5a4eSKonstantin Belousov struct inode *ip;
1397e65f5a4eSKonstantin Belousov
1398e65f5a4eSKonstantin Belousov ip = VTOI(vp);
1399e65f5a4eSKonstantin Belousov VI_LOCK(vp);
1400e65f5a4eSKonstantin Belousov while (ip->i_flag & IN_EA_LOCKED) {
1401ac4ec141SMateusz Guzik UFS_INODE_SET_FLAG(ip, IN_EA_LOCKWAIT);
1402*8ecc4191SOlivier Certner msleep(&ip->i_ea_refs, &vp->v_interlock, PINOD, "ufs_ea", 0);
1403e65f5a4eSKonstantin Belousov }
1404ac4ec141SMateusz Guzik UFS_INODE_SET_FLAG(ip, IN_EA_LOCKED);
1405e65f5a4eSKonstantin Belousov VI_UNLOCK(vp);
1406e65f5a4eSKonstantin Belousov }
1407e65f5a4eSKonstantin Belousov
1408e65f5a4eSKonstantin Belousov static void
ffs_unlock_ea(struct vnode * vp)1409e65f5a4eSKonstantin Belousov ffs_unlock_ea(struct vnode *vp)
1410e65f5a4eSKonstantin Belousov {
1411e65f5a4eSKonstantin Belousov struct inode *ip;
1412e65f5a4eSKonstantin Belousov
1413e65f5a4eSKonstantin Belousov ip = VTOI(vp);
1414e65f5a4eSKonstantin Belousov VI_LOCK(vp);
1415e65f5a4eSKonstantin Belousov if (ip->i_flag & IN_EA_LOCKWAIT)
1416e65f5a4eSKonstantin Belousov wakeup(&ip->i_ea_refs);
1417e65f5a4eSKonstantin Belousov ip->i_flag &= ~(IN_EA_LOCKED | IN_EA_LOCKWAIT);
1418e65f5a4eSKonstantin Belousov VI_UNLOCK(vp);
1419e65f5a4eSKonstantin Belousov }
1420e65f5a4eSKonstantin Belousov
14210e168822SPoul-Henning Kamp static int
ffs_open_ea(struct vnode * vp,struct ucred * cred,struct thread * td)14220e168822SPoul-Henning Kamp ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
14230e168822SPoul-Henning Kamp {
14240e168822SPoul-Henning Kamp struct inode *ip;
14250e168822SPoul-Henning Kamp int error;
14260e168822SPoul-Henning Kamp
14270e168822SPoul-Henning Kamp ip = VTOI(vp);
14280e168822SPoul-Henning Kamp
1429e65f5a4eSKonstantin Belousov ffs_lock_ea(vp);
1430e65f5a4eSKonstantin Belousov if (ip->i_ea_area != NULL) {
1431e65f5a4eSKonstantin Belousov ip->i_ea_refs++;
1432e65f5a4eSKonstantin Belousov ffs_unlock_ea(vp);
1433e65f5a4eSKonstantin Belousov return (0);
1434e65f5a4eSKonstantin Belousov }
1435e6790841SConrad Meyer error = ffs_rdextattr(&ip->i_ea_area, vp, td);
1436e65f5a4eSKonstantin Belousov if (error) {
1437e65f5a4eSKonstantin Belousov ffs_unlock_ea(vp);
14380e168822SPoul-Henning Kamp return (error);
1439e65f5a4eSKonstantin Belousov }
14400e168822SPoul-Henning Kamp ip->i_ea_error = 0;
1441e65f5a4eSKonstantin Belousov ip->i_ea_refs++;
1442e65f5a4eSKonstantin Belousov ffs_unlock_ea(vp);
14430e168822SPoul-Henning Kamp return (0);
14440e168822SPoul-Henning Kamp }
14450e168822SPoul-Henning Kamp
/*
 * Vnode extattr transaction commit/abort
 *
 * Drop one reference on vp's cached extended attribute area.  If commit
 * is set (and no error was recorded), the cached area is written back to
 * disk first, padded with zeros out to the old on-disk size so that any
 * stale tail from a previously larger area is overwritten.  The cache is
 * freed when the last reference is dropped.  Returns the recorded or
 * write-back error, EINVAL if no EA area is open.
 */
static int
ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct uio luio;
	struct iovec *liovec;
	struct ufs2_dinode *dp;
	size_t ea_len, tlen;
	int error, i, lcnt;
	bool truncate;

	ip = VTOI(vp);

	ffs_lock_ea(vp);
	if (ip->i_ea_area == NULL) {
		ffs_unlock_ea(vp);
		return (EINVAL);
	}
	dp = ip->i_din2;
	error = ip->i_ea_error;
	truncate = false;
	if (commit && error == 0) {
		ASSERT_VOP_ELOCKED(vp, "ffs_close_ea commit");
		if (cred == NOCRED)
			cred = vp->v_mount->mnt_cred;

		/*
		 * Write max(cached, on-disk) bytes: the cached area first,
		 * then ZERO_REGION_SIZE-sized chunks of the shared zero
		 * region to blank any remaining old on-disk tail.  lcnt
		 * counts the iovecs needed (one for the area itself plus
		 * one per zero chunk).
		 */
		ea_len = MAX(ip->i_ea_len, dp->di_extsize);
		for (lcnt = 1, tlen = ea_len - ip->i_ea_len; tlen > 0;) {
			tlen -= MIN(ZERO_REGION_SIZE, tlen);
			lcnt++;
		}

		liovec = __builtin_alloca(lcnt * sizeof(struct iovec));
		luio.uio_iovcnt = lcnt;

		liovec[0].iov_base = ip->i_ea_area;
		liovec[0].iov_len = ip->i_ea_len;
		for (i = 1, tlen = ea_len - ip->i_ea_len; i < lcnt; i++) {
			liovec[i].iov_base = __DECONST(void *, zero_region);
			liovec[i].iov_len = MIN(ZERO_REGION_SIZE, tlen);
			tlen -= liovec[i].iov_len;
		}
		MPASS(tlen == 0);

		luio.uio_iov = liovec;
		luio.uio_offset = 0;
		luio.uio_resid = ea_len;
		luio.uio_segflg = UIO_SYSSPACE;
		luio.uio_rw = UIO_WRITE;
		luio.uio_td = td;
		error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
		/* An emptied EA area means the ext blocks can be released. */
		if (error == 0 && ip->i_ea_len == 0)
			truncate = true;
	}
	if (--ip->i_ea_refs == 0) {
		free(ip->i_ea_area, M_TEMP);
		ip->i_ea_area = NULL;
		ip->i_ea_len = 0;
		ip->i_ea_error = 0;
	}
	ffs_unlock_ea(vp);

	/* Truncate outside the EA lock, after the successful zero write. */
	if (truncate)
		ffs_truncate(vp, 0, IO_EXT, cred);
	return (error);
}
15150e168822SPoul-Henning Kamp
15160e168822SPoul-Henning Kamp /*
151743920011SPoul-Henning Kamp * Vnode extattr strategy routine for fifos.
151848f0495dSKirk McKusick *
151948f0495dSKirk McKusick * We need to check for a read or write of the external attributes.
152048f0495dSKirk McKusick * Otherwise we just fall through and do the usual thing.
152148f0495dSKirk McKusick */
152248f0495dSKirk McKusick static int
ffsext_strategy(struct vop_strategy_args * ap)1523064e6b43SKirk McKusick ffsext_strategy(
1524064e6b43SKirk McKusick struct vop_strategy_args /* {
152548f0495dSKirk McKusick struct vnodeop_desc *a_desc;
152648f0495dSKirk McKusick struct vnode *a_vp;
152748f0495dSKirk McKusick struct buf *a_bp;
1528064e6b43SKirk McKusick } */ *ap)
152948f0495dSKirk McKusick {
153048f0495dSKirk McKusick struct vnode *vp;
153148f0495dSKirk McKusick daddr_t lbn;
153248f0495dSKirk McKusick
153348f0495dSKirk McKusick vp = ap->a_vp;
153448f0495dSKirk McKusick lbn = ap->a_bp->b_lblkno;
15351dc349abSEd Maste if (I_IS_UFS2(VTOI(vp)) && lbn < 0 && lbn >= -UFS_NXADDR)
153632a870daSPoul-Henning Kamp return (VOP_STRATEGY_APV(&ufs_vnodeops, ap));
153748f0495dSKirk McKusick if (vp->v_type == VFIFO)
153832a870daSPoul-Henning Kamp return (VOP_STRATEGY_APV(&ufs_fifoops, ap));
15394f116178SPoul-Henning Kamp panic("spec nodes went here");
154048f0495dSKirk McKusick }
154148f0495dSKirk McKusick
154248f0495dSKirk McKusick /*
15430e168822SPoul-Henning Kamp * Vnode extattr transaction commit/abort
15440e168822SPoul-Henning Kamp */
154537c84183SPoul-Henning Kamp static int
ffs_openextattr(struct vop_openextattr_args * ap)1546064e6b43SKirk McKusick ffs_openextattr(
1547064e6b43SKirk McKusick struct vop_openextattr_args /* {
15480e168822SPoul-Henning Kamp struct vnodeop_desc *a_desc;
15490e168822SPoul-Henning Kamp struct vnode *a_vp;
15500e168822SPoul-Henning Kamp IN struct ucred *a_cred;
15510e168822SPoul-Henning Kamp IN struct thread *a_td;
1552064e6b43SKirk McKusick } */ *ap)
15530e168822SPoul-Henning Kamp {
1554e1249defSRobert Watson
15554bc61fd4SEdward Tomasz Napierala if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
1556e1249defSRobert Watson return (EOPNOTSUPP);
1557e1249defSRobert Watson
15580e168822SPoul-Henning Kamp return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
15590e168822SPoul-Henning Kamp }
15600e168822SPoul-Henning Kamp
15610e168822SPoul-Henning Kamp /*
15620e168822SPoul-Henning Kamp * Vnode extattr transaction commit/abort
15630e168822SPoul-Henning Kamp */
156437c84183SPoul-Henning Kamp static int
ffs_closeextattr(struct vop_closeextattr_args * ap)1565064e6b43SKirk McKusick ffs_closeextattr(
1566064e6b43SKirk McKusick struct vop_closeextattr_args /* {
15670e168822SPoul-Henning Kamp struct vnodeop_desc *a_desc;
15680e168822SPoul-Henning Kamp struct vnode *a_vp;
15690e168822SPoul-Henning Kamp int a_commit;
15700e168822SPoul-Henning Kamp IN struct ucred *a_cred;
15710e168822SPoul-Henning Kamp IN struct thread *a_td;
1572064e6b43SKirk McKusick } */ *ap)
15730e168822SPoul-Henning Kamp {
1574c6d68ca8SKonstantin Belousov struct vnode *vp;
1575e1249defSRobert Watson
1576c6d68ca8SKonstantin Belousov vp = ap->a_vp;
1577c6d68ca8SKonstantin Belousov if (vp->v_type == VCHR || vp->v_type == VBLK)
1578e1249defSRobert Watson return (EOPNOTSUPP);
1579c6d68ca8SKonstantin Belousov if (ap->a_commit && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0)
15806e6b7d44SKirk McKusick return (EROFS);
15816e6b7d44SKirk McKusick
15826f30ac99SKonstantin Belousov if (ap->a_commit && DOINGSUJ(vp)) {
15836f30ac99SKonstantin Belousov ASSERT_VOP_ELOCKED(vp, "ffs_closeextattr commit");
15846f30ac99SKonstantin Belousov softdep_prealloc(vp, MNT_WAIT);
15856f30ac99SKonstantin Belousov if (vp->v_data == NULL)
15866f30ac99SKonstantin Belousov return (EBADF);
15876f30ac99SKonstantin Belousov }
1588c6d68ca8SKonstantin Belousov return (ffs_close_ea(vp, ap->a_commit, ap->a_cred, ap->a_td));
15890e168822SPoul-Henning Kamp }
15900e168822SPoul-Henning Kamp
/*
 * Vnode operation to remove a named attribute.
 *
 * Copies the cached EA area, locates the named entry, compacts it out of
 * the copy, and installs the copy as the new cached area before
 * committing it to disk via ffs_close_ea().
 * Returns 0, EOPNOTSUPP for device special files, EINVAL for an empty
 * name, EROFS on a read-only mount, ENOATTR if the attribute does not
 * exist, or a permission/I-O error.
 */
static int
ffs_deleteextattr(
	struct vop_deleteextattr_args /* {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
	} */ *ap)
{
	struct vnode *vp;
	struct inode *ip;
	struct extattr *eap;
	uint32_t ul;
	int olen, error, i, easize;
	uint8_t *eae;
	void *tmp;

	vp = ap->a_vp;
	ip = VTOI(vp);

	if (vp->v_type == VCHR || vp->v_type == VBLK)
		return (EOPNOTSUPP);
	if (strlen(ap->a_name) == 0)
		return (EINVAL);
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	error = extattr_check_cred(vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VWRITE);
	if (error) {
		/*
		 * ffs_lock_ea is not needed there, because the vnode
		 * must be exclusively locked.
		 */
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	if (DOINGSUJ(vp)) {
		ASSERT_VOP_ELOCKED(vp, "ffs_deleteextattr");
		softdep_prealloc(vp, MNT_WAIT);
		/* prealloc may have caused the vnode to be reclaimed */
		if (vp->v_data == NULL)
			return (EBADF);
	}

	error = ffs_open_ea(vp, ap->a_cred, ap->a_td);
	if (error)
		return (error);

	/* CEM: delete could be done in-place instead */
	eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    &eap, NULL);
	if (olen == -1) {
		/* delete but nonexistent */
		free(eae, M_TEMP);
		ffs_close_ea(vp, 0, ap->a_cred, ap->a_td);
		return (ENOATTR);
	}
	/* Slide the entries after eap down over it and shrink the area. */
	ul = eap->ea_length;
	i = (uint8_t *)EXTATTR_NEXT(eap) - eae;
	bcopy(EXTATTR_NEXT(eap), eap, easize - i);
	easize -= ul;

	/* Swap the compacted copy in as the cached area, then commit. */
	tmp = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(tmp, M_TEMP);
	error = ffs_close_ea(vp, 1, ap->a_cred, ap->a_td);
	return (error);
}
16700e168822SPoul-Henning Kamp
16710e168822SPoul-Henning Kamp /*
16720e168822SPoul-Henning Kamp * Vnode operation to retrieve a named extended attribute.
1673d6fe88e4SPoul-Henning Kamp */
167437c84183SPoul-Henning Kamp static int
ffs_getextattr(struct vop_getextattr_args * ap)1675064e6b43SKirk McKusick ffs_getextattr(
1676064e6b43SKirk McKusick struct vop_getextattr_args /* {
1677d6fe88e4SPoul-Henning Kamp IN struct vnode *a_vp;
1678d6fe88e4SPoul-Henning Kamp IN int a_attrnamespace;
1679d6fe88e4SPoul-Henning Kamp IN const char *a_name;
1680d6fe88e4SPoul-Henning Kamp INOUT struct uio *a_uio;
168185bba629SDima Dorfman OUT size_t *a_size;
1682d6fe88e4SPoul-Henning Kamp IN struct ucred *a_cred;
1683d6fe88e4SPoul-Henning Kamp IN struct thread *a_td;
1684064e6b43SKirk McKusick } */ *ap)
1685d6fe88e4SPoul-Henning Kamp {
16860176455bSPoul-Henning Kamp struct inode *ip;
1687831b1ff7SKirk McKusick uint8_t *eae, *p;
16881e9e2eb5SRobert Watson unsigned easize;
1689e65f5a4eSKonstantin Belousov int error, ealen;
16901e9e2eb5SRobert Watson
16911e9e2eb5SRobert Watson ip = VTOI(ap->a_vp);
16921e9e2eb5SRobert Watson
16934bc61fd4SEdward Tomasz Napierala if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
16941e9e2eb5SRobert Watson return (EOPNOTSUPP);
16951e9e2eb5SRobert Watson
16961e9e2eb5SRobert Watson error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
169786a0c0aaSEdward Tomasz Napierala ap->a_cred, ap->a_td, VREAD);
16981e9e2eb5SRobert Watson if (error)
16991e9e2eb5SRobert Watson return (error);
17001e9e2eb5SRobert Watson
17011e9e2eb5SRobert Watson error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
17021e9e2eb5SRobert Watson if (error)
17031e9e2eb5SRobert Watson return (error);
1704e65f5a4eSKonstantin Belousov
17051e9e2eb5SRobert Watson eae = ip->i_ea_area;
17061e9e2eb5SRobert Watson easize = ip->i_ea_len;
17071e9e2eb5SRobert Watson
17081e9e2eb5SRobert Watson ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
17091e9e2eb5SRobert Watson NULL, &p);
17101e9e2eb5SRobert Watson if (ealen >= 0) {
17111e9e2eb5SRobert Watson error = 0;
17121e9e2eb5SRobert Watson if (ap->a_size != NULL)
17131e9e2eb5SRobert Watson *ap->a_size = ealen;
17141e9e2eb5SRobert Watson else if (ap->a_uio != NULL)
17151e9e2eb5SRobert Watson error = uiomove(p, ealen, ap->a_uio);
17161e9e2eb5SRobert Watson } else
17171e9e2eb5SRobert Watson error = ENOATTR;
1718e65f5a4eSKonstantin Belousov
17191e9e2eb5SRobert Watson ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
17201e9e2eb5SRobert Watson return (error);
17211e9e2eb5SRobert Watson }
17221e9e2eb5SRobert Watson
17231e9e2eb5SRobert Watson /*
17241e9e2eb5SRobert Watson * Vnode operation to retrieve extended attributes on a vnode.
17251e9e2eb5SRobert Watson */
static int
ffs_listextattr(
	struct vop_listextattr_args /* {
		IN struct vnode *a_vp;
		IN int a_attrnamespace;
		INOUT struct uio *a_uio;
		OUT size_t *a_size;
		IN struct ucred *a_cred;
		IN struct thread *a_td;
	} */ *ap)
{
	struct inode *ip;
	struct extattr *eap, *eaend;
	int error, ealen;

	ip = VTOI(ap->a_vp);

	/* Extended attributes are not supported on device special files. */
	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	/* Listing names in a namespace requires read access to it. */
	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VREAD);
	if (error)
		return (error);

	/* Map the inode's EA area into memory (i_ea_area / i_ea_len). */
	error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
	if (error)
		return (error);

	error = 0;
	if (ap->a_size != NULL)
		*ap->a_size = 0;

	/* Walk the packed EA records; EXTATTR_NEXT() steps by ea_length. */
	KASSERT(ALIGNED_TO(ip->i_ea_area, struct extattr), ("unaligned"));
	eap = (struct extattr *)ip->i_ea_area;
	eaend = (struct extattr *)(ip->i_ea_area + ip->i_ea_len);
	for (; error == 0 && eap < eaend; eap = EXTATTR_NEXT(eap)) {
		KASSERT(EXTATTR_NEXT(eap) <= eaend,
		    ("extattr next %p beyond %p", EXTATTR_NEXT(eap), eaend));
		if (eap->ea_namespace != ap->a_attrnamespace)
			continue;

		ealen = eap->ea_namelength;
		if (ap->a_size != NULL)
			/* Size-probe mode: count length byte plus name. */
			*ap->a_size += ealen + 1;
		else if (ap->a_uio != NULL)
			/*
			 * Copy the length byte and the name in one move;
			 * the name bytes immediately follow ea_namelength
			 * in the on-disk extattr record.
			 */
			error = uiomove(&eap->ea_namelength, ealen + 1,
			    ap->a_uio);
	}

	/* Release the EA area; "0" means no modifications to commit. */
	ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
	return (error);
}
1779d6fe88e4SPoul-Henning Kamp
1780d6fe88e4SPoul-Henning Kamp /*
1781d6fe88e4SPoul-Henning Kamp * Vnode operation to set a named attribute.
1782d6fe88e4SPoul-Henning Kamp */
178337c84183SPoul-Henning Kamp static int
ffs_setextattr(struct vop_setextattr_args * ap)1784064e6b43SKirk McKusick ffs_setextattr(
1785064e6b43SKirk McKusick struct vop_setextattr_args /* {
1786d6fe88e4SPoul-Henning Kamp IN struct vnode *a_vp;
1787d6fe88e4SPoul-Henning Kamp IN int a_attrnamespace;
1788d6fe88e4SPoul-Henning Kamp IN const char *a_name;
1789d6fe88e4SPoul-Henning Kamp INOUT struct uio *a_uio;
1790d6fe88e4SPoul-Henning Kamp IN struct ucred *a_cred;
1791d6fe88e4SPoul-Henning Kamp IN struct thread *a_td;
1792064e6b43SKirk McKusick } */ *ap)
1793d6fe88e4SPoul-Henning Kamp {
1794c6d68ca8SKonstantin Belousov struct vnode *vp;
17950176455bSPoul-Henning Kamp struct inode *ip;
17960176455bSPoul-Henning Kamp struct fs *fs;
1797675c187cSConrad Meyer struct extattr *eap;
17980176455bSPoul-Henning Kamp uint32_t ealength, ul;
17997aac7bc1SKonstantin Belousov ssize_t ealen;
18007aac7bc1SKonstantin Belousov int olen, eapad1, eapad2, error, i, easize;
1801831b1ff7SKirk McKusick uint8_t *eae;
1802675c187cSConrad Meyer void *tmp;
1803d6fe88e4SPoul-Henning Kamp
1804c6d68ca8SKonstantin Belousov vp = ap->a_vp;
1805c6d68ca8SKonstantin Belousov ip = VTOI(vp);
1806e1db6897SKonstantin Belousov fs = ITOFS(ip);
18070176455bSPoul-Henning Kamp
1808c6d68ca8SKonstantin Belousov if (vp->v_type == VCHR || vp->v_type == VBLK)
1809e1249defSRobert Watson return (EOPNOTSUPP);
18101e9e2eb5SRobert Watson if (strlen(ap->a_name) == 0)
18111e9e2eb5SRobert Watson return (EINVAL);
18121e9e2eb5SRobert Watson
18139080ff25SRobert Watson /* XXX Now unsupported API to delete EAs using NULL uio. */
18149080ff25SRobert Watson if (ap->a_uio == NULL)
18159080ff25SRobert Watson return (EOPNOTSUPP);
18169080ff25SRobert Watson
1817c6d68ca8SKonstantin Belousov if (vp->v_mount->mnt_flag & MNT_RDONLY)
18186e6b7d44SKirk McKusick return (EROFS);
18196e6b7d44SKirk McKusick
18207aac7bc1SKonstantin Belousov ealen = ap->a_uio->uio_resid;
18211dc349abSEd Maste if (ealen < 0 || ealen > lblktosize(fs, UFS_NXADDR))
18227aac7bc1SKonstantin Belousov return (EINVAL);
18237aac7bc1SKonstantin Belousov
1824c6d68ca8SKonstantin Belousov error = extattr_check_cred(vp, ap->a_attrnamespace,
182586a0c0aaSEdward Tomasz Napierala ap->a_cred, ap->a_td, VWRITE);
18260e168822SPoul-Henning Kamp if (error) {
1827e65f5a4eSKonstantin Belousov /*
1828e65f5a4eSKonstantin Belousov * ffs_lock_ea is not needed there, because the vnode
182902e06d99SKonstantin Belousov * must be exclusively locked.
1830e65f5a4eSKonstantin Belousov */
18310e168822SPoul-Henning Kamp if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
18320e168822SPoul-Henning Kamp ip->i_ea_error = error;
18330e168822SPoul-Henning Kamp return (error);
18340e168822SPoul-Henning Kamp }
18350e168822SPoul-Henning Kamp
18366f30ac99SKonstantin Belousov if (DOINGSUJ(vp)) {
18376f30ac99SKonstantin Belousov ASSERT_VOP_ELOCKED(vp, "ffs_deleteextattr");
18386f30ac99SKonstantin Belousov softdep_prealloc(vp, MNT_WAIT);
18396f30ac99SKonstantin Belousov if (vp->v_data == NULL)
18406f30ac99SKonstantin Belousov return (EBADF);
18416f30ac99SKonstantin Belousov }
18426f30ac99SKonstantin Belousov
1843c6d68ca8SKonstantin Belousov error = ffs_open_ea(vp, ap->a_cred, ap->a_td);
18440e168822SPoul-Henning Kamp if (error)
18450e168822SPoul-Henning Kamp return (error);
18460e168822SPoul-Henning Kamp
18470176455bSPoul-Henning Kamp ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
1848675c187cSConrad Meyer eapad1 = roundup2(ealength, 8) - ealength;
1849675c187cSConrad Meyer eapad2 = roundup2(ealen, 8) - ealen;
18500176455bSPoul-Henning Kamp ealength += eapad1 + ealen + eapad2;
18510176455bSPoul-Henning Kamp
1852675c187cSConrad Meyer /*
1853675c187cSConrad Meyer * CEM: rewrites of the same size or smaller could be done in-place
1854675c187cSConrad Meyer * instead. (We don't acquire any fine-grained locks in here either,
1855675c187cSConrad Meyer * so we could also do bigger writes in-place.)
1856675c187cSConrad Meyer */
1857a163d034SWarner Losh eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
18580e168822SPoul-Henning Kamp bcopy(ip->i_ea_area, eae, ip->i_ea_len);
18590e168822SPoul-Henning Kamp easize = ip->i_ea_len;
18600176455bSPoul-Henning Kamp
1861675c187cSConrad Meyer olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
1862675c187cSConrad Meyer &eap, NULL);
18630e168822SPoul-Henning Kamp if (olen == -1) {
18640176455bSPoul-Henning Kamp /* new, append at end */
1865675c187cSConrad Meyer KASSERT(ALIGNED_TO(eae + easize, struct extattr),
1866675c187cSConrad Meyer ("unaligned"));
1867675c187cSConrad Meyer eap = (struct extattr *)(eae + easize);
18680176455bSPoul-Henning Kamp easize += ealength;
1869d0e9b8dbSPoul-Henning Kamp } else {
1870675c187cSConrad Meyer ul = eap->ea_length;
1871831b1ff7SKirk McKusick i = (uint8_t *)EXTATTR_NEXT(eap) - eae;
1872d0e9b8dbSPoul-Henning Kamp if (ul != ealength) {
1873831b1ff7SKirk McKusick bcopy(EXTATTR_NEXT(eap), (uint8_t *)eap + ealength,
1874675c187cSConrad Meyer easize - i);
1875d0e9b8dbSPoul-Henning Kamp easize += (ealength - ul);
1876d0e9b8dbSPoul-Henning Kamp }
18770176455bSPoul-Henning Kamp }
18781dc349abSEd Maste if (easize > lblktosize(fs, UFS_NXADDR)) {
18790176455bSPoul-Henning Kamp free(eae, M_TEMP);
1880c6d68ca8SKonstantin Belousov ffs_close_ea(vp, 0, ap->a_cred, ap->a_td);
1881e65f5a4eSKonstantin Belousov if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
18820e168822SPoul-Henning Kamp ip->i_ea_error = ENOSPC;
18830176455bSPoul-Henning Kamp return (ENOSPC);
18840176455bSPoul-Henning Kamp }
1885675c187cSConrad Meyer eap->ea_length = ealength;
1886675c187cSConrad Meyer eap->ea_namespace = ap->a_attrnamespace;
1887675c187cSConrad Meyer eap->ea_contentpadlen = eapad2;
1888675c187cSConrad Meyer eap->ea_namelength = strlen(ap->a_name);
1889675c187cSConrad Meyer memcpy(eap->ea_name, ap->a_name, strlen(ap->a_name));
1890675c187cSConrad Meyer bzero(&eap->ea_name[strlen(ap->a_name)], eapad1);
1891675c187cSConrad Meyer error = uiomove(EXTATTR_CONTENT(eap), ealen, ap->a_uio);
18920176455bSPoul-Henning Kamp if (error) {
18930176455bSPoul-Henning Kamp free(eae, M_TEMP);
1894c6d68ca8SKonstantin Belousov ffs_close_ea(vp, 0, ap->a_cred, ap->a_td);
1895e65f5a4eSKonstantin Belousov if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
18960e168822SPoul-Henning Kamp ip->i_ea_error = error;
18970176455bSPoul-Henning Kamp return (error);
18980176455bSPoul-Henning Kamp }
1899831b1ff7SKirk McKusick bzero((uint8_t *)EXTATTR_CONTENT(eap) + ealen, eapad2);
19009080ff25SRobert Watson
1901675c187cSConrad Meyer tmp = ip->i_ea_area;
19020e168822SPoul-Henning Kamp ip->i_ea_area = eae;
19030e168822SPoul-Henning Kamp ip->i_ea_len = easize;
1904675c187cSConrad Meyer free(tmp, M_TEMP);
1905c6d68ca8SKonstantin Belousov error = ffs_close_ea(vp, 1, ap->a_cred, ap->a_td);
19060176455bSPoul-Henning Kamp return (error);
1907d6fe88e4SPoul-Henning Kamp }
190810bcafe9SPawel Jakub Dawidek
190910bcafe9SPawel Jakub Dawidek /*
191010bcafe9SPawel Jakub Dawidek * Vnode pointer to File handle
191110bcafe9SPawel Jakub Dawidek */
191210bcafe9SPawel Jakub Dawidek static int
ffs_vptofh(struct vop_vptofh_args * ap)1913064e6b43SKirk McKusick ffs_vptofh(
1914064e6b43SKirk McKusick struct vop_vptofh_args /* {
191510bcafe9SPawel Jakub Dawidek IN struct vnode *a_vp;
191610bcafe9SPawel Jakub Dawidek IN struct fid *a_fhp;
1917064e6b43SKirk McKusick } */ *ap)
191810bcafe9SPawel Jakub Dawidek {
191910bcafe9SPawel Jakub Dawidek struct inode *ip;
192010bcafe9SPawel Jakub Dawidek struct ufid *ufhp;
192191b5592aSRick Macklem _Static_assert(sizeof(struct ufid) <= sizeof(struct fid),
192291b5592aSRick Macklem "struct ufid cannot be larger than struct fid");
192310bcafe9SPawel Jakub Dawidek
192410bcafe9SPawel Jakub Dawidek ip = VTOI(ap->a_vp);
192510bcafe9SPawel Jakub Dawidek ufhp = (struct ufid *)ap->a_fhp;
192610bcafe9SPawel Jakub Dawidek ufhp->ufid_len = sizeof(struct ufid);
192710bcafe9SPawel Jakub Dawidek ufhp->ufid_ino = ip->i_number;
192810bcafe9SPawel Jakub Dawidek ufhp->ufid_gen = ip->i_gen;
192910bcafe9SPawel Jakub Dawidek return (0);
193010bcafe9SPawel Jakub Dawidek }
193189521983SKonstantin Belousov
SYSCTL_DECL(_vfs_ffs);
/* Tunable: vfs.ffs.use_buf_pager — when nonzero (default), getpages uses
 * the buffer-cache pager rather than bmap-based generic paging. */
static int use_buf_pager = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, use_buf_pager, CTLFLAG_RWTUN, &use_buf_pager, 0,
    "Always use buffer pager instead of bmap");
193689521983SKonstantin Belousov
1937c39baa74SKonstantin Belousov static daddr_t
ffs_gbp_getblkno(struct vnode * vp,vm_ooffset_t off)1938c39baa74SKonstantin Belousov ffs_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
1939c39baa74SKonstantin Belousov {
1940c39baa74SKonstantin Belousov
1941c39baa74SKonstantin Belousov return (lblkno(VFSTOUFS(vp->v_mount)->um_fs, off));
1942c39baa74SKonstantin Belousov }
1943c39baa74SKonstantin Belousov
1944c39baa74SKonstantin Belousov static int
ffs_gbp_getblksz(struct vnode * vp,daddr_t lbn,long * sz)1945197a4f29SKonstantin Belousov ffs_gbp_getblksz(struct vnode *vp, daddr_t lbn, long *sz)
1946c39baa74SKonstantin Belousov {
1947c39baa74SKonstantin Belousov
1948197a4f29SKonstantin Belousov *sz = blksize(VFSTOUFS(vp->v_mount)->um_fs, VTOI(vp), lbn);
1949197a4f29SKonstantin Belousov return (0);
1950c39baa74SKonstantin Belousov }
1951c39baa74SKonstantin Belousov
195289521983SKonstantin Belousov static int
ffs_getpages(struct vop_getpages_args * ap)195389521983SKonstantin Belousov ffs_getpages(struct vop_getpages_args *ap)
195489521983SKonstantin Belousov {
195589521983SKonstantin Belousov struct vnode *vp;
195689521983SKonstantin Belousov struct ufsmount *um;
195789521983SKonstantin Belousov
195889521983SKonstantin Belousov vp = ap->a_vp;
1959c39baa74SKonstantin Belousov um = VFSTOUFS(vp->v_mount);
196089521983SKonstantin Belousov
1961c39baa74SKonstantin Belousov if (!use_buf_pager && um->um_devvp->v_bufobj.bo_bsize <= PAGE_SIZE)
1962c39baa74SKonstantin Belousov return (vnode_pager_generic_getpages(vp, ap->a_m, ap->a_count,
196389521983SKonstantin Belousov ap->a_rbehind, ap->a_rahead, NULL, NULL));
1964c39baa74SKonstantin Belousov return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind,
1965c39baa74SKonstantin Belousov ap->a_rahead, ffs_gbp_getblkno, ffs_gbp_getblksz));
196689521983SKonstantin Belousov }
19674775b07eSJason A. Harmening
19684775b07eSJason A. Harmening static int
ffs_getpages_async(struct vop_getpages_async_args * ap)19694775b07eSJason A. Harmening ffs_getpages_async(struct vop_getpages_async_args *ap)
19704775b07eSJason A. Harmening {
19714775b07eSJason A. Harmening struct vnode *vp;
19724775b07eSJason A. Harmening struct ufsmount *um;
1973abfdf767SKonstantin Belousov bool do_iodone;
19744775b07eSJason A. Harmening int error;
19754775b07eSJason A. Harmening
19764775b07eSJason A. Harmening vp = ap->a_vp;
19774775b07eSJason A. Harmening um = VFSTOUFS(vp->v_mount);
1978abfdf767SKonstantin Belousov do_iodone = true;
19794775b07eSJason A. Harmening
1980abfdf767SKonstantin Belousov if (um->um_devvp->v_bufobj.bo_bsize <= PAGE_SIZE) {
1981abfdf767SKonstantin Belousov error = vnode_pager_generic_getpages(vp, ap->a_m, ap->a_count,
1982abfdf767SKonstantin Belousov ap->a_rbehind, ap->a_rahead, ap->a_iodone, ap->a_arg);
1983abfdf767SKonstantin Belousov if (error == 0)
1984abfdf767SKonstantin Belousov do_iodone = false;
1985abfdf767SKonstantin Belousov } else {
1986abfdf767SKonstantin Belousov error = vfs_bio_getpages(vp, ap->a_m, ap->a_count,
1987abfdf767SKonstantin Belousov ap->a_rbehind, ap->a_rahead, ffs_gbp_getblkno,
1988abfdf767SKonstantin Belousov ffs_gbp_getblksz);
1989abfdf767SKonstantin Belousov }
1990abfdf767SKonstantin Belousov if (do_iodone && ap->a_iodone != NULL)
19914775b07eSJason A. Harmening ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error);
19924775b07eSJason A. Harmening
19934775b07eSJason A. Harmening return (error);
19944775b07eSJason A. Harmening }
1995f2c9d038SKonstantin Belousov
static int
ffs_vput_pair(struct vop_vput_pair_args *ap)
{
	struct mount *mp;
	struct vnode *dvp, *vp, *vp1, **vpp;
	struct inode *dp, *ip;
	ino_t ip_ino;
	uint64_t ip_gen;
	int error, vp_locked;

	dvp = ap->a_dvp;
	dp = VTOI(dvp);
	vpp = ap->a_vpp;
	vp = vpp != NULL ? *vpp : NULL;

	/* Fast path: nothing deferred on the directory, just drop refs. */
	if ((dp->i_flag & (IN_NEEDSYNC | IN_ENDOFF)) == 0) {
		vput(dvp);
		if (vp != NULL && ap->a_unlock_vp)
			vput(vp);
		return (0);
	}

	mp = dvp->v_mount;
	if (vp != NULL) {
		if (ap->a_unlock_vp) {
			vput(vp);
		} else {
			MPASS(vp->v_type != VNON);
			vp_locked = VOP_ISLOCKED(vp);
			/*
			 * Save identity so we can reinstantiate vp if it
			 * gets reclaimed while unlocked below.
			 */
			ip = VTOI(vp);
			ip_ino = ip->i_number;
			ip_gen = ip->i_gen;
			VOP_UNLOCK(vp);
		}
	}

	/*
	 * If compaction or fsync was requested do it in ffs_vput_pair()
	 * now that other locks are no longer held.
	 */
	if ((dp->i_flag & IN_ENDOFF) != 0) {
		VNASSERT(I_ENDOFF(dp) != 0 && I_ENDOFF(dp) < dp->i_size, dvp,
		    ("IN_ENDOFF set but I_ENDOFF() is not"));
		dp->i_flag &= ~IN_ENDOFF;
		/* Trim the directory to its recorded end-of-useful-data. */
		error = UFS_TRUNCATE(dvp, (off_t)I_ENDOFF(dp), IO_NORMAL |
		    (DOINGASYNC(dvp) ? 0 : IO_SYNC), curthread->td_ucred);
		if (error != 0 && error != ERELOOKUP) {
			if (!ffs_fsfail_cleanup(VFSTOUFS(mp), error)) {
				vn_printf(dvp,
				    "IN_ENDOFF: failed to truncate, "
				    "error %d\n", error);
			}
#ifdef UFS_DIRHASH
			ufsdirhash_free(dp);
#endif
		}
		SET_I_ENDOFF(dp, 0);
	}
	if ((dp->i_flag & IN_NEEDSYNC) != 0) {
		/* ERELOOKUP means "retry the sync", not failure. */
		do {
			error = ffs_syncvnode(dvp, MNT_WAIT, 0);
		} while (error == ERELOOKUP);
	}

	vput(dvp);

	if (vp == NULL || ap->a_unlock_vp)
		return (0);
	MPASS(mp != NULL);

	/*
	 * It is possible that vp is reclaimed at this point. Only
	 * routines that call us with a_unlock_vp == false can find
	 * that their vp has been reclaimed. There are three areas
	 * that are affected:
	 * 1) vn_open_cred() - later VOPs could fail, but
	 *    dead_open() returns 0 to simulate successful open.
	 * 2) ffs_snapshot() - creation of snapshot fails with EBADF.
	 * 3) NFS server (several places) - code is prepared to detect
	 *    and respond to dead vnodes by returning ESTALE.
	 */
	VOP_LOCK(vp, vp_locked | LK_RETRY);
	if (IS_UFS(vp))
		return (0);

	/*
	 * Try harder to recover from reclaimed vp if reclaim was not
	 * because underlying inode was cleared. We saved inode
	 * number and inode generation, so we can try to reinstantiate
	 * exactly same version of inode. If this fails, return
	 * original doomed vnode and let caller to handle
	 * consequences.
	 *
	 * Note that callers must keep write started around
	 * VOP_VPUT_PAIR() calls, so it is safe to use mp without
	 * busying it.
	 */
	VOP_UNLOCK(vp);
	error = ffs_inotovp(mp, ip_ino, ip_gen, LK_EXCLUSIVE, &vp1,
	    FFSV_REPLACE_DOOMED);
	if (error != 0) {
		/* Reinstantiation failed: hand back the doomed vnode. */
		VOP_LOCK(vp, vp_locked | LK_RETRY);
	} else {
		vrele(vp);
		*vpp = vp1;
	}
	return (error);
}
2104