xref: /titanic_44/usr/src/uts/common/fs/cachefs/cachefs_vnops.c (revision c64d15a587b6038b85a928885fc997da7315fbfe)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/cred.h>
32 #include <sys/proc.h>
33 #include <sys/user.h>
34 #include <sys/time.h>
35 #include <sys/vnode.h>
36 #include <sys/vfs.h>
37 #include <sys/vfs_opreg.h>
38 #include <sys/file.h>
39 #include <sys/filio.h>
40 #include <sys/uio.h>
41 #include <sys/buf.h>
42 #include <sys/mman.h>
43 #include <sys/tiuser.h>
44 #include <sys/pathname.h>
45 #include <sys/dirent.h>
46 #include <sys/conf.h>
47 #include <sys/debug.h>
48 #include <sys/vmsystm.h>
49 #include <sys/fcntl.h>
50 #include <sys/flock.h>
51 #include <sys/swap.h>
52 #include <sys/errno.h>
53 #include <sys/sysmacros.h>
54 #include <sys/disp.h>
55 #include <sys/kmem.h>
56 #include <sys/cmn_err.h>
57 #include <sys/vtrace.h>
58 #include <sys/mount.h>
59 #include <sys/bootconf.h>
60 #include <sys/dnlc.h>
61 #include <sys/stat.h>
62 #include <sys/acl.h>
63 #include <sys/policy.h>
64 #include <rpc/types.h>
65 
66 #include <vm/hat.h>
67 #include <vm/as.h>
68 #include <vm/page.h>
69 #include <vm/pvn.h>
70 #include <vm/seg.h>
71 #include <vm/seg_map.h>
72 #include <vm/seg_vn.h>
73 #include <vm/rm.h>
74 #include <sys/fs/cachefs_fs.h>
75 #include <sys/fs/cachefs_dir.h>
76 #include <sys/fs/cachefs_dlog.h>
77 #include <sys/fs/cachefs_ioctl.h>
78 #include <sys/fs/cachefs_log.h>
79 #include <fs/fs_subr.h>
80 
81 int cachefs_dnlc;	/* debugging switch: use the dnlc */
82 
/*
 * Prototypes for file-local helpers.  Most of them come in pairs named
 * *_connected / *_disconnected, one per vnode operation.
 */
83 static void cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp,
84     cred_t *cr);
85 static void cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap,
86     cred_t *cr);
87 static void cachefs_createacl(cnode_t *dcp, cnode_t *newcp);
88 static int cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec);
89 static int cachefs_getacldirvp(cnode_t *cp);
90 static void cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec);
91 static int cachefs_access_local(void *cp, int mode, cred_t *cr);
92 static int cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr);
93 static int cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen,
94     u_offset_t iooff, cred_t *cr);
95 static int cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen,
96     u_offset_t iooff, cred_t *cr);
97 static int cachefs_setattr_connected(vnode_t *vp, vattr_t *vap, int flags,
98     cred_t *cr, caller_context_t *ct);
99 static int cachefs_setattr_disconnected(vnode_t *vp, vattr_t *vap,
100     int flags, cred_t *cr, caller_context_t *ct);
101 static int cachefs_access_connected(struct vnode *vp, int mode,
102     int flags, cred_t *cr);
103 static int cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp,
104     cred_t *cr);
105 static int cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva,
106     char *tnm, cred_t *cr);
107 static int cachefs_symlink_disconnected(vnode_t *dvp, char *lnm,
108     vattr_t *tva, char *tnm, cred_t *cr);
109 static int cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm,
110     cred_t *cr);
111 static int cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp,
112     char *tnm, cred_t *cr);
113 static int cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap,
114     vnode_t **vpp, cred_t *cr);
115 static int cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
116     vnode_t **vpp, cred_t *cr);
117 static int cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr);
118 static int cachefs_rmdir_connected(vnode_t *dvp, char *nm,
119     vnode_t *cdir, cred_t *cr, vnode_t *vp);
120 static int cachefs_rmdir_disconnected(vnode_t *dvp, char *nm,
121     vnode_t *cdir, cred_t *cr, vnode_t *vp);
122 static char *cachefs_newname(void);
123 static int cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm,
124     cred_t *cr);
125 static int cachefs_rename_connected(vnode_t *odvp, char *onm,
126     vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp);
127 static int cachefs_rename_disconnected(vnode_t *odvp, char *onm,
128     vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp);
129 static int cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr,
130     int *eofp);
131 static int cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop,
132     cred_t *cr, int *eofp);
133 static int cachefs_readback_translate(cnode_t *cp, uio_t *uiop,
134 	cred_t *cr, int *eofp);
135 
136 static int cachefs_setattr_common(vnode_t *vp, vattr_t *vap, int flags,
137     cred_t *cr, caller_context_t *ct);
138 
/*
 * Prototypes for the vnode operation entry points that are installed
 * through cachefs_vnodeops_template, interleaved with a few of their
 * helpers (e.g. cachefs_create_connected, cachefs_getapage).
 */
139 static	int	cachefs_open(struct vnode **, int, cred_t *,
140 			caller_context_t *);
141 static	int	cachefs_close(struct vnode *, int, int, offset_t,
142 			cred_t *, caller_context_t *);
143 static	int	cachefs_read(struct vnode *, struct uio *, int, cred_t *,
144 			caller_context_t *);
145 static	int	cachefs_write(struct vnode *, struct uio *, int, cred_t *,
146 			caller_context_t *);
147 static	int	cachefs_ioctl(struct vnode *, int, intptr_t, int, cred_t *,
148 			int *, caller_context_t *);
149 static	int	cachefs_getattr(struct vnode *, struct vattr *, int,
150 			cred_t *, caller_context_t *);
151 static	int	cachefs_setattr(struct vnode *, struct vattr *,
152 			int, cred_t *, caller_context_t *);
153 static	int	cachefs_access(struct vnode *, int, int, cred_t *,
154 			caller_context_t *);
155 static	int	cachefs_lookup(struct vnode *, char *, struct vnode **,
156 			struct pathname *, int, struct vnode *, cred_t *,
157 			caller_context_t *, int *, pathname_t *);
158 static	int	cachefs_create(struct vnode *, char *, struct vattr *,
159 			enum vcexcl, int, struct vnode **, cred_t *, int,
160 			caller_context_t *, vsecattr_t *);
161 static	int	cachefs_create_connected(vnode_t *dvp, char *nm,
162 			vattr_t *vap, enum vcexcl exclusive, int mode,
163 			vnode_t **vpp, cred_t *cr);
164 static	int	cachefs_create_disconnected(vnode_t *dvp, char *nm,
165 			vattr_t *vap, enum vcexcl exclusive, int mode,
166 			vnode_t **vpp, cred_t *cr);
167 static	int	cachefs_remove(struct vnode *, char *, cred_t *,
168 			caller_context_t *, int);
169 static	int	cachefs_link(struct vnode *, struct vnode *, char *,
170 			cred_t *, caller_context_t *, int);
171 static	int	cachefs_rename(struct vnode *, char *, struct vnode *,
172 			char *, cred_t *, caller_context_t *, int);
173 static	int	cachefs_mkdir(struct vnode *, char *, struct
174 			vattr *, struct vnode **, cred_t *, caller_context_t *,
175 			int, vsecattr_t *);
176 static	int	cachefs_rmdir(struct vnode *, char *, struct vnode *,
177 			cred_t *, caller_context_t *, int);
178 static	int	cachefs_readdir(struct vnode *, struct uio *,
179 			cred_t *, int *, caller_context_t *, int);
180 static	int	cachefs_symlink(struct vnode *, char *, struct vattr *,
181 			char *, cred_t *, caller_context_t *, int);
182 static	int	cachefs_readlink(struct vnode *, struct uio *, cred_t *,
183 			caller_context_t *);
184 static int cachefs_readlink_connected(vnode_t *vp, uio_t *uiop, cred_t *cr);
185 static int cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop);
186 static	int	cachefs_fsync(struct vnode *, int, cred_t *,
187 			caller_context_t *);
188 static	void	cachefs_inactive(struct vnode *, cred_t *, caller_context_t *);
189 static	int	cachefs_fid(struct vnode *, struct fid *, caller_context_t *);
190 static	int	cachefs_rwlock(struct vnode *, int, caller_context_t *);
191 static	void	cachefs_rwunlock(struct vnode *, int, caller_context_t *);
192 static	int	cachefs_seek(struct vnode *, offset_t, offset_t *,
193 			caller_context_t *);
194 static	int	cachefs_frlock(struct vnode *, int, struct flock64 *,
195 			int, offset_t, struct flk_callback *, cred_t *,
196 			caller_context_t *);
197 static	int	cachefs_space(struct vnode *, int, struct flock64 *, int,
198 			offset_t, cred_t *, caller_context_t *);
199 static	int	cachefs_realvp(struct vnode *, struct vnode **,
200 			caller_context_t *);
201 static	int	cachefs_getpage(struct vnode *, offset_t, size_t, uint_t *,
202 			struct page *[], size_t, struct seg *, caddr_t,
203 			enum seg_rw, cred_t *, caller_context_t *);
204 static	int	cachefs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
205 			struct page *[], size_t, struct seg *, caddr_t,
206 			enum seg_rw, cred_t *);
207 static	int	cachefs_getapage_back(struct vnode *, u_offset_t, size_t,
208 		uint_t *, struct page *[], size_t, struct seg *, caddr_t,
209 			enum seg_rw, cred_t *);
210 static	int	cachefs_putpage(struct vnode *, offset_t, size_t, int,
211 			cred_t *, caller_context_t *);
212 static	int	cachefs_map(struct vnode *, offset_t, struct as *,
213 			caddr_t *, size_t, uchar_t, uchar_t, uint_t, cred_t *,
214 			caller_context_t *);
215 static	int	cachefs_addmap(struct vnode *, offset_t, struct as *,
216 			caddr_t, size_t, uchar_t, uchar_t, uint_t, cred_t *,
217 			caller_context_t *);
218 static	int	cachefs_delmap(struct vnode *, offset_t, struct as *,
219 			caddr_t, size_t, uint_t, uint_t, uint_t, cred_t *,
220 			caller_context_t *);
221 static int	cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec,
222 			int flag, cred_t *cr, caller_context_t *);
223 static int	cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec,
224 			int flag, cred_t *cr, caller_context_t *);
225 static	int	cachefs_shrlock(vnode_t *, int, struct shrlock *, int,
226 			cred_t *, caller_context_t *);
227 static int cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
228     cred_t *cr);
229 static int cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec,
230     int flag, cred_t *cr);
231 
232 static int	cachefs_dump(struct vnode *, caddr_t, offset_t, offset_t,
233 			caller_context_t *);
234 static int	cachefs_pageio(struct vnode *, page_t *,
235 		    u_offset_t, size_t, int, cred_t *, caller_context_t *);
236 static int	cachefs_writepage(struct vnode *vp, caddr_t base,
237 		    int tcount, struct uio *uiop);
238 static int	cachefs_pathconf(vnode_t *, int, ulong_t *, cred_t *,
239 			caller_context_t *);
240 
/*
 * Prototypes for the *_backfs_nfsv4 variants, which hand an operation to
 * the NFSv4 back file system (cachefs pass-through support for NFSv4).
 */
241 static int	cachefs_read_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
242 			cred_t *cr, caller_context_t *ct);
243 static int	cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
244 			cred_t *cr, caller_context_t *ct);
245 static int	cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap,
246 			int flags, cred_t *cr, caller_context_t *ct);
247 static int	cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr,
248 			vnode_t *vp);
249 static int	cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off,
250 			size_t len, uint_t *protp, struct page *pl[],
251 			size_t plsz, struct seg *seg, caddr_t addr,
252 			enum seg_rw rw, cred_t *cr);
253 static int	cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off,
254 			size_t len, int flags, cred_t *cr);
255 static int	cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off,
256 			struct as *as, caddr_t *addrp, size_t len, uchar_t prot,
257 			uchar_t maxprot, uint_t flags, cred_t *cr);
258 static int	cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd,
259 			struct flock64 *bfp, int flag, offset_t offset,
260 			cred_t *cr, caller_context_t *ct);
261 
/*
 * The cachefs vnode operations vector.  It is filled in by
 * cachefs_init_vnops() and handed out by cachefs_getvnodeops().
 */
262 struct vnodeops *cachefs_vnodeops;
263 
/*
 * Template passed to vn_make_ops(): each entry pairs a VOPNAME_* with
 * the cachefs function implementing it.  The NULL pair ends the table.
 */
264 static const fs_operation_def_t cachefs_vnodeops_template[] = {
265 	VOPNAME_OPEN,		{ .vop_open = cachefs_open },
266 	VOPNAME_CLOSE,		{ .vop_close = cachefs_close },
267 	VOPNAME_READ,		{ .vop_read = cachefs_read },
268 	VOPNAME_WRITE,		{ .vop_write = cachefs_write },
269 	VOPNAME_IOCTL,		{ .vop_ioctl = cachefs_ioctl },
270 	VOPNAME_GETATTR,	{ .vop_getattr = cachefs_getattr },
271 	VOPNAME_SETATTR,	{ .vop_setattr = cachefs_setattr },
272 	VOPNAME_ACCESS,		{ .vop_access = cachefs_access },
273 	VOPNAME_LOOKUP,		{ .vop_lookup = cachefs_lookup },
274 	VOPNAME_CREATE,		{ .vop_create = cachefs_create },
275 	VOPNAME_REMOVE,		{ .vop_remove = cachefs_remove },
276 	VOPNAME_LINK,		{ .vop_link = cachefs_link },
277 	VOPNAME_RENAME,		{ .vop_rename = cachefs_rename },
278 	VOPNAME_MKDIR,		{ .vop_mkdir = cachefs_mkdir },
279 	VOPNAME_RMDIR,		{ .vop_rmdir = cachefs_rmdir },
280 	VOPNAME_READDIR,	{ .vop_readdir = cachefs_readdir },
281 	VOPNAME_SYMLINK,	{ .vop_symlink = cachefs_symlink },
282 	VOPNAME_READLINK,	{ .vop_readlink = cachefs_readlink },
283 	VOPNAME_FSYNC,		{ .vop_fsync = cachefs_fsync },
284 	VOPNAME_INACTIVE,	{ .vop_inactive = cachefs_inactive },
285 	VOPNAME_FID,		{ .vop_fid = cachefs_fid },
286 	VOPNAME_RWLOCK,		{ .vop_rwlock = cachefs_rwlock },
287 	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = cachefs_rwunlock },
288 	VOPNAME_SEEK,		{ .vop_seek = cachefs_seek },
289 	VOPNAME_FRLOCK,		{ .vop_frlock = cachefs_frlock },
290 	VOPNAME_SPACE,		{ .vop_space = cachefs_space },
291 	VOPNAME_REALVP,		{ .vop_realvp = cachefs_realvp },
292 	VOPNAME_GETPAGE,	{ .vop_getpage = cachefs_getpage },
293 	VOPNAME_PUTPAGE,	{ .vop_putpage = cachefs_putpage },
294 	VOPNAME_MAP,		{ .vop_map = cachefs_map },
295 	VOPNAME_ADDMAP,		{ .vop_addmap = cachefs_addmap },
296 	VOPNAME_DELMAP,		{ .vop_delmap = cachefs_delmap },
297 	VOPNAME_DUMP,		{ .vop_dump = cachefs_dump },
298 	VOPNAME_PATHCONF,	{ .vop_pathconf = cachefs_pathconf },
299 	VOPNAME_PAGEIO,		{ .vop_pageio = cachefs_pageio },
300 	VOPNAME_SETSECATTR,	{ .vop_setsecattr = cachefs_setsecattr },
301 	VOPNAME_GETSECATTR,	{ .vop_getsecattr = cachefs_getsecattr },
302 	VOPNAME_SHRLOCK,	{ .vop_shrlock = cachefs_shrlock },
303 	NULL,			NULL
304 };
305 
306 /* forward declarations of static helpers */
307 static void cachefs_modified(cnode_t *cp);
308 static int cachefs_modified_alloc(cnode_t *cp);
309 
310 int
311 cachefs_init_vnops(char *name)
312 {
313 	return (vn_make_ops(name,
314 	    cachefs_vnodeops_template, &cachefs_vnodeops));
315 }
316 
317 struct vnodeops *
318 cachefs_getvnodeops(void)
319 {
320 	return (cachefs_vnodeops);
321 }
322 
/*
 * cachefs_open
 *
 * Open entry point (installed as vop_open in cachefs_vnodeops_template).
 *
 * Fails with EPERM when the caller is not in the global zone and with
 * EISDIR when a directory or symbolic link is opened for writing.
 * Otherwise it obtains access to the file system with
 * cachefs_cd_access() and, holding cp->c_statelock:
 *   - remembers the caller's cred in the cnode if it has none yet;
 *   - when not connected, counts the open in c_rdcnt/c_wrcnt, or goes
 *     around the loop again with connected = 1 when the write cannot
 *     be done locally;
 *   - when connected, passes the open to the back vnode if there is
 *     one (otherwise just counts it) and then runs the consistency
 *     check CFSOP_CHECK_COBJECT.
 * A back file system timeout (CFS_TIMEOUT) restarts the loop.
 *
 * Returns 0 on success or an errno value.
 */
323 static int
324 cachefs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
325 {
326 	int error = 0;
327 	cnode_t *cp = VTOC(*vpp);
328 	fscache_t *fscp = C_TO_FSCACHE(cp);
329 	int held = 0;
330 	int type;
331 	int connected = 0;
332 
333 #ifdef CFSDEBUG
334 	CFS_DEBUG(CFSDEBUG_VOPS)
335 		printf("cachefs_open: ENTER vpp %p flag %x\n",
336 		    (void *)vpp, flag);
337 #endif
338 	if (getzoneid() != GLOBAL_ZONEID) {
339 		error = EPERM;
340 		goto out;
341 	}
342 	if ((flag & FWRITE) &&
343 	    ((*vpp)->v_type == VDIR || (*vpp)->v_type == VLNK)) {
344 		error = EISDIR;
345 		goto out;
346 	}
347 
348 	/*
349 	 * Cachefs only provides pass-through support for NFSv4,
350 	 * and all vnode operations are passed through to the
351 	 * back file system. For NFSv4 pass-through to work, only
352 	 * connected operation is supported, the cnode backvp must
353 	 * exist, and cachefs optional (eg., disconnectable) flags
354 	 * are turned off. Assert these conditions to ensure that
355 	 * the backfilesystem is called for the open operation.
356 	 */
357 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
358 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
359 
360 	for (;;) {
361 		/* get (or renew) access to the file system */
362 		if (held) {
363 			/* Won't loop with NFSv4 connected behavior */
364 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
365 			cachefs_cd_release(fscp);
366 			held = 0;
367 		}
368 		error = cachefs_cd_access(fscp, connected, 0);
369 		if (error)
370 			goto out;
371 		held = 1;
372 
373 		mutex_enter(&cp->c_statelock);
374 
375 		/* grab creds if we do not have any yet */
376 		if (cp->c_cred == NULL) {
377 			crhold(cr);
378 			cp->c_cred = cr;
379 		}
		/* cleared again below once the back vnode has been opened */
380 		cp->c_flags |= CN_NEEDOPEN;
381 
382 		/* if we are disconnected */
383 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
384 			/* if we cannot write to the file system */
385 			if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp)) {
386 				mutex_exit(&cp->c_statelock);
				/* retry, asking cachefs_cd_access for connected */
387 				connected = 1;
388 				continue;
389 			}
390 			/*
391 			 * Allow read only requests to continue
392 			 */
393 			if ((flag & (FWRITE|FREAD)) == FREAD) {
394 				/* track the flag for opening the backvp */
395 				cp->c_rdcnt++;
396 				mutex_exit(&cp->c_statelock);
397 				error = 0;
398 				break;
399 			}
400 
401 			/*
402 			 * check credentials - if this process's
403 			 * credentials don't match the creds in the
404 			 * cnode disallow writing while disconnected.
405 			 */
406 			if (crcmp(cp->c_cred, CRED()) != 0 &&
407 			    secpolicy_vnode_access(CRED(), *vpp,
408 			    cp->c_attr.va_uid, VWRITE) != 0) {
409 				mutex_exit(&cp->c_statelock);
410 				connected = 1;
411 				continue;
412 			}
413 			/* to get here, we know that the WRITE flag is on */
414 			cp->c_wrcnt++;
415 			if (flag & FREAD)
416 				cp->c_rdcnt++;
417 		}
418 
419 		/* else if we are connected */
420 		else {
421 			/* if cannot use the cached copy of the file */
422 			if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp) &&
423 			    ((cp->c_flags & CN_NOCACHE) == 0))
424 				cachefs_nocache(cp);
425 
426 			/* pass open to the back file */
427 			if (cp->c_backvp) {
428 				cp->c_flags &= ~CN_NEEDOPEN;
429 				CFS_DPRINT_BACKFS_NFSV4(fscp,
430 				    ("cachefs_open (nfsv4): cnode %p, "
431 				    "backvp %p\n", cp, cp->c_backvp));
432 				error = VOP_OPEN(&cp->c_backvp, flag, cr, ct);
433 				if (CFS_TIMEOUT(fscp, error)) {
					/* back fs timed out: drop access, retry */
434 					mutex_exit(&cp->c_statelock);
435 					cachefs_cd_release(fscp);
436 					held = 0;
437 					cachefs_cd_timedout(fscp);
438 					continue;
439 				} else if (error) {
440 					mutex_exit(&cp->c_statelock);
441 					break;
442 				}
443 			} else {
444 				/* backvp will be VOP_OPEN'd later */
445 				if (flag & FREAD)
446 					cp->c_rdcnt++;
447 				if (flag & FWRITE)
448 					cp->c_wrcnt++;
449 			}
450 
451 			/*
452 			 * Now perform a consistency check on the file.
453 			 * If strict consistency then force a check to
454 			 * the backfs even if the timeout has not expired
455 			 * for close-to-open consistency.
456 			 */
457 			type = 0;
458 			if (fscp->fs_consttype == CFS_FS_CONST_STRICT)
459 				type = C_BACK_CHECK;
460 			error = CFSOP_CHECK_COBJECT(fscp, cp, type, cr);
461 			if (CFS_TIMEOUT(fscp, error)) {
462 				mutex_exit(&cp->c_statelock);
463 				cachefs_cd_release(fscp);
464 				held = 0;
465 				cachefs_cd_timedout(fscp);
466 				continue;
467 			}
468 		}
469 		mutex_exit(&cp->c_statelock);
470 		break;
471 	}
	/* every "goto out" above is taken with held == 0 */
472 	if (held)
473 		cachefs_cd_release(fscp);
474 out:
475 #ifdef CFS_CD_DEBUG
476 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
477 #endif
478 #ifdef CFSDEBUG
479 	CFS_DEBUG(CFSDEBUG_VOPS)
480 		printf("cachefs_open: EXIT vpp %p error %d\n",
481 		    (void *)vpp, error);
482 #endif
483 	return (error);
484 }
485 
/*
 * cachefs_close
 *
 * Close entry point (installed as vop_close in cachefs_vnodeops_template).
 *
 * In order:
 *   - releases the process's local locks when the file system uses
 *     local locking (CFS_ISFS_LLOCK);
 *   - if the caller is the process recorded in fs_cddaemonid doing the
 *     last close of the root vnode, resets the daemon state and
 *     returns 0;
 *   - for a close that is not the last one (count > 1), only forwards
 *     the close to the back vnode, and only when connected;
 *   - for the last close, either collects c_error of an unlinked file
 *     or, for a regular file opened for writing, pushes the dirty
 *     pages with cachefs_putpage_common(); then closes the back vnode
 *     when connected.
 * A back file system timeout (CFS_TIMEOUT) restarts the loop.
 *
 * Returns 0 or an errno value; on the last-close path a pending
 * cp->c_error is reported if nothing else failed, and is then cleared.
 */
486 /* ARGSUSED */
487 static int
488 cachefs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
489 	caller_context_t *ct)
490 {
491 	int error = 0;
492 	cnode_t *cp = VTOC(vp);
493 	fscache_t *fscp = C_TO_FSCACHE(cp);
494 	int held = 0;
495 	int connected = 0;
	/* count passed to the back fs VOP_CLOSE; zeroed before a retry */
496 	int close_cnt = 1;
497 	cachefscache_t *cachep;
498 
499 #ifdef CFSDEBUG
500 	CFS_DEBUG(CFSDEBUG_VOPS)
501 		printf("cachefs_close: ENTER vp %p\n", (void *)vp);
502 #endif
503 	/*
504 	 * Cachefs only provides pass-through support for NFSv4,
505 	 * and all vnode operations are passed through to the
506 	 * back file system. For NFSv4 pass-through to work, only
507 	 * connected operation is supported, the cnode backvp must
508 	 * exist, and cachefs optional (eg., disconnectable) flags
509 	 * are turned off. Assert these conditions to ensure that
510 	 * the backfilesystem is called for the close operation.
511 	 */
512 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
513 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
514 
515 	/*
516 	 * File could have been passed in or inherited from the global zone, so
517 	 * we don't want to flat out reject the request; we'll just leave things
518 	 * the way they are and let the backfs (NFS) deal with it.
519 	 */
520 	/* get rid of any local locks */
521 	if (CFS_ISFS_LLOCK(fscp)) {
522 		(void) cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
523 	}
524 
525 	/* clean up if this is the daemon closing down */
526 	if ((fscp->fs_cddaemonid == ttoproc(curthread)->p_pid) &&
527 	    ((ttoproc(curthread)->p_pid) != 0) &&
528 	    (vp == fscp->fs_rootvp) &&
529 	    (count == 1)) {
530 		mutex_enter(&fscp->fs_cdlock);
531 		fscp->fs_cddaemonid = 0;
		/* with a dlog file present the fs is left disconnected */
532 		if (fscp->fs_dlogfile)
533 			fscp->fs_cdconnected = CFS_CD_DISCONNECTED;
534 		else
535 			fscp->fs_cdconnected = CFS_CD_CONNECTED;
536 		cv_broadcast(&fscp->fs_cdwaitcv);
537 		mutex_exit(&fscp->fs_cdlock);
538 		if (fscp->fs_flags & CFS_FS_ROOTFS) {
539 			cachep = fscp->fs_cache;
540 			mutex_enter(&cachep->c_contentslock);
541 			ASSERT(cachep->c_rootdaemonid != 0);
542 			cachep->c_rootdaemonid = 0;
543 			mutex_exit(&cachep->c_contentslock);
544 		}
545 		return (0);
546 	}
547 
548 	for (;;) {
549 		/* get (or renew) access to the file system */
550 		if (held) {
551 			/* Won't loop with NFSv4 connected behavior */
552 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
553 			cachefs_cd_release(fscp);
554 			held = 0;
555 		}
556 		error = cachefs_cd_access(fscp, connected, 0);
557 		if (error)
558 			goto out;
559 		held = 1;
		/* a connected request applies to one attempt only */
560 		connected = 0;
561 
562 		/* if not the last close */
563 		if (count > 1) {
564 			if (fscp->fs_cdconnected != CFS_CD_CONNECTED)
565 				goto out;
566 			mutex_enter(&cp->c_statelock);
567 			if (cp->c_backvp) {
568 				CFS_DPRINT_BACKFS_NFSV4(fscp,
569 				    ("cachefs_close (nfsv4): cnode %p, "
570 				    "backvp %p\n", cp, cp->c_backvp));
571 				error = VOP_CLOSE(cp->c_backvp, flag, count,
572 				    offset, cr, ct);
573 				if (CFS_TIMEOUT(fscp, error)) {
574 					mutex_exit(&cp->c_statelock);
575 					cachefs_cd_release(fscp);
576 					held = 0;
577 					cachefs_cd_timedout(fscp);
578 					continue;
579 				}
580 			}
581 			mutex_exit(&cp->c_statelock);
582 			goto out;
583 		}
584 
585 		/*
586 		 * If the file is an unlinked file, then flush the lookup
587 		 * cache so that inactive will be called if this is
588 		 * the last reference.  It will invalidate all of the
589 		 * cached pages, without writing them out.  Writing them
590 		 * out is not required because they will be written to a
591 		 * file which will be immediately removed.
592 		 */
593 		if (cp->c_unldvp != NULL) {
594 			dnlc_purge_vp(vp);
595 			mutex_enter(&cp->c_statelock);
596 			error = cp->c_error;
597 			cp->c_error = 0;
598 			mutex_exit(&cp->c_statelock);
599 			/* always call VOP_CLOSE() for back fs vnode */
600 		}
601 
602 		/* force dirty data to stable storage */
603 		else if ((vp->v_type == VREG) && (flag & FWRITE) &&
604 		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
605 			/* clean the cachefs pages synchronously */
606 			error = cachefs_putpage_common(vp, (offset_t)0,
607 			    0, 0, cr);
608 			if (CFS_TIMEOUT(fscp, error)) {
609 				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
610 					cachefs_cd_release(fscp);
611 					held = 0;
612 					cachefs_cd_timedout(fscp);
613 					continue;
614 				} else {
					/* access is dropped at the loop top */
615 					connected = 1;
616 					continue;
617 				}
618 			}
619 
620 			/* if no space left in cache, wait until connected */
621 			if ((error == ENOSPC) &&
622 			    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
623 				connected = 1;
624 				continue;
625 			}
626 
627 			/* clear the cnode error if putpage worked */
628 			if ((error == 0) && cp->c_error) {
629 				mutex_enter(&cp->c_statelock);
630 				cp->c_error = 0;
631 				mutex_exit(&cp->c_statelock);
632 			}
633 
634 			/* if any other important error */
635 			if (cp->c_error) {
636 				/* get rid of the pages */
637 				(void) cachefs_putpage_common(vp,
638 				    (offset_t)0, 0, B_INVAL | B_FORCE, cr);
639 				dnlc_purge_vp(vp);
640 			}
641 		}
642 
643 		mutex_enter(&cp->c_statelock);
644 		if (cp->c_backvp &&
645 		    (fscp->fs_cdconnected == CFS_CD_CONNECTED)) {
			/* note: this overwrites any error collected above */
646 			error = VOP_CLOSE(cp->c_backvp, flag, close_cnt,
647 			    offset, cr, ct);
648 			if (CFS_TIMEOUT(fscp, error)) {
649 				mutex_exit(&cp->c_statelock);
650 				cachefs_cd_release(fscp);
651 				held = 0;
652 				cachefs_cd_timedout(fscp);
653 				/* don't decrement the vnode counts again */
654 				close_cnt = 0;
655 				continue;
656 			}
657 		}
658 		mutex_exit(&cp->c_statelock);
659 		break;
660 	}
661 
	/* report a pending cnode error if nothing else failed; clear it */
662 	mutex_enter(&cp->c_statelock);
663 	if (!error)
664 		error = cp->c_error;
665 	cp->c_error = 0;
666 	mutex_exit(&cp->c_statelock);
667 
668 out:
669 	if (held)
670 		cachefs_cd_release(fscp);
671 #ifdef CFS_CD_DEBUG
672 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
673 #endif
674 
675 #ifdef CFSDEBUG
676 	CFS_DEBUG(CFSDEBUG_VOPS)
677 		printf("cachefs_close: EXIT vp %p\n", (void *)vp);
678 #endif
679 	return (error);
680 }
681 
682 /*ARGSUSED*/
683 static int
684 cachefs_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
685 	caller_context_t *ct)
686 {
687 	struct cnode *cp = VTOC(vp);
688 	fscache_t *fscp = C_TO_FSCACHE(cp);
689 	register u_offset_t off;
690 	register int mapoff;
691 	register caddr_t base;
692 	int n;
693 	offset_t diff;
694 	uint_t flags = 0;
695 	int error = 0;
696 
697 #if 0
698 	if (vp->v_flag & VNOCACHE)
699 		flags = SM_INVAL;
700 #endif
701 	if (getzoneid() != GLOBAL_ZONEID)
702 		return (EPERM);
703 	if (vp->v_type != VREG)
704 		return (EISDIR);
705 
706 	ASSERT(RW_READ_HELD(&cp->c_rwlock));
707 
708 	if (uiop->uio_resid == 0)
709 		return (0);
710 
711 
712 	if (uiop->uio_loffset < (offset_t)0)
713 		return (EINVAL);
714 
715 	/*
716 	 * Call backfilesystem to read if NFSv4, the cachefs code
717 	 * does the read from the back filesystem asynchronously
718 	 * which is not supported by pass-through functionality.
719 	 */
720 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
721 		error = cachefs_read_backfs_nfsv4(vp, uiop, ioflag, cr, ct);
722 		goto out;
723 	}
724 
725 	if (MANDLOCK(vp, cp->c_attr.va_mode)) {
726 		error = chklock(vp, FREAD, (offset_t)uiop->uio_loffset,
727 		    uiop->uio_resid, uiop->uio_fmode, ct);
728 		if (error)
729 			return (error);
730 	}
731 
732 	/*
733 	 * Sit in a loop and transfer (uiomove) the data in up to
734 	 * MAXBSIZE chunks. Each chunk is mapped into the kernel's
735 	 * address space as needed and then released.
736 	 */
737 	do {
738 		/*
739 		 *	off	Offset of current MAXBSIZE chunk
740 		 *	mapoff	Offset within the current chunk
741 		 *	n	Number of bytes to move from this chunk
742 		 *	base	kernel address of mapped in chunk
743 		 */
744 		off = uiop->uio_loffset & (offset_t)MAXBMASK;
745 		mapoff = uiop->uio_loffset & MAXBOFFSET;
746 		n = MAXBSIZE - mapoff;
747 		if (n > uiop->uio_resid)
748 			n = (uint_t)uiop->uio_resid;
749 
750 		/* perform consistency check */
751 		error = cachefs_cd_access(fscp, 0, 0);
752 		if (error)
753 			break;
754 		mutex_enter(&cp->c_statelock);
755 		error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
756 		diff = cp->c_size - uiop->uio_loffset;
757 		mutex_exit(&cp->c_statelock);
758 		if (CFS_TIMEOUT(fscp, error)) {
759 			cachefs_cd_release(fscp);
760 			cachefs_cd_timedout(fscp);
761 			error = 0;
762 			continue;
763 		}
764 		cachefs_cd_release(fscp);
765 
766 		if (error)
767 			break;
768 
769 		if (diff <= (offset_t)0)
770 			break;
771 		if (diff < (offset_t)n)
772 			n = diff;
773 
774 		base = segmap_getmapflt(segkmap, vp, off, (uint_t)n, 1, S_READ);
775 
776 		error = segmap_fault(kas.a_hat, segkmap, base, n,
777 		    F_SOFTLOCK, S_READ);
778 		if (error) {
779 			(void) segmap_release(segkmap, base, 0);
780 			if (FC_CODE(error) == FC_OBJERR)
781 				error =  FC_ERRNO(error);
782 			else
783 				error = EIO;
784 			break;
785 		}
786 		error = uiomove(base+mapoff, n, UIO_READ, uiop);
787 		(void) segmap_fault(kas.a_hat, segkmap, base, n,
788 		    F_SOFTUNLOCK, S_READ);
789 		if (error == 0) {
790 			/*
791 			 * if we read a whole page(s), or to eof,
792 			 *  we won't need this page(s) again soon.
793 			 */
794 			if (n + mapoff == MAXBSIZE ||
795 			    uiop->uio_loffset == cp->c_size)
796 				flags |= SM_DONTNEED;
797 		}
798 		(void) segmap_release(segkmap, base, flags);
799 	} while (error == 0 && uiop->uio_resid > 0);
800 
801 out:
802 #ifdef CFSDEBUG
803 	CFS_DEBUG(CFSDEBUG_VOPS)
804 		printf("cachefs_read: EXIT error %d resid %ld\n", error,
805 		    uiop->uio_resid);
806 #endif
807 	return (error);
808 }
809 
810 /*
811  * cachefs_read_backfs_nfsv4
812  *
813  * Call NFSv4 back filesystem to handle the read (cachefs
814  * pass-through support for NFSv4).
815  */
816 static int
817 cachefs_read_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
818 			caller_context_t *ct)
819 {
820 	cnode_t *cp = VTOC(vp);
821 	fscache_t *fscp = C_TO_FSCACHE(cp);
822 	vnode_t *backvp;
823 	int error;
824 
825 	/*
826 	 * For NFSv4 pass-through to work, only connected operation
827 	 * is supported, the cnode backvp must exist, and cachefs
828 	 * optional (eg., disconnectable) flags are turned off. Assert
829 	 * these conditions for the read operation.
830 	 */
831 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
832 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
833 
834 	/* Call backfs vnode op after extracting backvp */
835 	mutex_enter(&cp->c_statelock);
836 	backvp = cp->c_backvp;
837 	mutex_exit(&cp->c_statelock);
838 
839 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_read_backfs_nfsv4: cnode %p, "
840 	    "backvp %p\n", cp, backvp));
841 
842 	(void) VOP_RWLOCK(backvp, V_WRITELOCK_FALSE, ct);
843 	error = VOP_READ(backvp, uiop, ioflag, cr, ct);
844 	VOP_RWUNLOCK(backvp, V_WRITELOCK_FALSE, ct);
845 
846 	/* Increment cache miss counter */
847 	fscp->fs_stats.st_misses++;
848 
849 	return (error);
850 }
851 
852 /*ARGSUSED*/
853 static int
854 cachefs_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
855 	caller_context_t *ct)
856 {
857 	struct cnode *cp = VTOC(vp);
858 	fscache_t *fscp = C_TO_FSCACHE(cp);
859 	int error = 0;
860 	u_offset_t off;
861 	caddr_t base;
862 	uint_t bsize;
863 	uint_t flags;
864 	int n, on;
865 	rlim64_t limit = uiop->uio_llimit;
866 	ssize_t resid;
867 	offset_t offset;
868 	offset_t remainder;
869 
870 #ifdef CFSDEBUG
871 	CFS_DEBUG(CFSDEBUG_VOPS)
872 		printf(
873 		"cachefs_write: ENTER vp %p offset %llu count %ld cflags %x\n",
874 		    (void *)vp, uiop->uio_loffset, uiop->uio_resid,
875 		    cp->c_flags);
876 #endif
877 	if (getzoneid() != GLOBAL_ZONEID) {
878 		error = EPERM;
879 		goto out;
880 	}
881 	if (vp->v_type != VREG) {
882 		error = EISDIR;
883 		goto out;
884 	}
885 
886 	ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
887 
888 	if (uiop->uio_resid == 0) {
889 		goto out;
890 	}
891 
892 	/* Call backfilesystem to write if NFSv4 */
893 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
894 		error = cachefs_write_backfs_nfsv4(vp, uiop, ioflag, cr, ct);
895 		goto out2;
896 	}
897 
898 	if (MANDLOCK(vp, cp->c_attr.va_mode)) {
899 		error = chklock(vp, FWRITE, (offset_t)uiop->uio_loffset,
900 		    uiop->uio_resid, uiop->uio_fmode, ct);
901 		if (error)
902 			goto out;
903 	}
904 
905 	if (ioflag & FAPPEND) {
906 		for (;;) {
907 			/* do consistency check to get correct file size */
908 			error = cachefs_cd_access(fscp, 0, 1);
909 			if (error)
910 				goto out;
911 			mutex_enter(&cp->c_statelock);
912 			error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
913 			uiop->uio_loffset = cp->c_size;
914 			mutex_exit(&cp->c_statelock);
915 			if (CFS_TIMEOUT(fscp, error)) {
916 				cachefs_cd_release(fscp);
917 				cachefs_cd_timedout(fscp);
918 				continue;
919 			}
920 			cachefs_cd_release(fscp);
921 			if (error)
922 				goto out;
923 			break;
924 		}
925 	}
926 
927 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
928 		limit = MAXOFFSET_T;
929 
930 	if (uiop->uio_loffset >= limit) {
931 		proc_t *p = ttoproc(curthread);
932 
933 		mutex_enter(&p->p_lock);
934 		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
935 		    p, RCA_UNSAFE_SIGINFO);
936 		mutex_exit(&p->p_lock);
937 		error = EFBIG;
938 		goto out;
939 	}
940 	if (uiop->uio_loffset > fscp->fs_offmax) {
941 		error = EFBIG;
942 		goto out;
943 	}
944 
945 	if (limit > fscp->fs_offmax)
946 		limit = fscp->fs_offmax;
947 
948 	if (uiop->uio_loffset < (offset_t)0) {
949 		error = EINVAL;
950 		goto out;
951 	}
952 
953 	offset = uiop->uio_loffset + uiop->uio_resid;
954 	/*
955 	 * Check to make sure that the process will not exceed
956 	 * its limit on file size.  It is okay to write up to
957 	 * the limit, but not beyond.  Thus, the write which
958 	 * reaches the limit will be short and the next write
959 	 * will return an error.
960 	 */
961 	remainder = 0;
962 	if (offset > limit) {
963 		remainder = (int)(offset - (u_offset_t)limit);
964 		uiop->uio_resid = limit - uiop->uio_loffset;
965 		if (uiop->uio_resid <= 0) {
966 			proc_t *p = ttoproc(curthread);
967 
968 			uiop->uio_resid += remainder;
969 			mutex_enter(&p->p_lock);
970 			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
971 			    p->p_rctls, p, RCA_UNSAFE_SIGINFO);
972 			mutex_exit(&p->p_lock);
973 			error = EFBIG;
974 			goto out;
975 		}
976 	}
977 
978 	resid = uiop->uio_resid;
979 	offset = uiop->uio_loffset;
980 	bsize = vp->v_vfsp->vfs_bsize;
981 
982 	/* loop around and do the write in MAXBSIZE chunks */
983 	do {
984 		/* mapping offset */
985 		off = uiop->uio_loffset & (offset_t)MAXBMASK;
986 		on = uiop->uio_loffset & MAXBOFFSET; /* Rel. offset */
987 		n = MAXBSIZE - on;
988 		if (n > uiop->uio_resid)
989 			n = (int)uiop->uio_resid;
990 		base = segmap_getmap(segkmap, vp, off);
991 		error = cachefs_writepage(vp, (base + on), n, uiop);
992 		if (error == 0) {
993 			flags = 0;
994 			/*
995 			 * Have written a whole block.Start an
996 			 * asynchronous write and mark the buffer to
997 			 * indicate that it won't be needed again
998 			 * soon.
999 			 */
1000 			if (n + on == bsize) {
1001 				flags = SM_WRITE |SM_ASYNC |SM_DONTNEED;
1002 			}
1003 #if 0
1004 			/* XXX need to understand this */
1005 			if ((ioflag & (FSYNC|FDSYNC)) ||
1006 			    (cp->c_backvp && vn_has_flocks(cp->c_backvp))) {
1007 				flags &= ~SM_ASYNC;
1008 				flags |= SM_WRITE;
1009 			}
1010 #else
1011 			if (ioflag & (FSYNC|FDSYNC)) {
1012 				flags &= ~SM_ASYNC;
1013 				flags |= SM_WRITE;
1014 			}
1015 #endif
1016 			error = segmap_release(segkmap, base, flags);
1017 		} else {
1018 			(void) segmap_release(segkmap, base, 0);
1019 		}
1020 	} while (error == 0 && uiop->uio_resid > 0);
1021 
1022 out:
1023 	if (error == EINTR && (ioflag & (FSYNC|FDSYNC))) {
1024 		uiop->uio_resid = resid;
1025 		uiop->uio_loffset = offset;
1026 	} else
1027 		uiop->uio_resid += remainder;
1028 
1029 out2:
1030 #ifdef CFSDEBUG
1031 	CFS_DEBUG(CFSDEBUG_VOPS)
1032 		printf("cachefs_write: EXIT error %d\n", error);
1033 #endif
1034 	return (error);
1035 }
1036 
1037 /*
1038  * cachefs_write_backfs_nfsv4
1039  *
1040  * Call NFSv4 back filesystem to handle the write (cachefs
1041  * pass-through support for NFSv4).
1042  */
1043 static int
1044 cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
1045 			caller_context_t *ct)
1046 {
1047 	cnode_t *cp = VTOC(vp);
1048 	fscache_t *fscp = C_TO_FSCACHE(cp);
1049 	vnode_t *backvp;
1050 	int error;
1051 
1052 	/*
1053 	 * For NFSv4 pass-through to work, only connected operation
1054 	 * is supported, the cnode backvp must exist, and cachefs
1055 	 * optional (eg., disconnectable) flags are turned off. Assert
1056 	 * these conditions for the read operation.
1057 	 */
1058 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
1059 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
1060 
1061 	/* Call backfs vnode op after extracting the backvp */
1062 	mutex_enter(&cp->c_statelock);
1063 	backvp = cp->c_backvp;
1064 	mutex_exit(&cp->c_statelock);
1065 
1066 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_write_backfs_nfsv4: cnode %p, "
1067 	    "backvp %p\n", cp, backvp));
1068 	(void) VOP_RWLOCK(backvp, V_WRITELOCK_TRUE, ct);
1069 	error = VOP_WRITE(backvp, uiop, ioflag, cr, ct);
1070 	VOP_RWUNLOCK(backvp, V_WRITELOCK_TRUE, ct);
1071 
1072 	return (error);
1073 }
1074 
1075 /*
1076  * see if we've charged ourselves for frontfile data at
1077  * the given offset.  If not, allocate a block for it now.
1078  */
1079 static int
1080 cachefs_charge_page(struct cnode *cp, u_offset_t offset)
1081 {
1082 	u_offset_t blockoff;
1083 	int error;
1084 	int inc;
1085 
1086 	ASSERT(MUTEX_HELD(&cp->c_statelock));
1087 	/*LINTED*/
1088 	ASSERT(PAGESIZE <= MAXBSIZE);
1089 
1090 	error = 0;
1091 	blockoff = offset & (offset_t)MAXBMASK;
1092 
1093 	/* get the front file if necessary so allocblocks works */
1094 	if ((cp->c_frontvp == NULL) &&
1095 	    ((cp->c_flags & CN_NOCACHE) == 0)) {
1096 		(void) cachefs_getfrontfile(cp);
1097 	}
1098 	if (cp->c_flags & CN_NOCACHE)
1099 		return (1);
1100 
1101 	if (cachefs_check_allocmap(cp, blockoff))
1102 		return (0);
1103 
1104 	for (inc = PAGESIZE; inc < MAXBSIZE; inc += PAGESIZE)
1105 		if (cachefs_check_allocmap(cp, blockoff+inc))
1106 			return (0);
1107 
1108 	error = cachefs_allocblocks(C_TO_FSCACHE(cp)->fs_cache, 1,
1109 	    cp->c_metadata.md_rltype);
1110 	if (error == 0) {
1111 		cp->c_metadata.md_frontblks++;
1112 		cp->c_flags |= CN_UPDATED;
1113 	}
1114 	return (error);
1115 }
1116 
/*
 * cachefs_writepage
 *
 * Called only by cachefs_write to copy one segkmap chunk worth of user
 * data into the page cache, one page at a time.
 *
 *	vp     - vnode being written
 *	base   - kernel address (inside a segkmap mapping) to copy to
 *	tcount - total bytes to move; cachefs_write passes at most MAXBSIZE
 *	uiop   - source of the data; advanced by uiomove()
 *
 * For each page the routine marks the cnode CN_CMODINPROG, either
 * creates the page outright (when its old contents are not needed) or
 * soft-locks it via segmap_fault(), copies the data, and then, under
 * c_statelock, grows c_size/va_size, clears CN_CMODINPROG and, on
 * success, sets CDIRTY and possibly updates the allocmap.
 *
 * Returns 0 or an errno value.
 */
static int
cachefs_writepage(vnode_t *vp, caddr_t base, int tcount, uio_t *uiop)
{
	struct cnode *cp =  VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	register int n;
	register u_offset_t offset;
	int error = 0, terror;
	extern struct as kas;
	u_offset_t lastpage_off;
	/*
	 * NOTE(review): pagecreate is set when a page is created but is
	 * never cleared again, so once set it stays set for every later
	 * page handled by this call, including pages that take the
	 * segmap_fault() path.  Confirm whether that is intended.
	 */
	int pagecreate = 0;
	int newpage;

#ifdef CFSDEBUG
	/*
	 * NOTE(review): the format string ends in a literal backslash
	 * followed by a newline; that looks accidental.
	 */
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf(
		    "cachefs_writepage: ENTER vp %p offset %llu len %ld\\\n",
		    (void *)vp, uiop->uio_loffset, uiop->uio_resid);
#endif

	/*
	 * Move bytes in PAGESIZE chunks. We must avoid spanning pages in
	 * uiomove() because page faults may cause the cache to be invalidated
	 * out from under us.
	 */
	do {
		offset = uiop->uio_loffset;
		/* offset of the page holding the current last byte of file */
		lastpage_off = (cp->c_size - 1) & (offset_t)PAGEMASK;

		/*
		 * If not connected then need to make sure we have space
		 * to perform the write.  We could make this check
		 * a little tighter by only doing it if we are growing the file.
		 */
		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
			/* probe only: the block is given straight back */
			error = cachefs_allocblocks(fscp->fs_cache, 1,
			    cp->c_metadata.md_rltype);
			if (error)
				break;
			cachefs_freeblocks(fscp->fs_cache, 1,
			    cp->c_metadata.md_rltype);
		}

		/*
		 * n is the number of bytes required to satisfy the request
		 * or the number of bytes to fill out the page.
		 */
		n = (int)(PAGESIZE - ((uintptr_t)base & PAGEOFFSET));
		if (n > tcount)
			n = tcount;

		/*
		 * The number of bytes of data in the last page can not
		 * be accurately be determined while page is being
		 * uiomove'd to and the size of the file being updated.
		 * Thus, inform threads which need to know accurately
		 * how much data is in the last page of the file.  They
		 * will not do the i/o immediately, but will arrange for
		 * the i/o to happen later when this modify operation
		 * will have finished.
		 *
		 * in similar NFS code, this is done right before the
		 * uiomove(), which is best.  but here in cachefs, we
		 * have two uiomove()s, so we must do it here.
		 */
		ASSERT(!(cp->c_flags & CN_CMODINPROG));
		mutex_enter(&cp->c_statelock);
		cp->c_flags |= CN_CMODINPROG;
		/* start of the MAXBSIZE region being modified */
		cp->c_modaddr = (offset & (offset_t)MAXBMASK);
		mutex_exit(&cp->c_statelock);

		/*
		 * Check to see if we can skip reading in the page
		 * and just allocate the memory.  We can do this
		 * if we are going to rewrite the entire mapping
		 * or if we are going to write to or beyond the current
		 * end of file from the beginning of the mapping.
		 *
		 * Note that && binds tighter than ||, so the last two
		 * lines of the condition form a single alternative:
		 * page-aligned base AND (full page OR reaching EOF).
		 */
		if ((offset > (lastpage_off + PAGEOFFSET)) ||
		    ((cp->c_size == 0) && (offset < PAGESIZE)) ||
		    ((uintptr_t)base & PAGEOFFSET) == 0 && (n == PAGESIZE ||
		    ((offset + n) >= cp->c_size))) {
			pagecreate = 1;

			/*
			 * segmap_pagecreate() returns 1 if it calls
			 * page_create_va() to allocate any pages.
			 */
			newpage = segmap_pagecreate(segkmap,
			    (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK),
			    PAGESIZE, 0);
			/* do not zero page if we are overwriting all of it */
			if (!((((uintptr_t)base & PAGEOFFSET) == 0) &&
			    (n == PAGESIZE))) {
				(void) kzero((void *)
				    ((uintptr_t)base & (uintptr_t)PAGEMASK),
				    PAGESIZE);
			}
			error = uiomove(base, n, UIO_WRITE, uiop);

			/*
			 * Unlock the page allocated by page_create_va()
			 * in segmap_pagecreate()
			 */
			if (newpage)
				segmap_pageunlock(segkmap,
				    (caddr_t)((uintptr_t)base &
				    (uintptr_t)PAGEMASK),
				    PAGESIZE, S_WRITE);
		} else {
			/*
			 * KLUDGE ! Use segmap_fault instead of faulting and
			 * using as_fault() to avoid a recursive readers lock
			 * on kas.
			 */
			error = segmap_fault(kas.a_hat, segkmap, (caddr_t)
			    ((uintptr_t)base & (uintptr_t)PAGEMASK),
			    PAGESIZE, F_SOFTLOCK, S_WRITE);
			if (error) {
				/* translate the fault code into an errno */
				if (FC_CODE(error) == FC_OBJERR)
					error =  FC_ERRNO(error);
				else
					error = EIO;
				/* CN_CMODINPROG is cleared after the loop */
				break;
			}
			error = uiomove(base, n, UIO_WRITE, uiop);
			(void) segmap_fault(kas.a_hat, segkmap, (caddr_t)
			    ((uintptr_t)base & (uintptr_t)PAGEMASK),
			    PAGESIZE, F_SOFTUNLOCK, S_WRITE);
		}
		n = (int)(uiop->uio_loffset - offset); /* n = # bytes written */
		base += n;
		tcount -= n;

		/* get access to the file system */
		if ((terror = cachefs_cd_access(fscp, 0, 1)) != 0) {
			/* CN_CMODINPROG is cleared after the loop */
			error = terror;
			break;
		}

		/*
		 * cp->c_attr.va_size is the maximum number of
		 * bytes known to be in the file.
		 * Make sure it is at least as high as the
		 * last byte we just wrote into the buffer.
		 */
		mutex_enter(&cp->c_statelock);
		if (cp->c_size < uiop->uio_loffset) {
			cp->c_size = uiop->uio_loffset;
		}
		if (cp->c_size != cp->c_attr.va_size) {
			cp->c_attr.va_size = cp->c_size;
			cp->c_flags |= CN_UPDATED;
		}
		/* c_size is now correct, so we can clear modinprog */
		cp->c_flags &= ~CN_CMODINPROG;
		if (error == 0) {
			cp->c_flags |= CDIRTY;
			if (pagecreate && (cp->c_flags & CN_NOCACHE) == 0) {
				/*
				 * if we're not in NOCACHE mode
				 * (i.e., single-writer), we update the
				 * allocmap here rather than waiting until
				 * cachefspush is called.  This prevents
				 * getpage from clustering up pages from
				 * the backfile and stomping over the changes
				 * we make here.
				 */
				if (cachefs_charge_page(cp, offset) == 0) {
					cachefs_update_allocmap(cp,
					    offset & (offset_t)PAGEMASK,
					    (size_t)PAGESIZE);
				}

				/* else we ran out of space */
				else {
					/* nocache file if connected */
					if (fscp->fs_cdconnected ==
					    CFS_CD_CONNECTED)
						cachefs_nocache(cp);
					/*
					 * If disconnected then cannot
					 * nocache the file.  Let it have
					 * the space.
					 */
					else {
						cp->c_metadata.md_frontblks++;
						cp->c_flags |= CN_UPDATED;
						cachefs_update_allocmap(cp,
						    offset & (offset_t)PAGEMASK,
						    (size_t)PAGESIZE);
					}
				}
			}
		}
		mutex_exit(&cp->c_statelock);
		cachefs_cd_release(fscp);
	} while (tcount > 0 && error == 0);

	/*
	 * The loop can be left via "break" after CN_CMODINPROG was set
	 * but before it was cleared; clear it here in that case.
	 */
	if (cp->c_flags & CN_CMODINPROG) {
		/* XXX assert error != 0?  FC_ERRNO() makes this more risky. */
		mutex_enter(&cp->c_statelock);
		cp->c_flags &= ~CN_CMODINPROG;
		mutex_exit(&cp->c_statelock);
	}

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_writepage: EXIT error %d\n", error);
#endif

	return (error);
}
1339 
/*
 * cachefs_push
 *
 * Pushes out pages to the back and/or front file system.
 *
 *	vp    - vnode the pages belong to; must not be read-only
 *	pp    - a dirty page to start from; a kluster is built around it
 *	offp  - if non-NULL, receives the file offset of the i/o
 *	lenp  - if non-NULL, receives the length of the i/o
 *	flags - pageout flags; B_ASYNC is not allowed here
 *	cr    - credentials for the write; must not be NULL
 *
 * When connected the data goes to the back file first and then, on a
 * best-effort basis, to the front file.  When disconnected it goes to
 * the front file only.  If a cachefs_writepage() is in progress on an
 * overlapping region the pages are simply re-marked modified and the
 * push is left for later (0 is returned).
 *
 * Returns 0 or an errno value.
 */
static int
cachefs_push(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
    int flags, cred_t *cr)
{
	struct cnode *cp = VTOC(vp);
	struct buf *bp;
	int error;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	u_offset_t iooff;
	size_t iolen;
	u_offset_t lbn;
	u_offset_t lbn_off;
	uint_t bsize;

	ASSERT((flags & B_ASYNC) == 0);
	ASSERT(!vn_is_readonly(vp));
	ASSERT(pp != NULL);
	ASSERT(cr != NULL);

	/* offset of the block (or page, if larger) that contains pp */
	bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
	lbn = pp->p_offset / bsize;
	lbn_off = lbn * bsize;

	/*
	 * Find a kluster that fits in one block, or in
	 * one page if pages are bigger than blocks.  If
	 * there is less file space allocated than a whole
	 * page, we'll shorten the i/o request below.
	 */

	pp = pvn_write_kluster(vp, pp, &iooff, &iolen, lbn_off,
	    roundup(bsize, PAGESIZE), flags);

	/*
	 * The CN_CMODINPROG flag makes sure that we use a correct
	 * value of c_size, below.  CN_CMODINPROG is set in
	 * cachefs_writepage().  When CN_CMODINPROG is set it
	 * indicates that a uiomove() is in progress and the c_size
	 * has not been made consistent with the new size of the
	 * file. When the uiomove() completes the c_size is updated
	 * and the CN_CMODINPROG flag is cleared.
	 *
	 * The CN_CMODINPROG flag makes sure that cachefs_push_front
	 * and cachefs_push_connected see a consistent value of
	 * c_size.  Without this handshaking, it is possible that
	 * these routines will pick up the old value of c_size before
	 * the uiomove() in cachefs_writepage() completes.  This will
	 * result in the vn_rdwr() being too small, and data loss.
	 *
	 * More precisely, there is a window between the time the
	 * uiomove() completes and the time the c_size is updated. If
	 * a VOP_PUTPAGE() operation intervenes in this window, the
	 * page will be picked up, because it is dirty; it will be
	 * unlocked, unless it was pagecreate'd. When the page is
	 * picked up as dirty, the dirty bit is reset
	 * (pvn_getdirty()). In cachefs_push_connected(), c_size is
	 * checked.  This will still be the old size.  Therefore, the
	 * page will not be written out to the correct length, and the
	 * page will be clean, so the data may disappear.
	 */
	/* unlocked peek first; the flag is re-checked under c_statelock */
	if (cp->c_flags & CN_CMODINPROG) {
		mutex_enter(&cp->c_statelock);
		/* does [c_modaddr, +MAXBSIZE) overlap [iooff, +iolen)? */
		if ((cp->c_flags & CN_CMODINPROG) &&
		    cp->c_modaddr + MAXBSIZE > iooff &&
		    cp->c_modaddr < iooff + iolen) {
			page_t *plist;

			/*
			 * A write is in progress for this region of
			 * the file.  If we did not detect
			 * CN_CMODINPROG here then this path through
			 * cachefs_push_connected() would eventually
			 * do the vn_rdwr() and may not write out all
			 * of the data in the pages.  We end up losing
			 * data. So we decide to set the modified bit
			 * on each page in the page list and mark the
			 * cnode with CDIRTY.  This push will be
			 * restarted at some later time.
			 */

			plist = pp;
			while (plist != NULL) {
				pp = plist;
				page_sub(&plist, pp);
				hat_setmod(pp);
				page_io_unlock(pp);
				page_unlock(pp);
			}
			cp->c_flags |= CDIRTY;
			mutex_exit(&cp->c_statelock);
			/* report the kluster range even though nothing moved */
			if (offp)
				*offp = iooff;
			if (lenp)
				*lenp = iolen;
			return (0);
		}
		mutex_exit(&cp->c_statelock);
	}

	/*
	 * Set the pages up for pageout.
	 */
	bp = pageio_setup(pp, iolen, CTOV(cp), B_WRITE | flags);
	if (bp == NULL) {

		/*
		 * currently, there is no way for pageio_setup() to
		 * return NULL, since it uses its own scheme for
		 * kmem_alloc()ing that shouldn't return NULL, and
		 * since pageio_setup() itself dereferences the thing
		 * it's about to return.  still, we need to be ready
		 * in case this ever does start happening.
		 */

		error = ENOMEM;
		goto writedone;
	}
	/*
	 * pageio_setup should have set b_addr to 0.  This
	 * is correct since we want to do I/O on a page
	 * boundary.  bp_mapin will use this addr to calculate
	 * an offset, and then set b_addr to the kernel virtual
	 * address it allocated for us.
	 */
	bp->b_edev = 0;
	bp->b_dev = 0;
	bp->b_lblkno = (diskaddr_t)lbtodb(iooff);
	bp_mapin(bp);

	/*
	 * Shorten the i/o so it does not run past c_size.
	 * NOTE(review): b_lblkno is assigned above but b_blkno is read
	 * here; confirm the two always name the same value.
	 */
	iolen  = cp->c_size - ldbtob(bp->b_blkno);
	if (iolen > bp->b_bcount)
		iolen  = bp->b_bcount;

	/* if connected */
	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
		/* write to the back file first */
		error = cachefs_push_connected(vp, bp, iolen, iooff, cr);

		/* write to the front file if allowed */
		if ((error == 0) && CFS_ISFS_NONSHARED(fscp) &&
		    ((cp->c_flags & CN_NOCACHE) == 0)) {
			/* try to write to the front file */
			(void) cachefs_push_front(vp, bp, iolen, iooff, cr);
		}
	}

	/* else if disconnected */
	else {
		/* try to write to the front file */
		error = cachefs_push_front(vp, bp, iolen, iooff, cr);
	}

	bp_mapout(bp);
	pageio_done(bp);

writedone:

	/* hand the pages back, flagged B_ERROR if the push failed */
	pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
	if (offp)
		*offp = iooff;
	/* iolen is the shortened length here unless pageio_setup failed */
	if (lenp)
		*lenp = iolen;

	/* XXX ask bob mastors how to fix this someday */
	mutex_enter(&cp->c_statelock);
	if (error) {
		if (error == ENOSPC) {
			/* invalidate only when connected or soft mounted */
			if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) ||
			    CFS_ISFS_SOFT(fscp)) {
				CFSOP_INVALIDATE_COBJECT(fscp, cp, cr);
				cp->c_error = error;
			}
		} else if ((CFS_TIMEOUT(fscp, error) == 0) &&
		    (error != EINTR)) {
			/* timeouts and EINTR do not invalidate the object */
			CFSOP_INVALIDATE_COBJECT(fscp, cp, cr);
			cp->c_error = error;
		}
	} else if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
		CFSOP_MODIFY_COBJECT(fscp, cp, cr);
	}
	mutex_exit(&cp->c_statelock);

	return (error);
}
1527 
1528 /*
1529  * Pushes out pages to the back file system.
1530  */
1531 static int
1532 cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen,
1533     u_offset_t iooff, cred_t *cr)
1534 {
1535 	struct cnode *cp = VTOC(vp);
1536 	int error = 0;
1537 	int mode = 0;
1538 	fscache_t *fscp = C_TO_FSCACHE(cp);
1539 	ssize_t resid;
1540 	vnode_t *backvp;
1541 
1542 	/* get the back file if necessary */
1543 	mutex_enter(&cp->c_statelock);
1544 	if (cp->c_backvp == NULL) {
1545 		error = cachefs_getbackvp(fscp, cp);
1546 		if (error) {
1547 			mutex_exit(&cp->c_statelock);
1548 			goto out;
1549 		}
1550 	}
1551 	backvp = cp->c_backvp;
1552 	VN_HOLD(backvp);
1553 	mutex_exit(&cp->c_statelock);
1554 
1555 	if (CFS_ISFS_NONSHARED(fscp) && CFS_ISFS_SNR(fscp))
1556 		mode = FSYNC;
1557 
1558 	/* write to the back file */
1559 	error = bp->b_error = vn_rdwr(UIO_WRITE, backvp, bp->b_un.b_addr,
1560 	    iolen, iooff, UIO_SYSSPACE, mode,
1561 	    RLIM64_INFINITY, cr, &resid);
1562 	if (error) {
1563 #ifdef CFSDEBUG
1564 		CFS_DEBUG(CFSDEBUG_VOPS | CFSDEBUG_BACK)
1565 			printf("cachefspush: error %d cr %p\n",
1566 			    error, (void *)cr);
1567 #endif
1568 		bp->b_flags |= B_ERROR;
1569 	}
1570 	VN_RELE(backvp);
1571 out:
1572 	return (error);
1573 }
1574 
/*
 * cachefs_push_front
 *
 * Pushes out pages to the front file system.
 * Called for both connected and disconnected states.
 *
 *	vp    - cachefs vnode whose front file is written
 *	bp    - buf whose b_un.b_addr points at the data
 *	iolen - number of bytes to write
 *	iooff - file offset to write at
 *	cr    - caller's credentials; used for the disconnected log record
 *		unless the cnode carries credentials in c_cred.  The
 *		front file write itself is done with kcred.
 *
 * Returns 0 on success.  ETIMEDOUT is returned when the front file
 * cannot be used (file system not non-shared, cnode in CN_NOCACHE
 * mode, or disconnected without a populated file and good attributes).
 * ENOSPC is returned when disconnected and cache or log space runs out.
 * When connected, running out of space or failing the front file write
 * puts the cnode into nocache mode and still returns 0.
 */
static int
cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen,
    u_offset_t iooff, cred_t *cr)
{
	struct cnode *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error = 0;
	ssize_t resid;
	u_offset_t popoff;
	/* nonzero only once a dlog modify record has been started */
	off_t commit = 0;
	uint_t seq;
	/* assigned only on the path that also sets commit nonzero */
	enum cachefs_rl_type type;
	vnode_t *frontvp = NULL;

	mutex_enter(&cp->c_statelock);

	if (!CFS_ISFS_NONSHARED(fscp)) {
		error = ETIMEDOUT;
		goto out;
	}

	/* get the front file if necessary */
	if ((cp->c_frontvp == NULL) &&
	    ((cp->c_flags & CN_NOCACHE) == 0)) {
		(void) cachefs_getfrontfile(cp);
	}
	if (cp->c_flags & CN_NOCACHE) {
		error = ETIMEDOUT;
		goto out;
	}

	/* if disconnected, needs to be populated and have good attributes */
	if ((fscp->fs_cdconnected != CFS_CD_CONNECTED) &&
	    (((cp->c_metadata.md_flags & MD_POPULATED) == 0) ||
	    (cp->c_metadata.md_flags & MD_NEEDATTRS))) {
		error = ETIMEDOUT;
		goto out;
	}

	/* charge the cache for every MAXBSIZE step of the i/o range */
	for (popoff = iooff; popoff < (iooff + iolen); popoff += MAXBSIZE) {
		if (cachefs_charge_page(cp, popoff)) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				/* give up on caching; error stays 0 */
				cachefs_nocache(cp);
				goto out;
			} else {
				error = ENOSPC;
				goto out;
			}
		}
	}

	if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
		/* log the first putpage to a file */
		if ((cp->c_metadata.md_flags & MD_PUTPAGE) == 0) {
			/* uses open's creds if we have them */
			if (cp->c_cred)
				cr = cp->c_cred;

			if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
				error = cachefs_dlog_cidmap(fscp);
				if (error) {
					/* any cidmap failure becomes ENOSPC */
					error = ENOSPC;
					goto out;
				}
				cp->c_metadata.md_flags |= MD_MAPPING;
			}

			commit = cachefs_dlog_modify(fscp, cp, cr, &seq);
			if (commit == 0) {
				/* out of space */
				error = ENOSPC;
				goto out;
			}

			cp->c_metadata.md_seq = seq;
			/* remember the rl type so "out" can restore it */
			type = cp->c_metadata.md_rltype;
			cachefs_modified(cp);
			cp->c_metadata.md_flags |= MD_PUTPAGE;
			cp->c_metadata.md_flags &= ~MD_PUSHDONE;
			cp->c_flags |= CN_UPDATED;
		}

		/* subsequent putpages just get a new sequence number */
		else {
			/* but only if it matters */
			if (cp->c_metadata.md_seq != fscp->fs_dlogseq) {
				seq = cachefs_dlog_seqnext(fscp);
				if (seq == 0) {
					error = ENOSPC;
					goto out;
				}
				cp->c_metadata.md_seq = seq;
				cp->c_flags |= CN_UPDATED;
				/* XXX maybe should do write_metadata here */
			}
		}
	}

	/*
	 * Hold the front vnode and drop c_statelock for the duration of
	 * the write; the lock is retaken before the hold is released.
	 */
	frontvp = cp->c_frontvp;
	VN_HOLD(frontvp);
	mutex_exit(&cp->c_statelock);
	error = bp->b_error = vn_rdwr(UIO_WRITE, frontvp,
	    bp->b_un.b_addr, iolen, iooff, UIO_SYSSPACE, 0,
	    RLIM64_INFINITY, kcred, &resid);
	mutex_enter(&cp->c_statelock);
	VN_RELE(frontvp);
	frontvp = NULL;
	if (error) {
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			/* connected: stop caching and hide the error */
			cachefs_nocache(cp);
			error = 0;
			goto out;
		} else {
			goto out;
		}
	}

	/* the data is in the front file now; record that */
	(void) cachefs_update_allocmap(cp, iooff, iolen);
	cp->c_flags |= (CN_UPDATED | CN_NEED_FRONT_SYNC |
	    CN_POPULATION_PENDING);
	if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
		gethrestime(&cp->c_metadata.md_localmtime);
		cp->c_metadata.md_flags |= MD_LOCALMTIME;
	}

out:
	/* reached with c_statelock held on every path */
	if (commit) {
		/* commit the log record */
		ASSERT(fscp->fs_cdconnected == CFS_CD_DISCONNECTED);
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX fix on panic */
		}
	}

	/* the logged putpage failed: undo the MD_PUTPAGE bookkeeping */
	if (error && commit) {
		cp->c_metadata.md_flags &= ~MD_PUTPAGE;
		cachefs_rlent_moveto(fscp->fs_cache, type,
		    cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks);
		cp->c_metadata.md_rltype = type;
		cp->c_flags |= CN_UPDATED;
	}
	mutex_exit(&cp->c_statelock);
	return (error);
}
1724 
1725 /*ARGSUSED*/
1726 static int
1727 cachefs_dump(struct vnode *vp, caddr_t foo1, offset_t foo2, offset_t foo3,
1728     caller_context_t *ct)
1729 {
1730 	return (ENOSYS); /* should we panic if we get here? */
1731 }
1732 
1733 /*ARGSUSED*/
1734 static int
1735 cachefs_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, cred_t *cred,
1736 	int *rvalp, caller_context_t *ct)
1737 {
1738 	int error;
1739 	struct cnode *cp = VTOC(vp);
1740 	struct fscache *fscp = C_TO_FSCACHE(cp);
1741 	struct cachefscache *cachep;
1742 	extern kmutex_t cachefs_cachelock;
1743 	extern cachefscache_t *cachefs_cachelist;
1744 	cachefsio_pack_t *packp;
1745 	STRUCT_DECL(cachefsio_dcmd, dcmd);
1746 	int	inlen, outlen;	/* LP64: generic int for struct in/out len */
1747 	void *dinp, *doutp;
1748 	int (*dcmd_routine)(vnode_t *, void *, void *);
1749 
1750 	if (getzoneid() != GLOBAL_ZONEID)
1751 		return (EPERM);
1752 
1753 	/*
1754 	 * Cachefs only provides pass-through support for NFSv4,
1755 	 * and all vnode operations are passed through to the
1756 	 * back file system. For NFSv4 pass-through to work, only
1757 	 * connected operation is supported, the cnode backvp must
1758 	 * exist, and cachefs optional (eg., disconnectable) flags
1759 	 * are turned off. Assert these conditions which ensure
1760 	 * that only a subset of the ioctls are "truly supported"
1761 	 * for NFSv4 (these are CFSDCMD_DAEMONID and CFSDCMD_GETSTATS.
1762 	 * The packing operations are meaningless since there is
1763 	 * no caching for NFSv4, and the called functions silently
1764 	 * return if the backfilesystem is NFSv4. The daemon
1765 	 * commands except for those above are essentially used
1766 	 * for disconnectable operation support (including log
1767 	 * rolling), so in each called function, we assert that
1768 	 * NFSv4 is not in use. The _FIO* calls (except _FIOCOD)
1769 	 * are from "cfsfstype" which is not a documented
1770 	 * command. However, the command is visible in
1771 	 * /usr/lib/fs/cachefs so the commands are simply let
1772 	 * through (don't seem to impact pass-through functionality).
1773 	 */
1774 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
1775 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
1776 
1777 	switch (cmd) {
1778 	case CACHEFSIO_PACK:
1779 		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1780 		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1781 		if (!error)
1782 			error = cachefs_pack(vp, packp->p_name, cred);
1783 		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1784 		break;
1785 
1786 	case CACHEFSIO_UNPACK:
1787 		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1788 		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1789 		if (!error)
1790 			error = cachefs_unpack(vp, packp->p_name, cred);
1791 		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1792 		break;
1793 
1794 	case CACHEFSIO_PACKINFO:
1795 		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1796 		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1797 		if (!error)
1798 			error = cachefs_packinfo(vp, packp->p_name,
1799 			    &packp->p_status, cred);
1800 		if (!error)
1801 			error = xcopyout(packp, (void *)arg,
1802 			    sizeof (cachefsio_pack_t));
1803 		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1804 		break;
1805 
1806 	case CACHEFSIO_UNPACKALL:
1807 		error = cachefs_unpackall(vp);
1808 		break;
1809 
1810 	case CACHEFSIO_DCMD:
1811 		/*
1812 		 * This is a private interface between the cachefsd and
1813 		 * this file system.
1814 		 */
1815 
1816 		/* must be root to use these commands */
1817 		if (secpolicy_fs_config(cred, vp->v_vfsp) != 0)
1818 			return (EPERM);
1819 
1820 		/* get the command packet */
1821 		STRUCT_INIT(dcmd, flag & DATAMODEL_MASK);
1822 		error = xcopyin((void *)arg, STRUCT_BUF(dcmd),
1823 		    SIZEOF_STRUCT(cachefsio_dcmd, DATAMODEL_NATIVE));
1824 		if (error)
1825 			return (error);
1826 
1827 		/* copy in the data for the operation */
1828 		dinp = NULL;
1829 		if ((inlen = STRUCT_FGET(dcmd, d_slen)) > 0) {
1830 			dinp = cachefs_kmem_alloc(inlen, KM_SLEEP);
1831 			error = xcopyin(STRUCT_FGETP(dcmd, d_sdata), dinp,
1832 			    inlen);
1833 			if (error)
1834 				return (error);
1835 		}
1836 
1837 		/* allocate space for the result */
1838 		doutp = NULL;
1839 		if ((outlen = STRUCT_FGET(dcmd, d_rlen)) > 0)
1840 			doutp = cachefs_kmem_alloc(outlen, KM_SLEEP);
1841 
1842 		/*
1843 		 * Assert NFSv4 only allows the daemonid and getstats
1844 		 * daemon requests
1845 		 */
1846 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0 ||
1847 		    STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_DAEMONID ||
1848 		    STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_GETSTATS);
1849 
1850 		/* get the routine to execute */
1851 		dcmd_routine = NULL;
1852 		switch (STRUCT_FGET(dcmd, d_cmd)) {
1853 		case CFSDCMD_DAEMONID:
1854 			dcmd_routine = cachefs_io_daemonid;
1855 			break;
1856 		case CFSDCMD_STATEGET:
1857 			dcmd_routine = cachefs_io_stateget;
1858 			break;
1859 		case CFSDCMD_STATESET:
1860 			dcmd_routine = cachefs_io_stateset;
1861 			break;
1862 		case CFSDCMD_XWAIT:
1863 			dcmd_routine = cachefs_io_xwait;
1864 			break;
1865 		case CFSDCMD_EXISTS:
1866 			dcmd_routine = cachefs_io_exists;
1867 			break;
1868 		case CFSDCMD_LOSTFOUND:
1869 			dcmd_routine = cachefs_io_lostfound;
1870 			break;
1871 		case CFSDCMD_GETINFO:
1872 			dcmd_routine = cachefs_io_getinfo;
1873 			break;
1874 		case CFSDCMD_CIDTOFID:
1875 			dcmd_routine = cachefs_io_cidtofid;
1876 			break;
1877 		case CFSDCMD_GETATTRFID:
1878 			dcmd_routine = cachefs_io_getattrfid;
1879 			break;
1880 		case CFSDCMD_GETATTRNAME:
1881 			dcmd_routine = cachefs_io_getattrname;
1882 			break;
1883 		case CFSDCMD_GETSTATS:
1884 			dcmd_routine = cachefs_io_getstats;
1885 			break;
1886 		case CFSDCMD_ROOTFID:
1887 			dcmd_routine = cachefs_io_rootfid;
1888 			break;
1889 		case CFSDCMD_CREATE:
1890 			dcmd_routine = cachefs_io_create;
1891 			break;
1892 		case CFSDCMD_REMOVE:
1893 			dcmd_routine = cachefs_io_remove;
1894 			break;
1895 		case CFSDCMD_LINK:
1896 			dcmd_routine = cachefs_io_link;
1897 			break;
1898 		case CFSDCMD_RENAME:
1899 			dcmd_routine = cachefs_io_rename;
1900 			break;
1901 		case CFSDCMD_MKDIR:
1902 			dcmd_routine = cachefs_io_mkdir;
1903 			break;
1904 		case CFSDCMD_RMDIR:
1905 			dcmd_routine = cachefs_io_rmdir;
1906 			break;
1907 		case CFSDCMD_SYMLINK:
1908 			dcmd_routine = cachefs_io_symlink;
1909 			break;
1910 		case CFSDCMD_SETATTR:
1911 			dcmd_routine = cachefs_io_setattr;
1912 			break;
1913 		case CFSDCMD_SETSECATTR:
1914 			dcmd_routine = cachefs_io_setsecattr;
1915 			break;
1916 		case CFSDCMD_PUSHBACK:
1917 			dcmd_routine = cachefs_io_pushback;
1918 			break;
1919 		default:
1920 			error = ENOTTY;
1921 			break;
1922 		}
1923 
1924 		/* execute the routine */
1925 		if (dcmd_routine)
1926 			error = (*dcmd_routine)(vp, dinp, doutp);
1927 
1928 		/* copy out the result */
1929 		if ((error == 0) && doutp)
1930 			error = xcopyout(doutp, STRUCT_FGETP(dcmd, d_rdata),
1931 			    outlen);
1932 
1933 		/* free allocated memory */
1934 		if (dinp)
1935 			cachefs_kmem_free(dinp, inlen);
1936 		if (doutp)
1937 			cachefs_kmem_free(doutp, outlen);
1938 
1939 		break;
1940 
1941 	case _FIOCOD:
1942 		if (secpolicy_fs_config(cred, vp->v_vfsp) != 0) {
1943 			error = EPERM;
1944 			break;
1945 		}
1946 
1947 		error = EBUSY;
1948 		if (arg) {
1949 			/* non-zero arg means do all filesystems */
1950 			mutex_enter(&cachefs_cachelock);
1951 			for (cachep = cachefs_cachelist; cachep != NULL;
1952 			    cachep = cachep->c_next) {
1953 				mutex_enter(&cachep->c_fslistlock);
1954 				for (fscp = cachep->c_fslist;
1955 				    fscp != NULL;
1956 				    fscp = fscp->fs_next) {
1957 					if (CFS_ISFS_CODCONST(fscp)) {
1958 						gethrestime(&fscp->fs_cod_time);
1959 						error = 0;
1960 					}
1961 				}
1962 				mutex_exit(&cachep->c_fslistlock);
1963 			}
1964 			mutex_exit(&cachefs_cachelock);
1965 		} else {
1966 			if (CFS_ISFS_CODCONST(fscp)) {
1967 				gethrestime(&fscp->fs_cod_time);
1968 				error = 0;
1969 			}
1970 		}
1971 		break;
1972 
1973 	case _FIOSTOPCACHE:
1974 		error = cachefs_stop_cache(cp);
1975 		break;
1976 
1977 	default:
1978 		error = ENOTTY;
1979 		break;
1980 	}
1981 
1982 	/* return the result */
1983 	return (error);
1984 }
1985 
1986 ino64_t
1987 cachefs_fileno_conflict(fscache_t *fscp, ino64_t old)
1988 {
1989 	ino64_t new;
1990 
1991 	ASSERT(MUTEX_HELD(&fscp->fs_fslock));
1992 
1993 	for (;;) {
1994 		fscp->fs_info.fi_localfileno++;
1995 		if (fscp->fs_info.fi_localfileno == 0)
1996 			fscp->fs_info.fi_localfileno = 3;
1997 		fscp->fs_flags |= CFS_FS_DIRTYINFO;
1998 
1999 		new = fscp->fs_info.fi_localfileno;
2000 		if (! cachefs_fileno_inuse(fscp, new))
2001 			break;
2002 	}
2003 
2004 	cachefs_inum_register(fscp, old, new);
2005 	cachefs_inum_register(fscp, new, 0);
2006 	return (new);
2007 }
2008 
2009 /*ARGSUSED*/
2010 static int
2011 cachefs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
2012 	caller_context_t *ct)
2013 {
2014 	struct cnode *cp = VTOC(vp);
2015 	fscache_t *fscp = C_TO_FSCACHE(cp);
2016 	int error = 0;
2017 	int held = 0;
2018 	int connected = 0;
2019 
2020 #ifdef CFSDEBUG
2021 	CFS_DEBUG(CFSDEBUG_VOPS)
2022 		printf("cachefs_getattr: ENTER vp %p\n", (void *)vp);
2023 #endif
2024 
2025 	if (getzoneid() != GLOBAL_ZONEID)
2026 		return (EPERM);
2027 
2028 	/* Call backfilesystem getattr if NFSv4 */
2029 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
2030 		error = cachefs_getattr_backfs_nfsv4(vp, vap, flags, cr, ct);
2031 		goto out;
2032 	}
2033 
2034 	/*
2035 	 * If it has been specified that the return value will
2036 	 * just be used as a hint, and we are only being asked
2037 	 * for size, fsid or rdevid, then return the client's
2038 	 * notion of these values without checking to make sure
2039 	 * that the attribute cache is up to date.
2040 	 * The whole point is to avoid an over the wire GETATTR
2041 	 * call.
2042 	 */
2043 	if (flags & ATTR_HINT) {
2044 		if (vap->va_mask ==
2045 		    (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
2046 			if (vap->va_mask | AT_SIZE)
2047 				vap->va_size = cp->c_size;
2048 			/*
2049 			 * Return the FSID of the cachefs filesystem,
2050 			 * not the back filesystem
2051 			 */
2052 			if (vap->va_mask | AT_FSID)
2053 				vap->va_fsid = vp->v_vfsp->vfs_dev;
2054 			if (vap->va_mask | AT_RDEV)
2055 				vap->va_rdev = cp->c_attr.va_rdev;
2056 			return (0);
2057 		}
2058 	}
2059 
2060 	/*
2061 	 * Only need to flush pages if asking for the mtime
2062 	 * and if there any dirty pages.
2063 	 */
2064 	if (vap->va_mask & AT_MTIME) {
2065 		/*EMPTY*/
2066 #if 0
2067 		/*
2068 		 * XXX bob: stolen from nfs code, need to do something similar
2069 		 */
2070 		rp = VTOR(vp);
2071 		if ((rp->r_flags & RDIRTY) || rp->r_iocnt > 0)
2072 			(void) nfs3_putpage(vp, (offset_t)0, 0, 0, cr);
2073 #endif
2074 	}
2075 
2076 	for (;;) {
2077 		/* get (or renew) access to the file system */
2078 		if (held) {
2079 			cachefs_cd_release(fscp);
2080 			held = 0;
2081 		}
2082 		error = cachefs_cd_access(fscp, connected, 0);
2083 		if (error)
2084 			goto out;
2085 		held = 1;
2086 
2087 		/*
2088 		 * If it has been specified that the return value will
2089 		 * just be used as a hint, and we are only being asked
2090 		 * for size, fsid or rdevid, then return the client's
2091 		 * notion of these values without checking to make sure
2092 		 * that the attribute cache is up to date.
2093 		 * The whole point is to avoid an over the wire GETATTR
2094 		 * call.
2095 		 */
2096 		if (flags & ATTR_HINT) {
2097 			if (vap->va_mask ==
2098 			    (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
2099 				if (vap->va_mask | AT_SIZE)
2100 					vap->va_size = cp->c_size;
2101 				/*
2102 				 * Return the FSID of the cachefs filesystem,
2103 				 * not the back filesystem
2104 				 */
2105 				if (vap->va_mask | AT_FSID)
2106 					vap->va_fsid = vp->v_vfsp->vfs_dev;
2107 				if (vap->va_mask | AT_RDEV)
2108 					vap->va_rdev = cp->c_attr.va_rdev;
2109 				goto out;
2110 			}
2111 		}
2112 
2113 		mutex_enter(&cp->c_statelock);
2114 		if ((cp->c_metadata.md_flags & MD_NEEDATTRS) &&
2115 		    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
2116 			mutex_exit(&cp->c_statelock);
2117 			connected = 1;
2118 			continue;
2119 		}
2120 
2121 		error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2122 		if (CFS_TIMEOUT(fscp, error)) {
2123 			mutex_exit(&cp->c_statelock);
2124 			cachefs_cd_release(fscp);
2125 			held = 0;
2126 			cachefs_cd_timedout(fscp);
2127 			continue;
2128 		}
2129 		if (error) {
2130 			mutex_exit(&cp->c_statelock);
2131 			break;
2132 		}
2133 
2134 		/* check for fileno conflict */
2135 		if ((fscp->fs_inum_size > 0) &&
2136 		    ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) {
2137 			ino64_t fakenum;
2138 
2139 			mutex_exit(&cp->c_statelock);
2140 			mutex_enter(&fscp->fs_fslock);
2141 			fakenum = cachefs_inum_real2fake(fscp,
2142 			    cp->c_attr.va_nodeid);
2143 			if (fakenum == 0) {
2144 				fakenum = cachefs_fileno_conflict(fscp,
2145 				    cp->c_attr.va_nodeid);
2146 			}
2147 			mutex_exit(&fscp->fs_fslock);
2148 
2149 			mutex_enter(&cp->c_statelock);
2150 			cp->c_metadata.md_flags |= MD_LOCALFILENO;
2151 			cp->c_metadata.md_localfileno = fakenum;
2152 			cp->c_flags |= CN_UPDATED;
2153 		}
2154 
2155 		/* copy out the attributes */
2156 		*vap = cp->c_attr;
2157 
2158 		/*
2159 		 * return the FSID of the cachefs filesystem,
2160 		 * not the back filesystem
2161 		 */
2162 		vap->va_fsid = vp->v_vfsp->vfs_dev;
2163 
2164 		/* return our idea of the size */
2165 		if (cp->c_size > vap->va_size)
2166 			vap->va_size = cp->c_size;
2167 
2168 		/* overwrite with our version of fileno and timestamps */
2169 		vap->va_nodeid = cp->c_metadata.md_localfileno;
2170 		vap->va_mtime = cp->c_metadata.md_localmtime;
2171 		vap->va_ctime = cp->c_metadata.md_localctime;
2172 
2173 		mutex_exit(&cp->c_statelock);
2174 		break;
2175 	}
2176 out:
2177 	if (held)
2178 		cachefs_cd_release(fscp);
2179 #ifdef CFS_CD_DEBUG
2180 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2181 #endif
2182 
2183 #ifdef CFSDEBUG
2184 	CFS_DEBUG(CFSDEBUG_VOPS)
2185 		printf("cachefs_getattr: EXIT error = %d\n", error);
2186 #endif
2187 	return (error);
2188 }
2189 
2190 /*
2191  * cachefs_getattr_backfs_nfsv4
2192  *
2193  * Call NFSv4 back filesystem to handle the getattr (cachefs
2194  * pass-through support for NFSv4).
2195  */
2196 static int
2197 cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap,
2198     int flags, cred_t *cr, caller_context_t *ct)
2199 {
2200 	cnode_t *cp = VTOC(vp);
2201 	fscache_t *fscp = C_TO_FSCACHE(cp);
2202 	vnode_t *backvp;
2203 	int error;
2204 
2205 	/*
2206 	 * For NFSv4 pass-through to work, only connected operation
2207 	 * is supported, the cnode backvp must exist, and cachefs
2208 	 * optional (eg., disconnectable) flags are turned off. Assert
2209 	 * these conditions for the getattr operation.
2210 	 */
2211 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2212 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2213 
2214 	/* Call backfs vnode op after extracting backvp */
2215 	mutex_enter(&cp->c_statelock);
2216 	backvp = cp->c_backvp;
2217 	mutex_exit(&cp->c_statelock);
2218 
2219 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_getattr_backfs_nfsv4: cnode %p,"
2220 	    " backvp %p\n", cp, backvp));
2221 	error = VOP_GETATTR(backvp, vap, flags, cr, ct);
2222 
2223 	/* Update attributes */
2224 	cp->c_attr = *vap;
2225 
2226 	/*
2227 	 * return the FSID of the cachefs filesystem,
2228 	 * not the back filesystem
2229 	 */
2230 	vap->va_fsid = vp->v_vfsp->vfs_dev;
2231 
2232 	return (error);
2233 }
2234 
2235 /*ARGSUSED4*/
2236 static int
2237 cachefs_setattr(
2238 	vnode_t *vp,
2239 	vattr_t *vap,
2240 	int flags,
2241 	cred_t *cr,
2242 	caller_context_t *ct)
2243 {
2244 	cnode_t *cp = VTOC(vp);
2245 	fscache_t *fscp = C_TO_FSCACHE(cp);
2246 	int error;
2247 	int connected;
2248 	int held = 0;
2249 
2250 	if (getzoneid() != GLOBAL_ZONEID)
2251 		return (EPERM);
2252 
2253 	/*
2254 	 * Cachefs only provides pass-through support for NFSv4,
2255 	 * and all vnode operations are passed through to the
2256 	 * back file system. For NFSv4 pass-through to work, only
2257 	 * connected operation is supported, the cnode backvp must
2258 	 * exist, and cachefs optional (eg., disconnectable) flags
2259 	 * are turned off. Assert these conditions to ensure that
2260 	 * the backfilesystem is called for the setattr operation.
2261 	 */
2262 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2263 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2264 
2265 	connected = 0;
2266 	for (;;) {
2267 		/* drop hold on file system */
2268 		if (held) {
2269 			/* Won't loop with NFSv4 connected behavior */
2270 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2271 			cachefs_cd_release(fscp);
2272 			held = 0;
2273 		}
2274 
2275 		/* acquire access to the file system */
2276 		error = cachefs_cd_access(fscp, connected, 1);
2277 		if (error)
2278 			break;
2279 		held = 1;
2280 
2281 		/* perform the setattr */
2282 		error = cachefs_setattr_common(vp, vap, flags, cr, ct);
2283 		if (error) {
2284 			/* if connected */
2285 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2286 				if (CFS_TIMEOUT(fscp, error)) {
2287 					cachefs_cd_release(fscp);
2288 					held = 0;
2289 					cachefs_cd_timedout(fscp);
2290 					connected = 0;
2291 					continue;
2292 				}
2293 			}
2294 
2295 			/* else must be disconnected */
2296 			else {
2297 				if (CFS_TIMEOUT(fscp, error)) {
2298 					connected = 1;
2299 					continue;
2300 				}
2301 			}
2302 		}
2303 		break;
2304 	}
2305 
2306 	if (held) {
2307 		cachefs_cd_release(fscp);
2308 	}
2309 #ifdef CFS_CD_DEBUG
2310 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2311 #endif
2312 	return (error);
2313 }
2314 
2315 static int
2316 cachefs_setattr_common(
2317 	vnode_t *vp,
2318 	vattr_t *vap,
2319 	int flags,
2320 	cred_t *cr,
2321 	caller_context_t *ct)
2322 {
2323 	cnode_t *cp = VTOC(vp);
2324 	fscache_t *fscp = C_TO_FSCACHE(cp);
2325 	cachefscache_t *cachep = fscp->fs_cache;
2326 	uint_t mask = vap->va_mask;
2327 	int error = 0;
2328 	uint_t bcnt;
2329 
2330 	/* Cannot set these attributes. */
2331 	if (mask & AT_NOSET)
2332 		return (EINVAL);
2333 
2334 	/*
2335 	 * Truncate file.  Must have write permission and not be a directory.
2336 	 */
2337 	if (mask & AT_SIZE) {
2338 		if (vp->v_type == VDIR) {
2339 			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE))
2340 				cachefs_log_truncate(cachep, EISDIR,
2341 				    fscp->fs_cfsvfsp,
2342 				    &cp->c_metadata.md_cookie,
2343 				    cp->c_id.cid_fileno,
2344 				    crgetuid(cr), vap->va_size);
2345 			return (EISDIR);
2346 		}
2347 	}
2348 
2349 	/*
2350 	 * Gotta deal with one special case here, where we're setting the
2351 	 * size of the file. First, we zero out part of the page after the
2352 	 * new size of the file. Then we toss (not write) all pages after
2353 	 * page in which the new offset occurs. Note that the NULL passed
2354 	 * in instead of a putapage() fn parameter is correct, since
2355 	 * no dirty pages will be found (B_TRUNC | B_INVAL).
2356 	 */
2357 
2358 	rw_enter(&cp->c_rwlock, RW_WRITER);
2359 
2360 	/* sync dirty pages */
2361 	if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
2362 		error = cachefs_putpage_common(vp, (offset_t)0, 0, 0, cr);
2363 		if (error == EINTR)
2364 			goto out;
2365 	}
2366 	error = 0;
2367 
2368 	/* if connected */
2369 	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2370 		error = cachefs_setattr_connected(vp, vap, flags, cr, ct);
2371 	}
2372 	/* else must be disconnected */
2373 	else {
2374 		error = cachefs_setattr_disconnected(vp, vap, flags, cr, ct);
2375 	}
2376 	if (error)
2377 		goto out;
2378 
2379 	/*
2380 	 * If the file size has been changed then
2381 	 * toss whole pages beyond the end of the file and zero
2382 	 * the portion of the last page that is beyond the end of the file.
2383 	 */
2384 	if (mask & AT_SIZE && !CFS_ISFS_BACKFS_NFSV4(fscp)) {
2385 		bcnt = (uint_t)(cp->c_size & PAGEOFFSET);
2386 		if (bcnt)
2387 			pvn_vpzero(vp, cp->c_size, PAGESIZE - bcnt);
2388 		(void) pvn_vplist_dirty(vp, cp->c_size, cachefs_push,
2389 		    B_TRUNC | B_INVAL, cr);
2390 	}
2391 
2392 out:
2393 	rw_exit(&cp->c_rwlock);
2394 
2395 	if ((mask & AT_SIZE) &&
2396 	    (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE)))
2397 		cachefs_log_truncate(cachep, error, fscp->fs_cfsvfsp,
2398 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
2399 		    crgetuid(cr), vap->va_size);
2400 
2401 	return (error);
2402 }
2403 
2404 static int
2405 cachefs_setattr_connected(
2406 	vnode_t *vp,
2407 	vattr_t *vap,
2408 	int flags,
2409 	cred_t *cr,
2410 	caller_context_t *ct)
2411 {
2412 	cnode_t *cp = VTOC(vp);
2413 	fscache_t *fscp = C_TO_FSCACHE(cp);
2414 	uint_t mask = vap->va_mask;
2415 	int error = 0;
2416 	int setsize;
2417 
2418 	mutex_enter(&cp->c_statelock);
2419 
2420 	if (cp->c_backvp == NULL) {
2421 		error = cachefs_getbackvp(fscp, cp);
2422 		if (error)
2423 			goto out;
2424 	}
2425 
2426 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2427 	if (error)
2428 		goto out;
2429 
2430 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_setattr (nfsv4): cnode %p, "
2431 	    "backvp %p\n", cp, cp->c_backvp));
2432 	error = VOP_SETATTR(cp->c_backvp, vap, flags, cr, ct);
2433 	if (error) {
2434 		goto out;
2435 	}
2436 
2437 	/* if the size of the file is being changed */
2438 	if (mask & AT_SIZE) {
2439 		cp->c_size = vap->va_size;
2440 		error = 0;
2441 		setsize = 0;
2442 
2443 		/* see if okay to try to set the file size */
2444 		if (((cp->c_flags & CN_NOCACHE) == 0) &&
2445 		    CFS_ISFS_NONSHARED(fscp)) {
2446 			/* okay to set size if file is populated */
2447 			if (cp->c_metadata.md_flags & MD_POPULATED)
2448 				setsize = 1;
2449 
2450 			/*
2451 			 * Okay to set size if front file exists and setting
2452 			 * file size to zero.
2453 			 */
2454 			if ((cp->c_metadata.md_flags & MD_FILE) &&
2455 			    (vap->va_size == 0))
2456 				setsize = 1;
2457 		}
2458 
2459 		/* if okay to try to set the file size */
2460 		if (setsize) {
2461 			error = 0;
2462 			if (cp->c_frontvp == NULL)
2463 				error = cachefs_getfrontfile(cp);
2464 			if (error == 0)
2465 				error = cachefs_frontfile_size(cp, cp->c_size);
2466 		} else if (cp->c_metadata.md_flags & MD_FILE) {
2467 			/* make sure file gets nocached */
2468 			error = EEXIST;
2469 		}
2470 
2471 		/* if we have to nocache the file */
2472 		if (error) {
2473 			if ((cp->c_flags & CN_NOCACHE) == 0 &&
2474 			    !CFS_ISFS_BACKFS_NFSV4(fscp))
2475 				cachefs_nocache(cp);
2476 			error = 0;
2477 		}
2478 	}
2479 
2480 	cp->c_flags |= CN_UPDATED;
2481 
2482 	/* XXX bob: given what modify_cobject does this seems unnecessary */
2483 	cp->c_attr.va_mask = AT_ALL;
2484 	error = VOP_GETATTR(cp->c_backvp, &cp->c_attr, 0, cr, ct);
2485 	if (error)
2486 		goto out;
2487 
2488 	cp->c_attr.va_size = MAX(cp->c_attr.va_size, cp->c_size);
2489 	cp->c_size = cp->c_attr.va_size;
2490 
2491 	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
2492 out:
2493 	mutex_exit(&cp->c_statelock);
2494 	return (error);
2495 }
2496 
2497 /*
2498  * perform the setattr on the local file system
2499  */
2500 /*ARGSUSED4*/
2501 static int
2502 cachefs_setattr_disconnected(
2503 	vnode_t *vp,
2504 	vattr_t *vap,
2505 	int flags,
2506 	cred_t *cr,
2507 	caller_context_t *ct)
2508 {
2509 	cnode_t *cp = VTOC(vp);
2510 	fscache_t *fscp = C_TO_FSCACHE(cp);
2511 	int mask;
2512 	int error;
2513 	int newfile;
2514 	off_t commit = 0;
2515 
2516 	if (CFS_ISFS_WRITE_AROUND(fscp))
2517 		return (ETIMEDOUT);
2518 
2519 	/* if we do not have good attributes */
2520 	if (cp->c_metadata.md_flags & MD_NEEDATTRS)
2521 		return (ETIMEDOUT);
2522 
2523 	/* primary concern is to keep this routine as much like ufs_setattr */
2524 
2525 	mutex_enter(&cp->c_statelock);
2526 
2527 	error = secpolicy_vnode_setattr(cr, vp, vap, &cp->c_attr, flags,
2528 	    cachefs_access_local, cp);
2529 
2530 	if (error)
2531 		goto out;
2532 
2533 	mask = vap->va_mask;
2534 
2535 	/* if changing the size of the file */
2536 	if (mask & AT_SIZE) {
2537 		if (vp->v_type == VDIR) {
2538 			error = EISDIR;
2539 			goto out;
2540 		}
2541 
2542 		if (vp->v_type == VFIFO) {
2543 			error = 0;
2544 			goto out;
2545 		}
2546 
2547 		if ((vp->v_type != VREG) &&
2548 		    !((vp->v_type == VLNK) && (vap->va_size == 0))) {
2549 			error = EINVAL;
2550 			goto out;
2551 		}
2552 
2553 		if (vap->va_size > fscp->fs_offmax) {
2554 			error = EFBIG;
2555 			goto out;
2556 		}
2557 
2558 		/* if the file is not populated and we are not truncating it */
2559 		if (((cp->c_metadata.md_flags & MD_POPULATED) == 0) &&
2560 		    (vap->va_size != 0)) {
2561 			error = ETIMEDOUT;
2562 			goto out;
2563 		}
2564 
2565 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2566 			error = cachefs_dlog_cidmap(fscp);
2567 			if (error) {
2568 				error = ENOSPC;
2569 				goto out;
2570 			}
2571 			cp->c_metadata.md_flags |= MD_MAPPING;
2572 		}
2573 
2574 		/* log the operation */
2575 		commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2576 		if (commit == 0) {
2577 			error = ENOSPC;
2578 			goto out;
2579 		}
2580 		cp->c_flags &= ~CN_NOCACHE;
2581 
2582 		/* special case truncating fast sym links */
2583 		if ((vp->v_type == VLNK) &&
2584 		    (cp->c_metadata.md_flags & MD_FASTSYMLNK)) {
2585 			/* XXX how can we get here */
2586 			/* XXX should update mtime */
2587 			cp->c_size = 0;
2588 			error = 0;
2589 			goto out;
2590 		}
2591 
2592 		/* get the front file, this may create one */
2593 		newfile = (cp->c_metadata.md_flags & MD_FILE) ? 0 : 1;
2594 		if (cp->c_frontvp == NULL) {
2595 			error = cachefs_getfrontfile(cp);
2596 			if (error)
2597 				goto out;
2598 		}
2599 		ASSERT(cp->c_frontvp);
2600 		if (newfile && (cp->c_flags & CN_UPDATED)) {
2601 			/* allocate space for the metadata */
2602 			ASSERT((cp->c_flags & CN_ALLOC_PENDING) == 0);
2603 			ASSERT((cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR)
2604 			    == 0);
2605 			error = filegrp_write_metadata(cp->c_filegrp,
2606 			    &cp->c_id, &cp->c_metadata);
2607 			if (error)
2608 				goto out;
2609 		}
2610 
2611 		/* change the size of the front file */
2612 		error = cachefs_frontfile_size(cp, vap->va_size);
2613 		if (error)
2614 			goto out;
2615 		cp->c_attr.va_size = cp->c_size = vap->va_size;
2616 		gethrestime(&cp->c_metadata.md_localmtime);
2617 		cp->c_metadata.md_flags |= MD_POPULATED | MD_LOCALMTIME;
2618 		cachefs_modified(cp);
2619 		cp->c_flags |= CN_UPDATED;
2620 	}
2621 
2622 	if (mask & AT_MODE) {
2623 		/* mark as modified */
2624 		if (cachefs_modified_alloc(cp)) {
2625 			error = ENOSPC;
2626 			goto out;
2627 		}
2628 
2629 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2630 			error = cachefs_dlog_cidmap(fscp);
2631 			if (error) {
2632 				error = ENOSPC;
2633 				goto out;
2634 			}
2635 			cp->c_metadata.md_flags |= MD_MAPPING;
2636 		}
2637 
2638 		/* log the operation if not already logged */
2639 		if (commit == 0) {
2640 			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2641 			if (commit == 0) {
2642 				error = ENOSPC;
2643 				goto out;
2644 			}
2645 		}
2646 
2647 		cp->c_attr.va_mode &= S_IFMT;
2648 		cp->c_attr.va_mode |= vap->va_mode & ~S_IFMT;
2649 		gethrestime(&cp->c_metadata.md_localctime);
2650 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
2651 		cp->c_flags |= CN_UPDATED;
2652 	}
2653 
2654 	if (mask & (AT_UID|AT_GID)) {
2655 
2656 		/* mark as modified */
2657 		if (cachefs_modified_alloc(cp)) {
2658 			error = ENOSPC;
2659 			goto out;
2660 		}
2661 
2662 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2663 			error = cachefs_dlog_cidmap(fscp);
2664 			if (error) {
2665 				error = ENOSPC;
2666 				goto out;
2667 			}
2668 			cp->c_metadata.md_flags |= MD_MAPPING;
2669 		}
2670 
2671 		/* log the operation if not already logged */
2672 		if (commit == 0) {
2673 			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2674 			if (commit == 0) {
2675 				error = ENOSPC;
2676 				goto out;
2677 			}
2678 		}
2679 
2680 		if (mask & AT_UID)
2681 			cp->c_attr.va_uid = vap->va_uid;
2682 
2683 		if (mask & AT_GID)
2684 			cp->c_attr.va_gid = vap->va_gid;
2685 		gethrestime(&cp->c_metadata.md_localctime);
2686 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
2687 		cp->c_flags |= CN_UPDATED;
2688 	}
2689 
2690 
2691 	if (mask & (AT_MTIME|AT_ATIME)) {
2692 		/* mark as modified */
2693 		if (cachefs_modified_alloc(cp)) {
2694 			error = ENOSPC;
2695 			goto out;
2696 		}
2697 
2698 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2699 			error = cachefs_dlog_cidmap(fscp);
2700 			if (error) {
2701 				error = ENOSPC;
2702 				goto out;
2703 			}
2704 			cp->c_metadata.md_flags |= MD_MAPPING;
2705 		}
2706 
2707 		/* log the operation if not already logged */
2708 		if (commit == 0) {
2709 			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2710 			if (commit == 0) {
2711 				error = ENOSPC;
2712 				goto out;
2713 			}
2714 		}
2715 
2716 		if (mask & AT_MTIME) {
2717 			cp->c_metadata.md_localmtime = vap->va_mtime;
2718 			cp->c_metadata.md_flags |= MD_LOCALMTIME;
2719 		}
2720 		if (mask & AT_ATIME)
2721 			cp->c_attr.va_atime = vap->va_atime;
2722 		gethrestime(&cp->c_metadata.md_localctime);
2723 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
2724 		cp->c_flags |= CN_UPDATED;
2725 	}
2726 
2727 out:
2728 	mutex_exit(&cp->c_statelock);
2729 
2730 	/* commit the log entry */
2731 	if (commit) {
2732 		if (cachefs_dlog_commit(fscp, commit, error)) {
2733 			/*EMPTY*/
2734 			/* XXX bob: fix on panic */
2735 		}
2736 	}
2737 	return (error);
2738 }
2739 
2740 /* ARGSUSED */
2741 static int
2742 cachefs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
2743 	caller_context_t *ct)
2744 {
2745 	cnode_t *cp = VTOC(vp);
2746 	fscache_t *fscp = C_TO_FSCACHE(cp);
2747 	int error;
2748 	int held = 0;
2749 	int connected = 0;
2750 
2751 #ifdef CFSDEBUG
2752 	CFS_DEBUG(CFSDEBUG_VOPS)
2753 		printf("cachefs_access: ENTER vp %p\n", (void *)vp);
2754 #endif
2755 	if (getzoneid() != GLOBAL_ZONEID) {
2756 		error = EPERM;
2757 		goto out;
2758 	}
2759 
2760 	/*
2761 	 * Cachefs only provides pass-through support for NFSv4,
2762 	 * and all vnode operations are passed through to the
2763 	 * back file system. For NFSv4 pass-through to work, only
2764 	 * connected operation is supported, the cnode backvp must
2765 	 * exist, and cachefs optional (eg., disconnectable) flags
2766 	 * are turned off. Assert these conditions to ensure that
2767 	 * the backfilesystem is called for the access operation.
2768 	 */
2769 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2770 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2771 
2772 	for (;;) {
2773 		/* get (or renew) access to the file system */
2774 		if (held) {
2775 			/* Won't loop with NFSv4 connected behavior */
2776 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2777 			cachefs_cd_release(fscp);
2778 			held = 0;
2779 		}
2780 		error = cachefs_cd_access(fscp, connected, 0);
2781 		if (error)
2782 			break;
2783 		held = 1;
2784 
2785 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2786 			error = cachefs_access_connected(vp, mode, flags,
2787 			    cr);
2788 			if (CFS_TIMEOUT(fscp, error)) {
2789 				cachefs_cd_release(fscp);
2790 				held = 0;
2791 				cachefs_cd_timedout(fscp);
2792 				connected = 0;
2793 				continue;
2794 			}
2795 		} else {
2796 			mutex_enter(&cp->c_statelock);
2797 			error = cachefs_access_local(cp, mode, cr);
2798 			mutex_exit(&cp->c_statelock);
2799 			if (CFS_TIMEOUT(fscp, error)) {
2800 				if (cachefs_cd_access_miss(fscp)) {
2801 					mutex_enter(&cp->c_statelock);
2802 					if (cp->c_backvp == NULL) {
2803 						(void) cachefs_getbackvp(fscp,
2804 						    cp);
2805 					}
2806 					mutex_exit(&cp->c_statelock);
2807 					error = cachefs_access_connected(vp,
2808 					    mode, flags, cr);
2809 					if (!CFS_TIMEOUT(fscp, error))
2810 						break;
2811 					delay(5*hz);
2812 					connected = 0;
2813 					continue;
2814 				}
2815 				connected = 1;
2816 				continue;
2817 			}
2818 		}
2819 		break;
2820 	}
2821 	if (held)
2822 		cachefs_cd_release(fscp);
2823 #ifdef CFS_CD_DEBUG
2824 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2825 #endif
2826 out:
2827 #ifdef CFSDEBUG
2828 	CFS_DEBUG(CFSDEBUG_VOPS)
2829 		printf("cachefs_access: EXIT error = %d\n", error);
2830 #endif
2831 	return (error);
2832 }
2833 
2834 static int
2835 cachefs_access_connected(struct vnode *vp, int mode, int flags, cred_t *cr)
2836 {
2837 	cnode_t *cp = VTOC(vp);
2838 	fscache_t *fscp = C_TO_FSCACHE(cp);
2839 	int error = 0;
2840 
2841 	mutex_enter(&cp->c_statelock);
2842 
2843 	/* Make sure the cnode attrs are valid first. */
2844 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2845 	if (error)
2846 		goto out;
2847 
2848 	/* see if can do a local file system check */
2849 	if ((fscp->fs_info.fi_mntflags & CFS_ACCESS_BACKFS) == 0 &&
2850 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
2851 		error = cachefs_access_local(cp, mode, cr);
2852 		goto out;
2853 	}
2854 
2855 	/* else do a remote file system check */
2856 	else {
2857 		if (cp->c_backvp == NULL) {
2858 			error = cachefs_getbackvp(fscp, cp);
2859 			if (error)
2860 				goto out;
2861 		}
2862 
2863 		CFS_DPRINT_BACKFS_NFSV4(fscp,
2864 		    ("cachefs_access (nfsv4): cnode %p, backvp %p\n",
2865 		    cp, cp->c_backvp));
2866 		error = VOP_ACCESS(cp->c_backvp, mode, flags, cr, NULL);
2867 
2868 		/*
2869 		 * even though we don't `need' the ACL to do access
2870 		 * via the backvp, we should cache it here to make our
2871 		 * behavior more reasonable if we go disconnected.
2872 		 */
2873 
2874 		if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
2875 		    (cachefs_vtype_aclok(vp)) &&
2876 		    ((cp->c_flags & CN_NOCACHE) == 0) &&
2877 		    (!CFS_ISFS_BACKFS_NFSV4(fscp)) &&
2878 		    ((cp->c_metadata.md_flags & MD_ACL) == 0))
2879 			(void) cachefs_cacheacl(cp, NULL);
2880 	}
2881 out:
2882 	/*
2883 	 * If NFS returned ESTALE, mark this cnode as stale, so that
2884 	 * the vn_open retry will read the file anew from backfs
2885 	 */
2886 	if (error == ESTALE)
2887 		cachefs_cnode_stale(cp);
2888 
2889 	mutex_exit(&cp->c_statelock);
2890 	return (error);
2891 }
2892 
2893 /*
2894  * CFS has a fastsymlink scheme. If the size of the link is < C_FSL_SIZE, then
2895  * the link is placed in the metadata itself (no front file is allocated).
2896  */
2897 /*ARGSUSED*/
2898 static int
2899 cachefs_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
2900 {
2901 	int error = 0;
2902 	cnode_t *cp = VTOC(vp);
2903 	fscache_t *fscp = C_TO_FSCACHE(cp);
2904 	cachefscache_t *cachep = fscp->fs_cache;
2905 	int held = 0;
2906 	int connected = 0;
2907 
2908 	if (getzoneid() != GLOBAL_ZONEID)
2909 		return (EPERM);
2910 
2911 	if (vp->v_type != VLNK)
2912 		return (EINVAL);
2913 
2914 	/*
2915 	 * Cachefs only provides pass-through support for NFSv4,
2916 	 * and all vnode operations are passed through to the
2917 	 * back file system. For NFSv4 pass-through to work, only
2918 	 * connected operation is supported, the cnode backvp must
2919 	 * exist, and cachefs optional (eg., disconnectable) flags
2920 	 * are turned off. Assert these conditions to ensure that
2921 	 * the backfilesystem is called for the readlink operation.
2922 	 */
2923 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2924 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2925 
2926 	for (;;) {
2927 		/* get (or renew) access to the file system */
2928 		if (held) {
2929 			/* Won't loop with NFSv4 connected behavior */
2930 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2931 			cachefs_cd_release(fscp);
2932 			held = 0;
2933 		}
2934 		error = cachefs_cd_access(fscp, connected, 0);
2935 		if (error)
2936 			break;
2937 		held = 1;
2938 
2939 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2940 			/*
2941 			 * since readlink_connected will call stuffsymlink
2942 			 * on success, have to serialize access
2943 			 */
2944 			if (!rw_tryenter(&cp->c_rwlock, RW_WRITER)) {
2945 				cachefs_cd_release(fscp);
2946 				rw_enter(&cp->c_rwlock, RW_WRITER);
2947 				error = cachefs_cd_access(fscp, connected, 0);
2948 				if (error) {
2949 					held = 0;
2950 					rw_exit(&cp->c_rwlock);
2951 					break;
2952 				}
2953 			}
2954 			error = cachefs_readlink_connected(vp, uiop, cr);
2955 			rw_exit(&cp->c_rwlock);
2956 			if (CFS_TIMEOUT(fscp, error)) {
2957 				cachefs_cd_release(fscp);
2958 				held = 0;
2959 				cachefs_cd_timedout(fscp);
2960 				connected = 0;
2961 				continue;
2962 			}
2963 		} else {
2964 			error = cachefs_readlink_disconnected(vp, uiop);
2965 			if (CFS_TIMEOUT(fscp, error)) {
2966 				if (cachefs_cd_access_miss(fscp)) {
2967 					/* as above */
2968 					if (!rw_tryenter(&cp->c_rwlock,
2969 					    RW_WRITER)) {
2970 						cachefs_cd_release(fscp);
2971 						rw_enter(&cp->c_rwlock,
2972 						    RW_WRITER);
2973 						error = cachefs_cd_access(fscp,
2974 						    connected, 0);
2975 						if (error) {
2976 							held = 0;
2977 							rw_exit(&cp->c_rwlock);
2978 							break;
2979 						}
2980 					}
2981 					error = cachefs_readlink_connected(vp,
2982 					    uiop, cr);
2983 					rw_exit(&cp->c_rwlock);
2984 					if (!CFS_TIMEOUT(fscp, error))
2985 						break;
2986 					delay(5*hz);
2987 					connected = 0;
2988 					continue;
2989 				}
2990 				connected = 1;
2991 				continue;
2992 			}
2993 		}
2994 		break;
2995 	}
2996 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READLINK))
2997 		cachefs_log_readlink(cachep, error, fscp->fs_cfsvfsp,
2998 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
2999 		    crgetuid(cr), cp->c_size);
3000 
3001 	if (held)
3002 		cachefs_cd_release(fscp);
3003 #ifdef CFS_CD_DEBUG
3004 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3005 #endif
3006 
3007 	/*
3008 	 * The over the wire error for attempting to readlink something
3009 	 * other than a symbolic link is ENXIO.  However, we need to
3010 	 * return EINVAL instead of ENXIO, so we map it here.
3011 	 */
3012 	return (error == ENXIO ? EINVAL : error);
3013 }
3014 
3015 static int
3016 cachefs_readlink_connected(vnode_t *vp, uio_t *uiop, cred_t *cr)
3017 {
3018 	int error;
3019 	cnode_t *cp = VTOC(vp);
3020 	fscache_t *fscp = C_TO_FSCACHE(cp);
3021 	caddr_t buf;
3022 	int buflen;
3023 	int readcache = 0;
3024 
3025 	mutex_enter(&cp->c_statelock);
3026 
3027 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
3028 	if (error)
3029 		goto out;
3030 
3031 	/* if the sym link is cached as a fast sym link */
3032 	if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
3033 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3034 		error = uiomove(cp->c_metadata.md_allocinfo,
3035 		    MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
3036 #ifdef CFSDEBUG
3037 		readcache = 1;
3038 		goto out;
3039 #else /* CFSDEBUG */
3040 		/* XXX KLUDGE! correct for insidious 0-len symlink */
3041 		if (cp->c_size != 0) {
3042 			readcache = 1;
3043 			goto out;
3044 		}
3045 #endif /* CFSDEBUG */
3046 	}
3047 
3048 	/* if the sym link is cached in a front file */
3049 	if (cp->c_metadata.md_flags & MD_POPULATED) {
3050 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3051 		ASSERT(cp->c_metadata.md_flags & MD_FILE);
3052 		if (cp->c_frontvp == NULL) {
3053 			(void) cachefs_getfrontfile(cp);
3054 		}
3055 		if (cp->c_metadata.md_flags & MD_POPULATED) {
3056 			/* read symlink data from frontfile */
3057 			uiop->uio_offset = 0;
3058 			(void) VOP_RWLOCK(cp->c_frontvp,
3059 			    V_WRITELOCK_FALSE, NULL);
3060 			error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
3061 			VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);
3062 
3063 			/* XXX KLUDGE! correct for insidious 0-len symlink */
3064 			if (cp->c_size != 0) {
3065 				readcache = 1;
3066 				goto out;
3067 			}
3068 		}
3069 	}
3070 
3071 	/* get the sym link contents from the back fs */
3072 	error = cachefs_readlink_back(cp, cr, &buf, &buflen);
3073 	if (error)
3074 		goto out;
3075 
3076 	/* copy the contents out to the user */
3077 	error = uiomove(buf, MIN(buflen, uiop->uio_resid), UIO_READ, uiop);
3078 
3079 	/*
3080 	 * try to cache the sym link, note that its a noop if NOCACHE is set
3081 	 * or if NFSv4 pass-through is enabled.
3082 	 */
3083 	if (cachefs_stuffsymlink(cp, buf, buflen)) {
3084 		cachefs_nocache(cp);
3085 	}
3086 
3087 	cachefs_kmem_free(buf, MAXPATHLEN);
3088 
3089 out:
3090 	mutex_exit(&cp->c_statelock);
3091 	if (error == 0) {
3092 		if (readcache)
3093 			fscp->fs_stats.st_hits++;
3094 		else
3095 			fscp->fs_stats.st_misses++;
3096 	}
3097 	return (error);
3098 }
3099 
3100 static int
3101 cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop)
3102 {
3103 	int error;
3104 	cnode_t *cp = VTOC(vp);
3105 	fscache_t *fscp = C_TO_FSCACHE(cp);
3106 	int readcache = 0;
3107 
3108 	mutex_enter(&cp->c_statelock);
3109 
3110 	/* if the sym link is cached as a fast sym link */
3111 	if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
3112 		error = uiomove(cp->c_metadata.md_allocinfo,
3113 		    MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
3114 		readcache = 1;
3115 		goto out;
3116 	}
3117 
3118 	/* if the sym link is cached in a front file */
3119 	if (cp->c_metadata.md_flags & MD_POPULATED) {
3120 		ASSERT(cp->c_metadata.md_flags & MD_FILE);
3121 		if (cp->c_frontvp == NULL) {
3122 			(void) cachefs_getfrontfile(cp);
3123 		}
3124 		if (cp->c_metadata.md_flags & MD_POPULATED) {
3125 			/* read symlink data from frontfile */
3126 			uiop->uio_offset = 0;
3127 			(void) VOP_RWLOCK(cp->c_frontvp,
3128 			    V_WRITELOCK_FALSE, NULL);
3129 			error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
3130 			VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);
3131 			readcache = 1;
3132 			goto out;
3133 		}
3134 	}
3135 	error = ETIMEDOUT;
3136 
3137 out:
3138 	mutex_exit(&cp->c_statelock);
3139 	if (error == 0) {
3140 		if (readcache)
3141 			fscp->fs_stats.st_hits++;
3142 		else
3143 			fscp->fs_stats.st_misses++;
3144 	}
3145 	return (error);
3146 }
3147 
3148 /*ARGSUSED*/
3149 static int
3150 cachefs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
3151 {
3152 	cnode_t *cp = VTOC(vp);
3153 	int error = 0;
3154 	fscache_t *fscp = C_TO_FSCACHE(cp);
3155 	int held = 0;
3156 	int connected = 0;
3157 
3158 #ifdef CFSDEBUG
3159 	CFS_DEBUG(CFSDEBUG_VOPS)
3160 		printf("cachefs_fsync: ENTER vp %p\n", (void *)vp);
3161 #endif
3162 
3163 	if (getzoneid() != GLOBAL_ZONEID) {
3164 		error = EPERM;
3165 		goto out;
3166 	}
3167 
3168 	if (fscp->fs_backvfsp && fscp->fs_backvfsp->vfs_flag & VFS_RDONLY)
3169 		goto out;
3170 
3171 	/*
3172 	 * Cachefs only provides pass-through support for NFSv4,
3173 	 * and all vnode operations are passed through to the
3174 	 * back file system. For NFSv4 pass-through to work, only
3175 	 * connected operation is supported, the cnode backvp must
3176 	 * exist, and cachefs optional (eg., disconnectable) flags
3177 	 * are turned off. Assert these conditions to ensure that
3178 	 * the backfilesystem is called for the fsync operation.
3179 	 */
3180 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3181 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
3182 
3183 	for (;;) {
3184 		/* get (or renew) access to the file system */
3185 		if (held) {
3186 			/* Won't loop with NFSv4 connected behavior */
3187 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3188 			cachefs_cd_release(fscp);
3189 			held = 0;
3190 		}
3191 		error = cachefs_cd_access(fscp, connected, 1);
3192 		if (error)
3193 			break;
3194 		held = 1;
3195 		connected = 0;
3196 
3197 		/* if a regular file, write out the pages */
3198 		if ((vp->v_type == VREG) && vn_has_cached_data(vp) &&
3199 		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
3200 			error = cachefs_putpage_common(vp, (offset_t)0,
3201 			    0, 0, cr);
3202 			if (CFS_TIMEOUT(fscp, error)) {
3203 				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3204 					cachefs_cd_release(fscp);
3205 					held = 0;
3206 					cachefs_cd_timedout(fscp);
3207 					continue;
3208 				} else {
3209 					connected = 1;
3210 					continue;
3211 				}
3212 			}
3213 
3214 			/* if no space left in cache, wait until connected */
3215 			if ((error == ENOSPC) &&
3216 			    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
3217 				connected = 1;
3218 				continue;
3219 			}
3220 
3221 			/* clear the cnode error if putpage worked */
3222 			if ((error == 0) && cp->c_error) {
3223 				mutex_enter(&cp->c_statelock);
3224 				cp->c_error = 0;
3225 				mutex_exit(&cp->c_statelock);
3226 			}
3227 
3228 			if (error)
3229 				break;
3230 		}
3231 
3232 		/* if connected, sync the backvp */
3233 		if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) &&
3234 		    cp->c_backvp) {
3235 			mutex_enter(&cp->c_statelock);
3236 			if (cp->c_backvp) {
3237 				CFS_DPRINT_BACKFS_NFSV4(fscp,
3238 				    ("cachefs_fsync (nfsv4): cnode %p, "
3239 				    "backvp %p\n", cp, cp->c_backvp));
3240 				error = VOP_FSYNC(cp->c_backvp, syncflag, cr,
3241 				    ct);
3242 				if (CFS_TIMEOUT(fscp, error)) {
3243 					mutex_exit(&cp->c_statelock);
3244 					cachefs_cd_release(fscp);
3245 					held = 0;
3246 					cachefs_cd_timedout(fscp);
3247 					continue;
3248 				} else if (error && (error != EINTR))
3249 					cp->c_error = error;
3250 			}
3251 			mutex_exit(&cp->c_statelock);
3252 		}
3253 
3254 		/* sync the metadata and the front file to the front fs */
3255 		if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
3256 			error = cachefs_sync_metadata(cp);
3257 			if (error &&
3258 			    (fscp->fs_cdconnected == CFS_CD_CONNECTED))
3259 				error = 0;
3260 		}
3261 		break;
3262 	}
3263 
3264 	if (error == 0)
3265 		error = cp->c_error;
3266 
3267 	if (held)
3268 		cachefs_cd_release(fscp);
3269 
3270 out:
3271 #ifdef CFS_CD_DEBUG
3272 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3273 #endif
3274 
3275 #ifdef CFSDEBUG
3276 	CFS_DEBUG(CFSDEBUG_VOPS)
3277 		printf("cachefs_fsync: EXIT vp %p\n", (void *)vp);
3278 #endif
3279 	return (error);
3280 }
3281 
3282 /*
3283  * Called from cachefs_inactive(), to make sure all the data goes out to disk.
3284  */
3285 int
3286 cachefs_sync_metadata(cnode_t *cp)
3287 {
3288 	int error = 0;
3289 	struct filegrp *fgp;
3290 	struct vattr va;
3291 	fscache_t *fscp = C_TO_FSCACHE(cp);
3292 
3293 #ifdef CFSDEBUG
3294 	CFS_DEBUG(CFSDEBUG_VOPS)
3295 		printf("c_sync_metadata: ENTER cp %p cflag %x\n",
3296 		    (void *)cp, cp->c_flags);
3297 #endif
3298 
3299 	mutex_enter(&cp->c_statelock);
3300 	if ((cp->c_flags & CN_UPDATED) == 0)
3301 		goto out;
3302 	if (cp->c_flags & (CN_STALE | CN_DESTROY))
3303 		goto out;
3304 	fgp = cp->c_filegrp;
3305 	if ((fgp->fg_flags & CFS_FG_WRITE) == 0)
3306 		goto out;
3307 	if (CFS_ISFS_BACKFS_NFSV4(fscp))
3308 		goto out;
3309 
3310 	if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
3311 		mutex_exit(&cp->c_statelock);
3312 		error = filegrp_allocattr(fgp);
3313 		mutex_enter(&cp->c_statelock);
3314 		if (error) {
3315 			error = 0;
3316 			goto out;
3317 		}
3318 	}
3319 
3320 	if (cp->c_flags & CN_ALLOC_PENDING) {
3321 		error = filegrp_create_metadata(fgp, &cp->c_metadata,
3322 		    &cp->c_id);
3323 		if (error)
3324 			goto out;
3325 		cp->c_flags &= ~CN_ALLOC_PENDING;
3326 	}
3327 
3328 	if (cp->c_flags & CN_NEED_FRONT_SYNC) {
3329 		if (cp->c_frontvp != NULL) {
3330 			error = VOP_FSYNC(cp->c_frontvp, FSYNC, kcred, NULL);
3331 			if (error) {
3332 				cp->c_metadata.md_timestamp.tv_sec = 0;
3333 			} else {
3334 				va.va_mask = AT_MTIME;
3335 				error = VOP_GETATTR(cp->c_frontvp, &va, 0,
3336 				    kcred, NULL);
3337 				if (error)
3338 					goto out;
3339 				cp->c_metadata.md_timestamp = va.va_mtime;
3340 				cp->c_flags &=
3341 				    ~(CN_NEED_FRONT_SYNC |
3342 				    CN_POPULATION_PENDING);
3343 			}
3344 		} else {
3345 			cp->c_flags &=
3346 			    ~(CN_NEED_FRONT_SYNC | CN_POPULATION_PENDING);
3347 		}
3348 	}
3349 
3350 	/*
3351 	 * XXX tony: How can CN_ALLOC_PENDING still be set??
3352 	 * XXX tony: How can CN_UPDATED not be set?????
3353 	 */
3354 	if ((cp->c_flags & CN_ALLOC_PENDING) == 0 &&
3355 	    (cp->c_flags & CN_UPDATED)) {
3356 		error = filegrp_write_metadata(fgp, &cp->c_id,
3357 		    &cp->c_metadata);
3358 		if (error)
3359 			goto out;
3360 	}
3361 out:
3362 	if (error) {
3363 		/* XXX modified files? */
3364 		if (cp->c_metadata.md_rlno) {
3365 			cachefs_removefrontfile(&cp->c_metadata,
3366 			    &cp->c_id, fgp);
3367 			cachefs_rlent_moveto(C_TO_FSCACHE(cp)->fs_cache,
3368 			    CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
3369 			cp->c_metadata.md_rlno = 0;
3370 			cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
3371 			if (cp->c_frontvp) {
3372 				VN_RELE(cp->c_frontvp);
3373 				cp->c_frontvp = NULL;
3374 			}
3375 		}
3376 		if ((cp->c_flags & CN_ALLOC_PENDING) == 0)
3377 			(void) filegrp_destroy_metadata(fgp, &cp->c_id);
3378 		cp->c_flags |= CN_ALLOC_PENDING;
3379 		cachefs_nocache(cp);
3380 	}
3381 	/*
3382 	 * we clear the updated bit even on errors because a retry
3383 	 * will probably fail also.
3384 	 */
3385 	cp->c_flags &= ~CN_UPDATED;
3386 	mutex_exit(&cp->c_statelock);
3387 
3388 #ifdef CFSDEBUG
3389 	CFS_DEBUG(CFSDEBUG_VOPS)
3390 		printf("c_sync_metadata: EXIT cp %p cflag %x\n",
3391 		    (void *)cp, cp->c_flags);
3392 #endif
3393 
3394 	return (error);
3395 }
3396 
3397 /*
3398  * This is the vop entry point for inactivating a vnode.
3399  * It just queues the request for the async thread which
3400  * calls cachefs_inactive.
3401  * Because of the dnlc, it is not safe to grab most locks here.
3402  */
3403 /*ARGSUSED*/
3404 static void
3405 cachefs_inactive(struct vnode *vp, cred_t *cr, caller_context_t *ct)
3406 {
3407 	cnode_t *cp;
3408 	struct cachefs_req *rp;
3409 	fscache_t *fscp;
3410 
3411 #ifdef CFSDEBUG
3412 	CFS_DEBUG(CFSDEBUG_VOPS)
3413 		printf("cachefs_inactive: ENTER vp %p\n", (void *)vp);
3414 #endif
3415 
3416 	cp = VTOC(vp);
3417 	fscp = C_TO_FSCACHE(cp);
3418 
3419 	ASSERT((cp->c_flags & CN_IDLE) == 0);
3420 
3421 	/*
3422 	 * Cachefs only provides pass-through support for NFSv4,
3423 	 * and all vnode operations are passed through to the
3424 	 * back file system. For NFSv4 pass-through to work, only
3425 	 * connected operation is supported, the cnode backvp must
3426 	 * exist, and cachefs optional (eg., disconnectable) flags
3427 	 * are turned off. Assert these conditions to ensure that
3428 	 * the backfilesystem is called for the inactive operation.
3429 	 */
3430 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3431 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
3432 
3433 	/* vn_rele() set the v_count == 1 */
3434 
3435 	cp->c_ipending = 1;
3436 
3437 	rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
3438 	rp->cfs_cmd = CFS_IDLE;
3439 	rp->cfs_cr = cr;
3440 	crhold(rp->cfs_cr);
3441 	rp->cfs_req_u.cu_idle.ci_vp = vp;
3442 	cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));
3443 
3444 #ifdef CFSDEBUG
3445 	CFS_DEBUG(CFSDEBUG_VOPS)
3446 		printf("cachefs_inactive: EXIT vp %p\n", (void *)vp);
3447 #endif
3448 }
3449 
3450 /* ARGSUSED */
3451 static int
3452 cachefs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp,
3453     struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr,
3454     caller_context_t *ct, int *direntflags, pathname_t *realpnp)
3455 
3456 {
3457 	int error = 0;
3458 	cnode_t *dcp = VTOC(dvp);
3459 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3460 	int held = 0;
3461 	int connected = 0;
3462 
3463 #ifdef CFSDEBUG
3464 	CFS_DEBUG(CFSDEBUG_VOPS)
3465 		printf("cachefs_lookup: ENTER dvp %p nm %s\n", (void *)dvp, nm);
3466 #endif
3467 
3468 	if (getzoneid() != GLOBAL_ZONEID) {
3469 		error = EPERM;
3470 		goto out;
3471 	}
3472 
3473 	/*
3474 	 * Cachefs only provides pass-through support for NFSv4,
3475 	 * and all vnode operations are passed through to the
3476 	 * back file system. For NFSv4 pass-through to work, only
3477 	 * connected operation is supported, the cnode backvp must
3478 	 * exist, and cachefs optional (eg., disconnectable) flags
3479 	 * are turned off. Assert these conditions to ensure that
3480 	 * the backfilesystem is called for the lookup operation.
3481 	 */
3482 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3483 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
3484 
3485 	for (;;) {
3486 		/* get (or renew) access to the file system */
3487 		if (held) {
3488 			/* Won't loop with NFSv4 connected behavior */
3489 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3490 			cachefs_cd_release(fscp);
3491 			held = 0;
3492 		}
3493 		error = cachefs_cd_access(fscp, connected, 0);
3494 		if (error)
3495 			break;
3496 		held = 1;
3497 
3498 		error = cachefs_lookup_common(dvp, nm, vpp, pnp,
3499 			flags, rdir, cr);
3500 		if (CFS_TIMEOUT(fscp, error)) {
3501 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3502 				cachefs_cd_release(fscp);
3503 				held = 0;
3504 				cachefs_cd_timedout(fscp);
3505 				connected = 0;
3506 				continue;
3507 			} else {
3508 				if (cachefs_cd_access_miss(fscp)) {
3509 					rw_enter(&dcp->c_rwlock, RW_READER);
3510 					error = cachefs_lookup_back(dvp, nm,
3511 					    vpp, cr);
3512 					rw_exit(&dcp->c_rwlock);
3513 					if (!CFS_TIMEOUT(fscp, error))
3514 						break;
3515 					delay(5*hz);
3516 					connected = 0;
3517 					continue;
3518 				}
3519 				connected = 1;
3520 				continue;
3521 			}
3522 		}
3523 		break;
3524 	}
3525 	if (held)
3526 		cachefs_cd_release(fscp);
3527 
3528 	if (error == 0 && IS_DEVVP(*vpp)) {
3529 		struct vnode *newvp;
3530 		newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
3531 		VN_RELE(*vpp);
3532 		if (newvp == NULL) {
3533 			error = ENOSYS;
3534 		} else {
3535 			*vpp = newvp;
3536 		}
3537 	}
3538 
3539 #ifdef CFS_CD_DEBUG
3540 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3541 #endif
3542 out:
3543 #ifdef CFSDEBUG
3544 	CFS_DEBUG(CFSDEBUG_VOPS)
3545 		printf("cachefs_lookup: EXIT error = %d\n", error);
3546 #endif
3547 
3548 	return (error);
3549 }
3550 
3551 /* ARGSUSED */
3552 int
3553 cachefs_lookup_common(vnode_t *dvp, char *nm, vnode_t **vpp,
3554     struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr)
3555 {
3556 	int error = 0;
3557 	cnode_t *cp, *dcp = VTOC(dvp);
3558 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3559 	struct fid cookie;
3560 	u_offset_t d_offset;
3561 	struct cachefs_req *rp;
3562 	cfs_cid_t cid, dircid;
3563 	uint_t flag;
3564 	uint_t uncached = 0;
3565 
3566 	*vpp = NULL;
3567 
3568 	/*
3569 	 * If lookup is for "", just return dvp.  Don't need
3570 	 * to send it over the wire, look it up in the dnlc,
3571 	 * or perform any access checks.
3572 	 */
3573 	if (*nm == '\0') {
3574 		VN_HOLD(dvp);
3575 		*vpp = dvp;
3576 		return (0);
3577 	}
3578 
3579 	/* can't do lookups in non-directories */
3580 	if (dvp->v_type != VDIR)
3581 		return (ENOTDIR);
3582 
3583 	/* perform access check, also does consistency check if connected */
3584 	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3585 		error = cachefs_access_connected(dvp, VEXEC, 0, cr);
3586 	} else {
3587 		mutex_enter(&dcp->c_statelock);
3588 		error = cachefs_access_local(dcp, VEXEC, cr);
3589 		mutex_exit(&dcp->c_statelock);
3590 	}
3591 	if (error)
3592 		return (error);
3593 
3594 	/*
3595 	 * If lookup is for ".", just return dvp.  Don't need
3596 	 * to send it over the wire or look it up in the dnlc,
3597 	 * just need to check access.
3598 	 */
3599 	if (strcmp(nm, ".") == 0) {
3600 		VN_HOLD(dvp);
3601 		*vpp = dvp;
3602 		return (0);
3603 	}
3604 
3605 	/* check the dnlc */
3606 	*vpp = (vnode_t *)dnlc_lookup(dvp, nm);
3607 	if (*vpp)
3608 		return (0);
3609 
3610 	/* read lock the dir before starting the search */
3611 	rw_enter(&dcp->c_rwlock, RW_READER);
3612 
3613 	mutex_enter(&dcp->c_statelock);
3614 	dircid = dcp->c_id;
3615 
3616 	dcp->c_usage++;
3617 
3618 	/* if front file is not usable, lookup on the back fs */
3619 	if ((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
3620 	    CFS_ISFS_BACKFS_NFSV4(fscp) ||
3621 	    ((dcp->c_filegrp->fg_flags & CFS_FG_READ) == 0)) {
3622 		mutex_exit(&dcp->c_statelock);
3623 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3624 			error = cachefs_lookup_back(dvp, nm, vpp, cr);
3625 		else
3626 			error = ETIMEDOUT;
3627 		goto out;
3628 	}
3629 
3630 	/* if the front file is not populated, try to populate it */
3631 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
3632 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
3633 			error = ETIMEDOUT;
3634 			mutex_exit(&dcp->c_statelock);
3635 			goto out;
3636 		}
3637 
3638 		if (cachefs_async_okay()) {
3639 			/* cannot populate if cache is not writable */
3640 			ASSERT((dcp->c_flags &
3641 			    (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0);
3642 			dcp->c_flags |= CN_ASYNC_POPULATE;
3643 
3644 			rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
3645 			rp->cfs_cmd = CFS_POPULATE;
3646 			rp->cfs_req_u.cu_populate.cpop_vp = dvp;
3647 			rp->cfs_cr = cr;
3648 
3649 			crhold(cr);
3650 			VN_HOLD(dvp);
3651 
3652 			cachefs_addqueue(rp, &fscp->fs_workq);
3653 		} else if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
3654 			error = cachefs_dir_fill(dcp, cr);
3655 			if (error != 0) {
3656 				mutex_exit(&dcp->c_statelock);
3657 				goto out;
3658 			}
3659 		}
3660 		/* no populate if too many asyncs and we have to cache ACLs */
3661 
3662 		mutex_exit(&dcp->c_statelock);
3663 
3664 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3665 			error = cachefs_lookup_back(dvp, nm, vpp, cr);
3666 		else
3667 			error = ETIMEDOUT;
3668 		goto out;
3669 	}
3670 
3671 	/* by now we have a valid cached front file that we can search */
3672 
3673 	ASSERT((dcp->c_flags & CN_ASYNC_POPULATE) == 0);
3674 	error = cachefs_dir_look(dcp, nm, &cookie, &flag,
3675 	    &d_offset, &cid);
3676 	mutex_exit(&dcp->c_statelock);
3677 
3678 	if (error) {
3679 		/* if the entry does not have the fid, go get it */
3680 		if (error == EINVAL) {
3681 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3682 				error = cachefs_lookup_back(dvp, nm, vpp, cr);
3683 			else
3684 				error = ETIMEDOUT;
3685 		}
3686 
3687 		/* errors other than does not exist */
3688 		else if (error != ENOENT) {
3689 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3690 				error = cachefs_lookup_back(dvp, nm, vpp, cr);
3691 			else
3692 				error = ETIMEDOUT;
3693 		}
3694 		goto out;
3695 	}
3696 
3697 	/*
3698 	 * Else we found the entry in the cached directory.
3699 	 * Make a cnode for it.
3700 	 */
3701 	error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
3702 	    cr, 0, &cp);
3703 	if (error == ESTALE) {
3704 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3705 		mutex_enter(&dcp->c_statelock);
3706 		cachefs_nocache(dcp);
3707 		mutex_exit(&dcp->c_statelock);
3708 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3709 			error = cachefs_lookup_back(dvp, nm, vpp, cr);
3710 			uncached = 1;
3711 		} else
3712 			error = ETIMEDOUT;
3713 	} else if (error == 0) {
3714 		*vpp = CTOV(cp);
3715 	}
3716 
3717 out:
3718 	if (error == 0) {
3719 		/* put the entry in the dnlc */
3720 		if (cachefs_dnlc)
3721 			dnlc_enter(dvp, nm, *vpp);
3722 
3723 		/* save the cid of the parent so can find the name */
3724 		cp = VTOC(*vpp);
3725 		if (bcmp(&cp->c_metadata.md_parent, &dircid,
3726 		    sizeof (cfs_cid_t)) != 0) {
3727 			mutex_enter(&cp->c_statelock);
3728 			cp->c_metadata.md_parent = dircid;
3729 			cp->c_flags |= CN_UPDATED;
3730 			mutex_exit(&cp->c_statelock);
3731 		}
3732 	}
3733 
3734 	rw_exit(&dcp->c_rwlock);
3735 	if (uncached && dcp->c_metadata.md_flags & MD_PACKED)
3736 		(void) cachefs_pack_common(dvp, cr);
3737 	return (error);
3738 }
3739 
3740 /*
3741  * Called from cachefs_lookup_common when the back file system needs to be
3742  * examined to perform the lookup.
3743  */
3744 static int
3745 cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp,
3746     cred_t *cr)
3747 {
3748 	int error = 0;
3749 	cnode_t *cp, *dcp = VTOC(dvp);
3750 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3751 	vnode_t *backvp = NULL;
3752 	struct vattr va;
3753 	struct fid cookie;
3754 	cfs_cid_t cid;
3755 	uint32_t valid_fid;
3756 
3757 	mutex_enter(&dcp->c_statelock);
3758 
3759 	/* do a lookup on the back FS to get the back vnode */
3760 	if (dcp->c_backvp == NULL) {
3761 		error = cachefs_getbackvp(fscp, dcp);
3762 		if (error)
3763 			goto out;
3764 	}
3765 
3766 	CFS_DPRINT_BACKFS_NFSV4(fscp,
3767 	    ("cachefs_lookup (nfsv4): dcp %p, dbackvp %p, name %s\n",
3768 	    dcp, dcp->c_backvp, nm));
3769 	error = VOP_LOOKUP(dcp->c_backvp, nm, &backvp, (struct pathname *)NULL,
3770 	    0, (vnode_t *)NULL, cr, NULL, NULL, NULL);
3771 	if (error)
3772 		goto out;
3773 	if (IS_DEVVP(backvp)) {
3774 		struct vnode *devvp = backvp;
3775 
3776 		if (VOP_REALVP(devvp, &backvp, NULL) == 0) {
3777 			VN_HOLD(backvp);
3778 			VN_RELE(devvp);
3779 		}
3780 	}
3781 
3782 	/* get the fid and attrs from the back fs */
3783 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
3784 	error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
3785 	if (error)
3786 		goto out;
3787 
3788 	cid.cid_fileno = va.va_nodeid;
3789 	cid.cid_flags = 0;
3790 
3791 #if 0
3792 	/* XXX bob: this is probably no longer necessary */
3793 	/* if the directory entry was incomplete, we can complete it now */
3794 	if ((dcp->c_metadata.md_flags & MD_POPULATED) &&
3795 	    ((dcp->c_flags & CN_ASYNC_POPULATE) == 0) &&
3796 	    (dcp->c_filegrp->fg_flags & CFS_FG_WRITE)) {
3797 		cachefs_dir_modentry(dcp, d_offset, &cookie, &cid);
3798 	}
3799 #endif
3800 
3801 out:
3802 	mutex_exit(&dcp->c_statelock);
3803 
3804 	/* create the cnode */
3805 	if (error == 0) {
3806 		error = cachefs_cnode_make(&cid, fscp,
3807 		    (valid_fid ? &cookie : NULL),
3808 		    &va, backvp, cr, 0, &cp);
3809 		if (error == 0) {
3810 			*vpp = CTOV(cp);
3811 		}
3812 	}
3813 
3814 	if (backvp)
3815 		VN_RELE(backvp);
3816 
3817 	return (error);
3818 }
3819 
3820 /*ARGSUSED7*/
3821 static int
3822 cachefs_create(vnode_t *dvp, char *nm, vattr_t *vap,
3823     vcexcl_t exclusive, int mode, vnode_t **vpp, cred_t *cr, int flag,
3824     caller_context_t *ct, vsecattr_t *vsecp)
3825 
3826 {
3827 	cnode_t *dcp = VTOC(dvp);
3828 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3829 	cachefscache_t *cachep = fscp->fs_cache;
3830 	int error;
3831 	int connected = 0;
3832 	int held = 0;
3833 
3834 #ifdef CFSDEBUG
3835 	CFS_DEBUG(CFSDEBUG_VOPS)
3836 		printf("cachefs_create: ENTER dvp %p, nm %s\n",
3837 		    (void *)dvp, nm);
3838 #endif
3839 	if (getzoneid() != GLOBAL_ZONEID) {
3840 		error = EPERM;
3841 		goto out;
3842 	}
3843 
3844 	/*
3845 	 * Cachefs only provides pass-through support for NFSv4,
3846 	 * and all vnode operations are passed through to the
3847 	 * back file system. For NFSv4 pass-through to work, only
3848 	 * connected operation is supported, the cnode backvp must
3849 	 * exist, and cachefs optional (eg., disconnectable) flags
3850 	 * are turned off. Assert these conditions to ensure that
3851 	 * the backfilesystem is called for the create operation.
3852 	 */
3853 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3854 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
3855 
3856 	for (;;) {
3857 		/* get (or renew) access to the file system */
3858 		if (held) {
3859 			/* Won't loop with NFSv4 connected behavior */
3860 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3861 			cachefs_cd_release(fscp);
3862 			held = 0;
3863 		}
3864 		error = cachefs_cd_access(fscp, connected, 1);
3865 		if (error)
3866 			break;
3867 		held = 1;
3868 
3869 		/*
3870 		 * if we are connected, perform the remote portion of the
3871 		 * create.
3872 		 */
3873 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3874 			error = cachefs_create_connected(dvp, nm, vap,
3875 			    exclusive, mode, vpp, cr);
3876 			if (CFS_TIMEOUT(fscp, error)) {
3877 				cachefs_cd_release(fscp);
3878 				held = 0;
3879 				cachefs_cd_timedout(fscp);
3880 				connected = 0;
3881 				continue;
3882 			} else if (error) {
3883 				break;
3884 			}
3885 		}
3886 
3887 		/* else we must be disconnected */
3888 		else {
3889 			error = cachefs_create_disconnected(dvp, nm, vap,
3890 			    exclusive, mode, vpp, cr);
3891 			if (CFS_TIMEOUT(fscp, error)) {
3892 				connected = 1;
3893 				continue;
3894 			} else if (error) {
3895 				break;
3896 			}
3897 		}
3898 		break;
3899 	}
3900 
3901 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_CREATE)) {
3902 		fid_t *fidp = NULL;
3903 		ino64_t fileno = 0;
3904 		cnode_t *cp = NULL;
3905 		if (error == 0)
3906 			cp = VTOC(*vpp);
3907 
3908 		if (cp != NULL) {
3909 			fidp = &cp->c_metadata.md_cookie;
3910 			fileno = cp->c_id.cid_fileno;
3911 		}
3912 		cachefs_log_create(cachep, error, fscp->fs_cfsvfsp,
3913 		    fidp, fileno, crgetuid(cr));
3914 	}
3915 
3916 	if (held)
3917 		cachefs_cd_release(fscp);
3918 
3919 	if (error == 0 && CFS_ISFS_NONSHARED(fscp))
3920 		(void) cachefs_pack(dvp, nm, cr);
3921 	if (error == 0 && IS_DEVVP(*vpp)) {
3922 		struct vnode *spcvp;
3923 
3924 		spcvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
3925 		VN_RELE(*vpp);
3926 		if (spcvp == NULL) {
3927 			error = ENOSYS;
3928 		} else {
3929 			*vpp = spcvp;
3930 		}
3931 	}
3932 
3933 #ifdef CFS_CD_DEBUG
3934 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3935 #endif
3936 out:
3937 #ifdef CFSDEBUG
3938 	CFS_DEBUG(CFSDEBUG_VOPS)
3939 		printf("cachefs_create: EXIT error %d\n", error);
3940 #endif
3941 	return (error);
3942 }
3943 
3944 
3945 static int
3946 cachefs_create_connected(vnode_t *dvp, char *nm, vattr_t *vap,
3947     enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr)
3948 {
3949 	cnode_t *dcp = VTOC(dvp);
3950 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3951 	int error;
3952 	vnode_t *tvp = NULL;
3953 	vnode_t *devvp;
3954 	fid_t cookie;
3955 	vattr_t va;
3956 	cnode_t *ncp;
3957 	cfs_cid_t cid;
3958 	vnode_t *vp;
3959 	uint32_t valid_fid;
3960 
3961 	/* special case if file already exists */
3962 	error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
3963 	if (CFS_TIMEOUT(fscp, error))
3964 		return (error);
3965 	if (error == 0) {
3966 		if (exclusive == EXCL)
3967 			error = EEXIST;
3968 		else if (vp->v_type == VDIR && (mode & VWRITE))
3969 			error = EISDIR;
3970 		else if ((error =
3971 		    cachefs_access_connected(vp, mode, 0, cr)) == 0) {
3972 			if ((vap->va_mask & AT_SIZE) && (vp->v_type == VREG)) {
3973 				vap->va_mask = AT_SIZE;
3974 				error = cachefs_setattr_common(vp, vap, 0,
3975 				    cr, NULL);
3976 			}
3977 		}
3978 		if (error) {
3979 			VN_RELE(vp);
3980 		} else
3981 			*vpp = vp;
3982 		return (error);
3983 	}
3984 
3985 	rw_enter(&dcp->c_rwlock, RW_WRITER);
3986 	mutex_enter(&dcp->c_statelock);
3987 
3988 	/* consistency check the directory */
3989 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
3990 	if (error) {
3991 		mutex_exit(&dcp->c_statelock);
3992 		goto out;
3993 	}
3994 
3995 	/* get the backvp if necessary */
3996 	if (dcp->c_backvp == NULL) {
3997 		error = cachefs_getbackvp(fscp, dcp);
3998 		if (error) {
3999 			mutex_exit(&dcp->c_statelock);
4000 			goto out;
4001 		}
4002 	}
4003 
4004 	/* create the file on the back fs */
4005 	CFS_DPRINT_BACKFS_NFSV4(fscp,
4006 	    ("cachefs_create (nfsv4): dcp %p, dbackvp %p,"
4007 	    "name %s\n", dcp, dcp->c_backvp, nm));
4008 	error = VOP_CREATE(dcp->c_backvp, nm, vap, exclusive, mode,
4009 	    &devvp, cr, 0, NULL, NULL);
4010 	mutex_exit(&dcp->c_statelock);
4011 	if (error)
4012 		goto out;
4013 	if (VOP_REALVP(devvp, &tvp, NULL) == 0) {
4014 		VN_HOLD(tvp);
4015 		VN_RELE(devvp);
4016 	} else {
4017 		tvp = devvp;
4018 	}
4019 
4020 	/* get the fid and attrs from the back fs */
4021 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
4022 	error = cachefs_getcookie(tvp, &cookie, &va, cr, valid_fid);
4023 	if (error)
4024 		goto out;
4025 
4026 	/* make the cnode */
4027 	cid.cid_fileno = va.va_nodeid;
4028 	cid.cid_flags = 0;
4029 	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
4030 	    &va, tvp, cr, 0, &ncp);
4031 	if (error)
4032 		goto out;
4033 
4034 	*vpp = CTOV(ncp);
4035 
4036 	/* enter it in the parent directory */
4037 	mutex_enter(&dcp->c_statelock);
4038 	if (CFS_ISFS_NONSHARED(fscp) &&
4039 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
4040 		/* see if entry already exists */
4041 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
4042 		error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL);
4043 		if (error == ENOENT) {
4044 			/* entry, does not exist, add the new file */
4045 			error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie,
4046 			    &ncp->c_id, SM_ASYNC);
4047 			if (error) {
4048 				cachefs_nocache(dcp);
4049 				error = 0;
4050 			}
4051 			/* XXX should this be done elsewhere, too? */
4052 			dnlc_enter(dvp, nm, *vpp);
4053 		} else {
4054 			/* entry exists or some other problem */
4055 			cachefs_nocache(dcp);
4056 			error = 0;
4057 		}
4058 	}
4059 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
4060 	mutex_exit(&dcp->c_statelock);
4061 
4062 out:
4063 	rw_exit(&dcp->c_rwlock);
4064 	if (tvp)
4065 		VN_RELE(tvp);
4066 
4067 	return (error);
4068 }
4069 
/*
 * cachefs_create_disconnected
 *
 * Create the file `nm' in directory `dvp' using only the local cache
 * (NOTE(review): presumably called when the back file system is not
 * reachable -- the caller is outside this view, confirm).
 *
 * If the name already exists in the cached directory the existing cnode
 * is looked up and the usual create-on-existing checks are applied
 * (EEXIST for exclusive create, EISDIR for writing a directory, local
 * access check, optional truncation via cachefs_setattr_common()).
 *
 * Otherwise a new cnode is created, its metadata is written out
 * immediately, the operation is recorded with cachefs_dlog_create() and
 * the name is entered in the cached directory.
 *
 * Returns 0 with a held vnode in *vpp, or an errno.  ETIMEDOUT is
 * returned whenever the cache alone cannot satisfy the request
 * (directory not populated, write-around file system, attributes
 * missing, directory entry could not be added).
 */
static int
cachefs_create_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
	enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr)
{
	cnode_t *dcp = VTOC(dvp);
	cnode_t *cp;
	cnode_t *ncp = NULL;
	vnode_t *vp;
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error = 0;
	struct vattr va;
	timestruc_t current_time;
	off_t commit = 0;	/* non-zero once a log record exists */
	fid_t cookie;
	cfs_cid_t cid;

	rw_enter(&dcp->c_rwlock, RW_WRITER);
	mutex_enter(&dcp->c_statelock);

	/* give up if the directory is not populated */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		mutex_exit(&dcp->c_statelock);
		rw_exit(&dcp->c_rwlock);
		return (ETIMEDOUT);
	}

	/* special case if file already exists */
	error = cachefs_dir_look(dcp, nm, &cookie, NULL, NULL, &cid);
	if (error == EINVAL) {
		/* EINVAL from the directory lookup is reported as ETIMEDOUT */
		mutex_exit(&dcp->c_statelock);
		rw_exit(&dcp->c_rwlock);
		return (ETIMEDOUT);
	}
	if (error == 0) {
		/*
		 * Name found: both directory locks are dropped before the
		 * existing cnode is instantiated, and this path returns
		 * directly without going through the `out' cleanup.
		 */
		mutex_exit(&dcp->c_statelock);
		rw_exit(&dcp->c_rwlock);
		error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
		    cr, 0, &cp);
		if (error) {
			return (error);
		}
		vp = CTOV(cp);

		if (cp->c_metadata.md_flags & MD_NEEDATTRS)
			error = ETIMEDOUT;
		else if (exclusive == EXCL)
			error = EEXIST;
		else if (vp->v_type == VDIR && (mode & VWRITE))
			error = EISDIR;
		else {
			mutex_enter(&cp->c_statelock);
			error = cachefs_access_local(cp, mode, cr);
			mutex_exit(&cp->c_statelock);
			if (!error) {
				/* only the size is applied to an existing file */
				if ((vap->va_mask & AT_SIZE) &&
				    (vp->v_type == VREG)) {
					vap->va_mask = AT_SIZE;
					error = cachefs_setattr_common(vp,
					    vap, 0, cr, NULL);
				}
			}
		}
		if (error) {
			VN_RELE(vp);
		} else
			*vpp = vp;
		return (error);
	}

	/* give up if cannot modify the cache */
	if (CFS_ISFS_WRITE_AROUND(fscp)) {
		mutex_exit(&dcp->c_statelock);
		error = ETIMEDOUT;
		goto out;
	}

	/* check access */
	if (error = cachefs_access_local(dcp, VWRITE, cr)) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* mark dir as modified */
	cachefs_modified(dcp);
	mutex_exit(&dcp->c_statelock);

	/* must be privileged to set sticky bit */
	if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
		vap->va_mode &= ~VSVTX;

	/* make up a reasonable set of attributes */
	cachefs_attr_setup(vap, &va, dcp, cr);

	/* create the cnode */
	error = cachefs_cnode_create(fscp, &va, 0, &ncp);
	if (error)
		goto out;

	/* ncp->c_statelock is held from here until the log record is made */
	mutex_enter(&ncp->c_statelock);

	/* get the front file now instead of later */
	if (vap->va_type == VREG) {
		error = cachefs_getfrontfile(ncp);
		if (error) {
			mutex_exit(&ncp->c_statelock);
			goto out;
		}
		ASSERT(ncp->c_frontvp != NULL);
		ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
		ncp->c_metadata.md_flags |= MD_POPULATED;
	} else {
		/* non-regular file: only a metadata slot is allocated */
		ASSERT(ncp->c_flags & CN_ALLOC_PENDING);
		if (ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
			(void) filegrp_allocattr(ncp->c_filegrp);
		}
		error = filegrp_create_metadata(ncp->c_filegrp,
		    &ncp->c_metadata, &ncp->c_id);
		if (error) {
			mutex_exit(&ncp->c_statelock);
			goto out;
		}
		ncp->c_flags &= ~CN_ALLOC_PENDING;
	}
	/* the directory's state lock is taken while ncp's is still held */
	mutex_enter(&dcp->c_statelock);
	cachefs_creategid(dcp, ncp, vap, cr);
	cachefs_createacl(dcp, ncp);
	mutex_exit(&dcp->c_statelock);

	/* set times on the file */
	gethrestime(&current_time);
	ncp->c_metadata.md_vattr.va_atime = current_time;
	ncp->c_metadata.md_localctime = current_time;
	ncp->c_metadata.md_localmtime = current_time;
	ncp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME;

	/* reserve space for the daemon cid mapping */
	error = cachefs_dlog_cidmap(fscp);
	if (error) {
		mutex_exit(&ncp->c_statelock);
		goto out;
	}
	ncp->c_metadata.md_flags |= MD_MAPPING;

	/* mark the new file as modified */
	if (cachefs_modified_alloc(ncp)) {
		mutex_exit(&ncp->c_statelock);
		error = ENOSPC;
		goto out;
	}
	ncp->c_flags |= CN_UPDATED;

	/*
	 * write the metadata now rather than waiting until
	 * inactive so that if there's no space we can let
	 * the caller know.
	 */
	ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
	ASSERT((ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
	error = filegrp_write_metadata(ncp->c_filegrp,
	    &ncp->c_id, &ncp->c_metadata);
	if (error) {
		mutex_exit(&ncp->c_statelock);
		goto out;
	}

	/* log the operation; a zero return is treated as out of space */
	commit = cachefs_dlog_create(fscp, dcp, nm, vap, exclusive,
	    mode, ncp, 0, cr);
	if (commit == 0) {
		mutex_exit(&ncp->c_statelock);
		error = ENOSPC;
		goto out;
	}

	mutex_exit(&ncp->c_statelock);

	mutex_enter(&dcp->c_statelock);

	/* update parent dir times */
	dcp->c_metadata.md_localmtime = current_time;
	dcp->c_metadata.md_flags |= MD_LOCALMTIME;
	dcp->c_flags |= CN_UPDATED;

	/*
	 * enter new file name in the parent directory; MD_POPULATED is
	 * re-checked because c_statelock was dropped since the first test
	 */
	if (dcp->c_metadata.md_flags & MD_POPULATED) {
		error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie,
		    &ncp->c_id, 0);
		if (error) {
			cachefs_nocache(dcp);
			mutex_exit(&dcp->c_statelock);
			error = ETIMEDOUT;
			goto out;
		}
		dnlc_enter(dvp, nm, CTOV(ncp));
	} else {
		mutex_exit(&dcp->c_statelock);
		error = ETIMEDOUT;
		goto out;
	}
	mutex_exit(&dcp->c_statelock);

out:
	/* every path reaching here still holds dcp->c_rwlock as writer */
	rw_exit(&dcp->c_rwlock);

	/* close the log record with the final status, if one was created */
	if (commit) {
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
	}
	if (error) {
		/* destroy the cnode we created */
		if (ncp) {
			mutex_enter(&ncp->c_statelock);
			ncp->c_flags |= CN_DESTROY;
			mutex_exit(&ncp->c_statelock);
			VN_RELE(CTOV(ncp));
		}
	} else {
		*vpp = CTOV(ncp);
	}
	return (error);
}
4293 
/*
 * cachefs_remove
 *
 * Remove the entry `nm' from directory `dvp'.  Only callers in the
 * global zone are allowed (EPERM otherwise).
 *
 * The body is a retry loop: access to the file system is obtained with
 * cachefs_cd_access(), the victim is looked up, and the work is handed
 * to cachefs_remove_connected() or cachefs_remove_disconnected()
 * depending on fscp->fs_cdconnected.  Whenever CFS_TIMEOUT() is true for
 * an error, the loop starts over with the `connected' argument flipped;
 * in the connected case cachefs_cd_timedout() is called first.
 *
 * `held', `vfslock' and `vp' record what must be undone; they are
 * released both at the top of every iteration and after the loop.
 *
 * Returns 0 or an errno.
 */
/*ARGSUSED*/
static int
cachefs_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
    int flags)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	cachefscache_t *cachep = fscp->fs_cache;
	int error = 0;
	int held = 0;		/* cachefs_cd_access() succeeded */
	int connected = 0;	/* argument passed to cachefs_cd_access() */
	size_t namlen;
	vnode_t *vp = NULL;	/* vnode being removed, held when non-NULL */
	int vfslock = 0;	/* vn_vfswlock(vp) succeeded */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_remove: ENTER dvp %p name %s\n",
		    (void *)dvp, nm);
#endif
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
		ASSERT(dcp->c_flags & CN_NOCACHE);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the remove operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	for (;;) {
		/* drop whatever a previous iteration left behind */
		if (vfslock) {
			vn_vfsunlock(vp);
			vfslock = 0;
		}
		if (vp) {
			VN_RELE(vp);
			vp = NULL;
		}

		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;

		/* if disconnected, do some extra error checking */
		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
			/* check permissions */
			mutex_enter(&dcp->c_statelock);
			error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
			mutex_exit(&dcp->c_statelock);
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			}
			if (error)
				break;

			namlen = strlen(nm);
			if (namlen == 0) {
				error = EINVAL;
				break;
			}

			/* cannot remove . and .. */
			if (nm[0] == '.') {
				if (namlen == 1) {
					error = EINVAL;
					break;
				} else if (namlen == 2 && nm[1] == '.') {
					error = EEXIST;
					break;
				}
			}

		}

		/* get the cnode of the file to delete */
		error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
		if (error) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				if (CFS_TIMEOUT(fscp, error)) {
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					connected = 0;
					continue;
				}
			} else {
				if (CFS_TIMEOUT(fscp, error)) {
					connected = 1;
					continue;
				}
			}
			/* log the failed remove with a zeroed fid */
			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE)) {
				struct fid foo;

				bzero(&foo, sizeof (foo));
				cachefs_log_remove(cachep, error,
				    fscp->fs_cfsvfsp, &foo, 0, crgetuid(cr));
			}
			break;
		}

		if (vp->v_type == VDIR) {
			/* must be privileged to remove dirs with unlink() */
			if ((error = secpolicy_fs_linkdir(cr, vp->v_vfsp)) != 0)
				break;

			/* see ufs_dirremove for why this is done, mount race */
			if (vn_vfswlock(vp)) {
				error = EBUSY;
				break;
			}
			vfslock = 1;
			if (vn_mountedvfs(vp) != NULL) {
				error = EBUSY;
				break;
			}
		}

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_remove_connected(dvp, nm, cr, vp);
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_remove_disconnected(dvp, nm, cr,
			    vp);
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			}
		}
		break;
	}

	/*
	 * NOTE(review): the disabled block below refers to `cp', which is
	 * not declared in this function; it would not compile if enabled.
	 */
#if 0
	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE))
		cachefs_log_remove(cachep, error, fscp->fs_cfsvfsp,
		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
		    crgetuid(cr));
#endif

	/* final cleanup for whichever way the loop was left */
	if (held)
		cachefs_cd_release(fscp);

	if (vfslock)
		vn_vfsunlock(vp);

	if (vp)
		VN_RELE(vp);

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_remove: EXIT dvp %p\n", (void *)dvp);
#endif

	return (error);
}
4479 
4480 int
4481 cachefs_remove_connected(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
4482 {
4483 	cnode_t *dcp = VTOC(dvp);
4484 	cnode_t *cp = VTOC(vp);
4485 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4486 	int error = 0;
4487 
4488 	/*
4489 	 * Acquire the rwlock (WRITER) on the directory to prevent other
4490 	 * activity on the directory.
4491 	 */
4492 	rw_enter(&dcp->c_rwlock, RW_WRITER);
4493 
4494 	/* purge dnlc of this entry so can get accurate vnode count */
4495 	dnlc_purge_vp(vp);
4496 
4497 	/*
4498 	 * If the cnode is active, make a link to the file
4499 	 * so operations on the file will continue.
4500 	 */
4501 	if ((vp->v_type != VDIR) &&
4502 	    !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) {
4503 		error = cachefs_remove_dolink(dvp, vp, nm, cr);
4504 		if (error)
4505 			goto out;
4506 	}
4507 
4508 	/* else call backfs NFSv4 handler if NFSv4 */
4509 	else if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
4510 		error = cachefs_remove_backfs_nfsv4(dvp, nm, cr, vp);
4511 		goto out;
4512 	}
4513 
4514 	/* else drop the backvp so nfs does not do rename */
4515 	else if (cp->c_backvp) {
4516 		mutex_enter(&cp->c_statelock);
4517 		if (cp->c_backvp) {
4518 			VN_RELE(cp->c_backvp);
4519 			cp->c_backvp = NULL;
4520 		}
4521 		mutex_exit(&cp->c_statelock);
4522 	}
4523 
4524 	mutex_enter(&dcp->c_statelock);
4525 
4526 	/* get the backvp */
4527 	if (dcp->c_backvp == NULL) {
4528 		error = cachefs_getbackvp(fscp, dcp);
4529 		if (error) {
4530 			mutex_exit(&dcp->c_statelock);
4531 			goto out;
4532 		}
4533 	}
4534 
4535 	/* check directory consistency */
4536 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
4537 	if (error) {
4538 		mutex_exit(&dcp->c_statelock);
4539 		goto out;
4540 	}
4541 
4542 	/* perform the remove on the back fs */
4543 	error = VOP_REMOVE(dcp->c_backvp, nm, cr, NULL, 0);
4544 	if (error) {
4545 		mutex_exit(&dcp->c_statelock);
4546 		goto out;
4547 	}
4548 
4549 	/* the dir has been modified */
4550 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
4551 
4552 	/* remove the entry from the populated directory */
4553 	if (CFS_ISFS_NONSHARED(fscp) &&
4554 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
4555 		error = cachefs_dir_rmentry(dcp, nm);
4556 		if (error) {
4557 			cachefs_nocache(dcp);
4558 			error = 0;
4559 		}
4560 	}
4561 	mutex_exit(&dcp->c_statelock);
4562 
4563 	/* fix up the file we deleted */
4564 	mutex_enter(&cp->c_statelock);
4565 	if (cp->c_attr.va_nlink == 1)
4566 		cp->c_flags |= CN_DESTROY;
4567 	else
4568 		cp->c_flags |= CN_UPDATED;
4569 
4570 	cp->c_attr.va_nlink--;
4571 	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
4572 	mutex_exit(&cp->c_statelock);
4573 
4574 out:
4575 	rw_exit(&dcp->c_rwlock);
4576 	return (error);
4577 }
4578 
4579 /*
4580  * cachefs_remove_backfs_nfsv4
4581  *
4582  * Call NFSv4 back filesystem to handle the remove (cachefs
4583  * pass-through support for NFSv4).
4584  */
4585 int
4586 cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
4587 {
4588 	cnode_t *dcp = VTOC(dvp);
4589 	cnode_t *cp = VTOC(vp);
4590 	vnode_t *dbackvp;
4591 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4592 	int error = 0;
4593 
4594 	/*
4595 	 * For NFSv4 pass-through to work, only connected operation
4596 	 * is supported, the cnode backvp must exist, and cachefs
4597 	 * optional (eg., disconnectable) flags are turned off. Assert
4598 	 * these conditions for the getattr operation.
4599 	 */
4600 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4601 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
4602 
4603 	/* Should hold the directory readwrite lock to update directory */
4604 	ASSERT(RW_WRITE_HELD(&dcp->c_rwlock));
4605 
4606 	/*
4607 	 * Update attributes for directory. Note that
4608 	 * CFSOP_CHECK_COBJECT asserts for c_statelock being
4609 	 * held, so grab it before calling the routine.
4610 	 */
4611 	mutex_enter(&dcp->c_statelock);
4612 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
4613 	mutex_exit(&dcp->c_statelock);
4614 	if (error)
4615 		goto out;
4616 
4617 	/*
4618 	 * Update attributes for cp. Note that CFSOP_CHECK_COBJECT
4619 	 * asserts for c_statelock being held, so grab it before
4620 	 * calling the routine.
4621 	 */
4622 	mutex_enter(&cp->c_statelock);
4623 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
4624 	if (error) {
4625 		mutex_exit(&cp->c_statelock);
4626 		goto out;
4627 	}
4628 
4629 	/*
4630 	 * Drop the backvp so nfs if the link count is 1 so that
4631 	 * nfs does not do rename. Ensure that we will destroy the cnode
4632 	 * since this cnode no longer contains the backvp. Note that we
4633 	 * maintain lock on this cnode to prevent change till the remove
4634 	 * completes, otherwise other operations will encounter an ESTALE
4635 	 * if they try to use the cnode with CN_DESTROY set (see
4636 	 * cachefs_get_backvp()), or change the state of the cnode
4637 	 * while we're removing it.
4638 	 */
4639 	if (cp->c_attr.va_nlink == 1) {
4640 		/*
4641 		 * The unldvp information is created for the case
4642 		 * when there is more than one reference on the
4643 		 * vnode when a remove operation is called. If the
4644 		 * remove itself was holding a reference to the
4645 		 * vnode, then a subsequent remove will remove the
4646 		 * backvp, so we need to get rid of the unldvp
4647 		 * before removing the backvp. An alternate would
4648 		 * be to simply ignore the remove and let the
4649 		 * inactivation routine do the deletion of the
4650 		 * unldvp.
4651 		 */
4652 		if (cp->c_unldvp) {
4653 			VN_RELE(cp->c_unldvp);
4654 			cachefs_kmem_free(cp->c_unlname, MAXNAMELEN);
4655 			crfree(cp->c_unlcred);
4656 			cp->c_unldvp = NULL;
4657 			cp->c_unlcred = NULL;
4658 		}
4659 		cp->c_flags |= CN_DESTROY;
4660 		cp->c_attr.va_nlink = 0;
4661 		VN_RELE(cp->c_backvp);
4662 		cp->c_backvp = NULL;
4663 	}
4664 
4665 	/* perform the remove on back fs after extracting directory backvp */
4666 	mutex_enter(&dcp->c_statelock);
4667 	dbackvp = dcp->c_backvp;
4668 	mutex_exit(&dcp->c_statelock);
4669 
4670 	CFS_DPRINT_BACKFS_NFSV4(fscp,
4671 	    ("cachefs_remove (nfsv4): dcp %p, dbackvp %p, name %s\n",
4672 	    dcp, dbackvp, nm));
4673 	error = VOP_REMOVE(dbackvp, nm, cr, NULL, 0);
4674 	if (error) {
4675 		mutex_exit(&cp->c_statelock);
4676 		goto out;
4677 	}
4678 
4679 	/* fix up the file we deleted, if not destroying the cnode */
4680 	if ((cp->c_flags & CN_DESTROY) == 0) {
4681 		cp->c_attr.va_nlink--;
4682 		cp->c_flags |= CN_UPDATED;
4683 	}
4684 
4685 	mutex_exit(&cp->c_statelock);
4686 
4687 out:
4688 	return (error);
4689 }
4690 
/*
 * cachefs_remove_disconnected
 *
 * Remove `nm' (whose vnode is `vp') from directory `dvp' using only the
 * cache: the operation is recorded with cachefs_dlog_remove(), the name
 * is taken out of the cached directory and the file's link count and
 * local change time are adjusted.
 *
 * Returns 0 or an errno.  ETIMEDOUT is returned when the cache cannot
 * be used for the request (write-around file system, attributes
 * missing, directory not populated, ENOTDIR from the directory
 * routine); ENOSPC when the modified-state, cid mapping or log record
 * cannot be allocated.
 */
int
cachefs_remove_disconnected(vnode_t *dvp, char *nm, cred_t *cr,
    vnode_t *vp)
{
	cnode_t *dcp = VTOC(dvp);
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error = 0;
	off_t commit = 0;	/* non-zero once a log record exists */
	timestruc_t current_time;

	if (CFS_ISFS_WRITE_AROUND(fscp))
		return (ETIMEDOUT);

	if (cp->c_metadata.md_flags & MD_NEEDATTRS)
		return (ETIMEDOUT);

	/*
	 * Acquire the rwlock (WRITER) on the directory to prevent other
	 * activity on the directory.
	 */
	rw_enter(&dcp->c_rwlock, RW_WRITER);

	/* dir must be populated */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		error = ETIMEDOUT;
		goto out;
	}

	/* sticky-directory check, done with both state locks held */
	mutex_enter(&dcp->c_statelock);
	mutex_enter(&cp->c_statelock);

	error = cachefs_stickyrmchk(dcp, cp, cr);

	mutex_exit(&cp->c_statelock);
	mutex_exit(&dcp->c_statelock);
	if (error)
		goto out;

	/* purge dnlc of this entry so can get accurate vnode count */
	dnlc_purge_vp(vp);

	/*
	 * If the cnode is active, make a link to the file
	 * so operations on the file will continue.
	 */
	if ((vp->v_type != VDIR) &&
	    !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) {
		error = cachefs_remove_dolink(dvp, vp, nm, cr);
		if (error)
			goto out;
	}

	/*
	 * The file keeps other links after this remove, so it is marked
	 * modified and given a cid mapping if it has none yet.
	 */
	if (cp->c_attr.va_nlink > 1) {
		mutex_enter(&cp->c_statelock);
		if (cachefs_modified_alloc(cp)) {
			mutex_exit(&cp->c_statelock);
			error = ENOSPC;
			goto out;
		}
		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
			error = cachefs_dlog_cidmap(fscp);
			if (error) {
				/* any mapping failure is reported as ENOSPC */
				mutex_exit(&cp->c_statelock);
				error = ENOSPC;
				goto out;
			}
			cp->c_metadata.md_flags |= MD_MAPPING;
			cp->c_flags |= CN_UPDATED;
		}
		mutex_exit(&cp->c_statelock);
	}

	/* log the remove */
	commit = cachefs_dlog_remove(fscp, dcp, nm, cp, cr);
	if (commit == 0) {
		error = ENOSPC;
		goto out;
	}

	/*
	 * remove the file from the dir; MD_POPULATED is tested again,
	 * this time under c_statelock
	 */
	mutex_enter(&dcp->c_statelock);
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		mutex_exit(&dcp->c_statelock);
		error = ETIMEDOUT;
		goto out;

	}
	cachefs_modified(dcp);
	error = cachefs_dir_rmentry(dcp, nm);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		if (error == ENOTDIR)
			error = ETIMEDOUT;
		goto out;
	}

	/* update parent dir times */
	gethrestime(&current_time);
	dcp->c_metadata.md_localctime = current_time;
	dcp->c_metadata.md_localmtime = current_time;
	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
	dcp->c_flags |= CN_UPDATED;
	mutex_exit(&dcp->c_statelock);

	/* adjust file we are deleting */
	mutex_enter(&cp->c_statelock);
	cp->c_attr.va_nlink--;
	cp->c_metadata.md_localctime = current_time;
	cp->c_metadata.md_flags |= MD_LOCALCTIME;
	if (cp->c_attr.va_nlink == 0) {
		cp->c_flags |= CN_DESTROY;
	} else {
		cp->c_flags |= CN_UPDATED;
	}
	mutex_exit(&cp->c_statelock);

out:
	if (commit) {
		/* commit the log entry, passing along the final status */
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
	}

	rw_exit(&dcp->c_rwlock);
	return (error);
}
4820 
4821 /*ARGSUSED*/
4822 static int
4823 cachefs_link(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr,
4824     caller_context_t *ct, int flags)
4825 {
4826 	fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
4827 	cnode_t *tdcp = VTOC(tdvp);
4828 	struct vnode *realvp;
4829 	int error = 0;
4830 	int held = 0;
4831 	int connected = 0;
4832 
4833 #ifdef CFSDEBUG
4834 	CFS_DEBUG(CFSDEBUG_VOPS)
4835 		printf("cachefs_link: ENTER fvp %p tdvp %p tnm %s\n",
4836 		    (void *)fvp, (void *)tdvp, tnm);
4837 #endif
4838 
4839 	if (getzoneid() != GLOBAL_ZONEID) {
4840 		error = EPERM;
4841 		goto out;
4842 	}
4843 
4844 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
4845 		ASSERT(tdcp->c_flags & CN_NOCACHE);
4846 
4847 	if (VOP_REALVP(fvp, &realvp, ct) == 0) {
4848 		fvp = realvp;
4849 	}
4850 
4851 	/*
4852 	 * Cachefs only provides pass-through support for NFSv4,
4853 	 * and all vnode operations are passed through to the
4854 	 * back file system. For NFSv4 pass-through to work, only
4855 	 * connected operation is supported, the cnode backvp must
4856 	 * exist, and cachefs optional (eg., disconnectable) flags
4857 	 * are turned off. Assert these conditions to ensure that
4858 	 * the backfilesystem is called for the link operation.
4859 	 */
4860 
4861 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4862 	CFS_BACKFS_NFSV4_ASSERT_CNODE(tdcp);
4863 
4864 	for (;;) {
4865 		/* get (or renew) access to the file system */
4866 		if (held) {
4867 			/* Won't loop with NFSv4 connected behavior */
4868 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
4869 			rw_exit(&tdcp->c_rwlock);
4870 			cachefs_cd_release(fscp);
4871 			held = 0;
4872 		}
4873 		error = cachefs_cd_access(fscp, connected, 1);
4874 		if (error)
4875 			break;
4876 		rw_enter(&tdcp->c_rwlock, RW_WRITER);
4877 		held = 1;
4878 
4879 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
4880 			error = cachefs_link_connected(tdvp, fvp, tnm, cr);
4881 			if (CFS_TIMEOUT(fscp, error)) {
4882 				rw_exit(&tdcp->c_rwlock);
4883 				cachefs_cd_release(fscp);
4884 				held = 0;
4885 				cachefs_cd_timedout(fscp);
4886 				connected = 0;
4887 				continue;
4888 			}
4889 		} else {
4890 			error = cachefs_link_disconnected(tdvp, fvp, tnm,
4891 			    cr);
4892 			if (CFS_TIMEOUT(fscp, error)) {
4893 				connected = 1;
4894 				continue;
4895 			}
4896 		}
4897 		break;
4898 	}
4899 
4900 	if (held) {
4901 		rw_exit(&tdcp->c_rwlock);
4902 		cachefs_cd_release(fscp);
4903 	}
4904 
4905 #ifdef CFS_CD_DEBUG
4906 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
4907 #endif
4908 out:
4909 #ifdef CFSDEBUG
4910 	CFS_DEBUG(CFSDEBUG_VOPS)
4911 		printf("cachefs_link: EXIT fvp %p tdvp %p tnm %s\n",
4912 		    (void *)fvp, (void *)tdvp, tnm);
4913 #endif
4914 	return (error);
4915 }
4916 
4917 static int
4918 cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr)
4919 {
4920 	cnode_t *tdcp = VTOC(tdvp);
4921 	cnode_t *fcp = VTOC(fvp);
4922 	fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
4923 	int error = 0;
4924 	vnode_t *backvp = NULL;
4925 
4926 	if (tdcp != fcp) {
4927 		mutex_enter(&fcp->c_statelock);
4928 
4929 		if (fcp->c_backvp == NULL) {
4930 			error = cachefs_getbackvp(fscp, fcp);
4931 			if (error) {
4932 				mutex_exit(&fcp->c_statelock);
4933 				goto out;
4934 			}
4935 		}
4936 
4937 		error = CFSOP_CHECK_COBJECT(fscp, fcp, 0, cr);
4938 		if (error) {
4939 			mutex_exit(&fcp->c_statelock);
4940 			goto out;
4941 		}
4942 		backvp = fcp->c_backvp;
4943 		VN_HOLD(backvp);
4944 		mutex_exit(&fcp->c_statelock);
4945 	}
4946 
4947 	mutex_enter(&tdcp->c_statelock);
4948 
4949 	/* get backvp of target directory */
4950 	if (tdcp->c_backvp == NULL) {
4951 		error = cachefs_getbackvp(fscp, tdcp);
4952 		if (error) {
4953 			mutex_exit(&tdcp->c_statelock);
4954 			goto out;
4955 		}
4956 	}
4957 
4958 	/* consistency check target directory */
4959 	error = CFSOP_CHECK_COBJECT(fscp, tdcp, 0, cr);
4960 	if (error) {
4961 		mutex_exit(&tdcp->c_statelock);
4962 		goto out;
4963 	}
4964 	if (backvp == NULL) {
4965 		backvp = tdcp->c_backvp;
4966 		VN_HOLD(backvp);
4967 	}
4968 
4969 	/* perform the link on the back fs */
4970 	CFS_DPRINT_BACKFS_NFSV4(fscp,
4971 	    ("cachefs_link (nfsv4): tdcp %p, tdbackvp %p, "
4972 	    "name %s\n", tdcp, tdcp->c_backvp, tnm));
4973 	error = VOP_LINK(tdcp->c_backvp, backvp, tnm, cr, NULL, 0);
4974 	if (error) {
4975 		mutex_exit(&tdcp->c_statelock);
4976 		goto out;
4977 	}
4978 
4979 	CFSOP_MODIFY_COBJECT(fscp, tdcp, cr);
4980 
4981 	/* if the dir is populated, add the new link */
4982 	if (CFS_ISFS_NONSHARED(fscp) &&
4983 	    (tdcp->c_metadata.md_flags & MD_POPULATED)) {
4984 		error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie,
4985 		    &fcp->c_id, SM_ASYNC);
4986 		if (error) {
4987 			cachefs_nocache(tdcp);
4988 			error = 0;
4989 		}
4990 	}
4991 	mutex_exit(&tdcp->c_statelock);
4992 
4993 	/* get the new link count on the file */
4994 	mutex_enter(&fcp->c_statelock);
4995 	fcp->c_flags |= CN_UPDATED;
4996 	CFSOP_MODIFY_COBJECT(fscp, fcp, cr);
4997 	if (fcp->c_backvp == NULL) {
4998 		error = cachefs_getbackvp(fscp, fcp);
4999 		if (error) {
5000 			mutex_exit(&fcp->c_statelock);
5001 			goto out;
5002 		}
5003 	}
5004 
5005 	/* XXX bob: given what modify_cobject does this seems unnecessary */
5006 	fcp->c_attr.va_mask = AT_ALL;
5007 	error = VOP_GETATTR(fcp->c_backvp, &fcp->c_attr, 0, cr, NULL);
5008 	mutex_exit(&fcp->c_statelock);
5009 out:
5010 	if (backvp)
5011 		VN_RELE(backvp);
5012 
5013 	return (error);
5014 }
5015 
5016 static int
5017 cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp, char *tnm,
5018     cred_t *cr)
5019 {
5020 	cnode_t *tdcp = VTOC(tdvp);
5021 	cnode_t *fcp = VTOC(fvp);
5022 	fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
5023 	int error = 0;
5024 	timestruc_t current_time;
5025 	off_t commit = 0;
5026 
5027 	if (fvp->v_type == VDIR && secpolicy_fs_linkdir(cr, fvp->v_vfsp) != 0 ||
5028 	    fcp->c_attr.va_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)
5029 		return (EPERM);
5030 
5031 	if (CFS_ISFS_WRITE_AROUND(fscp))
5032 		return (ETIMEDOUT);
5033 
5034 	if (fcp->c_metadata.md_flags & MD_NEEDATTRS)
5035 		return (ETIMEDOUT);
5036 
5037 	mutex_enter(&tdcp->c_statelock);
5038 
5039 	/* check permissions */
5040 	if (error = cachefs_access_local(tdcp, (VEXEC|VWRITE), cr)) {
5041 		mutex_exit(&tdcp->c_statelock);
5042 		goto out;
5043 	}
5044 
5045 	/* the directory front file must be populated */
5046 	if ((tdcp->c_metadata.md_flags & MD_POPULATED) == 0) {
5047 		error = ETIMEDOUT;
5048 		mutex_exit(&tdcp->c_statelock);
5049 		goto out;
5050 	}
5051 
5052 	/* make sure tnm does not already exist in the directory */
5053 	error = cachefs_dir_look(tdcp, tnm, NULL, NULL, NULL, NULL);
5054 	if (error == ENOTDIR) {
5055 		error = ETIMEDOUT;
5056 		mutex_exit(&tdcp->c_statelock);
5057 		goto out;
5058 	}
5059 	if (error != ENOENT) {
5060 		error = EEXIST;
5061 		mutex_exit(&tdcp->c_statelock);
5062 		goto out;
5063 	}
5064 
5065 	mutex_enter(&fcp->c_statelock);
5066 
5067 	/* create a mapping for the file if necessary */
5068 	if ((fcp->c_metadata.md_flags & MD_MAPPING) == 0) {
5069 		error = cachefs_dlog_cidmap(fscp);
5070 		if (error) {
5071 			mutex_exit(&fcp->c_statelock);
5072 			mutex_exit(&tdcp->c_statelock);
5073 			error = ENOSPC;
5074 			goto out;
5075 		}
5076 		fcp->c_metadata.md_flags |= MD_MAPPING;
5077 		fcp->c_flags |= CN_UPDATED;
5078 	}
5079 
5080 	/* mark file as modified */
5081 	if (cachefs_modified_alloc(fcp)) {
5082 		mutex_exit(&fcp->c_statelock);
5083 		mutex_exit(&tdcp->c_statelock);
5084 		error = ENOSPC;
5085 		goto out;
5086 	}
5087 	mutex_exit(&fcp->c_statelock);
5088 
5089 	/* log the operation */
5090 	commit = cachefs_dlog_link(fscp, tdcp, tnm, fcp, cr);
5091 	if (commit == 0) {
5092 		mutex_exit(&tdcp->c_statelock);
5093 		error = ENOSPC;
5094 		goto out;
5095 	}
5096 
5097 	gethrestime(&current_time);
5098 
5099 	/* make the new link */
5100 	cachefs_modified(tdcp);
5101 	error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie,
5102 	    &fcp->c_id, SM_ASYNC);
5103 	if (error) {
5104 		error = 0;
5105 		mutex_exit(&tdcp->c_statelock);
5106 		goto out;
5107 	}
5108 
5109 	/* Update mtime/ctime of parent dir */
5110 	tdcp->c_metadata.md_localmtime = current_time;
5111 	tdcp->c_metadata.md_localctime = current_time;
5112 	tdcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5113 	tdcp->c_flags |= CN_UPDATED;
5114 	mutex_exit(&tdcp->c_statelock);
5115 
5116 	/* update the file we linked to */
5117 	mutex_enter(&fcp->c_statelock);
5118 	fcp->c_attr.va_nlink++;
5119 	fcp->c_metadata.md_localctime = current_time;
5120 	fcp->c_metadata.md_flags |= MD_LOCALCTIME;
5121 	fcp->c_flags |= CN_UPDATED;
5122 	mutex_exit(&fcp->c_statelock);
5123 
5124 out:
5125 	if (commit) {
5126 		/* commit the log entry */
5127 		if (cachefs_dlog_commit(fscp, commit, error)) {
5128 			/*EMPTY*/
5129 			/* XXX bob: fix on panic */
5130 		}
5131 	}
5132 
5133 	return (error);
5134 }
5135 
/*
 * Global lock used to serialize all renames in cachefs.  A rename has
 * to hold two cnodes at the same time, so renames are serialized to
 * avoid deadlocks.
 */
kmutex_t cachefs_rename_lock;
5141 
5142 /*ARGSUSED*/
5143 static int
5144 cachefs_rename(vnode_t *odvp, char *onm, vnode_t *ndvp,
5145     char *nnm, cred_t *cr, caller_context_t *ct, int flags)
5146 {
5147 	fscache_t *fscp = C_TO_FSCACHE(VTOC(odvp));
5148 	cachefscache_t *cachep = fscp->fs_cache;
5149 	int error = 0;
5150 	int held = 0;
5151 	int connected = 0;
5152 	vnode_t *delvp = NULL;
5153 	vnode_t *tvp = NULL;
5154 	int vfslock = 0;
5155 	struct vnode *realvp;
5156 
5157 	if (getzoneid() != GLOBAL_ZONEID)
5158 		return (EPERM);
5159 
5160 	if (VOP_REALVP(ndvp, &realvp, ct) == 0)
5161 		ndvp = realvp;
5162 
5163 	/*
5164 	 * if the fs NOFILL or NOCACHE flags are on, then the old and new
5165 	 * directory cnodes better indicate NOCACHE mode as well.
5166 	 */
5167 	ASSERT(
5168 	    (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE)) == 0 ||
5169 	    ((VTOC(odvp)->c_flags & CN_NOCACHE) &&
5170 	    (VTOC(ndvp)->c_flags & CN_NOCACHE)));
5171 
5172 	/*
5173 	 * Cachefs only provides pass-through support for NFSv4,
5174 	 * and all vnode operations are passed through to the
5175 	 * back file system. For NFSv4 pass-through to work, only
5176 	 * connected operation is supported, the cnode backvp must
5177 	 * exist, and cachefs optional (eg., disconnectable) flags
5178 	 * are turned off. Assert these conditions to ensure that
5179 	 * the backfilesystem is called for the rename operation.
5180 	 */
5181 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
5182 	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(odvp));
5183 	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(ndvp));
5184 
5185 	for (;;) {
5186 		if (vfslock) {
5187 			vn_vfsunlock(delvp);
5188 			vfslock = 0;
5189 		}
5190 		if (delvp) {
5191 			VN_RELE(delvp);
5192 			delvp = NULL;
5193 		}
5194 
5195 		/* get (or renew) access to the file system */
5196 		if (held) {
5197 			/* Won't loop for NFSv4 connected support */
5198 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
5199 			cachefs_cd_release(fscp);
5200 			held = 0;
5201 		}
5202 		error = cachefs_cd_access(fscp, connected, 1);
5203 		if (error)
5204 			break;
5205 		held = 1;
5206 
5207 		/* sanity check */
5208 		if ((odvp->v_type != VDIR) || (ndvp->v_type != VDIR)) {
5209 			error = EINVAL;
5210 			break;
5211 		}
5212 
5213 		/* cannot rename from or to . or .. */
5214 		if (strcmp(onm, ".") == 0 || strcmp(onm, "..") == 0 ||
5215 		    strcmp(nnm, ".") == 0 || strcmp(nnm, "..") == 0) {
5216 			error = EINVAL;
5217 			break;
5218 		}
5219 
5220 		if (odvp != ndvp) {
5221 			/*
5222 			 * if moving a directory, its notion
5223 			 * of ".." will change
5224 			 */
5225 			error = cachefs_lookup_common(odvp, onm, &tvp,
5226 			    NULL, 0, NULL, cr);
5227 			if (error == 0) {
5228 				ASSERT(tvp != NULL);
5229 				if (tvp->v_type == VDIR) {
5230 					cnode_t *cp = VTOC(tvp);
5231 
5232 					dnlc_remove(tvp, "..");
5233 
5234 					mutex_enter(&cp->c_statelock);
5235 					CFSOP_MODIFY_COBJECT(fscp, cp, cr);
5236 					mutex_exit(&cp->c_statelock);
5237 				}
5238 			} else {
5239 				tvp = NULL;
5240 				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5241 					if (CFS_TIMEOUT(fscp, error)) {
5242 						cachefs_cd_release(fscp);
5243 						held = 0;
5244 						cachefs_cd_timedout(fscp);
5245 						connected = 0;
5246 						continue;
5247 					}
5248 				} else {
5249 					if (CFS_TIMEOUT(fscp, error)) {
5250 						connected = 1;
5251 						continue;
5252 					}
5253 				}
5254 				break;
5255 			}
5256 		}
5257 
5258 		/* get the cnode if file being deleted */
5259 		error = cachefs_lookup_common(ndvp, nnm, &delvp, NULL, 0,
5260 		    NULL, cr);
5261 		if (error) {
5262 			delvp = NULL;
5263 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5264 				if (CFS_TIMEOUT(fscp, error)) {
5265 					cachefs_cd_release(fscp);
5266 					held = 0;
5267 					cachefs_cd_timedout(fscp);
5268 					connected = 0;
5269 					continue;
5270 				}
5271 			} else {
5272 				if (CFS_TIMEOUT(fscp, error)) {
5273 					connected = 1;
5274 					continue;
5275 				}
5276 			}
5277 			if (error != ENOENT)
5278 				break;
5279 		}
5280 
5281 		if (delvp && delvp->v_type == VDIR) {
5282 			/* see ufs_dirremove for why this is done, mount race */
5283 			if (vn_vfswlock(delvp)) {
5284 				error = EBUSY;
5285 				break;
5286 			}
5287 			vfslock = 1;
5288 			if (vn_mountedvfs(delvp) != NULL) {
5289 				error = EBUSY;
5290 				break;
5291 			}
5292 		}
5293 
5294 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5295 			error = cachefs_rename_connected(odvp, onm,
5296 			    ndvp, nnm, cr, delvp);
5297 			if (CFS_TIMEOUT(fscp, error)) {
5298 				cachefs_cd_release(fscp);
5299 				held = 0;
5300 				cachefs_cd_timedout(fscp);
5301 				connected = 0;
5302 				continue;
5303 			}
5304 		} else {
5305 			error = cachefs_rename_disconnected(odvp, onm,
5306 			    ndvp, nnm, cr, delvp);
5307 			if (CFS_TIMEOUT(fscp, error)) {
5308 				connected = 1;
5309 				continue;
5310 			}
5311 		}
5312 		break;
5313 	}
5314 
5315 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RENAME)) {
5316 		struct fid gone;
5317 
5318 		bzero(&gone, sizeof (gone));
5319 		gone.fid_len = MAXFIDSZ;
5320 		if (delvp != NULL)
5321 			(void) VOP_FID(delvp, &gone, ct);
5322 
5323 		cachefs_log_rename(cachep, error, fscp->fs_cfsvfsp,
5324 		    &gone, 0, (delvp != NULL), crgetuid(cr));
5325 	}
5326 
5327 	if (held)
5328 		cachefs_cd_release(fscp);
5329 
5330 	if (vfslock)
5331 		vn_vfsunlock(delvp);
5332 
5333 	if (delvp)
5334 		VN_RELE(delvp);
5335 	if (tvp)
5336 		VN_RELE(tvp);
5337 
5338 #ifdef CFS_CD_DEBUG
5339 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
5340 #endif
5341 	return (error);
5342 }
5343 
5344 static int
5345 cachefs_rename_connected(vnode_t *odvp, char *onm, vnode_t *ndvp,
5346     char *nnm, cred_t *cr, vnode_t *delvp)
5347 {
5348 	cnode_t *odcp = VTOC(odvp);
5349 	cnode_t *ndcp = VTOC(ndvp);
5350 	vnode_t *revp = NULL;
5351 	cnode_t *recp;
5352 	cnode_t *delcp;
5353 	fscache_t *fscp = C_TO_FSCACHE(odcp);
5354 	int error = 0;
5355 	struct fid cookie;
5356 	struct fid *cookiep;
5357 	cfs_cid_t cid;
5358 	int gotdirent;
5359 
5360 	/* find the file we are renaming */
5361 	error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr);
5362 	if (error)
5363 		return (error);
5364 	recp = VTOC(revp);
5365 
5366 	/*
5367 	 * To avoid deadlock, we acquire this global rename lock before
5368 	 * we try to get the locks for the source and target directories.
5369 	 */
5370 	mutex_enter(&cachefs_rename_lock);
5371 	rw_enter(&odcp->c_rwlock, RW_WRITER);
5372 	if (odcp != ndcp) {
5373 		rw_enter(&ndcp->c_rwlock, RW_WRITER);
5374 	}
5375 	mutex_exit(&cachefs_rename_lock);
5376 
5377 	ASSERT((odcp->c_flags & CN_ASYNC_POP_WORKING) == 0);
5378 	ASSERT((ndcp->c_flags & CN_ASYNC_POP_WORKING) == 0);
5379 
5380 	mutex_enter(&odcp->c_statelock);
5381 	if (odcp->c_backvp == NULL) {
5382 		error = cachefs_getbackvp(fscp, odcp);
5383 		if (error) {
5384 			mutex_exit(&odcp->c_statelock);
5385 			goto out;
5386 		}
5387 	}
5388 
5389 	error = CFSOP_CHECK_COBJECT(fscp, odcp, 0, cr);
5390 	if (error) {
5391 		mutex_exit(&odcp->c_statelock);
5392 		goto out;
5393 	}
5394 	mutex_exit(&odcp->c_statelock);
5395 
5396 	if (odcp != ndcp) {
5397 		mutex_enter(&ndcp->c_statelock);
5398 		if (ndcp->c_backvp == NULL) {
5399 			error = cachefs_getbackvp(fscp, ndcp);
5400 			if (error) {
5401 				mutex_exit(&ndcp->c_statelock);
5402 				goto out;
5403 			}
5404 		}
5405 
5406 		error = CFSOP_CHECK_COBJECT(fscp, ndcp, 0, cr);
5407 		if (error) {
5408 			mutex_exit(&ndcp->c_statelock);
5409 			goto out;
5410 		}
5411 		mutex_exit(&ndcp->c_statelock);
5412 	}
5413 
5414 	/* if a file is being deleted because of this rename */
5415 	if (delvp) {
5416 		/* if src and dest file are same */
5417 		if (delvp == revp) {
5418 			error = 0;
5419 			goto out;
5420 		}
5421 
5422 		/*
5423 		 * If the cnode is active, make a link to the file
5424 		 * so operations on the file will continue.
5425 		 */
5426 		dnlc_purge_vp(delvp);
5427 		delcp = VTOC(delvp);
5428 		if ((delvp->v_type != VDIR) &&
5429 		    !((delvp->v_count == 1) ||
5430 		    ((delvp->v_count == 2) && delcp->c_ipending))) {
5431 			error = cachefs_remove_dolink(ndvp, delvp, nnm, cr);
5432 			if (error)
5433 				goto out;
5434 		}
5435 	}
5436 
5437 	/* do the rename on the back fs */
5438 	CFS_DPRINT_BACKFS_NFSV4(fscp,
5439 	    ("cachefs_rename (nfsv4): odcp %p, odbackvp %p, "
5440 	    " ndcp %p, ndbackvp %p, onm %s, nnm %s\n",
5441 	    odcp, odcp->c_backvp, ndcp, ndcp->c_backvp, onm, nnm));
5442 	error = VOP_RENAME(odcp->c_backvp, onm, ndcp->c_backvp, nnm, cr, NULL,
5443 	    0);
5444 	if (error)
5445 		goto out;
5446 
5447 	/* purge mappings to file in the old directory */
5448 	dnlc_purge_vp(odvp);
5449 
5450 	/* purge mappings in the new dir if we deleted a file */
5451 	if (delvp && (odvp != ndvp))
5452 		dnlc_purge_vp(ndvp);
5453 
5454 	/* update the file we just deleted */
5455 	if (delvp) {
5456 		mutex_enter(&delcp->c_statelock);
5457 		if (delcp->c_attr.va_nlink == 1) {
5458 			delcp->c_flags |= CN_DESTROY;
5459 		} else {
5460 			delcp->c_flags |= CN_UPDATED;
5461 		}
5462 		delcp->c_attr.va_nlink--;
5463 		CFSOP_MODIFY_COBJECT(fscp, delcp, cr);
5464 		mutex_exit(&delcp->c_statelock);
5465 	}
5466 
5467 	/* find the entry in the old directory */
5468 	mutex_enter(&odcp->c_statelock);
5469 	gotdirent = 0;
5470 	cookiep = NULL;
5471 	if (CFS_ISFS_NONSHARED(fscp) &&
5472 	    (odcp->c_metadata.md_flags & MD_POPULATED)) {
5473 		error = cachefs_dir_look(odcp, onm, &cookie,
5474 		    NULL, NULL, &cid);
5475 		if (error == 0 || error == EINVAL) {
5476 			gotdirent = 1;
5477 			if (error == 0)
5478 				cookiep = &cookie;
5479 		} else {
5480 			cachefs_inval_object(odcp);
5481 		}
5482 	}
5483 	error = 0;
5484 
5485 	/* remove the directory entry from the old directory */
5486 	if (gotdirent) {
5487 		error = cachefs_dir_rmentry(odcp, onm);
5488 		if (error) {
5489 			cachefs_nocache(odcp);
5490 			error = 0;
5491 		}
5492 	}
5493 	CFSOP_MODIFY_COBJECT(fscp, odcp, cr);
5494 	mutex_exit(&odcp->c_statelock);
5495 
5496 	/* install the directory entry in the new directory */
5497 	mutex_enter(&ndcp->c_statelock);
5498 	if (CFS_ISFS_NONSHARED(fscp) &&
5499 	    (ndcp->c_metadata.md_flags & MD_POPULATED)) {
5500 		error = 1;
5501 		if (gotdirent) {
5502 			ASSERT(cid.cid_fileno != 0);
5503 			error = 0;
5504 			if (delvp) {
5505 				error = cachefs_dir_rmentry(ndcp, nnm);
5506 			}
5507 			if (error == 0) {
5508 				error = cachefs_dir_enter(ndcp, nnm, cookiep,
5509 				    &cid, SM_ASYNC);
5510 			}
5511 		}
5512 		if (error) {
5513 			cachefs_nocache(ndcp);
5514 			error = 0;
5515 		}
5516 	}
5517 	if (odcp != ndcp)
5518 		CFSOP_MODIFY_COBJECT(fscp, ndcp, cr);
5519 	mutex_exit(&ndcp->c_statelock);
5520 
5521 	/* ctime of renamed file has changed */
5522 	mutex_enter(&recp->c_statelock);
5523 	CFSOP_MODIFY_COBJECT(fscp, recp, cr);
5524 	mutex_exit(&recp->c_statelock);
5525 
5526 out:
5527 	if (odcp != ndcp)
5528 		rw_exit(&ndcp->c_rwlock);
5529 	rw_exit(&odcp->c_rwlock);
5530 
5531 	VN_RELE(revp);
5532 
5533 	return (error);
5534 }
5535 
5536 static int
5537 cachefs_rename_disconnected(vnode_t *odvp, char *onm, vnode_t *ndvp,
5538     char *nnm, cred_t *cr, vnode_t *delvp)
5539 {
5540 	cnode_t *odcp = VTOC(odvp);
5541 	cnode_t *ndcp = VTOC(ndvp);
5542 	cnode_t *delcp = NULL;
5543 	vnode_t *revp = NULL;
5544 	cnode_t *recp;
5545 	fscache_t *fscp = C_TO_FSCACHE(odcp);
5546 	int error = 0;
5547 	struct fid cookie;
5548 	struct fid *cookiep;
5549 	cfs_cid_t cid;
5550 	off_t commit = 0;
5551 	timestruc_t current_time;
5552 
5553 	if (CFS_ISFS_WRITE_AROUND(fscp))
5554 		return (ETIMEDOUT);
5555 
5556 	/* find the file we are renaming */
5557 	error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr);
5558 	if (error)
5559 		return (error);
5560 	recp = VTOC(revp);
5561 
5562 	/*
5563 	 * To avoid deadlock, we acquire this global rename lock before
5564 	 * we try to get the locks for the source and target directories.
5565 	 */
5566 	mutex_enter(&cachefs_rename_lock);
5567 	rw_enter(&odcp->c_rwlock, RW_WRITER);
5568 	if (odcp != ndcp) {
5569 		rw_enter(&ndcp->c_rwlock, RW_WRITER);
5570 	}
5571 	mutex_exit(&cachefs_rename_lock);
5572 
5573 	if (recp->c_metadata.md_flags & MD_NEEDATTRS) {
5574 		error = ETIMEDOUT;
5575 		goto out;
5576 	}
5577 
5578 	if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) {
5579 		mutex_enter(&recp->c_statelock);
5580 		if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) {
5581 			error = cachefs_dlog_cidmap(fscp);
5582 			if (error) {
5583 				mutex_exit(&recp->c_statelock);
5584 				error = ENOSPC;
5585 				goto out;
5586 			}
5587 			recp->c_metadata.md_flags |= MD_MAPPING;
5588 			recp->c_flags |= CN_UPDATED;
5589 		}
5590 		mutex_exit(&recp->c_statelock);
5591 	}
5592 
5593 	/* check permissions */
5594 	/* XXX clean up this mutex junk sometime */
5595 	mutex_enter(&odcp->c_statelock);
5596 	error = cachefs_access_local(odcp, (VEXEC|VWRITE), cr);
5597 	mutex_exit(&odcp->c_statelock);
5598 	if (error != 0)
5599 		goto out;
5600 	mutex_enter(&ndcp->c_statelock);
5601 	error = cachefs_access_local(ndcp, (VEXEC|VWRITE), cr);
5602 	mutex_exit(&ndcp->c_statelock);
5603 	if (error != 0)
5604 		goto out;
5605 	mutex_enter(&odcp->c_statelock);
5606 	error = cachefs_stickyrmchk(odcp, recp, cr);
5607 	mutex_exit(&odcp->c_statelock);
5608 	if (error != 0)
5609 		goto out;
5610 
5611 	/* dirs must be populated */
5612 	if (((odcp->c_metadata.md_flags & MD_POPULATED) == 0) ||
5613 	    ((ndcp->c_metadata.md_flags & MD_POPULATED) == 0)) {
5614 		error = ETIMEDOUT;
5615 		goto out;
5616 	}
5617 
5618 	/* for now do not allow moving dirs because could cause cycles */
5619 	if ((((revp->v_type == VDIR) && (odvp != ndvp))) ||
5620 	    (revp == odvp)) {
5621 		error = ETIMEDOUT;
5622 		goto out;
5623 	}
5624 
5625 	/* if a file is being deleted because of this rename */
5626 	if (delvp) {
5627 		delcp = VTOC(delvp);
5628 
5629 		/* if src and dest file are the same */
5630 		if (delvp == revp) {
5631 			error = 0;
5632 			goto out;
5633 		}
5634 
5635 		if (delcp->c_metadata.md_flags & MD_NEEDATTRS) {
5636 			error = ETIMEDOUT;
5637 			goto out;
5638 		}
5639 
5640 		/* if there are hard links to this file */
5641 		if (delcp->c_attr.va_nlink > 1) {
5642 			mutex_enter(&delcp->c_statelock);
5643 			if (cachefs_modified_alloc(delcp)) {
5644 				mutex_exit(&delcp->c_statelock);
5645 				error = ENOSPC;
5646 				goto out;
5647 			}
5648 
5649 			if ((delcp->c_metadata.md_flags & MD_MAPPING) == 0) {
5650 				error = cachefs_dlog_cidmap(fscp);
5651 				if (error) {
5652 					mutex_exit(&delcp->c_statelock);
5653 					error = ENOSPC;
5654 					goto out;
5655 				}
5656 				delcp->c_metadata.md_flags |= MD_MAPPING;
5657 				delcp->c_flags |= CN_UPDATED;
5658 			}
5659 			mutex_exit(&delcp->c_statelock);
5660 		}
5661 
5662 		/* make sure we can delete file */
5663 		mutex_enter(&ndcp->c_statelock);
5664 		error = cachefs_stickyrmchk(ndcp, delcp, cr);
5665 		mutex_exit(&ndcp->c_statelock);
5666 		if (error != 0)
5667 			goto out;
5668 
5669 		/*
5670 		 * If the cnode is active, make a link to the file
5671 		 * so operations on the file will continue.
5672 		 */
5673 		dnlc_purge_vp(delvp);
5674 		if ((delvp->v_type != VDIR) &&
5675 		    !((delvp->v_count == 1) ||
5676 		    ((delvp->v_count == 2) && delcp->c_ipending))) {
5677 			error = cachefs_remove_dolink(ndvp, delvp, nnm, cr);
5678 			if (error)
5679 				goto out;
5680 		}
5681 	}
5682 
5683 	/* purge mappings to file in the old directory */
5684 	dnlc_purge_vp(odvp);
5685 
5686 	/* purge mappings in the new dir if we deleted a file */
5687 	if (delvp && (odvp != ndvp))
5688 		dnlc_purge_vp(ndvp);
5689 
5690 	/* find the entry in the old directory */
5691 	mutex_enter(&odcp->c_statelock);
5692 	if ((odcp->c_metadata.md_flags & MD_POPULATED) == 0) {
5693 		mutex_exit(&odcp->c_statelock);
5694 		error = ETIMEDOUT;
5695 		goto out;
5696 	}
5697 	cookiep = NULL;
5698 	error = cachefs_dir_look(odcp, onm, &cookie, NULL, NULL, &cid);
5699 	if (error == 0 || error == EINVAL) {
5700 		if (error == 0)
5701 			cookiep = &cookie;
5702 	} else {
5703 		mutex_exit(&odcp->c_statelock);
5704 		if (error == ENOTDIR)
5705 			error = ETIMEDOUT;
5706 		goto out;
5707 	}
5708 	error = 0;
5709 
5710 	/* write the log entry */
5711 	commit = cachefs_dlog_rename(fscp, odcp, onm, ndcp, nnm, cr,
5712 	    recp, delcp);
5713 	if (commit == 0) {
5714 		mutex_exit(&odcp->c_statelock);
5715 		error = ENOSPC;
5716 		goto out;
5717 	}
5718 
5719 	/* remove the directory entry from the old directory */
5720 	cachefs_modified(odcp);
5721 	error = cachefs_dir_rmentry(odcp, onm);
5722 	if (error) {
5723 		mutex_exit(&odcp->c_statelock);
5724 		if (error == ENOTDIR)
5725 			error = ETIMEDOUT;
5726 		goto out;
5727 	}
5728 	mutex_exit(&odcp->c_statelock);
5729 
5730 	/* install the directory entry in the new directory */
5731 	mutex_enter(&ndcp->c_statelock);
5732 	error = ENOTDIR;
5733 	if (ndcp->c_metadata.md_flags & MD_POPULATED) {
5734 		ASSERT(cid.cid_fileno != 0);
5735 		cachefs_modified(ndcp);
5736 		error = 0;
5737 		if (delvp) {
5738 			error = cachefs_dir_rmentry(ndcp, nnm);
5739 		}
5740 		if (error == 0) {
5741 			error = cachefs_dir_enter(ndcp, nnm, cookiep,
5742 			    &cid, SM_ASYNC);
5743 		}
5744 	}
5745 	if (error) {
5746 		cachefs_nocache(ndcp);
5747 		mutex_exit(&ndcp->c_statelock);
5748 		mutex_enter(&odcp->c_statelock);
5749 		cachefs_nocache(odcp);
5750 		mutex_exit(&odcp->c_statelock);
5751 		if (error == ENOTDIR)
5752 			error = ETIMEDOUT;
5753 		goto out;
5754 	}
5755 	mutex_exit(&ndcp->c_statelock);
5756 
5757 	gethrestime(&current_time);
5758 
5759 	/* update the file we just deleted */
5760 	if (delvp) {
5761 		mutex_enter(&delcp->c_statelock);
5762 		delcp->c_attr.va_nlink--;
5763 		delcp->c_metadata.md_localctime = current_time;
5764 		delcp->c_metadata.md_flags |= MD_LOCALCTIME;
5765 		if (delcp->c_attr.va_nlink == 0) {
5766 			delcp->c_flags |= CN_DESTROY;
5767 		} else {
5768 			delcp->c_flags |= CN_UPDATED;
5769 		}
5770 		mutex_exit(&delcp->c_statelock);
5771 	}
5772 
5773 	/* update the file we renamed */
5774 	mutex_enter(&recp->c_statelock);
5775 	recp->c_metadata.md_localctime = current_time;
5776 	recp->c_metadata.md_flags |= MD_LOCALCTIME;
5777 	recp->c_flags |= CN_UPDATED;
5778 	mutex_exit(&recp->c_statelock);
5779 
5780 	/* update the source directory */
5781 	mutex_enter(&odcp->c_statelock);
5782 	odcp->c_metadata.md_localctime = current_time;
5783 	odcp->c_metadata.md_localmtime = current_time;
5784 	odcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5785 	odcp->c_flags |= CN_UPDATED;
5786 	mutex_exit(&odcp->c_statelock);
5787 
5788 	/* update the destination directory */
5789 	if (odcp != ndcp) {
5790 		mutex_enter(&ndcp->c_statelock);
5791 		ndcp->c_metadata.md_localctime = current_time;
5792 		ndcp->c_metadata.md_localmtime = current_time;
5793 		ndcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5794 		ndcp->c_flags |= CN_UPDATED;
5795 		mutex_exit(&ndcp->c_statelock);
5796 	}
5797 
5798 out:
5799 	if (commit) {
5800 		/* commit the log entry */
5801 		if (cachefs_dlog_commit(fscp, commit, error)) {
5802 			/*EMPTY*/
5803 			/* XXX bob: fix on panic */
5804 		}
5805 	}
5806 
5807 	if (odcp != ndcp)
5808 		rw_exit(&ndcp->c_rwlock);
5809 	rw_exit(&odcp->c_rwlock);
5810 
5811 	VN_RELE(revp);
5812 
5813 	return (error);
5814 }
5815 
5816 /*ARGSUSED*/
5817 static int
5818 cachefs_mkdir(vnode_t *dvp, char *nm, vattr_t *vap, vnode_t **vpp,
5819     cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
5820 {
5821 	cnode_t *dcp = VTOC(dvp);
5822 	fscache_t *fscp = C_TO_FSCACHE(dcp);
5823 	cachefscache_t *cachep = fscp->fs_cache;
5824 	int error = 0;
5825 	int held = 0;
5826 	int connected = 0;
5827 
5828 #ifdef CFSDEBUG
5829 	CFS_DEBUG(CFSDEBUG_VOPS)
5830 		printf("cachefs_mkdir: ENTER dvp %p\n", (void *)dvp);
5831 #endif
5832 
5833 	if (getzoneid() != GLOBAL_ZONEID) {
5834 		error = EPERM;
5835 		goto out;
5836 	}
5837 
5838 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
5839 		ASSERT(dcp->c_flags & CN_NOCACHE);
5840 
5841 	/*
5842 	 * Cachefs only provides pass-through support for NFSv4,
5843 	 * and all vnode operations are passed through to the
5844 	 * back file system. For NFSv4 pass-through to work, only
5845 	 * connected operation is supported, the cnode backvp must
5846 	 * exist, and cachefs optional (eg., disconnectable) flags
5847 	 * are turned off. Assert these conditions to ensure that
5848 	 * the backfilesystem is called for the mkdir operation.
5849 	 */
5850 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
5851 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
5852 
5853 	for (;;) {
5854 		/* get (or renew) access to the file system */
5855 		if (held) {
5856 			/* Won't loop with NFSv4 connected behavior */
5857 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
5858 			rw_exit(&dcp->c_rwlock);
5859 			cachefs_cd_release(fscp);
5860 			held = 0;
5861 		}
5862 		error = cachefs_cd_access(fscp, connected, 1);
5863 		if (error)
5864 			break;
5865 		rw_enter(&dcp->c_rwlock, RW_WRITER);
5866 		held = 1;
5867 
5868 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5869 			error = cachefs_mkdir_connected(dvp, nm, vap,
5870 			    vpp, cr);
5871 			if (CFS_TIMEOUT(fscp, error)) {
5872 				rw_exit(&dcp->c_rwlock);
5873 				cachefs_cd_release(fscp);
5874 				held = 0;
5875 				cachefs_cd_timedout(fscp);
5876 				connected = 0;
5877 				continue;
5878 			}
5879 		} else {
5880 			error = cachefs_mkdir_disconnected(dvp, nm, vap,
5881 			    vpp, cr);
5882 			if (CFS_TIMEOUT(fscp, error)) {
5883 				connected = 1;
5884 				continue;
5885 			}
5886 		}
5887 		break;
5888 	}
5889 
5890 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_MKDIR)) {
5891 		fid_t *fidp = NULL;
5892 		ino64_t fileno = 0;
5893 		cnode_t *cp = NULL;
5894 		if (error == 0)
5895 			cp = VTOC(*vpp);
5896 
5897 		if (cp != NULL) {
5898 			fidp = &cp->c_metadata.md_cookie;
5899 			fileno = cp->c_id.cid_fileno;
5900 		}
5901 
5902 		cachefs_log_mkdir(cachep, error, fscp->fs_cfsvfsp,
5903 		    fidp, fileno, crgetuid(cr));
5904 	}
5905 
5906 	if (held) {
5907 		rw_exit(&dcp->c_rwlock);
5908 		cachefs_cd_release(fscp);
5909 	}
5910 	if (error == 0 && CFS_ISFS_NONSHARED(fscp))
5911 		(void) cachefs_pack(dvp, nm, cr);
5912 
5913 #ifdef CFS_CD_DEBUG
5914 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
5915 #endif
5916 out:
5917 #ifdef CFSDEBUG
5918 	CFS_DEBUG(CFSDEBUG_VOPS)
5919 		printf("cachefs_mkdir: EXIT error = %d\n", error);
5920 #endif
5921 	return (error);
5922 }
5923 
5924 static int
5925 cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap,
5926     vnode_t **vpp, cred_t *cr)
5927 {
5928 	cnode_t *newcp = NULL, *dcp = VTOC(dvp);
5929 	struct vnode *vp = NULL;
5930 	int error = 0;
5931 	fscache_t *fscp = C_TO_FSCACHE(dcp);
5932 	struct fid cookie;
5933 	struct vattr attr;
5934 	cfs_cid_t cid, dircid;
5935 	uint32_t valid_fid;
5936 
5937 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
5938 		ASSERT(dcp->c_flags & CN_NOCACHE);
5939 
5940 	mutex_enter(&dcp->c_statelock);
5941 
5942 	/* get backvp of dir */
5943 	if (dcp->c_backvp == NULL) {
5944 		error = cachefs_getbackvp(fscp, dcp);
5945 		if (error) {
5946 			mutex_exit(&dcp->c_statelock);
5947 			goto out;
5948 		}
5949 	}
5950 
5951 	/* consistency check the directory */
5952 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
5953 	if (error) {
5954 		mutex_exit(&dcp->c_statelock);
5955 		goto out;
5956 	}
5957 	dircid = dcp->c_id;
5958 
5959 	/* make the dir on the back fs */
5960 	CFS_DPRINT_BACKFS_NFSV4(fscp,
5961 	    ("cachefs_mkdir (nfsv4): dcp %p, dbackvp %p, "
5962 	    "name %s\n", dcp, dcp->c_backvp, nm));
5963 	error = VOP_MKDIR(dcp->c_backvp, nm, vap, &vp, cr, NULL, 0, NULL);
5964 	mutex_exit(&dcp->c_statelock);
5965 	if (error) {
5966 		goto out;
5967 	}
5968 
5969 	/* get the cookie and make the cnode */
5970 	attr.va_mask = AT_ALL;
5971 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
5972 	error = cachefs_getcookie(vp, &cookie, &attr, cr, valid_fid);
5973 	if (error) {
5974 		goto out;
5975 	}
5976 	cid.cid_flags = 0;
5977 	cid.cid_fileno = attr.va_nodeid;
5978 	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
5979 	    &attr, vp, cr, 0, &newcp);
5980 	if (error) {
5981 		goto out;
5982 	}
5983 	ASSERT(CTOV(newcp)->v_type == VDIR);
5984 	*vpp = CTOV(newcp);
5985 
5986 	/* if the dir is populated, add the new entry */
5987 	mutex_enter(&dcp->c_statelock);
5988 	if (CFS_ISFS_NONSHARED(fscp) &&
5989 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
5990 		error = cachefs_dir_enter(dcp, nm, &cookie, &newcp->c_id,
5991 		    SM_ASYNC);
5992 		if (error) {
5993 			cachefs_nocache(dcp);
5994 			error = 0;
5995 		}
5996 	}
5997 	dcp->c_attr.va_nlink++;
5998 	dcp->c_flags |= CN_UPDATED;
5999 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
6000 	mutex_exit(&dcp->c_statelock);
6001 
6002 	/* XXX bob: should we do a filldir here? or just add . and .. */
6003 	/* maybe should kick off an async filldir so caller does not wait */
6004 
6005 	/* put the entry in the dnlc */
6006 	if (cachefs_dnlc)
6007 		dnlc_enter(dvp, nm, *vpp);
6008 
6009 	/* save the fileno of the parent so can find the name */
6010 	if (bcmp(&newcp->c_metadata.md_parent, &dircid,
6011 	    sizeof (cfs_cid_t)) != 0) {
6012 		mutex_enter(&newcp->c_statelock);
6013 		newcp->c_metadata.md_parent = dircid;
6014 		newcp->c_flags |= CN_UPDATED;
6015 		mutex_exit(&newcp->c_statelock);
6016 	}
6017 out:
6018 	if (vp)
6019 		VN_RELE(vp);
6020 
6021 	return (error);
6022 }
6023 
6024 static int
6025 cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
6026     vnode_t **vpp, cred_t *cr)
6027 {
6028 	cnode_t *dcp = VTOC(dvp);
6029 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6030 	int error;
6031 	cnode_t *newcp = NULL;
6032 	struct vattr va;
6033 	timestruc_t current_time;
6034 	off_t commit = 0;
6035 	char *s;
6036 	int namlen;
6037 
6038 	/* don't allow '/' characters in pathname component */
6039 	for (s = nm, namlen = 0; *s; s++, namlen++)
6040 		if (*s == '/')
6041 			return (EACCES);
6042 	if (namlen == 0)
6043 		return (EINVAL);
6044 
6045 	if (CFS_ISFS_WRITE_AROUND(fscp))
6046 		return (ETIMEDOUT);
6047 
6048 	mutex_enter(&dcp->c_statelock);
6049 
6050 	/* check permissions */
6051 	if (error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) {
6052 		mutex_exit(&dcp->c_statelock);
6053 		goto out;
6054 	}
6055 
6056 	/* the directory front file must be populated */
6057 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6058 		error = ETIMEDOUT;
6059 		mutex_exit(&dcp->c_statelock);
6060 		goto out;
6061 	}
6062 
6063 	/* make sure nm does not already exist in the directory */
6064 	error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL);
6065 	if (error == ENOTDIR) {
6066 		error = ETIMEDOUT;
6067 		mutex_exit(&dcp->c_statelock);
6068 		goto out;
6069 	}
6070 	if (error != ENOENT) {
6071 		error = EEXIST;
6072 		mutex_exit(&dcp->c_statelock);
6073 		goto out;
6074 	}
6075 
6076 	/* make up a reasonable set of attributes */
6077 	cachefs_attr_setup(vap, &va, dcp, cr);
6078 	va.va_type = VDIR;
6079 	va.va_mode |= S_IFDIR;
6080 	va.va_nlink = 2;
6081 
6082 	mutex_exit(&dcp->c_statelock);
6083 
6084 	/* create the cnode */
6085 	error = cachefs_cnode_create(fscp, &va, 0, &newcp);
6086 	if (error)
6087 		goto out;
6088 
6089 	mutex_enter(&newcp->c_statelock);
6090 
6091 	error = cachefs_dlog_cidmap(fscp);
6092 	if (error) {
6093 		mutex_exit(&newcp->c_statelock);
6094 		goto out;
6095 	}
6096 
6097 	cachefs_creategid(dcp, newcp, vap, cr);
6098 	mutex_enter(&dcp->c_statelock);
6099 	cachefs_createacl(dcp, newcp);
6100 	mutex_exit(&dcp->c_statelock);
6101 	gethrestime(&current_time);
6102 	newcp->c_metadata.md_vattr.va_atime = current_time;
6103 	newcp->c_metadata.md_localctime = current_time;
6104 	newcp->c_metadata.md_localmtime = current_time;
6105 	newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME |
6106 	    MD_LOCALCTIME;
6107 	newcp->c_flags |= CN_UPDATED;
6108 
6109 	/* make a front file for the new directory, add . and .. */
6110 	error = cachefs_dir_new(dcp, newcp);
6111 	if (error) {
6112 		mutex_exit(&newcp->c_statelock);
6113 		goto out;
6114 	}
6115 	cachefs_modified(newcp);
6116 
6117 	/*
6118 	 * write the metadata now rather than waiting until
6119 	 * inactive so that if there's no space we can let
6120 	 * the caller know.
6121 	 */
6122 	ASSERT(newcp->c_frontvp);
6123 	ASSERT((newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
6124 	ASSERT((newcp->c_flags & CN_ALLOC_PENDING) == 0);
6125 	error = filegrp_write_metadata(newcp->c_filegrp,
6126 	    &newcp->c_id, &newcp->c_metadata);
6127 	if (error) {
6128 		mutex_exit(&newcp->c_statelock);
6129 		goto out;
6130 	}
6131 	mutex_exit(&newcp->c_statelock);
6132 
6133 	/* log the operation */
6134 	commit = cachefs_dlog_mkdir(fscp, dcp, newcp, nm, &va, cr);
6135 	if (commit == 0) {
6136 		error = ENOSPC;
6137 		goto out;
6138 	}
6139 
6140 	mutex_enter(&dcp->c_statelock);
6141 
6142 	/* make sure directory is still populated */
6143 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6144 		mutex_exit(&dcp->c_statelock);
6145 		error = ETIMEDOUT;
6146 		goto out;
6147 	}
6148 	cachefs_modified(dcp);
6149 
6150 	/* enter the new file in the directory */
6151 	error = cachefs_dir_enter(dcp, nm, &newcp->c_metadata.md_cookie,
6152 	    &newcp->c_id, SM_ASYNC);
6153 	if (error) {
6154 		mutex_exit(&dcp->c_statelock);
6155 		goto out;
6156 	}
6157 
6158 	/* update parent dir times */
6159 	dcp->c_metadata.md_localctime = current_time;
6160 	dcp->c_metadata.md_localmtime = current_time;
6161 	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
6162 	dcp->c_attr.va_nlink++;
6163 	dcp->c_flags |= CN_UPDATED;
6164 	mutex_exit(&dcp->c_statelock);
6165 
6166 out:
6167 	if (commit) {
6168 		/* commit the log entry */
6169 		if (cachefs_dlog_commit(fscp, commit, error)) {
6170 			/*EMPTY*/
6171 			/* XXX bob: fix on panic */
6172 		}
6173 	}
6174 	if (error) {
6175 		if (newcp) {
6176 			mutex_enter(&newcp->c_statelock);
6177 			newcp->c_flags |= CN_DESTROY;
6178 			mutex_exit(&newcp->c_statelock);
6179 			VN_RELE(CTOV(newcp));
6180 		}
6181 	} else {
6182 		*vpp = CTOV(newcp);
6183 	}
6184 	return (error);
6185 }
6186 
6187 /*ARGSUSED*/
6188 static int
6189 cachefs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
6190     caller_context_t *ct, int flags)
6191 {
6192 	cnode_t *dcp = VTOC(dvp);
6193 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6194 	cachefscache_t *cachep = fscp->fs_cache;
6195 	int error = 0;
6196 	int held = 0;
6197 	int connected = 0;
6198 	size_t namlen;
6199 	vnode_t *vp = NULL;
6200 	int vfslock = 0;
6201 
6202 #ifdef CFSDEBUG
6203 	CFS_DEBUG(CFSDEBUG_VOPS)
6204 		printf("cachefs_rmdir: ENTER vp %p\n", (void *)dvp);
6205 #endif
6206 
6207 	if (getzoneid() != GLOBAL_ZONEID) {
6208 		error = EPERM;
6209 		goto out;
6210 	}
6211 
6212 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
6213 		ASSERT(dcp->c_flags & CN_NOCACHE);
6214 
6215 	/*
6216 	 * Cachefs only provides pass-through support for NFSv4,
6217 	 * and all vnode operations are passed through to the
6218 	 * back file system. For NFSv4 pass-through to work, only
6219 	 * connected operation is supported, the cnode backvp must
6220 	 * exist, and cachefs optional (eg., disconnectable) flags
6221 	 * are turned off. Assert these conditions to ensure that
6222 	 * the backfilesystem is called for the rmdir operation.
6223 	 */
6224 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6225 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6226 
6227 	for (;;) {
6228 		if (vfslock) {
6229 			vn_vfsunlock(vp);
6230 			vfslock = 0;
6231 		}
6232 		if (vp) {
6233 			VN_RELE(vp);
6234 			vp = NULL;
6235 		}
6236 
6237 		/* get (or renew) access to the file system */
6238 		if (held) {
6239 			/* Won't loop with NFSv4 connected behavior */
6240 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6241 			cachefs_cd_release(fscp);
6242 			held = 0;
6243 		}
6244 		error = cachefs_cd_access(fscp, connected, 1);
6245 		if (error)
6246 			break;
6247 		held = 1;
6248 
6249 		/* if disconnected, do some extra error checking */
6250 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
6251 			/* check permissions */
6252 			mutex_enter(&dcp->c_statelock);
6253 			error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
6254 			mutex_exit(&dcp->c_statelock);
6255 			if (CFS_TIMEOUT(fscp, error)) {
6256 				connected = 1;
6257 				continue;
6258 			}
6259 			if (error)
6260 				break;
6261 
6262 			namlen = strlen(nm);
6263 			if (namlen == 0) {
6264 				error = EINVAL;
6265 				break;
6266 			}
6267 
6268 			/* cannot remove . and .. */
6269 			if (nm[0] == '.') {
6270 				if (namlen == 1) {
6271 					error = EINVAL;
6272 					break;
6273 				} else if (namlen == 2 && nm[1] == '.') {
6274 					error = EEXIST;
6275 					break;
6276 				}
6277 			}
6278 
6279 		}
6280 
6281 		/* get the cnode of the dir to remove */
6282 		error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
6283 		if (error) {
6284 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6285 				if (CFS_TIMEOUT(fscp, error)) {
6286 					cachefs_cd_release(fscp);
6287 					held = 0;
6288 					cachefs_cd_timedout(fscp);
6289 					connected = 0;
6290 					continue;
6291 				}
6292 			} else {
6293 				if (CFS_TIMEOUT(fscp, error)) {
6294 					connected = 1;
6295 					continue;
6296 				}
6297 			}
6298 			break;
6299 		}
6300 
6301 		/* must be a dir */
6302 		if (vp->v_type != VDIR) {
6303 			error = ENOTDIR;
6304 			break;
6305 		}
6306 
6307 		/* must not be current dir */
6308 		if (VOP_CMP(vp, cdir, ct)) {
6309 			error = EINVAL;
6310 			break;
6311 		}
6312 
6313 		/* see ufs_dirremove for why this is done, mount race */
6314 		if (vn_vfswlock(vp)) {
6315 			error = EBUSY;
6316 			break;
6317 		}
6318 		vfslock = 1;
6319 		if (vn_mountedvfs(vp) != NULL) {
6320 			error = EBUSY;
6321 			break;
6322 		}
6323 
6324 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6325 			error = cachefs_rmdir_connected(dvp, nm, cdir,
6326 			    cr, vp);
6327 			if (CFS_TIMEOUT(fscp, error)) {
6328 				cachefs_cd_release(fscp);
6329 				held = 0;
6330 				cachefs_cd_timedout(fscp);
6331 				connected = 0;
6332 				continue;
6333 			}
6334 		} else {
6335 			error = cachefs_rmdir_disconnected(dvp, nm, cdir,
6336 			    cr, vp);
6337 			if (CFS_TIMEOUT(fscp, error)) {
6338 				connected = 1;
6339 				continue;
6340 			}
6341 		}
6342 		break;
6343 	}
6344 
6345 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RMDIR)) {
6346 		ino64_t fileno = 0;
6347 		fid_t *fidp = NULL;
6348 		cnode_t *cp = NULL;
6349 		if (vp)
6350 			cp = VTOC(vp);
6351 
6352 		if (cp != NULL) {
6353 			fidp = &cp->c_metadata.md_cookie;
6354 			fileno = cp->c_id.cid_fileno;
6355 		}
6356 
6357 		cachefs_log_rmdir(cachep, error, fscp->fs_cfsvfsp,
6358 		    fidp, fileno, crgetuid(cr));
6359 	}
6360 
6361 	if (held) {
6362 		cachefs_cd_release(fscp);
6363 	}
6364 
6365 	if (vfslock)
6366 		vn_vfsunlock(vp);
6367 
6368 	if (vp)
6369 		VN_RELE(vp);
6370 
6371 #ifdef CFS_CD_DEBUG
6372 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
6373 #endif
6374 out:
6375 #ifdef CFSDEBUG
6376 	CFS_DEBUG(CFSDEBUG_VOPS)
6377 		printf("cachefs_rmdir: EXIT error = %d\n", error);
6378 #endif
6379 
6380 	return (error);
6381 }
6382 
6383 static int
6384 cachefs_rmdir_connected(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
6385     vnode_t *vp)
6386 {
6387 	cnode_t *dcp = VTOC(dvp);
6388 	cnode_t *cp = VTOC(vp);
6389 	int error = 0;
6390 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6391 
6392 	rw_enter(&dcp->c_rwlock, RW_WRITER);
6393 	mutex_enter(&dcp->c_statelock);
6394 	mutex_enter(&cp->c_statelock);
6395 
6396 	if (dcp->c_backvp == NULL) {
6397 		error = cachefs_getbackvp(fscp, dcp);
6398 		if (error) {
6399 			goto out;
6400 		}
6401 	}
6402 
6403 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
6404 	if (error)
6405 		goto out;
6406 
6407 	/* rmdir on the back fs */
6408 	CFS_DPRINT_BACKFS_NFSV4(fscp,
6409 	    ("cachefs_rmdir (nfsv4): dcp %p, dbackvp %p, "
6410 	    "name %s\n", dcp, dcp->c_backvp, nm));
6411 	error = VOP_RMDIR(dcp->c_backvp, nm, cdir, cr, NULL, 0);
6412 	if (error)
6413 		goto out;
6414 
6415 	/* if the dir is populated, remove the entry from it */
6416 	if (CFS_ISFS_NONSHARED(fscp) &&
6417 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
6418 		error = cachefs_dir_rmentry(dcp, nm);
6419 		if (error) {
6420 			cachefs_nocache(dcp);
6421 			error = 0;
6422 		}
6423 	}
6424 
6425 	/*
6426 	 * *if* the (hard) link count goes to 0, then we set the CDESTROY
6427 	 * flag on the cnode. The cached object will then be destroyed
6428 	 * at inactive time where the chickens come home to roost :-)
6429 	 * The link cnt for directories is bumped down by 2 'cause the "."
6430 	 * entry has to be elided too ! The link cnt for the parent goes down
6431 	 * by 1 (because of "..").
6432 	 */
6433 	cp->c_attr.va_nlink -= 2;
6434 	dcp->c_attr.va_nlink--;
6435 	if (cp->c_attr.va_nlink == 0) {
6436 		cp->c_flags |= CN_DESTROY;
6437 	} else {
6438 		cp->c_flags |= CN_UPDATED;
6439 	}
6440 	dcp->c_flags |= CN_UPDATED;
6441 
6442 	dnlc_purge_vp(vp);
6443 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
6444 
6445 out:
6446 	mutex_exit(&cp->c_statelock);
6447 	mutex_exit(&dcp->c_statelock);
6448 	rw_exit(&dcp->c_rwlock);
6449 
6450 	return (error);
6451 }
6452 
6453 static int
6454 /*ARGSUSED*/
6455 cachefs_rmdir_disconnected(vnode_t *dvp, char *nm, vnode_t *cdir,
6456     cred_t *cr, vnode_t *vp)
6457 {
6458 	cnode_t *dcp = VTOC(dvp);
6459 	cnode_t *cp = VTOC(vp);
6460 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6461 	int error = 0;
6462 	off_t commit = 0;
6463 	timestruc_t current_time;
6464 
6465 	if (CFS_ISFS_WRITE_AROUND(fscp))
6466 		return (ETIMEDOUT);
6467 
6468 	rw_enter(&dcp->c_rwlock, RW_WRITER);
6469 	mutex_enter(&dcp->c_statelock);
6470 	mutex_enter(&cp->c_statelock);
6471 
6472 	/* both directories must be populated */
6473 	if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) ||
6474 	    ((cp->c_metadata.md_flags & MD_POPULATED) == 0)) {
6475 		error = ETIMEDOUT;
6476 		goto out;
6477 	}
6478 
6479 	/* if sticky bit set on the dir, more access checks to perform */
6480 	if (error = cachefs_stickyrmchk(dcp, cp, cr)) {
6481 		goto out;
6482 	}
6483 
6484 	/* make sure dir is empty */
6485 	if (cp->c_attr.va_nlink > 2) {
6486 		error = cachefs_dir_empty(cp);
6487 		if (error) {
6488 			if (error == ENOTDIR)
6489 				error = ETIMEDOUT;
6490 			goto out;
6491 		}
6492 		cachefs_modified(cp);
6493 	}
6494 	cachefs_modified(dcp);
6495 
6496 	/* log the operation */
6497 	commit = cachefs_dlog_rmdir(fscp, dcp, nm, cp, cr);
6498 	if (commit == 0) {
6499 		error = ENOSPC;
6500 		goto out;
6501 	}
6502 
6503 	/* remove name from parent dir */
6504 	error = cachefs_dir_rmentry(dcp, nm);
6505 	if (error == ENOTDIR) {
6506 		error = ETIMEDOUT;
6507 		goto out;
6508 	}
6509 	if (error)
6510 		goto out;
6511 
6512 	gethrestime(&current_time);
6513 
6514 	/* update deleted dir values */
6515 	cp->c_attr.va_nlink -= 2;
6516 	if (cp->c_attr.va_nlink == 0)
6517 		cp->c_flags |= CN_DESTROY;
6518 	else {
6519 		cp->c_metadata.md_localctime = current_time;
6520 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
6521 		cp->c_flags |= CN_UPDATED;
6522 	}
6523 
6524 	/* update parent values */
6525 	dcp->c_metadata.md_localctime = current_time;
6526 	dcp->c_metadata.md_localmtime = current_time;
6527 	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
6528 	dcp->c_attr.va_nlink--;
6529 	dcp->c_flags |= CN_UPDATED;
6530 
6531 out:
6532 	mutex_exit(&cp->c_statelock);
6533 	mutex_exit(&dcp->c_statelock);
6534 	rw_exit(&dcp->c_rwlock);
6535 	if (commit) {
6536 		/* commit the log entry */
6537 		if (cachefs_dlog_commit(fscp, commit, error)) {
6538 			/*EMPTY*/
6539 			/* XXX bob: fix on panic */
6540 		}
6541 		dnlc_purge_vp(vp);
6542 	}
6543 	return (error);
6544 }
6545 
6546 /*ARGSUSED*/
6547 static int
6548 cachefs_symlink(vnode_t *dvp, char *lnm, vattr_t *tva,
6549     char *tnm, cred_t *cr, caller_context_t *ct, int flags)
6550 {
6551 	cnode_t *dcp = VTOC(dvp);
6552 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6553 	cachefscache_t *cachep = fscp->fs_cache;
6554 	int error = 0;
6555 	int held = 0;
6556 	int connected = 0;
6557 
6558 #ifdef CFSDEBUG
6559 	CFS_DEBUG(CFSDEBUG_VOPS)
6560 		printf("cachefs_symlink: ENTER dvp %p lnm %s tnm %s\n",
6561 		    (void *)dvp, lnm, tnm);
6562 #endif
6563 
6564 	if (getzoneid() != GLOBAL_ZONEID) {
6565 		error = EPERM;
6566 		goto out;
6567 	}
6568 
6569 	if (fscp->fs_cache->c_flags & CACHE_NOCACHE)
6570 		ASSERT(dcp->c_flags & CN_NOCACHE);
6571 
6572 	/*
6573 	 * Cachefs only provides pass-through support for NFSv4,
6574 	 * and all vnode operations are passed through to the
6575 	 * back file system. For NFSv4 pass-through to work, only
6576 	 * connected operation is supported, the cnode backvp must
6577 	 * exist, and cachefs optional (eg., disconnectable) flags
6578 	 * are turned off. Assert these conditions to ensure that
6579 	 * the backfilesystem is called for the symlink operation.
6580 	 */
6581 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6582 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6583 
6584 	for (;;) {
6585 		/* get (or renew) access to the file system */
6586 		if (held) {
6587 			/* Won't loop with NFSv4 connected behavior */
6588 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6589 			rw_exit(&dcp->c_rwlock);
6590 			cachefs_cd_release(fscp);
6591 			held = 0;
6592 		}
6593 		error = cachefs_cd_access(fscp, connected, 1);
6594 		if (error)
6595 			break;
6596 		rw_enter(&dcp->c_rwlock, RW_WRITER);
6597 		held = 1;
6598 
6599 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6600 			error = cachefs_symlink_connected(dvp, lnm, tva,
6601 			    tnm, cr);
6602 			if (CFS_TIMEOUT(fscp, error)) {
6603 				rw_exit(&dcp->c_rwlock);
6604 				cachefs_cd_release(fscp);
6605 				held = 0;
6606 				cachefs_cd_timedout(fscp);
6607 				connected = 0;
6608 				continue;
6609 			}
6610 		} else {
6611 			error = cachefs_symlink_disconnected(dvp, lnm, tva,
6612 			    tnm, cr);
6613 			if (CFS_TIMEOUT(fscp, error)) {
6614 				connected = 1;
6615 				continue;
6616 			}
6617 		}
6618 		break;
6619 	}
6620 
6621 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_SYMLINK))
6622 		cachefs_log_symlink(cachep, error, fscp->fs_cfsvfsp,
6623 		    &dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno,
6624 		    crgetuid(cr), (uint_t)strlen(tnm));
6625 
6626 	if (held) {
6627 		rw_exit(&dcp->c_rwlock);
6628 		cachefs_cd_release(fscp);
6629 	}
6630 
6631 #ifdef CFS_CD_DEBUG
6632 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
6633 #endif
6634 out:
6635 #ifdef CFSDEBUG
6636 	CFS_DEBUG(CFSDEBUG_VOPS)
6637 		printf("cachefs_symlink: EXIT error = %d\n", error);
6638 #endif
6639 	return (error);
6640 }
6641 
6642 static int
6643 cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva,
6644     char *tnm, cred_t *cr)
6645 {
6646 	cnode_t *dcp = VTOC(dvp);
6647 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6648 	int error = 0;
6649 	vnode_t *backvp = NULL;
6650 	cnode_t *newcp = NULL;
6651 	struct vattr va;
6652 	struct fid cookie;
6653 	cfs_cid_t cid;
6654 	uint32_t valid_fid;
6655 
6656 	mutex_enter(&dcp->c_statelock);
6657 
6658 	if (dcp->c_backvp == NULL) {
6659 		error = cachefs_getbackvp(fscp, dcp);
6660 		if (error) {
6661 			cachefs_nocache(dcp);
6662 			mutex_exit(&dcp->c_statelock);
6663 			goto out;
6664 		}
6665 	}
6666 
6667 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
6668 	if (error) {
6669 		mutex_exit(&dcp->c_statelock);
6670 		goto out;
6671 	}
6672 	CFS_DPRINT_BACKFS_NFSV4(fscp,
6673 	    ("cachefs_symlink (nfsv4): dcp %p, dbackvp %p, "
6674 	    "lnm %s, tnm %s\n", dcp, dcp->c_backvp, lnm, tnm));
6675 	error = VOP_SYMLINK(dcp->c_backvp, lnm, tva, tnm, cr, NULL, 0);
6676 	if (error) {
6677 		mutex_exit(&dcp->c_statelock);
6678 		goto out;
6679 	}
6680 	if ((dcp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
6681 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
6682 		cachefs_nocache(dcp);
6683 		mutex_exit(&dcp->c_statelock);
6684 		goto out;
6685 	}
6686 
6687 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
6688 
6689 	/* lookup the symlink we just created and get its fid and attrs */
6690 	(void) VOP_LOOKUP(dcp->c_backvp, lnm, &backvp, NULL, 0, NULL, cr,
6691 	    NULL, NULL, NULL);
6692 	if (backvp == NULL) {
6693 		if (CFS_ISFS_BACKFS_NFSV4(fscp) == 0)
6694 			cachefs_nocache(dcp);
6695 		mutex_exit(&dcp->c_statelock);
6696 		goto out;
6697 	}
6698 
6699 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
6700 	error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
6701 	if (error) {
6702 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6703 		error = 0;
6704 		cachefs_nocache(dcp);
6705 		mutex_exit(&dcp->c_statelock);
6706 		goto out;
6707 	}
6708 	cid.cid_fileno = va.va_nodeid;
6709 	cid.cid_flags = 0;
6710 
6711 	/* if the dir is cached, add the symlink to it */
6712 	if (CFS_ISFS_NONSHARED(fscp) &&
6713 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
6714 		error = cachefs_dir_enter(dcp, lnm, &cookie, &cid, SM_ASYNC);
6715 		if (error) {
6716 			cachefs_nocache(dcp);
6717 			error = 0;
6718 		}
6719 	}
6720 	mutex_exit(&dcp->c_statelock);
6721 
6722 	/* make the cnode for the sym link */
6723 	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
6724 	    &va, backvp, cr, 0, &newcp);
6725 	if (error) {
6726 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6727 		cachefs_nocache(dcp);
6728 		error = 0;
6729 		goto out;
6730 	}
6731 
6732 	/* try to cache the symlink contents */
6733 	rw_enter(&newcp->c_rwlock, RW_WRITER);
6734 	mutex_enter(&newcp->c_statelock);
6735 
6736 	/*
6737 	 * try to cache the sym link, note that its a noop if NOCACHE
6738 	 * or NFSv4 is set
6739 	 */
6740 	error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size);
6741 	if (error) {
6742 		cachefs_nocache(newcp);
6743 		error = 0;
6744 	}
6745 	mutex_exit(&newcp->c_statelock);
6746 	rw_exit(&newcp->c_rwlock);
6747 
6748 out:
6749 	if (backvp)
6750 		VN_RELE(backvp);
6751 	if (newcp)
6752 		VN_RELE(CTOV(newcp));
6753 	return (error);
6754 }
6755 
/*
 * cachefs_symlink_disconnected
 *
 * Disconnected-mode symlink creation.  Nothing is sent to the back
 * file system: a new cnode is created locally, the operation is
 * recorded with cachefs_dlog_symlink(), the link text and metadata are
 * written to the cache and the name is entered in the cached parent
 * directory.
 *
 * Returns ETIMEDOUT when the request cannot be satisfied from the
 * cache alone (write-around file system, parent not populated, cached
 * directory reported as ENOTDIR), EEXIST when the name lookup yields
 * anything but ENOENT, ENOSPC when log or cache space cannot be
 * obtained, or the error of the failing helper.
 *
 * On any failure after the new cnode exists, the cnode is flagged
 * CN_DESTROY before its hold is released.
 */
static int
cachefs_symlink_disconnected(vnode_t *dvp, char *lnm, vattr_t *tva,
    char *tnm, cred_t *cr)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error;
	cnode_t *newcp = NULL;
	struct vattr va;
	timestruc_t current_time;
	off_t commit = 0;	/* log offset; 0 means nothing to commit */

	if (CFS_ISFS_WRITE_AROUND(fscp))
		return (ETIMEDOUT);

	mutex_enter(&dcp->c_statelock);

	/* check permissions */
	if (error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* the directory front file must be populated */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		error = ETIMEDOUT;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* make sure lnm does not already exist in the directory */
	error = cachefs_dir_look(dcp, lnm, NULL, NULL, NULL, NULL);
	if (error == ENOTDIR) {
		error = ETIMEDOUT;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	/* anything other than "not found" is reported as EEXIST */
	if (error != ENOENT) {
		error = EEXIST;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* make up a reasonable set of attributes */
	cachefs_attr_setup(tva, &va, dcp, cr);
	va.va_type = VLNK;
	va.va_mode |= S_IFLNK;
	va.va_size = strlen(tnm);

	mutex_exit(&dcp->c_statelock);

	/* create the cnode */
	error = cachefs_cnode_create(fscp, &va, 0, &newcp);
	if (error)
		goto out;

	rw_enter(&newcp->c_rwlock, RW_WRITER);
	mutex_enter(&newcp->c_statelock);

	/* any cidmap failure is reported to the caller as ENOSPC */
	error = cachefs_dlog_cidmap(fscp);
	if (error) {
		mutex_exit(&newcp->c_statelock);
		rw_exit(&newcp->c_rwlock);
		error = ENOSPC;
		goto out;
	}

	cachefs_creategid(dcp, newcp, tva, cr);
	/* cachefs_createacl() is called with the parent's statelock held */
	mutex_enter(&dcp->c_statelock);
	cachefs_createacl(dcp, newcp);
	mutex_exit(&dcp->c_statelock);
	gethrestime(&current_time);
	newcp->c_metadata.md_vattr.va_atime = current_time;
	newcp->c_metadata.md_localctime = current_time;
	newcp->c_metadata.md_localmtime = current_time;
	newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME |
	    MD_LOCALCTIME;
	newcp->c_flags |= CN_UPDATED;

	/* log the operation */
	commit = cachefs_dlog_symlink(fscp, dcp, newcp, lnm, tva, tnm, cr);
	if (commit == 0) {
		mutex_exit(&newcp->c_statelock);
		rw_exit(&newcp->c_rwlock);
		error = ENOSPC;
		goto out;
	}

	/* store the symlink contents */
	error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size);
	if (error) {
		mutex_exit(&newcp->c_statelock);
		rw_exit(&newcp->c_rwlock);
		goto out;
	}
	if (cachefs_modified_alloc(newcp)) {
		mutex_exit(&newcp->c_statelock);
		rw_exit(&newcp->c_rwlock);
		error = ENOSPC;
		goto out;
	}

	/*
	 * write the metadata now rather than waiting until
	 * inactive so that if there's no space we can let
	 * the caller know.
	 */
	if (newcp->c_flags & CN_ALLOC_PENDING) {
		if (newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
			(void) filegrp_allocattr(newcp->c_filegrp);
		}
		error = filegrp_create_metadata(newcp->c_filegrp,
		    &newcp->c_metadata, &newcp->c_id);
		if (error) {
			mutex_exit(&newcp->c_statelock);
			rw_exit(&newcp->c_rwlock);
			goto out;
		}
		newcp->c_flags &= ~CN_ALLOC_PENDING;
	}
	error = filegrp_write_metadata(newcp->c_filegrp,
	    &newcp->c_id, &newcp->c_metadata);
	if (error) {
		mutex_exit(&newcp->c_statelock);
		rw_exit(&newcp->c_rwlock);
		goto out;
	}
	mutex_exit(&newcp->c_statelock);
	rw_exit(&newcp->c_rwlock);

	mutex_enter(&dcp->c_statelock);

	/*
	 * enter the new file in the directory; the populated flag is
	 * tested again because the parent's statelock was dropped
	 * since the first check
	 */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		error = ETIMEDOUT;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	cachefs_modified(dcp);
	error = cachefs_dir_enter(dcp, lnm, &newcp->c_metadata.md_cookie,
	    &newcp->c_id, SM_ASYNC);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* update parent dir times */
	dcp->c_metadata.md_localctime = current_time;
	dcp->c_metadata.md_localmtime = current_time;
	dcp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME;
	dcp->c_flags |= CN_UPDATED;
	mutex_exit(&dcp->c_statelock);

out:
	/* reached with no cnode locks held */
	if (commit) {
		/* commit the log entry */
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
	}

	/* on failure mark the half-built cnode for destruction */
	if (error) {
		if (newcp) {
			mutex_enter(&newcp->c_statelock);
			newcp->c_flags |= CN_DESTROY;
			mutex_exit(&newcp->c_statelock);
		}
	}
	if (newcp) {
		VN_RELE(CTOV(newcp));
	}

	return (error);
}
6931 
6932 /*ARGSUSED*/
6933 static int
6934 cachefs_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
6935     caller_context_t *ct, int flags)
6936 {
6937 	cnode_t *dcp = VTOC(vp);
6938 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6939 	cachefscache_t *cachep = fscp->fs_cache;
6940 	int error = 0;
6941 	int held = 0;
6942 	int connected = 0;
6943 
6944 #ifdef CFSDEBUG
6945 	CFS_DEBUG(CFSDEBUG_VOPS)
6946 		printf("cachefs_readdir: ENTER vp %p\n", (void *)vp);
6947 #endif
6948 	if (getzoneid() != GLOBAL_ZONEID) {
6949 		error = EPERM;
6950 		goto out;
6951 	}
6952 
6953 	/*
6954 	 * Cachefs only provides pass-through support for NFSv4,
6955 	 * and all vnode operations are passed through to the
6956 	 * back file system. For NFSv4 pass-through to work, only
6957 	 * connected operation is supported, the cnode backvp must
6958 	 * exist, and cachefs optional (eg., disconnectable) flags
6959 	 * are turned off. Assert these conditions to ensure that
6960 	 * the backfilesystem is called for the readdir operation.
6961 	 */
6962 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6963 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6964 
6965 	for (;;) {
6966 		/* get (or renew) access to the file system */
6967 		if (held) {
6968 			/* Won't loop with NFSv4 connected behavior */
6969 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6970 			rw_exit(&dcp->c_rwlock);
6971 			cachefs_cd_release(fscp);
6972 			held = 0;
6973 		}
6974 		error = cachefs_cd_access(fscp, connected, 0);
6975 		if (error)
6976 			break;
6977 		rw_enter(&dcp->c_rwlock, RW_READER);
6978 		held = 1;
6979 
6980 		/* quit if link count of zero (posix) */
6981 		if (dcp->c_attr.va_nlink == 0) {
6982 			if (eofp)
6983 				*eofp = 1;
6984 			error = 0;
6985 			break;
6986 		}
6987 
6988 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6989 			error = cachefs_readdir_connected(vp, uiop, cr,
6990 			    eofp);
6991 			if (CFS_TIMEOUT(fscp, error)) {
6992 				rw_exit(&dcp->c_rwlock);
6993 				cachefs_cd_release(fscp);
6994 				held = 0;
6995 				cachefs_cd_timedout(fscp);
6996 				connected = 0;
6997 				continue;
6998 			}
6999 		} else {
7000 			error = cachefs_readdir_disconnected(vp, uiop, cr,
7001 			    eofp);
7002 			if (CFS_TIMEOUT(fscp, error)) {
7003 				if (cachefs_cd_access_miss(fscp)) {
7004 					error = cachefs_readdir_connected(vp,
7005 					    uiop, cr, eofp);
7006 					if (!CFS_TIMEOUT(fscp, error))
7007 						break;
7008 					delay(5*hz);
7009 					connected = 0;
7010 					continue;
7011 				}
7012 				connected = 1;
7013 				continue;
7014 			}
7015 		}
7016 		break;
7017 	}
7018 
7019 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READDIR))
7020 		cachefs_log_readdir(cachep, error, fscp->fs_cfsvfsp,
7021 		    &dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno,
7022 		    crgetuid(cr), uiop->uio_loffset, *eofp);
7023 
7024 	if (held) {
7025 		rw_exit(&dcp->c_rwlock);
7026 		cachefs_cd_release(fscp);
7027 	}
7028 
7029 #ifdef CFS_CD_DEBUG
7030 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7031 #endif
7032 out:
7033 #ifdef CFSDEBUG
7034 	CFS_DEBUG(CFSDEBUG_VOPS)
7035 		printf("cachefs_readdir: EXIT error = %d\n", error);
7036 #endif
7037 
7038 	return (error);
7039 }
7040 
/*
 * cachefs_readdir_connected
 *
 * Connected-mode directory read.  After a consistency check the
 * entries are served from the cached front file when it is populated
 * and usable; otherwise they are read from the back file system.  An
 * unpopulated directory is either queued for asynchronous population
 * or filled synchronously, depending on cachefs_async_okay().
 *
 * dcp->c_statelock is held for the whole function.  Updates the
 * st_hits / st_misses statistics of the fscache on success.
 */
static int
cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp)
{
	cnode_t *dcp = VTOC(vp);
	int error;
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	struct cachefs_req *rp;

	mutex_enter(&dcp->c_statelock);

	/* check directory consistency */
	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
	if (error)
		goto out;
	dcp->c_usage++;

	/* if dir was modified, toss old contents */
	if (dcp->c_metadata.md_flags & MD_INVALREADDIR) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		cachefs_inval_object(dcp);
	}

	/*
	 * Populate the directory if it is not populated yet, no
	 * population is pending, caching is allowed for it, the back
	 * fs is not NFSv4 and we are connected.
	 */
	error = 0;
	if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) &&
	    ((dcp->c_flags & (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0) &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp) &&
	    (fscp->fs_cdconnected == CFS_CD_CONNECTED)) {

		if (cachefs_async_okay()) {

			/*
			 * Set up asynchronous request to fill this
			 * directory.
			 */

			dcp->c_flags |= CN_ASYNC_POPULATE;

			rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
			rp->cfs_cmd = CFS_POPULATE;
			rp->cfs_req_u.cu_populate.cpop_vp = vp;
			rp->cfs_cr = cr;

			/* the queued request keeps its own holds */
			crhold(cr);
			VN_HOLD(vp);

			cachefs_addqueue(rp, &fscp->fs_workq);
		} else {
			/* fill synchronously; give up caching on failure */
			error = cachefs_dir_fill(dcp, cr);
			if (error != 0)
				cachefs_nocache(dcp);
		}
	}

	/* if front file is populated */
	if (((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) == 0) &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp) &&
	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		error = cachefs_dir_read(dcp, uiop, eofp);
		if (error == 0)
			fscp->fs_stats.st_hits++;
	}

	/*
	 * if front file could not be used (an error above, NFSv4 back
	 * fs, nocache or pending population, or still not populated),
	 * fall back to the back file system
	 */
	if ((error != 0) ||
	    CFS_ISFS_BACKFS_NFSV4(fscp) ||
	    (dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
	    ((dcp->c_metadata.md_flags & MD_POPULATED) == 0)) {

		/* a cache error turns caching off for this directory */
		if (error && !(dcp->c_flags & CN_NOCACHE) &&
		    !CFS_ISFS_BACKFS_NFSV4(fscp))
			cachefs_nocache(dcp);

		/* get the back vp */
		if (dcp->c_backvp == NULL) {
			error = cachefs_getbackvp(fscp, dcp);
			if (error)
				goto out;
		}

		if (fscp->fs_inum_size > 0) {
			/* inode numbers must be translated on the way out */
			error = cachefs_readback_translate(dcp, uiop, cr, eofp);
		} else {
			/* do the dir read from the back fs */
			(void) VOP_RWLOCK(dcp->c_backvp,
			    V_WRITELOCK_FALSE, NULL);
			CFS_DPRINT_BACKFS_NFSV4(fscp,
			    ("cachefs_readdir (nfsv4): "
			    "dcp %p, dbackvp %p\n", dcp, dcp->c_backvp));
			error = VOP_READDIR(dcp->c_backvp, uiop, cr, eofp,
			    NULL, 0);
			VOP_RWUNLOCK(dcp->c_backvp, V_WRITELOCK_FALSE, NULL);
		}

		if (error == 0)
			fscp->fs_stats.st_misses++;
	}

out:
	mutex_exit(&dcp->c_statelock);

	return (error);
}
7144 
7145 static int
7146 cachefs_readback_translate(cnode_t *cp, uio_t *uiop, cred_t *cr, int *eofp)
7147 {
7148 	int error = 0;
7149 	fscache_t *fscp = C_TO_FSCACHE(cp);
7150 	caddr_t buffy = NULL;
7151 	int buffysize = MAXBSIZE;
7152 	caddr_t chrp, end;
7153 	ino64_t newinum;
7154 	struct dirent64 *de;
7155 	uio_t uioin;
7156 	iovec_t iov;
7157 
7158 	ASSERT(cp->c_backvp != NULL);
7159 	ASSERT(fscp->fs_inum_size > 0);
7160 
7161 	if (uiop->uio_resid < buffysize)
7162 		buffysize = (int)uiop->uio_resid;
7163 	buffy = cachefs_kmem_alloc(buffysize, KM_SLEEP);
7164 
7165 	iov.iov_base = buffy;
7166 	iov.iov_len = buffysize;
7167 	uioin.uio_iov = &iov;
7168 	uioin.uio_iovcnt = 1;
7169 	uioin.uio_segflg = UIO_SYSSPACE;
7170 	uioin.uio_fmode = 0;
7171 	uioin.uio_extflg = UIO_COPY_CACHED;
7172 	uioin.uio_loffset = uiop->uio_loffset;
7173 	uioin.uio_resid = buffysize;
7174 
7175 	(void) VOP_RWLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);
7176 	error = VOP_READDIR(cp->c_backvp, &uioin, cr, eofp, NULL, 0);
7177 	VOP_RWUNLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);
7178 
7179 	if (error != 0)
7180 		goto out;
7181 
7182 	end = buffy + buffysize - uioin.uio_resid;
7183 
7184 	mutex_exit(&cp->c_statelock);
7185 	mutex_enter(&fscp->fs_fslock);
7186 
7187 
7188 	for (chrp = buffy; chrp < end; chrp += de->d_reclen) {
7189 		de = (dirent64_t *)chrp;
7190 		newinum = cachefs_inum_real2fake(fscp, de->d_ino);
7191 		if (newinum == 0)
7192 			newinum = cachefs_fileno_conflict(fscp, de->d_ino);
7193 		de->d_ino = newinum;
7194 	}
7195 	mutex_exit(&fscp->fs_fslock);
7196 	mutex_enter(&cp->c_statelock);
7197 
7198 	error = uiomove(buffy, end - buffy, UIO_READ, uiop);
7199 	uiop->uio_loffset = uioin.uio_loffset;
7200 
7201 out:
7202 
7203 	if (buffy != NULL)
7204 		cachefs_kmem_free(buffy, buffysize);
7205 
7206 	return (error);
7207 }
7208 
7209 static int
7210 /*ARGSUSED*/
7211 cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop, cred_t *cr,
7212     int *eofp)
7213 {
7214 	cnode_t *dcp = VTOC(vp);
7215 	int error;
7216 
7217 	mutex_enter(&dcp->c_statelock);
7218 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
7219 		error = ETIMEDOUT;
7220 	} else {
7221 		error = cachefs_dir_read(dcp, uiop, eofp);
7222 		if (error == ENOTDIR)
7223 			error = ETIMEDOUT;
7224 	}
7225 	mutex_exit(&dcp->c_statelock);
7226 
7227 	return (error);
7228 }
7229 
7230 /*ARGSUSED*/
7231 static int
7232 cachefs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
7233 {
7234 	int error = 0;
7235 	struct cnode *cp = VTOC(vp);
7236 	fscache_t *fscp = C_TO_FSCACHE(cp);
7237 
7238 	/*
7239 	 * Cachefs only provides pass-through support for NFSv4,
7240 	 * and all vnode operations are passed through to the
7241 	 * back file system. For NFSv4 pass-through to work, only
7242 	 * connected operation is supported, the cnode backvp must
7243 	 * exist, and cachefs optional (eg., disconnectable) flags
7244 	 * are turned off. Assert these conditions, then bail
7245 	 * as  NFSv4 doesn't support VOP_FID.
7246 	 */
7247 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7248 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7249 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7250 		return (ENOTSUP);
7251 	}
7252 
7253 	mutex_enter(&cp->c_statelock);
7254 	if (fidp->fid_len < cp->c_metadata.md_cookie.fid_len) {
7255 		fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
7256 		error = ENOSPC;
7257 	} else {
7258 		bcopy(cp->c_metadata.md_cookie.fid_data, fidp->fid_data,
7259 		    cp->c_metadata.md_cookie.fid_len);
7260 		fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
7261 	}
7262 	mutex_exit(&cp->c_statelock);
7263 	return (error);
7264 }
7265 
7266 /* ARGSUSED2 */
7267 static int
7268 cachefs_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
7269 {
7270 	cnode_t *cp = VTOC(vp);
7271 
7272 	/*
7273 	 * XXX - This is ifdef'ed out for now. The problem -
7274 	 * getdents() acquires the read version of rwlock, then we come
7275 	 * into cachefs_readdir() and that wants to acquire the write version
7276 	 * of this lock (if its going to populate the directory). This is
7277 	 * a problem, this can be solved by introducing another lock in the
7278 	 * cnode.
7279 	 */
7280 /* XXX */
7281 	if (vp->v_type != VREG)
7282 		return (-1);
7283 	if (write_lock)
7284 		rw_enter(&cp->c_rwlock, RW_WRITER);
7285 	else
7286 		rw_enter(&cp->c_rwlock, RW_READER);
7287 	return (write_lock);
7288 }
7289 
7290 /* ARGSUSED */
7291 static void
7292 cachefs_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
7293 {
7294 	cnode_t *cp = VTOC(vp);
7295 	if (vp->v_type != VREG)
7296 		return;
7297 	rw_exit(&cp->c_rwlock);
7298 }
7299 
/*
 * cachefs_seek
 *
 * VOP_SEEK entry point.  No validation is performed: every requested
 * offset is accepted, *noffp is left untouched and 0 is returned.
 */
/* ARGSUSED */
static int
cachefs_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
    caller_context_t *ct)
{
	return (0);
}
7307 
/*
 * NOTE(review): cachefs_lostpage is not referenced by cachefs_getpage
 * itself; presumably it is updated elsewhere in this file -- confirm.
 */
static int cachefs_lostpage = 0;
/*
 * Return all the pages from [off..off+len] in file
 *
 * VOP_GETPAGE entry point.  Fails with EPERM outside the global zone
 * and with ENOSYS when the vnode has VNOMAP set.  With an NFSv4 back
 * file system the request is handed straight to
 * cachefs_getpage_backfs_nfsv4().  A NULL page list is a successful
 * no-op.  Otherwise the pages are obtained through cachefs_getapage()
 * (directly for requests of at most one page, via pvn_getpages() for
 * larger ones) inside a loop that re-acquires file system access and
 * retries when the attempt times out.
 */
/*ARGSUSED*/
static int
cachefs_getpage(struct vnode *vp, offset_t off, size_t len,
	uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
	caddr_t addr, enum seg_rw rw, cred_t *cr, caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	int error;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	cachefscache_t *cachep = fscp->fs_cache;
	int held = 0;		/* set while cachefs_cd_access() is held */
	int connected = 0;	/* passed to cachefs_cd_access() */

#ifdef CFSDEBUG
	u_offset_t offx = (u_offset_t)off;

	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_getpage: ENTER vp %p off %lld len %lu rw %d\n",
		    (void *)vp, offx, len, rw);
#endif
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (vp->v_flag & VNOMAP) {
		error = ENOSYS;
		goto out;
	}

	/* Call backfilesystem if NFSv4 */
	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_getpage_backfs_nfsv4(vp, off, len, protp, pl,
		    plsz, seg, addr, rw, cr);
		goto out;
	}

	/* XXX sam: make this do an async populate? */
	if (pl == NULL) {
		error = 0;
		goto out;
	}
	if (protp != NULL)
		*protp = PROT_ALL;

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			break;
		held = 1;

		/*
		 * If we are getting called as a side effect of a
		 * cachefs_write()
		 * operation the local file size might not be extended yet.
		 * In this case we want to be able to return pages of zeroes.
		 * Any other request reaching past the page-rounded file
		 * size (seg is not segkmap) fails with EFAULT.
		 */
		if ((u_offset_t)off + len >
		    ((cp->c_size + PAGEOFFSET) & (offset_t)PAGEMASK)) {
			if (seg != segkmap) {
				error = EFAULT;
				break;
			}
		}
		/* a request of at most one page bypasses pvn_getpages() */
		if (len <= PAGESIZE)
			error = cachefs_getapage(vp, (u_offset_t)off, len,
			    protp, pl, plsz, seg, addr, rw, cr);
		else
			error = pvn_getpages(cachefs_getapage, vp,
			    (u_offset_t)off, len, protp, pl, plsz, seg, addr,
			    rw, cr);
		if (error == 0)
			break;

		/*
		 * ENOSPC on a cnode that has been marked nocache, or
		 * EAGAIN, simply restarts the loop.
		 */
		if (((cp->c_flags & CN_NOCACHE) && (error == ENOSPC)) ||
		    error == EAGAIN) {
			connected = 0;
			continue;
		}
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			if (CFS_TIMEOUT(fscp, error)) {
				/* back fs timed out: note it and retry */
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			if (CFS_TIMEOUT(fscp, error)) {
				/*
				 * The cache could not supply the pages.
				 * If cachefs_cd_access_miss() allows it,
				 * go to the back fs through
				 * cachefs_getapage_back(); when that
				 * times out or returns EAGAIN, wait 5
				 * seconds and start over.  Otherwise
				 * ask for a connected pass.
				 */
				if (cachefs_cd_access_miss(fscp)) {
					if (len <= PAGESIZE)
						error = cachefs_getapage_back(
						    vp, (u_offset_t)off,
						    len, protp, pl,
						    plsz, seg, addr, rw, cr);
					else
						error = pvn_getpages(
						    cachefs_getapage_back, vp,
						    (u_offset_t)off, len,
						    protp, pl,
						    plsz, seg, addr, rw, cr);
					if (!CFS_TIMEOUT(fscp, error) &&
					    (error != EAGAIN))
						break;
					delay(5*hz);
					connected = 0;
					continue;
				}
				connected = 1;
				continue;
			}
		}
		break;
	}

	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GETPAGE))
		cachefs_log_getpage(cachep, error, vp->v_vfsp,
		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
		    crgetuid(cr), off, len);

	if (held) {
		cachefs_cd_release(fscp);
	}

out:
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_getpage: EXIT vp %p error %d\n",
		    (void *)vp, error);
#endif
	return (error);
}
7452 
7453 /*
7454  * cachefs_getpage_backfs_nfsv4
7455  *
7456  * Call NFSv4 back filesystem to handle the getpage (cachefs
7457  * pass-through support for NFSv4).
7458  */
7459 static int
7460 cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off, size_t len,
7461 			uint_t *protp, struct page *pl[], size_t plsz,
7462 			struct seg *seg, caddr_t addr, enum seg_rw rw,
7463 			cred_t *cr)
7464 {
7465 	cnode_t *cp = VTOC(vp);
7466 	fscache_t *fscp = C_TO_FSCACHE(cp);
7467 	vnode_t *backvp;
7468 	int error;
7469 
7470 	/*
7471 	 * For NFSv4 pass-through to work, only connected operation is
7472 	 * supported, the cnode backvp must exist, and cachefs optional
7473 	 * (eg., disconnectable) flags are turned off. Assert these
7474 	 * conditions for the getpage operation.
7475 	 */
7476 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7477 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7478 
7479 	/* Call backfs vnode op after extracting backvp */
7480 	mutex_enter(&cp->c_statelock);
7481 	backvp = cp->c_backvp;
7482 	mutex_exit(&cp->c_statelock);
7483 
7484 	CFS_DPRINT_BACKFS_NFSV4(fscp,
7485 	    ("cachefs_getpage_backfs_nfsv4: cnode %p, backvp %p\n",
7486 	    cp, backvp));
7487 	error = VOP_GETPAGE(backvp, off, len, protp, pl, plsz, seg,
7488 	    addr, rw, cr, NULL);
7489 
7490 	return (error);
7491 }
7492 
7493 /*
7494  * Called from pvn_getpages or cachefs_getpage to get a particular page.
 *
 * On success the page of vp that contains off is returned in pl[0] and
 * pl[1] is set to NULL.  If vp has no such page yet, the page is obtained
 * with VOP_GETPAGE from the back vnode (NOCACHE mode, or the front file
 * still needs populating at this offset) or from the front vnode, and is
 * then renamed onto vp.  If vp already has the page it is simply looked
 * up with a shared lock.
 *
 * Returns 0 or an errno value.  ETIMEDOUT is returned when the back file
 * is needed but the file system is not connected.  EAGAIN is returned
 * after the cnode has been switched to NOCACHE mode because of a front
 * file problem, or when a shared page lock could not be upgraded; the
 * retry loop in cachefs_getpage calls in again on EAGAIN.
 *
 * len is not used; off is rounded down to a page boundary.
7495  */
7496 /*ARGSUSED*/
7497 static int
7498 cachefs_getapage(struct vnode *vp, u_offset_t off, size_t len, uint_t *protp,
7499 	struct page *pl[], size_t plsz, struct seg *seg, caddr_t addr,
7500 	enum seg_rw rw, cred_t *cr)
7501 {
7502 	cnode_t *cp = VTOC(vp);
7503 	page_t **ppp, *pp = NULL;
7504 	fscache_t *fscp = C_TO_FSCACHE(cp);
7505 	cachefscache_t *cachep = fscp->fs_cache;
7506 	int error = 0;
7507 	struct page **ourpl;
7508 	struct page *ourstackpl[17]; /* see ASSERT() below for 17 */
7509 	int index = 0;
7510 	int downgrade;
7511 	int have_statelock = 0;
7512 	u_offset_t popoff;
7513 	size_t popsize = 0;
7514 
7515 	/*LINTED*/
7516 	ASSERT(((DEF_POP_SIZE / PAGESIZE) + 1) <= 17);
7517 
	/*
	 * ourpl receives the pages handed back by the underlying
	 * VOP_GETPAGE.  The on-stack array is only big enough for
	 * DEF_POP_SIZE, so a larger configured population size gets a
	 * heap-allocated list instead (freed at out: under the same test).
	 */
7518 	if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
7519 		ourpl = cachefs_kmem_alloc(sizeof (struct page *) *
7520 		    ((fscp->fs_info.fi_popsize / PAGESIZE) + 1), KM_SLEEP);
7521 	else
7522 		ourpl = ourstackpl;
7523 
	/* start with an empty list and a page-aligned offset */
7524 	ourpl[0] = NULL;
7525 	off = off & (offset_t)PAGEMASK;
7526 again:
7527 	/*
7528 	 * Look for the page
7529 	 */
7530 	if (page_exists(vp, off) == 0) {
7531 		/*
7532 		 * Need to do work to get the page.
7533 		 * Grab our lock because we are going to
7534 		 * modify the state of the cnode.
7535 		 */
7536 		if (! have_statelock) {
7537 			mutex_enter(&cp->c_statelock);
7538 			have_statelock = 1;
7539 		}
7540 		/*
7541 		 * If we're in NOCACHE mode, we will need a backvp
7542 		 */
7543 		if (cp->c_flags & CN_NOCACHE) {
			/* the back file cannot be used while not connected */
7544 			if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
7545 				error = ETIMEDOUT;
7546 				goto out;
7547 			}
7548 			if (cp->c_backvp == NULL) {
7549 				error = cachefs_getbackvp(fscp, cp);
7550 				if (error)
7551 					goto out;
7552 			}
			/* fetch exactly one page from the back file */
7553 			error = VOP_GETPAGE(cp->c_backvp, off,
7554 			    PAGESIZE, protp, ourpl, PAGESIZE, seg,
7555 			    addr, S_READ, cr, NULL);
7556 			/*
7557 			 * backfs returns EFAULT when we are trying for a
7558 			 * page beyond EOF but cachefs has the knowledge that
7559 			 * it is not beyond EOF because cp->c_size is
7560 			 * greater than the offset requested.
			 * In that case a zero-filled page is created on vp
			 * and returned through pl instead.
			 *
			 * NOTE(review): the "goto again" below is taken with
			 * c_statelock still held; if page_exists() then
			 * reports the page, the ASSERT(! have_statelock) in
			 * the else branch would trip on DEBUG kernels (the
			 * lock itself is dropped there) -- confirm intent.
7561 			 */
7562 			if (error == EFAULT) {
7563 				error = 0;
7564 				pp = page_create_va(vp, off, PAGESIZE,
7565 				    PG_EXCL | PG_WAIT, seg, addr);
7566 				if (pp == NULL)
7567 					goto again;
7568 				pagezero(pp, 0, PAGESIZE);
7569 				pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
7570 				goto out;
7571 			}
7572 			if (error)
7573 				goto out;
			/* ourpl is filled in; skip the front file handling */
7574 			goto getpages;
7575 		}
7576 		/*
7577 		 * We need a front file. If we can't get it,
7578 		 * put the cnode in NOCACHE mode and try again.
		 * (EAGAIN makes cachefs_getpage call back in.)
7579 		 */
7580 		if (cp->c_frontvp == NULL) {
7581 			error = cachefs_getfrontfile(cp);
7582 			if (error) {
7583 				cachefs_nocache(cp);
7584 				error = EAGAIN;
7585 				goto out;
7586 			}
7587 		}
7588 		/*
7589 		 * Check if the front file needs population.
7590 		 * If population is necessary, make sure we have a
7591 		 * backvp as well. We will get the page from the backvp.
7592 		 * bug 4152459-
7593 		 * But if the file system is in disconnected mode
7594 		 * and the file is a local file then do not check the
7595 		 * allocmap.
7596 		 */
7597 		if (((fscp->fs_cdconnected == CFS_CD_CONNECTED) ||
7598 		    ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) &&
7599 		    (cachefs_check_allocmap(cp, off) == 0)) {
			/* population needs the back file, hence a connection */
7600 			if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
7601 				error = ETIMEDOUT;
7602 				goto out;
7603 			}
7604 			if (cp->c_backvp == NULL) {
7605 				error = cachefs_getbackvp(fscp, cp);
7606 				if (error)
7607 					goto out;
7608 			}
			/*
			 * When the file group is writable, copy a cluster
			 * around off from the back file into the front file.
			 * A population failure switches the cnode to NOCACHE
			 * mode and asks the caller to retry.
			 */
7609 			if (cp->c_filegrp->fg_flags & CFS_FG_WRITE) {
7610 				cachefs_cluster_allocmap(off, &popoff,
7611 				    &popsize,
7612 				    fscp->fs_info.fi_popsize, cp);
7613 				if (popsize != 0) {
7614 					error = cachefs_populate(cp,
7615 					    popoff, popsize,
7616 					    cp->c_frontvp, cp->c_backvp,
7617 					    cp->c_size, cr);
7618 					if (error) {
7619 						cachefs_nocache(cp);
7620 						error = EAGAIN;
7621 						goto out;
7622 					} else {
7623 						cp->c_flags |=
7624 						    CN_UPDATED |
7625 						    CN_NEED_FRONT_SYNC |
7626 						    CN_POPULATION_PENDING;
7627 					}
					/*
					 * keep only the part of the cluster
					 * from off onward; this becomes the
					 * page list size passed below
					 */
7628 					popsize = popsize - (off - popoff);
7629 				} else {
7630 					popsize = PAGESIZE;
7631 				}
7632 			}
7633 			/* else XXX assert CN_NOCACHE? */
			/*
			 * NOTE(review): if CFS_FG_WRITE is clear, popsize is
			 * still its initial 0 when used as the page list size
			 * here -- confirm that case cannot occur.
			 */
7634 			error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
7635 			    PAGESIZE, protp, ourpl, popsize,
7636 			    seg, addr, S_READ, cr, NULL);
7637 			if (error)
7638 				goto out;
7639 			fscp->fs_stats.st_misses++;
7640 		} else {
			/*
			 * A population is still pending on the front file:
			 * fsync it before reading from it.  The pending flag
			 * is cleared whether or not the fsync succeeds.
			 */
7641 			if (cp->c_flags & CN_POPULATION_PENDING) {
7642 				error = VOP_FSYNC(cp->c_frontvp, FSYNC, cr,
7643 				    NULL);
7644 				cp->c_flags &= ~CN_POPULATION_PENDING;
7645 				if (error) {
7646 					cachefs_nocache(cp);
7647 					error = EAGAIN;
7648 					goto out;
7649 				}
7650 			}
7651 			/*
7652 			 * File was populated so we get the page from the
7653 			 * frontvp
7654 			 */
7655 			error = VOP_GETPAGE(cp->c_frontvp, (offset_t)off,
7656 			    PAGESIZE, protp, ourpl, PAGESIZE, seg, addr,
7657 			    rw, cr, NULL);
7658 			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GPFRONT))
7659 				cachefs_log_gpfront(cachep, error,
7660 				    fscp->fs_cfsvfsp,
7661 				    &cp->c_metadata.md_cookie, cp->c_fileno,
7662 				    crgetuid(cr), off, PAGESIZE);
			/* a front file read failure also falls back to NOCACHE */
7663 			if (error) {
7664 				cachefs_nocache(cp);
7665 				error = EAGAIN;
7666 				goto out;
7667 			}
7668 			fscp->fs_stats.st_hits++;
7669 		}
7670 getpages:
		/*
		 * ourpl now holds the pages returned by the front or back
		 * vnode.  Drop the cnode lock, then move those pages onto vp.
		 */
7671 		ASSERT(have_statelock);
7672 		if (have_statelock) {
7673 			mutex_exit(&cp->c_statelock);
7674 			have_statelock = 0;
7675 		}
7676 		downgrade = 0;
7677 		for (ppp = ourpl; *ppp; ppp++) {
			/*
			 * Pages that precede the requested offset are not
			 * wanted: release them and count them so that
			 * ourpl[index] ends up being the first kept page.
			 */
7678 			if ((*ppp)->p_offset < off) {
7679 				index++;
7680 				page_unlock(*ppp);
7681 				continue;
7682 			}
			/*
			 * The rename below is done with the page held
			 * exclusively.  If a shared lock cannot be upgraded,
			 * release every page from ourpl[index] on and ask the
			 * caller to retry.
			 */
7683 			if (PAGE_SHARED(*ppp)) {
7684 				if (page_tryupgrade(*ppp) == 0) {
7685 					for (ppp = &ourpl[index]; *ppp; ppp++)
7686 						page_unlock(*ppp);
7687 					error = EAGAIN;
7688 					goto out;
7689 				}
7690 				downgrade = 1;
7691 			}
7692 			ASSERT(PAGE_EXCL(*ppp));
			/* unload existing mappings, then re-home the page on vp */
7693 			(void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
7694 			page_rename(*ppp, vp, (*ppp)->p_offset);
7695 		}
		/* hand back only the first kept page */
7696 		pl[0] = ourpl[index];
7697 		pl[1] = NULL;
		/* go back to a shared lock if any page had to be upgraded */
7698 		if (downgrade) {
7699 			page_downgrade(ourpl[index]);
7700 		}
7701 		/* Unlock the rest of the pages from the cluster */
7702 		for (ppp = &ourpl[index+1]; *ppp; ppp++)
7703 			page_unlock(*ppp);
7704 	} else {
		/* vp already has the page: just look it up */
7705 		ASSERT(! have_statelock);
7706 		if (have_statelock) {
7707 			mutex_exit(&cp->c_statelock);
7708 			have_statelock = 0;
7709 		}
7710 		/* XXX SE_SHARED probably isn't what we *always* want */
		/* the page can vanish after page_exists(); count and retry */
7711 		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
7712 			cachefs_lostpage++;
7713 			goto again;
7714 		}
7715 		pl[0] = pp;
7716 		pl[1] = NULL;
7717 		/* XXX increment st_hits?  i don't think so, but... */
7718 	}
7719 
7720 out:
	/* common exit: drop the cnode lock and free a heap page list */
7721 	if (have_statelock) {
7722 		mutex_exit(&cp->c_statelock);
7723 		have_statelock = 0;
7724 	}
7725 	if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
7726 		cachefs_kmem_free(ourpl, sizeof (struct page *) *
7727 		    ((fscp->fs_info.fi_popsize / PAGESIZE) + 1));
7728 	return (error);
7729 }
7730 
7731 /* gets a page but only from the back fs */
/*
 * Same contract as cachefs_getapage (page returned in pl[0], pl[1] set to
 * NULL, 0 or an errno value returned), but the front file is never
 * consulted: a page that vp does not have yet is always read from the
 * back vnode and renamed onto vp.  cachefs_getpage uses this routine on
 * its disconnected path when cachefs_cd_access_miss() reports true.
 *
 * Returns EAGAIN when a shared page lock could not be upgraded; errors
 * from cachefs_getbackvp and the back VOP_GETPAGE are passed through.
 * len and plsz are not used; off is rounded down to a page boundary.
 */
7732 /*ARGSUSED*/
7733 static int
7734 cachefs_getapage_back(struct vnode *vp, u_offset_t off, size_t len,
7735     uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
7736     caddr_t addr, enum seg_rw rw, cred_t *cr)
7737 {
7738 	cnode_t *cp = VTOC(vp);
7739 	page_t **ppp, *pp = NULL;
7740 	fscache_t *fscp = C_TO_FSCACHE(cp);
7741 	int error = 0;
7742 	struct page *ourpl[17];
7743 	int index = 0;
7744 	int have_statelock = 0;
7745 	int downgrade;
7746 
7747 	/*
7748 	 * Start with an empty page list and a page-aligned offset.  The
7749 	 * cnode statelock is taken further down, only when the page has
7750 	 * to be fetched, so the cnode state won't change while we do that.
7751 	 */
7752 	ourpl[0] = NULL;
7753 	off = off & (offset_t)PAGEMASK;
7754 again:
7755 	if (page_exists(vp, off) == 0) {
7756 		if (! have_statelock) {
7757 			mutex_enter(&cp->c_statelock);
7758 			have_statelock = 1;
7759 		}
7760 
		/* make sure there is a back vnode, then read one page */
7761 		if (cp->c_backvp == NULL) {
7762 			error = cachefs_getbackvp(fscp, cp);
7763 			if (error)
7764 				goto out;
7765 		}
7766 		error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
7767 		    PAGESIZE, protp, ourpl, PAGESIZE, seg,
7768 		    addr, S_READ, cr, NULL);
7769 		if (error)
7770 			goto out;
7771 
		/* the cnode lock is not held while the pages are renamed */
7772 		if (have_statelock) {
7773 			mutex_exit(&cp->c_statelock);
7774 			have_statelock = 0;
7775 		}
7776 		downgrade = 0;
7777 		for (ppp = ourpl; *ppp; ppp++) {
			/*
			 * Release pages that precede the requested offset;
			 * index counts them so that ourpl[index] is the
			 * first kept page.
			 */
7778 			if ((*ppp)->p_offset < off) {
7779 				index++;
7780 				page_unlock(*ppp);
7781 				continue;
7782 			}
			/*
			 * The rename is done with the page held exclusively.
			 * If the upgrade fails, release every page from
			 * ourpl[index] on and report EAGAIN.
			 */
7783 			if (PAGE_SHARED(*ppp)) {
7784 				if (page_tryupgrade(*ppp) == 0) {
7785 					for (ppp = &ourpl[index]; *ppp; ppp++)
7786 						page_unlock(*ppp);
7787 					error = EAGAIN;
7788 					goto out;
7789 				}
7790 				downgrade = 1;
7791 			}
7792 			ASSERT(PAGE_EXCL(*ppp));
			/* unload existing mappings, then re-home the page on vp */
7793 			(void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
7794 			page_rename(*ppp, vp, (*ppp)->p_offset);
7795 		}
		/* hand back only the first kept page */
7796 		pl[0] = ourpl[index];
7797 		pl[1] = NULL;
		/* go back to a shared lock if any page had to be upgraded */
7798 		if (downgrade) {
7799 			page_downgrade(ourpl[index]);
7800 		}
7801 		/* Unlock the rest of the pages from the cluster */
7802 		for (ppp = &ourpl[index+1]; *ppp; ppp++)
7803 			page_unlock(*ppp);
7804 	} else {
		/* vp already has the page: look it up with a shared lock */
7805 		ASSERT(! have_statelock);
7806 		if (have_statelock) {
7807 			mutex_exit(&cp->c_statelock);
7808 			have_statelock = 0;
7809 		}
		/* the page can vanish after page_exists(); count and retry */
7810 		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
7811 			cachefs_lostpage++;
7812 			goto again;
7813 		}
7814 		pl[0] = pp;
7815 		pl[1] = NULL;
7816 	}
7817 
7818 out:
	/* error exits arrive here with the cnode lock possibly still held */
7819 	if (have_statelock) {
7820 		mutex_exit(&cp->c_statelock);
7821 		have_statelock = 0;
7822 	}
7823 	return (error);
7824 }
7824 
7825 /*ARGSUSED*/
7826 static int
7827 cachefs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
7828     caller_context_t *ct)
7829 {
7830 	cnode_t *cp = VTOC(vp);
7831 	int error = 0;
7832 	fscache_t *fscp = C_TO_FSCACHE(cp);
7833 	int held = 0;
7834 	int connected = 0;
7835 
7836 	if (getzoneid() != GLOBAL_ZONEID)
7837 		return (EPERM);
7838 
7839 	/* Call backfilesytem if NFSv4 */
7840 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7841 		error = cachefs_putpage_backfs_nfsv4(vp, off, len, flags, cr);
7842 		goto out;
7843 	}
7844 
7845 	for (;;) {
7846 		/* get (or renew) access to the file system */
7847 		if (held) {
7848 			cachefs_cd_release(fscp);
7849 			held = 0;
7850 		}
7851 		error = cachefs_cd_access(fscp, connected, 1);
7852 		if (error)
7853 			break;
7854 		held = 1;
7855 
7856 		error = cachefs_putpage_common(vp, off, len, flags, cr);
7857 		if (error == 0)
7858 			break;
7859 
7860 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
7861 			if (CFS_TIMEOUT(fscp, error)) {
7862 				cachefs_cd_release(fscp);
7863 				held = 0;
7864 				cachefs_cd_timedout(fscp);
7865 				connected = 0;
7866 				continue;
7867 			}
7868 		} else {
7869 			if (NOMEMWAIT()) {
7870 				error = 0;
7871 				goto out;
7872 			}
7873 			if (CFS_TIMEOUT(fscp, error)) {
7874 				connected = 1;
7875 				continue;
7876 			}
7877 		}
7878 		break;
7879 	}
7880 
7881 out:
7882 
7883 	if (held) {
7884 		cachefs_cd_release(fscp);
7885 	}
7886 
7887 #ifdef CFS_CD_DEBUG
7888 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7889 #endif
7890 	return (error);
7891 }
7892 
7893 /*
7894  * cachefs_putpage_backfs_nfsv4
7895  *
7896  * Call NFSv4 back filesystem to handle the putpage (cachefs
7897  * pass-through support for NFSv4).
7898  */
7899 static int
7900 cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off, size_t len, int flags,
7901 			cred_t *cr)
7902 {
7903 	cnode_t *cp = VTOC(vp);
7904 	fscache_t *fscp = C_TO_FSCACHE(cp);
7905 	vnode_t *backvp;
7906 	int error;
7907 
7908 	/*
7909 	 * For NFSv4 pass-through to work, only connected operation is
7910 	 * supported, the cnode backvp must exist, and cachefs optional
7911 	 * (eg., disconnectable) flags are turned off. Assert these
7912 	 * conditions for the putpage operation.
7913 	 */
7914 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7915 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7916 
7917 	/* Call backfs vnode op after extracting backvp */
7918 	mutex_enter(&cp->c_statelock);
7919 	backvp = cp->c_backvp;
7920 	mutex_exit(&cp->c_statelock);
7921 
7922 	CFS_DPRINT_BACKFS_NFSV4(fscp,
7923 	    ("cachefs_putpage_backfs_nfsv4: cnode %p, backvp %p\n",
7924 	    cp, backvp));
7925 	error = VOP_PUTPAGE(backvp, off, len, flags, cr, NULL);
7926 
7927 	return (error);
7928 }
7929 
7930 /*
 * cachefs_putpage_common
 *
 * Push (and optionally invalidate or free) pages of vp in the range
 * starting at off.  Returns 0 or the error from pvn_vplist_dirty or
 * cachefs_push.
 *
7931  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
7932  * If len == 0, do from off to EOF.
7933  *
7934  * The normal cases should be len == 0 & off == 0 (entire vp list),
7935  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
7936  * (from pageout).
 *
 * A B_ASYNC request is normally queued on the file system's work queue
 * and 0 is returned at once; it is done synchronously instead when the
 * caller is the pageout process or cachefs_async_okay() says no.
7937  */
7938 
7939 /*ARGSUSED*/
7940 int
7941 cachefs_putpage_common(struct vnode *vp, offset_t off, size_t len,
7942     int flags, cred_t *cr)
7943 {
7944 	struct cnode *cp  = VTOC(vp);
7945 	struct page *pp;
7946 	size_t io_len;
7947 	u_offset_t eoff, io_off;
7948 	int error = 0;
7949 	fscache_t *fscp = C_TO_FSCACHE(cp);
7950 	cachefscache_t *cachep = fscp->fs_cache;
7951 
	/* whole-file, non-invalidating push of a read-only vnode: no-op */
7952 	if (len == 0 && (flags & B_INVAL) == 0 && vn_is_readonly(vp)) {
7953 		return (0);
7954 	}
	/*
	 * Nothing to do either when the vnode has no cached pages, or when
	 * the range starts at or past the file size and we are not
	 * invalidating.
	 */
7955 	if (!vn_has_cached_data(vp) || (off >= cp->c_size &&
7956 	    (flags & B_INVAL) == 0))
7957 		return (0);
7958 
7959 	/*
7960 	 * Should never have cached data for the cachefs vnode
7961 	 * if NFSv4 is in use.
7962 	 */
7963 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
7964 
7965 	/*
7966 	 * If this is an async putpage let a thread handle it.
7967 	 */
7968 	if (flags & B_ASYNC) {
7969 		struct cachefs_req *rp;
		/* flags that remain once B_ASYNC and B_DONTNEED are ignored */
7970 		int tflags = (flags & ~(B_ASYNC|B_DONTNEED));
7971 
7972 		if (ttoproc(curthread) == proc_pageout) {
7973 			/*
7974 			 * If this is the page daemon we
7975 			 * do the push synchronously (Dangerous!) and hope
7976 			 * we can free enough to keep running...
7977 			 */
7978 			flags &= ~B_ASYNC;
7979 			goto again;
7980 		}
7981 
7982 		if (! cachefs_async_okay()) {
7983 
7984 			/*
7985 			 * this is somewhat like NFS's behavior.  keep
7986 			 * the system from thrashing.  we've seen
7987 			 * cases where async queues get out of
7988 			 * control, especially if
7989 			 * madvise(MADV_SEQUENTIAL) is done on a large
7990 			 * mmap()ed file that is read sequentially.
7991 			 */
7992 
7993 			flags &= ~B_ASYNC;
7994 			goto again;
7995 		}
7996 
7997 		/*
7998 		 * if no flags other than B_ASYNC were set,
7999 		 * we coalesce putpage requests into a single one for the
8000 		 * whole file (len = off = 0).  If such a request is
8001 		 * already queued, we're done.
8002 		 *
8003 		 * If there are other flags set (e.g., B_INVAL), we don't
8004 		 * attempt to coalesce and we use the specified length and
8005 		 * offset.
8006 		 */
		/* allocate before taking c_iomutex; KM_SLEEP may block */
8007 		rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
8008 		mutex_enter(&cp->c_iomutex);
8009 		if ((cp->c_ioflags & CIO_PUTPAGES) == 0 || tflags != 0) {
8010 			rp->cfs_cmd = CFS_PUTPAGE;
8011 			rp->cfs_req_u.cu_putpage.cp_vp = vp;
8012 			if (tflags == 0) {
				/* CIO_PUTPAGES marks the coalesced request */
8013 				off = len = 0;
8014 				cp->c_ioflags |= CIO_PUTPAGES;
8015 			}
8016 			rp->cfs_req_u.cu_putpage.cp_off = off;
			/* NOTE(review): len is narrowed to uint_t here */
8017 			rp->cfs_req_u.cu_putpage.cp_len = (uint_t)len;
8018 			rp->cfs_req_u.cu_putpage.cp_flags = flags & ~B_ASYNC;
			/* the queued request holds the cred and the vnode */
8019 			rp->cfs_cr = cr;
8020 			crhold(rp->cfs_cr);
8021 			VN_HOLD(vp);
8022 			cp->c_nio++;
8023 			cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));
8024 		} else {
			/* a whole-file request is already queued */
8025 			kmem_cache_free(cachefs_req_cache, rp);
8026 		}
8027 
8028 		mutex_exit(&cp->c_iomutex);
8029 		return (0);
8030 	}
8031 
8032 
	/* synchronous path; B_ASYNC is never set in flags from here on */
8033 again:
8034 	if (len == 0) {
8035 		/*
8036 		 * Search the entire vp list for pages >= off
8037 		 */
8038 		error = pvn_vplist_dirty(vp, off, cachefs_push, flags, cr);
8039 	} else {
8040 		/*
8041 		 * Do a range from [off...off + len] looking for pages
8042 		 * to deal with.
		 * The walk also stops at the current file size.
8043 		 */
8044 		eoff = (u_offset_t)off + len;
8045 		for (io_off = off; io_off < eoff && io_off < cp->c_size;
8046 		    io_off += io_len) {
8047 			/*
8048 			 * If we are not invalidating, synchronously
8049 			 * freeing or writing pages use the routine
8050 			 * page_lookup_nowait() to prevent reclaiming
8051 			 * them from the free list.
8052 			 */
8053 			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
8054 				pp = page_lookup(vp, io_off,
8055 				    (flags & (B_INVAL | B_FREE)) ?
8056 				    SE_EXCL : SE_SHARED);
8057 			} else {
8058 				/* XXX this looks like dead code */
8059 				pp = page_lookup_nowait(vp, io_off,
8060 				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
8061 			}
8062 
			/* no page, or nothing to push: step one page ahead */
8063 			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
8064 				io_len = PAGESIZE;
8065 			else {
8066 				error = cachefs_push(vp, pp, &io_off,
8067 				    &io_len, flags, cr);
8068 				if (error != 0)
8069 					break;
8070 				/*
8071 				 * "io_off" and "io_len" are returned as
8072 				 * the range of pages we actually wrote.
8073 				 * This allows us to skip ahead more quickly
8074 				 * since several pages may've been dealt
8075 				 * with by this iteration of the loop.
8076 				 */
8077 			}
8078 		}
8079 	}
8080 
	/* the whole file was pushed without error: clear CDIRTY */
8081 	if (error == 0 && off == 0 && (len == 0 || len >= cp->c_size)) {
8082 		cp->c_flags &= ~CDIRTY;
8083 	}
8084 
8085 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_PUTPAGE))
8086 		cachefs_log_putpage(cachep, error, fscp->fs_cfsvfsp,
8087 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
8088 		    crgetuid(cr), off, len);
8089 
8090 	return (error);
8091 
8092 }
8093 
8094 /*ARGSUSED*/
8095 static int
8096 cachefs_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
8097     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
8098     caller_context_t *ct)
8099 {
8100 	cnode_t *cp = VTOC(vp);
8101 	fscache_t *fscp = C_TO_FSCACHE(cp);
8102 	struct segvn_crargs vn_a;
8103 	int error;
8104 	int held = 0;
8105 	int writing;
8106 	int connected = 0;
8107 
8108 #ifdef CFSDEBUG
8109 	u_offset_t offx = (u_offset_t)off;
8110 
8111 	CFS_DEBUG(CFSDEBUG_VOPS)
8112 		printf("cachefs_map: ENTER vp %p off %lld len %lu flags %d\n",
8113 		    (void *)vp, offx, len, flags);
8114 #endif
8115 	if (getzoneid() != GLOBAL_ZONEID) {
8116 		error = EPERM;
8117 		goto out;
8118 	}
8119 
8120 	if (vp->v_flag & VNOMAP) {
8121 		error = ENOSYS;
8122 		goto out;
8123 	}
8124 	if (off < 0 || (offset_t)(off + len) < 0) {
8125 		error = ENXIO;
8126 		goto out;
8127 	}
8128 	if (vp->v_type != VREG) {
8129 		error = ENODEV;
8130 		goto out;
8131 	}
8132 
8133 	/*
8134 	 * Check to see if the vnode is currently marked as not cachable.
8135 	 * If so, we have to refuse the map request as this violates the
8136 	 * don't cache attribute.
8137 	 */
8138 	if (vp->v_flag & VNOCACHE)
8139 		return (EAGAIN);
8140 
8141 #ifdef OBSOLETE
8142 	/*
8143 	 * If file is being locked, disallow mapping.
8144 	 */
8145 	if (vn_has_flocks(vp)) {
8146 		error = EAGAIN;
8147 		goto out;
8148 	}
8149 #endif
8150 
8151 	/* call backfilesystem if NFSv4 */
8152 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
8153 		error = cachefs_map_backfs_nfsv4(vp, off, as, addrp, len, prot,
8154 		    maxprot, flags, cr);
8155 		goto out;
8156 	}
8157 
8158 	writing = (prot & PROT_WRITE && ((flags & MAP_PRIVATE) == 0));
8159 
8160 	for (;;) {
8161 		/* get (or renew) access to the file system */
8162 		if (held) {
8163 			cachefs_cd_release(fscp);
8164 			held = 0;
8165 		}
8166 		error = cachefs_cd_access(fscp, connected, writing);
8167 		if (error)
8168 			break;
8169 		held = 1;
8170 
8171 		if (writing) {
8172 			mutex_enter(&cp->c_statelock);
8173 			if (CFS_ISFS_WRITE_AROUND(fscp)) {
8174 				if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
8175 					connected = 1;
8176 					continue;
8177 				} else {
8178 					cachefs_nocache(cp);
8179 				}
8180 			}
8181 
8182 			/*
8183 			 * CN_MAPWRITE is for an optimization in cachefs_delmap.
8184 			 * If CN_MAPWRITE is not set then cachefs_delmap does
8185 			 * not need to try to push out any pages.
8186 			 * This bit gets cleared when the cnode goes inactive.
8187 			 */
8188 			cp->c_flags |= CN_MAPWRITE;
8189 
8190 			mutex_exit(&cp->c_statelock);
8191 		}
8192 		break;
8193 	}
8194 
8195 	if (held) {
8196 		cachefs_cd_release(fscp);
8197 	}
8198 
8199 	as_rangelock(as);
8200 	error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
8201 	if (error != 0) {
8202 		as_rangeunlock(as);
8203 		goto out;
8204 	}
8205 
8206 	/*
8207 	 * package up all the data passed in into a segvn_args struct and
8208 	 * call as_map with segvn_create function to create a new segment
8209 	 * in the address space.
8210 	 */
8211 	vn_a.vp = vp;
8212 	vn_a.offset = off;
8213 	vn_a.type = flags & MAP_TYPE;
8214 	vn_a.prot = (uchar_t)prot;
8215 	vn_a.maxprot = (uchar_t)maxprot;
8216 	vn_a.cred = cr;
8217 	vn_a.amp = NULL;
8218 	vn_a.flags = flags & ~MAP_TYPE;
8219 	vn_a.szc = 0;
8220 	vn_a.lgrp_mem_policy_flags = 0;
8221 	error = as_map(as, *addrp, len, segvn_create, &vn_a);
8222 	as_rangeunlock(as);
8223 out:
8224 
8225 #ifdef CFS_CD_DEBUG
8226 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8227 #endif
8228 #ifdef CFSDEBUG
8229 	CFS_DEBUG(CFSDEBUG_VOPS)
8230 		printf("cachefs_map: EXIT vp %p error %d\n", (void *)vp, error);
8231 #endif
8232 	return (error);
8233 }
8234 
8235 /*
8236  * cachefs_map_backfs_nfsv4
8237  *
8238  * Call NFSv4 back filesystem to handle the map (cachefs
8239  * pass-through support for NFSv4).
8240  */
8241 static int
8242 cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off, struct as *as,
8243 			caddr_t *addrp, size_t len, uchar_t prot,
8244 			uchar_t maxprot, uint_t flags, cred_t *cr)
8245 {
8246 	cnode_t *cp = VTOC(vp);
8247 	fscache_t *fscp = C_TO_FSCACHE(cp);
8248 	vnode_t *backvp;
8249 	int error;
8250 
8251 	/*
8252 	 * For NFSv4 pass-through to work, only connected operation is
8253 	 * supported, the cnode backvp must exist, and cachefs optional
8254 	 * (eg., disconnectable) flags are turned off. Assert these
8255 	 * conditions for the map operation.
8256 	 */
8257 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8258 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8259 
8260 	/* Call backfs vnode op after extracting backvp */
8261 	mutex_enter(&cp->c_statelock);
8262 	backvp = cp->c_backvp;
8263 	mutex_exit(&cp->c_statelock);
8264 
8265 	CFS_DPRINT_BACKFS_NFSV4(fscp,
8266 	    ("cachefs_map_backfs_nfsv4: cnode %p, backvp %p\n",
8267 	    cp, backvp));
8268 	error = VOP_MAP(backvp, off, as, addrp, len, prot, maxprot, flags, cr,
8269 	    NULL);
8270 
8271 	return (error);
8272 }
8273 
8274 /*ARGSUSED*/
8275 static int
8276 cachefs_addmap(struct vnode *vp, offset_t off, struct as *as,
8277     caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
8278     cred_t *cr, caller_context_t *ct)
8279 {
8280 	cnode_t *cp = VTOC(vp);
8281 	fscache_t *fscp = C_TO_FSCACHE(cp);
8282 
8283 	if (getzoneid() != GLOBAL_ZONEID)
8284 		return (EPERM);
8285 
8286 	if (vp->v_flag & VNOMAP)
8287 		return (ENOSYS);
8288 
8289 	/*
8290 	 * Check this is not an NFSv4 filesystem, as the mapping
8291 	 * is not done on the cachefs filesystem if NFSv4 is in
8292 	 * use.
8293 	 */
8294 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8295 
8296 	mutex_enter(&cp->c_statelock);
8297 	cp->c_mapcnt += btopr(len);
8298 	mutex_exit(&cp->c_statelock);
8299 	return (0);
8300 }
8301 
8302 /*ARGSUSED*/
8303 static int
8304 cachefs_delmap(struct vnode *vp, offset_t off, struct as *as,
8305 	caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
8306 	cred_t *cr, caller_context_t *ct)
8307 {
8308 	cnode_t *cp = VTOC(vp);
8309 	fscache_t *fscp = C_TO_FSCACHE(cp);
8310 	int error;
8311 	int connected = 0;
8312 	int held = 0;
8313 
8314 	/*
8315 	 * The file may be passed in to (or inherited into) the zone, so we
8316 	 * need to let this operation go through since it happens as part of
8317 	 * exiting.
8318 	 */
8319 	if (vp->v_flag & VNOMAP)
8320 		return (ENOSYS);
8321 
8322 	/*
8323 	 * Check this is not an NFSv4 filesystem, as the mapping
8324 	 * is not done on the cachefs filesystem if NFSv4 is in
8325 	 * use.
8326 	 */
8327 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8328 
8329 	mutex_enter(&cp->c_statelock);
8330 	cp->c_mapcnt -= btopr(len);
8331 	ASSERT(cp->c_mapcnt >= 0);
8332 	mutex_exit(&cp->c_statelock);
8333 
8334 	if (cp->c_mapcnt || !vn_has_cached_data(vp) ||
8335 	    ((cp->c_flags & CN_MAPWRITE) == 0))
8336 		return (0);
8337 
8338 	for (;;) {
8339 		/* get (or renew) access to the file system */
8340 		if (held) {
8341 			cachefs_cd_release(fscp);
8342 			held = 0;
8343 		}
8344 		error = cachefs_cd_access(fscp, connected, 1);
8345 		if (error)
8346 			break;
8347 		held = 1;
8348 		connected = 0;
8349 
8350 		error = cachefs_putpage_common(vp, (offset_t)0,
8351 		    (uint_t)0, 0, cr);
8352 		if (CFS_TIMEOUT(fscp, error)) {
8353 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8354 				cachefs_cd_release(fscp);
8355 				held = 0;
8356 				cachefs_cd_timedout(fscp);
8357 				continue;
8358 			} else {
8359 				connected = 1;
8360 				continue;
8361 			}
8362 		}
8363 
8364 		/* if no space left in cache, wait until connected */
8365 		if ((error == ENOSPC) &&
8366 		    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
8367 			connected = 1;
8368 			continue;
8369 		}
8370 
8371 		mutex_enter(&cp->c_statelock);
8372 		if (!error)
8373 			error = cp->c_error;
8374 		cp->c_error = 0;
8375 		mutex_exit(&cp->c_statelock);
8376 		break;
8377 	}
8378 
8379 	if (held)
8380 		cachefs_cd_release(fscp);
8381 
8382 #ifdef CFS_CD_DEBUG
8383 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8384 #endif
8385 	return (error);
8386 }
8387 
/*
 * cachefs_frlock
 *
 * VOP_FRLOCK entry point (F_GETLK, F_SETLK and F_SETLKW only).
 *
 * When the file system uses local locking (CFS_ISFS_LLOCK) the request
 * is handled by fs_frlock().  Otherwise the file is put in NOCACHE mode,
 * cached pages are pushed and invalidated before a lock is set, and the
 * request is forwarded to VOP_FRLOCK on the back vnode, waiting for the
 * file system to be connected as needed.  After successfully setting a
 * lock the vnode is marked VNOCACHE.
 *
 * Returns 0 on success, otherwise:
 *	EPERM	caller is not in the global zone
 *	EINVAL	cmd is not one of the three supported commands
 *	EAGAIN	setting a lock on a file that is currently mapped
 *	ENOLCK	the cached pages could not be pushed before locking
 * or the error from cachefs_cd_access(), cachefs_getbackvp(), fs_frlock()
 * or the back file system's VOP_FRLOCK.
 */
8388 /* ARGSUSED */
8389 static int
8390 cachefs_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
8391 	offset_t offset, struct flk_callback *flk_cbp, cred_t *cr,
8392 	caller_context_t *ct)
8393 {
8394 	struct cnode *cp = VTOC(vp);
8395 	int error;
8396 	struct fscache *fscp = C_TO_FSCACHE(cp);
8397 	vnode_t *backvp;
8398 	int held = 0;
8399 	int connected = 0;
8400 
8401 	if (getzoneid() != GLOBAL_ZONEID)
8402 		return (EPERM);
8403 
8404 	if ((cmd != F_GETLK) && (cmd != F_SETLK) && (cmd != F_SETLKW))
8405 		return (EINVAL);
8406 
8407 	/* Disallow locking of files that are currently mapped */
8408 	if (((cmd == F_SETLK) || (cmd == F_SETLKW)) && (cp->c_mapcnt > 0)) {
8409 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8410 		return (EAGAIN);
8411 	}
8412 
8413 	/*
8414 	 * Cachefs only provides pass-through support for NFSv4,
8415 	 * and all vnode operations are passed through to the
8416 	 * back file system. For NFSv4 pass-through to work, only
8417 	 * connected operation is supported, the cnode backvp must
8418 	 * exist, and cachefs optional (eg., disconnectable) flags
8419 	 * are turned off. Assert these conditions to ensure that
8420 	 * the backfilesystem is called for the frlock operation.
8421 	 */
8422 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8423 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8424 
8425 	/* XXX bob: nfs does a bunch more checks than we do */
	/* local locking: fs_frlock() handles it, the back fs is not asked */
8426 	if (CFS_ISFS_LLOCK(fscp)) {
8427 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8428 		return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
8429 	}
8430 
8431 	for (;;) {
8432 		/* get (or renew) access to the file system */
8433 		if (held) {
8434 			/* Won't loop with NFSv4 connected behavior */
8435 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8436 			cachefs_cd_release(fscp);
8437 			held = 0;
8438 		}
8439 		error = cachefs_cd_access(fscp, connected, 0);
8440 		if (error)
8441 			break;
8442 		held = 1;
8443 
8444 		/* if not connected, quit or wait */
		/* (the next pass asks cachefs_cd_access for connected access) */
8445 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
8446 			connected = 1;
8447 			continue;
8448 		}
8449 
8450 		/* nocache the file */
8451 		if ((cp->c_flags & CN_NOCACHE) == 0 &&
8452 		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8453 			mutex_enter(&cp->c_statelock);
8454 			cachefs_nocache(cp);
8455 			mutex_exit(&cp->c_statelock);
8456 		}
8457 
8458 		/*
8459 		 * XXX bob: probably should do a consistency check
8460 		 * Pass arguments unchanged if NFSv4 is the backfs.
		 *
		 * An l_whence of 2 is rewritten here as an l_whence of 0
		 * by adding the cached file size to l_start.  Since
		 * l_whence is 0 afterwards, a later pass of this loop does
		 * not add the size a second time.
8461 		 */
8462 		if (bfp->l_whence == 2 && CFS_ISFS_BACKFS_NFSV4(fscp) == 0) {
8463 			bfp->l_start += cp->c_size;
8464 			bfp->l_whence = 0;
8465 		}
8466 
8467 		/* get the back vp */
		/* a hold is taken so it can be used after the lock is dropped */
8468 		mutex_enter(&cp->c_statelock);
8469 		if (cp->c_backvp == NULL) {
8470 			error = cachefs_getbackvp(fscp, cp);
8471 			if (error) {
8472 				mutex_exit(&cp->c_statelock);
8473 				break;
8474 			}
8475 		}
8476 		backvp = cp->c_backvp;
8477 		VN_HOLD(backvp);
8478 		mutex_exit(&cp->c_statelock);
8479 
8480 		/*
8481 		 * make sure we can flush currently dirty pages before
8482 		 * allowing the lock
		 * (whole file, with B_INVAL; any failure is reported to
		 * the caller as ENOLCK)
8483 		 */
8484 		if (bfp->l_type != F_UNLCK && cmd != F_GETLK &&
8485 		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8486 			error = cachefs_putpage(
8487 			    vp, (offset_t)0, 0, B_INVAL, cr, ct);
8488 			if (error) {
8489 				error = ENOLCK;
8490 				VN_RELE(backvp);
8491 				break;
8492 			}
8493 		}
8494 
8495 		/* do lock on the back file */
		/* NOTE(review): flk_cbp is not forwarded; NULL is passed */
8496 		CFS_DPRINT_BACKFS_NFSV4(fscp,
8497 		    ("cachefs_frlock (nfsv4): cp %p, backvp %p\n",
8498 		    cp, backvp));
8499 		error = VOP_FRLOCK(backvp, cmd, bfp, flag, offset, NULL, cr,
8500 		    ct);
8501 		VN_RELE(backvp);
		/* on a timeout, go around again asking for connected access */
8502 		if (CFS_TIMEOUT(fscp, error)) {
8503 			connected = 1;
8504 			continue;
8505 		}
8506 		break;
8507 	}
8508 
8509 	if (held) {
8510 		cachefs_cd_release(fscp);
8511 	}
8512 
8513 	/*
8514 	 * If we are setting a lock mark the vnode VNOCACHE so the page
8515 	 * cache does not give inconsistent results on locked files shared
8516 	 * between clients.  The VNOCACHE flag is never turned off as long
8517 	 * as the vnode is active because it is hard to figure out when the
8518 	 * last lock is gone.
8519 	 * XXX - what if some already has the vnode mapped in?
8520 	 * XXX bob: see nfs3_frlock, do not allow locking if vnode mapped in.
8521 	 */
8522 	if ((error == 0) && (bfp->l_type != F_UNLCK) && (cmd != F_GETLK) &&
8523 	    !CFS_ISFS_BACKFS_NFSV4(fscp))
8524 		vp->v_flag |= VNOCACHE;
8525 
8526 #ifdef CFS_CD_DEBUG
8527 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8528 #endif
8529 	return (error);
8530 }
8531 
8532 /*
8533  * Free storage space associated with the specified vnode.  The portion
8534  * to be freed is specified by bfp->l_start and bfp->l_len (already
8535  * normalized to a "whence" of 0).
8536  *
8537  * This is an experimental facility whose continued existence is not
8538  * guaranteed.  Currently, we only support the special case
8539  * of l_len == 0, meaning free to end of file.
8540  */
8541 /* ARGSUSED */
8542 static int
8543 cachefs_space(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
8544 	offset_t offset, cred_t *cr, caller_context_t *ct)
8545 {
8546 	cnode_t *cp = VTOC(vp);
8547 	fscache_t *fscp = C_TO_FSCACHE(cp);
8548 	int error;
8549 
8550 	ASSERT(vp->v_type == VREG);
8551 	if (getzoneid() != GLOBAL_ZONEID)
8552 		return (EPERM);
8553 	if (cmd != F_FREESP)
8554 		return (EINVAL);
8555 
8556 	/* call backfilesystem if NFSv4 */
8557 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
8558 		error = cachefs_space_backfs_nfsv4(vp, cmd, bfp, flag,
8559 		    offset, cr, ct);
8560 		goto out;
8561 	}
8562 
8563 	if ((error = convoff(vp, bfp, 0, offset)) == 0) {
8564 		ASSERT(bfp->l_start >= 0);
8565 		if (bfp->l_len == 0) {
8566 			struct vattr va;
8567 
8568 			va.va_size = bfp->l_start;
8569 			va.va_mask = AT_SIZE;
8570 			error = cachefs_setattr(vp, &va, 0, cr, ct);
8571 		} else
8572 			error = EINVAL;
8573 	}
8574 
8575 out:
8576 	return (error);
8577 }
8578 
8579 /*
8580  * cachefs_space_backfs_nfsv4
8581  *
8582  * Call NFSv4 back filesystem to handle the space (cachefs
8583  * pass-through support for NFSv4).
8584  */
8585 static int
8586 cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd, struct flock64 *bfp,
8587 		int flag, offset_t offset, cred_t *cr, caller_context_t *ct)
8588 {
8589 	cnode_t *cp = VTOC(vp);
8590 	fscache_t *fscp = C_TO_FSCACHE(cp);
8591 	vnode_t *backvp;
8592 	int error;
8593 
8594 	/*
8595 	 * For NFSv4 pass-through to work, only connected operation is
8596 	 * supported, the cnode backvp must exist, and cachefs optional
8597 	 * (eg., disconnectable) flags are turned off. Assert these
8598 	 * conditions for the space operation.
8599 	 */
8600 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8601 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8602 
8603 	/* Call backfs vnode op after extracting backvp */
8604 	mutex_enter(&cp->c_statelock);
8605 	backvp = cp->c_backvp;
8606 	mutex_exit(&cp->c_statelock);
8607 
8608 	CFS_DPRINT_BACKFS_NFSV4(fscp,
8609 	    ("cachefs_space_backfs_nfsv4: cnode %p, backvp %p\n",
8610 	    cp, backvp));
8611 	error = VOP_SPACE(backvp, cmd, bfp, flag, offset, cr, ct);
8612 
8613 	return (error);
8614 }
8615 
8616 /*ARGSUSED*/
8617 static int
8618 cachefs_realvp(struct vnode *vp, struct vnode **vpp, caller_context_t *ct)
8619 {
8620 	return (EINVAL);
8621 }
8622 
8623 /*ARGSUSED*/
8624 static int
8625 cachefs_pageio(struct vnode *vp, page_t *pp, u_offset_t io_off, size_t io_len,
8626 	int flags, cred_t *cr, caller_context_t *ct)
8627 {
8628 	return (ENOSYS);
8629 }
8630 
8631 static int
8632 cachefs_setsecattr_connected(cnode_t *cp,
8633     vsecattr_t *vsec, int flag, cred_t *cr)
8634 {
8635 	fscache_t *fscp = C_TO_FSCACHE(cp);
8636 	int error = 0;
8637 
8638 	ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
8639 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
8640 
8641 	mutex_enter(&cp->c_statelock);
8642 
8643 	if (cp->c_backvp == NULL) {
8644 		error = cachefs_getbackvp(fscp, cp);
8645 		if (error) {
8646 			cachefs_nocache(cp);
8647 			goto out;
8648 		}
8649 	}
8650 
8651 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
8652 	if (error)
8653 		goto out;
8654 
8655 	/* only owner can set acl */
8656 	if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
8657 		error = EINVAL;
8658 		goto out;
8659 	}
8660 
8661 
8662 	CFS_DPRINT_BACKFS_NFSV4(fscp,
8663 	    ("cachefs_setsecattr (nfsv4): cp %p, backvp %p",
8664 	    cp, cp->c_backvp));
8665 	error = VOP_SETSECATTR(cp->c_backvp, vsec, flag, cr, NULL);
8666 	if (error) {
8667 		goto out;
8668 	}
8669 
8670 	if ((cp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
8671 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8672 		cachefs_nocache(cp);
8673 		goto out;
8674 	}
8675 
8676 	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
8677 
8678 	/* acl may have changed permissions -- handle this. */
8679 	if (!CFS_ISFS_BACKFS_NFSV4(fscp))
8680 		cachefs_acl2perm(cp, vsec);
8681 
8682 	if ((cp->c_flags & CN_NOCACHE) == 0 &&
8683 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8684 		error = cachefs_cacheacl(cp, vsec);
8685 		if (error != 0) {
8686 #ifdef CFSDEBUG
8687 			CFS_DEBUG(CFSDEBUG_VOPS)
8688 				printf("cachefs_setacl: cacheacl: error %d\n",
8689 				    error);
8690 #endif /* CFSDEBUG */
8691 			error = 0;
8692 			cachefs_nocache(cp);
8693 		}
8694 	}
8695 
8696 out:
8697 	mutex_exit(&cp->c_statelock);
8698 
8699 	return (error);
8700 }
8701 
8702 static int
8703 cachefs_setsecattr_disconnected(cnode_t *cp,
8704     vsecattr_t *vsec, int flag, cred_t *cr)
8705 {
8706 	fscache_t *fscp = C_TO_FSCACHE(cp);
8707 	mode_t failmode = cp->c_metadata.md_vattr.va_mode;
8708 	off_t commit = 0;
8709 	int error = 0;
8710 
8711 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
8712 
8713 	if (CFS_ISFS_WRITE_AROUND(fscp))
8714 		return (ETIMEDOUT);
8715 
8716 	mutex_enter(&cp->c_statelock);
8717 
8718 	/* only owner can set acl */
8719 	if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
8720 		error = EINVAL;
8721 		goto out;
8722 	}
8723 
8724 	if (cp->c_metadata.md_flags & MD_NEEDATTRS) {
8725 		error = ETIMEDOUT;
8726 		goto out;
8727 	}
8728 
8729 	/* XXX do i need this?  is this right? */
8730 	if (cp->c_flags & CN_ALLOC_PENDING) {
8731 		if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
8732 			(void) filegrp_allocattr(cp->c_filegrp);
8733 		}
8734 		error = filegrp_create_metadata(cp->c_filegrp,
8735 		    &cp->c_metadata, &cp->c_id);
8736 		if (error) {
8737 			goto out;
8738 		}
8739 		cp->c_flags &= ~CN_ALLOC_PENDING;
8740 	}
8741 
8742 	/* XXX is this right? */
8743 	if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
8744 		error = cachefs_dlog_cidmap(fscp);
8745 		if (error) {
8746 			error = ENOSPC;
8747 			goto out;
8748 		}
8749 		cp->c_metadata.md_flags |= MD_MAPPING;
8750 		cp->c_flags |= CN_UPDATED;
8751 	}
8752 
8753 	commit = cachefs_dlog_setsecattr(fscp, vsec, flag, cp, cr);
8754 	if (commit == 0)
8755 		goto out;
8756 
8757 	/* fix modes in metadata */
8758 	cachefs_acl2perm(cp, vsec);
8759 
8760 	if ((cp->c_flags & CN_NOCACHE) == 0) {
8761 		error = cachefs_cacheacl(cp, vsec);
8762 		if (error != 0) {
8763 			goto out;
8764 		}
8765 	}
8766 
8767 	/* XXX is this right? */
8768 	if (cachefs_modified_alloc(cp)) {
8769 		error = ENOSPC;
8770 		goto out;
8771 	}
8772 
8773 out:
8774 	if (error != 0)
8775 		cp->c_metadata.md_vattr.va_mode = failmode;
8776 
8777 	mutex_exit(&cp->c_statelock);
8778 
8779 	if (commit) {
8780 		if (cachefs_dlog_commit(fscp, commit, error)) {
8781 			/*EMPTY*/
8782 			/* XXX fix on panic? */
8783 		}
8784 	}
8785 
8786 	return (error);
8787 }
8788 
8789 /*ARGSUSED*/
8790 static int
8791 cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr,
8792     caller_context_t *ct)
8793 {
8794 	cnode_t *cp = VTOC(vp);
8795 	fscache_t *fscp = C_TO_FSCACHE(cp);
8796 	int connected = 0;
8797 	int held = 0;
8798 	int error = 0;
8799 
8800 #ifdef CFSDEBUG
8801 	CFS_DEBUG(CFSDEBUG_VOPS)
8802 		printf("cachefs_setsecattr: ENTER vp %p\n", (void *)vp);
8803 #endif
8804 	if (getzoneid() != GLOBAL_ZONEID) {
8805 		error = EPERM;
8806 		goto out;
8807 	}
8808 
8809 	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
8810 		error = ENOSYS;
8811 		goto out;
8812 	}
8813 
8814 	if (! cachefs_vtype_aclok(vp)) {
8815 		error = EINVAL;
8816 		goto out;
8817 	}
8818 
8819 	/*
8820 	 * Cachefs only provides pass-through support for NFSv4,
8821 	 * and all vnode operations are passed through to the
8822 	 * back file system. For NFSv4 pass-through to work, only
8823 	 * connected operation is supported, the cnode backvp must
8824 	 * exist, and cachefs optional (eg., disconnectable) flags
8825 	 * are turned off. Assert these conditions to ensure that
8826 	 * the backfilesystem is called for the setsecattr operation.
8827 	 */
8828 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8829 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8830 
8831 	for (;;) {
8832 		/* drop hold on file system */
8833 		if (held) {
8834 			/* Won't loop with NFSv4 connected operation */
8835 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8836 			cachefs_cd_release(fscp);
8837 			held = 0;
8838 		}
8839 
8840 		/* acquire access to the file system */
8841 		error = cachefs_cd_access(fscp, connected, 1);
8842 		if (error)
8843 			break;
8844 		held = 1;
8845 
8846 		/* perform the setattr */
8847 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
8848 			error = cachefs_setsecattr_connected(cp,
8849 			    vsec, flag, cr);
8850 		else
8851 			error = cachefs_setsecattr_disconnected(cp,
8852 			    vsec, flag, cr);
8853 		if (error) {
8854 			/* if connected */
8855 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8856 				if (CFS_TIMEOUT(fscp, error)) {
8857 					cachefs_cd_release(fscp);
8858 					held = 0;
8859 					cachefs_cd_timedout(fscp);
8860 					connected = 0;
8861 					continue;
8862 				}
8863 			}
8864 
8865 			/* else must be disconnected */
8866 			else {
8867 				if (CFS_TIMEOUT(fscp, error)) {
8868 					connected = 1;
8869 					continue;
8870 				}
8871 			}
8872 		}
8873 		break;
8874 	}
8875 
8876 	if (held) {
8877 		cachefs_cd_release(fscp);
8878 	}
8879 	return (error);
8880 
8881 out:
8882 #ifdef CFS_CD_DEBUG
8883 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8884 #endif
8885 
8886 #ifdef CFSDEBUG
8887 	CFS_DEBUG(CFSDEBUG_VOPS)
8888 		printf("cachefs_setsecattr: EXIT error = %d\n", error);
8889 #endif
8890 	return (error);
8891 }
8892 
8893 /*
8894  * call this BEFORE calling cachefs_cacheacl(), as the latter will
8895  * sanitize the acl.
8896  */
8897 
8898 static void
8899 cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec)
8900 {
8901 	aclent_t *aclp;
8902 	int i;
8903 
8904 	for (i = 0; i < vsec->vsa_aclcnt; i++) {
8905 		aclp = ((aclent_t *)vsec->vsa_aclentp) + i;
8906 		switch (aclp->a_type) {
8907 		case USER_OBJ:
8908 			cp->c_metadata.md_vattr.va_mode &= (~0700);
8909 			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 6);
8910 			break;
8911 
8912 		case GROUP_OBJ:
8913 			cp->c_metadata.md_vattr.va_mode &= (~070);
8914 			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 3);
8915 			break;
8916 
8917 		case OTHER_OBJ:
8918 			cp->c_metadata.md_vattr.va_mode &= (~07);
8919 			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm);
8920 			break;
8921 
8922 		case CLASS_OBJ:
8923 			cp->c_metadata.md_aclclass = aclp->a_perm;
8924 			break;
8925 		}
8926 	}
8927 
8928 	cp->c_flags |= CN_UPDATED;
8929 }
8930 
8931 static int
8932 cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr,
8933     caller_context_t *ct)
8934 {
8935 	cnode_t *cp = VTOC(vp);
8936 	fscache_t *fscp = C_TO_FSCACHE(cp);
8937 	int held = 0, connected = 0;
8938 	int error = 0;
8939 
8940 #ifdef CFSDEBUG
8941 	CFS_DEBUG(CFSDEBUG_VOPS)
8942 		printf("cachefs_getsecattr: ENTER vp %p\n", (void *)vp);
8943 #endif
8944 
8945 	if (getzoneid() != GLOBAL_ZONEID) {
8946 		error = EPERM;
8947 		goto out;
8948 	}
8949 
8950 	/*
8951 	 * Cachefs only provides pass-through support for NFSv4,
8952 	 * and all vnode operations are passed through to the
8953 	 * back file system. For NFSv4 pass-through to work, only
8954 	 * connected operation is supported, the cnode backvp must
8955 	 * exist, and cachefs optional (eg., disconnectable) flags
8956 	 * are turned off. Assert these conditions to ensure that
8957 	 * the backfilesystem is called for the getsecattr operation.
8958 	 */
8959 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8960 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8961 
8962 	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
8963 		error = fs_fab_acl(vp, vsec, flag, cr, ct);
8964 		goto out;
8965 	}
8966 
8967 	for (;;) {
8968 		if (held) {
8969 			/* Won't loop with NFSv4 connected behavior */
8970 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8971 			cachefs_cd_release(fscp);
8972 			held = 0;
8973 		}
8974 		error = cachefs_cd_access(fscp, connected, 0);
8975 		if (error)
8976 			break;
8977 		held = 1;
8978 
8979 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8980 			error = cachefs_getsecattr_connected(vp, vsec, flag,
8981 			    cr);
8982 			if (CFS_TIMEOUT(fscp, error)) {
8983 				cachefs_cd_release(fscp);
8984 				held = 0;
8985 				cachefs_cd_timedout(fscp);
8986 				connected = 0;
8987 				continue;
8988 			}
8989 		} else {
8990 			error = cachefs_getsecattr_disconnected(vp, vsec, flag,
8991 			    cr);
8992 			if (CFS_TIMEOUT(fscp, error)) {
8993 				if (cachefs_cd_access_miss(fscp)) {
8994 					error = cachefs_getsecattr_connected(vp,
8995 					    vsec, flag, cr);
8996 					if (!CFS_TIMEOUT(fscp, error))
8997 						break;
8998 					delay(5*hz);
8999 					connected = 0;
9000 					continue;
9001 				}
9002 				connected = 1;
9003 				continue;
9004 			}
9005 		}
9006 		break;
9007 	}
9008 
9009 out:
9010 	if (held)
9011 		cachefs_cd_release(fscp);
9012 
9013 #ifdef CFS_CD_DEBUG
9014 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
9015 #endif
9016 #ifdef CFSDEBUG
9017 	CFS_DEBUG(CFSDEBUG_VOPS)
9018 		printf("cachefs_getsecattr: EXIT error = %d\n", error);
9019 #endif
9020 	return (error);
9021 }
9022 
9023 static int
9024 cachefs_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
9025     caller_context_t *ct)
9026 {
9027 	cnode_t *cp = VTOC(vp);
9028 	fscache_t *fscp = C_TO_FSCACHE(cp);
9029 	int error = 0;
9030 	vnode_t *backvp;
9031 
9032 #ifdef CFSDEBUG
9033 	CFS_DEBUG(CFSDEBUG_VOPS)
9034 		printf("cachefs_shrlock: ENTER vp %p\n", (void *)vp);
9035 #endif
9036 
9037 	if (getzoneid() != GLOBAL_ZONEID) {
9038 		error = EPERM;
9039 		goto out;
9040 	}
9041 
9042 	/*
9043 	 * Cachefs only provides pass-through support for NFSv4,
9044 	 * and all vnode operations are passed through to the
9045 	 * back file system. For NFSv4 pass-through to work, only
9046 	 * connected operation is supported, the cnode backvp must
9047 	 * exist, and cachefs optional (eg., disconnectable) flags
9048 	 * are turned off. Assert these conditions to ensure that
9049 	 * the backfilesystem is called for the shrlock operation.
9050 	 */
9051 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
9052 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
9053 
9054 	mutex_enter(&cp->c_statelock);
9055 	if (cp->c_backvp == NULL)
9056 		error = cachefs_getbackvp(fscp, cp);
9057 	backvp = cp->c_backvp;
9058 	mutex_exit(&cp->c_statelock);
9059 	ASSERT((error != 0) || (backvp != NULL));
9060 
9061 	if (error == 0) {
9062 		CFS_DPRINT_BACKFS_NFSV4(fscp,
9063 		    ("cachefs_shrlock (nfsv4): cp %p, backvp %p",
9064 		    cp, backvp));
9065 		error = VOP_SHRLOCK(backvp, cmd, shr, flag, cr, ct);
9066 	}
9067 
9068 out:
9069 #ifdef CFSDEBUG
9070 	CFS_DEBUG(CFSDEBUG_VOPS)
9071 		printf("cachefs_shrlock: EXIT error = %d\n", error);
9072 #endif
9073 	return (error);
9074 }
9075 
9076 static int
9077 cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
9078     cred_t *cr)
9079 {
9080 	cnode_t *cp = VTOC(vp);
9081 	fscache_t *fscp = C_TO_FSCACHE(cp);
9082 	int hit = 0;
9083 	int error = 0;
9084 
9085 
9086 	mutex_enter(&cp->c_statelock);
9087 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
9088 	if (error)
9089 		goto out;
9090 
9091 	/* read from the cache if we can */
9092 	if ((cp->c_metadata.md_flags & MD_ACL) &&
9093 	    ((cp->c_flags & CN_NOCACHE) == 0) &&
9094 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
9095 		ASSERT((cp->c_flags & CN_NOCACHE) == 0);
9096 		error = cachefs_getaclfromcache(cp, vsec);
9097 		if (error) {
9098 			cachefs_nocache(cp);
9099 			ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
9100 			error = 0;
9101 		} else {
9102 			hit = 1;
9103 			goto out;
9104 		}
9105 	}
9106 
9107 	ASSERT(error == 0);
9108 	if (cp->c_backvp == NULL)
9109 		error = cachefs_getbackvp(fscp, cp);
9110 	if (error)
9111 		goto out;
9112 
9113 	CFS_DPRINT_BACKFS_NFSV4(fscp,
9114 	    ("cachefs_getsecattr (nfsv4): cp %p, backvp %p",
9115 	    cp, cp->c_backvp));
9116 	error = VOP_GETSECATTR(cp->c_backvp, vsec, flag, cr, NULL);
9117 	if (error)
9118 		goto out;
9119 
9120 	if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
9121 	    (cachefs_vtype_aclok(vp)) &&
9122 	    ((cp->c_flags & CN_NOCACHE) == 0) &&
9123 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
9124 		error = cachefs_cacheacl(cp, vsec);
9125 		if (error) {
9126 			error = 0;
9127 			cachefs_nocache(cp);
9128 		}
9129 	}
9130 
9131 out:
9132 	if (error == 0) {
9133 		if (hit)
9134 			fscp->fs_stats.st_hits++;
9135 		else
9136 			fscp->fs_stats.st_misses++;
9137 	}
9138 	mutex_exit(&cp->c_statelock);
9139 
9140 	return (error);
9141 }
9142 
9143 static int
9144 /*ARGSUSED*/
9145 cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec, int flag,
9146     cred_t *cr)
9147 {
9148 	cnode_t *cp = VTOC(vp);
9149 	fscache_t *fscp = C_TO_FSCACHE(cp);
9150 	int hit = 0;
9151 	int error = 0;
9152 
9153 
9154 	mutex_enter(&cp->c_statelock);
9155 
9156 	/* read from the cache if we can */
9157 	if (((cp->c_flags & CN_NOCACHE) == 0) &&
9158 	    (cp->c_metadata.md_flags & MD_ACL)) {
9159 		error = cachefs_getaclfromcache(cp, vsec);
9160 		if (error) {
9161 			cachefs_nocache(cp);
9162 			ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
9163 			error = 0;
9164 		} else {
9165 			hit = 1;
9166 			goto out;
9167 		}
9168 	}
9169 	error = ETIMEDOUT;
9170 
9171 out:
9172 	if (error == 0) {
9173 		if (hit)
9174 			fscp->fs_stats.st_hits++;
9175 		else
9176 			fscp->fs_stats.st_misses++;
9177 	}
9178 	mutex_exit(&cp->c_statelock);
9179 
9180 	return (error);
9181 }
9182 
9183 /*
9184  * cachefs_cacheacl() -- cache an ACL, which we do by applying it to
9185  * the frontfile if possible; otherwise, the adjunct directory.
9186  *
9187  * inputs:
9188  * cp - the cnode, with its statelock already held
9189  * vsecp - a pointer to a vsecattr_t you'd like us to cache as-is,
9190  *  or NULL if you want us to do the VOP_GETSECATTR(backvp).
9191  *
9192  * returns:
9193  * 0 - all is well
9194  * nonzero - errno
9195  */
9196 
9197 int
9198 cachefs_cacheacl(cnode_t *cp, vsecattr_t *vsecp)
9199 {
9200 	fscache_t *fscp = C_TO_FSCACHE(cp);
9201 	vsecattr_t vsec;
9202 	aclent_t *aclp;
9203 	int gotvsec = 0;
9204 	int error = 0;
9205 	vnode_t *vp = NULL;
9206 	void *aclkeep = NULL;
9207 	int i;
9208 
9209 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9210 	ASSERT((cp->c_flags & CN_NOCACHE) == 0);
9211 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
9212 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
9213 	ASSERT(cachefs_vtype_aclok(CTOV(cp)));
9214 
9215 	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
9216 		error = ENOSYS;
9217 		goto out;
9218 	}
9219 
9220 	if (vsecp == NULL) {
9221 		if (cp->c_backvp == NULL)
9222 			error = cachefs_getbackvp(fscp, cp);
9223 		if (error != 0)
9224 			goto out;
9225 		vsecp = &vsec;
9226 		bzero(&vsec, sizeof (vsec));
9227 		vsecp->vsa_mask =
9228 		    VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9229 		error = VOP_GETSECATTR(cp->c_backvp, vsecp, 0, kcred, NULL);
9230 		if (error != 0) {
9231 			goto out;
9232 		}
9233 		gotvsec = 1;
9234 	} else if (vsecp->vsa_mask & VSA_ACL) {
9235 		aclkeep = vsecp->vsa_aclentp;
9236 		vsecp->vsa_aclentp = cachefs_kmem_alloc(vsecp->vsa_aclcnt *
9237 		    sizeof (aclent_t), KM_SLEEP);
9238 		bcopy(aclkeep, vsecp->vsa_aclentp, vsecp->vsa_aclcnt *
9239 		    sizeof (aclent_t));
9240 	} else if ((vsecp->vsa_mask & (VSA_ACL | VSA_DFACL)) == 0) {
9241 		/* unless there's real data, we can cache nothing. */
9242 		return (0);
9243 	}
9244 
9245 	/*
9246 	 * prevent the ACL from chmoding our frontfile, and
9247 	 * snarf the class info
9248 	 */
9249 
9250 	if ((vsecp->vsa_mask & (VSA_ACL | VSA_ACLCNT)) ==
9251 	    (VSA_ACL | VSA_ACLCNT)) {
9252 		for (i = 0; i < vsecp->vsa_aclcnt; i++) {
9253 			aclp = ((aclent_t *)vsecp->vsa_aclentp) + i;
9254 			switch (aclp->a_type) {
9255 			case CLASS_OBJ:
9256 				cp->c_metadata.md_aclclass =
9257 				    aclp->a_perm;
9258 				/*FALLTHROUGH*/
9259 			case USER_OBJ:
9260 			case GROUP_OBJ:
9261 			case OTHER_OBJ:
9262 				aclp->a_perm = 06;
9263 			}
9264 		}
9265 	}
9266 
9267 	/*
9268 	 * if the frontfile exists, then we always do the work.  but,
9269 	 * if there's no frontfile, and the ACL isn't a `real' ACL,
9270 	 * then we don't want to do the work.  otherwise, an `ls -l'
9271 	 * will create tons of emtpy frontfiles.
9272 	 */
9273 
9274 	if (((cp->c_metadata.md_flags & MD_FILE) == 0) &&
9275 	    ((vsecp->vsa_aclcnt + vsecp->vsa_dfaclcnt)
9276 	    <= MIN_ACL_ENTRIES)) {
9277 		cp->c_metadata.md_flags |= MD_ACL;
9278 		cp->c_flags |= CN_UPDATED;
9279 		goto out;
9280 	}
9281 
9282 	/*
9283 	 * if we have a default ACL, then we need a
9284 	 * real live directory in the frontfs that we
9285 	 * can apply the ACL to.  if not, then we just
9286 	 * use the frontfile.  we get the frontfile
9287 	 * regardless -- that way, we know the
9288 	 * directory for the frontfile exists.
9289 	 */
9290 
9291 	if (vsecp->vsa_dfaclcnt > 0) {
9292 		if (cp->c_acldirvp == NULL)
9293 			error = cachefs_getacldirvp(cp);
9294 		if (error != 0)
9295 			goto out;
9296 		vp = cp->c_acldirvp;
9297 	} else {
9298 		if (cp->c_frontvp == NULL)
9299 			error = cachefs_getfrontfile(cp);
9300 		if (error != 0)
9301 			goto out;
9302 		vp = cp->c_frontvp;
9303 	}
9304 	ASSERT(vp != NULL);
9305 
9306 	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
9307 	error = VOP_SETSECATTR(vp, vsecp, 0, kcred, NULL);
9308 	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
9309 	if (error != 0) {
9310 #ifdef CFSDEBUG
9311 		CFS_DEBUG(CFSDEBUG_VOPS)
9312 			printf("cachefs_cacheacl: setsecattr: error %d\n",
9313 			    error);
9314 #endif /* CFSDEBUG */
9315 		/*
9316 		 * If there was an error, we don't want to call
9317 		 * cachefs_nocache(); so, set error to 0.
9318 		 * We will call cachefs_purgeacl(), in order to
9319 		 * clean such things as adjunct ACL directories.
9320 		 */
9321 		cachefs_purgeacl(cp);
9322 		error = 0;
9323 		goto out;
9324 	}
9325 	if (vp == cp->c_frontvp)
9326 		cp->c_flags |= CN_NEED_FRONT_SYNC;
9327 
9328 	cp->c_metadata.md_flags |= MD_ACL;
9329 	cp->c_flags |= CN_UPDATED;
9330 
9331 out:
9332 	if ((error) && (fscp->fs_cdconnected == CFS_CD_CONNECTED))
9333 		cachefs_nocache(cp);
9334 
9335 	if (gotvsec) {
9336 		if (vsec.vsa_aclcnt)
9337 			kmem_free(vsec.vsa_aclentp,
9338 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9339 		if (vsec.vsa_dfaclcnt)
9340 			kmem_free(vsec.vsa_dfaclentp,
9341 			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
9342 	} else if (aclkeep != NULL) {
9343 		cachefs_kmem_free(vsecp->vsa_aclentp,
9344 		    vsecp->vsa_aclcnt * sizeof (aclent_t));
9345 		vsecp->vsa_aclentp = aclkeep;
9346 	}
9347 
9348 	return (error);
9349 }
9350 
9351 void
9352 cachefs_purgeacl(cnode_t *cp)
9353 {
9354 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9355 
9356 	ASSERT(!CFS_ISFS_BACKFS_NFSV4(C_TO_FSCACHE(cp)));
9357 
9358 	if (cp->c_acldirvp != NULL) {
9359 		VN_RELE(cp->c_acldirvp);
9360 		cp->c_acldirvp = NULL;
9361 	}
9362 
9363 	if (cp->c_metadata.md_flags & MD_ACLDIR) {
9364 		char name[CFS_FRONTFILE_NAME_SIZE + 2];
9365 
9366 		ASSERT(cp->c_filegrp->fg_dirvp != NULL);
9367 		make_ascii_name(&cp->c_id, name);
9368 		(void) strcat(name, ".d");
9369 
9370 		(void) VOP_RMDIR(cp->c_filegrp->fg_dirvp, name,
9371 		    cp->c_filegrp->fg_dirvp, kcred, NULL, 0);
9372 	}
9373 
9374 	cp->c_metadata.md_flags &= ~(MD_ACL | MD_ACLDIR);
9375 	cp->c_flags |= CN_UPDATED;
9376 }
9377 
9378 static int
9379 cachefs_getacldirvp(cnode_t *cp)
9380 {
9381 	char name[CFS_FRONTFILE_NAME_SIZE + 2];
9382 	int error = 0;
9383 
9384 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9385 	ASSERT(cp->c_acldirvp == NULL);
9386 
9387 	if (cp->c_frontvp == NULL)
9388 		error = cachefs_getfrontfile(cp);
9389 	if (error != 0)
9390 		goto out;
9391 
9392 	ASSERT(cp->c_filegrp->fg_dirvp != NULL);
9393 	make_ascii_name(&cp->c_id, name);
9394 	(void) strcat(name, ".d");
9395 	error = VOP_LOOKUP(cp->c_filegrp->fg_dirvp,
9396 	    name, &cp->c_acldirvp, NULL, 0, NULL, kcred, NULL, NULL, NULL);
9397 	if ((error != 0) && (error != ENOENT))
9398 		goto out;
9399 
9400 	if (error != 0) {
9401 		vattr_t va;
9402 
9403 		va.va_mode = S_IFDIR | 0777;
9404 		va.va_uid = 0;
9405 		va.va_gid = 0;
9406 		va.va_type = VDIR;
9407 		va.va_mask = AT_TYPE | AT_MODE |
9408 		    AT_UID | AT_GID;
9409 		error =
9410 		    VOP_MKDIR(cp->c_filegrp->fg_dirvp,
9411 		    name, &va, &cp->c_acldirvp, kcred, NULL, 0, NULL);
9412 		if (error != 0)
9413 			goto out;
9414 	}
9415 
9416 	ASSERT(cp->c_acldirvp != NULL);
9417 	cp->c_metadata.md_flags |= MD_ACLDIR;
9418 	cp->c_flags |= CN_UPDATED;
9419 
9420 out:
9421 	if (error != 0)
9422 		cp->c_acldirvp = NULL;
9423 	return (error);
9424 }
9425 
9426 static int
9427 cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec)
9428 {
9429 	aclent_t *aclp;
9430 	int error = 0;
9431 	vnode_t *vp = NULL;
9432 	int i;
9433 
9434 	ASSERT(cp->c_metadata.md_flags & MD_ACL);
9435 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9436 	ASSERT(vsec->vsa_aclentp == NULL);
9437 
9438 	if (cp->c_metadata.md_flags & MD_ACLDIR) {
9439 		if (cp->c_acldirvp == NULL)
9440 			error = cachefs_getacldirvp(cp);
9441 		if (error != 0)
9442 			goto out;
9443 		vp = cp->c_acldirvp;
9444 	} else if (cp->c_metadata.md_flags & MD_FILE) {
9445 		if (cp->c_frontvp == NULL)
9446 			error = cachefs_getfrontfile(cp);
9447 		if (error != 0)
9448 			goto out;
9449 		vp = cp->c_frontvp;
9450 	} else {
9451 
9452 		/*
9453 		 * if we get here, then we know that MD_ACL is on,
9454 		 * meaning an ACL was successfully cached.  we also
9455 		 * know that neither MD_ACLDIR nor MD_FILE are on, so
9456 		 * this has to be an entry without a `real' ACL.
9457 		 * thus, we forge whatever is necessary.
9458 		 */
9459 
9460 		if (vsec->vsa_mask & VSA_ACLCNT)
9461 			vsec->vsa_aclcnt = MIN_ACL_ENTRIES;
9462 
9463 		if (vsec->vsa_mask & VSA_ACL) {
9464 			vsec->vsa_aclentp =
9465 			    kmem_zalloc(MIN_ACL_ENTRIES *
9466 			    sizeof (aclent_t), KM_SLEEP);
9467 			aclp = (aclent_t *)vsec->vsa_aclentp;
9468 			aclp->a_type = USER_OBJ;
9469 			++aclp;
9470 			aclp->a_type = GROUP_OBJ;
9471 			++aclp;
9472 			aclp->a_type = OTHER_OBJ;
9473 			++aclp;
9474 			aclp->a_type = CLASS_OBJ;
9475 			ksort((caddr_t)vsec->vsa_aclentp, MIN_ACL_ENTRIES,
9476 			    sizeof (aclent_t), cmp2acls);
9477 		}
9478 
9479 		ASSERT(vp == NULL);
9480 	}
9481 
9482 	if (vp != NULL) {
9483 		if ((error = VOP_GETSECATTR(vp, vsec, 0, kcred, NULL)) != 0) {
9484 #ifdef CFSDEBUG
9485 			CFS_DEBUG(CFSDEBUG_VOPS)
9486 				printf("cachefs_getaclfromcache: error %d\n",
9487 				    error);
9488 #endif /* CFSDEBUG */
9489 			goto out;
9490 		}
9491 	}
9492 
9493 	if (vsec->vsa_aclentp != NULL) {
9494 		for (i = 0; i < vsec->vsa_aclcnt; i++) {
9495 			aclp = ((aclent_t *)vsec->vsa_aclentp) + i;
9496 			switch (aclp->a_type) {
9497 			case USER_OBJ:
9498 				aclp->a_id = cp->c_metadata.md_vattr.va_uid;
9499 				aclp->a_perm =
9500 				    cp->c_metadata.md_vattr.va_mode & 0700;
9501 				aclp->a_perm >>= 6;
9502 				break;
9503 
9504 			case GROUP_OBJ:
9505 				aclp->a_id = cp->c_metadata.md_vattr.va_gid;
9506 				aclp->a_perm =
9507 				    cp->c_metadata.md_vattr.va_mode & 070;
9508 				aclp->a_perm >>= 3;
9509 				break;
9510 
9511 			case OTHER_OBJ:
9512 				aclp->a_perm =
9513 				    cp->c_metadata.md_vattr.va_mode & 07;
9514 				break;
9515 
9516 			case CLASS_OBJ:
9517 				aclp->a_perm =
9518 				    cp->c_metadata.md_aclclass;
9519 				break;
9520 			}
9521 		}
9522 	}
9523 
9524 out:
9525 
9526 	if (error != 0)
9527 		cachefs_nocache(cp);
9528 
9529 	return (error);
9530 }
9531 
9532 /*
9533  * Fills in targp with attribute information from srcp, cp
9534  * and if necessary the system.
9535  */
9536 static void
9537 cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp, cred_t *cr)
9538 {
9539 	time_t	now;
9540 
9541 	ASSERT((srcp->va_mask & (AT_TYPE | AT_MODE)) == (AT_TYPE | AT_MODE));
9542 
9543 	/*
9544 	 * Add code to fill in the va struct.  We use the fields from
9545 	 * the srcp struct if they are populated, otherwise we guess
9546 	 */
9547 
9548 	targp->va_mask = 0;	/* initialize all fields */
9549 	targp->va_mode = srcp->va_mode;
9550 	targp->va_type = srcp->va_type;
9551 	targp->va_nlink = 1;
9552 	targp->va_nodeid = 0;
9553 
9554 	if (srcp->va_mask & AT_UID)
9555 		targp->va_uid = srcp->va_uid;
9556 	else
9557 		targp->va_uid = crgetuid(cr);
9558 
9559 	if (srcp->va_mask & AT_GID)
9560 		targp->va_gid = srcp->va_gid;
9561 	else
9562 		targp->va_gid = crgetgid(cr);
9563 
9564 	if (srcp->va_mask & AT_FSID)
9565 		targp->va_fsid = srcp->va_fsid;
9566 	else
9567 		targp->va_fsid = 0;	/* initialize all fields */
9568 
9569 	now = gethrestime_sec();
9570 	if (srcp->va_mask & AT_ATIME)
9571 		targp->va_atime = srcp->va_atime;
9572 	else
9573 		targp->va_atime.tv_sec = now;
9574 
9575 	if (srcp->va_mask & AT_MTIME)
9576 		targp->va_mtime = srcp->va_mtime;
9577 	else
9578 		targp->va_mtime.tv_sec = now;
9579 
9580 	if (srcp->va_mask & AT_CTIME)
9581 		targp->va_ctime = srcp->va_ctime;
9582 	else
9583 		targp->va_ctime.tv_sec = now;
9584 
9585 
9586 	if (srcp->va_mask & AT_SIZE)
9587 		targp->va_size = srcp->va_size;
9588 	else
9589 		targp->va_size = 0;
9590 
9591 	/*
9592 	 * the remaing fields are set by the fs and not changable.
9593 	 * we populate these entries useing the parent directory
9594 	 * values.  It's a small hack, but should work.
9595 	 */
9596 	targp->va_blksize = cp->c_metadata.md_vattr.va_blksize;
9597 	targp->va_rdev = cp->c_metadata.md_vattr.va_rdev;
9598 	targp->va_nblocks = cp->c_metadata.md_vattr.va_nblocks;
9599 	targp->va_seq = 0; /* Never keep the sequence number */
9600 }
9601 
9602 /*
9603  * set the gid for a newly created file.  The algorithm is as follows:
9604  *
9605  *	1) If the gid is set in the attribute list, then use it if
9606  *	   the caller is privileged, belongs to the target group, or
9607  *	   the group is the same as the parent directory.
9608  *
9609  *	2) If the parent directory's set-gid bit is clear, then use
9610  *	   the process gid
9611  *
9612  *	3) Otherwise, use the gid of the parent directory.
9613  *
9614  * Note: newcp->c_attr.va_{mode,type} must already be set before calling
9615  * this routine.
9616  */
9617 static void
9618 cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap, cred_t *cr)
9619 {
9620 	if ((vap->va_mask & AT_GID) &&
9621 	    ((vap->va_gid == dcp->c_attr.va_gid) ||
9622 	    groupmember(vap->va_gid, cr) ||
9623 	    secpolicy_vnode_create_gid(cr) != 0)) {
9624 		newcp->c_attr.va_gid = vap->va_gid;
9625 	} else {
9626 		if (dcp->c_attr.va_mode & S_ISGID)
9627 			newcp->c_attr.va_gid = dcp->c_attr.va_gid;
9628 		else
9629 			newcp->c_attr.va_gid = crgetgid(cr);
9630 	}
9631 
9632 	/*
9633 	 * if we're creating a directory, and the parent directory has the
9634 	 * set-GID bit set, set it on the new directory.
9635 	 * Otherwise, if the user is neither privileged nor a member of the
9636 	 * file's new group, clear the file's set-GID bit.
9637 	 */
9638 	if (dcp->c_attr.va_mode & S_ISGID && newcp->c_attr.va_type == VDIR) {
9639 		newcp->c_attr.va_mode |= S_ISGID;
9640 	} else if ((newcp->c_attr.va_mode & S_ISGID) &&
9641 	    secpolicy_vnode_setids_setgids(cr, newcp->c_attr.va_gid) != 0)
9642 		newcp->c_attr.va_mode &= ~S_ISGID;
9643 }
9644 
9645 /*
9646  * create an acl for the newly created file.  should be called right
9647  * after cachefs_creategid.
9648  */
9649 
9650 static void
9651 cachefs_createacl(cnode_t *dcp, cnode_t *newcp)
9652 {
9653 	fscache_t *fscp = C_TO_FSCACHE(dcp);
9654 	vsecattr_t vsec;
9655 	int gotvsec = 0;
9656 	int error = 0; /* placeholder */
9657 	aclent_t *aclp;
9658 	o_mode_t *classp = NULL;
9659 	o_mode_t gunion = 0;
9660 	int i;
9661 
9662 	if ((fscp->fs_info.fi_mntflags & CFS_NOACL) ||
9663 	    (! cachefs_vtype_aclok(CTOV(newcp))))
9664 		return;
9665 
9666 	ASSERT(dcp->c_metadata.md_flags & MD_ACL);
9667 	ASSERT(MUTEX_HELD(&dcp->c_statelock));
9668 	ASSERT(MUTEX_HELD(&newcp->c_statelock));
9669 
9670 	/*
9671 	 * XXX should probably not do VSA_ACL and VSA_ACLCNT, but that
9672 	 * would hit code paths that isn't hit anywhere else.
9673 	 */
9674 
9675 	bzero(&vsec, sizeof (vsec));
9676 	vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9677 	error = cachefs_getaclfromcache(dcp, &vsec);
9678 	if (error != 0)
9679 		goto out;
9680 	gotvsec = 1;
9681 
9682 	if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL)) {
9683 		if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL))
9684 			kmem_free(vsec.vsa_aclentp,
9685 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9686 
9687 		vsec.vsa_aclcnt = vsec.vsa_dfaclcnt;
9688 		vsec.vsa_aclentp = vsec.vsa_dfaclentp;
9689 		vsec.vsa_dfaclcnt = 0;
9690 		vsec.vsa_dfaclentp = NULL;
9691 
9692 		if (newcp->c_attr.va_type == VDIR) {
9693 			vsec.vsa_dfaclentp = kmem_alloc(vsec.vsa_aclcnt *
9694 			    sizeof (aclent_t), KM_SLEEP);
9695 			vsec.vsa_dfaclcnt = vsec.vsa_aclcnt;
9696 			bcopy(vsec.vsa_aclentp, vsec.vsa_dfaclentp,
9697 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9698 		}
9699 
9700 		/*
9701 		 * this function should be called pretty much after
9702 		 * the rest of the file creation stuff is done.  so,
9703 		 * uid, gid, etc. should be `right'.  we'll go with
9704 		 * that, rather than trying to determine whether to
9705 		 * get stuff from cr or va.
9706 		 */
9707 
9708 		for (i = 0; i < vsec.vsa_aclcnt; i++) {
9709 			aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9710 			switch (aclp->a_type) {
9711 			case DEF_USER_OBJ:
9712 				aclp->a_type = USER_OBJ;
9713 				aclp->a_id = newcp->c_metadata.md_vattr.va_uid;
9714 				aclp->a_perm =
9715 				    newcp->c_metadata.md_vattr.va_mode;
9716 				aclp->a_perm &= 0700;
9717 				aclp->a_perm >>= 6;
9718 				break;
9719 
9720 			case DEF_GROUP_OBJ:
9721 				aclp->a_type = GROUP_OBJ;
9722 				aclp->a_id = newcp->c_metadata.md_vattr.va_gid;
9723 				aclp->a_perm =
9724 				    newcp->c_metadata.md_vattr.va_mode;
9725 				aclp->a_perm &= 070;
9726 				aclp->a_perm >>= 3;
9727 				gunion |= aclp->a_perm;
9728 				break;
9729 
9730 			case DEF_OTHER_OBJ:
9731 				aclp->a_type = OTHER_OBJ;
9732 				aclp->a_perm =
9733 				    newcp->c_metadata.md_vattr.va_mode & 07;
9734 				break;
9735 
9736 			case DEF_CLASS_OBJ:
9737 				aclp->a_type = CLASS_OBJ;
9738 				classp = &(aclp->a_perm);
9739 				break;
9740 
9741 			case DEF_USER:
9742 				aclp->a_type = USER;
9743 				gunion |= aclp->a_perm;
9744 				break;
9745 
9746 			case DEF_GROUP:
9747 				aclp->a_type = GROUP;
9748 				gunion |= aclp->a_perm;
9749 				break;
9750 			}
9751 		}
9752 
9753 		/* XXX is this the POSIX thing to do? */
9754 		if (classp != NULL)
9755 			*classp &= gunion;
9756 
9757 		/*
9758 		 * we don't need to log this; rather, we clear the
9759 		 * MD_ACL bit when we reconnect.
9760 		 */
9761 
9762 		error = cachefs_cacheacl(newcp, &vsec);
9763 		if (error != 0)
9764 			goto out;
9765 	}
9766 
9767 	newcp->c_metadata.md_aclclass = 07; /* XXX check posix */
9768 	newcp->c_metadata.md_flags |= MD_ACL;
9769 	newcp->c_flags |= CN_UPDATED;
9770 
9771 out:
9772 
9773 	if (gotvsec) {
9774 		if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL))
9775 			kmem_free(vsec.vsa_aclentp,
9776 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9777 		if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL))
9778 			kmem_free(vsec.vsa_dfaclentp,
9779 			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
9780 	}
9781 }
9782 
9783 /*
9784  * this is translated from the UFS code for access checking.
9785  */
9786 
9787 static int
9788 cachefs_access_local(void *vcp, int mode, cred_t *cr)
9789 {
9790 	cnode_t *cp = vcp;
9791 	fscache_t *fscp = C_TO_FSCACHE(cp);
9792 	int shift = 0;
9793 
9794 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9795 
9796 	if (mode & VWRITE) {
9797 		/*
9798 		 * Disallow write attempts on read-only
9799 		 * file systems, unless the file is special.
9800 		 */
9801 		struct vnode *vp = CTOV(cp);
9802 		if (vn_is_readonly(vp)) {
9803 			if (!IS_DEVVP(vp)) {
9804 				return (EROFS);
9805 			}
9806 		}
9807 	}
9808 
9809 	/*
9810 	 * if we need to do ACLs, do it.  this works whether anyone
9811 	 * has explicitly made an ACL or not.
9812 	 */
9813 
9814 	if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
9815 	    (cachefs_vtype_aclok(CTOV(cp))))
9816 		return (cachefs_acl_access(cp, mode, cr));
9817 
9818 	if (crgetuid(cr) != cp->c_attr.va_uid) {
9819 		shift += 3;
9820 		if (!groupmember(cp->c_attr.va_gid, cr))
9821 			shift += 3;
9822 	}
9823 
9824 	/* compute missing mode bits */
9825 	mode &= ~(cp->c_attr.va_mode << shift);
9826 
9827 	if (mode == 0)
9828 		return (0);
9829 
9830 	return (secpolicy_vnode_access(cr, CTOV(cp), cp->c_attr.va_uid, mode));
9831 }
9832 
9833 /*
9834  * This is transcribed from ufs_acl_access().  If that changes, then
9835  * this should, too.
9836  *
9837  * Check the cnode's ACL's to see if this mode of access is
9838  * allowed; return 0 if allowed, EACCES if not.
9839  *
9840  * We follow the procedure defined in Sec. 3.3.5, ACL Access
9841  * Check Algorithm, of the POSIX 1003.6 Draft Standard.
9842  */
9843 
9844 #define	ACL_MODE_CHECK(M, PERM, C, I) ((((M) & (PERM)) == (M)) ? 0 : \
9845 		    secpolicy_vnode_access(C, CTOV(I), owner, (M) & ~(PERM)))
9846 
9847 static int
9848 cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr)
9849 {
9850 	int error = 0;
9851 
9852 	fscache_t *fscp = C_TO_FSCACHE(cp);
9853 
9854 	int mask = ~0;
9855 	int ismask = 0;
9856 
9857 	int gperm = 0;
9858 	int ngroup = 0;
9859 
9860 	vsecattr_t vsec;
9861 	int gotvsec = 0;
9862 	aclent_t *aclp;
9863 
9864 	uid_t owner = cp->c_attr.va_uid;
9865 
9866 	int i;
9867 
9868 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9869 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
9870 
9871 	/*
9872 	 * strictly speaking, we shouldn't set VSA_DFACL and DFACLCNT,
9873 	 * but then i believe we'd be the only thing exercising those
9874 	 * code paths -- probably a bad thing.
9875 	 */
9876 
9877 	bzero(&vsec, sizeof (vsec));
9878 	vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9879 
9880 	/* XXX KLUDGE! correct insidious 0-class problem */
9881 	if (cp->c_metadata.md_aclclass == 0 &&
9882 	    fscp->fs_cdconnected == CFS_CD_CONNECTED)
9883 		cachefs_purgeacl(cp);
9884 again:
9885 	if (cp->c_metadata.md_flags & MD_ACL) {
9886 		error = cachefs_getaclfromcache(cp, &vsec);
9887 		if (error != 0) {
9888 #ifdef CFSDEBUG
9889 			if (error != ETIMEDOUT)
9890 				CFS_DEBUG(CFSDEBUG_VOPS)
9891 					printf("cachefs_acl_access():"
9892 					    "error %d from getaclfromcache()\n",
9893 					    error);
9894 #endif /* CFSDEBUG */
9895 			if ((cp->c_metadata.md_flags & MD_ACL) == 0) {
9896 				goto again;
9897 			} else {
9898 				goto out;
9899 			}
9900 		}
9901 	} else {
9902 		if (cp->c_backvp == NULL) {
9903 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
9904 				error = cachefs_getbackvp(fscp, cp);
9905 			else
9906 				error = ETIMEDOUT;
9907 		}
9908 		if (error == 0)
9909 			error = VOP_GETSECATTR(cp->c_backvp, &vsec, 0, cr,
9910 			    NULL);
9911 		if (error != 0) {
9912 #ifdef CFSDEBUG
9913 			CFS_DEBUG(CFSDEBUG_VOPS)
9914 				printf("cachefs_acl_access():"
9915 				    "error %d from getsecattr(backvp)\n",
9916 				    error);
9917 #endif /* CFSDEBUG */
9918 			goto out;
9919 		}
9920 		if ((cp->c_flags & CN_NOCACHE) == 0 &&
9921 		    !CFS_ISFS_BACKFS_NFSV4(fscp))
9922 			(void) cachefs_cacheacl(cp, &vsec);
9923 	}
9924 	gotvsec = 1;
9925 
9926 	ASSERT(error == 0);
9927 	for (i = 0; i < vsec.vsa_aclcnt; i++) {
9928 		aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9929 		switch (aclp->a_type) {
9930 		case USER_OBJ:
9931 			/*
9932 			 * this might look cleaner in the 2nd loop
9933 			 * below, but we do it here as an
9934 			 * optimization.
9935 			 */
9936 
9937 			owner = aclp->a_id;
9938 			if (crgetuid(cr) == owner) {
9939 				error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
9940 				    cr, cp);
9941 				goto out;
9942 			}
9943 			break;
9944 
9945 		case CLASS_OBJ:
9946 			mask = aclp->a_perm;
9947 			ismask = 1;
9948 			break;
9949 		}
9950 	}
9951 
9952 	ASSERT(error == 0);
9953 	for (i = 0; i < vsec.vsa_aclcnt; i++) {
9954 		aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9955 		switch (aclp->a_type) {
9956 		case USER:
9957 			if (crgetuid(cr) == aclp->a_id) {
9958 				error = ACL_MODE_CHECK(mode,
9959 				    (aclp->a_perm & mask) << 6, cr, cp);
9960 				goto out;
9961 			}
9962 			break;
9963 
9964 		case GROUP_OBJ:
9965 			if (groupmember(aclp->a_id, cr)) {
9966 				++ngroup;
9967 				gperm |= aclp->a_perm;
9968 				if (! ismask) {
9969 					error = ACL_MODE_CHECK(mode,
9970 					    aclp->a_perm << 6,
9971 					    cr, cp);
9972 					goto out;
9973 				}
9974 			}
9975 			break;
9976 
9977 		case GROUP:
9978 			if (groupmember(aclp->a_id, cr)) {
9979 				++ngroup;
9980 				gperm |= aclp->a_perm;
9981 			}
9982 			break;
9983 
9984 		case OTHER_OBJ:
9985 			if (ngroup == 0) {
9986 				error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
9987 				    cr, cp);
9988 				goto out;
9989 			}
9990 			break;
9991 
9992 		default:
9993 			break;
9994 		}
9995 	}
9996 
9997 	ASSERT(ngroup > 0);
9998 	error = ACL_MODE_CHECK(mode, (gperm & mask) << 6, cr, cp);
9999 
10000 out:
10001 	if (gotvsec) {
10002 		if (vsec.vsa_aclcnt && vsec.vsa_aclentp)
10003 			kmem_free(vsec.vsa_aclentp,
10004 			    vsec.vsa_aclcnt * sizeof (aclent_t));
10005 		if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp)
10006 			kmem_free(vsec.vsa_dfaclentp,
10007 			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
10008 	}
10009 
10010 	return (error);
10011 }
10012 
10013 /*
10014  * see if permissions allow for removal of the given file from
10015  * the given directory.
10016  */
10017 static int
10018 cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr)
10019 {
10020 	uid_t uid;
10021 	/*
10022 	 * If the containing directory is sticky, the user must:
10023 	 *  - own the directory, or
10024 	 *  - own the file, or
10025 	 *  - be able to write the file (if it's a plain file), or
10026 	 *  - be sufficiently privileged.
10027 	 */
10028 	if ((dcp->c_attr.va_mode & S_ISVTX) &&
10029 	    ((uid = crgetuid(cr)) != dcp->c_attr.va_uid) &&
10030 	    (uid != cp->c_attr.va_uid) &&
10031 	    (cp->c_attr.va_type != VREG ||
10032 	    cachefs_access_local(cp, VWRITE, cr) != 0))
10033 		return (secpolicy_vnode_remove(cr));
10034 
10035 	return (0);
10036 }
10037 
10038 /*
10039  * Returns a new name, may even be unique.
10040  * Stolen from nfs code.
10041  * Since now we will use renaming to .cfs* in place of .nfs*
10042  * for CacheFS. Both NFS and CacheFS will rename opened files.
10043  */
10044 static char cachefs_prefix[] = ".cfs";
10045 kmutex_t cachefs_newnum_lock;
10046 
10047 static char *
10048 cachefs_newname(void)
10049 {
10050 	static uint_t newnum = 0;
10051 	char *news;
10052 	char *s, *p;
10053 	uint_t id;
10054 
10055 	mutex_enter(&cachefs_newnum_lock);
10056 	if (newnum == 0) {
10057 		newnum = gethrestime_sec() & 0xfffff;
10058 		newnum |= 0x10000;
10059 	}
10060 	id = newnum++;
10061 	mutex_exit(&cachefs_newnum_lock);
10062 
10063 	news = cachefs_kmem_alloc(MAXNAMELEN, KM_SLEEP);
10064 	s = news;
10065 	p = cachefs_prefix;
10066 	while (*p != '\0')
10067 		*s++ = *p++;
10068 	while (id != 0) {
10069 		*s++ = "0123456789ABCDEF"[id & 0x0f];
10070 		id >>= 4;
10071 	}
10072 	*s = '\0';
10073 	return (news);
10074 }
10075 
10076 /*
10077  * Called to rename the specified file to a temporary file so
10078  * operations to the file after remove work.
10079  * Must call this routine with the dir c_rwlock held as a writer.
10080  */
10081 static int
10082 /*ARGSUSED*/
10083 cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm, cred_t *cr)
10084 {
10085 	cnode_t *cp = VTOC(vp);
10086 	char *tmpname;
10087 	fscache_t *fscp = C_TO_FSCACHE(cp);
10088 	int error;
10089 
10090 	ASSERT(RW_WRITE_HELD(&(VTOC(dvp)->c_rwlock)));
10091 
10092 	/* get the new name for the file */
10093 	tmpname = cachefs_newname();
10094 
10095 	/* do the link */
10096 	if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
10097 		error = cachefs_link_connected(dvp, vp, tmpname, cr);
10098 	else
10099 		error = cachefs_link_disconnected(dvp, vp, tmpname, cr);
10100 	if (error) {
10101 		cachefs_kmem_free(tmpname, MAXNAMELEN);
10102 		return (error);
10103 	}
10104 
10105 	mutex_enter(&cp->c_statelock);
10106 	if (cp->c_unldvp) {
10107 		VN_RELE(cp->c_unldvp);
10108 		cachefs_kmem_free(cp->c_unlname, MAXNAMELEN);
10109 		crfree(cp->c_unlcred);
10110 	}
10111 
10112 	VN_HOLD(dvp);
10113 	cp->c_unldvp = dvp;
10114 	crhold(cr);
10115 	cp->c_unlcred = cr;
10116 	cp->c_unlname = tmpname;
10117 
10118 	/* drop the backvp so NFS does not also do a rename */
10119 	mutex_exit(&cp->c_statelock);
10120 
10121 	return (0);
10122 }
10123 
10124 /*
10125  * Marks the cnode as modified.
10126  */
10127 static void
10128 cachefs_modified(cnode_t *cp)
10129 {
10130 	fscache_t *fscp = C_TO_FSCACHE(cp);
10131 	struct vattr va;
10132 	int error;
10133 
10134 	ASSERT(MUTEX_HELD(&cp->c_statelock));
10135 	ASSERT(cp->c_metadata.md_rlno);
10136 
10137 	/* if not on the modify list */
10138 	if (cp->c_metadata.md_rltype != CACHEFS_RL_MODIFIED) {
10139 		/* put on modified list, also marks the file as modified */
10140 		cachefs_rlent_moveto(fscp->fs_cache, CACHEFS_RL_MODIFIED,
10141 		    cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks);
10142 		cp->c_metadata.md_rltype = CACHEFS_RL_MODIFIED;
10143 		cp->c_flags |= CN_UPDATED;
10144 
10145 		/* if a modified regular file that is not local */
10146 		if (((cp->c_id.cid_flags & CFS_CID_LOCAL) == 0) &&
10147 		    (cp->c_metadata.md_flags & MD_FILE) &&
10148 		    (cp->c_attr.va_type == VREG)) {
10149 
10150 			if (cp->c_frontvp == NULL)
10151 				(void) cachefs_getfrontfile(cp);
10152 			if (cp->c_frontvp) {
10153 				/* identify file so fsck knows it is modified */
10154 				va.va_mode = 0766;
10155 				va.va_mask = AT_MODE;
10156 				error = VOP_SETATTR(cp->c_frontvp,
10157 				    &va, 0, kcred, NULL);
10158 				if (error) {
10159 					cmn_err(CE_WARN,
10160 					    "Cannot change ff mode.\n");
10161 				}
10162 			}
10163 		}
10164 	}
10165 }
10166 
10167 /*
10168  * Marks the cnode as modified.
10169  * Allocates a rl slot for the cnode if necessary.
10170  * Returns 0 for success, !0 if cannot get an rl slot.
10171  */
10172 static int
10173 cachefs_modified_alloc(cnode_t *cp)
10174 {
10175 	fscache_t *fscp = C_TO_FSCACHE(cp);
10176 	filegrp_t *fgp = cp->c_filegrp;
10177 	int error;
10178 	rl_entry_t rl_ent;
10179 
10180 	ASSERT(MUTEX_HELD(&cp->c_statelock));
10181 
10182 	/* get the rl slot if needed */
10183 	if (cp->c_metadata.md_rlno == 0) {
10184 		/* get a metadata slot if we do not have one yet */
10185 		if (cp->c_flags & CN_ALLOC_PENDING) {
10186 			if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
10187 				(void) filegrp_allocattr(cp->c_filegrp);
10188 			}
10189 			error = filegrp_create_metadata(cp->c_filegrp,
10190 			    &cp->c_metadata, &cp->c_id);
10191 			if (error)
10192 				return (error);
10193 			cp->c_flags &= ~CN_ALLOC_PENDING;
10194 		}
10195 
10196 		/* get a free rl entry */
10197 		rl_ent.rl_fileno = cp->c_id.cid_fileno;
10198 		rl_ent.rl_local = (cp->c_id.cid_flags & CFS_CID_LOCAL) ? 1 : 0;
10199 		rl_ent.rl_fsid = fscp->fs_cfsid;
10200 		rl_ent.rl_attrc = 0;
10201 		error = cachefs_rl_alloc(fscp->fs_cache, &rl_ent,
10202 		    &cp->c_metadata.md_rlno);
10203 		if (error)
10204 			return (error);
10205 		cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
10206 
10207 		/* hold the filegrp so the attrcache file is not gc */
10208 		error = filegrp_ffhold(fgp);
10209 		if (error) {
10210 			cachefs_rlent_moveto(fscp->fs_cache,
10211 			    CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
10212 			cp->c_metadata.md_rlno = 0;
10213 			return (error);
10214 		}
10215 	}
10216 	cachefs_modified(cp);
10217 	return (0);
10218 }
10219 
10220 int
10221 cachefs_vtype_aclok(vnode_t *vp)
10222 {
10223 	vtype_t *vtp, oktypes[] = {VREG, VDIR, VFIFO, VNON};
10224 
10225 	if (vp->v_type == VNON)
10226 		return (0);
10227 
10228 	for (vtp = oktypes; *vtp != VNON; vtp++)
10229 		if (vp->v_type == *vtp)
10230 			break;
10231 
10232 	return (*vtp != VNON);
10233 }
10234 
10235 static int
10236 cachefs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
10237     caller_context_t *ct)
10238 {
10239 	int error = 0;
10240 	fscache_t *fscp = C_TO_FSCACHE(VTOC(vp));
10241 
10242 	/* Assert cachefs compatibility if NFSv4 is in use */
10243 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
10244 	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(vp));
10245 
10246 	if (cmd == _PC_FILESIZEBITS) {
10247 		u_offset_t maxsize = fscp->fs_offmax;
10248 		(*valp) = 0;
10249 		while (maxsize != 0) {
10250 			maxsize >>= 1;
10251 			(*valp)++;
10252 		}
10253 		(*valp)++;
10254 	} else
10255 		error = fs_pathconf(vp, cmd, valp, cr, ct);
10256 
10257 	return (error);
10258 }
10259