xref: /titanic_41/usr/src/uts/common/fs/cachefs/cachefs_vnops.c (revision d50c8f9072726f065d6f78328111db69c651db00)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/cred.h>
32 #include <sys/proc.h>
33 #include <sys/user.h>
34 #include <sys/time.h>
35 #include <sys/vnode.h>
36 #include <sys/vfs.h>
37 #include <sys/vfs_opreg.h>
38 #include <sys/file.h>
39 #include <sys/filio.h>
40 #include <sys/uio.h>
41 #include <sys/buf.h>
42 #include <sys/mman.h>
43 #include <sys/tiuser.h>
44 #include <sys/pathname.h>
45 #include <sys/dirent.h>
46 #include <sys/conf.h>
47 #include <sys/debug.h>
48 #include <sys/vmsystm.h>
49 #include <sys/fcntl.h>
50 #include <sys/flock.h>
51 #include <sys/swap.h>
52 #include <sys/errno.h>
53 #include <sys/sysmacros.h>
54 #include <sys/disp.h>
55 #include <sys/kmem.h>
56 #include <sys/cmn_err.h>
57 #include <sys/vtrace.h>
58 #include <sys/mount.h>
59 #include <sys/bootconf.h>
60 #include <sys/dnlc.h>
61 #include <sys/stat.h>
62 #include <sys/acl.h>
63 #include <sys/policy.h>
64 #include <rpc/types.h>
65 
66 #include <vm/hat.h>
67 #include <vm/as.h>
68 #include <vm/page.h>
69 #include <vm/pvn.h>
70 #include <vm/seg.h>
71 #include <vm/seg_map.h>
72 #include <vm/seg_vn.h>
73 #include <vm/rm.h>
74 #include <sys/fs/cachefs_fs.h>
75 #include <sys/fs/cachefs_dir.h>
76 #include <sys/fs/cachefs_dlog.h>
77 #include <sys/fs/cachefs_ioctl.h>
78 #include <sys/fs/cachefs_log.h>
79 #include <fs/fs_subr.h>
80 
81 int cachefs_dnlc;	/* use dnlc, debugging */
82 
83 static void cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp,
84     cred_t *cr);
85 static void cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap,
86     cred_t *cr);
87 static void cachefs_createacl(cnode_t *dcp, cnode_t *newcp);
88 static int cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec);
89 static int cachefs_getacldirvp(cnode_t *cp);
90 static void cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec);
91 static int cachefs_access_local(void *cp, int mode, cred_t *cr);
92 static int cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr);
93 static int cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen,
94     u_offset_t iooff, cred_t *cr);
95 static int cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen,
96     u_offset_t iooff, cred_t *cr);
97 static int cachefs_setattr_connected(vnode_t *vp, vattr_t *vap, int flags,
98     cred_t *cr, caller_context_t *ct);
99 static int cachefs_setattr_disconnected(vnode_t *vp, vattr_t *vap,
100     int flags, cred_t *cr, caller_context_t *ct);
101 static int cachefs_access_connected(struct vnode *vp, int mode,
102     int flags, cred_t *cr);
103 static int cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp,
104     cred_t *cr);
105 static int cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva,
106     char *tnm, cred_t *cr);
107 static int cachefs_symlink_disconnected(vnode_t *dvp, char *lnm,
108     vattr_t *tva, char *tnm, cred_t *cr);
109 static int cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm,
110     cred_t *cr);
111 static int cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp,
112     char *tnm, cred_t *cr);
113 static int cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap,
114     vnode_t **vpp, cred_t *cr);
115 static int cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
116     vnode_t **vpp, cred_t *cr);
117 static int cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr);
118 static int cachefs_rmdir_connected(vnode_t *dvp, char *nm,
119     vnode_t *cdir, cred_t *cr, vnode_t *vp);
120 static int cachefs_rmdir_disconnected(vnode_t *dvp, char *nm,
121     vnode_t *cdir, cred_t *cr, vnode_t *vp);
122 static char *cachefs_newname(void);
123 static int cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm,
124     cred_t *cr);
125 static int cachefs_rename_connected(vnode_t *odvp, char *onm,
126     vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp);
127 static int cachefs_rename_disconnected(vnode_t *odvp, char *onm,
128     vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp);
129 static int cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr,
130     int *eofp);
131 static int cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop,
132     cred_t *cr, int *eofp);
133 static int cachefs_readback_translate(cnode_t *cp, uio_t *uiop,
134 	cred_t *cr, int *eofp);
135 
136 static int cachefs_setattr_common(vnode_t *vp, vattr_t *vap, int flags,
137     cred_t *cr, caller_context_t *ct);
138 
139 static	int	cachefs_open(struct vnode **, int, cred_t *);
140 static	int	cachefs_close(struct vnode *, int, int, offset_t,
141 			cred_t *);
142 static	int	cachefs_read(struct vnode *, struct uio *, int, cred_t *,
143 			caller_context_t *);
144 static	int	cachefs_write(struct vnode *, struct uio *, int, cred_t *,
145 			caller_context_t *);
146 static	int	cachefs_ioctl(struct vnode *, int, intptr_t, int, cred_t *,
147 			int *);
148 static	int	cachefs_getattr(struct vnode *, struct vattr *, int,
149 			cred_t *);
150 static	int	cachefs_setattr(struct vnode *, struct vattr *,
151 			int, cred_t *, caller_context_t *);
152 static	int	cachefs_access(struct vnode *, int, int, cred_t *);
153 static	int	cachefs_lookup(struct vnode *, char *, struct vnode **,
154 			struct pathname *, int, struct vnode *, cred_t *);
155 static	int	cachefs_create(struct vnode *, char *, struct vattr *,
156 			enum vcexcl, int, struct vnode **, cred_t *, int);
157 static	int	cachefs_create_connected(vnode_t *dvp, char *nm,
158 			vattr_t *vap, enum vcexcl exclusive, int mode,
159 			vnode_t **vpp, cred_t *cr);
160 static	int	cachefs_create_disconnected(vnode_t *dvp, char *nm,
161 			vattr_t *vap, enum vcexcl exclusive, int mode,
162 			vnode_t **vpp, cred_t *cr);
163 static	int	cachefs_remove(struct vnode *, char *, cred_t *);
164 static	int	cachefs_link(struct vnode *, struct vnode *, char *,
165 			cred_t *);
166 static	int	cachefs_rename(struct vnode *, char *, struct vnode *,
167 			char *, cred_t *);
168 static	int	cachefs_mkdir(struct vnode *, char *, struct
169 			vattr *, struct vnode **, cred_t *);
170 static	int	cachefs_rmdir(struct vnode *, char *, struct vnode *,
171 			cred_t *);
172 static	int	cachefs_readdir(struct vnode *, struct uio *,
173 			cred_t *, int *);
174 static	int	cachefs_symlink(struct vnode *, char *, struct vattr *,
175 			char *, cred_t *);
176 static	int	cachefs_readlink(struct vnode *, struct uio *, cred_t *);
177 static int cachefs_readlink_connected(vnode_t *vp, uio_t *uiop, cred_t *cr);
178 static int cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop);
179 static	int	cachefs_fsync(struct vnode *, int, cred_t *);
180 static	void	cachefs_inactive(struct vnode *, cred_t *);
181 static	int	cachefs_fid(struct vnode *, struct fid *);
182 static	int	cachefs_rwlock(struct vnode *, int, caller_context_t *);
183 static	void	cachefs_rwunlock(struct vnode *, int, caller_context_t *);
184 static	int	cachefs_seek(struct vnode *, offset_t, offset_t *);
185 static	int	cachefs_frlock(struct vnode *, int, struct flock64 *,
186 			int, offset_t, struct flk_callback *, cred_t *);
187 static	int	cachefs_space(struct vnode *, int, struct flock64 *, int,
188 			offset_t, cred_t *, caller_context_t *);
189 static	int	cachefs_realvp(struct vnode *, struct vnode **);
190 static	int	cachefs_getpage(struct vnode *, offset_t, size_t, uint_t *,
191 			struct page *[], size_t, struct seg *, caddr_t,
192 			enum seg_rw, cred_t *);
193 static	int	cachefs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
194 			struct page *[], size_t, struct seg *, caddr_t,
195 			enum seg_rw, cred_t *);
196 static	int	cachefs_getapage_back(struct vnode *, u_offset_t, size_t,
197 		uint_t *, struct page *[], size_t, struct seg *, caddr_t,
198 			enum seg_rw, cred_t *);
199 static	int	cachefs_putpage(struct vnode *, offset_t, size_t, int,
200 			cred_t *);
201 static	int	cachefs_map(struct vnode *, offset_t, struct as *,
202 			caddr_t *, size_t, uchar_t, uchar_t, uint_t, cred_t *);
203 static	int	cachefs_addmap(struct vnode *, offset_t, struct as *,
204 			caddr_t, size_t, uchar_t, uchar_t, uint_t, cred_t *);
205 static	int	cachefs_delmap(struct vnode *, offset_t, struct as *,
206 			caddr_t, size_t, uint_t, uint_t, uint_t, cred_t *);
207 static int	cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec,
208 			int flag, cred_t *cr);
209 static int	cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec,
210 			int flag, cred_t *cr);
211 static	int	cachefs_shrlock(vnode_t *, int, struct shrlock *, int,
212 			cred_t *);
213 static int cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
214     cred_t *cr);
215 static int cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec,
216     int flag, cred_t *cr);
217 
218 static int	cachefs_dump(struct vnode *, caddr_t, int, int);
219 static int	cachefs_pageio(struct vnode *, page_t *,
220 		    u_offset_t, size_t, int, cred_t *);
221 static int	cachefs_writepage(struct vnode *vp, caddr_t base,
222 		    int tcount, struct uio *uiop);
223 static int	cachefs_pathconf(vnode_t *, int, ulong_t *, cred_t *);
224 
225 static int	cachefs_read_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
226 			cred_t *cr, caller_context_t *ct);
227 static int	cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
228 			cred_t *cr, caller_context_t *ct);
229 static int	cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap,
230 			int flags, cred_t *cr);
231 static int	cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr,
232 			vnode_t *vp);
233 static int	cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off,
234 			size_t len, uint_t *protp, struct page *pl[],
235 			size_t plsz, struct seg *seg, caddr_t addr,
236 			enum seg_rw rw, cred_t *cr);
237 static int	cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off,
238 			size_t len, int flags, cred_t *cr);
239 static int	cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off,
240 			struct as *as, caddr_t *addrp, size_t len, uchar_t prot,
241 			uchar_t maxprot, uint_t flags, cred_t *cr);
242 static int	cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd,
243 			struct flock64 *bfp, int flag, offset_t offset,
244 			cred_t *cr, caller_context_t *ct);
245 
246 struct vnodeops *cachefs_vnodeops;
247 
/*
 * Operation template for cachefs vnodes.
 *
 * Each entry pairs a VOPNAME_* operation name with the cachefs routine
 * that implements it.  The list is terminated by a NULL name.
 * cachefs_init_vnops() passes this table to vn_make_ops(), which fills
 * in cachefs_vnodeops.
 */
static const fs_operation_def_t cachefs_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = cachefs_open },
	VOPNAME_CLOSE,		{ .vop_close = cachefs_close },
	VOPNAME_READ,		{ .vop_read = cachefs_read },
	VOPNAME_WRITE,		{ .vop_write = cachefs_write },
	VOPNAME_IOCTL,		{ .vop_ioctl = cachefs_ioctl },
	VOPNAME_GETATTR,	{ .vop_getattr = cachefs_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = cachefs_setattr },
	VOPNAME_ACCESS,		{ .vop_access = cachefs_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = cachefs_lookup },
	VOPNAME_CREATE,		{ .vop_create = cachefs_create },
	VOPNAME_REMOVE,		{ .vop_remove = cachefs_remove },
	VOPNAME_LINK,		{ .vop_link = cachefs_link },
	VOPNAME_RENAME,		{ .vop_rename = cachefs_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = cachefs_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = cachefs_rmdir },
	VOPNAME_READDIR,	{ .vop_readdir = cachefs_readdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = cachefs_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = cachefs_readlink },
	VOPNAME_FSYNC,		{ .vop_fsync = cachefs_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = cachefs_inactive },
	VOPNAME_FID,		{ .vop_fid = cachefs_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = cachefs_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = cachefs_rwunlock },
	VOPNAME_SEEK,		{ .vop_seek = cachefs_seek },
	VOPNAME_FRLOCK,		{ .vop_frlock = cachefs_frlock },
	VOPNAME_SPACE,		{ .vop_space = cachefs_space },
	VOPNAME_REALVP,		{ .vop_realvp = cachefs_realvp },
	VOPNAME_GETPAGE,	{ .vop_getpage = cachefs_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = cachefs_putpage },
	VOPNAME_MAP,		{ .vop_map = cachefs_map },
	VOPNAME_ADDMAP,		{ .vop_addmap = cachefs_addmap },
	VOPNAME_DELMAP,		{ .vop_delmap = cachefs_delmap },
	VOPNAME_DUMP,		{ .vop_dump = cachefs_dump },
	VOPNAME_PATHCONF,	{ .vop_pathconf = cachefs_pathconf },
	VOPNAME_PAGEIO,		{ .vop_pageio = cachefs_pageio },
	VOPNAME_SETSECATTR,	{ .vop_setsecattr = cachefs_setsecattr },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = cachefs_getsecattr },
	VOPNAME_SHRLOCK,	{ .vop_shrlock = cachefs_shrlock },
	NULL,			NULL
};
289 
290 /* forward declarations of statics */
291 static void cachefs_modified(cnode_t *cp);
292 static int cachefs_modified_alloc(cnode_t *cp);
293 
294 int
295 cachefs_init_vnops(char *name)
296 {
297 	return (vn_make_ops(name,
298 		    cachefs_vnodeops_template, &cachefs_vnodeops));
299 }
300 
301 struct vnodeops *
302 cachefs_getvnodeops(void)
303 {
304 	return (cachefs_vnodeops);
305 }
306 
/*
 * cachefs_open - open entry point for cachefs vnodes.
 *
 *	vpp	pointer to the vnode being opened
 *	flag	open mode; FREAD and FWRITE are examined here
 *	cr	caller's credentials; held and saved in the cnode if the
 *		cnode has no credentials yet
 *
 * Returns 0 on success.  Fails with EPERM outside the global zone and
 * with EISDIR when a directory or symbolic link is opened for writing;
 * otherwise returns the error from cachefs_cd_access(), from VOP_OPEN()
 * on the back vnode, or from the consistency check.
 *
 * The body is a retry loop.  An iteration is restarted after
 * CFS_TIMEOUT() reports a timeout (cachefs_cd_timedout() is called
 * first), or, while disconnected, with `connected' set to 1 when the
 * open cannot be satisfied in the disconnected state.
 */
static int
cachefs_open(vnode_t **vpp, int flag, cred_t *cr)
{
	int error = 0;
	cnode_t *cp = VTOC(*vpp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int held = 0;		/* set while cachefs_cd_access() is held */
	int type;
	int connected = 0;	/* passed to cachefs_cd_access() */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_open: ENTER vpp %p flag %x\n",
		    (void *)vpp, flag);
#endif
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}
	if ((flag & FWRITE) &&
	    ((*vpp)->v_type == VDIR || (*vpp)->v_type == VLNK)) {
		error = EISDIR;
		goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the open operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			goto out;
		held = 1;

		mutex_enter(&cp->c_statelock);

		/* grab creds if we do not have any yet */
		if (cp->c_cred == NULL) {
			crhold(cr);
			cp->c_cred = cr;
		}
		/* cleared again below if the open is passed to the back file */
		cp->c_flags |= CN_NEEDOPEN;

		/* if we are disconnected */
		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
			/* if we cannot write to the file system */
			if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp)) {
				mutex_exit(&cp->c_statelock);
				/* retry, this time requesting connected access */
				connected = 1;
				continue;
			}
			/*
			 * Allow read only requests to continue
			 */
			if ((flag & (FWRITE|FREAD)) == FREAD) {
				/* track the flag for opening the backvp */
				cp->c_rdcnt++;
				mutex_exit(&cp->c_statelock);
				error = 0;
				break;
			}

			/*
			 * check credentials  - if this procs
			 * credentials don't match the creds in the
			 * cnode disallow writing while disconnected.
			 */
			if (crcmp(cp->c_cred, CRED()) != 0 &&
			    secpolicy_vnode_access(CRED(), *vpp,
					    cp->c_attr.va_uid, VWRITE) != 0) {
				mutex_exit(&cp->c_statelock);
				/* retry, this time requesting connected access */
				connected = 1;
				continue;
			}
			/* to get here, we know that the WRITE flag is on */
			cp->c_wrcnt++;
			if (flag & FREAD)
				cp->c_rdcnt++;
		}

		/* else if we are connected */
		else {
			/* if cannot use the cached copy of the file */
			if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp) &&
			    ((cp->c_flags & CN_NOCACHE) == 0))
				cachefs_nocache(cp);

			/* pass open to the back file */
			if (cp->c_backvp) {
				cp->c_flags &= ~CN_NEEDOPEN;
				CFS_DPRINT_BACKFS_NFSV4(fscp,
					("cachefs_open (nfsv4): cnode %p, "
					"backvp %p\n", cp, cp->c_backvp));
				error = VOP_OPEN(&cp->c_backvp, flag, cr);
				if (CFS_TIMEOUT(fscp, error)) {
					/* drop access, note the timeout, retry */
					mutex_exit(&cp->c_statelock);
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					continue;
				} else if (error) {
					mutex_exit(&cp->c_statelock);
					break;
				}
			} else {
				/* backvp will be VOP_OPEN'd later */
				if (flag & FREAD)
					cp->c_rdcnt++;
				if (flag & FWRITE)
					cp->c_wrcnt++;
			}

			/*
			 * Now perform a consistency check on the file.
			 * If strict consistency then force a check to
			 * the backfs even if the timeout has not expired
			 * for close-to-open consistency.
			 */
			type = 0;
			if (fscp->fs_consttype == CFS_FS_CONST_STRICT)
				type = C_BACK_CHECK;
			error = CFSOP_CHECK_COBJECT(fscp, cp, type, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/* drop access, note the timeout, retry */
				mutex_exit(&cp->c_statelock);
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				continue;
			}
		}
		mutex_exit(&cp->c_statelock);
		break;
	}
	if (held)
		cachefs_cd_release(fscp);
out:
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_open: EXIT vpp %p error %d\n",
		    (void *)vpp, error);
#endif
	return (error);
}
469 
/*
 * cachefs_close - close entry point for cachefs vnodes.
 *
 *	vp	vnode being closed
 *	flag	open mode of the descriptor being closed
 *	count	a value greater than 1 is treated as "not the last close"
 *	offset	passed through to VOP_CLOSE() on the back vnode
 *	cr	caller's credentials
 *
 * Returns 0 on success, otherwise the error from cachefs_cd_access(),
 * from flushing dirty pages, from VOP_CLOSE() on the back vnode, or the
 * error that was saved in the cnode (c_error), which is cleared here on
 * the last close.
 *
 * When the caller is the cachefs daemon closing the root vnode, the
 * daemon state in the fscache (and, for a root file system, in the
 * cache) is reset and 0 is returned without touching the back vnode.
 */
/* ARGSUSED */
static int
cachefs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
{
	int error = 0;
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int held = 0;		/* set while cachefs_cd_access() is held */
	int connected = 0;	/* passed to cachefs_cd_access() */
	int close_cnt = 1;	/* count handed to the last-close VOP_CLOSE() */
	cachefscache_t *cachep;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_close: ENTER vp %p\n", (void *)vp);
#endif
	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the close operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	/*
	 * File could have been passed in or inherited from the global zone, so
	 * we don't want to flat out reject the request; we'll just leave things
	 * the way they are and let the backfs (NFS) deal with it.
	 */
	/* get rid of any local locks */
	if (CFS_ISFS_LLOCK(fscp)) {
		(void) cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
	}

	/* clean up if this is the daemon closing down */
	if ((fscp->fs_cddaemonid == ttoproc(curthread)->p_pid) &&
	    ((ttoproc(curthread)->p_pid) != 0) &&
	    (vp == fscp->fs_rootvp) &&
	    (count == 1)) {
		mutex_enter(&fscp->fs_cdlock);
		fscp->fs_cddaemonid = 0;
		/* with a dlog file present the fs is left disconnected */
		if (fscp->fs_dlogfile)
			fscp->fs_cdconnected = CFS_CD_DISCONNECTED;
		else
			fscp->fs_cdconnected = CFS_CD_CONNECTED;
		cv_broadcast(&fscp->fs_cdwaitcv);
		mutex_exit(&fscp->fs_cdlock);
		if (fscp->fs_flags & CFS_FS_ROOTFS) {
			cachep = fscp->fs_cache;
			mutex_enter(&cachep->c_contentslock);
			ASSERT(cachep->c_rootdaemonid != 0);
			cachep->c_rootdaemonid = 0;
			mutex_exit(&cachep->c_contentslock);
		}
		return (0);
	}

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			goto out;
		held = 1;
		connected = 0;

		/* if not the last close */
		if (count > 1) {
			/* nothing is passed to the back fs while disconnected */
			if (fscp->fs_cdconnected != CFS_CD_CONNECTED)
				goto out;
			mutex_enter(&cp->c_statelock);
			if (cp->c_backvp) {
				CFS_DPRINT_BACKFS_NFSV4(fscp,
					("cachefs_close (nfsv4): cnode %p, "
					"backvp %p\n", cp, cp->c_backvp));
				error = VOP_CLOSE(cp->c_backvp, flag, count,
				    offset, cr);
				if (CFS_TIMEOUT(fscp, error)) {
					/* drop access, note the timeout, retry */
					mutex_exit(&cp->c_statelock);
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					continue;
				}
			}
			mutex_exit(&cp->c_statelock);
			goto out;
		}

		/*
		 * If the file is an unlinked file, then flush the lookup
		 * cache so that inactive will be called if this is
		 * the last reference.  It will invalidate all of the
		 * cached pages, without writing them out.  Writing them
		 * out is not required because they will be written to a
		 * file which will be immediately removed.
		 */
		if (cp->c_unldvp != NULL) {
			dnlc_purge_vp(vp);
			mutex_enter(&cp->c_statelock);
			error = cp->c_error;
			cp->c_error = 0;
			mutex_exit(&cp->c_statelock);
			/* always call VOP_CLOSE() for back fs vnode */
		}

		/* force dirty data to stable storage */
		else if ((vp->v_type == VREG) && (flag & FWRITE) &&
				!CFS_ISFS_BACKFS_NFSV4(fscp)) {
			/* clean the cachefs pages synchronously */
			error = cachefs_putpage_common(vp, (offset_t)0,
			    0, 0, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					continue;
				} else {
					/* retry requesting connected access */
					connected = 1;
					continue;
				}
			}

			/* if no space left in cache, wait until connected */
			if ((error == ENOSPC) &&
			    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
				connected = 1;
				continue;
			}

			/* clear the cnode error if putpage worked */
			if ((error == 0) && cp->c_error) {
				mutex_enter(&cp->c_statelock);
				cp->c_error = 0;
				mutex_exit(&cp->c_statelock);
			}

			/* if any other important error */
			if (cp->c_error) {
				/* get rid of the pages */
				(void) cachefs_putpage_common(vp,
				    (offset_t)0, 0, B_INVAL | B_FORCE, cr);
				dnlc_purge_vp(vp);
			}
		}

		mutex_enter(&cp->c_statelock);
		if (cp->c_backvp &&
		    (fscp->fs_cdconnected == CFS_CD_CONNECTED)) {
			error = VOP_CLOSE(cp->c_backvp, flag, close_cnt,
			    offset, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				mutex_exit(&cp->c_statelock);
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				/* don't decrement the vnode counts again */
				close_cnt = 0;
				continue;
			}
		}
		mutex_exit(&cp->c_statelock);
		break;
	}

	/* report a saved cnode error if nothing else failed, then clear it */
	mutex_enter(&cp->c_statelock);
	if (!error)
		error = cp->c_error;
	cp->c_error = 0;
	mutex_exit(&cp->c_statelock);

out:
	if (held)
		cachefs_cd_release(fscp);
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_close: EXIT vp %p\n", (void *)vp);
#endif
	return (error);
}
664 
665 /*ARGSUSED*/
666 static int
667 cachefs_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
668 	caller_context_t *ct)
669 {
670 	struct cnode *cp = VTOC(vp);
671 	fscache_t *fscp = C_TO_FSCACHE(cp);
672 	register u_offset_t off;
673 	register int mapoff;
674 	register caddr_t base;
675 	int n;
676 	offset_t diff;
677 	uint_t flags = 0;
678 	int error = 0;
679 
680 #if 0
681 	if (vp->v_flag & VNOCACHE)
682 		flags = SM_INVAL;
683 #endif
684 	if (getzoneid() != GLOBAL_ZONEID)
685 		return (EPERM);
686 	if (vp->v_type != VREG)
687 		return (EISDIR);
688 
689 	ASSERT(RW_READ_HELD(&cp->c_rwlock));
690 
691 	if (uiop->uio_resid == 0)
692 		return (0);
693 
694 
695 	if (uiop->uio_loffset < (offset_t)0)
696 		return (EINVAL);
697 
698 	/*
699 	 * Call backfilesystem to read if NFSv4, the cachefs code
700 	 * does the read from the back filesystem asynchronously
701 	 * which is not supported by pass-through functionality.
702 	 */
703 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
704 		error = cachefs_read_backfs_nfsv4(vp, uiop, ioflag, cr, ct);
705 		goto out;
706 	}
707 
708 	if (MANDLOCK(vp, cp->c_attr.va_mode)) {
709 		error = chklock(vp, FREAD, (offset_t)uiop->uio_loffset,
710 			    uiop->uio_resid, uiop->uio_fmode, ct);
711 		if (error)
712 			return (error);
713 	}
714 
715 	/*
716 	 * Sit in a loop and transfer (uiomove) the data in up to
717 	 * MAXBSIZE chunks. Each chunk is mapped into the kernel's
718 	 * address space as needed and then released.
719 	 */
720 	do {
721 		/*
722 		 *	off	Offset of current MAXBSIZE chunk
723 		 *	mapoff	Offset within the current chunk
724 		 *	n	Number of bytes to move from this chunk
725 		 *	base	kernel address of mapped in chunk
726 		 */
727 		off = uiop->uio_loffset & (offset_t)MAXBMASK;
728 		mapoff = uiop->uio_loffset & MAXBOFFSET;
729 		n = MAXBSIZE - mapoff;
730 		if (n > uiop->uio_resid)
731 			n = (uint_t)uiop->uio_resid;
732 
733 		/* perform consistency check */
734 		error = cachefs_cd_access(fscp, 0, 0);
735 		if (error)
736 			break;
737 		mutex_enter(&cp->c_statelock);
738 		error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
739 		diff = cp->c_size - uiop->uio_loffset;
740 		mutex_exit(&cp->c_statelock);
741 		if (CFS_TIMEOUT(fscp, error)) {
742 			cachefs_cd_release(fscp);
743 			cachefs_cd_timedout(fscp);
744 			error = 0;
745 			continue;
746 		}
747 		cachefs_cd_release(fscp);
748 
749 		if (error)
750 			break;
751 
752 		if (diff <= (offset_t)0)
753 			break;
754 		if (diff < (offset_t)n)
755 			n = diff;
756 
757 		base = segmap_getmapflt(segkmap, vp, off, (uint_t)n, 1, S_READ);
758 
759 		error = segmap_fault(kas.a_hat, segkmap, base, n,
760 			F_SOFTLOCK, S_READ);
761 		if (error) {
762 			(void) segmap_release(segkmap, base, 0);
763 			if (FC_CODE(error) == FC_OBJERR)
764 				error =  FC_ERRNO(error);
765 			else
766 				error = EIO;
767 			break;
768 		}
769 		error = uiomove(base+mapoff, n, UIO_READ, uiop);
770 		(void) segmap_fault(kas.a_hat, segkmap, base, n,
771 				F_SOFTUNLOCK, S_READ);
772 		if (error == 0) {
773 			/*
774 			 * if we read a whole page(s), or to eof,
775 			 *  we won't need this page(s) again soon.
776 			 */
777 			if (n + mapoff == MAXBSIZE ||
778 				uiop->uio_loffset == cp->c_size)
779 				flags |= SM_DONTNEED;
780 		}
781 		(void) segmap_release(segkmap, base, flags);
782 	} while (error == 0 && uiop->uio_resid > 0);
783 
784 out:
785 #ifdef CFSDEBUG
786 	CFS_DEBUG(CFSDEBUG_VOPS)
787 		printf("cachefs_read: EXIT error %d resid %ld\n", error,
788 			uiop->uio_resid);
789 #endif
790 	return (error);
791 }
792 
793 /*
794  * cachefs_read_backfs_nfsv4
795  *
796  * Call NFSv4 back filesystem to handle the read (cachefs
797  * pass-through support for NFSv4).
798  */
799 static int
800 cachefs_read_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
801 			caller_context_t *ct)
802 {
803 	cnode_t *cp = VTOC(vp);
804 	fscache_t *fscp = C_TO_FSCACHE(cp);
805 	vnode_t *backvp;
806 	int error;
807 
808 	/*
809 	 * For NFSv4 pass-through to work, only connected operation
810 	 * is supported, the cnode backvp must exist, and cachefs
811 	 * optional (eg., disconnectable) flags are turned off. Assert
812 	 * these conditions for the read operation.
813 	 */
814 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
815 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
816 
817 	/* Call backfs vnode op after extracting backvp */
818 	mutex_enter(&cp->c_statelock);
819 	backvp = cp->c_backvp;
820 	mutex_exit(&cp->c_statelock);
821 
822 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_read_backfs_nfsv4: cnode %p, "
823 					"backvp %p\n", cp, backvp));
824 
825 	(void) VOP_RWLOCK(backvp, V_WRITELOCK_FALSE, ct);
826 	error = VOP_READ(backvp, uiop, ioflag, cr, ct);
827 	VOP_RWUNLOCK(backvp, V_WRITELOCK_FALSE, ct);
828 
829 	/* Increment cache miss counter */
830 	fscp->fs_stats.st_misses++;
831 
832 	return (error);
833 }
834 
835 /*ARGSUSED*/
836 static int
837 cachefs_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
838 	caller_context_t *ct)
839 {
840 	struct cnode *cp = VTOC(vp);
841 	fscache_t *fscp = C_TO_FSCACHE(cp);
842 	int error = 0;
843 	u_offset_t off;
844 	caddr_t base;
845 	uint_t bsize;
846 	uint_t flags;
847 	int n, on;
848 	rlim64_t limit = uiop->uio_llimit;
849 	ssize_t resid;
850 	offset_t offset;
851 	offset_t remainder;
852 
853 #ifdef CFSDEBUG
854 	CFS_DEBUG(CFSDEBUG_VOPS)
855 		printf(
856 		"cachefs_write: ENTER vp %p offset %llu count %ld cflags %x\n",
857 			(void *)vp, uiop->uio_loffset, uiop->uio_resid,
858 			cp->c_flags);
859 #endif
860 	if (getzoneid() != GLOBAL_ZONEID) {
861 		error = EPERM;
862 		goto out;
863 	}
864 	if (vp->v_type != VREG) {
865 		error = EISDIR;
866 		goto out;
867 	}
868 
869 	ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
870 
871 	if (uiop->uio_resid == 0) {
872 		goto out;
873 	}
874 
875 	/* Call backfilesystem to write if NFSv4 */
876 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
877 		error = cachefs_write_backfs_nfsv4(vp, uiop, ioflag, cr, ct);
878 		goto out2;
879 	}
880 
881 	if (MANDLOCK(vp, cp->c_attr.va_mode)) {
882 		error = chklock(vp, FWRITE, (offset_t)uiop->uio_loffset,
883 			    uiop->uio_resid, uiop->uio_fmode, ct);
884 		if (error)
885 			goto out;
886 	}
887 
888 	if (ioflag & FAPPEND) {
889 		for (;;) {
890 			/* do consistency check to get correct file size */
891 			error = cachefs_cd_access(fscp, 0, 1);
892 			if (error)
893 				goto out;
894 			mutex_enter(&cp->c_statelock);
895 			error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
896 			uiop->uio_loffset = cp->c_size;
897 			mutex_exit(&cp->c_statelock);
898 			if (CFS_TIMEOUT(fscp, error)) {
899 				cachefs_cd_release(fscp);
900 				cachefs_cd_timedout(fscp);
901 				continue;
902 			}
903 			cachefs_cd_release(fscp);
904 			if (error)
905 				goto out;
906 			break;
907 		}
908 	}
909 
910 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
911 		limit = MAXOFFSET_T;
912 
913 	if (uiop->uio_loffset >= limit) {
914 		proc_t *p = ttoproc(curthread);
915 
916 		mutex_enter(&p->p_lock);
917 		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
918 		    p, RCA_UNSAFE_SIGINFO);
919 		mutex_exit(&p->p_lock);
920 		error = EFBIG;
921 		goto out;
922 	}
923 	if (uiop->uio_loffset > fscp->fs_offmax) {
924 		error = EFBIG;
925 		goto out;
926 	}
927 
928 	if (limit > fscp->fs_offmax)
929 		limit = fscp->fs_offmax;
930 
931 	if (uiop->uio_loffset < (offset_t)0) {
932 		error = EINVAL;
933 		goto out;
934 	}
935 
936 	offset = uiop->uio_loffset + uiop->uio_resid;
937 	/*
938 	 * Check to make sure that the process will not exceed
939 	 * its limit on file size.  It is okay to write up to
940 	 * the limit, but not beyond.  Thus, the write which
941 	 * reaches the limit will be short and the next write
942 	 * will return an error.
943 	 */
944 	remainder = 0;
945 	if (offset > limit) {
946 		remainder = (int)(offset - (u_offset_t)limit);
947 		uiop->uio_resid = limit - uiop->uio_loffset;
948 		if (uiop->uio_resid <= 0) {
949 			proc_t *p = ttoproc(curthread);
950 
951 			uiop->uio_resid += remainder;
952 			mutex_enter(&p->p_lock);
953 			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
954 			    p->p_rctls, p, RCA_UNSAFE_SIGINFO);
955 			mutex_exit(&p->p_lock);
956 			error = EFBIG;
957 			goto out;
958 		}
959 	}
960 
961 	resid = uiop->uio_resid;
962 	offset = uiop->uio_loffset;
963 	bsize = vp->v_vfsp->vfs_bsize;
964 
965 	/* loop around and do the write in MAXBSIZE chunks */
966 	do {
967 		/* mapping offset */
968 		off = uiop->uio_loffset & (offset_t)MAXBMASK;
969 		on = uiop->uio_loffset & MAXBOFFSET; /* Rel. offset */
970 		n = MAXBSIZE - on;
971 		if (n > uiop->uio_resid)
972 			n = (int)uiop->uio_resid;
973 		base = segmap_getmap(segkmap, vp, off);
974 		error = cachefs_writepage(vp, (base + on), n, uiop);
975 		if (error == 0) {
976 			flags = 0;
977 			/*
978 			 * Have written a whole block.Start an
979 			 * asynchronous write and mark the buffer to
980 			 * indicate that it won't be needed again
981 			 * soon.
982 			 */
983 			if (n + on == bsize) {
984 				flags = SM_WRITE |SM_ASYNC |SM_DONTNEED;
985 			}
986 #if 0
987 			/* XXX need to understand this */
988 			if ((ioflag & (FSYNC|FDSYNC)) ||
989 			    (cp->c_backvp && vn_has_flocks(cp->c_backvp))) {
990 				flags &= ~SM_ASYNC;
991 				flags |= SM_WRITE;
992 			}
993 #else
994 			if (ioflag & (FSYNC|FDSYNC)) {
995 				flags &= ~SM_ASYNC;
996 				flags |= SM_WRITE;
997 			}
998 #endif
999 			error = segmap_release(segkmap, base, flags);
1000 		} else {
1001 			(void) segmap_release(segkmap, base, 0);
1002 		}
1003 	} while (error == 0 && uiop->uio_resid > 0);
1004 
1005 out:
1006 	if (error == EINTR && (ioflag & (FSYNC|FDSYNC))) {
1007 		uiop->uio_resid = resid;
1008 		uiop->uio_loffset = offset;
1009 	} else
1010 		uiop->uio_resid += remainder;
1011 
1012 out2:
1013 #ifdef CFSDEBUG
1014 	CFS_DEBUG(CFSDEBUG_VOPS)
1015 		printf("cachefs_write: EXIT error %d\n", error);
1016 #endif
1017 	return (error);
1018 }
1019 
1020 /*
1021  * cachefs_write_backfs_nfsv4
1022  *
1023  * Call NFSv4 back filesystem to handle the write (cachefs
1024  * pass-through support for NFSv4).
1025  */
1026 static int
1027 cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
1028 			caller_context_t *ct)
1029 {
1030 	cnode_t *cp = VTOC(vp);
1031 	fscache_t *fscp = C_TO_FSCACHE(cp);
1032 	vnode_t *backvp;
1033 	int error;
1034 
1035 	/*
1036 	 * For NFSv4 pass-through to work, only connected operation
1037 	 * is supported, the cnode backvp must exist, and cachefs
1038 	 * optional (eg., disconnectable) flags are turned off. Assert
1039 	 * these conditions for the read operation.
1040 	 */
1041 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
1042 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
1043 
1044 	/* Call backfs vnode op after extracting the backvp */
1045 	mutex_enter(&cp->c_statelock);
1046 	backvp = cp->c_backvp;
1047 	mutex_exit(&cp->c_statelock);
1048 
1049 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_write_backfs_nfsv4: cnode %p, "
1050 					"backvp %p\n", cp, backvp));
1051 	(void) VOP_RWLOCK(backvp, V_WRITELOCK_TRUE, ct);
1052 	error = VOP_WRITE(backvp, uiop, ioflag, cr, ct);
1053 	VOP_RWUNLOCK(backvp, V_WRITELOCK_TRUE, ct);
1054 
1055 	return (error);
1056 }
1057 
1058 /*
1059  * see if we've charged ourselves for frontfile data at
1060  * the given offset.  If not, allocate a block for it now.
1061  */
1062 static int
1063 cachefs_charge_page(struct cnode *cp, u_offset_t offset)
1064 {
1065 	u_offset_t blockoff;
1066 	int error;
1067 	int inc;
1068 
1069 	ASSERT(MUTEX_HELD(&cp->c_statelock));
1070 	ASSERT(PAGESIZE <= MAXBSIZE);
1071 
1072 	error = 0;
1073 	blockoff = offset & (offset_t)MAXBMASK;
1074 
1075 	/* get the front file if necessary so allocblocks works */
1076 	if ((cp->c_frontvp == NULL) &&
1077 	    ((cp->c_flags & CN_NOCACHE) == 0)) {
1078 		(void) cachefs_getfrontfile(cp);
1079 	}
1080 	if (cp->c_flags & CN_NOCACHE)
1081 		return (1);
1082 
1083 	if (cachefs_check_allocmap(cp, blockoff))
1084 		return (0);
1085 
1086 	for (inc = PAGESIZE; inc < MAXBSIZE; inc += PAGESIZE)
1087 		if (cachefs_check_allocmap(cp, blockoff+inc))
1088 			return (0);
1089 
1090 	error = cachefs_allocblocks(C_TO_FSCACHE(cp)->fs_cache, 1,
1091 	    cp->c_metadata.md_rltype);
1092 	if (error == 0) {
1093 		cp->c_metadata.md_frontblks++;
1094 		cp->c_flags |= CN_UPDATED;
1095 	}
1096 	return (error);
1097 }
1098 
1099 /*
 * Called only by cachefs_write to copy up to tcount bytes from uiop into
 * the segmap mapping at base, one page (or partial page) per loop pass.
 *	vp     - vnode of the file being written
 *	base   - kernel address inside the segmap mapping to copy to
 *	tcount - total bytes to move; the visible caller passes <= MAXBSIZE
 *	uiop   - data source; uio_loffset is the current file offset
 *
 * Returns 0 on success, otherwise the error from cachefs_allocblocks(),
 * segmap_fault() (converted to an errno, or EIO), uiomove() or
 * cachefs_cd_access().  After each pass that obtains file system access,
 * c_size and c_attr.va_size are raised to cover the bytes copied so far.
 */
1104 static int
1105 cachefs_writepage(vnode_t *vp, caddr_t base, int tcount, uio_t *uiop)
1106 {
1107 	struct cnode *cp =  VTOC(vp);
1108 	fscache_t *fscp = C_TO_FSCACHE(cp);
1109 	register int n;
1110 	register u_offset_t offset;
1111 	int error = 0, terror;
1112 	extern struct as kas;
1113 	u_offset_t lastpage_off;
	/*
	 * NOTE(review): pagecreate is set inside the loop below but is
	 * never cleared, so once any pass creates a page the flag stays
	 * set for the remaining passes of this call -- confirm intended.
	 */
1114 	int pagecreate = 0;
1115 	int newpage;
1116 
	/*
	 * NOTE(review): the debug format below has a doubled backslash
	 * ahead of the newline escape, so a literal backslash is printed
	 * before the newline -- likely a typo.
	 */
1117 #ifdef CFSDEBUG
1118 	CFS_DEBUG(CFSDEBUG_VOPS)
1119 		printf(
1120 		    "cachefs_writepage: ENTER vp %p offset %llu len %ld\\\n",
1121 		    (void *)vp, uiop->uio_loffset, uiop->uio_resid);
1122 #endif
1123 
1124 	/*
1125 	 * Move bytes in PAGESIZE chunks. We must avoid spanning pages in
1126 	 * uiomove() because page faults may cause the cache to be invalidated
1127 	 * out from under us.
1128 	 */
1129 	do {
1130 		offset = uiop->uio_loffset;
1131 		lastpage_off = (cp->c_size - 1) & (offset_t)PAGEMASK;
1132 
1133 		/*
1134 		 * If not connected then need to make sure we have space
1135 		 * to perform the write.  We could make this check
1136 		 * a little tighter by only doing it if we are growing the file.
1137 		 */
1138 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
			/*
			 * Probe only: the block is handed straight back
			 * once we know one could be allocated.
			 */
1139 			error = cachefs_allocblocks(fscp->fs_cache, 1,
1140 			    cp->c_metadata.md_rltype);
1141 			if (error)
1142 				break;
1143 			cachefs_freeblocks(fscp->fs_cache, 1,
1144 			    cp->c_metadata.md_rltype);
1145 		}
1146 
1147 		/*
1148 		 * n is the number of bytes required to satisfy the request
1149 		 * or the number of bytes to fill out the page.
1150 		 */
1151 		n = (int)(PAGESIZE - ((uintptr_t)base & PAGEOFFSET));
1152 		if (n > tcount)
1153 			n = tcount;
1154 
1155 		/*
1156 		 * The number of bytes of data in the last page can not
1157 		 * be accurately be determined while page is being
1158 		 * uiomove'd to and the size of the file being updated.
1159 		 * Thus, inform threads which need to know accurately
1160 		 * how much data is in the last page of the file.  They
1161 		 * will not do the i/o immediately, but will arrange for
1162 		 * the i/o to happen later when this modify operation
1163 		 * will have finished.
1164 		 *
1165 		 * in similar NFS code, this is done right before the
1166 		 * uiomove(), which is best.  but here in cachefs, we
1167 		 * have two uiomove()s, so we must do it here.
1168 		 */
		/* note: the flag is tested here before c_statelock is taken */
1169 		ASSERT(!(cp->c_flags & CN_CMODINPROG));
1170 		mutex_enter(&cp->c_statelock);
1171 		cp->c_flags |= CN_CMODINPROG;
1172 		cp->c_modaddr = (offset & (offset_t)MAXBMASK);
1173 		mutex_exit(&cp->c_statelock);
1174 
1175 		/*
1176 		 * Check to see if we can skip reading in the page
1177 		 * and just allocate the memory.  We can do this
1178 		 * if we are going to rewrite the entire mapping
1179 		 * or if we are going to write to or beyond the current
1180 		 * end of file from the beginning of the mapping.
1181 		 */
1182 		if ((offset > (lastpage_off + PAGEOFFSET)) ||
1183 			((cp->c_size == 0) && (offset < PAGESIZE)) ||
1184 			((uintptr_t)base & PAGEOFFSET) == 0 && (n == PAGESIZE ||
1185 			((offset + n) >= cp->c_size))) {
1186 			pagecreate = 1;
1187 
1188 			/*
1189 			 * segmap_pagecreate() returns 1 if it calls
1190 			 * page_create_va() to allocate any pages.
1191 			 */
1192 			newpage = segmap_pagecreate(segkmap,
1193 			    (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK),
1194 			    PAGESIZE, 0);
1195 			/* do not zero page if we are overwriting all of it */
1196 			if (!((((uintptr_t)base & PAGEOFFSET) == 0) &&
1197 			    (n == PAGESIZE))) {
1198 				(void) kzero((void *)
1199 				    ((uintptr_t)base & (uintptr_t)PAGEMASK),
1200 				    PAGESIZE);
1201 			}
1202 			error = uiomove(base, n, UIO_WRITE, uiop);
1203 
1204 			/*
1205 			 * Unlock the page allocated by page_create_va()
1206 			 * in segmap_pagecreate()
1207 			 */
1208 			if (newpage)
1209 				segmap_pageunlock(segkmap,
1210 					(caddr_t)((uintptr_t)base &
1211 						(uintptr_t)PAGEMASK),
1212 					PAGESIZE, S_WRITE);
1213 		} else {
1214 			/*
1215 			 * KLUDGE ! Use segmap_fault instead of faulting and
1216 			 * using as_fault() to avoid a recursive readers lock
1217 			 * on kas.
1218 			 */
1219 			error = segmap_fault(kas.a_hat, segkmap, (caddr_t)
1220 			    ((uintptr_t)base & (uintptr_t)PAGEMASK),
1221 			    PAGESIZE, F_SOFTLOCK, S_WRITE);
1222 			if (error) {
1223 				if (FC_CODE(error) == FC_OBJERR)
1224 					error =  FC_ERRNO(error);
1225 				else
1226 					error = EIO;
				/*
				 * CN_CMODINPROG is still set on this exit;
				 * it is cleared after the loop.
				 */
1227 				break;
1228 			}
1229 			error = uiomove(base, n, UIO_WRITE, uiop);
1230 			(void) segmap_fault(kas.a_hat, segkmap, (caddr_t)
1231 			    ((uintptr_t)base & (uintptr_t)PAGEMASK),
1232 			    PAGESIZE, F_SOFTUNLOCK, S_WRITE);
1233 		}
1234 		n = (int)(uiop->uio_loffset - offset); /* n = # bytes written */
1235 		base += n;
1236 		tcount -= n;
1237 
1238 		/* get access to the file system */
1239 		if ((terror = cachefs_cd_access(fscp, 0, 1)) != 0) {
			/*
			 * The access error replaces any uiomove() error;
			 * CN_CMODINPROG is cleared after the loop.
			 */
1240 			error = terror;
1241 			break;
1242 		}
1243 
1244 		/*
1245 		 * cp->c_attr.va_size is the maximum number of
1246 		 * bytes known to be in the file.
1247 		 * Make sure it is at least as high as the
1248 		 * last byte we just wrote into the buffer.
1249 		 */
1250 		mutex_enter(&cp->c_statelock);
1251 		if (cp->c_size < uiop->uio_loffset) {
1252 			cp->c_size = uiop->uio_loffset;
1253 		}
1254 		if (cp->c_size != cp->c_attr.va_size) {
1255 			cp->c_attr.va_size = cp->c_size;
1256 			cp->c_flags |= CN_UPDATED;
1257 		}
1258 		/* c_size is now correct, so we can clear modinprog */
1259 		cp->c_flags &= ~CN_CMODINPROG;
1260 		if (error == 0) {
1261 			cp->c_flags |= CDIRTY;
1262 			if (pagecreate && (cp->c_flags & CN_NOCACHE) == 0) {
1263 				/*
1264 				 * if we're not in NOCACHE mode
1265 				 * (i.e., single-writer), we update the
1266 				 * allocmap here rather than waiting until
1267 				 * cachefspush is called.  This prevents
1268 				 * getpage from clustering up pages from
1269 				 * the backfile and stomping over the changes
1270 				 * we make here.
1271 				 */
1272 				if (cachefs_charge_page(cp, offset) == 0) {
1273 					cachefs_update_allocmap(cp,
1274 					    offset & (offset_t)PAGEMASK,
1275 							(size_t)PAGESIZE);
1276 				}
1277 
1278 				/* else we ran out of space */
1279 				else {
1280 					/* nocache file if connected */
1281 					if (fscp->fs_cdconnected ==
1282 					    CFS_CD_CONNECTED)
1283 						cachefs_nocache(cp);
1284 					/*
1285 					 * If disconnected then cannot
1286 					 * nocache the file.  Let it have
1287 					 * the space.
1288 					 */
1289 					else {
1290 						cp->c_metadata.md_frontblks++;
1291 						cp->c_flags |= CN_UPDATED;
1292 						cachefs_update_allocmap(cp,
1293 						    offset & (offset_t)PAGEMASK,
1294 						    (size_t)PAGESIZE);
1295 					}
1296 				}
1297 			}
1298 		}
1299 		mutex_exit(&cp->c_statelock);
1300 		cachefs_cd_release(fscp);
1301 	} while (tcount > 0 && error == 0);
1302 
	/* only the early "break" exits above can leave the flag set */
1303 	if (cp->c_flags & CN_CMODINPROG) {
1304 		/* XXX assert error != 0?  FC_ERRNO() makes this more risky. */
1305 		mutex_enter(&cp->c_statelock);
1306 		cp->c_flags &= ~CN_CMODINPROG;
1307 		mutex_exit(&cp->c_statelock);
1308 	}
1309 
1310 #ifdef CFS_CD_DEBUG
1311 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
1312 #endif
1313 
1314 #ifdef CFSDEBUG
1315 	CFS_DEBUG(CFSDEBUG_VOPS)
1316 		printf("cachefs_writepage: EXIT error %d\n", error);
1317 #endif
1318 
1319 	return (error);
1320 }
1321 
1322 /*
 * Pushes out pages to the back and/or front file system.
 *	vp    - vnode that owns the pages; must not be read-only (asserted)
 *	pp    - page to write; it is handed to pvn_write_kluster() first
 *	offp  - if non-NULL, receives the starting offset of the i/o
 *	lenp  - if non-NULL, receives the length of the i/o
 *	flags - pageout flags; B_ASYNC must not be set (asserted)
 *	cr    - credentials; must be non-NULL (asserted)
 *
 * When connected the data is written to the back file and then, if
 * allowed, to the front file (whose result is ignored); when
 * disconnected it is written to the front file only.
 *
 * Returns 0 on success, or when the push is postponed because a
 * cachefs_writepage() is in progress over the same region; otherwise
 * ENOMEM if pageio_setup() fails, or the error from the push routine.
 */
1325 static int
1326 cachefs_push(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
1327     int flags, cred_t *cr)
1328 {
1329 	struct cnode *cp = VTOC(vp);
1330 	struct buf *bp;
1331 	int error;
1332 	fscache_t *fscp = C_TO_FSCACHE(cp);
1333 	u_offset_t iooff;
1334 	size_t iolen;
1335 	u_offset_t lbn;
1336 	u_offset_t lbn_off;
1337 	uint_t bsize;
1338 
1339 	ASSERT((flags & B_ASYNC) == 0);
1340 	ASSERT(!vn_is_readonly(vp));
1341 	ASSERT(pp != NULL);
1342 	ASSERT(cr != NULL);
1343 
	/* offset of the start of the logical block containing the page */
1344 	bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
1345 	lbn = pp->p_offset / bsize;
1346 	lbn_off = lbn * bsize;
1347 
1348 	/*
1349 	 * Find a kluster that fits in one block, or in
1350 	 * one page if pages are bigger than blocks.  If
1351 	 * there is less file space allocated than a whole
1352 	 * page, we'll shorten the i/o request below.
1353 	 */
1354 
1355 	pp = pvn_write_kluster(vp, pp, &iooff, &iolen, lbn_off,
1356 			roundup(bsize, PAGESIZE), flags);
1357 
1358 	/*
1359 	 * The CN_CMODINPROG flag makes sure that we use a correct
1360 	 * value of c_size, below.  CN_CMODINPROG is set in
1361 	 * cachefs_writepage().  When CN_CMODINPROG is set it
1362 	 * indicates that a uiomove() is in progress and the c_size
1363 	 * has not been made consistent with the new size of the
1364 	 * file. When the uiomove() completes the c_size is updated
1365 	 * and the CN_CMODINPROG flag is cleared.
1366 	 *
1367 	 * The CN_CMODINPROG flag makes sure that cachefs_push_front
1368 	 * and cachefs_push_connected see a consistent value of
1369 	 * c_size.  Without this handshaking, it is possible that
1370 	 * these routines will pick up the old value of c_size before
1371 	 * the uiomove() in cachefs_writepage() completes.  This will
1372 	 * result in the vn_rdwr() being too small, and data loss.
1373 	 *
1374 	 * More precisely, there is a window between the time the
1375 	 * uiomove() completes and the time the c_size is updated. If
1376 	 * a VOP_PUTPAGE() operation intervenes in this window, the
1377 	 * page will be picked up, because it is dirty; it will be
1378 	 * unlocked, unless it was pagecreate'd. When the page is
1379 	 * picked up as dirty, the dirty bit is reset
1380 	 * (pvn_getdirty()). In cachefs_push_connected(), c_size is
1381 	 * checked.  This will still be the old size.  Therefore, the
1382 	 * page will not be written out to the correct length, and the
1383 	 * page will be clean, so the data may disappear.
1384 	 */
	/* cheap unlocked test first; repeated below under c_statelock */
1385 	if (cp->c_flags & CN_CMODINPROG) {
1386 		mutex_enter(&cp->c_statelock);
1387 		if ((cp->c_flags & CN_CMODINPROG) &&
1388 		    cp->c_modaddr + MAXBSIZE > iooff &&
1389 		    cp->c_modaddr < iooff + iolen) {
1390 			page_t *plist;
1391 
1392 			/*
1393 			 * A write is in progress for this region of
1394 			 * the file.  If we did not detect
1395 			 * CN_CMODINPROG here then this path through
1396 			 * cachefs_push_connected() would eventually
1397 			 * do the vn_rdwr() and may not write out all
1398 			 * of the data in the pages.  We end up losing
1399 			 * data. So we decide to set the modified bit
1400 			 * on each page in the page list and mark the
1401 			 * cnode with CDIRTY.  This push will be
1402 			 * restarted at some later time.
1403 			 */
1404 
1405 			plist = pp;
1406 			while (plist != NULL) {
1407 				pp = plist;
1408 				page_sub(&plist, pp);
1409 				hat_setmod(pp);
1410 				page_io_unlock(pp);
1411 				page_unlock(pp);
1412 			}
1413 			cp->c_flags |= CDIRTY;
1414 			mutex_exit(&cp->c_statelock);
			/* nothing was written: report the kluster range */
1415 			if (offp)
1416 				*offp = iooff;
1417 			if (lenp)
1418 				*lenp = iolen;
1419 			return (0);
1420 		}
1421 		mutex_exit(&cp->c_statelock);
1422 	}
1423 
1424 	/*
1425 	 * Set the pages up for pageout.
1426 	 */
1427 	bp = pageio_setup(pp, iolen, CTOV(cp), B_WRITE | flags);
1428 	if (bp == NULL) {
1429 
1430 		/*
1431 		 * currently, there is no way for pageio_setup() to
1432 		 * return NULL, since it uses its own scheme for
1433 		 * kmem_alloc()ing that shouldn't return NULL, and
1434 		 * since pageio_setup() itself dereferences the thing
1435 		 * it's about to return.  still, we need to be ready
1436 		 * in case this ever does start happening.
1437 		 */
1438 
1439 		error = ENOMEM;
1440 		goto writedone;
1441 	}
1442 	/*
1443 	 * pageio_setup should have set b_addr to 0.  This
1444 	 * is correct since we want to do I/O on a page
1445 	 * boundary.  bp_mapin will use this addr to calculate
1446 	 * an offset, and then set b_addr to the kernel virtual
1447 	 * address it allocated for us.
1448 	 */
1449 	bp->b_edev = 0;
1450 	bp->b_dev = 0;
1451 	bp->b_lblkno = (diskaddr_t)lbtodb(iooff);
1452 	bp_mapin(bp);
1453 
	/*
	 * Limit the write to the bytes between the start of the i/o and
	 * c_size, and never more than the buffer holds.
	 * NOTE(review): b_lblkno is stored above but b_blkno is read
	 * here; presumably the two names alias in struct buf -- confirm.
	 */
1454 	iolen  = cp->c_size - ldbtob(bp->b_blkno);
1455 	if (iolen > bp->b_bcount)
1456 		iolen  = bp->b_bcount;
1457 
1458 	/* if connected */
1459 	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1460 		/* write to the back file first */
1461 		error = cachefs_push_connected(vp, bp, iolen, iooff, cr);
1462 
1463 		/* write to the front file if allowed */
1464 		if ((error == 0) && CFS_ISFS_NONSHARED(fscp) &&
1465 		    ((cp->c_flags & CN_NOCACHE) == 0)) {
1466 			/* try to write to the front file */
1467 			(void) cachefs_push_front(vp, bp, iolen, iooff, cr);
1468 		}
1469 	}
1470 
1471 	/* else if disconnected */
1472 	else {
1473 		/* try to write to the front file */
1474 		error = cachefs_push_front(vp, bp, iolen, iooff, cr);
1475 	}
1476 
1477 	bp_mapout(bp);
1478 	pageio_done(bp);
1479 
1480 writedone:
1481 
	/* finish the page i/o, marking it failed if error is set */
1482 	pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
1483 	if (offp)
1484 		*offp = iooff;
1485 	if (lenp)
1486 		*lenp = iolen;
1487 
	/*
	 * Post-process the result under c_statelock:
	 *  - ENOSPC invalidates the object and records c_error only when
	 *    connected or on a soft mount;
	 *  - any other error that is neither a timeout nor EINTR
	 *    invalidates the object and records c_error;
	 *  - success while connected marks the object modified.
	 */
1488 	/* XXX ask bob mastors how to fix this someday */
1489 	mutex_enter(&cp->c_statelock);
1490 	if (error) {
1491 		if (error == ENOSPC) {
1492 			if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) ||
1493 			    CFS_ISFS_SOFT(fscp)) {
1494 				CFSOP_INVALIDATE_COBJECT(fscp, cp, cr);
1495 				cp->c_error = error;
1496 			}
1497 		} else if ((CFS_TIMEOUT(fscp, error) == 0) &&
1498 		    (error != EINTR)) {
1499 			CFSOP_INVALIDATE_COBJECT(fscp, cp, cr);
1500 			cp->c_error = error;
1501 		}
1502 	} else if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1503 		CFSOP_MODIFY_COBJECT(fscp, cp, cr);
1504 	}
1505 	mutex_exit(&cp->c_statelock);
1506 
1507 	return (error);
1508 }
1509 
1510 /*
1511  * Pushes out pages to the back file system.
1512  */
1513 static int
1514 cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen,
1515     u_offset_t iooff, cred_t *cr)
1516 {
1517 	struct cnode *cp = VTOC(vp);
1518 	int error = 0;
1519 	int mode = 0;
1520 	fscache_t *fscp = C_TO_FSCACHE(cp);
1521 	ssize_t resid;
1522 	vnode_t *backvp;
1523 
1524 	/* get the back file if necessary */
1525 	mutex_enter(&cp->c_statelock);
1526 	if (cp->c_backvp == NULL) {
1527 		error = cachefs_getbackvp(fscp, cp);
1528 		if (error) {
1529 			mutex_exit(&cp->c_statelock);
1530 			goto out;
1531 		}
1532 	}
1533 	backvp = cp->c_backvp;
1534 	VN_HOLD(backvp);
1535 	mutex_exit(&cp->c_statelock);
1536 
1537 	if (CFS_ISFS_NONSHARED(fscp) && CFS_ISFS_SNR(fscp))
1538 		mode = FSYNC;
1539 
1540 	/* write to the back file */
1541 	error = bp->b_error = vn_rdwr(UIO_WRITE, backvp, bp->b_un.b_addr,
1542 	    iolen, iooff, UIO_SYSSPACE, mode,
1543 	    RLIM64_INFINITY, cr, &resid);
1544 	if (error) {
1545 #ifdef CFSDEBUG
1546 		CFS_DEBUG(CFSDEBUG_VOPS | CFSDEBUG_BACK)
1547 			printf("cachefspush: error %d cr %p\n",
1548 				error, (void *)cr);
1549 #endif
1550 		bp->b_flags |= B_ERROR;
1551 	}
1552 	VN_RELE(backvp);
1553 out:
1554 	return (error);
1555 }
1556 
1557 /*
 * Pushes out pages to the front file system.
 * Called for both connected and disconnected states.
 *	vp    - cachefs vnode whose data is being written
 *	bp    - buffer describing the mapped pages; b_un.b_addr is the data
 *	iolen - number of bytes to write
 *	iooff - file offset to write at
 *	cr    - caller's credentials; used only for the disconnected log
 *		record (and replaced there by cp->c_cred when that is set)
 *
 * Returns:
 *	0	  data written to the front file; also returned when
 *		  connected and charging or writing failed, in which case
 *		  the file is switched to nocache instead
 *	ETIMEDOUT the file system is not non-shared, the cnode is
 *		  CN_NOCACHE, or (disconnected) the file is not populated
 *		  or needs attributes
 *	ENOSPC	  (disconnected) no cache space, or the log could not be
 *		  written
 *	other	  (disconnected) the error from vn_rdwr()
 */
1560 static int
1561 cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen,
1562     u_offset_t iooff, cred_t *cr)
1563 {
1564 	struct cnode *cp = VTOC(vp);
1565 	fscache_t *fscp = C_TO_FSCACHE(cp);
1566 	int error = 0;
1567 	ssize_t resid;
1568 	u_offset_t popoff;
	/* non-zero once a log record has been started and must be committed */
1569 	off_t commit = 0;
1570 	uint_t seq;
	/* only assigned on the path that sets commit, and only read then */
1571 	enum cachefs_rl_type type;
1572 	vnode_t *frontvp = NULL;
1573 
1574 	mutex_enter(&cp->c_statelock);
1575 
1576 	if (!CFS_ISFS_NONSHARED(fscp)) {
1577 		error = ETIMEDOUT;
1578 		goto out;
1579 	}
1580 
1581 	/* get the front file if necessary */
1582 	if ((cp->c_frontvp == NULL) &&
1583 	    ((cp->c_flags & CN_NOCACHE) == 0)) {
1584 		(void) cachefs_getfrontfile(cp);
1585 	}
1586 	if (cp->c_flags & CN_NOCACHE) {
1587 		error = ETIMEDOUT;
1588 		goto out;
1589 	}
1590 
1591 	/* if disconnected, needs to be populated and have good attributes */
1592 	if ((fscp->fs_cdconnected != CFS_CD_CONNECTED) &&
1593 	    (((cp->c_metadata.md_flags & MD_POPULATED) == 0) ||
1594 	    (cp->c_metadata.md_flags & MD_NEEDATTRS))) {
1595 		error = ETIMEDOUT;
1596 		goto out;
1597 	}
1598 
	/* charge the cache for every MAXBSIZE chunk the i/o touches */
1599 	for (popoff = iooff; popoff < (iooff + iolen); popoff += MAXBSIZE) {
1600 		if (cachefs_charge_page(cp, popoff)) {
1601 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				/* error is still 0: give up caching quietly */
1602 				cachefs_nocache(cp);
1603 				goto out;
1604 			} else {
1605 				error = ENOSPC;
1606 				goto out;
1607 			}
1608 		}
1609 	}
1610 
1611 	if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
1612 		/* log the first putpage to a file */
1613 		if ((cp->c_metadata.md_flags & MD_PUTPAGE) == 0) {
1614 			/* uses open's creds if we have them */
1615 			if (cp->c_cred)
1616 				cr = cp->c_cred;
1617 
1618 			if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
1619 				error = cachefs_dlog_cidmap(fscp);
1620 				if (error) {
1621 					error = ENOSPC;
1622 					goto out;
1623 				}
1624 				cp->c_metadata.md_flags |= MD_MAPPING;
1625 			}
1626 
1627 			commit = cachefs_dlog_modify(fscp, cp, cr, &seq);
1628 			if (commit == 0) {
1629 				/* out of space */
1630 				error = ENOSPC;
1631 				goto out;
1632 			}
1633 
			/* remember the old rl type so a failure can undo it */
1634 			cp->c_metadata.md_seq = seq;
1635 			type = cp->c_metadata.md_rltype;
1636 			cachefs_modified(cp);
1637 			cp->c_metadata.md_flags |= MD_PUTPAGE;
1638 			cp->c_metadata.md_flags &= ~MD_PUSHDONE;
1639 			cp->c_flags |= CN_UPDATED;
1640 		}
1641 
1642 		/* subsequent putpages just get a new sequence number */
1643 		else {
1644 			/* but only if it matters */
1645 			if (cp->c_metadata.md_seq != fscp->fs_dlogseq) {
1646 				seq = cachefs_dlog_seqnext(fscp);
1647 				if (seq == 0) {
1648 					error = ENOSPC;
1649 					goto out;
1650 				}
1651 				cp->c_metadata.md_seq = seq;
1652 				cp->c_flags |= CN_UPDATED;
1653 				/* XXX maybe should do write_metadata here */
1654 			}
1655 		}
1656 	}
1657 
	/*
	 * c_statelock is dropped around the write; the hold keeps the
	 * front vnode in place meanwhile.  The write itself uses kcred,
	 * not the caller's credentials.
	 */
1658 	frontvp = cp->c_frontvp;
1659 	VN_HOLD(frontvp);
1660 	mutex_exit(&cp->c_statelock);
1661 	error = bp->b_error = vn_rdwr(UIO_WRITE, frontvp,
1662 	    bp->b_un.b_addr, iolen, iooff, UIO_SYSSPACE, 0,
1663 	    RLIM64_INFINITY, kcred, &resid);
1664 	mutex_enter(&cp->c_statelock);
1665 	VN_RELE(frontvp);
1666 	frontvp = NULL;
1667 	if (error) {
1668 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			/* connected: drop caching and report success */
1669 			cachefs_nocache(cp);
1670 			error = 0;
1671 			goto out;
1672 		} else {
1673 			goto out;
1674 		}
1675 	}
1676 
1677 	(void) cachefs_update_allocmap(cp, iooff, iolen);
1678 	cp->c_flags |= (CN_UPDATED | CN_NEED_FRONT_SYNC |
1679 		CN_POPULATION_PENDING);
1680 	if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
1681 		gethrestime(&cp->c_metadata.md_localmtime);
1682 		cp->c_metadata.md_flags |= MD_LOCALMTIME;
1683 	}
1684 
1685 out:
	/* c_statelock is held on every path that reaches here */
1686 	if (commit) {
1687 		/* commit the log record */
1688 		ASSERT(fscp->fs_cdconnected == CFS_CD_DISCONNECTED);
1689 		if (cachefs_dlog_commit(fscp, commit, error)) {
1690 			/*EMPTY*/
1691 			/* XXX fix on panic */
1692 		}
1693 	}
1694 
	/* failed after logging: undo MD_PUTPAGE and restore the rl type */
1695 	if (error && commit) {
1696 		cp->c_metadata.md_flags &= ~MD_PUTPAGE;
1697 		cachefs_rlent_moveto(fscp->fs_cache, type,
1698 		    cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks);
1699 		cp->c_metadata.md_rltype = type;
1700 		cp->c_flags |= CN_UPDATED;
1701 	}
1702 	mutex_exit(&cp->c_statelock);
1703 	return (error);
1704 }
1706 
/*
 * Dump entry point.  Not implemented for cachefs: every argument is
 * ignored and the call always fails with ENOSYS.
 */
/*ARGSUSED*/
static int
cachefs_dump(struct vnode *vp, caddr_t foo1, int foo2, int foo3)
{
	/* should we panic if we get here? */
	return (ENOSYS);
}
1713 
1714 /*ARGSUSED*/
1715 static int
1716 cachefs_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, cred_t *cred,
1717 	int *rvalp)
1718 {
1719 	int error;
1720 	struct cnode *cp = VTOC(vp);
1721 	struct fscache *fscp = C_TO_FSCACHE(cp);
1722 	struct cachefscache *cachep;
1723 	extern kmutex_t cachefs_cachelock;
1724 	extern cachefscache_t *cachefs_cachelist;
1725 	cachefsio_pack_t *packp;
1726 	STRUCT_DECL(cachefsio_dcmd, dcmd);
1727 	int	inlen, outlen;	/* LP64: generic int for struct in/out len */
1728 	void *dinp, *doutp;
1729 	int (*dcmd_routine)(vnode_t *, void *, void *);
1730 
1731 	if (getzoneid() != GLOBAL_ZONEID)
1732 		return (EPERM);
1733 
1734 	/*
1735 	 * Cachefs only provides pass-through support for NFSv4,
1736 	 * and all vnode operations are passed through to the
1737 	 * back file system. For NFSv4 pass-through to work, only
1738 	 * connected operation is supported, the cnode backvp must
1739 	 * exist, and cachefs optional (eg., disconnectable) flags
1740 	 * are turned off. Assert these conditions which ensure
1741 	 * that only a subset of the ioctls are "truly supported"
1742 	 * for NFSv4 (these are CFSDCMD_DAEMONID and CFSDCMD_GETSTATS.
1743 	 * The packing operations are meaningless since there is
1744 	 * no caching for NFSv4, and the called functions silently
1745 	 * return if the backfilesystem is NFSv4. The daemon
1746 	 * commands except for those above are essentially used
1747 	 * for disconnectable operation support (including log
1748 	 * rolling), so in each called function, we assert that
1749 	 * NFSv4 is not in use. The _FIO* calls (except _FIOCOD)
1750 	 * are from "cfsfstype" which is not a documented
1751 	 * command. However, the command is visible in
1752 	 * /usr/lib/fs/cachefs so the commands are simply let
1753 	 * through (don't seem to impact pass-through functionality).
1754 	 */
1755 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
1756 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
1757 
1758 	switch (cmd) {
1759 	case CACHEFSIO_PACK:
1760 		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1761 		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1762 		if (!error)
1763 			error = cachefs_pack(vp, packp->p_name, cred);
1764 		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1765 		break;
1766 
1767 	case CACHEFSIO_UNPACK:
1768 		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1769 		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1770 		if (!error)
1771 			error = cachefs_unpack(vp, packp->p_name, cred);
1772 		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1773 		break;
1774 
1775 	case CACHEFSIO_PACKINFO:
1776 		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1777 		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1778 		if (!error)
1779 			error = cachefs_packinfo(vp, packp->p_name,
1780 			    &packp->p_status, cred);
1781 		if (!error)
1782 			error = xcopyout(packp, (void *)arg,
1783 			    sizeof (cachefsio_pack_t));
1784 		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1785 		break;
1786 
1787 	case CACHEFSIO_UNPACKALL:
1788 		error = cachefs_unpackall(vp);
1789 		break;
1790 
1791 	case CACHEFSIO_DCMD:
1792 		/*
1793 		 * This is a private interface between the cachefsd and
1794 		 * this file system.
1795 		 */
1796 
1797 		/* must be root to use these commands */
1798 		if (secpolicy_fs_config(cred, vp->v_vfsp) != 0)
1799 			return (EPERM);
1800 
1801 		/* get the command packet */
1802 		STRUCT_INIT(dcmd, flag & DATAMODEL_MASK);
1803 		error = xcopyin((void *)arg, STRUCT_BUF(dcmd),
1804 		    SIZEOF_STRUCT(cachefsio_dcmd, DATAMODEL_NATIVE));
1805 		if (error)
1806 			return (error);
1807 
1808 		/* copy in the data for the operation */
1809 		dinp = NULL;
1810 		if ((inlen = STRUCT_FGET(dcmd, d_slen)) > 0) {
1811 			dinp = cachefs_kmem_alloc(inlen, KM_SLEEP);
1812 			error = xcopyin(STRUCT_FGETP(dcmd, d_sdata), dinp,
1813 			    inlen);
1814 			if (error)
1815 				return (error);
1816 		}
1817 
1818 		/* allocate space for the result */
1819 		doutp = NULL;
1820 		if ((outlen = STRUCT_FGET(dcmd, d_rlen)) > 0)
1821 			doutp = cachefs_kmem_alloc(outlen, KM_SLEEP);
1822 
1823 		/*
1824 		 * Assert NFSv4 only allows the daemonid and getstats
1825 		 * daemon requests
1826 		 */
1827 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0 ||
1828 			STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_DAEMONID ||
1829 			STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_GETSTATS);
1830 
1831 		/* get the routine to execute */
1832 		dcmd_routine = NULL;
1833 		switch (STRUCT_FGET(dcmd, d_cmd)) {
1834 		case CFSDCMD_DAEMONID:
1835 			dcmd_routine = cachefs_io_daemonid;
1836 			break;
1837 		case CFSDCMD_STATEGET:
1838 			dcmd_routine = cachefs_io_stateget;
1839 			break;
1840 		case CFSDCMD_STATESET:
1841 			dcmd_routine = cachefs_io_stateset;
1842 			break;
1843 		case CFSDCMD_XWAIT:
1844 			dcmd_routine = cachefs_io_xwait;
1845 			break;
1846 		case CFSDCMD_EXISTS:
1847 			dcmd_routine = cachefs_io_exists;
1848 			break;
1849 		case CFSDCMD_LOSTFOUND:
1850 			dcmd_routine = cachefs_io_lostfound;
1851 			break;
1852 		case CFSDCMD_GETINFO:
1853 			dcmd_routine = cachefs_io_getinfo;
1854 			break;
1855 		case CFSDCMD_CIDTOFID:
1856 			dcmd_routine = cachefs_io_cidtofid;
1857 			break;
1858 		case CFSDCMD_GETATTRFID:
1859 			dcmd_routine = cachefs_io_getattrfid;
1860 			break;
1861 		case CFSDCMD_GETATTRNAME:
1862 			dcmd_routine = cachefs_io_getattrname;
1863 			break;
1864 		case CFSDCMD_GETSTATS:
1865 			dcmd_routine = cachefs_io_getstats;
1866 			break;
1867 		case CFSDCMD_ROOTFID:
1868 			dcmd_routine = cachefs_io_rootfid;
1869 			break;
1870 		case CFSDCMD_CREATE:
1871 			dcmd_routine = cachefs_io_create;
1872 			break;
1873 		case CFSDCMD_REMOVE:
1874 			dcmd_routine = cachefs_io_remove;
1875 			break;
1876 		case CFSDCMD_LINK:
1877 			dcmd_routine = cachefs_io_link;
1878 			break;
1879 		case CFSDCMD_RENAME:
1880 			dcmd_routine = cachefs_io_rename;
1881 			break;
1882 		case CFSDCMD_MKDIR:
1883 			dcmd_routine = cachefs_io_mkdir;
1884 			break;
1885 		case CFSDCMD_RMDIR:
1886 			dcmd_routine = cachefs_io_rmdir;
1887 			break;
1888 		case CFSDCMD_SYMLINK:
1889 			dcmd_routine = cachefs_io_symlink;
1890 			break;
1891 		case CFSDCMD_SETATTR:
1892 			dcmd_routine = cachefs_io_setattr;
1893 			break;
1894 		case CFSDCMD_SETSECATTR:
1895 			dcmd_routine = cachefs_io_setsecattr;
1896 			break;
1897 		case CFSDCMD_PUSHBACK:
1898 			dcmd_routine = cachefs_io_pushback;
1899 			break;
1900 		default:
1901 			error = ENOTTY;
1902 			break;
1903 		}
1904 
1905 		/* execute the routine */
1906 		if (dcmd_routine)
1907 			error = (*dcmd_routine)(vp, dinp, doutp);
1908 
1909 		/* copy out the result */
1910 		if ((error == 0) && doutp)
1911 			error = xcopyout(doutp, STRUCT_FGETP(dcmd, d_rdata),
1912 			    outlen);
1913 
1914 		/* free allocated memory */
1915 		if (dinp)
1916 			cachefs_kmem_free(dinp, inlen);
1917 		if (doutp)
1918 			cachefs_kmem_free(doutp, outlen);
1919 
1920 		break;
1921 
1922 	case _FIOCOD:
1923 		if (secpolicy_fs_config(cred, vp->v_vfsp) != 0) {
1924 			error = EPERM;
1925 			break;
1926 		}
1927 
1928 		error = EBUSY;
1929 		if (arg) {
1930 			/* non-zero arg means do all filesystems */
1931 			mutex_enter(&cachefs_cachelock);
1932 			for (cachep = cachefs_cachelist; cachep != NULL;
1933 			    cachep = cachep->c_next) {
1934 				mutex_enter(&cachep->c_fslistlock);
1935 				for (fscp = cachep->c_fslist;
1936 				    fscp != NULL;
1937 				    fscp = fscp->fs_next) {
1938 					if (CFS_ISFS_CODCONST(fscp)) {
1939 						gethrestime(&fscp->fs_cod_time);
1940 						error = 0;
1941 					}
1942 				}
1943 				mutex_exit(&cachep->c_fslistlock);
1944 			}
1945 			mutex_exit(&cachefs_cachelock);
1946 		} else {
1947 			if (CFS_ISFS_CODCONST(fscp)) {
1948 				gethrestime(&fscp->fs_cod_time);
1949 				error = 0;
1950 			}
1951 		}
1952 		break;
1953 
1954 	case _FIOSTOPCACHE:
1955 		error = cachefs_stop_cache(cp);
1956 		break;
1957 
1958 	default:
1959 		error = ENOTTY;
1960 		break;
1961 	}
1962 
1963 	/* return the result */
1964 	return (error);
1965 }
1966 
1967 ino64_t
1968 cachefs_fileno_conflict(fscache_t *fscp, ino64_t old)
1969 {
1970 	ino64_t new;
1971 
1972 	ASSERT(MUTEX_HELD(&fscp->fs_fslock));
1973 
1974 	for (;;) {
1975 		fscp->fs_info.fi_localfileno++;
1976 		if (fscp->fs_info.fi_localfileno == 0)
1977 			fscp->fs_info.fi_localfileno = 3;
1978 		fscp->fs_flags |= CFS_FS_DIRTYINFO;
1979 
1980 		new = fscp->fs_info.fi_localfileno;
1981 		if (! cachefs_fileno_inuse(fscp, new))
1982 			break;
1983 	}
1984 
1985 	cachefs_inum_register(fscp, old, new);
1986 	cachefs_inum_register(fscp, new, 0);
1987 	return (new);
1988 }
1989 
1990 /*ARGSUSED*/
1991 static int
1992 cachefs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
1993 {
1994 	struct cnode *cp = VTOC(vp);
1995 	fscache_t *fscp = C_TO_FSCACHE(cp);
1996 	int error = 0;
1997 	int held = 0;
1998 	int connected = 0;
1999 
2000 #ifdef CFSDEBUG
2001 	CFS_DEBUG(CFSDEBUG_VOPS)
2002 		printf("cachefs_getattr: ENTER vp %p\n", (void *)vp);
2003 #endif
2004 
2005 	if (getzoneid() != GLOBAL_ZONEID)
2006 		return (EPERM);
2007 
2008 	/* Call backfilesystem getattr if NFSv4 */
2009 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
2010 		error = cachefs_getattr_backfs_nfsv4(vp, vap, flags, cr);
2011 		goto out;
2012 	}
2013 
2014 	/*
2015 	 * If it has been specified that the return value will
2016 	 * just be used as a hint, and we are only being asked
2017 	 * for size, fsid or rdevid, then return the client's
2018 	 * notion of these values without checking to make sure
2019 	 * that the attribute cache is up to date.
2020 	 * The whole point is to avoid an over the wire GETATTR
2021 	 * call.
2022 	 */
2023 	if (flags & ATTR_HINT) {
2024 		if (vap->va_mask ==
2025 		    (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
2026 			if (vap->va_mask | AT_SIZE)
2027 				vap->va_size = cp->c_size;
2028 			/*
2029 			 * Return the FSID of the cachefs filesystem,
2030 			 * not the back filesystem
2031 			 */
2032 			if (vap->va_mask | AT_FSID)
2033 				vap->va_fsid = vp->v_vfsp->vfs_dev;
2034 			if (vap->va_mask | AT_RDEV)
2035 				vap->va_rdev = cp->c_attr.va_rdev;
2036 			return (0);
2037 		}
2038 	}
2039 
2040 	/*
2041 	 * Only need to flush pages if asking for the mtime
2042 	 * and if there any dirty pages.
2043 	 */
2044 	if (vap->va_mask & AT_MTIME) {
2045 		/*EMPTY*/
2046 #if 0
2047 		/*
2048 		 * XXX bob: stolen from nfs code, need to do something similar
2049 		 */
2050 		rp = VTOR(vp);
2051 		if ((rp->r_flags & RDIRTY) || rp->r_iocnt > 0)
2052 			(void) nfs3_putpage(vp, (offset_t)0, 0, 0, cr);
2053 #endif
2054 	}
2055 
2056 	for (;;) {
2057 		/* get (or renew) access to the file system */
2058 		if (held) {
2059 			cachefs_cd_release(fscp);
2060 			held = 0;
2061 		}
2062 		error = cachefs_cd_access(fscp, connected, 0);
2063 		if (error)
2064 			goto out;
2065 		held = 1;
2066 
2067 		/*
2068 		 * If it has been specified that the return value will
2069 		 * just be used as a hint, and we are only being asked
2070 		 * for size, fsid or rdevid, then return the client's
2071 		 * notion of these values without checking to make sure
2072 		 * that the attribute cache is up to date.
2073 		 * The whole point is to avoid an over the wire GETATTR
2074 		 * call.
2075 		 */
2076 		if (flags & ATTR_HINT) {
2077 			if (vap->va_mask ==
2078 			    (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
2079 				if (vap->va_mask | AT_SIZE)
2080 					vap->va_size = cp->c_size;
2081 				/*
2082 				 * Return the FSID of the cachefs filesystem,
2083 				 * not the back filesystem
2084 				 */
2085 				if (vap->va_mask | AT_FSID)
2086 					vap->va_fsid = vp->v_vfsp->vfs_dev;
2087 				if (vap->va_mask | AT_RDEV)
2088 					vap->va_rdev = cp->c_attr.va_rdev;
2089 				goto out;
2090 			}
2091 		}
2092 
2093 		mutex_enter(&cp->c_statelock);
2094 		if ((cp->c_metadata.md_flags & MD_NEEDATTRS) &&
2095 		    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
2096 			mutex_exit(&cp->c_statelock);
2097 			connected = 1;
2098 			continue;
2099 		}
2100 
2101 		error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2102 		if (CFS_TIMEOUT(fscp, error)) {
2103 			mutex_exit(&cp->c_statelock);
2104 			cachefs_cd_release(fscp);
2105 			held = 0;
2106 			cachefs_cd_timedout(fscp);
2107 			continue;
2108 		}
2109 		if (error) {
2110 			mutex_exit(&cp->c_statelock);
2111 			break;
2112 		}
2113 
2114 		/* check for fileno conflict */
2115 		if ((fscp->fs_inum_size > 0) &&
2116 		    ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) {
2117 			ino64_t fakenum;
2118 
2119 			mutex_exit(&cp->c_statelock);
2120 			mutex_enter(&fscp->fs_fslock);
2121 			fakenum = cachefs_inum_real2fake(fscp,
2122 			    cp->c_attr.va_nodeid);
2123 			if (fakenum == 0) {
2124 				fakenum = cachefs_fileno_conflict(fscp,
2125 				    cp->c_attr.va_nodeid);
2126 			}
2127 			mutex_exit(&fscp->fs_fslock);
2128 
2129 			mutex_enter(&cp->c_statelock);
2130 			cp->c_metadata.md_flags |= MD_LOCALFILENO;
2131 			cp->c_metadata.md_localfileno = fakenum;
2132 			cp->c_flags |= CN_UPDATED;
2133 		}
2134 
2135 		/* copy out the attributes */
2136 		*vap = cp->c_attr;
2137 
2138 		/*
2139 		 * return the FSID of the cachefs filesystem,
2140 		 * not the back filesystem
2141 		 */
2142 		vap->va_fsid = vp->v_vfsp->vfs_dev;
2143 
2144 		/* return our idea of the size */
2145 		if (cp->c_size > vap->va_size)
2146 			vap->va_size = cp->c_size;
2147 
2148 		/* overwrite with our version of fileno and timestamps */
2149 		vap->va_nodeid = cp->c_metadata.md_localfileno;
2150 		vap->va_mtime = cp->c_metadata.md_localmtime;
2151 		vap->va_ctime = cp->c_metadata.md_localctime;
2152 
2153 		mutex_exit(&cp->c_statelock);
2154 		break;
2155 	}
2156 out:
2157 	if (held)
2158 		cachefs_cd_release(fscp);
2159 #ifdef CFS_CD_DEBUG
2160 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2161 #endif
2162 
2163 #ifdef CFSDEBUG
2164 	CFS_DEBUG(CFSDEBUG_VOPS)
2165 		printf("cachefs_getattr: EXIT error = %d\n", error);
2166 #endif
2167 	return (error);
2168 }
2169 
2170 /*
2171  * cachefs_getattr_backfs_nfsv4
2172  *
2173  * Call NFSv4 back filesystem to handle the getattr (cachefs
2174  * pass-through support for NFSv4).
2175  */
2176 static int
2177 cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap,
2178 			int flags, cred_t *cr)
2179 {
2180 	cnode_t *cp = VTOC(vp);
2181 	fscache_t *fscp = C_TO_FSCACHE(cp);
2182 	vnode_t *backvp;
2183 	int error;
2184 
2185 	/*
2186 	 * For NFSv4 pass-through to work, only connected operation
2187 	 * is supported, the cnode backvp must exist, and cachefs
2188 	 * optional (eg., disconnectable) flags are turned off. Assert
2189 	 * these conditions for the getattr operation.
2190 	 */
2191 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2192 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2193 
2194 	/* Call backfs vnode op after extracting backvp */
2195 	mutex_enter(&cp->c_statelock);
2196 	backvp = cp->c_backvp;
2197 	mutex_exit(&cp->c_statelock);
2198 
2199 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_getattr_backfs_nfsv4: cnode %p,"
2200 					" backvp %p\n", cp, backvp));
2201 	error = VOP_GETATTR(backvp, vap, flags, cr);
2202 
2203 	/* Update attributes */
2204 	cp->c_attr = *vap;
2205 
2206 	/*
2207 	 * return the FSID of the cachefs filesystem,
2208 	 * not the back filesystem
2209 	 */
2210 	vap->va_fsid = vp->v_vfsp->vfs_dev;
2211 
2212 	return (error);
2213 }
2214 
2215 /*ARGSUSED4*/
2216 static int
2217 cachefs_setattr(
2218 	vnode_t *vp,
2219 	vattr_t *vap,
2220 	int flags,
2221 	cred_t *cr,
2222 	caller_context_t *ct)
2223 {
2224 	cnode_t *cp = VTOC(vp);
2225 	fscache_t *fscp = C_TO_FSCACHE(cp);
2226 	int error;
2227 	int connected;
2228 	int held = 0;
2229 
2230 	if (getzoneid() != GLOBAL_ZONEID)
2231 		return (EPERM);
2232 
2233 	/*
2234 	 * Cachefs only provides pass-through support for NFSv4,
2235 	 * and all vnode operations are passed through to the
2236 	 * back file system. For NFSv4 pass-through to work, only
2237 	 * connected operation is supported, the cnode backvp must
2238 	 * exist, and cachefs optional (eg., disconnectable) flags
2239 	 * are turned off. Assert these conditions to ensure that
2240 	 * the backfilesystem is called for the setattr operation.
2241 	 */
2242 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2243 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2244 
2245 	connected = 0;
2246 	for (;;) {
2247 		/* drop hold on file system */
2248 		if (held) {
2249 			/* Won't loop with NFSv4 connected behavior */
2250 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2251 			cachefs_cd_release(fscp);
2252 			held = 0;
2253 		}
2254 
2255 		/* aquire access to the file system */
2256 		error = cachefs_cd_access(fscp, connected, 1);
2257 		if (error)
2258 			break;
2259 		held = 1;
2260 
2261 		/* perform the setattr */
2262 		error = cachefs_setattr_common(vp, vap, flags, cr, ct);
2263 		if (error) {
2264 			/* if connected */
2265 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2266 				if (CFS_TIMEOUT(fscp, error)) {
2267 					cachefs_cd_release(fscp);
2268 					held = 0;
2269 					cachefs_cd_timedout(fscp);
2270 					connected = 0;
2271 					continue;
2272 				}
2273 			}
2274 
2275 			/* else must be disconnected */
2276 			else {
2277 				if (CFS_TIMEOUT(fscp, error)) {
2278 					connected = 1;
2279 					continue;
2280 				}
2281 			}
2282 		}
2283 		break;
2284 	}
2285 
2286 	if (held) {
2287 		cachefs_cd_release(fscp);
2288 	}
2289 #ifdef CFS_CD_DEBUG
2290 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2291 #endif
2292 	return (error);
2293 }
2294 
2295 static int
2296 cachefs_setattr_common(
2297 	vnode_t *vp,
2298 	vattr_t *vap,
2299 	int flags,
2300 	cred_t *cr,
2301 	caller_context_t *ct)
2302 {
2303 	cnode_t *cp = VTOC(vp);
2304 	fscache_t *fscp = C_TO_FSCACHE(cp);
2305 	cachefscache_t *cachep = fscp->fs_cache;
2306 	uint_t mask = vap->va_mask;
2307 	int error = 0;
2308 	uint_t bcnt;
2309 
2310 	/* Cannot set these attributes. */
2311 	if (mask & AT_NOSET)
2312 		return (EINVAL);
2313 
2314 	/*
2315 	 * Truncate file.  Must have write permission and not be a directory.
2316 	 */
2317 	if (mask & AT_SIZE) {
2318 		if (vp->v_type == VDIR) {
2319 			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE))
2320 				cachefs_log_truncate(cachep, EISDIR,
2321 				    fscp->fs_cfsvfsp,
2322 				    &cp->c_metadata.md_cookie,
2323 				    cp->c_id.cid_fileno,
2324 				    crgetuid(cr), vap->va_size);
2325 			return (EISDIR);
2326 		}
2327 	}
2328 
2329 	/*
2330 	 * Gotta deal with one special case here, where we're setting the
2331 	 * size of the file. First, we zero out part of the page after the
2332 	 * new size of the file. Then we toss (not write) all pages after
2333 	 * page in which the new offset occurs. Note that the NULL passed
2334 	 * in instead of a putapage() fn parameter is correct, since
2335 	 * no dirty pages will be found (B_TRUNC | B_INVAL).
2336 	 */
2337 
2338 	rw_enter(&cp->c_rwlock, RW_WRITER);
2339 
2340 	/* sync dirty pages */
2341 	if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
2342 		error = cachefs_putpage_common(vp, (offset_t)0, 0, 0, cr);
2343 		if (error == EINTR)
2344 			goto out;
2345 	}
2346 	error = 0;
2347 
2348 	/* if connected */
2349 	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2350 		error = cachefs_setattr_connected(vp, vap, flags, cr, ct);
2351 	}
2352 	/* else must be disconnected */
2353 	else {
2354 		error = cachefs_setattr_disconnected(vp, vap, flags, cr, ct);
2355 	}
2356 	if (error)
2357 		goto out;
2358 
2359 	/*
2360 	 * If the file size has been changed then
2361 	 * toss whole pages beyond the end of the file and zero
2362 	 * the portion of the last page that is beyond the end of the file.
2363 	 */
2364 	if (mask & AT_SIZE && !CFS_ISFS_BACKFS_NFSV4(fscp)) {
2365 		bcnt = (uint_t)(cp->c_size & PAGEOFFSET);
2366 		if (bcnt)
2367 			pvn_vpzero(vp, cp->c_size, PAGESIZE - bcnt);
2368 		(void) pvn_vplist_dirty(vp, cp->c_size, cachefs_push,
2369 			B_TRUNC | B_INVAL, cr);
2370 	}
2371 
2372 out:
2373 	rw_exit(&cp->c_rwlock);
2374 
2375 	if ((mask & AT_SIZE) &&
2376 	    (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE)))
2377 		cachefs_log_truncate(cachep, error, fscp->fs_cfsvfsp,
2378 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
2379 		    crgetuid(cr), vap->va_size);
2380 
2381 	return (error);
2382 }
2383 
2384 static int
2385 cachefs_setattr_connected(
2386 	vnode_t *vp,
2387 	vattr_t *vap,
2388 	int flags,
2389 	cred_t *cr,
2390 	caller_context_t *ct)
2391 {
2392 	cnode_t *cp = VTOC(vp);
2393 	fscache_t *fscp = C_TO_FSCACHE(cp);
2394 	uint_t mask = vap->va_mask;
2395 	int error = 0;
2396 	int setsize;
2397 
2398 	mutex_enter(&cp->c_statelock);
2399 
2400 	if (cp->c_backvp == NULL) {
2401 		error = cachefs_getbackvp(fscp, cp);
2402 		if (error)
2403 			goto out;
2404 	}
2405 
2406 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2407 	if (error)
2408 		goto out;
2409 
2410 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_setattr (nfsv4): cnode %p, "
2411 					"backvp %p\n", cp, cp->c_backvp));
2412 	error = VOP_SETATTR(cp->c_backvp, vap, flags, cr, ct);
2413 	if (error) {
2414 		goto out;
2415 	}
2416 
2417 	/* if the size of the file is being changed */
2418 	if (mask & AT_SIZE) {
2419 		cp->c_size = vap->va_size;
2420 		error = 0;
2421 		setsize = 0;
2422 
2423 		/* see if okay to try to set the file size */
2424 		if (((cp->c_flags & CN_NOCACHE) == 0) &&
2425 		    CFS_ISFS_NONSHARED(fscp)) {
2426 			/* okay to set size if file is populated */
2427 			if (cp->c_metadata.md_flags & MD_POPULATED)
2428 				setsize = 1;
2429 
2430 			/*
2431 			 * Okay to set size if front file exists and setting
2432 			 * file size to zero.
2433 			 */
2434 			if ((cp->c_metadata.md_flags & MD_FILE) &&
2435 			    (vap->va_size == 0))
2436 				setsize = 1;
2437 		}
2438 
2439 		/* if okay to try to set the file size */
2440 		if (setsize) {
2441 			error = 0;
2442 			if (cp->c_frontvp == NULL)
2443 				error = cachefs_getfrontfile(cp);
2444 			if (error == 0)
2445 				error = cachefs_frontfile_size(cp, cp->c_size);
2446 		} else if (cp->c_metadata.md_flags & MD_FILE) {
2447 			/* make sure file gets nocached */
2448 			error = EEXIST;
2449 		}
2450 
2451 		/* if we have to nocache the file */
2452 		if (error) {
2453 			if ((cp->c_flags & CN_NOCACHE) == 0 &&
2454 			    !CFS_ISFS_BACKFS_NFSV4(fscp))
2455 				cachefs_nocache(cp);
2456 			error = 0;
2457 		}
2458 	}
2459 
2460 	cp->c_flags |= CN_UPDATED;
2461 
2462 	/* XXX bob: given what modify_cobject does this seems unnecessary */
2463 	cp->c_attr.va_mask = AT_ALL;
2464 	error = VOP_GETATTR(cp->c_backvp, &cp->c_attr, 0, cr);
2465 	if (error)
2466 		goto out;
2467 
2468 	cp->c_attr.va_size = MAX(cp->c_attr.va_size, cp->c_size);
2469 	cp->c_size = cp->c_attr.va_size;
2470 
2471 	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
2472 out:
2473 	mutex_exit(&cp->c_statelock);
2474 	return (error);
2475 }
2476 
2477 /*
2478  * perform the setattr on the local file system
2479  */
2480 /*ARGSUSED4*/
2481 static int
2482 cachefs_setattr_disconnected(
2483 	vnode_t *vp,
2484 	vattr_t *vap,
2485 	int flags,
2486 	cred_t *cr,
2487 	caller_context_t *ct)
2488 {
2489 	cnode_t *cp = VTOC(vp);
2490 	fscache_t *fscp = C_TO_FSCACHE(cp);
2491 	int mask;
2492 	int error;
2493 	int newfile;
2494 	off_t commit = 0;
2495 
2496 	if (CFS_ISFS_WRITE_AROUND(fscp))
2497 		return (ETIMEDOUT);
2498 
2499 	/* if we do not have good attributes */
2500 	if (cp->c_metadata.md_flags & MD_NEEDATTRS)
2501 		return (ETIMEDOUT);
2502 
2503 	/* primary concern is to keep this routine as much like ufs_setattr */
2504 
2505 	mutex_enter(&cp->c_statelock);
2506 
2507 	error = secpolicy_vnode_setattr(cr, vp, vap, &cp->c_attr, flags,
2508 			    cachefs_access_local, cp);
2509 
2510 	if (error)
2511 		goto out;
2512 
2513 	mask = vap->va_mask;
2514 
2515 	/* if changing the size of the file */
2516 	if (mask & AT_SIZE) {
2517 		if (vp->v_type == VDIR) {
2518 			error = EISDIR;
2519 			goto out;
2520 		}
2521 
2522 		if (vp->v_type == VFIFO) {
2523 			error = 0;
2524 			goto out;
2525 		}
2526 
2527 		if ((vp->v_type != VREG) &&
2528 		    !((vp->v_type == VLNK) && (vap->va_size == 0))) {
2529 			error = EINVAL;
2530 			goto out;
2531 		}
2532 
2533 		if (vap->va_size > fscp->fs_offmax) {
2534 			error = EFBIG;
2535 			goto out;
2536 		}
2537 
2538 		/* if the file is not populated and we are not truncating it */
2539 		if (((cp->c_metadata.md_flags & MD_POPULATED) == 0) &&
2540 		    (vap->va_size != 0)) {
2541 			error = ETIMEDOUT;
2542 			goto out;
2543 		}
2544 
2545 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2546 			error = cachefs_dlog_cidmap(fscp);
2547 			if (error) {
2548 				error = ENOSPC;
2549 				goto out;
2550 			}
2551 			cp->c_metadata.md_flags |= MD_MAPPING;
2552 		}
2553 
2554 		/* log the operation */
2555 		commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2556 		if (commit == 0) {
2557 			error = ENOSPC;
2558 			goto out;
2559 		}
2560 		cp->c_flags &= ~CN_NOCACHE;
2561 
2562 		/* special case truncating fast sym links */
2563 		if ((vp->v_type == VLNK) &&
2564 		    (cp->c_metadata.md_flags & MD_FASTSYMLNK)) {
2565 			/* XXX how can we get here */
2566 			/* XXX should update mtime */
2567 			cp->c_size = 0;
2568 			error = 0;
2569 			goto out;
2570 		}
2571 
2572 		/* get the front file, this may create one */
2573 		newfile = (cp->c_metadata.md_flags & MD_FILE) ? 0 : 1;
2574 		if (cp->c_frontvp == NULL) {
2575 			error = cachefs_getfrontfile(cp);
2576 			if (error)
2577 				goto out;
2578 		}
2579 		ASSERT(cp->c_frontvp);
2580 		if (newfile && (cp->c_flags & CN_UPDATED)) {
2581 			/* allocate space for the metadata */
2582 			ASSERT((cp->c_flags & CN_ALLOC_PENDING) == 0);
2583 			ASSERT((cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR)
2584 			    == 0);
2585 			error = filegrp_write_metadata(cp->c_filegrp,
2586 			    &cp->c_id, &cp->c_metadata);
2587 			if (error)
2588 				goto out;
2589 		}
2590 
2591 		/* change the size of the front file */
2592 		error = cachefs_frontfile_size(cp, vap->va_size);
2593 		if (error)
2594 			goto out;
2595 		cp->c_attr.va_size = cp->c_size = vap->va_size;
2596 		gethrestime(&cp->c_metadata.md_localmtime);
2597 		cp->c_metadata.md_flags |= MD_POPULATED | MD_LOCALMTIME;
2598 		cachefs_modified(cp);
2599 		cp->c_flags |= CN_UPDATED;
2600 	}
2601 
2602 	if (mask & AT_MODE) {
2603 		/* mark as modified */
2604 		if (cachefs_modified_alloc(cp)) {
2605 			error = ENOSPC;
2606 			goto out;
2607 		}
2608 
2609 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2610 			error = cachefs_dlog_cidmap(fscp);
2611 			if (error) {
2612 				error = ENOSPC;
2613 				goto out;
2614 			}
2615 			cp->c_metadata.md_flags |= MD_MAPPING;
2616 		}
2617 
2618 		/* log the operation if not already logged */
2619 		if (commit == 0) {
2620 			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2621 			if (commit == 0) {
2622 				error = ENOSPC;
2623 				goto out;
2624 			}
2625 		}
2626 
2627 		cp->c_attr.va_mode &= S_IFMT;
2628 		cp->c_attr.va_mode |= vap->va_mode & ~S_IFMT;
2629 		gethrestime(&cp->c_metadata.md_localctime);
2630 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
2631 		cp->c_flags |= CN_UPDATED;
2632 	}
2633 
2634 	if (mask & (AT_UID|AT_GID)) {
2635 
2636 		/* mark as modified */
2637 		if (cachefs_modified_alloc(cp)) {
2638 			error = ENOSPC;
2639 			goto out;
2640 		}
2641 
2642 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2643 			error = cachefs_dlog_cidmap(fscp);
2644 			if (error) {
2645 				error = ENOSPC;
2646 				goto out;
2647 			}
2648 			cp->c_metadata.md_flags |= MD_MAPPING;
2649 		}
2650 
2651 		/* log the operation if not already logged */
2652 		if (commit == 0) {
2653 			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2654 			if (commit == 0) {
2655 				error = ENOSPC;
2656 				goto out;
2657 			}
2658 		}
2659 
2660 		if (mask & AT_UID)
2661 			cp->c_attr.va_uid = vap->va_uid;
2662 
2663 		if (mask & AT_GID)
2664 			cp->c_attr.va_gid = vap->va_gid;
2665 		gethrestime(&cp->c_metadata.md_localctime);
2666 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
2667 		cp->c_flags |= CN_UPDATED;
2668 	}
2669 
2670 
2671 	if (mask & (AT_MTIME|AT_ATIME)) {
2672 		/* mark as modified */
2673 		if (cachefs_modified_alloc(cp)) {
2674 			error = ENOSPC;
2675 			goto out;
2676 		}
2677 
2678 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2679 			error = cachefs_dlog_cidmap(fscp);
2680 			if (error) {
2681 				error = ENOSPC;
2682 				goto out;
2683 			}
2684 			cp->c_metadata.md_flags |= MD_MAPPING;
2685 		}
2686 
2687 		/* log the operation if not already logged */
2688 		if (commit == 0) {
2689 			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2690 			if (commit == 0) {
2691 				error = ENOSPC;
2692 				goto out;
2693 			}
2694 		}
2695 
2696 		if (mask & AT_MTIME) {
2697 			cp->c_metadata.md_localmtime = vap->va_mtime;
2698 			cp->c_metadata.md_flags |= MD_LOCALMTIME;
2699 		}
2700 		if (mask & AT_ATIME)
2701 			cp->c_attr.va_atime = vap->va_atime;
2702 		gethrestime(&cp->c_metadata.md_localctime);
2703 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
2704 		cp->c_flags |= CN_UPDATED;
2705 	}
2706 
2707 out:
2708 	mutex_exit(&cp->c_statelock);
2709 
2710 	/* commit the log entry */
2711 	if (commit) {
2712 		if (cachefs_dlog_commit(fscp, commit, error)) {
2713 			/*EMPTY*/
2714 			/* XXX bob: fix on panic */
2715 		}
2716 	}
2717 	return (error);
2718 }
2719 
2720 /* ARGSUSED */
2721 static int
2722 cachefs_access(vnode_t *vp, int mode, int flags, cred_t *cr)
2723 {
2724 	cnode_t *cp = VTOC(vp);
2725 	fscache_t *fscp = C_TO_FSCACHE(cp);
2726 	int error;
2727 	int held = 0;
2728 	int connected = 0;
2729 
2730 #ifdef CFSDEBUG
2731 	CFS_DEBUG(CFSDEBUG_VOPS)
2732 		printf("cachefs_access: ENTER vp %p\n", (void *)vp);
2733 #endif
2734 	if (getzoneid() != GLOBAL_ZONEID) {
2735 		error = EPERM;
2736 		goto out;
2737 	}
2738 
2739 	/*
2740 	 * Cachefs only provides pass-through support for NFSv4,
2741 	 * and all vnode operations are passed through to the
2742 	 * back file system. For NFSv4 pass-through to work, only
2743 	 * connected operation is supported, the cnode backvp must
2744 	 * exist, and cachefs optional (eg., disconnectable) flags
2745 	 * are turned off. Assert these conditions to ensure that
2746 	 * the backfilesystem is called for the access operation.
2747 	 */
2748 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2749 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2750 
2751 	for (;;) {
2752 		/* get (or renew) access to the file system */
2753 		if (held) {
2754 			/* Won't loop with NFSv4 connected behavior */
2755 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2756 			cachefs_cd_release(fscp);
2757 			held = 0;
2758 		}
2759 		error = cachefs_cd_access(fscp, connected, 0);
2760 		if (error)
2761 			break;
2762 		held = 1;
2763 
2764 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2765 			error = cachefs_access_connected(vp, mode, flags,
2766 			    cr);
2767 			if (CFS_TIMEOUT(fscp, error)) {
2768 				cachefs_cd_release(fscp);
2769 				held = 0;
2770 				cachefs_cd_timedout(fscp);
2771 				connected = 0;
2772 				continue;
2773 			}
2774 		} else {
2775 			mutex_enter(&cp->c_statelock);
2776 			error = cachefs_access_local(cp, mode, cr);
2777 			mutex_exit(&cp->c_statelock);
2778 			if (CFS_TIMEOUT(fscp, error)) {
2779 				if (cachefs_cd_access_miss(fscp)) {
2780 					mutex_enter(&cp->c_statelock);
2781 					if (cp->c_backvp == NULL) {
2782 						(void) cachefs_getbackvp(fscp,
2783 						    cp);
2784 					}
2785 					mutex_exit(&cp->c_statelock);
2786 					error = cachefs_access_connected(vp,
2787 					    mode, flags, cr);
2788 					if (!CFS_TIMEOUT(fscp, error))
2789 						break;
2790 					delay(5*hz);
2791 					connected = 0;
2792 					continue;
2793 				}
2794 				connected = 1;
2795 				continue;
2796 			}
2797 		}
2798 		break;
2799 	}
2800 	if (held)
2801 		cachefs_cd_release(fscp);
2802 #ifdef CFS_CD_DEBUG
2803 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2804 #endif
2805 out:
2806 #ifdef CFSDEBUG
2807 	CFS_DEBUG(CFSDEBUG_VOPS)
2808 		printf("cachefs_access: EXIT error = %d\n", error);
2809 #endif
2810 	return (error);
2811 }
2812 
2813 static int
2814 cachefs_access_connected(struct vnode *vp, int mode, int flags, cred_t *cr)
2815 {
2816 	cnode_t *cp = VTOC(vp);
2817 	fscache_t *fscp = C_TO_FSCACHE(cp);
2818 	int error = 0;
2819 
2820 	mutex_enter(&cp->c_statelock);
2821 
2822 	/* Make sure the cnode attrs are valid first. */
2823 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2824 	if (error)
2825 		goto out;
2826 
2827 	/* see if can do a local file system check */
2828 	if ((fscp->fs_info.fi_mntflags & CFS_ACCESS_BACKFS) == 0 &&
2829 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
2830 		error = cachefs_access_local(cp, mode, cr);
2831 		goto out;
2832 	}
2833 
2834 	/* else do a remote file system check */
2835 	else {
2836 		if (cp->c_backvp == NULL) {
2837 			error = cachefs_getbackvp(fscp, cp);
2838 			if (error)
2839 				goto out;
2840 		}
2841 
2842 		CFS_DPRINT_BACKFS_NFSV4(fscp,
2843 			("cachefs_access (nfsv4): cnode %p, backvp %p\n",
2844 			cp, cp->c_backvp));
2845 		error = VOP_ACCESS(cp->c_backvp, mode, flags, cr);
2846 
2847 		/*
2848 		 * even though we don't `need' the ACL to do access
2849 		 * via the backvp, we should cache it here to make our
2850 		 * behavior more reasonable if we go disconnected.
2851 		 */
2852 
2853 		if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
2854 		    (cachefs_vtype_aclok(vp)) &&
2855 		    ((cp->c_flags & CN_NOCACHE) == 0) &&
2856 		    (!CFS_ISFS_BACKFS_NFSV4(fscp)) &&
2857 		    ((cp->c_metadata.md_flags & MD_ACL) == 0))
2858 			(void) cachefs_cacheacl(cp, NULL);
2859 	}
2860 out:
2861 	/*
2862 	 * If NFS returned ESTALE, mark this cnode as stale, so that
2863 	 * the vn_open retry will read the file anew from backfs
2864 	 */
2865 	if (error == ESTALE)
2866 		cachefs_cnode_stale(cp);
2867 
2868 	mutex_exit(&cp->c_statelock);
2869 	return (error);
2870 }
2871 
2872 /*
2873  * CFS has a fastsymlink scheme. If the size of the link is < C_FSL_SIZE, then
2874  * the link is placed in the metadata itself (no front file is allocated).
2875  */
2876 static int
2877 cachefs_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr)
2878 {
2879 	int error = 0;
2880 	cnode_t *cp = VTOC(vp);
2881 	fscache_t *fscp = C_TO_FSCACHE(cp);
2882 	cachefscache_t *cachep = fscp->fs_cache;
2883 	int held = 0;
2884 	int connected = 0;
2885 
2886 	if (getzoneid() != GLOBAL_ZONEID)
2887 		return (EPERM);
2888 
2889 	if (vp->v_type != VLNK)
2890 		return (EINVAL);
2891 
2892 	/*
2893 	 * Cachefs only provides pass-through support for NFSv4,
2894 	 * and all vnode operations are passed through to the
2895 	 * back file system. For NFSv4 pass-through to work, only
2896 	 * connected operation is supported, the cnode backvp must
2897 	 * exist, and cachefs optional (eg., disconnectable) flags
2898 	 * are turned off. Assert these conditions to ensure that
2899 	 * the backfilesystem is called for the readlink operation.
2900 	 */
2901 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2902 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2903 
2904 	for (;;) {
2905 		/* get (or renew) access to the file system */
2906 		if (held) {
2907 			/* Won't loop with NFSv4 connected behavior */
2908 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2909 			cachefs_cd_release(fscp);
2910 			held = 0;
2911 		}
2912 		error = cachefs_cd_access(fscp, connected, 0);
2913 		if (error)
2914 			break;
2915 		held = 1;
2916 
2917 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2918 			/*
2919 			 * since readlink_connected will call stuffsymlink
2920 			 * on success, have to serialize access
2921 			 */
2922 			if (!rw_tryenter(&cp->c_rwlock, RW_WRITER)) {
2923 				cachefs_cd_release(fscp);
2924 				rw_enter(&cp->c_rwlock, RW_WRITER);
2925 				error = cachefs_cd_access(fscp, connected, 0);
2926 				if (error) {
2927 					held = 0;
2928 					rw_exit(&cp->c_rwlock);
2929 					break;
2930 				}
2931 			}
2932 			error = cachefs_readlink_connected(vp, uiop, cr);
2933 			rw_exit(&cp->c_rwlock);
2934 			if (CFS_TIMEOUT(fscp, error)) {
2935 				cachefs_cd_release(fscp);
2936 				held = 0;
2937 				cachefs_cd_timedout(fscp);
2938 				connected = 0;
2939 				continue;
2940 			}
2941 		} else {
2942 			error = cachefs_readlink_disconnected(vp, uiop);
2943 			if (CFS_TIMEOUT(fscp, error)) {
2944 				if (cachefs_cd_access_miss(fscp)) {
2945 					/* as above */
2946 					if (!rw_tryenter(&cp->c_rwlock,
2947 					    RW_WRITER)) {
2948 						cachefs_cd_release(fscp);
2949 						rw_enter(&cp->c_rwlock,
2950 						    RW_WRITER);
2951 						error = cachefs_cd_access(fscp,
2952 						    connected, 0);
2953 						if (error) {
2954 							held = 0;
2955 							rw_exit(&cp->c_rwlock);
2956 							break;
2957 						}
2958 					}
2959 					error = cachefs_readlink_connected(vp,
2960 					    uiop, cr);
2961 					rw_exit(&cp->c_rwlock);
2962 					if (!CFS_TIMEOUT(fscp, error))
2963 						break;
2964 					delay(5*hz);
2965 					connected = 0;
2966 					continue;
2967 				}
2968 				connected = 1;
2969 				continue;
2970 			}
2971 		}
2972 		break;
2973 	}
2974 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READLINK))
2975 		cachefs_log_readlink(cachep, error, fscp->fs_cfsvfsp,
2976 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
2977 		    crgetuid(cr), cp->c_size);
2978 
2979 	if (held)
2980 		cachefs_cd_release(fscp);
2981 #ifdef CFS_CD_DEBUG
2982 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2983 #endif
2984 
2985 	/*
2986 	 * The over the wire error for attempting to readlink something
2987 	 * other than a symbolic link is ENXIO.  However, we need to
2988 	 * return EINVAL instead of ENXIO, so we map it here.
2989 	 */
2990 	return (error == ENXIO ? EINVAL : error);
2991 }
2992 
2993 static int
2994 cachefs_readlink_connected(vnode_t *vp, uio_t *uiop, cred_t *cr)
2995 {
2996 	int error;
2997 	cnode_t *cp = VTOC(vp);
2998 	fscache_t *fscp = C_TO_FSCACHE(cp);
2999 	caddr_t buf;
3000 	int buflen;
3001 	int readcache = 0;
3002 
3003 	mutex_enter(&cp->c_statelock);
3004 
3005 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
3006 	if (error)
3007 		goto out;
3008 
3009 	/* if the sym link is cached as a fast sym link */
3010 	if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
3011 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3012 		error = uiomove(cp->c_metadata.md_allocinfo,
3013 		    MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
3014 #ifdef CFSDEBUG
3015 		readcache = 1;
3016 		goto out;
3017 #else /* CFSDEBUG */
3018 		/* XXX KLUDGE! correct for insidious 0-len symlink */
3019 		if (cp->c_size != 0) {
3020 			readcache = 1;
3021 			goto out;
3022 		}
3023 #endif /* CFSDEBUG */
3024 	}
3025 
3026 	/* if the sym link is cached in a front file */
3027 	if (cp->c_metadata.md_flags & MD_POPULATED) {
3028 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3029 		ASSERT(cp->c_metadata.md_flags & MD_FILE);
3030 		if (cp->c_frontvp == NULL) {
3031 			(void) cachefs_getfrontfile(cp);
3032 		}
3033 		if (cp->c_metadata.md_flags & MD_POPULATED) {
3034 			/* read symlink data from frontfile */
3035 			uiop->uio_offset = 0;
3036 			(void) VOP_RWLOCK(cp->c_frontvp,
3037 						V_WRITELOCK_FALSE, NULL);
3038 			error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
3039 			VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);
3040 
3041 			/* XXX KLUDGE! correct for insidious 0-len symlink */
3042 			if (cp->c_size != 0) {
3043 				readcache = 1;
3044 				goto out;
3045 			}
3046 		}
3047 	}
3048 
3049 	/* get the sym link contents from the back fs */
3050 	error = cachefs_readlink_back(cp, cr, &buf, &buflen);
3051 	if (error)
3052 		goto out;
3053 
3054 	/* copy the contents out to the user */
3055 	error = uiomove(buf, MIN(buflen, uiop->uio_resid), UIO_READ, uiop);
3056 
3057 	/*
3058 	 * try to cache the sym link, note that its a noop if NOCACHE is set
3059 	 * or if NFSv4 pass-through is enabled.
3060 	 */
3061 	if (cachefs_stuffsymlink(cp, buf, buflen)) {
3062 		cachefs_nocache(cp);
3063 	}
3064 
3065 	cachefs_kmem_free(buf, MAXPATHLEN);
3066 
3067 out:
3068 	mutex_exit(&cp->c_statelock);
3069 	if (error == 0) {
3070 		if (readcache)
3071 			fscp->fs_stats.st_hits++;
3072 		else
3073 			fscp->fs_stats.st_misses++;
3074 	}
3075 	return (error);
3076 }
3077 
3078 static int
3079 cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop)
3080 {
3081 	int error;
3082 	cnode_t *cp = VTOC(vp);
3083 	fscache_t *fscp = C_TO_FSCACHE(cp);
3084 	int readcache = 0;
3085 
3086 	mutex_enter(&cp->c_statelock);
3087 
3088 	/* if the sym link is cached as a fast sym link */
3089 	if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
3090 		error = uiomove(cp->c_metadata.md_allocinfo,
3091 		    MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
3092 		readcache = 1;
3093 		goto out;
3094 	}
3095 
3096 	/* if the sym link is cached in a front file */
3097 	if (cp->c_metadata.md_flags & MD_POPULATED) {
3098 		ASSERT(cp->c_metadata.md_flags & MD_FILE);
3099 		if (cp->c_frontvp == NULL) {
3100 			(void) cachefs_getfrontfile(cp);
3101 		}
3102 		if (cp->c_metadata.md_flags & MD_POPULATED) {
3103 			/* read symlink data from frontfile */
3104 			uiop->uio_offset = 0;
3105 			(void) VOP_RWLOCK(cp->c_frontvp,
3106 						V_WRITELOCK_FALSE, NULL);
3107 			error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
3108 			VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);
3109 			readcache = 1;
3110 			goto out;
3111 		}
3112 	}
3113 	error = ETIMEDOUT;
3114 
3115 out:
3116 	mutex_exit(&cp->c_statelock);
3117 	if (error == 0) {
3118 		if (readcache)
3119 			fscp->fs_stats.st_hits++;
3120 		else
3121 			fscp->fs_stats.st_misses++;
3122 	}
3123 	return (error);
3124 }
3125 
/*
 * Vnode fsync entry point.
 *
 * Fails with EPERM outside the global zone, and returns 0 without doing
 * any work when the back file system is mounted read-only.  Otherwise,
 * with access to the file system held through cachefs_cd_access(), it:
 *   1. writes out the cached pages of a regular file with
 *      cachefs_putpage_common() (skipped for NFSv4 pass-through),
 *   2. when connected and the cnode has a back vnode, calls VOP_FSYNC()
 *      on that back vnode,
 *   3. syncs the metadata with cachefs_sync_metadata() (skipped for
 *      NFSv4 pass-through).
 * A timeout in step 1 or 2, or ENOSPC in step 1 while not connected,
 * restarts the sequence.  When everything succeeds, the value of
 * cp->c_error is returned.
 */
3126 /*ARGSUSED*/
3127 static int
3128 cachefs_fsync(vnode_t *vp, int syncflag, cred_t *cr)
3129 {
3130 	cnode_t *cp = VTOC(vp);
3131 	int error = 0;
3132 	fscache_t *fscp = C_TO_FSCACHE(cp);
3133 	int held = 0;
3134 	int connected = 0;
3135 
3136 #ifdef CFSDEBUG
3137 	CFS_DEBUG(CFSDEBUG_VOPS)
3138 		printf("cachefs_fsync: ENTER vp %p\n", (void *)vp);
3139 #endif
3140 
3141 	if (getzoneid() != GLOBAL_ZONEID) {
3142 		error = EPERM;
3143 		goto out;
3144 	}
3145 
	/* read-only back file system: leave with error still 0 */
3146 	if (fscp->fs_backvfsp && fscp->fs_backvfsp->vfs_flag & VFS_RDONLY)
3147 		goto out;
3148 
3149 	/*
3150 	 * Cachefs only provides pass-through support for NFSv4,
3151 	 * and all vnode operations are passed through to the
3152 	 * back file system. For NFSv4 pass-through to work, only
3153 	 * connected operation is supported, the cnode backvp must
3154 	 * exist, and cachefs optional (eg., disconnectable) flags
3155 	 * are turned off. Assert these conditions to ensure that
3156 	 * the backfilesystem is called for the fsync operation.
3157 	 */
3158 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3159 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
3160 
3161 	for (;;) {
3162 		/* get (or renew) access to the file system */
3163 		if (held) {
3164 			/* Won't loop with NFSv4 connected behavior */
3165 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3166 			cachefs_cd_release(fscp);
3167 			held = 0;
3168 		}
3169 		error = cachefs_cd_access(fscp, connected, 1);
3170 		if (error)
3171 			break;
3172 		held = 1;
		/*
		 * Reset for this pass; the retry paths below set it to 1
		 * so the next cachefs_cd_access() call is made with a
		 * nonzero "connected" argument.
		 */
3173 		connected = 0;
3174 
3175 		/* if a regular file, write out the pages */
3176 		if ((vp->v_type == VREG) && vn_has_cached_data(vp) &&
3177 		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
3178 			error = cachefs_putpage_common(vp, (offset_t)0,
3179 			    0, 0, cr);
3180 			if (CFS_TIMEOUT(fscp, error)) {
3181 				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3182 					cachefs_cd_release(fscp);
3183 					held = 0;
3184 					cachefs_cd_timedout(fscp);
3185 					continue;
3186 				} else {
3187 					connected = 1;
3188 					continue;
3189 				}
3190 			}
3191 
3192 			/* if no space left in cache, wait until connected */
3193 			if ((error == ENOSPC) &&
3194 			    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
3195 				connected = 1;
3196 				continue;
3197 			}
3198 
3199 			/* clear the cnode error if putpage worked */
3200 			if ((error == 0) && cp->c_error) {
3201 				mutex_enter(&cp->c_statelock);
3202 				cp->c_error = 0;
3203 				mutex_exit(&cp->c_statelock);
3204 			}
3205 
3206 			if (error)
3207 				break;
3208 		}
3209 
3210 		/* if connected, sync the backvp */
3211 		if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) &&
3212 		    cp->c_backvp) {
3213 			mutex_enter(&cp->c_statelock);
			/* c_backvp is tested again now that the lock is held */
3214 			if (cp->c_backvp) {
3215 				CFS_DPRINT_BACKFS_NFSV4(fscp,
3216 					("cachefs_fsync (nfsv4): cnode %p, "
3217 					"backvp %p\n", cp, cp->c_backvp));
3218 				error = VOP_FSYNC(cp->c_backvp, syncflag, cr);
3219 				if (CFS_TIMEOUT(fscp, error)) {
3220 					mutex_exit(&cp->c_statelock);
3221 					cachefs_cd_release(fscp);
3222 					held = 0;
3223 					cachefs_cd_timedout(fscp);
3224 					continue;
3225 				} else if (error && (error != EINTR))
					/* remember the failure on the cnode */
3226 					cp->c_error = error;
3227 			}
3228 			mutex_exit(&cp->c_statelock);
3229 		}
3230 
3231 		/* sync the metadata and the front file to the front fs */
3232 		if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
			/* note: this overwrites any error left by VOP_FSYNC */
3233 			error = cachefs_sync_metadata(cp);
			/* a metadata sync failure is ignored while connected */
3234 			if (error &&
3235 			    (fscp->fs_cdconnected == CFS_CD_CONNECTED))
3236 				error = 0;
3237 		}
3238 		break;
3239 	}
3240 
	/* nothing failed above: report the error recorded on the cnode */
3241 	if (error == 0)
3242 		error = cp->c_error;
3243 
3244 	if (held)
3245 		cachefs_cd_release(fscp);
3246 
3247 out:
3248 #ifdef CFS_CD_DEBUG
3249 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3250 #endif
3251 
3252 #ifdef CFSDEBUG
3253 	CFS_DEBUG(CFSDEBUG_VOPS)
3254 		printf("cachefs_fsync: EXIT vp %p\n", (void *)vp);
3255 #endif
3256 	return (error);
3257 }
3258 
3259 /*
3260  * Called from cachefs_inactive(), to make sure all the data goes out to disk.
 * Also called from cachefs_fsync().
 *
 * Work is only done when the cnode is marked CN_UPDATED, is neither
 * CN_STALE nor CN_DESTROY, its file group has CFS_FG_WRITE set and the
 * back file system is not NFSv4; otherwise the only effect is that
 * CN_UPDATED is cleared and 0 is returned.
 *
 * When work is done, with c_statelock held:
 *   - the file group's attribute storage is allocated if
 *     CFS_FG_ALLOC_ATTR is set (the lock is dropped around that call,
 *     and a failure there ends the sync with a return value of 0),
 *   - a metadata slot is created if CN_ALLOC_PENDING is set,
 *   - if CN_NEED_FRONT_SYNC is set, the front file is fsync'ed and its
 *     mtime is saved as the metadata timestamp,
 *   - the metadata is written with filegrp_write_metadata().
 *
 * On error the front file and metadata are thrown away and the cnode is
 * switched to nocache mode.  CN_UPDATED is cleared in all cases.
 *
 * Returns 0 or the error of the step that failed.
3261  */
3262 int
3263 cachefs_sync_metadata(cnode_t *cp)
3264 {
3265 	int error = 0;
3266 	struct filegrp *fgp;
3267 	struct vattr va;
3268 	fscache_t *fscp = C_TO_FSCACHE(cp);
3269 
3270 #ifdef CFSDEBUG
3271 	CFS_DEBUG(CFSDEBUG_VOPS)
3272 		printf("c_sync_metadata: ENTER cp %p cflag %x\n",
3273 			(void *)cp, cp->c_flags);
3274 #endif
3275 
3276 	mutex_enter(&cp->c_statelock);
	/*
	 * Early exits.  error is 0 on all of them, so the error cleanup
	 * at "out" (the only user of fgp there) is skipped.
	 */
3277 	if ((cp->c_flags & CN_UPDATED) == 0)
3278 		goto out;
3279 	if (cp->c_flags & (CN_STALE | CN_DESTROY))
3280 		goto out;
3281 	fgp = cp->c_filegrp;
3282 	if ((fgp->fg_flags & CFS_FG_WRITE) == 0)
3283 		goto out;
3284 	if (CFS_ISFS_BACKFS_NFSV4(fscp))
3285 		goto out;
3286 
3287 	if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
		/* c_statelock is dropped around filegrp_allocattr() */
3288 		mutex_exit(&cp->c_statelock);
3289 		error = filegrp_allocattr(fgp);
3290 		mutex_enter(&cp->c_statelock);
3291 		if (error) {
			/* give up quietly; the failure is not reported */
3292 			error = 0;
3293 			goto out;
3294 		}
3295 	}
3296 
3297 	if (cp->c_flags & CN_ALLOC_PENDING) {
3298 		error = filegrp_create_metadata(fgp, &cp->c_metadata,
3299 		    &cp->c_id);
3300 		if (error)
3301 			goto out;
3302 		cp->c_flags &= ~CN_ALLOC_PENDING;
3303 	}
3304 
3305 	if (cp->c_flags & CN_NEED_FRONT_SYNC) {
3306 		if (cp->c_frontvp != NULL) {
3307 			error = VOP_FSYNC(cp->c_frontvp, FSYNC, kcred);
3308 			if (error) {
				/*
				 * The timestamp is zeroed and execution
				 * continues; error may be overwritten by
				 * the metadata write below.
				 */
3309 				cp->c_metadata.md_timestamp.tv_sec = 0;
3310 			} else {
3311 				va.va_mask = AT_MTIME;
3312 				error = VOP_GETATTR(cp->c_frontvp, &va, 0,
3313 				    kcred);
3314 				if (error)
3315 					goto out;
				/* record the front file's mtime in the metadata */
3316 				cp->c_metadata.md_timestamp = va.va_mtime;
3317 				cp->c_flags &=
3318 				~(CN_NEED_FRONT_SYNC | CN_POPULATION_PENDING);
3319 			}
3320 		} else {
			/* no front vnode: just drop the pending flags */
3321 			cp->c_flags &=
3322 				~(CN_NEED_FRONT_SYNC | CN_POPULATION_PENDING);
3323 		}
3324 	}
3325 
3326 	/*
3327 	 * XXX tony: How can CN_ALLOC_PENDING still be set??
3328 	 * XXX tony: How can CN_UPDATED not be set?????
3329 	 */
3330 	if ((cp->c_flags & CN_ALLOC_PENDING) == 0 &&
3331 			(cp->c_flags & CN_UPDATED)) {
3332 		error = filegrp_write_metadata(fgp, &cp->c_id,
3333 				&cp->c_metadata);
3334 		if (error)
3335 			goto out;
3336 	}
3337 out:
3338 	if (error) {
3339 		/* XXX modified files? */
		/*
		 * Release the front file and its resource list entry,
		 * then the metadata slot, and stop caching this cnode.
		 */
3340 		if (cp->c_metadata.md_rlno) {
3341 			cachefs_removefrontfile(&cp->c_metadata,
3342 			    &cp->c_id, fgp);
3343 			cachefs_rlent_moveto(C_TO_FSCACHE(cp)->fs_cache,
3344 			    CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
3345 			cp->c_metadata.md_rlno = 0;
3346 			cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
3347 			if (cp->c_frontvp) {
3348 				VN_RELE(cp->c_frontvp);
3349 				cp->c_frontvp = NULL;
3350 			}
3351 		}
3352 		if ((cp->c_flags & CN_ALLOC_PENDING) == 0)
3353 			(void) filegrp_destroy_metadata(fgp, &cp->c_id);
3354 		cp->c_flags |= CN_ALLOC_PENDING;
3355 		cachefs_nocache(cp);
3356 	}
3357 	/*
3358 	 * we clear the updated bit even on errors because a retry
3359 	 * will probably fail also.
3360 	 */
3361 	cp->c_flags &= ~CN_UPDATED;
3362 	mutex_exit(&cp->c_statelock);
3363 
3364 #ifdef CFSDEBUG
3365 	CFS_DEBUG(CFSDEBUG_VOPS)
3366 		printf("c_sync_metadata: EXIT cp %p cflag %x\n",
3367 			(void *)cp, cp->c_flags);
3368 #endif
3369 
3370 	return (error);
3371 }
3372 
3373 /*
3374  * This is the vop entry point for inactivating a vnode.
3375  * It just queues the request for the async thread which
3376  * calls cachefs_inactive.
3377  * Because of the dnlc, it is not safe to grab most locks here.
3378  */
3379 static void
3380 cachefs_inactive(struct vnode *vp, cred_t *cr)
3381 {
3382 	cnode_t *cp;
3383 	struct cachefs_req *rp;
3384 	fscache_t *fscp;
3385 
3386 #ifdef CFSDEBUG
3387 	CFS_DEBUG(CFSDEBUG_VOPS)
3388 		printf("cachefs_inactive: ENTER vp %p\n", (void *)vp);
3389 #endif
3390 
3391 	cp = VTOC(vp);
3392 	fscp = C_TO_FSCACHE(cp);
3393 
3394 	ASSERT((cp->c_flags & CN_IDLE) == 0);
3395 
3396 	/*
3397 	 * Cachefs only provides pass-through support for NFSv4,
3398 	 * and all vnode operations are passed through to the
3399 	 * back file system. For NFSv4 pass-through to work, only
3400 	 * connected operation is supported, the cnode backvp must
3401 	 * exist, and cachefs optional (eg., disconnectable) flags
3402 	 * are turned off. Assert these conditions to ensure that
3403 	 * the backfilesystem is called for the inactive operation.
3404 	 */
3405 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3406 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
3407 
3408 	/* vn_rele() set the v_count == 1 */
3409 
3410 	cp->c_ipending = 1;
3411 
3412 	rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
3413 	rp->cfs_cmd = CFS_IDLE;
3414 	rp->cfs_cr = cr;
3415 	crhold(rp->cfs_cr);
3416 	rp->cfs_req_u.cu_idle.ci_vp = vp;
3417 	cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));
3418 
3419 #ifdef CFSDEBUG
3420 	CFS_DEBUG(CFSDEBUG_VOPS)
3421 		printf("cachefs_inactive: EXIT vp %p\n", (void *)vp);
3422 #endif
3423 }
3424 
3425 /* ARGSUSED */
3426 static int
3427 cachefs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp,
3428     struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr)
3429 {
3430 	int error = 0;
3431 	cnode_t *dcp = VTOC(dvp);
3432 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3433 	int held = 0;
3434 	int connected = 0;
3435 
3436 #ifdef CFSDEBUG
3437 	CFS_DEBUG(CFSDEBUG_VOPS)
3438 		printf("cachefs_lookup: ENTER dvp %p nm %s\n", (void *)dvp, nm);
3439 #endif
3440 
3441 	if (getzoneid() != GLOBAL_ZONEID) {
3442 		error = EPERM;
3443 		goto out;
3444 	}
3445 
3446 	/*
3447 	 * Cachefs only provides pass-through support for NFSv4,
3448 	 * and all vnode operations are passed through to the
3449 	 * back file system. For NFSv4 pass-through to work, only
3450 	 * connected operation is supported, the cnode backvp must
3451 	 * exist, and cachefs optional (eg., disconnectable) flags
3452 	 * are turned off. Assert these conditions to ensure that
3453 	 * the backfilesystem is called for the lookup operation.
3454 	 */
3455 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3456 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
3457 
3458 	for (;;) {
3459 		/* get (or renew) access to the file system */
3460 		if (held) {
3461 			/* Won't loop with NFSv4 connected behavior */
3462 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3463 			cachefs_cd_release(fscp);
3464 			held = 0;
3465 		}
3466 		error = cachefs_cd_access(fscp, connected, 0);
3467 		if (error)
3468 			break;
3469 		held = 1;
3470 
3471 		error = cachefs_lookup_common(dvp, nm, vpp, pnp,
3472 			flags, rdir, cr);
3473 		if (CFS_TIMEOUT(fscp, error)) {
3474 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3475 				cachefs_cd_release(fscp);
3476 				held = 0;
3477 				cachefs_cd_timedout(fscp);
3478 				connected = 0;
3479 				continue;
3480 			} else {
3481 				if (cachefs_cd_access_miss(fscp)) {
3482 					rw_enter(&dcp->c_rwlock, RW_READER);
3483 					error = cachefs_lookup_back(dvp, nm,
3484 					    vpp, cr);
3485 					rw_exit(&dcp->c_rwlock);
3486 					if (!CFS_TIMEOUT(fscp, error))
3487 						break;
3488 					delay(5*hz);
3489 					connected = 0;
3490 					continue;
3491 				}
3492 				connected = 1;
3493 				continue;
3494 			}
3495 		}
3496 		break;
3497 	}
3498 	if (held)
3499 		cachefs_cd_release(fscp);
3500 
3501 	if (error == 0 && IS_DEVVP(*vpp)) {
3502 		struct vnode *newvp;
3503 		newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
3504 		VN_RELE(*vpp);
3505 		if (newvp == NULL) {
3506 			error = ENOSYS;
3507 		} else {
3508 			*vpp = newvp;
3509 		}
3510 	}
3511 
3512 #ifdef CFS_CD_DEBUG
3513 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3514 #endif
3515 out:
3516 #ifdef CFSDEBUG
3517 	CFS_DEBUG(CFSDEBUG_VOPS)
3518 		printf("cachefs_lookup: EXIT error = %d\n", error);
3519 #endif
3520 
3521 	return (error);
3522 }
3523 
/*
 * Common lookup code: resolve the name nm in directory dvp and return
 * the resulting vnode through *vpp (set to NULL on entry).
 *
 * Order of processing:
 *   - "" returns dvp (with a hold) without any checks;
 *   - a non-directory dvp fails with ENOTDIR;
 *   - VEXEC access to the directory is checked, through
 *     cachefs_access_connected() when connected and
 *     cachefs_access_local() otherwise;
 *   - "." returns dvp (with a hold);
 *   - a dnlc hit is returned directly;
 *   - otherwise, holding the directory's c_rwlock as reader, the name
 *     is looked up in the cached directory (front file) when that is
 *     usable and populated, or on the back file system through
 *     cachefs_lookup_back().
 * Whenever the back file system is needed but the file system is not
 * connected, ETIMEDOUT is returned.
 *
 * On success past the dnlc check, the entry is added to the dnlc (if
 * cachefs_dnlc is set) and the directory's cid is recorded as the
 * parent in the child's metadata.
 *
 * Returns 0 or an error.
 */
3524 /* ARGSUSED */
3525 int
3526 cachefs_lookup_common(vnode_t *dvp, char *nm, vnode_t **vpp,
3527     struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr)
3528 {
3529 	int error = 0;
3530 	cnode_t *cp, *dcp = VTOC(dvp);
3531 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3532 	struct fid cookie;
3533 	u_offset_t d_offset;
3534 	struct cachefs_req *rp;
3535 	cfs_cid_t cid, dircid;
3536 	uint_t flag;
3537 	uint_t uncached = 0;
3538 
3539 	*vpp = NULL;
3540 
3541 	/*
3542 	 * If lookup is for "", just return dvp.  Don't need
3543 	 * to send it over the wire, look it up in the dnlc,
3544 	 * or perform any access checks.
3545 	 */
3546 	if (*nm == '\0') {
3547 		VN_HOLD(dvp);
3548 		*vpp = dvp;
3549 		return (0);
3550 	}
3551 
3552 	/* can't do lookups in non-directories */
3553 	if (dvp->v_type != VDIR)
3554 		return (ENOTDIR);
3555 
3556 	/* perform access check, also does consistency check if connected */
3557 	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3558 		error = cachefs_access_connected(dvp, VEXEC, 0, cr);
3559 	} else {
3560 		mutex_enter(&dcp->c_statelock);
3561 		error = cachefs_access_local(dcp, VEXEC, cr);
3562 		mutex_exit(&dcp->c_statelock);
3563 	}
3564 	if (error)
3565 		return (error);
3566 
3567 	/*
3568 	 * If lookup is for ".", just return dvp.  Don't need
3569 	 * to send it over the wire or look it up in the dnlc,
3570 	 * just need to check access.
3571 	 */
3572 	if (strcmp(nm, ".") == 0) {
3573 		VN_HOLD(dvp);
3574 		*vpp = dvp;
3575 		return (0);
3576 	}
3577 
3578 	/* check the dnlc */
3579 	*vpp = (vnode_t *)dnlc_lookup(dvp, nm);
3580 	if (*vpp)
3581 		return (0);
3582 
	/*
	 * From here on every exit goes through "out", which drops
	 * c_rwlock; c_statelock must be released before jumping there.
	 */
3583 	/* read lock the dir before starting the search */
3584 	rw_enter(&dcp->c_rwlock, RW_READER);
3585 
3586 	mutex_enter(&dcp->c_statelock);
	/* saved now; used at "out" to record the child's parent */
3587 	dircid = dcp->c_id;
3588 
3589 	dcp->c_usage++;
3590 
3591 	/* if front file is not usable, lookup on the back fs */
3592 	if ((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
3593 	    CFS_ISFS_BACKFS_NFSV4(fscp) ||
3594 	    ((dcp->c_filegrp->fg_flags & CFS_FG_READ) == 0)) {
3595 		mutex_exit(&dcp->c_statelock);
3596 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3597 			error = cachefs_lookup_back(dvp, nm, vpp, cr);
3598 		else
3599 			error = ETIMEDOUT;
3600 		goto out;
3601 	}
3602 
3603 	/* if the front file is not populated, try to populate it */
3604 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
3605 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
3606 			error = ETIMEDOUT;
3607 			mutex_exit(&dcp->c_statelock);
3608 			goto out;
3609 		}
3610 
3611 		if (cachefs_async_okay()) {
3612 			/* cannot populate if cache is not writable */
3613 			ASSERT((dcp->c_flags &
3614 			    (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0);
3615 			dcp->c_flags |= CN_ASYNC_POPULATE;
3616 
			/*
			 * Queue a CFS_POPULATE request for the directory;
			 * the request carries holds on cr and dvp.
			 */
3617 			rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
3618 			rp->cfs_cmd = CFS_POPULATE;
3619 			rp->cfs_req_u.cu_populate.cpop_vp = dvp;
3620 			rp->cfs_cr = cr;
3621 
3622 			crhold(cr);
3623 			VN_HOLD(dvp);
3624 
3625 			cachefs_addqueue(rp, &fscp->fs_workq);
3626 		} else if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
			/* populate the directory synchronously instead */
3627 			error = cachefs_dir_fill(dcp, cr);
3628 			if (error != 0) {
3629 				mutex_exit(&dcp->c_statelock);
3630 				goto out;
3631 			}
3632 		}
3633 		/* no populate if too many asyncs and we have to cache ACLs */
3634 
3635 		mutex_exit(&dcp->c_statelock);
3636 
		/* this lookup itself is still answered by the back fs */
3637 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3638 			error = cachefs_lookup_back(dvp, nm, vpp, cr);
3639 		else
3640 			error = ETIMEDOUT;
3641 		goto out;
3642 	}
3643 
3644 	/* by now we have a valid cached front file that we can search */
3645 
3646 	ASSERT((dcp->c_flags & CN_ASYNC_POPULATE) == 0);
3647 	error = cachefs_dir_look(dcp, nm, &cookie, &flag,
3648 			&d_offset, &cid);
3649 	mutex_exit(&dcp->c_statelock);
3650 
3651 	if (error) {
		/*
		 * Both branches below take the same action; only ENOENT
		 * is passed back to the caller unchanged.
		 */
3652 		/* if the entry does not have the fid, go get it */
3653 		if (error == EINVAL) {
3654 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3655 				error = cachefs_lookup_back(dvp, nm, vpp, cr);
3656 			else
3657 				error = ETIMEDOUT;
3658 		}
3659 
3660 		/* errors other than does not exist */
3661 		else if (error != ENOENT) {
3662 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3663 				error = cachefs_lookup_back(dvp, nm, vpp, cr);
3664 			else
3665 				error = ETIMEDOUT;
3666 		}
3667 		goto out;
3668 	}
3669 
3670 	/*
3671 	 * Else we found the entry in the cached directory.
3672 	 * Make a cnode for it.
3673 	 */
3674 	error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
3675 	    cr, 0, &cp);
3676 	if (error == ESTALE) {
		/*
		 * Stop caching the directory and fall back to the back
		 * fs; "uncached" triggers the re-pack attempt at the end.
		 */
3677 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3678 		mutex_enter(&dcp->c_statelock);
3679 		cachefs_nocache(dcp);
3680 		mutex_exit(&dcp->c_statelock);
3681 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3682 			error = cachefs_lookup_back(dvp, nm, vpp, cr);
3683 			uncached = 1;
3684 		} else
3685 			error = ETIMEDOUT;
3686 	} else if (error == 0) {
3687 		*vpp = CTOV(cp);
3688 	}
3689 
3690 out:
3691 	if (error == 0) {
3692 		/* put the entry in the dnlc */
3693 		if (cachefs_dnlc)
3694 			dnlc_enter(dvp, nm, *vpp);
3695 
3696 		/* save the cid of the parent so can find the name */
3697 		cp = VTOC(*vpp);
		/* the comparison is made before c_statelock is taken */
3698 		if (bcmp(&cp->c_metadata.md_parent, &dircid,
3699 		    sizeof (cfs_cid_t)) != 0) {
3700 			mutex_enter(&cp->c_statelock);
3701 			cp->c_metadata.md_parent = dircid;
3702 			cp->c_flags |= CN_UPDATED;
3703 			mutex_exit(&cp->c_statelock);
3704 		}
3705 	}
3706 
3707 	rw_exit(&dcp->c_rwlock);
	/* the directory was uncached above; re-pack it if it was packed */
3708 	if (uncached && dcp->c_metadata.md_flags & MD_PACKED)
3709 		(void) cachefs_pack_common(dvp, cr);
3710 	return (error);
3711 }
3712 
3713 /*
3714  * Called from cachefs_lookup_common when the back file system needs to be
3715  * examined to perform the lookup.
3716  */
3717 static int
3718 cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp,
3719     cred_t *cr)
3720 {
3721 	int error = 0;
3722 	cnode_t *cp, *dcp = VTOC(dvp);
3723 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3724 	vnode_t *backvp = NULL;
3725 	struct vattr va;
3726 	struct fid cookie;
3727 	cfs_cid_t cid;
3728 	uint32_t valid_fid;
3729 
3730 	mutex_enter(&dcp->c_statelock);
3731 
3732 	/* do a lookup on the back FS to get the back vnode */
3733 	if (dcp->c_backvp == NULL) {
3734 		error = cachefs_getbackvp(fscp, dcp);
3735 		if (error)
3736 			goto out;
3737 	}
3738 
3739 	CFS_DPRINT_BACKFS_NFSV4(fscp,
3740 		("cachefs_lookup (nfsv4): dcp %p, dbackvp %p, name %s\n",
3741 		dcp, dcp->c_backvp, nm));
3742 	error = VOP_LOOKUP(dcp->c_backvp, nm, &backvp, (struct pathname *)NULL,
3743 				0, (vnode_t *)NULL, cr);
3744 	if (error)
3745 		goto out;
3746 	if (IS_DEVVP(backvp)) {
3747 		struct vnode *devvp = backvp;
3748 
3749 		if (VOP_REALVP(devvp, &backvp) == 0) {
3750 			VN_HOLD(backvp);
3751 			VN_RELE(devvp);
3752 		}
3753 	}
3754 
3755 	/* get the fid and attrs from the back fs */
3756 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
3757 	error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
3758 	if (error)
3759 		goto out;
3760 
3761 	cid.cid_fileno = va.va_nodeid;
3762 	cid.cid_flags = 0;
3763 
3764 #if 0
3765 	/* XXX bob: this is probably no longer necessary */
3766 	/* if the directory entry was incomplete, we can complete it now */
3767 	if ((dcp->c_metadata.md_flags & MD_POPULATED) &&
3768 	    ((dcp->c_flags & CN_ASYNC_POPULATE) == 0) &&
3769 	    (dcp->c_filegrp->fg_flags & CFS_FG_WRITE)) {
3770 		cachefs_dir_modentry(dcp, d_offset, &cookie, &cid);
3771 	}
3772 #endif
3773 
3774 out:
3775 	mutex_exit(&dcp->c_statelock);
3776 
3777 	/* create the cnode */
3778 	if (error == 0) {
3779 		error = cachefs_cnode_make(&cid, fscp,
3780 					(valid_fid ? &cookie : NULL),
3781 					&va, backvp, cr, 0, &cp);
3782 		if (error == 0) {
3783 			*vpp = CTOV(cp);
3784 		}
3785 	}
3786 
3787 	if (backvp)
3788 		VN_RELE(backvp);
3789 
3790 	return (error);
3791 }
3792 
3793 /*ARGSUSED7*/
3794 static int
3795 cachefs_create(vnode_t *dvp, char *nm, vattr_t *vap,
3796     vcexcl_t exclusive, int mode, vnode_t **vpp, cred_t *cr, int flag)
3797 {
3798 	cnode_t *dcp = VTOC(dvp);
3799 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3800 	cachefscache_t *cachep = fscp->fs_cache;
3801 	int error;
3802 	int connected = 0;
3803 	int held = 0;
3804 
3805 #ifdef CFSDEBUG
3806 	CFS_DEBUG(CFSDEBUG_VOPS)
3807 		printf("cachefs_create: ENTER dvp %p, nm %s\n",
3808 		    (void *)dvp, nm);
3809 #endif
3810 	if (getzoneid() != GLOBAL_ZONEID) {
3811 		error = EPERM;
3812 		goto out;
3813 	}
3814 
3815 	/*
3816 	 * Cachefs only provides pass-through support for NFSv4,
3817 	 * and all vnode operations are passed through to the
3818 	 * back file system. For NFSv4 pass-through to work, only
3819 	 * connected operation is supported, the cnode backvp must
3820 	 * exist, and cachefs optional (eg., disconnectable) flags
3821 	 * are turned off. Assert these conditions to ensure that
3822 	 * the backfilesystem is called for the create operation.
3823 	 */
3824 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3825 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
3826 
3827 	for (;;) {
3828 		/* get (or renew) access to the file system */
3829 		if (held) {
3830 			/* Won't loop with NFSv4 connected behavior */
3831 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3832 			cachefs_cd_release(fscp);
3833 			held = 0;
3834 		}
3835 		error = cachefs_cd_access(fscp, connected, 1);
3836 		if (error)
3837 			break;
3838 		held = 1;
3839 
3840 		/*
3841 		 * if we are connected, perform the remote portion of the
3842 		 * create.
3843 		 */
3844 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3845 			error = cachefs_create_connected(dvp, nm, vap,
3846 			    exclusive, mode, vpp, cr);
3847 			if (CFS_TIMEOUT(fscp, error)) {
3848 				cachefs_cd_release(fscp);
3849 				held = 0;
3850 				cachefs_cd_timedout(fscp);
3851 				connected = 0;
3852 				continue;
3853 			} else if (error) {
3854 				break;
3855 			}
3856 		}
3857 
3858 		/* else we must be disconnected */
3859 		else {
3860 			error = cachefs_create_disconnected(dvp, nm, vap,
3861 			    exclusive, mode, vpp, cr);
3862 			if (CFS_TIMEOUT(fscp, error)) {
3863 				connected = 1;
3864 				continue;
3865 			} else if (error) {
3866 				break;
3867 			}
3868 		}
3869 		break;
3870 	}
3871 
3872 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_CREATE)) {
3873 		fid_t *fidp = NULL;
3874 		ino64_t fileno = 0;
3875 		cnode_t *cp = NULL;
3876 		if (error == 0)
3877 			cp = VTOC(*vpp);
3878 
3879 		if (cp != NULL) {
3880 			fidp = &cp->c_metadata.md_cookie;
3881 			fileno = cp->c_id.cid_fileno;
3882 		}
3883 		cachefs_log_create(cachep, error, fscp->fs_cfsvfsp,
3884 		    fidp, fileno, crgetuid(cr));
3885 	}
3886 
3887 	if (held)
3888 		cachefs_cd_release(fscp);
3889 
3890 	if (error == 0 && CFS_ISFS_NONSHARED(fscp))
3891 		(void) cachefs_pack(dvp, nm, cr);
3892 	if (error == 0 && IS_DEVVP(*vpp)) {
3893 		struct vnode *spcvp;
3894 
3895 		spcvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
3896 		VN_RELE(*vpp);
3897 		if (spcvp == NULL) {
3898 			error = ENOSYS;
3899 		} else {
3900 			*vpp = spcvp;
3901 		}
3902 	}
3903 
3904 #ifdef CFS_CD_DEBUG
3905 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3906 #endif
3907 out:
3908 #ifdef CFSDEBUG
3909 	CFS_DEBUG(CFSDEBUG_VOPS)
3910 		printf("cachefs_create: EXIT error %d\n", error);
3911 #endif
3912 	return (error);
3913 }
3914 
3915 
/*
 * Connected-mode half of cachefs_create().
 *
 * If nm already exists in dvp (found through cachefs_lookup_common()),
 * no file is created: EEXIST is returned for an exclusive create,
 * EISDIR for write access to a directory, otherwise access is checked
 * and, when AT_SIZE is set in vap and the object is a regular file, the
 * size is set through cachefs_setattr_common().  Note that vap->va_mask
 * is overwritten with AT_SIZE in that case.
 *
 * Otherwise, holding the directory's c_rwlock as writer, the file is
 * created on the back file system with VOP_CREATE(), a cnode is made
 * for it, and for non-shared file systems with a populated directory
 * the new name is entered in the cached directory.  Problems updating
 * the cached directory are not reported; the directory is switched to
 * nocache mode instead.
 *
 * A timeout from the initial lookup is returned to the caller as is.
 * Returns 0 with the vnode in *vpp on success, otherwise an error.
 */
3916 static int
3917 cachefs_create_connected(vnode_t *dvp, char *nm, vattr_t *vap,
3918     enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr)
3919 {
3920 	cnode_t *dcp = VTOC(dvp);
3921 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3922 	int error;
3923 	vnode_t *tvp = NULL;
3924 	vnode_t *devvp;
3925 	fid_t cookie;
3926 	vattr_t va;
3927 	cnode_t *ncp;
3928 	cfs_cid_t cid;
3929 	vnode_t *vp;
3930 	uint32_t valid_fid;
3931 
3932 	/* special case if file already exists */
3933 	error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
3934 	if (CFS_TIMEOUT(fscp, error))
3935 		return (error);
3936 	if (error == 0) {
3937 		if (exclusive == EXCL)
3938 			error = EEXIST;
3939 		else if (vp->v_type == VDIR && (mode & VWRITE))
3940 			error = EISDIR;
3941 		else if ((error =
3942 		    cachefs_access_connected(vp, mode, 0, cr)) == 0) {
3943 			if ((vap->va_mask & AT_SIZE) && (vp->v_type == VREG)) {
3944 				vap->va_mask = AT_SIZE;
3945 				error =
3946 				    cachefs_setattr_common(vp, vap,
3947 					0, cr, NULL);
3948 			}
3949 		}
		/* on failure release the vnode returned by the lookup */
3950 		if (error) {
3951 			VN_RELE(vp);
3952 		} else
3953 			*vpp = vp;
3954 		return (error);
3955 	}
3956 
	/*
	 * The lookup failed for a reason other than a timeout, so go
	 * ahead and create the file.  All exits below pass through
	 * "out", which drops c_rwlock.
	 */
3957 	rw_enter(&dcp->c_rwlock, RW_WRITER);
3958 	mutex_enter(&dcp->c_statelock);
3959 
3960 	/* consistency check the directory */
3961 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
3962 	if (error) {
3963 		mutex_exit(&dcp->c_statelock);
3964 		goto out;
3965 	}
3966 
3967 	/* get the backvp if necessary */
3968 	if (dcp->c_backvp == NULL) {
3969 		error = cachefs_getbackvp(fscp, dcp);
3970 		if (error) {
3971 			mutex_exit(&dcp->c_statelock);
3972 			goto out;
3973 		}
3974 	}
3975 
3976 	/* create the file on the back fs */
3977 	CFS_DPRINT_BACKFS_NFSV4(fscp,
3978 		("cachefs_create (nfsv4): dcp %p, dbackvp %p,"
3979 		"name %s\n", dcp, dcp->c_backvp, nm));
3980 	error = VOP_CREATE(dcp->c_backvp, nm, vap, exclusive, mode,
3981 						&devvp, cr, 0);
3982 	mutex_exit(&dcp->c_statelock);
3983 	if (error)
3984 		goto out;
	/*
	 * Use the real vnode behind devvp when VOP_REALVP() supplies
	 * one, otherwise devvp itself.  Either way tvp carries the
	 * reference that is released at "out".
	 */
3985 	if (VOP_REALVP(devvp, &tvp) == 0) {
3986 		VN_HOLD(tvp);
3987 		VN_RELE(devvp);
3988 	} else {
3989 		tvp = devvp;
3990 	}
3991 
3992 	/* get the fid and attrs from the back fs */
3993 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
3994 	error = cachefs_getcookie(tvp, &cookie, &va, cr, valid_fid);
3995 	if (error)
3996 		goto out;
3997 
3998 	/* make the cnode */
3999 	cid.cid_fileno = va.va_nodeid;
4000 	cid.cid_flags = 0;
4001 	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
4002 					&va, tvp, cr, 0, &ncp);
4003 	if (error)
4004 		goto out;
4005 
4006 	*vpp = CTOV(ncp);
4007 
4008 	/* enter it in the parent directory */
4009 	mutex_enter(&dcp->c_statelock);
4010 	if (CFS_ISFS_NONSHARED(fscp) &&
4011 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
4012 		/* see if entry already exists */
4013 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
4014 		error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL);
4015 		if (error == ENOENT) {
4016 			/* entry, does not exist, add the new file */
4017 			error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie,
4018 			    &ncp->c_id, SM_ASYNC);
4019 			if (error) {
				/* not fatal: just stop caching the dir */
4020 				cachefs_nocache(dcp);
4021 				error = 0;
4022 			}
4023 			/* XXX should this be done elsewhere, too? */
4024 			dnlc_enter(dvp, nm, *vpp);
4025 		} else {
4026 			/* entry exists or some other problem */
4027 			cachefs_nocache(dcp);
4028 			error = 0;
4029 		}
4030 	}
4031 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
4032 	mutex_exit(&dcp->c_statelock);
4033 
4034 out:
4035 	rw_exit(&dcp->c_rwlock);
	/* tvp is still NULL if the back create did not succeed */
4036 	if (tvp)
4037 		VN_RELE(tvp);
4038 
4039 	return (error);
4040 }
4041 
4042 static int
4043 cachefs_create_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
4044 	enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr)
4045 {
4046 	cnode_t *dcp = VTOC(dvp);
4047 	cnode_t *cp;
4048 	cnode_t *ncp = NULL;
4049 	vnode_t *vp;
4050 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4051 	int error = 0;
4052 	struct vattr va;
4053 	timestruc_t current_time;
4054 	off_t commit = 0;
4055 	fid_t cookie;
4056 	cfs_cid_t cid;
4057 
4058 	rw_enter(&dcp->c_rwlock, RW_WRITER);
4059 	mutex_enter(&dcp->c_statelock);
4060 
4061 	/* give up if the directory is not populated */
4062 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
4063 		mutex_exit(&dcp->c_statelock);
4064 		rw_exit(&dcp->c_rwlock);
4065 		return (ETIMEDOUT);
4066 	}
4067 
4068 	/* special case if file already exists */
4069 	error = cachefs_dir_look(dcp, nm, &cookie, NULL, NULL, &cid);
4070 	if (error == EINVAL) {
4071 		mutex_exit(&dcp->c_statelock);
4072 		rw_exit(&dcp->c_rwlock);
4073 		return (ETIMEDOUT);
4074 	}
4075 	if (error == 0) {
4076 		mutex_exit(&dcp->c_statelock);
4077 		rw_exit(&dcp->c_rwlock);
4078 		error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
4079 		    cr, 0, &cp);
4080 		if (error) {
4081 			return (error);
4082 		}
4083 		vp = CTOV(cp);
4084 
4085 		if (cp->c_metadata.md_flags & MD_NEEDATTRS)
4086 			error = ETIMEDOUT;
4087 		else if (exclusive == EXCL)
4088 			error = EEXIST;
4089 		else if (vp->v_type == VDIR && (mode & VWRITE))
4090 			error = EISDIR;
4091 		else {
4092 			mutex_enter(&cp->c_statelock);
4093 			error = cachefs_access_local(cp, mode, cr);
4094 			mutex_exit(&cp->c_statelock);
4095 			if (!error) {
4096 				if ((vap->va_mask & AT_SIZE) &&
4097 				    (vp->v_type == VREG)) {
4098 					vap->va_mask = AT_SIZE;
4099 					error =
4100 					    cachefs_setattr_common(vp, vap,
4101 						0, cr, NULL);
4102 				}
4103 			}
4104 		}
4105 		if (error) {
4106 			VN_RELE(vp);
4107 		} else
4108 			*vpp = vp;
4109 		return (error);
4110 	}
4111 
4112 	/* give up if cannot modify the cache */
4113 	if (CFS_ISFS_WRITE_AROUND(fscp)) {
4114 		mutex_exit(&dcp->c_statelock);
4115 		error = ETIMEDOUT;
4116 		goto out;
4117 	}
4118 
4119 	/* check access */
4120 	if (error = cachefs_access_local(dcp, VWRITE, cr)) {
4121 		mutex_exit(&dcp->c_statelock);
4122 		goto out;
4123 	}
4124 
4125 	/* mark dir as modified */
4126 	cachefs_modified(dcp);
4127 	mutex_exit(&dcp->c_statelock);
4128 
4129 	/* must be privileged to set sticky bit */
4130 	if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
4131 		vap->va_mode &= ~VSVTX;
4132 
4133 	/* make up a reasonable set of attributes */
4134 	cachefs_attr_setup(vap, &va, dcp, cr);
4135 
4136 	/* create the cnode */
4137 	error = cachefs_cnode_create(fscp, &va, 0, &ncp);
4138 	if (error)
4139 		goto out;
4140 
4141 	mutex_enter(&ncp->c_statelock);
4142 
4143 	/* get the front file now instead of later */
4144 	if (vap->va_type == VREG) {
4145 		error = cachefs_getfrontfile(ncp);
4146 		if (error) {
4147 			mutex_exit(&ncp->c_statelock);
4148 			goto out;
4149 		}
4150 		ASSERT(ncp->c_frontvp != NULL);
4151 		ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
4152 		ncp->c_metadata.md_flags |= MD_POPULATED;
4153 	} else {
4154 		ASSERT(ncp->c_flags & CN_ALLOC_PENDING);
4155 		if (ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
4156 			(void) filegrp_allocattr(ncp->c_filegrp);
4157 		}
4158 		error = filegrp_create_metadata(ncp->c_filegrp,
4159 		    &ncp->c_metadata, &ncp->c_id);
4160 		if (error) {
4161 			mutex_exit(&ncp->c_statelock);
4162 			goto out;
4163 		}
4164 		ncp->c_flags &= ~CN_ALLOC_PENDING;
4165 	}
4166 	mutex_enter(&dcp->c_statelock);
4167 	cachefs_creategid(dcp, ncp, vap, cr);
4168 	cachefs_createacl(dcp, ncp);
4169 	mutex_exit(&dcp->c_statelock);
4170 
4171 	/* set times on the file */
4172 	gethrestime(&current_time);
4173 	ncp->c_metadata.md_vattr.va_atime = current_time;
4174 	ncp->c_metadata.md_localctime = current_time;
4175 	ncp->c_metadata.md_localmtime = current_time;
4176 	ncp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME;
4177 
4178 	/* reserve space for the daemon cid mapping */
4179 	error = cachefs_dlog_cidmap(fscp);
4180 	if (error) {
4181 		mutex_exit(&ncp->c_statelock);
4182 		goto out;
4183 	}
4184 	ncp->c_metadata.md_flags |= MD_MAPPING;
4185 
4186 	/* mark the new file as modified */
4187 	if (cachefs_modified_alloc(ncp)) {
4188 		mutex_exit(&ncp->c_statelock);
4189 		error = ENOSPC;
4190 		goto out;
4191 	}
4192 	ncp->c_flags |= CN_UPDATED;
4193 
4194 	/*
4195 	 * write the metadata now rather than waiting until
4196 	 * inactive so that if there's no space we can let
4197 	 * the caller know.
4198 	 */
4199 	ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
4200 	ASSERT((ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
4201 	error = filegrp_write_metadata(ncp->c_filegrp,
4202 	    &ncp->c_id, &ncp->c_metadata);
4203 	if (error) {
4204 		mutex_exit(&ncp->c_statelock);
4205 		goto out;
4206 	}
4207 
4208 	/* log the operation */
4209 	commit = cachefs_dlog_create(fscp, dcp, nm, vap, exclusive,
4210 	    mode, ncp, 0, cr);
4211 	if (commit == 0) {
4212 		mutex_exit(&ncp->c_statelock);
4213 		error = ENOSPC;
4214 		goto out;
4215 	}
4216 
4217 	mutex_exit(&ncp->c_statelock);
4218 
4219 	mutex_enter(&dcp->c_statelock);
4220 
4221 	/* update parent dir times */
4222 	dcp->c_metadata.md_localmtime = current_time;
4223 	dcp->c_metadata.md_flags |= MD_LOCALMTIME;
4224 	dcp->c_flags |= CN_UPDATED;
4225 
4226 	/* enter new file name in the parent directory */
4227 	if (dcp->c_metadata.md_flags & MD_POPULATED) {
4228 		error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie,
4229 		    &ncp->c_id, 0);
4230 		if (error) {
4231 			cachefs_nocache(dcp);
4232 			mutex_exit(&dcp->c_statelock);
4233 			error = ETIMEDOUT;
4234 			goto out;
4235 		}
4236 		dnlc_enter(dvp, nm, CTOV(ncp));
4237 	} else {
4238 		mutex_exit(&dcp->c_statelock);
4239 		error = ETIMEDOUT;
4240 		goto out;
4241 	}
4242 	mutex_exit(&dcp->c_statelock);
4243 
4244 out:
4245 	rw_exit(&dcp->c_rwlock);
4246 
4247 	if (commit) {
4248 		if (cachefs_dlog_commit(fscp, commit, error)) {
4249 			/*EMPTY*/
4250 			/* XXX bob: fix on panic */
4251 		}
4252 	}
4253 	if (error) {
4254 		/* destroy the cnode we created */
4255 		if (ncp) {
4256 			mutex_enter(&ncp->c_statelock);
4257 			ncp->c_flags |= CN_DESTROY;
4258 			mutex_exit(&ncp->c_statelock);
4259 			VN_RELE(CTOV(ncp));
4260 		}
4261 	} else {
4262 		*vpp = CTOV(ncp);
4263 	}
4264 	return (error);
4265 }
4266 
4267 static int
4268 cachefs_remove(vnode_t *dvp, char *nm, cred_t *cr)
4269 {
4270 	cnode_t *dcp = VTOC(dvp);
4271 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4272 	cachefscache_t *cachep = fscp->fs_cache;
4273 	int error = 0;
4274 	int held = 0;
4275 	int connected = 0;
4276 	size_t namlen;
4277 	vnode_t *vp = NULL;
4278 	int vfslock = 0;
4279 
4280 #ifdef CFSDEBUG
4281 	CFS_DEBUG(CFSDEBUG_VOPS)
4282 		printf("cachefs_remove: ENTER dvp %p name %s\n",
4283 		    (void *)dvp, nm);
4284 #endif
4285 	if (getzoneid() != GLOBAL_ZONEID) {
4286 		error = EPERM;
4287 		goto out;
4288 	}
4289 
4290 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
4291 		ASSERT(dcp->c_flags & CN_NOCACHE);
4292 
4293 	/*
4294 	 * Cachefs only provides pass-through support for NFSv4,
4295 	 * and all vnode operations are passed through to the
4296 	 * back file system. For NFSv4 pass-through to work, only
4297 	 * connected operation is supported, the cnode backvp must
4298 	 * exist, and cachefs optional (eg., disconnectable) flags
4299 	 * are turned off. Assert these conditions to ensure that
4300 	 * the backfilesystem is called for the remove operation.
4301 	 */
4302 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4303 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
4304 
4305 	for (;;) {
4306 		if (vfslock) {
4307 			vn_vfsunlock(vp);
4308 			vfslock = 0;
4309 		}
4310 		if (vp) {
4311 			VN_RELE(vp);
4312 			vp = NULL;
4313 		}
4314 
4315 		/* get (or renew) access to the file system */
4316 		if (held) {
4317 			/* Won't loop with NFSv4 connected behavior */
4318 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
4319 			cachefs_cd_release(fscp);
4320 			held = 0;
4321 		}
4322 		error = cachefs_cd_access(fscp, connected, 1);
4323 		if (error)
4324 			break;
4325 		held = 1;
4326 
4327 		/* if disconnected, do some extra error checking */
4328 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
4329 			/* check permissions */
4330 			mutex_enter(&dcp->c_statelock);
4331 			error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
4332 			mutex_exit(&dcp->c_statelock);
4333 			if (CFS_TIMEOUT(fscp, error)) {
4334 				connected = 1;
4335 				continue;
4336 			}
4337 			if (error)
4338 				break;
4339 
4340 			namlen = strlen(nm);
4341 			if (namlen == 0) {
4342 				error = EINVAL;
4343 				break;
4344 			}
4345 
4346 			/* cannot remove . and .. */
4347 			if (nm[0] == '.') {
4348 				if (namlen == 1) {
4349 					error = EINVAL;
4350 					break;
4351 				} else if (namlen == 2 && nm[1] == '.') {
4352 					error = EEXIST;
4353 					break;
4354 				}
4355 			}
4356 
4357 		}
4358 
4359 		/* get the cnode of the file to delete */
4360 		error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
4361 		if (error) {
4362 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
4363 				if (CFS_TIMEOUT(fscp, error)) {
4364 					cachefs_cd_release(fscp);
4365 					held = 0;
4366 					cachefs_cd_timedout(fscp);
4367 					connected = 0;
4368 					continue;
4369 				}
4370 			} else {
4371 				if (CFS_TIMEOUT(fscp, error)) {
4372 					connected = 1;
4373 					continue;
4374 				}
4375 			}
4376 			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE)) {
4377 				struct fid foo;
4378 
4379 				bzero(&foo, sizeof (foo));
4380 				cachefs_log_remove(cachep, error,
4381 				    fscp->fs_cfsvfsp, &foo, 0, crgetuid(cr));
4382 			}
4383 			break;
4384 		}
4385 
4386 		if (vp->v_type == VDIR) {
4387 			/* must be privileged to remove dirs with unlink() */
4388 			if ((error = secpolicy_fs_linkdir(cr, vp->v_vfsp)) != 0)
4389 				break;
4390 
4391 			/* see ufs_dirremove for why this is done, mount race */
4392 			if (vn_vfswlock(vp)) {
4393 				error = EBUSY;
4394 				break;
4395 			}
4396 			vfslock = 1;
4397 			if (vn_mountedvfs(vp) != NULL) {
4398 				error = EBUSY;
4399 				break;
4400 			}
4401 		}
4402 
4403 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
4404 			error = cachefs_remove_connected(dvp, nm, cr, vp);
4405 			if (CFS_TIMEOUT(fscp, error)) {
4406 				cachefs_cd_release(fscp);
4407 				held = 0;
4408 				cachefs_cd_timedout(fscp);
4409 				connected = 0;
4410 				continue;
4411 			}
4412 		} else {
4413 			error = cachefs_remove_disconnected(dvp, nm, cr,
4414 				vp);
4415 			if (CFS_TIMEOUT(fscp, error)) {
4416 				connected = 1;
4417 				continue;
4418 			}
4419 		}
4420 		break;
4421 	}
4422 
4423 #if 0
4424 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE))
4425 		cachefs_log_remove(cachep, error, fscp->fs_cfsvfsp,
4426 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
4427 		    crgetuid(cr));
4428 #endif
4429 
4430 	if (held)
4431 		cachefs_cd_release(fscp);
4432 
4433 	if (vfslock)
4434 		vn_vfsunlock(vp);
4435 
4436 	if (vp)
4437 		VN_RELE(vp);
4438 
4439 #ifdef CFS_CD_DEBUG
4440 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
4441 #endif
4442 out:
4443 #ifdef CFSDEBUG
4444 	CFS_DEBUG(CFSDEBUG_VOPS)
4445 		printf("cachefs_remove: EXIT dvp %p\n", (void *)dvp);
4446 #endif
4447 
4448 	return (error);
4449 }
4450 
4451 int
4452 cachefs_remove_connected(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
4453 {
4454 	cnode_t *dcp = VTOC(dvp);
4455 	cnode_t *cp = VTOC(vp);
4456 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4457 	int error = 0;
4458 
4459 	/*
4460 	 * Acquire the rwlock (WRITER) on the directory to prevent other
4461 	 * activity on the directory.
4462 	 */
4463 	rw_enter(&dcp->c_rwlock, RW_WRITER);
4464 
4465 	/* purge dnlc of this entry so can get accurate vnode count */
4466 	dnlc_purge_vp(vp);
4467 
4468 	/*
4469 	 * If the cnode is active, make a link to the file
4470 	 * so operations on the file will continue.
4471 	 */
4472 	if ((vp->v_type != VDIR) &&
4473 	    !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) {
4474 		error = cachefs_remove_dolink(dvp, vp, nm, cr);
4475 		if (error)
4476 			goto out;
4477 	}
4478 
4479 	/* else call backfs NFSv4 handler if NFSv4 */
4480 	else if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
4481 		error = cachefs_remove_backfs_nfsv4(dvp, nm, cr, vp);
4482 		goto out;
4483 	}
4484 
4485 	/* else drop the backvp so nfs does not do rename */
4486 	else if (cp->c_backvp) {
4487 		mutex_enter(&cp->c_statelock);
4488 		if (cp->c_backvp) {
4489 			VN_RELE(cp->c_backvp);
4490 			cp->c_backvp = NULL;
4491 		}
4492 		mutex_exit(&cp->c_statelock);
4493 	}
4494 
4495 	mutex_enter(&dcp->c_statelock);
4496 
4497 	/* get the backvp */
4498 	if (dcp->c_backvp == NULL) {
4499 		error = cachefs_getbackvp(fscp, dcp);
4500 		if (error) {
4501 			mutex_exit(&dcp->c_statelock);
4502 			goto out;
4503 		}
4504 	}
4505 
4506 	/* check directory consistency */
4507 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
4508 	if (error) {
4509 		mutex_exit(&dcp->c_statelock);
4510 		goto out;
4511 	}
4512 
4513 	/* perform the remove on the back fs */
4514 	error = VOP_REMOVE(dcp->c_backvp, nm, cr);
4515 	if (error) {
4516 		mutex_exit(&dcp->c_statelock);
4517 		goto out;
4518 	}
4519 
4520 	/* the dir has been modified */
4521 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
4522 
4523 	/* remove the entry from the populated directory */
4524 	if (CFS_ISFS_NONSHARED(fscp) &&
4525 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
4526 		error = cachefs_dir_rmentry(dcp, nm);
4527 		if (error) {
4528 			cachefs_nocache(dcp);
4529 			error = 0;
4530 		}
4531 	}
4532 	mutex_exit(&dcp->c_statelock);
4533 
4534 	/* fix up the file we deleted */
4535 	mutex_enter(&cp->c_statelock);
4536 	if (cp->c_attr.va_nlink == 1)
4537 		cp->c_flags |= CN_DESTROY;
4538 	else
4539 		cp->c_flags |= CN_UPDATED;
4540 
4541 	cp->c_attr.va_nlink--;
4542 	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
4543 	mutex_exit(&cp->c_statelock);
4544 
4545 out:
4546 	rw_exit(&dcp->c_rwlock);
4547 	return (error);
4548 }
4549 
4550 /*
4551  * cachefs_remove_backfs_nfsv4
4552  *
4553  * Call NFSv4 back filesystem to handle the remove (cachefs
4554  * pass-through support for NFSv4).
4555  */
4556 int
4557 cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
4558 {
4559 	cnode_t *dcp = VTOC(dvp);
4560 	cnode_t *cp = VTOC(vp);
4561 	vnode_t *dbackvp;
4562 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4563 	int error = 0;
4564 
4565 	/*
4566 	 * For NFSv4 pass-through to work, only connected operation
4567 	 * is supported, the cnode backvp must exist, and cachefs
4568 	 * optional (eg., disconnectable) flags are turned off. Assert
4569 	 * these conditions for the getattr operation.
4570 	 */
4571 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4572 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
4573 
4574 	/* Should hold the directory readwrite lock to update directory */
4575 	ASSERT(RW_WRITE_HELD(&dcp->c_rwlock));
4576 
4577 	/*
4578 	 * Update attributes for directory. Note that
4579 	 * CFSOP_CHECK_COBJECT asserts for c_statelock being
4580 	 * held, so grab it before calling the routine.
4581 	 */
4582 	mutex_enter(&dcp->c_statelock);
4583 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
4584 	mutex_exit(&dcp->c_statelock);
4585 	if (error)
4586 		goto out;
4587 
4588 	/*
4589 	 * Update attributes for cp. Note that CFSOP_CHECK_COBJECT
4590 	 * asserts for c_statelock being held, so grab it before
4591 	 * calling the routine.
4592 	 */
4593 	mutex_enter(&cp->c_statelock);
4594 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
4595 	if (error) {
4596 		mutex_exit(&cp->c_statelock);
4597 		goto out;
4598 	}
4599 
4600 	/*
4601 	 * Drop the backvp so nfs if the link count is 1 so that
4602 	 * nfs does not do rename. Ensure that we will destroy the cnode
4603 	 * since this cnode no longer contains the backvp. Note that we
4604 	 * maintain lock on this cnode to prevent change till the remove
4605 	 * completes, otherwise other operations will encounter an ESTALE
4606 	 * if they try to use the cnode with CN_DESTROY set (see
4607 	 * cachefs_get_backvp()), or change the state of the cnode
4608 	 * while we're removing it.
4609 	 */
4610 	if (cp->c_attr.va_nlink == 1) {
4611 		/*
4612 		 * The unldvp information is created for the case
4613 		 * when there is more than one reference on the
4614 		 * vnode when a remove operation is called. If the
4615 		 * remove itself was holding a reference to the
4616 		 * vnode, then a subsequent remove will remove the
4617 		 * backvp, so we need to get rid of the unldvp
4618 		 * before removing the backvp. An alternate would
4619 		 * be to simply ignore the remove and let the
4620 		 * inactivation routine do the deletion of the
4621 		 * unldvp.
4622 		 */
4623 		if (cp->c_unldvp) {
4624 			VN_RELE(cp->c_unldvp);
4625 			cachefs_kmem_free(cp->c_unlname, MAXNAMELEN);
4626 			crfree(cp->c_unlcred);
4627 			cp->c_unldvp = NULL;
4628 			cp->c_unlcred = NULL;
4629 		}
4630 		cp->c_flags |= CN_DESTROY;
4631 		cp->c_attr.va_nlink = 0;
4632 		VN_RELE(cp->c_backvp);
4633 		cp->c_backvp = NULL;
4634 	}
4635 
4636 	/* perform the remove on back fs after extracting directory backvp */
4637 	mutex_enter(&dcp->c_statelock);
4638 	dbackvp = dcp->c_backvp;
4639 	mutex_exit(&dcp->c_statelock);
4640 
4641 	CFS_DPRINT_BACKFS_NFSV4(fscp,
4642 		("cachefs_remove (nfsv4): dcp %p, dbackvp %p, name %s\n",
4643 		dcp, dbackvp, nm));
4644 	error = VOP_REMOVE(dbackvp, nm, cr);
4645 	if (error) {
4646 		mutex_exit(&cp->c_statelock);
4647 		goto out;
4648 	}
4649 
4650 	/* fix up the file we deleted, if not destroying the cnode */
4651 	if ((cp->c_flags & CN_DESTROY) == 0) {
4652 		cp->c_attr.va_nlink--;
4653 		cp->c_flags |= CN_UPDATED;
4654 	}
4655 
4656 	mutex_exit(&cp->c_statelock);
4657 
4658 out:
4659 	return (error);
4660 }
4661 
4662 int
4663 cachefs_remove_disconnected(vnode_t *dvp, char *nm, cred_t *cr,
4664     vnode_t *vp)
4665 {
4666 	cnode_t *dcp = VTOC(dvp);
4667 	cnode_t *cp = VTOC(vp);
4668 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4669 	int error = 0;
4670 	off_t commit = 0;
4671 	timestruc_t current_time;
4672 
4673 	if (CFS_ISFS_WRITE_AROUND(fscp))
4674 		return (ETIMEDOUT);
4675 
4676 	if (cp->c_metadata.md_flags & MD_NEEDATTRS)
4677 		return (ETIMEDOUT);
4678 
4679 	/*
4680 	 * Acquire the rwlock (WRITER) on the directory to prevent other
4681 	 * activity on the directory.
4682 	 */
4683 	rw_enter(&dcp->c_rwlock, RW_WRITER);
4684 
4685 	/* dir must be populated */
4686 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
4687 		error = ETIMEDOUT;
4688 		goto out;
4689 	}
4690 
4691 	mutex_enter(&dcp->c_statelock);
4692 	mutex_enter(&cp->c_statelock);
4693 
4694 	error = cachefs_stickyrmchk(dcp, cp, cr);
4695 
4696 	mutex_exit(&cp->c_statelock);
4697 	mutex_exit(&dcp->c_statelock);
4698 	if (error)
4699 		goto out;
4700 
4701 	/* purge dnlc of this entry so can get accurate vnode count */
4702 	dnlc_purge_vp(vp);
4703 
4704 	/*
4705 	 * If the cnode is active, make a link to the file
4706 	 * so operations on the file will continue.
4707 	 */
4708 	if ((vp->v_type != VDIR) &&
4709 	    !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) {
4710 		error = cachefs_remove_dolink(dvp, vp, nm, cr);
4711 		if (error)
4712 			goto out;
4713 	}
4714 
4715 	if (cp->c_attr.va_nlink > 1) {
4716 		mutex_enter(&cp->c_statelock);
4717 		if (cachefs_modified_alloc(cp)) {
4718 			mutex_exit(&cp->c_statelock);
4719 			error = ENOSPC;
4720 			goto out;
4721 		}
4722 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
4723 			error = cachefs_dlog_cidmap(fscp);
4724 			if (error) {
4725 				mutex_exit(&cp->c_statelock);
4726 				error = ENOSPC;
4727 				goto out;
4728 			}
4729 			cp->c_metadata.md_flags |= MD_MAPPING;
4730 			cp->c_flags |= CN_UPDATED;
4731 		}
4732 		mutex_exit(&cp->c_statelock);
4733 	}
4734 
4735 	/* log the remove */
4736 	commit = cachefs_dlog_remove(fscp, dcp, nm, cp, cr);
4737 	if (commit == 0) {
4738 		error = ENOSPC;
4739 		goto out;
4740 	}
4741 
4742 	/* remove the file from the dir */
4743 	mutex_enter(&dcp->c_statelock);
4744 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
4745 		mutex_exit(&dcp->c_statelock);
4746 		error = ETIMEDOUT;
4747 		goto out;
4748 
4749 	}
4750 	cachefs_modified(dcp);
4751 	error = cachefs_dir_rmentry(dcp, nm);
4752 	if (error) {
4753 		mutex_exit(&dcp->c_statelock);
4754 		if (error == ENOTDIR)
4755 			error = ETIMEDOUT;
4756 		goto out;
4757 	}
4758 
4759 	/* update parent dir times */
4760 	gethrestime(&current_time);
4761 	dcp->c_metadata.md_localctime = current_time;
4762 	dcp->c_metadata.md_localmtime = current_time;
4763 	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
4764 	dcp->c_flags |= CN_UPDATED;
4765 	mutex_exit(&dcp->c_statelock);
4766 
4767 	/* adjust file we are deleting */
4768 	mutex_enter(&cp->c_statelock);
4769 	cp->c_attr.va_nlink--;
4770 	cp->c_metadata.md_localctime = current_time;
4771 	cp->c_metadata.md_flags |= MD_LOCALCTIME;
4772 	if (cp->c_attr.va_nlink == 0) {
4773 		cp->c_flags |= CN_DESTROY;
4774 	} else {
4775 		cp->c_flags |= CN_UPDATED;
4776 	}
4777 	mutex_exit(&cp->c_statelock);
4778 
4779 out:
4780 	if (commit) {
4781 		/* commit the log entry */
4782 		if (cachefs_dlog_commit(fscp, commit, error)) {
4783 			/*EMPTY*/
4784 			/* XXX bob: fix on panic */
4785 		}
4786 	}
4787 
4788 	rw_exit(&dcp->c_rwlock);
4789 	return (error);
4790 }
4791 
4792 static int
4793 cachefs_link(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr)
4794 {
4795 	fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
4796 	cnode_t *tdcp = VTOC(tdvp);
4797 	struct vnode *realvp;
4798 	int error = 0;
4799 	int held = 0;
4800 	int connected = 0;
4801 
4802 #ifdef CFSDEBUG
4803 	CFS_DEBUG(CFSDEBUG_VOPS)
4804 		printf("cachefs_link: ENTER fvp %p tdvp %p tnm %s\n",
4805 			(void *)fvp, (void *)tdvp, tnm);
4806 #endif
4807 
4808 	if (getzoneid() != GLOBAL_ZONEID) {
4809 		error = EPERM;
4810 		goto out;
4811 	}
4812 
4813 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
4814 		ASSERT(tdcp->c_flags & CN_NOCACHE);
4815 
4816 	if (VOP_REALVP(fvp, &realvp) == 0) {
4817 		fvp = realvp;
4818 	}
4819 
4820 	/*
4821 	 * Cachefs only provides pass-through support for NFSv4,
4822 	 * and all vnode operations are passed through to the
4823 	 * back file system. For NFSv4 pass-through to work, only
4824 	 * connected operation is supported, the cnode backvp must
4825 	 * exist, and cachefs optional (eg., disconnectable) flags
4826 	 * are turned off. Assert these conditions to ensure that
4827 	 * the backfilesystem is called for the link operation.
4828 	 */
4829 
4830 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4831 	CFS_BACKFS_NFSV4_ASSERT_CNODE(tdcp);
4832 
4833 	for (;;) {
4834 		/* get (or renew) access to the file system */
4835 		if (held) {
4836 			/* Won't loop with NFSv4 connected behavior */
4837 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
4838 			rw_exit(&tdcp->c_rwlock);
4839 			cachefs_cd_release(fscp);
4840 			held = 0;
4841 		}
4842 		error = cachefs_cd_access(fscp, connected, 1);
4843 		if (error)
4844 			break;
4845 		rw_enter(&tdcp->c_rwlock, RW_WRITER);
4846 		held = 1;
4847 
4848 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
4849 			error = cachefs_link_connected(tdvp, fvp, tnm, cr);
4850 			if (CFS_TIMEOUT(fscp, error)) {
4851 				rw_exit(&tdcp->c_rwlock);
4852 				cachefs_cd_release(fscp);
4853 				held = 0;
4854 				cachefs_cd_timedout(fscp);
4855 				connected = 0;
4856 				continue;
4857 			}
4858 		} else {
4859 			error = cachefs_link_disconnected(tdvp, fvp, tnm,
4860 				cr);
4861 			if (CFS_TIMEOUT(fscp, error)) {
4862 				connected = 1;
4863 				continue;
4864 			}
4865 		}
4866 		break;
4867 	}
4868 
4869 	if (held) {
4870 		rw_exit(&tdcp->c_rwlock);
4871 		cachefs_cd_release(fscp);
4872 	}
4873 
4874 #ifdef CFS_CD_DEBUG
4875 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
4876 #endif
4877 out:
4878 #ifdef CFSDEBUG
4879 	CFS_DEBUG(CFSDEBUG_VOPS)
4880 		printf("cachefs_link: EXIT fvp %p tdvp %p tnm %s\n",
4881 			(void *)fvp, (void *)tdvp, tnm);
4882 #endif
4883 	return (error);
4884 }
4885 
4886 static int
4887 cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr)
4888 {
4889 	cnode_t *tdcp = VTOC(tdvp);
4890 	cnode_t *fcp = VTOC(fvp);
4891 	fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
4892 	int error = 0;
4893 	vnode_t *backvp = NULL;
4894 
4895 	if (tdcp != fcp) {
4896 		mutex_enter(&fcp->c_statelock);
4897 
4898 		if (fcp->c_backvp == NULL) {
4899 			error = cachefs_getbackvp(fscp, fcp);
4900 			if (error) {
4901 				mutex_exit(&fcp->c_statelock);
4902 				goto out;
4903 			}
4904 		}
4905 
4906 		error = CFSOP_CHECK_COBJECT(fscp, fcp, 0, cr);
4907 		if (error) {
4908 			mutex_exit(&fcp->c_statelock);
4909 			goto out;
4910 		}
4911 		backvp = fcp->c_backvp;
4912 		VN_HOLD(backvp);
4913 		mutex_exit(&fcp->c_statelock);
4914 	}
4915 
4916 	mutex_enter(&tdcp->c_statelock);
4917 
4918 	/* get backvp of target directory */
4919 	if (tdcp->c_backvp == NULL) {
4920 		error = cachefs_getbackvp(fscp, tdcp);
4921 		if (error) {
4922 			mutex_exit(&tdcp->c_statelock);
4923 			goto out;
4924 		}
4925 	}
4926 
4927 	/* consistency check target directory */
4928 	error = CFSOP_CHECK_COBJECT(fscp, tdcp, 0, cr);
4929 	if (error) {
4930 		mutex_exit(&tdcp->c_statelock);
4931 		goto out;
4932 	}
4933 	if (backvp == NULL) {
4934 		backvp = tdcp->c_backvp;
4935 		VN_HOLD(backvp);
4936 	}
4937 
4938 	/* perform the link on the back fs */
4939 	CFS_DPRINT_BACKFS_NFSV4(fscp,
4940 		("cachefs_link (nfsv4): tdcp %p, tdbackvp %p, "
4941 		"name %s\n", tdcp, tdcp->c_backvp, tnm));
4942 	error = VOP_LINK(tdcp->c_backvp, backvp, tnm, cr);
4943 	if (error) {
4944 		mutex_exit(&tdcp->c_statelock);
4945 		goto out;
4946 	}
4947 
4948 	CFSOP_MODIFY_COBJECT(fscp, tdcp, cr);
4949 
4950 	/* if the dir is populated, add the new link */
4951 	if (CFS_ISFS_NONSHARED(fscp) &&
4952 	    (tdcp->c_metadata.md_flags & MD_POPULATED)) {
4953 		error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie,
4954 		    &fcp->c_id, SM_ASYNC);
4955 		if (error) {
4956 			cachefs_nocache(tdcp);
4957 			error = 0;
4958 		}
4959 	}
4960 	mutex_exit(&tdcp->c_statelock);
4961 
4962 	/* get the new link count on the file */
4963 	mutex_enter(&fcp->c_statelock);
4964 	fcp->c_flags |= CN_UPDATED;
4965 	CFSOP_MODIFY_COBJECT(fscp, fcp, cr);
4966 	if (fcp->c_backvp == NULL) {
4967 		error = cachefs_getbackvp(fscp, fcp);
4968 		if (error) {
4969 			mutex_exit(&fcp->c_statelock);
4970 			goto out;
4971 		}
4972 	}
4973 
4974 	/* XXX bob: given what modify_cobject does this seems unnecessary */
4975 	fcp->c_attr.va_mask = AT_ALL;
4976 	error = VOP_GETATTR(fcp->c_backvp, &fcp->c_attr, 0, cr);
4977 	mutex_exit(&fcp->c_statelock);
4978 out:
4979 	if (backvp)
4980 		VN_RELE(backvp);
4981 
4982 	return (error);
4983 }
4984 
4985 static int
4986 cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp, char *tnm,
4987     cred_t *cr)
4988 {
4989 	cnode_t *tdcp = VTOC(tdvp);
4990 	cnode_t *fcp = VTOC(fvp);
4991 	fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
4992 	int error = 0;
4993 	timestruc_t current_time;
4994 	off_t commit = 0;
4995 
4996 	if (fvp->v_type == VDIR && secpolicy_fs_linkdir(cr, fvp->v_vfsp) != 0 ||
4997 	    fcp->c_attr.va_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)
4998 		return (EPERM);
4999 
5000 	if (CFS_ISFS_WRITE_AROUND(fscp))
5001 		return (ETIMEDOUT);
5002 
5003 	if (fcp->c_metadata.md_flags & MD_NEEDATTRS)
5004 		return (ETIMEDOUT);
5005 
5006 	mutex_enter(&tdcp->c_statelock);
5007 
5008 	/* check permissions */
5009 	if (error = cachefs_access_local(tdcp, (VEXEC|VWRITE), cr)) {
5010 		mutex_exit(&tdcp->c_statelock);
5011 		goto out;
5012 	}
5013 
5014 	/* the directory front file must be populated */
5015 	if ((tdcp->c_metadata.md_flags & MD_POPULATED) == 0) {
5016 		error = ETIMEDOUT;
5017 		mutex_exit(&tdcp->c_statelock);
5018 		goto out;
5019 	}
5020 
5021 	/* make sure tnm does not already exist in the directory */
5022 	error = cachefs_dir_look(tdcp, tnm, NULL, NULL, NULL, NULL);
5023 	if (error == ENOTDIR) {
5024 		error = ETIMEDOUT;
5025 		mutex_exit(&tdcp->c_statelock);
5026 		goto out;
5027 	}
5028 	if (error != ENOENT) {
5029 		error = EEXIST;
5030 		mutex_exit(&tdcp->c_statelock);
5031 		goto out;
5032 	}
5033 
5034 	mutex_enter(&fcp->c_statelock);
5035 
5036 	/* create a mapping for the file if necessary */
5037 	if ((fcp->c_metadata.md_flags & MD_MAPPING) == 0) {
5038 		error = cachefs_dlog_cidmap(fscp);
5039 		if (error) {
5040 			mutex_exit(&fcp->c_statelock);
5041 			mutex_exit(&tdcp->c_statelock);
5042 			error = ENOSPC;
5043 			goto out;
5044 		}
5045 		fcp->c_metadata.md_flags |= MD_MAPPING;
5046 		fcp->c_flags |= CN_UPDATED;
5047 	}
5048 
5049 	/* mark file as modified */
5050 	if (cachefs_modified_alloc(fcp)) {
5051 		mutex_exit(&fcp->c_statelock);
5052 		mutex_exit(&tdcp->c_statelock);
5053 		error = ENOSPC;
5054 		goto out;
5055 	}
5056 	mutex_exit(&fcp->c_statelock);
5057 
5058 	/* log the operation */
5059 	commit = cachefs_dlog_link(fscp, tdcp, tnm, fcp, cr);
5060 	if (commit == 0) {
5061 		mutex_exit(&tdcp->c_statelock);
5062 		error = ENOSPC;
5063 		goto out;
5064 	}
5065 
5066 	gethrestime(&current_time);
5067 
5068 	/* make the new link */
5069 	cachefs_modified(tdcp);
5070 	error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie,
5071 	    &fcp->c_id, SM_ASYNC);
5072 	if (error) {
5073 		error = 0;
5074 		mutex_exit(&tdcp->c_statelock);
5075 		goto out;
5076 	}
5077 
5078 	/* Update mtime/ctime of parent dir */
5079 	tdcp->c_metadata.md_localmtime = current_time;
5080 	tdcp->c_metadata.md_localctime = current_time;
5081 	tdcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5082 	tdcp->c_flags |= CN_UPDATED;
5083 	mutex_exit(&tdcp->c_statelock);
5084 
5085 	/* update the file we linked to */
5086 	mutex_enter(&fcp->c_statelock);
5087 	fcp->c_attr.va_nlink++;
5088 	fcp->c_metadata.md_localctime = current_time;
5089 	fcp->c_metadata.md_flags |= MD_LOCALCTIME;
5090 	fcp->c_flags |= CN_UPDATED;
5091 	mutex_exit(&fcp->c_statelock);
5092 
5093 out:
5094 	if (commit) {
5095 		/* commit the log entry */
5096 		if (cachefs_dlog_commit(fscp, commit, error)) {
5097 			/*EMPTY*/
5098 			/* XXX bob: fix on panic */
5099 		}
5100 	}
5101 
5102 	return (error);
5103 }
5104 
/*
 * Serializes all renames in CFS to avoid deadlocks, because a rename
 * has to hold two cnodes at the same time.
 */
5109 kmutex_t cachefs_rename_lock;
5110 
5111 static int
5112 cachefs_rename(vnode_t *odvp, char *onm, vnode_t *ndvp,
5113     char *nnm, cred_t *cr)
5114 {
5115 	fscache_t *fscp = C_TO_FSCACHE(VTOC(odvp));
5116 	cachefscache_t *cachep = fscp->fs_cache;
5117 	int error = 0;
5118 	int held = 0;
5119 	int connected = 0;
5120 	vnode_t *delvp = NULL;
5121 	vnode_t *tvp = NULL;
5122 	int vfslock = 0;
5123 	struct vnode *realvp;
5124 
5125 	if (getzoneid() != GLOBAL_ZONEID)
5126 		return (EPERM);
5127 
5128 	if (VOP_REALVP(ndvp, &realvp) == 0)
5129 		ndvp = realvp;
5130 
5131 	/*
5132 	 * if the fs NOFILL or NOCACHE flags are on, then the old and new
5133 	 * directory cnodes better indicate NOCACHE mode as well.
5134 	 */
5135 	ASSERT
5136 	    ((fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE)) == 0 ||
5137 	    ((VTOC(odvp)->c_flags & CN_NOCACHE) &&
5138 	    (VTOC(ndvp)->c_flags & CN_NOCACHE)));
5139 
5140 	/*
5141 	 * Cachefs only provides pass-through support for NFSv4,
5142 	 * and all vnode operations are passed through to the
5143 	 * back file system. For NFSv4 pass-through to work, only
5144 	 * connected operation is supported, the cnode backvp must
5145 	 * exist, and cachefs optional (eg., disconnectable) flags
5146 	 * are turned off. Assert these conditions to ensure that
5147 	 * the backfilesystem is called for the rename operation.
5148 	 */
5149 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
5150 	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(odvp));
5151 	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(ndvp));
5152 
5153 	for (;;) {
5154 		if (vfslock) {
5155 			vn_vfsunlock(delvp);
5156 			vfslock = 0;
5157 		}
5158 		if (delvp) {
5159 			VN_RELE(delvp);
5160 			delvp = NULL;
5161 		}
5162 
5163 		/* get (or renew) access to the file system */
5164 		if (held) {
5165 			/* Won't loop for NFSv4 connected support */
5166 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
5167 			cachefs_cd_release(fscp);
5168 			held = 0;
5169 		}
5170 		error = cachefs_cd_access(fscp, connected, 1);
5171 		if (error)
5172 			break;
5173 		held = 1;
5174 
5175 		/* sanity check */
5176 		if ((odvp->v_type != VDIR) || (ndvp->v_type != VDIR)) {
5177 			error = EINVAL;
5178 			break;
5179 		}
5180 
5181 		/* cannot rename from or to . or .. */
5182 		if (strcmp(onm, ".") == 0 || strcmp(onm, "..") == 0 ||
5183 		    strcmp(nnm, ".") == 0 || strcmp(nnm, "..") == 0) {
5184 			error = EINVAL;
5185 			break;
5186 		}
5187 
5188 		if (odvp != ndvp) {
5189 			/*
5190 			 * if moving a directory, its notion
5191 			 * of ".." will change
5192 			 */
5193 			error = cachefs_lookup_common(odvp, onm, &tvp,
5194 			    NULL, 0, NULL, cr);
5195 			if (error == 0) {
5196 				ASSERT(tvp != NULL);
5197 				if (tvp->v_type == VDIR) {
5198 					cnode_t *cp = VTOC(tvp);
5199 
5200 					dnlc_remove(tvp, "..");
5201 
5202 					mutex_enter(&cp->c_statelock);
5203 					CFSOP_MODIFY_COBJECT(fscp, cp, cr);
5204 					mutex_exit(&cp->c_statelock);
5205 				}
5206 			} else {
5207 				tvp = NULL;
5208 				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5209 					if (CFS_TIMEOUT(fscp, error)) {
5210 						cachefs_cd_release(fscp);
5211 						held = 0;
5212 						cachefs_cd_timedout(fscp);
5213 						connected = 0;
5214 						continue;
5215 					}
5216 				} else {
5217 					if (CFS_TIMEOUT(fscp, error)) {
5218 						connected = 1;
5219 						continue;
5220 					}
5221 				}
5222 				break;
5223 			}
5224 		}
5225 
5226 		/* get the cnode if file being deleted */
5227 		error = cachefs_lookup_common(ndvp, nnm, &delvp, NULL, 0,
5228 			NULL, cr);
5229 		if (error) {
5230 			delvp = NULL;
5231 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5232 				if (CFS_TIMEOUT(fscp, error)) {
5233 					cachefs_cd_release(fscp);
5234 					held = 0;
5235 					cachefs_cd_timedout(fscp);
5236 					connected = 0;
5237 					continue;
5238 				}
5239 			} else {
5240 				if (CFS_TIMEOUT(fscp, error)) {
5241 					connected = 1;
5242 					continue;
5243 				}
5244 			}
5245 			if (error != ENOENT)
5246 				break;
5247 		}
5248 
5249 		if (delvp && delvp->v_type == VDIR) {
5250 			/* see ufs_dirremove for why this is done, mount race */
5251 			if (vn_vfswlock(delvp)) {
5252 				error = EBUSY;
5253 				break;
5254 			}
5255 			vfslock = 1;
5256 			if (vn_mountedvfs(delvp) != NULL) {
5257 				error = EBUSY;
5258 				break;
5259 			}
5260 		}
5261 
5262 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5263 			error = cachefs_rename_connected(odvp, onm,
5264 				ndvp, nnm, cr, delvp);
5265 			if (CFS_TIMEOUT(fscp, error)) {
5266 				cachefs_cd_release(fscp);
5267 				held = 0;
5268 				cachefs_cd_timedout(fscp);
5269 				connected = 0;
5270 				continue;
5271 			}
5272 		} else {
5273 			error = cachefs_rename_disconnected(odvp, onm,
5274 				ndvp, nnm, cr, delvp);
5275 			if (CFS_TIMEOUT(fscp, error)) {
5276 				connected = 1;
5277 				continue;
5278 			}
5279 		}
5280 		break;
5281 	}
5282 
5283 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RENAME)) {
5284 		struct fid gone;
5285 
5286 		bzero(&gone, sizeof (gone));
5287 		gone.fid_len = MAXFIDSZ;
5288 		if (delvp != NULL)
5289 			(void) VOP_FID(delvp, &gone);
5290 
5291 		cachefs_log_rename(cachep, error, fscp->fs_cfsvfsp,
5292 		    &gone, 0, (delvp != NULL), crgetuid(cr));
5293 	}
5294 
5295 	if (held)
5296 		cachefs_cd_release(fscp);
5297 
5298 	if (vfslock)
5299 		vn_vfsunlock(delvp);
5300 
5301 	if (delvp)
5302 		VN_RELE(delvp);
5303 	if (tvp)
5304 		VN_RELE(tvp);
5305 
5306 #ifdef CFS_CD_DEBUG
5307 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
5308 #endif
5309 	return (error);
5310 }
5311 
5312 static int
5313 cachefs_rename_connected(vnode_t *odvp, char *onm, vnode_t *ndvp,
5314     char *nnm, cred_t *cr, vnode_t *delvp)
5315 {
5316 	cnode_t *odcp = VTOC(odvp);
5317 	cnode_t *ndcp = VTOC(ndvp);
5318 	vnode_t *revp = NULL;
5319 	cnode_t *recp;
5320 	cnode_t *delcp;
5321 	fscache_t *fscp = C_TO_FSCACHE(odcp);
5322 	int error = 0;
5323 	struct fid cookie;
5324 	struct fid *cookiep;
5325 	cfs_cid_t cid;
5326 	int gotdirent;
5327 
5328 	/* find the file we are renaming */
5329 	error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr);
5330 	if (error)
5331 		return (error);
5332 	recp = VTOC(revp);
5333 
5334 	/*
5335 	 * To avoid deadlock, we acquire this global rename lock before
5336 	 * we try to get the locks for the source and target directories.
5337 	 */
5338 	mutex_enter(&cachefs_rename_lock);
5339 	rw_enter(&odcp->c_rwlock, RW_WRITER);
5340 	if (odcp != ndcp) {
5341 		rw_enter(&ndcp->c_rwlock, RW_WRITER);
5342 	}
5343 	mutex_exit(&cachefs_rename_lock);
5344 
5345 	ASSERT((odcp->c_flags & CN_ASYNC_POP_WORKING) == 0);
5346 	ASSERT((ndcp->c_flags & CN_ASYNC_POP_WORKING) == 0);
5347 
5348 	mutex_enter(&odcp->c_statelock);
5349 	if (odcp->c_backvp == NULL) {
5350 		error = cachefs_getbackvp(fscp, odcp);
5351 		if (error) {
5352 			mutex_exit(&odcp->c_statelock);
5353 			goto out;
5354 		}
5355 	}
5356 
5357 	error = CFSOP_CHECK_COBJECT(fscp, odcp, 0, cr);
5358 	if (error) {
5359 		mutex_exit(&odcp->c_statelock);
5360 		goto out;
5361 	}
5362 	mutex_exit(&odcp->c_statelock);
5363 
5364 	if (odcp != ndcp) {
5365 		mutex_enter(&ndcp->c_statelock);
5366 		if (ndcp->c_backvp == NULL) {
5367 			error = cachefs_getbackvp(fscp, ndcp);
5368 			if (error) {
5369 				mutex_exit(&ndcp->c_statelock);
5370 				goto out;
5371 			}
5372 		}
5373 
5374 		error = CFSOP_CHECK_COBJECT(fscp, ndcp, 0, cr);
5375 		if (error) {
5376 			mutex_exit(&ndcp->c_statelock);
5377 			goto out;
5378 		}
5379 		mutex_exit(&ndcp->c_statelock);
5380 	}
5381 
5382 	/* if a file is being deleted because of this rename */
5383 	if (delvp) {
5384 		/* if src and dest file are same */
5385 		if (delvp == revp) {
5386 			error = 0;
5387 			goto out;
5388 		}
5389 
5390 		/*
5391 		 * If the cnode is active, make a link to the file
5392 		 * so operations on the file will continue.
5393 		 */
5394 		dnlc_purge_vp(delvp);
5395 		delcp = VTOC(delvp);
5396 		if ((delvp->v_type != VDIR) &&
5397 		    !((delvp->v_count == 1) ||
5398 		    ((delvp->v_count == 2) && delcp->c_ipending))) {
5399 			error = cachefs_remove_dolink(ndvp, delvp, nnm, cr);
5400 			if (error)
5401 				goto out;
5402 		}
5403 	}
5404 
5405 	/* do the rename on the back fs */
5406 	CFS_DPRINT_BACKFS_NFSV4(fscp,
5407 		("cachefs_rename (nfsv4): odcp %p, odbackvp %p, "
5408 		" ndcp %p, ndbackvp %p, onm %s, nnm %s\n",
5409 		odcp, odcp->c_backvp, ndcp, ndcp->c_backvp, onm, nnm));
5410 	error = VOP_RENAME(odcp->c_backvp, onm, ndcp->c_backvp, nnm, cr);
5411 	if (error)
5412 		goto out;
5413 
5414 	/* purge mappings to file in the old directory */
5415 	dnlc_purge_vp(odvp);
5416 
5417 	/* purge mappings in the new dir if we deleted a file */
5418 	if (delvp && (odvp != ndvp))
5419 		dnlc_purge_vp(ndvp);
5420 
5421 	/* update the file we just deleted */
5422 	if (delvp) {
5423 		mutex_enter(&delcp->c_statelock);
5424 		if (delcp->c_attr.va_nlink == 1) {
5425 			delcp->c_flags |= CN_DESTROY;
5426 		} else {
5427 			delcp->c_flags |= CN_UPDATED;
5428 		}
5429 		delcp->c_attr.va_nlink--;
5430 		CFSOP_MODIFY_COBJECT(fscp, delcp, cr);
5431 		mutex_exit(&delcp->c_statelock);
5432 	}
5433 
5434 	/* find the entry in the old directory */
5435 	mutex_enter(&odcp->c_statelock);
5436 	gotdirent = 0;
5437 	cookiep = NULL;
5438 	if (CFS_ISFS_NONSHARED(fscp) &&
5439 	    (odcp->c_metadata.md_flags & MD_POPULATED)) {
5440 		error = cachefs_dir_look(odcp, onm, &cookie,
5441 			NULL, NULL, &cid);
5442 		if (error == 0 || error == EINVAL) {
5443 			gotdirent = 1;
5444 			if (error == 0)
5445 				cookiep = &cookie;
5446 		} else {
5447 			cachefs_inval_object(odcp);
5448 		}
5449 	}
5450 	error = 0;
5451 
5452 	/* remove the directory entry from the old directory */
5453 	if (gotdirent) {
5454 		error = cachefs_dir_rmentry(odcp, onm);
5455 		if (error) {
5456 			cachefs_nocache(odcp);
5457 			error = 0;
5458 		}
5459 	}
5460 	CFSOP_MODIFY_COBJECT(fscp, odcp, cr);
5461 	mutex_exit(&odcp->c_statelock);
5462 
5463 	/* install the directory entry in the new directory */
5464 	mutex_enter(&ndcp->c_statelock);
5465 	if (CFS_ISFS_NONSHARED(fscp) &&
5466 	    (ndcp->c_metadata.md_flags & MD_POPULATED)) {
5467 		error = 1;
5468 		if (gotdirent) {
5469 			ASSERT(cid.cid_fileno != 0);
5470 			error = 0;
5471 			if (delvp) {
5472 				error = cachefs_dir_rmentry(ndcp, nnm);
5473 			}
5474 			if (error == 0) {
5475 				error = cachefs_dir_enter(ndcp, nnm, cookiep,
5476 				    &cid, SM_ASYNC);
5477 			}
5478 		}
5479 		if (error) {
5480 			cachefs_nocache(ndcp);
5481 			error = 0;
5482 		}
5483 	}
5484 	if (odcp != ndcp)
5485 		CFSOP_MODIFY_COBJECT(fscp, ndcp, cr);
5486 	mutex_exit(&ndcp->c_statelock);
5487 
5488 	/* ctime of renamed file has changed */
5489 	mutex_enter(&recp->c_statelock);
5490 	CFSOP_MODIFY_COBJECT(fscp, recp, cr);
5491 	mutex_exit(&recp->c_statelock);
5492 
5493 out:
5494 	if (odcp != ndcp)
5495 		rw_exit(&ndcp->c_rwlock);
5496 	rw_exit(&odcp->c_rwlock);
5497 
5498 	VN_RELE(revp);
5499 
5500 	return (error);
5501 }
5502 
5503 static int
5504 cachefs_rename_disconnected(vnode_t *odvp, char *onm, vnode_t *ndvp,
5505     char *nnm, cred_t *cr, vnode_t *delvp)
5506 {
5507 	cnode_t *odcp = VTOC(odvp);
5508 	cnode_t *ndcp = VTOC(ndvp);
5509 	cnode_t *delcp = NULL;
5510 	vnode_t *revp = NULL;
5511 	cnode_t *recp;
5512 	fscache_t *fscp = C_TO_FSCACHE(odcp);
5513 	int error = 0;
5514 	struct fid cookie;
5515 	struct fid *cookiep;
5516 	cfs_cid_t cid;
5517 	off_t commit = 0;
5518 	timestruc_t current_time;
5519 
5520 	if (CFS_ISFS_WRITE_AROUND(fscp))
5521 		return (ETIMEDOUT);
5522 
5523 	/* find the file we are renaming */
5524 	error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr);
5525 	if (error)
5526 		return (error);
5527 	recp = VTOC(revp);
5528 
5529 	/*
5530 	 * To avoid deadlock, we acquire this global rename lock before
5531 	 * we try to get the locks for the source and target directories.
5532 	 */
5533 	mutex_enter(&cachefs_rename_lock);
5534 	rw_enter(&odcp->c_rwlock, RW_WRITER);
5535 	if (odcp != ndcp) {
5536 		rw_enter(&ndcp->c_rwlock, RW_WRITER);
5537 	}
5538 	mutex_exit(&cachefs_rename_lock);
5539 
5540 	if (recp->c_metadata.md_flags & MD_NEEDATTRS) {
5541 		error = ETIMEDOUT;
5542 		goto out;
5543 	}
5544 
5545 	if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) {
5546 		mutex_enter(&recp->c_statelock);
5547 		if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) {
5548 			error = cachefs_dlog_cidmap(fscp);
5549 			if (error) {
5550 				mutex_exit(&recp->c_statelock);
5551 				error = ENOSPC;
5552 				goto out;
5553 			}
5554 			recp->c_metadata.md_flags |= MD_MAPPING;
5555 			recp->c_flags |= CN_UPDATED;
5556 		}
5557 		mutex_exit(&recp->c_statelock);
5558 	}
5559 
5560 	/* check permissions */
5561 	/* XXX clean up this mutex junk sometime */
5562 	mutex_enter(&odcp->c_statelock);
5563 	error = cachefs_access_local(odcp, (VEXEC|VWRITE), cr);
5564 	mutex_exit(&odcp->c_statelock);
5565 	if (error != 0)
5566 		goto out;
5567 	mutex_enter(&ndcp->c_statelock);
5568 	error = cachefs_access_local(ndcp, (VEXEC|VWRITE), cr);
5569 	mutex_exit(&ndcp->c_statelock);
5570 	if (error != 0)
5571 		goto out;
5572 	mutex_enter(&odcp->c_statelock);
5573 	error = cachefs_stickyrmchk(odcp, recp, cr);
5574 	mutex_exit(&odcp->c_statelock);
5575 	if (error != 0)
5576 		goto out;
5577 
5578 	/* dirs must be populated */
5579 	if (((odcp->c_metadata.md_flags & MD_POPULATED) == 0) ||
5580 	    ((ndcp->c_metadata.md_flags & MD_POPULATED) == 0)) {
5581 		error = ETIMEDOUT;
5582 		goto out;
5583 	}
5584 
5585 	/* for now do not allow moving dirs because could cause cycles */
5586 	if ((((revp->v_type == VDIR) && (odvp != ndvp))) ||
5587 	    (revp == odvp)) {
5588 		error = ETIMEDOUT;
5589 		goto out;
5590 	}
5591 
5592 	/* if a file is being deleted because of this rename */
5593 	if (delvp) {
5594 		delcp = VTOC(delvp);
5595 
5596 		/* if src and dest file are the same */
5597 		if (delvp == revp) {
5598 			error = 0;
5599 			goto out;
5600 		}
5601 
5602 		if (delcp->c_metadata.md_flags & MD_NEEDATTRS) {
5603 			error = ETIMEDOUT;
5604 			goto out;
5605 		}
5606 
5607 		/* if there are hard links to this file */
5608 		if (delcp->c_attr.va_nlink > 1) {
5609 			mutex_enter(&delcp->c_statelock);
5610 			if (cachefs_modified_alloc(delcp)) {
5611 				mutex_exit(&delcp->c_statelock);
5612 				error = ENOSPC;
5613 				goto out;
5614 			}
5615 
5616 			if ((delcp->c_metadata.md_flags & MD_MAPPING) == 0) {
5617 				error = cachefs_dlog_cidmap(fscp);
5618 				if (error) {
5619 					mutex_exit(&delcp->c_statelock);
5620 					error = ENOSPC;
5621 					goto out;
5622 				}
5623 				delcp->c_metadata.md_flags |= MD_MAPPING;
5624 				delcp->c_flags |= CN_UPDATED;
5625 			}
5626 			mutex_exit(&delcp->c_statelock);
5627 		}
5628 
5629 		/* make sure we can delete file */
5630 		mutex_enter(&ndcp->c_statelock);
5631 		error = cachefs_stickyrmchk(ndcp, delcp, cr);
5632 		mutex_exit(&ndcp->c_statelock);
5633 		if (error != 0)
5634 			goto out;
5635 
5636 		/*
5637 		 * If the cnode is active, make a link to the file
5638 		 * so operations on the file will continue.
5639 		 */
5640 		dnlc_purge_vp(delvp);
5641 		if ((delvp->v_type != VDIR) &&
5642 		    !((delvp->v_count == 1) ||
5643 		    ((delvp->v_count == 2) && delcp->c_ipending))) {
5644 			error = cachefs_remove_dolink(ndvp, delvp, nnm, cr);
5645 			if (error)
5646 				goto out;
5647 		}
5648 	}
5649 
5650 	/* purge mappings to file in the old directory */
5651 	dnlc_purge_vp(odvp);
5652 
5653 	/* purge mappings in the new dir if we deleted a file */
5654 	if (delvp && (odvp != ndvp))
5655 		dnlc_purge_vp(ndvp);
5656 
5657 	/* find the entry in the old directory */
5658 	mutex_enter(&odcp->c_statelock);
5659 	if ((odcp->c_metadata.md_flags & MD_POPULATED) == 0) {
5660 		mutex_exit(&odcp->c_statelock);
5661 		error = ETIMEDOUT;
5662 		goto out;
5663 	}
5664 	cookiep = NULL;
5665 	error = cachefs_dir_look(odcp, onm, &cookie, NULL, NULL, &cid);
5666 	if (error == 0 || error == EINVAL) {
5667 		if (error == 0)
5668 			cookiep = &cookie;
5669 	} else {
5670 		mutex_exit(&odcp->c_statelock);
5671 		if (error == ENOTDIR)
5672 			error = ETIMEDOUT;
5673 		goto out;
5674 	}
5675 	error = 0;
5676 
5677 	/* write the log entry */
5678 	commit = cachefs_dlog_rename(fscp, odcp, onm, ndcp, nnm, cr,
5679 	    recp, delcp);
5680 	if (commit == 0) {
5681 		mutex_exit(&odcp->c_statelock);
5682 		error = ENOSPC;
5683 		goto out;
5684 	}
5685 
5686 	/* remove the directory entry from the old directory */
5687 	cachefs_modified(odcp);
5688 	error = cachefs_dir_rmentry(odcp, onm);
5689 	if (error) {
5690 		mutex_exit(&odcp->c_statelock);
5691 		if (error == ENOTDIR)
5692 			error = ETIMEDOUT;
5693 		goto out;
5694 	}
5695 	mutex_exit(&odcp->c_statelock);
5696 
5697 	/* install the directory entry in the new directory */
5698 	mutex_enter(&ndcp->c_statelock);
5699 	error = ENOTDIR;
5700 	if (ndcp->c_metadata.md_flags & MD_POPULATED) {
5701 		ASSERT(cid.cid_fileno != 0);
5702 		cachefs_modified(ndcp);
5703 		error = 0;
5704 		if (delvp) {
5705 			error = cachefs_dir_rmentry(ndcp, nnm);
5706 		}
5707 		if (error == 0) {
5708 			error = cachefs_dir_enter(ndcp, nnm, cookiep,
5709 			    &cid, SM_ASYNC);
5710 		}
5711 	}
5712 	if (error) {
5713 		cachefs_nocache(ndcp);
5714 		mutex_exit(&ndcp->c_statelock);
5715 		mutex_enter(&odcp->c_statelock);
5716 		cachefs_nocache(odcp);
5717 		mutex_exit(&odcp->c_statelock);
5718 		if (error == ENOTDIR)
5719 			error = ETIMEDOUT;
5720 		goto out;
5721 	}
5722 	mutex_exit(&ndcp->c_statelock);
5723 
5724 	gethrestime(&current_time);
5725 
5726 	/* update the file we just deleted */
5727 	if (delvp) {
5728 		mutex_enter(&delcp->c_statelock);
5729 		delcp->c_attr.va_nlink--;
5730 		delcp->c_metadata.md_localctime = current_time;
5731 		delcp->c_metadata.md_flags |= MD_LOCALCTIME;
5732 		if (delcp->c_attr.va_nlink == 0) {
5733 			delcp->c_flags |= CN_DESTROY;
5734 		} else {
5735 			delcp->c_flags |= CN_UPDATED;
5736 		}
5737 		mutex_exit(&delcp->c_statelock);
5738 	}
5739 
5740 	/* update the file we renamed */
5741 	mutex_enter(&recp->c_statelock);
5742 	recp->c_metadata.md_localctime = current_time;
5743 	recp->c_metadata.md_flags |= MD_LOCALCTIME;
5744 	recp->c_flags |= CN_UPDATED;
5745 	mutex_exit(&recp->c_statelock);
5746 
5747 	/* update the source directory */
5748 	mutex_enter(&odcp->c_statelock);
5749 	odcp->c_metadata.md_localctime = current_time;
5750 	odcp->c_metadata.md_localmtime = current_time;
5751 	odcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5752 	odcp->c_flags |= CN_UPDATED;
5753 	mutex_exit(&odcp->c_statelock);
5754 
5755 	/* update the destination directory */
5756 	if (odcp != ndcp) {
5757 		mutex_enter(&ndcp->c_statelock);
5758 		ndcp->c_metadata.md_localctime = current_time;
5759 		ndcp->c_metadata.md_localmtime = current_time;
5760 		ndcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5761 		ndcp->c_flags |= CN_UPDATED;
5762 		mutex_exit(&ndcp->c_statelock);
5763 	}
5764 
5765 out:
5766 	if (commit) {
5767 		/* commit the log entry */
5768 		if (cachefs_dlog_commit(fscp, commit, error)) {
5769 			/*EMPTY*/
5770 			/* XXX bob: fix on panic */
5771 		}
5772 	}
5773 
5774 	if (odcp != ndcp)
5775 		rw_exit(&ndcp->c_rwlock);
5776 	rw_exit(&odcp->c_rwlock);
5777 
5778 	VN_RELE(revp);
5779 
5780 	return (error);
5781 }
5782 
5783 static int
5784 cachefs_mkdir(vnode_t *dvp, char *nm, vattr_t *vap, vnode_t **vpp,
5785     cred_t *cr)
5786 {
5787 	cnode_t *dcp = VTOC(dvp);
5788 	fscache_t *fscp = C_TO_FSCACHE(dcp);
5789 	cachefscache_t *cachep = fscp->fs_cache;
5790 	int error = 0;
5791 	int held = 0;
5792 	int connected = 0;
5793 
5794 #ifdef CFSDEBUG
5795 	CFS_DEBUG(CFSDEBUG_VOPS)
5796 		printf("cachefs_mkdir: ENTER dvp %p\n", (void *)dvp);
5797 #endif
5798 
5799 	if (getzoneid() != GLOBAL_ZONEID) {
5800 		error = EPERM;
5801 		goto out;
5802 	}
5803 
5804 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
5805 		ASSERT(dcp->c_flags & CN_NOCACHE);
5806 
5807 	/*
5808 	 * Cachefs only provides pass-through support for NFSv4,
5809 	 * and all vnode operations are passed through to the
5810 	 * back file system. For NFSv4 pass-through to work, only
5811 	 * connected operation is supported, the cnode backvp must
5812 	 * exist, and cachefs optional (eg., disconnectable) flags
5813 	 * are turned off. Assert these conditions to ensure that
5814 	 * the backfilesystem is called for the mkdir operation.
5815 	 */
5816 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
5817 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
5818 
5819 	for (;;) {
5820 		/* get (or renew) access to the file system */
5821 		if (held) {
5822 			/* Won't loop with NFSv4 connected behavior */
5823 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
5824 			rw_exit(&dcp->c_rwlock);
5825 			cachefs_cd_release(fscp);
5826 			held = 0;
5827 		}
5828 		error = cachefs_cd_access(fscp, connected, 1);
5829 		if (error)
5830 			break;
5831 		rw_enter(&dcp->c_rwlock, RW_WRITER);
5832 		held = 1;
5833 
5834 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5835 			error = cachefs_mkdir_connected(dvp, nm, vap,
5836 				vpp, cr);
5837 			if (CFS_TIMEOUT(fscp, error)) {
5838 				rw_exit(&dcp->c_rwlock);
5839 				cachefs_cd_release(fscp);
5840 				held = 0;
5841 				cachefs_cd_timedout(fscp);
5842 				connected = 0;
5843 				continue;
5844 			}
5845 		} else {
5846 			error = cachefs_mkdir_disconnected(dvp, nm, vap,
5847 				vpp, cr);
5848 			if (CFS_TIMEOUT(fscp, error)) {
5849 				connected = 1;
5850 				continue;
5851 			}
5852 		}
5853 		break;
5854 	}
5855 
5856 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_MKDIR)) {
5857 		fid_t *fidp = NULL;
5858 		ino64_t fileno = 0;
5859 		cnode_t *cp = NULL;
5860 		if (error == 0)
5861 			cp = VTOC(*vpp);
5862 
5863 		if (cp != NULL) {
5864 			fidp = &cp->c_metadata.md_cookie;
5865 			fileno = cp->c_id.cid_fileno;
5866 		}
5867 
5868 		cachefs_log_mkdir(cachep, error, fscp->fs_cfsvfsp,
5869 		    fidp, fileno, crgetuid(cr));
5870 	}
5871 
5872 	if (held) {
5873 		rw_exit(&dcp->c_rwlock);
5874 		cachefs_cd_release(fscp);
5875 	}
5876 	if (error == 0 && CFS_ISFS_NONSHARED(fscp))
5877 		(void) cachefs_pack(dvp, nm, cr);
5878 
5879 #ifdef CFS_CD_DEBUG
5880 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
5881 #endif
5882 out:
5883 #ifdef CFSDEBUG
5884 	CFS_DEBUG(CFSDEBUG_VOPS)
5885 		printf("cachefs_mkdir: EXIT error = %d\n", error);
5886 #endif
5887 	return (error);
5888 }
5889 
5890 static int
5891 cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap,
5892     vnode_t **vpp, cred_t *cr)
5893 {
5894 	cnode_t *newcp = NULL, *dcp = VTOC(dvp);
5895 	struct vnode *vp = NULL;
5896 	int error = 0;
5897 	fscache_t *fscp = C_TO_FSCACHE(dcp);
5898 	struct fid cookie;
5899 	struct vattr attr;
5900 	cfs_cid_t cid, dircid;
5901 	uint32_t valid_fid;
5902 
5903 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
5904 		ASSERT(dcp->c_flags & CN_NOCACHE);
5905 
5906 	mutex_enter(&dcp->c_statelock);
5907 
5908 	/* get backvp of dir */
5909 	if (dcp->c_backvp == NULL) {
5910 		error = cachefs_getbackvp(fscp, dcp);
5911 		if (error) {
5912 			mutex_exit(&dcp->c_statelock);
5913 			goto out;
5914 		}
5915 	}
5916 
5917 	/* consistency check the directory */
5918 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
5919 	if (error) {
5920 		mutex_exit(&dcp->c_statelock);
5921 		goto out;
5922 	}
5923 	dircid = dcp->c_id;
5924 
5925 	/* make the dir on the back fs */
5926 	CFS_DPRINT_BACKFS_NFSV4(fscp,
5927 		("cachefs_mkdir (nfsv4): dcp %p, dbackvp %p, "
5928 		"name %s\n", dcp, dcp->c_backvp, nm));
5929 	error = VOP_MKDIR(dcp->c_backvp, nm, vap, &vp, cr);
5930 	mutex_exit(&dcp->c_statelock);
5931 	if (error) {
5932 		goto out;
5933 	}
5934 
5935 	/* get the cookie and make the cnode */
5936 	attr.va_mask = AT_ALL;
5937 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
5938 	error = cachefs_getcookie(vp, &cookie, &attr, cr, valid_fid);
5939 	if (error) {
5940 		goto out;
5941 	}
5942 	cid.cid_flags = 0;
5943 	cid.cid_fileno = attr.va_nodeid;
5944 	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
5945 					&attr, vp, cr, 0, &newcp);
5946 	if (error) {
5947 		goto out;
5948 	}
5949 	ASSERT(CTOV(newcp)->v_type == VDIR);
5950 	*vpp = CTOV(newcp);
5951 
5952 	/* if the dir is populated, add the new entry */
5953 	mutex_enter(&dcp->c_statelock);
5954 	if (CFS_ISFS_NONSHARED(fscp) &&
5955 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
5956 		error = cachefs_dir_enter(dcp, nm, &cookie, &newcp->c_id,
5957 		    SM_ASYNC);
5958 		if (error) {
5959 			cachefs_nocache(dcp);
5960 			error = 0;
5961 		}
5962 	}
5963 	dcp->c_attr.va_nlink++;
5964 	dcp->c_flags |= CN_UPDATED;
5965 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
5966 	mutex_exit(&dcp->c_statelock);
5967 
5968 	/* XXX bob: should we do a filldir here? or just add . and .. */
5969 	/* maybe should kick off an async filldir so caller does not wait */
5970 
5971 	/* put the entry in the dnlc */
5972 	if (cachefs_dnlc)
5973 		dnlc_enter(dvp, nm, *vpp);
5974 
5975 	/* save the fileno of the parent so can find the name */
5976 	if (bcmp(&newcp->c_metadata.md_parent, &dircid,
5977 	    sizeof (cfs_cid_t)) != 0) {
5978 		mutex_enter(&newcp->c_statelock);
5979 		newcp->c_metadata.md_parent = dircid;
5980 		newcp->c_flags |= CN_UPDATED;
5981 		mutex_exit(&newcp->c_statelock);
5982 	}
5983 out:
5984 	if (vp)
5985 		VN_RELE(vp);
5986 
5987 	return (error);
5988 }
5989 
5990 static int
5991 cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
5992     vnode_t **vpp, cred_t *cr)
5993 {
5994 	cnode_t *dcp = VTOC(dvp);
5995 	fscache_t *fscp = C_TO_FSCACHE(dcp);
5996 	int error;
5997 	cnode_t *newcp = NULL;
5998 	struct vattr va;
5999 	timestruc_t current_time;
6000 	off_t commit = 0;
6001 	char *s;
6002 	int namlen;
6003 
6004 	/* don't allow '/' characters in pathname component */
6005 	for (s = nm, namlen = 0; *s; s++, namlen++)
6006 		if (*s == '/')
6007 			return (EACCES);
6008 	if (namlen == 0)
6009 		return (EINVAL);
6010 
6011 	if (CFS_ISFS_WRITE_AROUND(fscp))
6012 		return (ETIMEDOUT);
6013 
6014 	mutex_enter(&dcp->c_statelock);
6015 
6016 	/* check permissions */
6017 	if (error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) {
6018 		mutex_exit(&dcp->c_statelock);
6019 		goto out;
6020 	}
6021 
6022 	/* the directory front file must be populated */
6023 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6024 		error = ETIMEDOUT;
6025 		mutex_exit(&dcp->c_statelock);
6026 		goto out;
6027 	}
6028 
6029 	/* make sure nm does not already exist in the directory */
6030 	error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL);
6031 	if (error == ENOTDIR) {
6032 		error = ETIMEDOUT;
6033 		mutex_exit(&dcp->c_statelock);
6034 		goto out;
6035 	}
6036 	if (error != ENOENT) {
6037 		error = EEXIST;
6038 		mutex_exit(&dcp->c_statelock);
6039 		goto out;
6040 	}
6041 
6042 	/* make up a reasonable set of attributes */
6043 	cachefs_attr_setup(vap, &va, dcp, cr);
6044 	va.va_type = VDIR;
6045 	va.va_mode |= S_IFDIR;
6046 	va.va_nlink = 2;
6047 
6048 	mutex_exit(&dcp->c_statelock);
6049 
6050 	/* create the cnode */
6051 	error = cachefs_cnode_create(fscp, &va, 0, &newcp);
6052 	if (error)
6053 		goto out;
6054 
6055 	mutex_enter(&newcp->c_statelock);
6056 
6057 	error = cachefs_dlog_cidmap(fscp);
6058 	if (error) {
6059 		mutex_exit(&newcp->c_statelock);
6060 		goto out;
6061 	}
6062 
6063 	cachefs_creategid(dcp, newcp, vap, cr);
6064 	mutex_enter(&dcp->c_statelock);
6065 	cachefs_createacl(dcp, newcp);
6066 	mutex_exit(&dcp->c_statelock);
6067 	gethrestime(&current_time);
6068 	newcp->c_metadata.md_vattr.va_atime = current_time;
6069 	newcp->c_metadata.md_localctime = current_time;
6070 	newcp->c_metadata.md_localmtime = current_time;
6071 	newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME |
6072 	    MD_LOCALCTIME;
6073 	newcp->c_flags |= CN_UPDATED;
6074 
6075 	/* make a front file for the new directory, add . and .. */
6076 	error = cachefs_dir_new(dcp, newcp);
6077 	if (error) {
6078 		mutex_exit(&newcp->c_statelock);
6079 		goto out;
6080 	}
6081 	cachefs_modified(newcp);
6082 
6083 	/*
6084 	 * write the metadata now rather than waiting until
6085 	 * inactive so that if there's no space we can let
6086 	 * the caller know.
6087 	 */
6088 	ASSERT(newcp->c_frontvp);
6089 	ASSERT((newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
6090 	ASSERT((newcp->c_flags & CN_ALLOC_PENDING) == 0);
6091 	error = filegrp_write_metadata(newcp->c_filegrp,
6092 	    &newcp->c_id, &newcp->c_metadata);
6093 	if (error) {
6094 		mutex_exit(&newcp->c_statelock);
6095 		goto out;
6096 	}
6097 	mutex_exit(&newcp->c_statelock);
6098 
6099 	/* log the operation */
6100 	commit = cachefs_dlog_mkdir(fscp, dcp, newcp, nm, &va, cr);
6101 	if (commit == 0) {
6102 		error = ENOSPC;
6103 		goto out;
6104 	}
6105 
6106 	mutex_enter(&dcp->c_statelock);
6107 
6108 	/* make sure directory is still populated */
6109 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6110 		mutex_exit(&dcp->c_statelock);
6111 		error = ETIMEDOUT;
6112 		goto out;
6113 	}
6114 	cachefs_modified(dcp);
6115 
6116 	/* enter the new file in the directory */
6117 	error = cachefs_dir_enter(dcp, nm, &newcp->c_metadata.md_cookie,
6118 		&newcp->c_id, SM_ASYNC);
6119 	if (error) {
6120 		mutex_exit(&dcp->c_statelock);
6121 		goto out;
6122 	}
6123 
6124 	/* update parent dir times */
6125 	dcp->c_metadata.md_localctime = current_time;
6126 	dcp->c_metadata.md_localmtime = current_time;
6127 	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
6128 	dcp->c_attr.va_nlink++;
6129 	dcp->c_flags |= CN_UPDATED;
6130 	mutex_exit(&dcp->c_statelock);
6131 
6132 out:
6133 	if (commit) {
6134 		/* commit the log entry */
6135 		if (cachefs_dlog_commit(fscp, commit, error)) {
6136 			/*EMPTY*/
6137 			/* XXX bob: fix on panic */
6138 		}
6139 	}
6140 	if (error) {
6141 		if (newcp) {
6142 			mutex_enter(&newcp->c_statelock);
6143 			newcp->c_flags |= CN_DESTROY;
6144 			mutex_exit(&newcp->c_statelock);
6145 			VN_RELE(CTOV(newcp));
6146 		}
6147 	} else {
6148 		*vpp = CTOV(newcp);
6149 	}
6150 	return (error);
6151 }
6152 
6153 static int
6154 cachefs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr)
6155 {
6156 	cnode_t *dcp = VTOC(dvp);
6157 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6158 	cachefscache_t *cachep = fscp->fs_cache;
6159 	int error = 0;
6160 	int held = 0;
6161 	int connected = 0;
6162 	size_t namlen;
6163 	vnode_t *vp = NULL;
6164 	int vfslock = 0;
6165 
6166 #ifdef CFSDEBUG
6167 	CFS_DEBUG(CFSDEBUG_VOPS)
6168 		printf("cachefs_rmdir: ENTER vp %p\n", (void *)dvp);
6169 #endif
6170 
6171 	if (getzoneid() != GLOBAL_ZONEID) {
6172 		error = EPERM;
6173 		goto out;
6174 	}
6175 
6176 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
6177 		ASSERT(dcp->c_flags & CN_NOCACHE);
6178 
6179 	/*
6180 	 * Cachefs only provides pass-through support for NFSv4,
6181 	 * and all vnode operations are passed through to the
6182 	 * back file system. For NFSv4 pass-through to work, only
6183 	 * connected operation is supported, the cnode backvp must
6184 	 * exist, and cachefs optional (eg., disconnectable) flags
6185 	 * are turned off. Assert these conditions to ensure that
6186 	 * the backfilesystem is called for the rmdir operation.
6187 	 */
6188 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6189 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6190 
6191 	for (;;) {
6192 		if (vfslock) {
6193 			vn_vfsunlock(vp);
6194 			vfslock = 0;
6195 		}
6196 		if (vp) {
6197 			VN_RELE(vp);
6198 			vp = NULL;
6199 		}
6200 
6201 		/* get (or renew) access to the file system */
6202 		if (held) {
6203 			/* Won't loop with NFSv4 connected behavior */
6204 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6205 			cachefs_cd_release(fscp);
6206 			held = 0;
6207 		}
6208 		error = cachefs_cd_access(fscp, connected, 1);
6209 		if (error)
6210 			break;
6211 		held = 1;
6212 
6213 		/* if disconnected, do some extra error checking */
6214 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
6215 			/* check permissions */
6216 			mutex_enter(&dcp->c_statelock);
6217 			error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
6218 			mutex_exit(&dcp->c_statelock);
6219 			if (CFS_TIMEOUT(fscp, error)) {
6220 				connected = 1;
6221 				continue;
6222 			}
6223 			if (error)
6224 				break;
6225 
6226 			namlen = strlen(nm);
6227 			if (namlen == 0) {
6228 				error = EINVAL;
6229 				break;
6230 			}
6231 
6232 			/* cannot remove . and .. */
6233 			if (nm[0] == '.') {
6234 				if (namlen == 1) {
6235 					error = EINVAL;
6236 					break;
6237 				} else if (namlen == 2 && nm[1] == '.') {
6238 					error = EEXIST;
6239 					break;
6240 				}
6241 			}
6242 
6243 		}
6244 
6245 		/* get the cnode of the dir to remove */
6246 		error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
6247 		if (error) {
6248 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6249 				if (CFS_TIMEOUT(fscp, error)) {
6250 					cachefs_cd_release(fscp);
6251 					held = 0;
6252 					cachefs_cd_timedout(fscp);
6253 					connected = 0;
6254 					continue;
6255 				}
6256 			} else {
6257 				if (CFS_TIMEOUT(fscp, error)) {
6258 					connected = 1;
6259 					continue;
6260 				}
6261 			}
6262 			break;
6263 		}
6264 
6265 		/* must be a dir */
6266 		if (vp->v_type != VDIR) {
6267 			error = ENOTDIR;
6268 			break;
6269 		}
6270 
6271 		/* must not be current dir */
6272 		if (VOP_CMP(vp, cdir)) {
6273 			error = EINVAL;
6274 			break;
6275 		}
6276 
6277 		/* see ufs_dirremove for why this is done, mount race */
6278 		if (vn_vfswlock(vp)) {
6279 			error = EBUSY;
6280 			break;
6281 		}
6282 		vfslock = 1;
6283 		if (vn_mountedvfs(vp) != NULL) {
6284 			error = EBUSY;
6285 			break;
6286 		}
6287 
6288 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6289 			error = cachefs_rmdir_connected(dvp, nm, cdir,
6290 				cr, vp);
6291 			if (CFS_TIMEOUT(fscp, error)) {
6292 				cachefs_cd_release(fscp);
6293 				held = 0;
6294 				cachefs_cd_timedout(fscp);
6295 				connected = 0;
6296 				continue;
6297 			}
6298 		} else {
6299 			error = cachefs_rmdir_disconnected(dvp, nm, cdir,
6300 				cr, vp);
6301 			if (CFS_TIMEOUT(fscp, error)) {
6302 				connected = 1;
6303 				continue;
6304 			}
6305 		}
6306 		break;
6307 	}
6308 
6309 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RMDIR)) {
6310 		ino64_t fileno = 0;
6311 		fid_t *fidp = NULL;
6312 		cnode_t *cp = NULL;
6313 		if (vp)
6314 			cp = VTOC(vp);
6315 
6316 		if (cp != NULL) {
6317 			fidp = &cp->c_metadata.md_cookie;
6318 			fileno = cp->c_id.cid_fileno;
6319 		}
6320 
6321 		cachefs_log_rmdir(cachep, error, fscp->fs_cfsvfsp,
6322 		    fidp, fileno, crgetuid(cr));
6323 	}
6324 
6325 	if (held) {
6326 		cachefs_cd_release(fscp);
6327 	}
6328 
6329 	if (vfslock)
6330 		vn_vfsunlock(vp);
6331 
6332 	if (vp)
6333 		VN_RELE(vp);
6334 
6335 #ifdef CFS_CD_DEBUG
6336 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
6337 #endif
6338 out:
6339 #ifdef CFSDEBUG
6340 	CFS_DEBUG(CFSDEBUG_VOPS)
6341 		printf("cachefs_rmdir: EXIT error = %d\n", error);
6342 #endif
6343 
6344 	return (error);
6345 }
6346 
6347 static int
6348 cachefs_rmdir_connected(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
6349     vnode_t *vp)
6350 {
6351 	cnode_t *dcp = VTOC(dvp);
6352 	cnode_t *cp = VTOC(vp);
6353 	int error = 0;
6354 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6355 
6356 	rw_enter(&dcp->c_rwlock, RW_WRITER);
6357 	mutex_enter(&dcp->c_statelock);
6358 	mutex_enter(&cp->c_statelock);
6359 
6360 	if (dcp->c_backvp == NULL) {
6361 		error = cachefs_getbackvp(fscp, dcp);
6362 		if (error) {
6363 			goto out;
6364 		}
6365 	}
6366 
6367 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
6368 	if (error)
6369 		goto out;
6370 
6371 	/* rmdir on the back fs */
6372 	CFS_DPRINT_BACKFS_NFSV4(fscp,
6373 		("cachefs_rmdir (nfsv4): dcp %p, dbackvp %p, "
6374 		"name %s\n", dcp, dcp->c_backvp, nm));
6375 	error = VOP_RMDIR(dcp->c_backvp, nm, cdir, cr);
6376 	if (error)
6377 		goto out;
6378 
6379 	/* if the dir is populated, remove the entry from it */
6380 	if (CFS_ISFS_NONSHARED(fscp) &&
6381 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
6382 		error = cachefs_dir_rmentry(dcp, nm);
6383 		if (error) {
6384 			cachefs_nocache(dcp);
6385 			error = 0;
6386 		}
6387 	}
6388 
6389 	/*
6390 	 * *if* the (hard) link count goes to 0, then we set the CDESTROY
6391 	 * flag on the cnode. The cached object will then be destroyed
6392 	 * at inactive time where the chickens come home to roost :-)
6393 	 * The link cnt for directories is bumped down by 2 'cause the "."
6394 	 * entry has to be elided too ! The link cnt for the parent goes down
6395 	 * by 1 (because of "..").
6396 	 */
6397 	cp->c_attr.va_nlink -= 2;
6398 	dcp->c_attr.va_nlink--;
6399 	if (cp->c_attr.va_nlink == 0) {
6400 		cp->c_flags |= CN_DESTROY;
6401 	} else {
6402 		cp->c_flags |= CN_UPDATED;
6403 	}
6404 	dcp->c_flags |= CN_UPDATED;
6405 
6406 	dnlc_purge_vp(vp);
6407 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
6408 
6409 out:
6410 	mutex_exit(&cp->c_statelock);
6411 	mutex_exit(&dcp->c_statelock);
6412 	rw_exit(&dcp->c_rwlock);
6413 
6414 	return (error);
6415 }
6416 
/*
 * cachefs_rmdir_disconnected
 *
 * Disconnected-mode worker for rmdir.  Removes the directory named "nm"
 * (whose vnode is vp) from the parent directory dvp using only cached
 * state, and records the operation with cachefs_dlog_rmdir() /
 * cachefs_dlog_commit().
 *
 * Takes the parent's c_rwlock as writer and the statelocks of both
 * cnodes; all three are released before the log entry is committed.
 *
 * Returns 0 on success.  ETIMEDOUT is returned when the operation
 * cannot be carried out from the cache (write-around file system,
 * an unpopulated directory, or ENOTDIR from the cached-directory
 * routines); ENOSPC when no log record could be written.
 * NOTE(review): ETIMEDOUT presumably makes the caller retry in
 * connected mode, as the other entry points in this file do - confirm
 * against cachefs_rmdir().
 */
static int
/*ARGSUSED*/
cachefs_rmdir_disconnected(vnode_t *dvp, char *nm, vnode_t *cdir,
    cred_t *cr, vnode_t *vp)
{
	cnode_t *dcp = VTOC(dvp);
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error = 0;
	off_t commit = 0;	/* non-zero once a log record exists */
	timestruc_t current_time;

	if (CFS_ISFS_WRITE_AROUND(fscp))
		return (ETIMEDOUT);

	rw_enter(&dcp->c_rwlock, RW_WRITER);
	mutex_enter(&dcp->c_statelock);
	mutex_enter(&cp->c_statelock);

	/* both directories must be populated */
	if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) ||
	    ((cp->c_metadata.md_flags & MD_POPULATED) == 0)) {
		error = ETIMEDOUT;
		goto out;
	}

	/* if sticky bit set on the dir, more access checks to perform */
	if (error = cachefs_stickyrmchk(dcp, cp, cr)) {
		goto out;
	}

	/* make sure dir is empty */
	if (cp->c_attr.va_nlink > 2) {
		error = cachefs_dir_empty(cp);
		if (error) {
			/* ENOTDIR from the cached dir is reported as ETIMEDOUT */
			if (error == ENOTDIR)
				error = ETIMEDOUT;
			goto out;
		}
		cachefs_modified(cp);
	}
	cachefs_modified(dcp);

	/* log the operation */
	commit = cachefs_dlog_rmdir(fscp, dcp, nm, cp, cr);
	if (commit == 0) {
		/* a zero return means no log record was written */
		error = ENOSPC;
		goto out;
	}

	/* remove name from parent dir */
	error = cachefs_dir_rmentry(dcp, nm);
	if (error == ENOTDIR) {
		error = ETIMEDOUT;
		goto out;
	}
	if (error)
		goto out;

	gethrestime(&current_time);

	/* update deleted dir values */
	cp->c_attr.va_nlink -= 2;
	if (cp->c_attr.va_nlink == 0)
		cp->c_flags |= CN_DESTROY;
	else {
		/* still linked: record a local ctime and mark it updated */
		cp->c_metadata.md_localctime = current_time;
		cp->c_metadata.md_flags |= MD_LOCALCTIME;
		cp->c_flags |= CN_UPDATED;
	}

	/* update parent values */
	dcp->c_metadata.md_localctime = current_time;
	dcp->c_metadata.md_localmtime = current_time;
	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
	dcp->c_attr.va_nlink--;
	dcp->c_flags |= CN_UPDATED;

out:
	mutex_exit(&cp->c_statelock);
	mutex_exit(&dcp->c_statelock);
	rw_exit(&dcp->c_rwlock);
	if (commit) {
		/*
		 * commit the log entry; the final error value is handed
		 * to cachefs_dlog_commit() and the dnlc is purged for vp
		 * whether or not the removal itself succeeded
		 */
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
		dnlc_purge_vp(vp);
	}
	return (error);
}
6509 
/*
 * cachefs_symlink
 *
 * Symlink entry point: creates the symbolic link "lnm" in directory dvp
 * with attributes tva and target string "tnm".
 *
 * Fails with EPERM when called outside the global zone.  Otherwise it
 * loops: obtain access with cachefs_cd_access(), take the directory's
 * c_rwlock as writer, and dispatch to cachefs_symlink_connected() or
 * cachefs_symlink_disconnected() depending on fscp->fs_cdconnected.
 * When a worker's error satisfies CFS_TIMEOUT() the loop retries,
 * asking for the opposite mode via the "connected" argument.
 *
 * Returns 0 on success or the error from cachefs_cd_access() / the
 * worker routine.
 */
static int
cachefs_symlink(vnode_t *dvp, char *lnm, vattr_t *tva,
    char *tnm, cred_t *cr)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	cachefscache_t *cachep = fscp->fs_cache;
	int error = 0;
	int held = 0;		/* set while cd access and c_rwlock are held */
	int connected = 0;	/* passed to cachefs_cd_access() */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_symlink: ENTER dvp %p lnm %s tnm %s\n",
		    (void *)dvp, lnm, tnm);
#endif

	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (fscp->fs_cache->c_flags & CACHE_NOCACHE)
		ASSERT(dcp->c_flags & CN_NOCACHE);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the symlink operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			rw_exit(&dcp->c_rwlock);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		rw_enter(&dcp->c_rwlock, RW_WRITER);
		held = 1;

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_symlink_connected(dvp, lnm, tva,
				tnm, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/*
				 * timed out while connected: drop everything,
				 * report the timeout and try again
				 */
				rw_exit(&dcp->c_rwlock);
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_symlink_disconnected(dvp, lnm, tva,
				tnm, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/*
				 * could not be done disconnected: retry asking
				 * for connected access (locks are released at
				 * the top of the loop since held is still set)
				 */
				connected = 1;
				continue;
			}
		}
		break;
	}

	/* log the outcome, including the length of the target string */
	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_SYMLINK))
		cachefs_log_symlink(cachep, error, fscp->fs_cfsvfsp,
		    &dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno,
		    crgetuid(cr), (uint_t)strlen(tnm));

	if (held) {
		rw_exit(&dcp->c_rwlock);
		cachefs_cd_release(fscp);
	}

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_symlink: EXIT error = %d\n", error);
#endif
	return (error);
}
6604 
6605 static int
6606 cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva,
6607     char *tnm, cred_t *cr)
6608 {
6609 	cnode_t *dcp = VTOC(dvp);
6610 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6611 	int error = 0;
6612 	vnode_t *backvp = NULL;
6613 	cnode_t *newcp = NULL;
6614 	struct vattr va;
6615 	struct fid cookie;
6616 	cfs_cid_t cid;
6617 	uint32_t valid_fid;
6618 
6619 	mutex_enter(&dcp->c_statelock);
6620 
6621 	if (dcp->c_backvp == NULL) {
6622 		error = cachefs_getbackvp(fscp, dcp);
6623 		if (error) {
6624 			cachefs_nocache(dcp);
6625 			mutex_exit(&dcp->c_statelock);
6626 			goto out;
6627 		}
6628 	}
6629 
6630 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
6631 	if (error) {
6632 		mutex_exit(&dcp->c_statelock);
6633 		goto out;
6634 	}
6635 	CFS_DPRINT_BACKFS_NFSV4(fscp,
6636 		("cachefs_symlink (nfsv4): dcp %p, dbackvp %p, "
6637 		"lnm %s, tnm %s\n", dcp, dcp->c_backvp, lnm, tnm));
6638 	error = VOP_SYMLINK(dcp->c_backvp, lnm, tva, tnm, cr);
6639 	if (error) {
6640 		mutex_exit(&dcp->c_statelock);
6641 		goto out;
6642 	}
6643 	if ((dcp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
6644 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
6645 		cachefs_nocache(dcp);
6646 		mutex_exit(&dcp->c_statelock);
6647 		goto out;
6648 	}
6649 
6650 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
6651 
6652 	/* lookup the symlink we just created and get its fid and attrs */
6653 	(void) VOP_LOOKUP(dcp->c_backvp, lnm, &backvp, NULL, 0, NULL, cr);
6654 	if (backvp == NULL) {
6655 		if (CFS_ISFS_BACKFS_NFSV4(fscp) == 0)
6656 			cachefs_nocache(dcp);
6657 		mutex_exit(&dcp->c_statelock);
6658 		goto out;
6659 	}
6660 
6661 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
6662 	error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
6663 	if (error) {
6664 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6665 		error = 0;
6666 		cachefs_nocache(dcp);
6667 		mutex_exit(&dcp->c_statelock);
6668 		goto out;
6669 	}
6670 	cid.cid_fileno = va.va_nodeid;
6671 	cid.cid_flags = 0;
6672 
6673 	/* if the dir is cached, add the symlink to it */
6674 	if (CFS_ISFS_NONSHARED(fscp) &&
6675 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
6676 		error = cachefs_dir_enter(dcp, lnm, &cookie, &cid, SM_ASYNC);
6677 		if (error) {
6678 			cachefs_nocache(dcp);
6679 			error = 0;
6680 		}
6681 	}
6682 	mutex_exit(&dcp->c_statelock);
6683 
6684 	/* make the cnode for the sym link */
6685 	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
6686 						&va, backvp, cr, 0, &newcp);
6687 	if (error) {
6688 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6689 		cachefs_nocache(dcp);
6690 		error = 0;
6691 		goto out;
6692 	}
6693 
6694 	/* try to cache the symlink contents */
6695 	rw_enter(&newcp->c_rwlock, RW_WRITER);
6696 	mutex_enter(&newcp->c_statelock);
6697 
6698 	/*
6699 	 * try to cache the sym link, note that its a noop if NOCACHE
6700 	 * or NFSv4 is set
6701 	 */
6702 	error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size);
6703 	if (error) {
6704 		cachefs_nocache(newcp);
6705 		error = 0;
6706 	}
6707 	mutex_exit(&newcp->c_statelock);
6708 	rw_exit(&newcp->c_rwlock);
6709 
6710 out:
6711 	if (backvp)
6712 		VN_RELE(backvp);
6713 	if (newcp)
6714 		VN_RELE(CTOV(newcp));
6715 	return (error);
6716 }
6717 
/*
 * cachefs_symlink_disconnected
 *
 * Disconnected-mode worker for symlink.  Creates the link "lnm" ->
 * "tnm" in directory dvp purely in the cache: a new cnode is created,
 * the operation is written to the log with cachefs_dlog_symlink(), the
 * link contents and metadata are stored, and the name is entered in
 * the cached directory.
 *
 * Returns 0 on success.  ETIMEDOUT is returned when the operation
 * cannot be done from the cache (write-around file system, directory
 * not populated, ENOTDIR from the directory lookup); EEXIST when the
 * directory lookup returns anything other than ENOENT/ENOTDIR; ENOSPC
 * when the log or the modified-file allocation fails; otherwise the
 * error of the failing helper.  On any error a cnode that was already
 * created is flagged CN_DESTROY before its hold is released.
 */
static int
cachefs_symlink_disconnected(vnode_t *dvp, char *lnm, vattr_t *tva,
    char *tnm, cred_t *cr)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error;
	cnode_t *newcp = NULL;		/* cnode of the new link */
	struct vattr va;
	timestruc_t current_time;
	off_t commit = 0;		/* non-zero once a log record exists */

	if (CFS_ISFS_WRITE_AROUND(fscp))
		return (ETIMEDOUT);

	mutex_enter(&dcp->c_statelock);

	/* check permissions */
	if (error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* the directory front file must be populated */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		error = ETIMEDOUT;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* make sure lnm does not already exist in the directory */
	error = cachefs_dir_look(dcp, lnm, NULL, NULL, NULL, NULL);
	if (error == ENOTDIR) {
		error = ETIMEDOUT;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	if (error != ENOENT) {
		/* any result other than "not found" is reported as EEXIST */
		error = EEXIST;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* make up a reasonable set of attributes */
	cachefs_attr_setup(tva, &va, dcp, cr);
	va.va_type = VLNK;
	va.va_mode |= S_IFLNK;
	va.va_size = strlen(tnm);

	mutex_exit(&dcp->c_statelock);

	/* create the cnode */
	error = cachefs_cnode_create(fscp, &va, 0, &newcp);
	if (error)
		goto out;

	rw_enter(&newcp->c_rwlock, RW_WRITER);
	mutex_enter(&newcp->c_statelock);

	error = cachefs_dlog_cidmap(fscp);
	if (error) {
		/* a cidmap failure is reported to the caller as ENOSPC */
		mutex_exit(&newcp->c_statelock);
		rw_exit(&newcp->c_rwlock);
		error = ENOSPC;
		goto out;
	}

	cachefs_creategid(dcp, newcp, tva, cr);
	/* the parent's statelock is held only across the acl setup */
	mutex_enter(&dcp->c_statelock);
	cachefs_createacl(dcp, newcp);
	mutex_exit(&dcp->c_statelock);
	gethrestime(&current_time);
	newcp->c_metadata.md_vattr.va_atime = current_time;
	newcp->c_metadata.md_localctime = current_time;
	newcp->c_metadata.md_localmtime = current_time;
	newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME |
	    MD_LOCALCTIME;
	newcp->c_flags |= CN_UPDATED;

	/* log the operation */
	commit = cachefs_dlog_symlink(fscp, dcp, newcp, lnm, tva, tnm, cr);
	if (commit == 0) {
		/* a zero return means no log record was written */
		mutex_exit(&newcp->c_statelock);
		rw_exit(&newcp->c_rwlock);
		error = ENOSPC;
		goto out;
	}

	/* store the symlink contents */
	error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size);
	if (error) {
		mutex_exit(&newcp->c_statelock);
		rw_exit(&newcp->c_rwlock);
		goto out;
	}
	if (cachefs_modified_alloc(newcp)) {
		mutex_exit(&newcp->c_statelock);
		rw_exit(&newcp->c_rwlock);
		error = ENOSPC;
		goto out;
	}

	/*
	 * write the metadata now rather than waiting until
	 * inactive so that if there's no space we can let
	 * the caller know.
	 */
	if (newcp->c_flags & CN_ALLOC_PENDING) {
		if (newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
			(void) filegrp_allocattr(newcp->c_filegrp);
		}
		error = filegrp_create_metadata(newcp->c_filegrp,
		    &newcp->c_metadata, &newcp->c_id);
		if (error) {
			mutex_exit(&newcp->c_statelock);
			rw_exit(&newcp->c_rwlock);
			goto out;
		}
		newcp->c_flags &= ~CN_ALLOC_PENDING;
	}
	error = filegrp_write_metadata(newcp->c_filegrp,
	    &newcp->c_id, &newcp->c_metadata);
	if (error) {
		mutex_exit(&newcp->c_statelock);
		rw_exit(&newcp->c_rwlock);
		goto out;
	}
	mutex_exit(&newcp->c_statelock);
	rw_exit(&newcp->c_rwlock);

	mutex_enter(&dcp->c_statelock);

	/*
	 * enter the new file in the directory; the populated flag is
	 * checked again because the parent's statelock was dropped
	 * since the first check
	 */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		error = ETIMEDOUT;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	cachefs_modified(dcp);
	error = cachefs_dir_enter(dcp, lnm, &newcp->c_metadata.md_cookie,
		&newcp->c_id, SM_ASYNC);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* update parent dir times */
	dcp->c_metadata.md_localctime = current_time;
	dcp->c_metadata.md_localmtime = current_time;
	dcp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME;
	dcp->c_flags |= CN_UPDATED;
	mutex_exit(&dcp->c_statelock);

out:
	if (commit) {
		/* commit the log entry, passing along the final error */
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
	}

	if (error) {
		/* the new cnode must not survive a failed create */
		if (newcp) {
			mutex_enter(&newcp->c_statelock);
			newcp->c_flags |= CN_DESTROY;
			mutex_exit(&newcp->c_statelock);
		}
	}
	if (newcp) {
		VN_RELE(CTOV(newcp));
	}

	return (error);
}
6893 
6894 static int
6895 cachefs_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp)
6896 {
6897 	cnode_t *dcp = VTOC(vp);
6898 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6899 	cachefscache_t *cachep = fscp->fs_cache;
6900 	int error = 0;
6901 	int held = 0;
6902 	int connected = 0;
6903 
6904 #ifdef CFSDEBUG
6905 	CFS_DEBUG(CFSDEBUG_VOPS)
6906 		printf("cachefs_readdir: ENTER vp %p\n", (void *)vp);
6907 #endif
6908 	if (getzoneid() != GLOBAL_ZONEID) {
6909 		error = EPERM;
6910 		goto out;
6911 	}
6912 
6913 	/*
6914 	 * Cachefs only provides pass-through support for NFSv4,
6915 	 * and all vnode operations are passed through to the
6916 	 * back file system. For NFSv4 pass-through to work, only
6917 	 * connected operation is supported, the cnode backvp must
6918 	 * exist, and cachefs optional (eg., disconnectable) flags
6919 	 * are turned off. Assert these conditions to ensure that
6920 	 * the backfilesystem is called for the readdir operation.
6921 	 */
6922 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6923 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6924 
6925 	for (;;) {
6926 		/* get (or renew) access to the file system */
6927 		if (held) {
6928 			/* Won't loop with NFSv4 connected behavior */
6929 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6930 			rw_exit(&dcp->c_rwlock);
6931 			cachefs_cd_release(fscp);
6932 			held = 0;
6933 		}
6934 		error = cachefs_cd_access(fscp, connected, 0);
6935 		if (error)
6936 			break;
6937 		rw_enter(&dcp->c_rwlock, RW_READER);
6938 		held = 1;
6939 
6940 		/* quit if link count of zero (posix) */
6941 		if (dcp->c_attr.va_nlink == 0) {
6942 			if (eofp)
6943 				*eofp = 1;
6944 			error = 0;
6945 			break;
6946 		}
6947 
6948 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6949 			error = cachefs_readdir_connected(vp, uiop, cr,
6950 			    eofp);
6951 			if (CFS_TIMEOUT(fscp, error)) {
6952 				rw_exit(&dcp->c_rwlock);
6953 				cachefs_cd_release(fscp);
6954 				held = 0;
6955 				cachefs_cd_timedout(fscp);
6956 				connected = 0;
6957 				continue;
6958 			}
6959 		} else {
6960 			error = cachefs_readdir_disconnected(vp, uiop, cr,
6961 			    eofp);
6962 			if (CFS_TIMEOUT(fscp, error)) {
6963 				if (cachefs_cd_access_miss(fscp)) {
6964 					error = cachefs_readdir_connected(vp,
6965 					    uiop, cr, eofp);
6966 					if (!CFS_TIMEOUT(fscp, error))
6967 						break;
6968 					delay(5*hz);
6969 					connected = 0;
6970 					continue;
6971 				}
6972 				connected = 1;
6973 				continue;
6974 			}
6975 		}
6976 		break;
6977 	}
6978 
6979 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READDIR))
6980 		cachefs_log_readdir(cachep, error, fscp->fs_cfsvfsp,
6981 		&dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno,
6982 		crgetuid(cr), uiop->uio_loffset, *eofp);
6983 
6984 	if (held) {
6985 		rw_exit(&dcp->c_rwlock);
6986 		cachefs_cd_release(fscp);
6987 	}
6988 
6989 #ifdef CFS_CD_DEBUG
6990 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
6991 #endif
6992 out:
6993 #ifdef CFSDEBUG
6994 	CFS_DEBUG(CFSDEBUG_VOPS)
6995 		printf("cachefs_readdir: EXIT error = %d\n", error);
6996 #endif
6997 
6998 	return (error);
6999 }
7000 
/*
 * cachefs_readdir_connected
 *
 * Connected-mode worker for readdir.  After checking the directory
 * against the back fs, it serves the entries from the cached directory
 * when the front file is populated and usable, and otherwise reads
 * them from the back file system (through
 * cachefs_readback_translate() when fs_inum_size is non-zero).  An
 * unpopulated directory is either queued for asynchronous population
 * or filled synchronously with cachefs_dir_fill().
 *
 * dcp->c_statelock is held for the whole call.
 *
 * Returns 0 on success, otherwise the error from the consistency
 * check, cachefs_getbackvp() or the back-fs read.
 */
static int
cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp)
{
	cnode_t *dcp = VTOC(vp);
	int error;
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	struct cachefs_req *rp;

	mutex_enter(&dcp->c_statelock);

	/* check directory consistency */
	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
	if (error)
		goto out;
	dcp->c_usage++;

	/* if dir was modified, toss old contents */
	if (dcp->c_metadata.md_flags & MD_INVALREADDIR) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		cachefs_inval_object(dcp);
	}

	/*
	 * Populate the front file if it is not populated yet, no
	 * population is already pending, caching is allowed for this
	 * cnode, and the back fs is not NFSv4.
	 */
	error = 0;
	if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) &&
	    ((dcp->c_flags & (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0) &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp) &&
	    (fscp->fs_cdconnected == CFS_CD_CONNECTED)) {

		if (cachefs_async_okay()) {

			/*
			 * Set up asynchronous request to fill this
			 * directory.
			 */

			dcp->c_flags |= CN_ASYNC_POPULATE;

			rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
			rp->cfs_cmd = CFS_POPULATE;
			rp->cfs_req_u.cu_populate.cpop_vp = vp;
			rp->cfs_cr = cr;

			/* the queued request keeps its own holds */
			crhold(cr);
			VN_HOLD(vp);

			cachefs_addqueue(rp, &fscp->fs_workq);
		} else {
			/* fill synchronously; give up caching on failure */
			error = cachefs_dir_fill(dcp, cr);
			if (error != 0)
				cachefs_nocache(dcp);
		}
	}

	/* if front file is populated */
	if (((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) == 0) &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp) &&
	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		error = cachefs_dir_read(dcp, uiop, eofp);
		if (error == 0)
			fscp->fs_stats.st_hits++;
	}

	/* if front file could not be used */
	if ((error != 0) ||
	    CFS_ISFS_BACKFS_NFSV4(fscp) ||
	    (dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
	    ((dcp->c_metadata.md_flags & MD_POPULATED) == 0)) {

		/* a failed cached read turns caching off for this cnode */
		if (error && !(dcp->c_flags & CN_NOCACHE) &&
			!CFS_ISFS_BACKFS_NFSV4(fscp))
			cachefs_nocache(dcp);

		/* get the back vp */
		if (dcp->c_backvp == NULL) {
			error = cachefs_getbackvp(fscp, dcp);
			if (error)
				goto out;
		}

		if (fscp->fs_inum_size > 0) {
			/* read from the back fs and rewrite the inode numbers */
			error = cachefs_readback_translate(dcp, uiop, cr, eofp);
		} else {
			/* do the dir read from the back fs */
			(void) VOP_RWLOCK(dcp->c_backvp,
						V_WRITELOCK_FALSE, NULL);
			CFS_DPRINT_BACKFS_NFSV4(fscp,
				("cachefs_readdir (nfsv4): "
				"dcp %p, dbackvp %p\n", dcp, dcp->c_backvp));
			error = VOP_READDIR(dcp->c_backvp, uiop, cr, eofp);
			VOP_RWUNLOCK(dcp->c_backvp, V_WRITELOCK_FALSE, NULL);
		}

		if (error == 0)
			fscp->fs_stats.st_misses++;
	}

out:
	mutex_exit(&dcp->c_statelock);

	return (error);
}
7103 
7104 static int
7105 cachefs_readback_translate(cnode_t *cp, uio_t *uiop, cred_t *cr, int *eofp)
7106 {
7107 	int error = 0;
7108 	fscache_t *fscp = C_TO_FSCACHE(cp);
7109 	caddr_t buffy = NULL;
7110 	int buffysize = MAXBSIZE;
7111 	caddr_t chrp, end;
7112 	ino64_t newinum;
7113 	struct dirent64 *de;
7114 	uio_t uioin;
7115 	iovec_t iov;
7116 
7117 	ASSERT(cp->c_backvp != NULL);
7118 	ASSERT(fscp->fs_inum_size > 0);
7119 
7120 	if (uiop->uio_resid < buffysize)
7121 		buffysize = (int)uiop->uio_resid;
7122 	buffy = cachefs_kmem_alloc(buffysize, KM_SLEEP);
7123 
7124 	iov.iov_base = buffy;
7125 	iov.iov_len = buffysize;
7126 	uioin.uio_iov = &iov;
7127 	uioin.uio_iovcnt = 1;
7128 	uioin.uio_segflg = UIO_SYSSPACE;
7129 	uioin.uio_fmode = 0;
7130 	uioin.uio_extflg = UIO_COPY_CACHED;
7131 	uioin.uio_loffset = uiop->uio_loffset;
7132 	uioin.uio_resid = buffysize;
7133 
7134 	(void) VOP_RWLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);
7135 	error = VOP_READDIR(cp->c_backvp, &uioin, cr, eofp);
7136 	VOP_RWUNLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);
7137 
7138 	if (error != 0)
7139 		goto out;
7140 
7141 	end = buffy + buffysize - uioin.uio_resid;
7142 
7143 	mutex_exit(&cp->c_statelock);
7144 	mutex_enter(&fscp->fs_fslock);
7145 
7146 
7147 	for (chrp = buffy; chrp < end; chrp += de->d_reclen) {
7148 		de = (dirent64_t *)chrp;
7149 		newinum = cachefs_inum_real2fake(fscp, de->d_ino);
7150 		if (newinum == 0)
7151 			newinum = cachefs_fileno_conflict(fscp, de->d_ino);
7152 		de->d_ino = newinum;
7153 	}
7154 	mutex_exit(&fscp->fs_fslock);
7155 	mutex_enter(&cp->c_statelock);
7156 
7157 	error = uiomove(buffy, end - buffy, UIO_READ, uiop);
7158 	uiop->uio_loffset = uioin.uio_loffset;
7159 
7160 out:
7161 
7162 	if (buffy != NULL)
7163 		cachefs_kmem_free(buffy, buffysize);
7164 
7165 	return (error);
7166 }
7167 
7168 static int
7169 /*ARGSUSED*/
7170 cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop, cred_t *cr,
7171     int *eofp)
7172 {
7173 	cnode_t *dcp = VTOC(vp);
7174 	int error;
7175 
7176 	mutex_enter(&dcp->c_statelock);
7177 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
7178 		error = ETIMEDOUT;
7179 	} else {
7180 		error = cachefs_dir_read(dcp, uiop, eofp);
7181 		if (error == ENOTDIR)
7182 			error = ETIMEDOUT;
7183 	}
7184 	mutex_exit(&dcp->c_statelock);
7185 
7186 	return (error);
7187 }
7188 
7189 static int
7190 cachefs_fid(struct vnode *vp, struct fid *fidp)
7191 {
7192 	int error = 0;
7193 	struct cnode *cp = VTOC(vp);
7194 	fscache_t *fscp = C_TO_FSCACHE(cp);
7195 
7196 	/*
7197 	 * Cachefs only provides pass-through support for NFSv4,
7198 	 * and all vnode operations are passed through to the
7199 	 * back file system. For NFSv4 pass-through to work, only
7200 	 * connected operation is supported, the cnode backvp must
7201 	 * exist, and cachefs optional (eg., disconnectable) flags
7202 	 * are turned off. Assert these conditions, then bail
7203 	 * as  NFSv4 doesn't support VOP_FID.
7204 	 */
7205 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7206 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7207 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7208 		return (ENOTSUP);
7209 	}
7210 
7211 	mutex_enter(&cp->c_statelock);
7212 	if (fidp->fid_len < cp->c_metadata.md_cookie.fid_len) {
7213 		fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
7214 		error = ENOSPC;
7215 	} else {
7216 		bcopy(cp->c_metadata.md_cookie.fid_data, fidp->fid_data,
7217 		    cp->c_metadata.md_cookie.fid_len);
7218 		fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
7219 	}
7220 	mutex_exit(&cp->c_statelock);
7221 	return (error);
7222 }
7223 
7224 /* ARGSUSED2 */
7225 static int
7226 cachefs_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
7227 {
7228 	cnode_t *cp = VTOC(vp);
7229 
7230 	/*
7231 	 * XXX - This is ifdef'ed out for now. The problem -
7232 	 * getdents() acquires the read version of rwlock, then we come
7233 	 * into cachefs_readdir() and that wants to acquire the write version
7234 	 * of this lock (if its going to populate the directory). This is
7235 	 * a problem, this can be solved by introducing another lock in the
7236 	 * cnode.
7237 	 */
7238 /* XXX */
7239 	if (vp->v_type != VREG)
7240 		return (-1);
7241 	if (write_lock)
7242 		rw_enter(&cp->c_rwlock, RW_WRITER);
7243 	else
7244 		rw_enter(&cp->c_rwlock, RW_READER);
7245 	return (write_lock);
7246 }
7247 
7248 /* ARGSUSED */
7249 static void
7250 cachefs_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
7251 {
7252 	cnode_t *cp = VTOC(vp);
7253 	if (vp->v_type != VREG)
7254 		return;
7255 	rw_exit(&cp->c_rwlock);
7256 }
7257 
7258 /* ARGSUSED */
7259 static int
7260 cachefs_seek(struct vnode *vp, offset_t ooff, offset_t *noffp)
7261 {
7262 	return (0);
7263 }
7264 
7265 static int cachefs_lostpage = 0;
/*
 * Return all the pages from [off..off+len] in file
 *
 * Getpage entry point.  Fails with EPERM outside the global zone and
 * with ENOSYS when the vnode has VNOMAP set.  With an NFSv4 back file
 * system the request is handed to cachefs_getpage_backfs_nfsv4().
 * When pl is NULL nothing is done and 0 is returned.
 *
 * Otherwise the pages are obtained through cachefs_getapage(), called
 * directly for a request of at most one page and via pvn_getpages()
 * for larger ones, inside a loop that re-acquires access with
 * cachefs_cd_access() and retries on ENOSPC (nocache cnode), EAGAIN
 * and on errors that satisfy CFS_TIMEOUT().
 *
 * Returns 0 on success or the last error encountered.
 */
static int
cachefs_getpage(struct vnode *vp, offset_t off, size_t len,
	uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
	caddr_t addr, enum seg_rw rw, cred_t *cr)
{
	cnode_t *cp = VTOC(vp);
	int error;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	cachefscache_t *cachep = fscp->fs_cache;
	int held = 0;		/* set while cd access is held */
	int connected = 0;	/* passed to cachefs_cd_access() */

#ifdef CFSDEBUG
	u_offset_t offx = (u_offset_t)off;

	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_getpage: ENTER vp %p off %lld len %lu rw %d\n",
		    (void *)vp, offx, len, rw);
#endif
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (vp->v_flag & VNOMAP) {
		error = ENOSYS;
		goto out;
	}

	/* Call backfilesystem if NFSv4 */
	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_getpage_backfs_nfsv4(vp, off, len, protp, pl,
						plsz, seg, addr, rw, cr);
		goto out;
	}

	/* XXX sam: make this do an async populate? */
	if (pl == NULL) {
		error = 0;
		goto out;
	}
	if (protp != NULL)
		*protp = PROT_ALL;

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			break;
		held = 1;

		/*
		 * If we are getting called as a side effect of a
		 * cachefs_write()
		 * operation the local file size might not be extended yet.
		 * In this case we want to be able to return pages of zeroes.
		 */
		if ((u_offset_t)off + len >
			((cp->c_size + PAGEOFFSET) & (offset_t)PAGEMASK)) {
			/* beyond the page-rounded size: only segkmap may ask */
			if (seg != segkmap) {
				error = EFAULT;
				break;
			}
		}
		if (len <= PAGESIZE)
			error = cachefs_getapage(vp, (u_offset_t)off, len,
			    protp, pl, plsz, seg, addr, rw, cr);
		else
			error = pvn_getpages(cachefs_getapage, vp,
			    (u_offset_t)off, len, protp, pl, plsz, seg, addr,
			    rw, cr);
		if (error == 0)
			break;

		/* out of cache space on a nocache cnode, or EAGAIN: retry */
		if (((cp->c_flags & CN_NOCACHE) && (error == ENOSPC)) ||
		    error == EAGAIN) {
			connected = 0;
			continue;
		}
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			if (CFS_TIMEOUT(fscp, error)) {
				/*
				 * timed out while connected: release access,
				 * report the timeout and try again
				 */
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			if (CFS_TIMEOUT(fscp, error)) {
				if (cachefs_cd_access_miss(fscp)) {
					/*
					 * cache miss while disconnected: go to
					 * the back fs via
					 * cachefs_getapage_back() and, if that
					 * times out or returns EAGAIN, wait
					 * before starting over
					 */
					if (len <= PAGESIZE)
						error = cachefs_getapage_back(
						    vp, (u_offset_t)off,
						    len, protp, pl,
						    plsz, seg, addr, rw, cr);
					else
						error = pvn_getpages(
						    cachefs_getapage_back, vp,
						    (u_offset_t)off, len,
						    protp, pl,
						    plsz, seg, addr, rw, cr);
					if (!CFS_TIMEOUT(fscp, error) &&
					    (error != EAGAIN))
						break;
					delay(5*hz);
					connected = 0;
					continue;
				}
				connected = 1;
				continue;
			}
		}
		break;
	}

	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GETPAGE))
		cachefs_log_getpage(cachep, error, vp->v_vfsp,
		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
		    crgetuid(cr), off, len);

	if (held) {
		cachefs_cd_release(fscp);
	}

out:
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_getpage: EXIT vp %p error %d\n",
		    (void *)vp, error);
#endif
	return (error);
}
7408 
7409 /*
7410  * cachefs_getpage_backfs_nfsv4
7411  *
7412  * Call NFSv4 back filesystem to handle the getpage (cachefs
7413  * pass-through support for NFSv4).
7414  */
7415 static int
7416 cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off, size_t len,
7417 			uint_t *protp, struct page *pl[], size_t plsz,
7418 			struct seg *seg, caddr_t addr, enum seg_rw rw,
7419 			cred_t *cr)
7420 {
7421 	cnode_t *cp = VTOC(vp);
7422 	fscache_t *fscp = C_TO_FSCACHE(cp);
7423 	vnode_t *backvp;
7424 	int error;
7425 
7426 	/*
7427 	 * For NFSv4 pass-through to work, only connected operation is
7428 	 * supported, the cnode backvp must exist, and cachefs optional
7429 	 * (eg., disconnectable) flags are turned off. Assert these
7430 	 * conditions for the getpage operation.
7431 	 */
7432 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7433 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7434 
7435 	/* Call backfs vnode op after extracting backvp */
7436 	mutex_enter(&cp->c_statelock);
7437 	backvp = cp->c_backvp;
7438 	mutex_exit(&cp->c_statelock);
7439 
7440 	CFS_DPRINT_BACKFS_NFSV4(fscp,
7441 		("cachefs_getpage_backfs_nfsv4: cnode %p, backvp %p\n",
7442 		cp, backvp));
7443 	error = VOP_GETPAGE(backvp, off, len, protp, pl, plsz, seg,
7444 				    addr, rw, cr);
7445 
7446 	return (error);
7447 }
7448 
7449 /*
7450  * Called from pvn_getpages or cachefs_getpage to get a particular page.
7451  */
7452 /*ARGSUSED*/
7453 static int
7454 cachefs_getapage(struct vnode *vp, u_offset_t off, size_t len, uint_t *protp,
7455 	struct page *pl[], size_t plsz, struct seg *seg, caddr_t addr,
7456 	enum seg_rw rw, cred_t *cr)
7457 {
7458 	cnode_t *cp = VTOC(vp);
7459 	page_t **ppp, *pp = NULL;
7460 	fscache_t *fscp = C_TO_FSCACHE(cp);
7461 	cachefscache_t *cachep = fscp->fs_cache;
7462 	int error = 0;
7463 	struct page **ourpl;
7464 	struct page *ourstackpl[17]; /* see ASSERT() below for 17 */
7465 	int index = 0;
7466 	int downgrade;
7467 	int have_statelock = 0;
7468 	u_offset_t popoff;
7469 	size_t popsize = 0;
7470 
7471 	ASSERT(((DEF_POP_SIZE / PAGESIZE) + 1) <= 17);
7472 
7473 	if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
7474 		ourpl = cachefs_kmem_alloc(sizeof (struct page *) *
7475 		    ((fscp->fs_info.fi_popsize / PAGESIZE) + 1), KM_SLEEP);
7476 	else
7477 		ourpl = ourstackpl;
7478 
7479 	ourpl[0] = NULL;
7480 	off = off & (offset_t)PAGEMASK;
7481 again:
7482 	/*
7483 	 * Look for the page
7484 	 */
7485 	if (page_exists(vp, off) == 0) {
7486 		/*
7487 		 * Need to do work to get the page.
7488 		 * Grab our lock because we are going to
7489 		 * modify the state of the cnode.
7490 		 */
7491 		if (! have_statelock) {
7492 			mutex_enter(&cp->c_statelock);
7493 			have_statelock = 1;
7494 		}
7495 		/*
7496 		 * If we're in NOCACHE mode, we will need a backvp
7497 		 */
7498 		if (cp->c_flags & CN_NOCACHE) {
7499 			if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
7500 				error = ETIMEDOUT;
7501 				goto out;
7502 			}
7503 			if (cp->c_backvp == NULL) {
7504 				error = cachefs_getbackvp(fscp, cp);
7505 				if (error)
7506 					goto out;
7507 			}
7508 			error = VOP_GETPAGE(cp->c_backvp, off,
7509 					PAGESIZE, protp, ourpl, PAGESIZE, seg,
7510 					addr, S_READ, cr);
7511 			/*
7512 			 * backfs returns EFAULT when we are trying for a
7513 			 * page beyond EOF but cachefs has the knowledge that
7514 			 * it is not beyond EOF be cause cp->c_size is
7515 			 * greater then the offset requested.
7516 			 */
7517 			if (error == EFAULT) {
7518 				error = 0;
7519 				pp = page_create_va(vp, off, PAGESIZE,
7520 				    PG_EXCL | PG_WAIT, seg, addr);
7521 				if (pp == NULL)
7522 					goto again;
7523 				pagezero(pp, 0, PAGESIZE);
7524 				pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
7525 				goto out;
7526 			}
7527 			if (error)
7528 				goto out;
7529 			goto getpages;
7530 		}
7531 		/*
7532 		 * We need a front file. If we can't get it,
7533 		 * put the cnode in NOCACHE mode and try again.
7534 		 */
7535 		if (cp->c_frontvp == NULL) {
7536 			error = cachefs_getfrontfile(cp);
7537 			if (error) {
7538 				cachefs_nocache(cp);
7539 				error = EAGAIN;
7540 				goto out;
7541 			}
7542 		}
7543 		/*
7544 		 * Check if the front file needs population.
7545 		 * If population is necessary, make sure we have a
7546 		 * backvp as well. We will get the page from the backvp.
7547 		 * bug 4152459-
7548 		 * But if the file system is in disconnected mode
7549 		 * and the file is a local file then do not check the
7550 		 * allocmap.
7551 		 */
7552 		if (((fscp->fs_cdconnected == CFS_CD_CONNECTED) ||
7553 		    ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) &&
7554 		    (cachefs_check_allocmap(cp, off) == 0)) {
7555 			if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
7556 				error = ETIMEDOUT;
7557 				goto out;
7558 			}
7559 			if (cp->c_backvp == NULL) {
7560 				error = cachefs_getbackvp(fscp, cp);
7561 				if (error)
7562 					goto out;
7563 			}
7564 			if (cp->c_filegrp->fg_flags & CFS_FG_WRITE) {
7565 				cachefs_cluster_allocmap(off, &popoff,
7566 				    &popsize,
7567 				    fscp->fs_info.fi_popsize, cp);
7568 				if (popsize != 0) {
7569 					error = cachefs_populate(cp,
7570 					    popoff, popsize,
7571 					    cp->c_frontvp, cp->c_backvp,
7572 					    cp->c_size, cr);
7573 					if (error) {
7574 						cachefs_nocache(cp);
7575 						error = EAGAIN;
7576 						goto out;
7577 					} else {
7578 						cp->c_flags |=
7579 						    CN_UPDATED |
7580 						    CN_NEED_FRONT_SYNC |
7581 						    CN_POPULATION_PENDING;
7582 					}
7583 					popsize = popsize - (off - popoff);
7584 				} else {
7585 					popsize = PAGESIZE;
7586 				}
7587 			}
7588 			/* else XXX assert CN_NOCACHE? */
7589 			error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
7590 					PAGESIZE, protp, ourpl, popsize,
7591 					seg, addr, S_READ, cr);
7592 			if (error)
7593 				goto out;
7594 			fscp->fs_stats.st_misses++;
7595 		} else {
7596 			if (cp->c_flags & CN_POPULATION_PENDING) {
7597 				error = VOP_FSYNC(cp->c_frontvp, FSYNC, cr);
7598 				cp->c_flags &= ~CN_POPULATION_PENDING;
7599 				if (error) {
7600 					cachefs_nocache(cp);
7601 					error = EAGAIN;
7602 					goto out;
7603 				}
7604 			}
7605 			/*
7606 			 * File was populated so we get the page from the
7607 			 * frontvp
7608 			 */
7609 			error = VOP_GETPAGE(cp->c_frontvp, (offset_t)off,
7610 			    PAGESIZE, protp, ourpl, PAGESIZE, seg, addr,
7611 			    rw, cr);
7612 			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GPFRONT))
7613 				cachefs_log_gpfront(cachep, error,
7614 				    fscp->fs_cfsvfsp,
7615 				    &cp->c_metadata.md_cookie, cp->c_fileno,
7616 				    crgetuid(cr), off, PAGESIZE);
7617 			if (error) {
7618 				cachefs_nocache(cp);
7619 				error = EAGAIN;
7620 				goto out;
7621 			}
7622 			fscp->fs_stats.st_hits++;
7623 		}
7624 getpages:
7625 		ASSERT(have_statelock);
7626 		if (have_statelock) {
7627 			mutex_exit(&cp->c_statelock);
7628 			have_statelock = 0;
7629 		}
7630 		downgrade = 0;
7631 		for (ppp = ourpl; *ppp; ppp++) {
7632 			if ((*ppp)->p_offset < off) {
7633 				index++;
7634 				page_unlock(*ppp);
7635 				continue;
7636 			}
7637 			if (PAGE_SHARED(*ppp)) {
7638 				if (page_tryupgrade(*ppp) == 0) {
7639 					for (ppp = &ourpl[index]; *ppp; ppp++)
7640 						page_unlock(*ppp);
7641 					error = EAGAIN;
7642 					goto out;
7643 				}
7644 				downgrade = 1;
7645 			}
7646 			ASSERT(PAGE_EXCL(*ppp));
7647 			(void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
7648 			page_rename(*ppp, vp, (*ppp)->p_offset);
7649 		}
7650 		pl[0] = ourpl[index];
7651 		pl[1] = NULL;
7652 		if (downgrade) {
7653 			page_downgrade(ourpl[index]);
7654 		}
7655 		/* Unlock the rest of the pages from the cluster */
7656 		for (ppp = &ourpl[index+1]; *ppp; ppp++)
7657 			page_unlock(*ppp);
7658 	} else {
7659 		ASSERT(! have_statelock);
7660 		if (have_statelock) {
7661 			mutex_exit(&cp->c_statelock);
7662 			have_statelock = 0;
7663 		}
7664 		/* XXX SE_SHARED probably isn't what we *always* want */
7665 		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
7666 			cachefs_lostpage++;
7667 			goto again;
7668 		}
7669 		pl[0] = pp;
7670 		pl[1] = NULL;
7671 		/* XXX increment st_hits?  i don't think so, but... */
7672 	}
7673 
7674 out:
7675 	if (have_statelock) {
7676 		mutex_exit(&cp->c_statelock);
7677 		have_statelock = 0;
7678 	}
7679 	if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
7680 		cachefs_kmem_free(ourpl, sizeof (struct page *) *
7681 		    ((fscp->fs_info.fi_popsize / PAGESIZE) + 1));
7682 	return (error);
7683 }
7684 
7685 /* gets a page but only from the back fs */
7686 /*ARGSUSED*/
7687 static int
7688 cachefs_getapage_back(struct vnode *vp, u_offset_t off, size_t len,
7689     uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
7690     caddr_t addr, enum seg_rw rw, cred_t *cr)
7691 {
7692 	cnode_t *cp = VTOC(vp);
7693 	page_t **ppp, *pp = NULL;
7694 	fscache_t *fscp = C_TO_FSCACHE(cp);
7695 	int error = 0;
7696 	struct page *ourpl[17];
7697 	int index = 0;
7698 	int have_statelock = 0;
7699 	int downgrade;
7700 
7701 	/*
7702 	 * Grab the cnode statelock so the cnode state won't change
7703 	 * while we're in here.
7704 	 */
7705 	ourpl[0] = NULL;
7706 	off = off & (offset_t)PAGEMASK;
7707 again:
7708 	if (page_exists(vp, off) == 0) {
7709 		if (! have_statelock) {
7710 			mutex_enter(&cp->c_statelock);
7711 			have_statelock = 1;
7712 		}
7713 
7714 		if (cp->c_backvp == NULL) {
7715 			error = cachefs_getbackvp(fscp, cp);
7716 			if (error)
7717 				goto out;
7718 		}
7719 		error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
7720 			PAGESIZE, protp, ourpl, PAGESIZE, seg,
7721 			addr, S_READ, cr);
7722 		if (error)
7723 			goto out;
7724 
7725 		if (have_statelock) {
7726 			mutex_exit(&cp->c_statelock);
7727 			have_statelock = 0;
7728 		}
7729 		downgrade = 0;
7730 		for (ppp = ourpl; *ppp; ppp++) {
7731 			if ((*ppp)->p_offset < off) {
7732 				index++;
7733 				page_unlock(*ppp);
7734 				continue;
7735 			}
7736 			if (PAGE_SHARED(*ppp)) {
7737 				if (page_tryupgrade(*ppp) == 0) {
7738 					for (ppp = &ourpl[index]; *ppp; ppp++)
7739 						page_unlock(*ppp);
7740 					error = EAGAIN;
7741 					goto out;
7742 				}
7743 				downgrade = 1;
7744 			}
7745 			ASSERT(PAGE_EXCL(*ppp));
7746 			(void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
7747 			page_rename(*ppp, vp, (*ppp)->p_offset);
7748 		}
7749 		pl[0] = ourpl[index];
7750 		pl[1] = NULL;
7751 		if (downgrade) {
7752 			page_downgrade(ourpl[index]);
7753 		}
7754 		/* Unlock the rest of the pages from the cluster */
7755 		for (ppp = &ourpl[index+1]; *ppp; ppp++)
7756 			page_unlock(*ppp);
7757 	} else {
7758 		ASSERT(! have_statelock);
7759 		if (have_statelock) {
7760 			mutex_exit(&cp->c_statelock);
7761 			have_statelock = 0;
7762 		}
7763 		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
7764 			cachefs_lostpage++;
7765 			goto again;
7766 		}
7767 		pl[0] = pp;
7768 		pl[1] = NULL;
7769 	}
7770 
7771 out:
7772 	if (have_statelock) {
7773 		mutex_exit(&cp->c_statelock);
7774 		have_statelock = 0;
7775 	}
7776 	return (error);
7777 }
7778 
7779 static int
7780 cachefs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr)
7781 {
7782 	cnode_t *cp = VTOC(vp);
7783 	int error = 0;
7784 	fscache_t *fscp = C_TO_FSCACHE(cp);
7785 	int held = 0;
7786 	int connected = 0;
7787 
7788 	if (getzoneid() != GLOBAL_ZONEID)
7789 		return (EPERM);
7790 
7791 	/* Call backfilesytem if NFSv4 */
7792 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7793 		error = cachefs_putpage_backfs_nfsv4(vp, off, len, flags, cr);
7794 		goto out;
7795 	}
7796 
7797 	for (;;) {
7798 		/* get (or renew) access to the file system */
7799 		if (held) {
7800 			cachefs_cd_release(fscp);
7801 			held = 0;
7802 		}
7803 		error = cachefs_cd_access(fscp, connected, 1);
7804 		if (error)
7805 			break;
7806 		held = 1;
7807 
7808 		error = cachefs_putpage_common(vp, off, len, flags, cr);
7809 		if (error == 0)
7810 			break;
7811 
7812 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
7813 			if (CFS_TIMEOUT(fscp, error)) {
7814 				cachefs_cd_release(fscp);
7815 				held = 0;
7816 				cachefs_cd_timedout(fscp);
7817 				connected = 0;
7818 				continue;
7819 			}
7820 		} else {
7821 			if (NOMEMWAIT()) {
7822 				error = 0;
7823 				goto out;
7824 			}
7825 			if (CFS_TIMEOUT(fscp, error)) {
7826 				connected = 1;
7827 				continue;
7828 			}
7829 		}
7830 		break;
7831 	}
7832 
7833 out:
7834 
7835 	if (held) {
7836 		cachefs_cd_release(fscp);
7837 	}
7838 
7839 #ifdef CFS_CD_DEBUG
7840 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7841 #endif
7842 	return (error);
7843 }
7844 
7845 /*
7846  * cachefs_putpage_backfs_nfsv4
7847  *
7848  * Call NFSv4 back filesystem to handle the putpage (cachefs
7849  * pass-through support for NFSv4).
7850  */
7851 static int
7852 cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off, size_t len, int flags,
7853 			cred_t *cr)
7854 {
7855 	cnode_t *cp = VTOC(vp);
7856 	fscache_t *fscp = C_TO_FSCACHE(cp);
7857 	vnode_t *backvp;
7858 	int error;
7859 
7860 	/*
7861 	 * For NFSv4 pass-through to work, only connected operation is
7862 	 * supported, the cnode backvp must exist, and cachefs optional
7863 	 * (eg., disconnectable) flags are turned off. Assert these
7864 	 * conditions for the putpage operation.
7865 	 */
7866 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7867 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7868 
7869 	/* Call backfs vnode op after extracting backvp */
7870 	mutex_enter(&cp->c_statelock);
7871 	backvp = cp->c_backvp;
7872 	mutex_exit(&cp->c_statelock);
7873 
7874 	CFS_DPRINT_BACKFS_NFSV4(fscp,
7875 		("cachefs_putpage_backfs_nfsv4: cnode %p, backvp %p\n",
7876 		cp, backvp));
7877 	error = VOP_PUTPAGE(backvp, off, len, flags, cr);
7878 
7879 	return (error);
7880 }
7881 
7882 /*
7883  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
7884  * If len == 0, do from off to EOF.
7885  *
7886  * The normal cases should be len == 0 & off == 0 (entire vp list),
7887  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
7888  * (from pageout).
7889  */
7890 
7891 /*ARGSUSED*/
7892 int
7893 cachefs_putpage_common(struct vnode *vp, offset_t off, size_t len,
7894     int flags, cred_t *cr)
7895 {
7896 	struct cnode *cp  = VTOC(vp);
7897 	struct page *pp;
7898 	size_t io_len;
7899 	u_offset_t eoff, io_off;
7900 	int error = 0;
7901 	fscache_t *fscp = C_TO_FSCACHE(cp);
7902 	cachefscache_t *cachep = fscp->fs_cache;
7903 
7904 	if (len == 0 && (flags & B_INVAL) == 0 && vn_is_readonly(vp)) {
7905 		return (0);
7906 	}
7907 	if (!vn_has_cached_data(vp) || (off >= cp->c_size &&
7908 	    (flags & B_INVAL) == 0))
7909 		return (0);
7910 
7911 	/*
7912 	 * Should never have cached data for the cachefs vnode
7913 	 * if NFSv4 is in use.
7914 	 */
7915 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
7916 
7917 	/*
7918 	 * If this is an async putpage let a thread handle it.
7919 	 */
7920 	if (flags & B_ASYNC) {
7921 		struct cachefs_req *rp;
7922 		int tflags = (flags & ~(B_ASYNC|B_DONTNEED));
7923 
7924 		if (ttoproc(curthread) == proc_pageout) {
7925 			/*
7926 			 * If this is the page daemon we
7927 			 * do the push synchronously (Dangerous!) and hope
7928 			 * we can free enough to keep running...
7929 			 */
7930 			flags &= ~B_ASYNC;
7931 			goto again;
7932 		}
7933 
7934 		if (! cachefs_async_okay()) {
7935 
7936 			/*
7937 			 * this is somewhat like NFS's behavior.  keep
7938 			 * the system from thrashing.  we've seen
7939 			 * cases where async queues get out of
7940 			 * control, especially if
7941 			 * madvise(MADV_SEQUENTIAL) is done on a large
7942 			 * mmap()ed file that is read sequentially.
7943 			 */
7944 
7945 			flags &= ~B_ASYNC;
7946 			goto again;
7947 		}
7948 
7949 		/*
7950 		 * if no flags other than B_ASYNC were set,
7951 		 * we coalesce putpage requests into a single one for the
7952 		 * whole file (len = off = 0).  If such a request is
7953 		 * already queued, we're done.
7954 		 *
7955 		 * If there are other flags set (e.g., B_INVAL), we don't
7956 		 * attempt to coalesce and we use the specified length and
7957 		 * offset.
7958 		 */
7959 		rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
7960 		mutex_enter(&cp->c_iomutex);
7961 		if ((cp->c_ioflags & CIO_PUTPAGES) == 0 || tflags != 0) {
7962 			rp->cfs_cmd = CFS_PUTPAGE;
7963 			rp->cfs_req_u.cu_putpage.cp_vp = vp;
7964 			if (tflags == 0) {
7965 				off = len = 0;
7966 				cp->c_ioflags |= CIO_PUTPAGES;
7967 			}
7968 			rp->cfs_req_u.cu_putpage.cp_off = off;
7969 			rp->cfs_req_u.cu_putpage.cp_len = (uint_t)len;
7970 			rp->cfs_req_u.cu_putpage.cp_flags = flags & ~B_ASYNC;
7971 			rp->cfs_cr = cr;
7972 			crhold(rp->cfs_cr);
7973 			VN_HOLD(vp);
7974 			cp->c_nio++;
7975 			cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));
7976 		} else {
7977 			kmem_cache_free(cachefs_req_cache, rp);
7978 		}
7979 
7980 		mutex_exit(&cp->c_iomutex);
7981 		return (0);
7982 	}
7983 
7984 
7985 again:
7986 	if (len == 0) {
7987 		/*
7988 		 * Search the entire vp list for pages >= off
7989 		 */
7990 		error = pvn_vplist_dirty(vp, off, cachefs_push, flags, cr);
7991 	} else {
7992 		/*
7993 		 * Do a range from [off...off + len] looking for pages
7994 		 * to deal with.
7995 		 */
7996 		eoff = (u_offset_t)off + len;
7997 		for (io_off = off; io_off < eoff && io_off < cp->c_size;
7998 			io_off += io_len) {
7999 			/*
8000 			 * If we are not invalidating, synchronously
8001 			 * freeing or writing pages use the routine
8002 			 * page_lookup_nowait() to prevent reclaiming
8003 			 * them from the free list.
8004 			 */
8005 			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
8006 				pp = page_lookup(vp, io_off,
8007 					(flags & (B_INVAL | B_FREE)) ?
8008 					    SE_EXCL : SE_SHARED);
8009 			} else {
8010 				/* XXX this looks like dead code */
8011 				pp = page_lookup_nowait(vp, io_off,
8012 					(flags & B_FREE) ? SE_EXCL : SE_SHARED);
8013 			}
8014 
8015 			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
8016 				io_len = PAGESIZE;
8017 			else {
8018 				error = cachefs_push(vp, pp, &io_off,
8019 					&io_len, flags, cr);
8020 				if (error != 0)
8021 					break;
8022 				/*
8023 				 * "io_off" and "io_len" are returned as
8024 				 * the range of pages we actually wrote.
8025 				 * This allows us to skip ahead more quickly
8026 				 * since several pages may've been dealt
8027 				 * with by this iteration of the loop.
8028 				 */
8029 			}
8030 		}
8031 	}
8032 
8033 	if (error == 0 && off == 0 && (len == 0 || len >= cp->c_size)) {
8034 		cp->c_flags &= ~CDIRTY;
8035 	}
8036 
8037 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_PUTPAGE))
8038 		cachefs_log_putpage(cachep, error, fscp->fs_cfsvfsp,
8039 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
8040 		    crgetuid(cr), off, len);
8041 
8042 	return (error);
8043 
8044 }
8045 
8046 /*ARGSUSED*/
8047 static int
8048 cachefs_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
8049     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr)
8050 {
8051 	cnode_t *cp = VTOC(vp);
8052 	fscache_t *fscp = C_TO_FSCACHE(cp);
8053 	struct segvn_crargs vn_a;
8054 	int error;
8055 	int held = 0;
8056 	int writing;
8057 	int connected = 0;
8058 
8059 #ifdef CFSDEBUG
8060 	u_offset_t offx = (u_offset_t)off;
8061 
8062 	CFS_DEBUG(CFSDEBUG_VOPS)
8063 		printf("cachefs_map: ENTER vp %p off %lld len %lu flags %d\n",
8064 			(void *)vp, offx, len, flags);
8065 #endif
8066 	if (getzoneid() != GLOBAL_ZONEID) {
8067 		error = EPERM;
8068 		goto out;
8069 	}
8070 
8071 	if (vp->v_flag & VNOMAP) {
8072 		error = ENOSYS;
8073 		goto out;
8074 	}
8075 	if (off < 0 || (offset_t)(off + len) < 0) {
8076 		error = ENXIO;
8077 		goto out;
8078 	}
8079 	if (vp->v_type != VREG) {
8080 		error = ENODEV;
8081 		goto out;
8082 	}
8083 
8084 	/*
8085 	 * Check to see if the vnode is currently marked as not cachable.
8086 	 * If so, we have to refuse the map request as this violates the
8087 	 * don't cache attribute.
8088 	 */
8089 	if (vp->v_flag & VNOCACHE)
8090 		return (EAGAIN);
8091 
8092 #ifdef OBSOLETE
8093 	/*
8094 	 * If file is being locked, disallow mapping.
8095 	 */
8096 	if (vn_has_flocks(vp)) {
8097 		error = EAGAIN;
8098 		goto out;
8099 	}
8100 #endif
8101 
8102 	/* call backfilesystem if NFSv4 */
8103 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
8104 		error = cachefs_map_backfs_nfsv4(vp, off, as, addrp, len, prot,
8105 						maxprot, flags, cr);
8106 		goto out;
8107 	}
8108 
8109 	writing = (prot & PROT_WRITE && ((flags & MAP_PRIVATE) == 0));
8110 
8111 	for (;;) {
8112 		/* get (or renew) access to the file system */
8113 		if (held) {
8114 			cachefs_cd_release(fscp);
8115 			held = 0;
8116 		}
8117 		error = cachefs_cd_access(fscp, connected, writing);
8118 		if (error)
8119 			break;
8120 		held = 1;
8121 
8122 		if (writing) {
8123 			mutex_enter(&cp->c_statelock);
8124 			if (CFS_ISFS_WRITE_AROUND(fscp)) {
8125 				if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
8126 					connected = 1;
8127 					continue;
8128 				} else {
8129 					cachefs_nocache(cp);
8130 				}
8131 			}
8132 
8133 			/*
8134 			 * CN_MAPWRITE is for an optimization in cachefs_delmap.
8135 			 * If CN_MAPWRITE is not set then cachefs_delmap does
8136 			 * not need to try to push out any pages.
8137 			 * This bit gets cleared when the cnode goes inactive.
8138 			 */
8139 			cp->c_flags |= CN_MAPWRITE;
8140 
8141 			mutex_exit(&cp->c_statelock);
8142 		}
8143 		break;
8144 	}
8145 
8146 	if (held) {
8147 		cachefs_cd_release(fscp);
8148 	}
8149 
8150 	as_rangelock(as);
8151 	if ((flags & MAP_FIXED) == 0) {
8152 		map_addr(addrp, len, off, 1, flags);
8153 		if (*addrp == NULL) {
8154 			as_rangeunlock(as);
8155 			error = ENOMEM;
8156 			goto out;
8157 		}
8158 	} else {
8159 		/*
8160 		 * User specified address - blow away any previous mappings
8161 		 */
8162 		(void) as_unmap(as, *addrp, len);
8163 	}
8164 
8165 	/*
8166 	 * package up all the data passed in into a segvn_args struct and
8167 	 * call as_map with segvn_create function to create a new segment
8168 	 * in the address space.
8169 	 */
8170 	vn_a.vp = vp;
8171 	vn_a.offset = off;
8172 	vn_a.type = flags & MAP_TYPE;
8173 	vn_a.prot = (uchar_t)prot;
8174 	vn_a.maxprot = (uchar_t)maxprot;
8175 	vn_a.cred = cr;
8176 	vn_a.amp = NULL;
8177 	vn_a.flags = flags & ~MAP_TYPE;
8178 	vn_a.szc = 0;
8179 	vn_a.lgrp_mem_policy_flags = 0;
8180 	error = as_map(as, *addrp, len, segvn_create, &vn_a);
8181 	as_rangeunlock(as);
8182 out:
8183 
8184 #ifdef CFS_CD_DEBUG
8185 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8186 #endif
8187 #ifdef CFSDEBUG
8188 	CFS_DEBUG(CFSDEBUG_VOPS)
8189 		printf("cachefs_map: EXIT vp %p error %d\n", (void *)vp, error);
8190 #endif
8191 	return (error);
8192 }
8193 
8194 /*
8195  * cachefs_map_backfs_nfsv4
8196  *
8197  * Call NFSv4 back filesystem to handle the map (cachefs
8198  * pass-through support for NFSv4).
8199  */
8200 static int
8201 cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off, struct as *as,
8202 			caddr_t *addrp, size_t len, uchar_t prot,
8203 			uchar_t maxprot, uint_t flags, cred_t *cr)
8204 {
8205 	cnode_t *cp = VTOC(vp);
8206 	fscache_t *fscp = C_TO_FSCACHE(cp);
8207 	vnode_t *backvp;
8208 	int error;
8209 
8210 	/*
8211 	 * For NFSv4 pass-through to work, only connected operation is
8212 	 * supported, the cnode backvp must exist, and cachefs optional
8213 	 * (eg., disconnectable) flags are turned off. Assert these
8214 	 * conditions for the map operation.
8215 	 */
8216 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8217 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8218 
8219 	/* Call backfs vnode op after extracting backvp */
8220 	mutex_enter(&cp->c_statelock);
8221 	backvp = cp->c_backvp;
8222 	mutex_exit(&cp->c_statelock);
8223 
8224 	CFS_DPRINT_BACKFS_NFSV4(fscp,
8225 		("cachefs_map_backfs_nfsv4: cnode %p, backvp %p\n",
8226 		cp, backvp));
8227 	error = VOP_MAP(backvp, off, as, addrp, len, prot, maxprot, flags, cr);
8228 
8229 	return (error);
8230 }
8231 
8232 /*ARGSUSED*/
8233 static int
8234 cachefs_addmap(struct vnode *vp, offset_t off, struct as *as,
8235     caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
8236     cred_t *cr)
8237 {
8238 	cnode_t *cp = VTOC(vp);
8239 	fscache_t *fscp = C_TO_FSCACHE(cp);
8240 
8241 	if (getzoneid() != GLOBAL_ZONEID)
8242 		return (EPERM);
8243 
8244 	if (vp->v_flag & VNOMAP)
8245 		return (ENOSYS);
8246 
8247 	/*
8248 	 * Check this is not an NFSv4 filesystem, as the mapping
8249 	 * is not done on the cachefs filesystem if NFSv4 is in
8250 	 * use.
8251 	 */
8252 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8253 
8254 	mutex_enter(&cp->c_statelock);
8255 	cp->c_mapcnt += btopr(len);
8256 	mutex_exit(&cp->c_statelock);
8257 	return (0);
8258 }
8259 
8260 /*ARGSUSED*/
8261 static int
8262 cachefs_delmap(struct vnode *vp, offset_t off, struct as *as,
8263 	caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
8264 	cred_t *cr)
8265 {
8266 	cnode_t *cp = VTOC(vp);
8267 	fscache_t *fscp = C_TO_FSCACHE(cp);
8268 	int error;
8269 	int connected = 0;
8270 	int held = 0;
8271 
8272 	/*
8273 	 * The file may be passed in to (or inherited into) the zone, so we
8274 	 * need to let this operation go through since it happens as part of
8275 	 * exiting.
8276 	 */
8277 	if (vp->v_flag & VNOMAP)
8278 		return (ENOSYS);
8279 
8280 	/*
8281 	 * Check this is not an NFSv4 filesystem, as the mapping
8282 	 * is not done on the cachefs filesystem if NFSv4 is in
8283 	 * use.
8284 	 */
8285 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8286 
8287 	mutex_enter(&cp->c_statelock);
8288 	cp->c_mapcnt -= btopr(len);
8289 	ASSERT(cp->c_mapcnt >= 0);
8290 	mutex_exit(&cp->c_statelock);
8291 
8292 	if (cp->c_mapcnt || !vn_has_cached_data(vp) ||
8293 	    ((cp->c_flags & CN_MAPWRITE) == 0))
8294 		return (0);
8295 
8296 	for (;;) {
8297 		/* get (or renew) access to the file system */
8298 		if (held) {
8299 			cachefs_cd_release(fscp);
8300 			held = 0;
8301 		}
8302 		error = cachefs_cd_access(fscp, connected, 1);
8303 		if (error)
8304 			break;
8305 		held = 1;
8306 		connected = 0;
8307 
8308 		error = cachefs_putpage_common(vp, (offset_t)0,
8309 		    (uint_t)0, 0, cr);
8310 		if (CFS_TIMEOUT(fscp, error)) {
8311 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8312 				cachefs_cd_release(fscp);
8313 				held = 0;
8314 				cachefs_cd_timedout(fscp);
8315 				continue;
8316 			} else {
8317 				connected = 1;
8318 				continue;
8319 			}
8320 		}
8321 
8322 		/* if no space left in cache, wait until connected */
8323 		if ((error == ENOSPC) &&
8324 		    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
8325 			connected = 1;
8326 			continue;
8327 		}
8328 
8329 		mutex_enter(&cp->c_statelock);
8330 		if (!error)
8331 			error = cp->c_error;
8332 		cp->c_error = 0;
8333 		mutex_exit(&cp->c_statelock);
8334 		break;
8335 	}
8336 
8337 	if (held)
8338 		cachefs_cd_release(fscp);
8339 
8340 #ifdef CFS_CD_DEBUG
8341 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8342 #endif
8343 	return (error);
8344 }
8345 
8346 /* ARGSUSED */
8347 static int
8348 cachefs_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
8349 	offset_t offset, struct flk_callback *flk_cbp, cred_t *cr)
8350 {
8351 	struct cnode *cp = VTOC(vp);
8352 	int error;
8353 	struct fscache *fscp = C_TO_FSCACHE(cp);
8354 	vnode_t *backvp;
8355 	int held = 0;
8356 	int connected = 0;
8357 
8358 	if (getzoneid() != GLOBAL_ZONEID)
8359 		return (EPERM);
8360 
8361 	if ((cmd != F_GETLK) && (cmd != F_SETLK) && (cmd != F_SETLKW))
8362 		return (EINVAL);
8363 
8364 	/* Disallow locking of files that are currently mapped */
8365 	if (((cmd == F_SETLK) || (cmd == F_SETLKW)) && (cp->c_mapcnt > 0)) {
8366 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8367 		return (EAGAIN);
8368 	}
8369 
8370 	/*
8371 	 * Cachefs only provides pass-through support for NFSv4,
8372 	 * and all vnode operations are passed through to the
8373 	 * back file system. For NFSv4 pass-through to work, only
8374 	 * connected operation is supported, the cnode backvp must
8375 	 * exist, and cachefs optional (eg., disconnectable) flags
8376 	 * are turned off. Assert these conditions to ensure that
8377 	 * the backfilesystem is called for the frlock operation.
8378 	 */
8379 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8380 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8381 
8382 	/* XXX bob: nfs does a bunch more checks than we do */
8383 	if (CFS_ISFS_LLOCK(fscp)) {
8384 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8385 		return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr));
8386 	}
8387 
8388 	for (;;) {
8389 		/* get (or renew) access to the file system */
8390 		if (held) {
8391 			/* Won't loop with NFSv4 connected behavior */
8392 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8393 			cachefs_cd_release(fscp);
8394 			held = 0;
8395 		}
8396 		error = cachefs_cd_access(fscp, connected, 0);
8397 		if (error)
8398 			break;
8399 		held = 1;
8400 
8401 		/* if not connected, quit or wait */
8402 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
8403 			connected = 1;
8404 			continue;
8405 		}
8406 
8407 		/* nocache the file */
8408 		if ((cp->c_flags & CN_NOCACHE) == 0 &&
8409 		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8410 			mutex_enter(&cp->c_statelock);
8411 			cachefs_nocache(cp);
8412 			mutex_exit(&cp->c_statelock);
8413 		}
8414 
8415 		/*
8416 		 * XXX bob: probably should do a consistency check
8417 		 * Pass arguments unchanged if NFSv4 is the backfs.
8418 		 */
8419 		if (bfp->l_whence == 2 && CFS_ISFS_BACKFS_NFSV4(fscp) == 0) {
8420 			bfp->l_start += cp->c_size;
8421 			bfp->l_whence = 0;
8422 		}
8423 
8424 		/* get the back vp */
8425 		mutex_enter(&cp->c_statelock);
8426 		if (cp->c_backvp == NULL) {
8427 			error = cachefs_getbackvp(fscp, cp);
8428 			if (error) {
8429 				mutex_exit(&cp->c_statelock);
8430 				break;
8431 			}
8432 		}
8433 		backvp = cp->c_backvp;
8434 		VN_HOLD(backvp);
8435 		mutex_exit(&cp->c_statelock);
8436 
8437 		/*
8438 		 * make sure we can flush currently dirty pages before
8439 		 * allowing the lock
8440 		 */
8441 		if (bfp->l_type != F_UNLCK && cmd != F_GETLK &&
8442 		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8443 			error = cachefs_putpage(
8444 			    vp, (offset_t)0, 0, B_INVAL, cr);
8445 			if (error) {
8446 				error = ENOLCK;
8447 				VN_RELE(backvp);
8448 				break;
8449 			}
8450 		}
8451 
8452 		/* do lock on the back file */
8453 		CFS_DPRINT_BACKFS_NFSV4(fscp,
8454 			("cachefs_frlock (nfsv4): cp %p, backvp %p\n",
8455 			cp, backvp));
8456 		error = VOP_FRLOCK(backvp, cmd, bfp, flag, offset, NULL, cr);
8457 		VN_RELE(backvp);
8458 		if (CFS_TIMEOUT(fscp, error)) {
8459 			connected = 1;
8460 			continue;
8461 		}
8462 		break;
8463 	}
8464 
8465 	if (held) {
8466 		cachefs_cd_release(fscp);
8467 	}
8468 
8469 	/*
8470 	 * If we are setting a lock mark the vnode VNOCACHE so the page
8471 	 * cache does not give inconsistent results on locked files shared
8472 	 * between clients.  The VNOCACHE flag is never turned off as long
8473 	 * as the vnode is active because it is hard to figure out when the
8474 	 * last lock is gone.
8475 	 * XXX - what if some already has the vnode mapped in?
8476 	 * XXX bob: see nfs3_frlock, do not allow locking if vnode mapped in.
8477 	 */
8478 	if ((error == 0) && (bfp->l_type != F_UNLCK) && (cmd != F_GETLK) &&
8479 	    !CFS_ISFS_BACKFS_NFSV4(fscp))
8480 		vp->v_flag |= VNOCACHE;
8481 
8482 #ifdef CFS_CD_DEBUG
8483 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8484 #endif
8485 	return (error);
8486 }
8487 
8488 /*
8489  * Free storage space associated with the specified vnode.  The portion
8490  * to be freed is specified by bfp->l_start and bfp->l_len (already
8491  * normalized to a "whence" of 0).
8492  *
8493  * This is an experimental facility whose continued existence is not
8494  * guaranteed.  Currently, we only support the special case
8495  * of l_len == 0, meaning free to end of file.
8496  */
8497 /* ARGSUSED */
8498 static int
8499 cachefs_space(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
8500 	offset_t offset, cred_t *cr, caller_context_t *ct)
8501 {
8502 	cnode_t *cp = VTOC(vp);
8503 	fscache_t *fscp = C_TO_FSCACHE(cp);
8504 	int error;
8505 
8506 	ASSERT(vp->v_type == VREG);
8507 	if (getzoneid() != GLOBAL_ZONEID)
8508 		return (EPERM);
8509 	if (cmd != F_FREESP)
8510 		return (EINVAL);
8511 
8512 	/* call backfilesystem if NFSv4 */
8513 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
8514 		error = cachefs_space_backfs_nfsv4(vp, cmd, bfp, flag,
8515 						offset, cr, ct);
8516 		goto out;
8517 	}
8518 
8519 	if ((error = convoff(vp, bfp, 0, offset)) == 0) {
8520 		ASSERT(bfp->l_start >= 0);
8521 		if (bfp->l_len == 0) {
8522 			struct vattr va;
8523 
8524 			va.va_size = bfp->l_start;
8525 			va.va_mask = AT_SIZE;
8526 			error = cachefs_setattr(vp, &va, 0, cr, ct);
8527 		} else
8528 			error = EINVAL;
8529 	}
8530 
8531 out:
8532 	return (error);
8533 }
8534 
8535 /*
8536  * cachefs_space_backfs_nfsv4
8537  *
8538  * Call NFSv4 back filesystem to handle the space (cachefs
8539  * pass-through support for NFSv4).
8540  */
8541 static int
8542 cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd, struct flock64 *bfp,
8543 		int flag, offset_t offset, cred_t *cr, caller_context_t *ct)
8544 {
8545 	cnode_t *cp = VTOC(vp);
8546 	fscache_t *fscp = C_TO_FSCACHE(cp);
8547 	vnode_t *backvp;
8548 	int error;
8549 
8550 	/*
8551 	 * For NFSv4 pass-through to work, only connected operation is
8552 	 * supported, the cnode backvp must exist, and cachefs optional
8553 	 * (eg., disconnectable) flags are turned off. Assert these
8554 	 * conditions for the space operation.
8555 	 */
8556 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8557 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8558 
8559 	/* Call backfs vnode op after extracting backvp */
8560 	mutex_enter(&cp->c_statelock);
8561 	backvp = cp->c_backvp;
8562 	mutex_exit(&cp->c_statelock);
8563 
8564 	CFS_DPRINT_BACKFS_NFSV4(fscp,
8565 		("cachefs_space_backfs_nfsv4: cnode %p, backvp %p\n",
8566 		cp, backvp));
8567 	error = VOP_SPACE(backvp, cmd, bfp, flag, offset, cr, ct);
8568 
8569 	return (error);
8570 }
8571 
/*
 * Realvp vnode operation.  Always fails with EINVAL; *vpp is never
 * written.
 */
/*ARGSUSED*/
static int
cachefs_realvp(struct vnode *vp, struct vnode **vpp)
{
	return (EINVAL);
}
8578 
/*
 * Pageio vnode operation.  Not implemented: always returns ENOSYS and
 * touches none of its arguments.
 */
/*ARGSUSED*/
static int
cachefs_pageio(struct vnode *vp, page_t *pp, u_offset_t io_off, size_t io_len,
	int flags, cred_t *cr)
{
	return (ENOSYS);
}
8586 
8587 static int
8588 cachefs_setsecattr_connected(cnode_t *cp,
8589     vsecattr_t *vsec, int flag, cred_t *cr)
8590 {
8591 	fscache_t *fscp = C_TO_FSCACHE(cp);
8592 	int error = 0;
8593 
8594 	ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
8595 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
8596 
8597 	mutex_enter(&cp->c_statelock);
8598 
8599 	if (cp->c_backvp == NULL) {
8600 		error = cachefs_getbackvp(fscp, cp);
8601 		if (error) {
8602 			cachefs_nocache(cp);
8603 			goto out;
8604 		}
8605 	}
8606 
8607 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
8608 	if (error)
8609 		goto out;
8610 
8611 	/* only owner can set acl */
8612 	if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
8613 		error = EINVAL;
8614 		goto out;
8615 	}
8616 
8617 
8618 	CFS_DPRINT_BACKFS_NFSV4(fscp,
8619 		("cachefs_setsecattr (nfsv4): cp %p, backvp %p",
8620 		cp, cp->c_backvp));
8621 	error = VOP_SETSECATTR(cp->c_backvp, vsec, flag, cr);
8622 	if (error) {
8623 		goto out;
8624 	}
8625 
8626 	if ((cp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
8627 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8628 		cachefs_nocache(cp);
8629 		goto out;
8630 	}
8631 
8632 	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
8633 
8634 	/* acl may have changed permissions -- handle this. */
8635 	if (!CFS_ISFS_BACKFS_NFSV4(fscp))
8636 		cachefs_acl2perm(cp, vsec);
8637 
8638 	if ((cp->c_flags & CN_NOCACHE) == 0 &&
8639 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8640 		error = cachefs_cacheacl(cp, vsec);
8641 		if (error != 0) {
8642 #ifdef CFSDEBUG
8643 			CFS_DEBUG(CFSDEBUG_VOPS)
8644 				printf("cachefs_setacl: cacheacl: error %d\n",
8645 				    error);
8646 #endif /* CFSDEBUG */
8647 			error = 0;
8648 			cachefs_nocache(cp);
8649 		}
8650 	}
8651 
8652 out:
8653 	mutex_exit(&cp->c_statelock);
8654 
8655 	return (error);
8656 }
8657 
8658 static int
8659 cachefs_setsecattr_disconnected(cnode_t *cp,
8660     vsecattr_t *vsec, int flag, cred_t *cr)
8661 {
8662 	fscache_t *fscp = C_TO_FSCACHE(cp);
8663 	mode_t failmode = cp->c_metadata.md_vattr.va_mode;
8664 	off_t commit = 0;
8665 	int error = 0;
8666 
8667 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
8668 
8669 	if (CFS_ISFS_WRITE_AROUND(fscp))
8670 		return (ETIMEDOUT);
8671 
8672 	mutex_enter(&cp->c_statelock);
8673 
8674 	/* only owner can set acl */
8675 	if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
8676 		error = EINVAL;
8677 		goto out;
8678 	}
8679 
8680 	if (cp->c_metadata.md_flags & MD_NEEDATTRS) {
8681 		error = ETIMEDOUT;
8682 		goto out;
8683 	}
8684 
8685 	/* XXX do i need this?  is this right? */
8686 	if (cp->c_flags & CN_ALLOC_PENDING) {
8687 		if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
8688 			(void) filegrp_allocattr(cp->c_filegrp);
8689 		}
8690 		error = filegrp_create_metadata(cp->c_filegrp,
8691 		    &cp->c_metadata, &cp->c_id);
8692 		if (error) {
8693 			goto out;
8694 		}
8695 		cp->c_flags &= ~CN_ALLOC_PENDING;
8696 	}
8697 
8698 	/* XXX is this right? */
8699 	if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
8700 		error = cachefs_dlog_cidmap(fscp);
8701 		if (error) {
8702 			error = ENOSPC;
8703 			goto out;
8704 		}
8705 		cp->c_metadata.md_flags |= MD_MAPPING;
8706 		cp->c_flags |= CN_UPDATED;
8707 	}
8708 
8709 	commit = cachefs_dlog_setsecattr(fscp, vsec, flag, cp, cr);
8710 	if (commit == 0)
8711 		goto out;
8712 
8713 	/* fix modes in metadata */
8714 	cachefs_acl2perm(cp, vsec);
8715 
8716 	if ((cp->c_flags & CN_NOCACHE) == 0) {
8717 		error = cachefs_cacheacl(cp, vsec);
8718 		if (error != 0) {
8719 			goto out;
8720 		}
8721 	}
8722 
8723 	/* XXX is this right? */
8724 	if (cachefs_modified_alloc(cp)) {
8725 		error = ENOSPC;
8726 		goto out;
8727 	}
8728 
8729 out:
8730 	if (error != 0)
8731 		cp->c_metadata.md_vattr.va_mode = failmode;
8732 
8733 	mutex_exit(&cp->c_statelock);
8734 
8735 	if (commit) {
8736 		if (cachefs_dlog_commit(fscp, commit, error)) {
8737 			/*EMPTY*/
8738 			/* XXX fix on panic? */
8739 		}
8740 	}
8741 
8742 	return (error);
8743 }
8744 
8745 static int
8746 cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr)
8747 {
8748 	cnode_t *cp = VTOC(vp);
8749 	fscache_t *fscp = C_TO_FSCACHE(cp);
8750 	int connected = 0;
8751 	int held = 0;
8752 	int error = 0;
8753 
8754 #ifdef CFSDEBUG
8755 	CFS_DEBUG(CFSDEBUG_VOPS)
8756 	    printf("cachefs_setsecattr: ENTER vp %p\n", (void *)vp);
8757 #endif
8758 	if (getzoneid() != GLOBAL_ZONEID) {
8759 		error = EPERM;
8760 		goto out;
8761 	}
8762 
8763 	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
8764 		error = ENOSYS;
8765 		goto out;
8766 	}
8767 
8768 	if (! cachefs_vtype_aclok(vp)) {
8769 		error = EINVAL;
8770 		goto out;
8771 	}
8772 
8773 	/*
8774 	 * Cachefs only provides pass-through support for NFSv4,
8775 	 * and all vnode operations are passed through to the
8776 	 * back file system. For NFSv4 pass-through to work, only
8777 	 * connected operation is supported, the cnode backvp must
8778 	 * exist, and cachefs optional (eg., disconnectable) flags
8779 	 * are turned off. Assert these conditions to ensure that
8780 	 * the backfilesystem is called for the setsecattr operation.
8781 	 */
8782 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8783 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8784 
8785 	for (;;) {
8786 		/* drop hold on file system */
8787 		if (held) {
8788 			/* Won't loop with NFSv4 connected operation */
8789 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8790 			cachefs_cd_release(fscp);
8791 			held = 0;
8792 		}
8793 
8794 		/* aquire access to the file system */
8795 		error = cachefs_cd_access(fscp, connected, 1);
8796 		if (error)
8797 			break;
8798 		held = 1;
8799 
8800 		/* perform the setattr */
8801 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
8802 			error = cachefs_setsecattr_connected(cp,
8803 			    vsec, flag, cr);
8804 		else
8805 			error = cachefs_setsecattr_disconnected(cp,
8806 			    vsec, flag, cr);
8807 		if (error) {
8808 			/* if connected */
8809 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8810 				if (CFS_TIMEOUT(fscp, error)) {
8811 					cachefs_cd_release(fscp);
8812 					held = 0;
8813 					cachefs_cd_timedout(fscp);
8814 					connected = 0;
8815 					continue;
8816 				}
8817 			}
8818 
8819 			/* else must be disconnected */
8820 			else {
8821 				if (CFS_TIMEOUT(fscp, error)) {
8822 					connected = 1;
8823 					continue;
8824 				}
8825 			}
8826 		}
8827 		break;
8828 	}
8829 
8830 	if (held) {
8831 		cachefs_cd_release(fscp);
8832 	}
8833 	return (error);
8834 
8835 out:
8836 #ifdef CFS_CD_DEBUG
8837 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8838 #endif
8839 
8840 #ifdef CFSDEBUG
8841 	CFS_DEBUG(CFSDEBUG_VOPS)
8842 		printf("cachefs_setsecattr: EXIT error = %d\n", error);
8843 #endif
8844 	return (error);
8845 }
8846 
8847 /*
8848  * call this BEFORE calling cachefs_cacheacl(), as the latter will
8849  * sanitize the acl.
8850  */
8851 
8852 static void
8853 cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec)
8854 {
8855 	aclent_t *aclp;
8856 	int i;
8857 
8858 	for (i = 0; i < vsec->vsa_aclcnt; i++) {
8859 		aclp = ((aclent_t *)vsec->vsa_aclentp) + i;
8860 		switch (aclp->a_type) {
8861 		case USER_OBJ:
8862 			cp->c_metadata.md_vattr.va_mode &= (~0700);
8863 			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 6);
8864 			break;
8865 
8866 		case GROUP_OBJ:
8867 			cp->c_metadata.md_vattr.va_mode &= (~070);
8868 			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 3);
8869 			break;
8870 
8871 		case OTHER_OBJ:
8872 			cp->c_metadata.md_vattr.va_mode &= (~07);
8873 			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm);
8874 			break;
8875 
8876 		case CLASS_OBJ:
8877 			cp->c_metadata.md_aclclass = aclp->a_perm;
8878 			break;
8879 		}
8880 	}
8881 
8882 	cp->c_flags |= CN_UPDATED;
8883 }
8884 
8885 static int
8886 cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr)
8887 {
8888 	cnode_t *cp = VTOC(vp);
8889 	fscache_t *fscp = C_TO_FSCACHE(cp);
8890 	int held = 0, connected = 0;
8891 	int error = 0;
8892 
8893 #ifdef CFSDEBUG
8894 	CFS_DEBUG(CFSDEBUG_VOPS)
8895 		printf("cachefs_getsecattr: ENTER vp %p\n", (void *)vp);
8896 #endif
8897 
8898 	if (getzoneid() != GLOBAL_ZONEID) {
8899 		error = EPERM;
8900 		goto out;
8901 	}
8902 
8903 	/*
8904 	 * Cachefs only provides pass-through support for NFSv4,
8905 	 * and all vnode operations are passed through to the
8906 	 * back file system. For NFSv4 pass-through to work, only
8907 	 * connected operation is supported, the cnode backvp must
8908 	 * exist, and cachefs optional (eg., disconnectable) flags
8909 	 * are turned off. Assert these conditions to ensure that
8910 	 * the backfilesystem is called for the getsecattr operation.
8911 	 */
8912 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8913 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8914 
8915 	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
8916 		error = fs_fab_acl(vp, vsec, flag, cr);
8917 		goto out;
8918 	}
8919 
8920 	for (;;) {
8921 		if (held) {
8922 			/* Won't loop with NFSv4 connected behavior */
8923 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8924 			cachefs_cd_release(fscp);
8925 			held = 0;
8926 		}
8927 		error = cachefs_cd_access(fscp, connected, 0);
8928 		if (error)
8929 			break;
8930 		held = 1;
8931 
8932 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8933 			error = cachefs_getsecattr_connected(vp, vsec, flag,
8934 			    cr);
8935 			if (CFS_TIMEOUT(fscp, error)) {
8936 				cachefs_cd_release(fscp);
8937 				held = 0;
8938 				cachefs_cd_timedout(fscp);
8939 				connected = 0;
8940 				continue;
8941 			}
8942 		} else {
8943 			error = cachefs_getsecattr_disconnected(vp, vsec, flag,
8944 			    cr);
8945 			if (CFS_TIMEOUT(fscp, error)) {
8946 				if (cachefs_cd_access_miss(fscp)) {
8947 					error = cachefs_getsecattr_connected(vp,
8948 					    vsec, flag, cr);
8949 					if (!CFS_TIMEOUT(fscp, error))
8950 						break;
8951 					delay(5*hz);
8952 					connected = 0;
8953 					continue;
8954 				}
8955 				connected = 1;
8956 				continue;
8957 			}
8958 		}
8959 		break;
8960 	}
8961 
8962 out:
8963 	if (held)
8964 		cachefs_cd_release(fscp);
8965 
8966 #ifdef CFS_CD_DEBUG
8967 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8968 #endif
8969 #ifdef CFSDEBUG
8970 	CFS_DEBUG(CFSDEBUG_VOPS)
8971 		printf("cachefs_getsecattr: EXIT error = %d\n", error);
8972 #endif
8973 	return (error);
8974 }
8975 
8976 static int
8977 cachefs_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr)
8978 {
8979 	cnode_t *cp = VTOC(vp);
8980 	fscache_t *fscp = C_TO_FSCACHE(cp);
8981 	int error = 0;
8982 	vnode_t *backvp;
8983 
8984 #ifdef CFSDEBUG
8985 	CFS_DEBUG(CFSDEBUG_VOPS)
8986 		printf("cachefs_shrlock: ENTER vp %p\n", (void *)vp);
8987 #endif
8988 
8989 	if (getzoneid() != GLOBAL_ZONEID) {
8990 		error = EPERM;
8991 		goto out;
8992 	}
8993 
8994 	/*
8995 	 * Cachefs only provides pass-through support for NFSv4,
8996 	 * and all vnode operations are passed through to the
8997 	 * back file system. For NFSv4 pass-through to work, only
8998 	 * connected operation is supported, the cnode backvp must
8999 	 * exist, and cachefs optional (eg., disconnectable) flags
9000 	 * are turned off. Assert these conditions to ensure that
9001 	 * the backfilesystem is called for the shrlock operation.
9002 	 */
9003 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
9004 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
9005 
9006 	mutex_enter(&cp->c_statelock);
9007 	if (cp->c_backvp == NULL)
9008 		error = cachefs_getbackvp(fscp, cp);
9009 	backvp = cp->c_backvp;
9010 	mutex_exit(&cp->c_statelock);
9011 	ASSERT((error != 0) || (backvp != NULL));
9012 
9013 	if (error == 0) {
9014 		CFS_DPRINT_BACKFS_NFSV4(fscp,
9015 			("cachefs_shrlock (nfsv4): cp %p, backvp %p",
9016 			cp, backvp));
9017 		error = VOP_SHRLOCK(backvp, cmd, shr, flag, cr);
9018 	}
9019 
9020 out:
9021 #ifdef CFSDEBUG
9022 	CFS_DEBUG(CFSDEBUG_VOPS)
9023 		printf("cachefs_shrlock: EXIT error = %d\n", error);
9024 #endif
9025 	return (error);
9026 }
9027 
9028 static int
9029 cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
9030     cred_t *cr)
9031 {
9032 	cnode_t *cp = VTOC(vp);
9033 	fscache_t *fscp = C_TO_FSCACHE(cp);
9034 	int hit = 0;
9035 	int error = 0;
9036 
9037 
9038 	mutex_enter(&cp->c_statelock);
9039 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
9040 	if (error)
9041 		goto out;
9042 
9043 	/* read from the cache if we can */
9044 	if ((cp->c_metadata.md_flags & MD_ACL) &&
9045 	    ((cp->c_flags & CN_NOCACHE) == 0) &&
9046 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
9047 		ASSERT((cp->c_flags & CN_NOCACHE) == 0);
9048 		error = cachefs_getaclfromcache(cp, vsec);
9049 		if (error) {
9050 			cachefs_nocache(cp);
9051 			ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
9052 			error = 0;
9053 		} else {
9054 			hit = 1;
9055 			goto out;
9056 		}
9057 	}
9058 
9059 	ASSERT(error == 0);
9060 	if (cp->c_backvp == NULL)
9061 		error = cachefs_getbackvp(fscp, cp);
9062 	if (error)
9063 		goto out;
9064 
9065 	CFS_DPRINT_BACKFS_NFSV4(fscp,
9066 		("cachefs_getsecattr (nfsv4): cp %p, backvp %p",
9067 		cp, cp->c_backvp));
9068 	error = VOP_GETSECATTR(cp->c_backvp, vsec, flag, cr);
9069 	if (error)
9070 		goto out;
9071 
9072 	if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
9073 	    (cachefs_vtype_aclok(vp)) &&
9074 	    ((cp->c_flags & CN_NOCACHE) == 0) &&
9075 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
9076 		error = cachefs_cacheacl(cp, vsec);
9077 		if (error) {
9078 			error = 0;
9079 			cachefs_nocache(cp);
9080 		}
9081 	}
9082 
9083 out:
9084 	if (error == 0) {
9085 		if (hit)
9086 			fscp->fs_stats.st_hits++;
9087 		else
9088 			fscp->fs_stats.st_misses++;
9089 	}
9090 	mutex_exit(&cp->c_statelock);
9091 
9092 	return (error);
9093 }
9094 
9095 static int
9096 /*ARGSUSED*/
9097 cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec, int flag,
9098     cred_t *cr)
9099 {
9100 	cnode_t *cp = VTOC(vp);
9101 	fscache_t *fscp = C_TO_FSCACHE(cp);
9102 	int hit = 0;
9103 	int error = 0;
9104 
9105 
9106 	mutex_enter(&cp->c_statelock);
9107 
9108 	/* read from the cache if we can */
9109 	if (((cp->c_flags & CN_NOCACHE) == 0) &&
9110 	    (cp->c_metadata.md_flags & MD_ACL)) {
9111 		error = cachefs_getaclfromcache(cp, vsec);
9112 		if (error) {
9113 			cachefs_nocache(cp);
9114 			ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
9115 			error = 0;
9116 		} else {
9117 			hit = 1;
9118 			goto out;
9119 		}
9120 	}
9121 	error = ETIMEDOUT;
9122 
9123 out:
9124 	if (error == 0) {
9125 		if (hit)
9126 			fscp->fs_stats.st_hits++;
9127 		else
9128 			fscp->fs_stats.st_misses++;
9129 	}
9130 	mutex_exit(&cp->c_statelock);
9131 
9132 	return (error);
9133 }
9134 
9135 /*
9136  * cachefs_cacheacl() -- cache an ACL, which we do by applying it to
9137  * the frontfile if possible; otherwise, the adjunct directory.
9138  *
9139  * inputs:
9140  * cp - the cnode, with its statelock already held
9141  * vsecp - a pointer to a vsecattr_t you'd like us to cache as-is,
9142  *  or NULL if you want us to do the VOP_GETSECATTR(backvp).
9143  *
9144  * returns:
9145  * 0 - all is well
9146  * nonzero - errno
9147  */
9148 
9149 int
9150 cachefs_cacheacl(cnode_t *cp, vsecattr_t *vsecp)
9151 {
9152 	fscache_t *fscp = C_TO_FSCACHE(cp);
9153 	vsecattr_t vsec;
9154 	aclent_t *aclp;
9155 	int gotvsec = 0;
9156 	int error = 0;
9157 	vnode_t *vp = NULL;
9158 	void *aclkeep = NULL;
9159 	int i;
9160 
9161 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9162 	ASSERT((cp->c_flags & CN_NOCACHE) == 0);
9163 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
9164 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
9165 	ASSERT(cachefs_vtype_aclok(CTOV(cp)));
9166 
9167 	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
9168 		error = ENOSYS;
9169 		goto out;
9170 	}
9171 
9172 	if (vsecp == NULL) {
9173 		if (cp->c_backvp == NULL)
9174 			error = cachefs_getbackvp(fscp, cp);
9175 		if (error != 0)
9176 			goto out;
9177 		vsecp = &vsec;
9178 		bzero(&vsec, sizeof (vsec));
9179 		vsecp->vsa_mask =
9180 		    VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9181 		error = VOP_GETSECATTR(cp->c_backvp, vsecp, 0, kcred);
9182 		if (error != 0) {
9183 			goto out;
9184 		}
9185 		gotvsec = 1;
9186 	} else if (vsecp->vsa_mask & VSA_ACL) {
9187 		aclkeep = vsecp->vsa_aclentp;
9188 		vsecp->vsa_aclentp = cachefs_kmem_alloc(vsecp->vsa_aclcnt *
9189 		    sizeof (aclent_t), KM_SLEEP);
9190 		bcopy(aclkeep, vsecp->vsa_aclentp, vsecp->vsa_aclcnt *
9191 		    sizeof (aclent_t));
9192 	} else if ((vsecp->vsa_mask & (VSA_ACL | VSA_DFACL)) == 0) {
9193 		/* unless there's real data, we can cache nothing. */
9194 		return (0);
9195 	}
9196 
9197 	/*
9198 	 * prevent the ACL from chmoding our frontfile, and
9199 	 * snarf the class info
9200 	 */
9201 
9202 	if ((vsecp->vsa_mask & (VSA_ACL | VSA_ACLCNT)) ==
9203 	    (VSA_ACL | VSA_ACLCNT)) {
9204 		for (i = 0; i < vsecp->vsa_aclcnt; i++) {
9205 			aclp = ((aclent_t *)vsecp->vsa_aclentp) + i;
9206 			switch (aclp->a_type) {
9207 			case CLASS_OBJ:
9208 				cp->c_metadata.md_aclclass =
9209 			    aclp->a_perm;
9210 				/*FALLTHROUGH*/
9211 			case USER_OBJ:
9212 			case GROUP_OBJ:
9213 			case OTHER_OBJ:
9214 				aclp->a_perm = 06;
9215 			}
9216 		}
9217 	}
9218 
9219 	/*
9220 	 * if the frontfile exists, then we always do the work.  but,
9221 	 * if there's no frontfile, and the ACL isn't a `real' ACL,
9222 	 * then we don't want to do the work.  otherwise, an `ls -l'
9223 	 * will create tons of emtpy frontfiles.
9224 	 */
9225 
9226 	if (((cp->c_metadata.md_flags & MD_FILE) == 0) &&
9227 	    ((vsecp->vsa_aclcnt + vsecp->vsa_dfaclcnt)
9228 	    <= MIN_ACL_ENTRIES)) {
9229 		cp->c_metadata.md_flags |= MD_ACL;
9230 		cp->c_flags |= CN_UPDATED;
9231 		goto out;
9232 	}
9233 
9234 	/*
9235 	 * if we have a default ACL, then we need a
9236 	 * real live directory in the frontfs that we
9237 	 * can apply the ACL to.  if not, then we just
9238 	 * use the frontfile.  we get the frontfile
9239 	 * regardless -- that way, we know the
9240 	 * directory for the frontfile exists.
9241 	 */
9242 
9243 	if (vsecp->vsa_dfaclcnt > 0) {
9244 		if (cp->c_acldirvp == NULL)
9245 			error = cachefs_getacldirvp(cp);
9246 		if (error != 0)
9247 			goto out;
9248 		vp = cp->c_acldirvp;
9249 	} else {
9250 		if (cp->c_frontvp == NULL)
9251 			error = cachefs_getfrontfile(cp);
9252 		if (error != 0)
9253 			goto out;
9254 		vp = cp->c_frontvp;
9255 	}
9256 	ASSERT(vp != NULL);
9257 
9258 	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
9259 	error = VOP_SETSECATTR(vp, vsecp, 0, kcred);
9260 	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
9261 	if (error != 0) {
9262 #ifdef CFSDEBUG
9263 		CFS_DEBUG(CFSDEBUG_VOPS)
9264 			printf("cachefs_cacheacl: setsecattr: error %d\n",
9265 			    error);
9266 #endif /* CFSDEBUG */
9267 		/*
9268 		 * If there was an error, we don't want to call
9269 		 * cachefs_nocache(); so, set error to 0.
9270 		 * We will call cachefs_purgeacl(), in order to
9271 		 * clean such things as adjunct ACL directories.
9272 		 */
9273 		cachefs_purgeacl(cp);
9274 		error = 0;
9275 		goto out;
9276 	}
9277 	if (vp == cp->c_frontvp)
9278 		cp->c_flags |= CN_NEED_FRONT_SYNC;
9279 
9280 	cp->c_metadata.md_flags |= MD_ACL;
9281 	cp->c_flags |= CN_UPDATED;
9282 
9283 out:
9284 	if ((error) && (fscp->fs_cdconnected == CFS_CD_CONNECTED))
9285 		cachefs_nocache(cp);
9286 
9287 	if (gotvsec) {
9288 		if (vsec.vsa_aclcnt)
9289 			kmem_free(vsec.vsa_aclentp,
9290 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9291 		if (vsec.vsa_dfaclcnt)
9292 			kmem_free(vsec.vsa_dfaclentp,
9293 			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
9294 	} else if (aclkeep != NULL) {
9295 		cachefs_kmem_free(vsecp->vsa_aclentp,
9296 		    vsecp->vsa_aclcnt * sizeof (aclent_t));
9297 		vsecp->vsa_aclentp = aclkeep;
9298 	}
9299 
9300 	return (error);
9301 }
9302 
9303 void
9304 cachefs_purgeacl(cnode_t *cp)
9305 {
9306 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9307 
9308 	ASSERT(!CFS_ISFS_BACKFS_NFSV4(C_TO_FSCACHE(cp)));
9309 
9310 	if (cp->c_acldirvp != NULL) {
9311 		VN_RELE(cp->c_acldirvp);
9312 		cp->c_acldirvp = NULL;
9313 	}
9314 
9315 	if (cp->c_metadata.md_flags & MD_ACLDIR) {
9316 		char name[CFS_FRONTFILE_NAME_SIZE + 2];
9317 
9318 		ASSERT(cp->c_filegrp->fg_dirvp != NULL);
9319 		make_ascii_name(&cp->c_id, name);
9320 		(void) strcat(name, ".d");
9321 
9322 		(void) VOP_RMDIR(cp->c_filegrp->fg_dirvp, name,
9323 		    cp->c_filegrp->fg_dirvp, kcred);
9324 	}
9325 
9326 	cp->c_metadata.md_flags &= ~(MD_ACL | MD_ACLDIR);
9327 	cp->c_flags |= CN_UPDATED;
9328 }
9329 
9330 static int
9331 cachefs_getacldirvp(cnode_t *cp)
9332 {
9333 	char name[CFS_FRONTFILE_NAME_SIZE + 2];
9334 	int error = 0;
9335 
9336 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9337 	ASSERT(cp->c_acldirvp == NULL);
9338 
9339 	if (cp->c_frontvp == NULL)
9340 		error = cachefs_getfrontfile(cp);
9341 	if (error != 0)
9342 		goto out;
9343 
9344 	ASSERT(cp->c_filegrp->fg_dirvp != NULL);
9345 	make_ascii_name(&cp->c_id, name);
9346 	(void) strcat(name, ".d");
9347 	error = VOP_LOOKUP(cp->c_filegrp->fg_dirvp,
9348 	    name, &cp->c_acldirvp, NULL, 0, NULL, kcred);
9349 	if ((error != 0) && (error != ENOENT))
9350 		goto out;
9351 
9352 	if (error != 0) {
9353 		vattr_t va;
9354 
9355 		va.va_mode = S_IFDIR | 0777;
9356 		va.va_uid = 0;
9357 		va.va_gid = 0;
9358 		va.va_type = VDIR;
9359 		va.va_mask = AT_TYPE | AT_MODE |
9360 		    AT_UID | AT_GID;
9361 		error =
9362 		    VOP_MKDIR(cp->c_filegrp->fg_dirvp,
9363 			name, &va, &cp->c_acldirvp, kcred);
9364 		if (error != 0)
9365 			goto out;
9366 	}
9367 
9368 	ASSERT(cp->c_acldirvp != NULL);
9369 	cp->c_metadata.md_flags |= MD_ACLDIR;
9370 	cp->c_flags |= CN_UPDATED;
9371 
9372 out:
9373 	if (error != 0)
9374 		cp->c_acldirvp = NULL;
9375 	return (error);
9376 }
9377 
9378 static int
9379 cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec)
9380 {
9381 	aclent_t *aclp;
9382 	int error = 0;
9383 	vnode_t *vp = NULL;
9384 	int i;
9385 
9386 	ASSERT(cp->c_metadata.md_flags & MD_ACL);
9387 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9388 	ASSERT(vsec->vsa_aclentp == NULL);
9389 
9390 	if (cp->c_metadata.md_flags & MD_ACLDIR) {
9391 		if (cp->c_acldirvp == NULL)
9392 			error = cachefs_getacldirvp(cp);
9393 		if (error != 0)
9394 			goto out;
9395 		vp = cp->c_acldirvp;
9396 	} else if (cp->c_metadata.md_flags & MD_FILE) {
9397 		if (cp->c_frontvp == NULL)
9398 			error = cachefs_getfrontfile(cp);
9399 		if (error != 0)
9400 			goto out;
9401 		vp = cp->c_frontvp;
9402 	} else {
9403 
9404 		/*
9405 		 * if we get here, then we know that MD_ACL is on,
9406 		 * meaning an ACL was successfully cached.  we also
9407 		 * know that neither MD_ACLDIR nor MD_FILE are on, so
9408 		 * this has to be an entry without a `real' ACL.
9409 		 * thus, we forge whatever is necessary.
9410 		 */
9411 
9412 		if (vsec->vsa_mask & VSA_ACLCNT)
9413 			vsec->vsa_aclcnt = MIN_ACL_ENTRIES;
9414 
9415 		if (vsec->vsa_mask & VSA_ACL) {
9416 			vsec->vsa_aclentp =
9417 			    kmem_zalloc(MIN_ACL_ENTRIES *
9418 			    sizeof (aclent_t), KM_SLEEP);
9419 			aclp = (aclent_t *)vsec->vsa_aclentp;
9420 			aclp->a_type = USER_OBJ;
9421 			++aclp;
9422 			aclp->a_type = GROUP_OBJ;
9423 			++aclp;
9424 			aclp->a_type = OTHER_OBJ;
9425 			++aclp;
9426 			aclp->a_type = CLASS_OBJ;
9427 			ksort((caddr_t)vsec->vsa_aclentp, MIN_ACL_ENTRIES,
9428 			    sizeof (aclent_t), cmp2acls);
9429 		}
9430 
9431 		ASSERT(vp == NULL);
9432 	}
9433 
9434 	if (vp != NULL) {
9435 		if ((error = VOP_GETSECATTR(vp, vsec, 0, kcred)) != 0) {
9436 #ifdef CFSDEBUG
9437 			CFS_DEBUG(CFSDEBUG_VOPS)
9438 				printf("cachefs_getaclfromcache: error %d\n",
9439 				    error);
9440 #endif /* CFSDEBUG */
9441 			goto out;
9442 		}
9443 	}
9444 
9445 	if (vsec->vsa_aclentp != NULL) {
9446 		for (i = 0; i < vsec->vsa_aclcnt; i++) {
9447 			aclp = ((aclent_t *)vsec->vsa_aclentp) + i;
9448 			switch (aclp->a_type) {
9449 			case USER_OBJ:
9450 				aclp->a_id = cp->c_metadata.md_vattr.va_uid;
9451 				aclp->a_perm =
9452 				    cp->c_metadata.md_vattr.va_mode & 0700;
9453 				aclp->a_perm >>= 6;
9454 				break;
9455 
9456 			case GROUP_OBJ:
9457 				aclp->a_id = cp->c_metadata.md_vattr.va_gid;
9458 				aclp->a_perm =
9459 				    cp->c_metadata.md_vattr.va_mode & 070;
9460 				aclp->a_perm >>= 3;
9461 				break;
9462 
9463 			case OTHER_OBJ:
9464 				aclp->a_perm =
9465 				    cp->c_metadata.md_vattr.va_mode & 07;
9466 				break;
9467 
9468 			case CLASS_OBJ:
9469 				aclp->a_perm =
9470 				    cp->c_metadata.md_aclclass;
9471 				break;
9472 			}
9473 		}
9474 	}
9475 
9476 out:
9477 
9478 	if (error != 0)
9479 		cachefs_nocache(cp);
9480 
9481 	return (error);
9482 }
9483 
9484 /*
9485  * Fills in targp with attribute information from srcp, cp
9486  * and if necessary the system.
9487  */
9488 static void
9489 cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp, cred_t *cr)
9490 {
9491 	time_t	now;
9492 
9493 	ASSERT((srcp->va_mask & (AT_TYPE | AT_MODE)) == (AT_TYPE | AT_MODE));
9494 
9495 	/*
9496 	 * Add code to fill in the va struct.  We use the fields from
9497 	 * the srcp struct if they are populated, otherwise we guess
9498 	 */
9499 
9500 	targp->va_mask = 0;	/* initialize all fields */
9501 	targp->va_mode = srcp->va_mode;
9502 	targp->va_type = srcp->va_type;
9503 	targp->va_nlink = 1;
9504 	targp->va_nodeid = 0;
9505 
9506 	if (srcp->va_mask & AT_UID)
9507 		targp->va_uid = srcp->va_uid;
9508 	else
9509 		targp->va_uid = crgetuid(cr);
9510 
9511 	if (srcp->va_mask & AT_GID)
9512 		targp->va_gid = srcp->va_gid;
9513 	else
9514 		targp->va_gid = crgetgid(cr);
9515 
9516 	if (srcp->va_mask & AT_FSID)
9517 		targp->va_fsid = srcp->va_fsid;
9518 	else
9519 		targp->va_fsid = 0;	/* initialize all fields */
9520 
9521 	now = gethrestime_sec();
9522 	if (srcp->va_mask & AT_ATIME)
9523 		targp->va_atime = srcp->va_atime;
9524 	else
9525 		targp->va_atime.tv_sec = now;
9526 
9527 	if (srcp->va_mask & AT_MTIME)
9528 		targp->va_mtime = srcp->va_mtime;
9529 	else
9530 		targp->va_mtime.tv_sec = now;
9531 
9532 	if (srcp->va_mask & AT_CTIME)
9533 		targp->va_ctime = srcp->va_ctime;
9534 	else
9535 		targp->va_ctime.tv_sec = now;
9536 
9537 
9538 	if (srcp->va_mask & AT_SIZE)
9539 		targp->va_size = srcp->va_size;
9540 	else
9541 		targp->va_size = 0;
9542 
9543 	/*
9544 	 * the remaing fields are set by the fs and not changable.
9545 	 * we populate these entries useing the parent directory
9546 	 * values.  It's a small hack, but should work.
9547 	 */
9548 	targp->va_blksize = cp->c_metadata.md_vattr.va_blksize;
9549 	targp->va_rdev = cp->c_metadata.md_vattr.va_rdev;
9550 	targp->va_nblocks = cp->c_metadata.md_vattr.va_nblocks;
9551 	targp->va_seq = 0; /* Never keep the sequence number */
9552 }
9553 
9554 /*
9555  * set the gid for a newly created file.  The algorithm is as follows:
9556  *
9557  *	1) If the gid is set in the attribute list, then use it if
9558  *	   the caller is privileged, belongs to the target group, or
9559  *	   the group is the same as the parent directory.
9560  *
9561  *	2) If the parent directory's set-gid bit is clear, then use
9562  *	   the process gid
9563  *
9564  *	3) Otherwise, use the gid of the parent directory.
9565  *
9566  * Note: newcp->c_attr.va_{mode,type} must already be set before calling
9567  * this routine.
9568  */
9569 static void
9570 cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap, cred_t *cr)
9571 {
9572 	if ((vap->va_mask & AT_GID) &&
9573 	    ((vap->va_gid == dcp->c_attr.va_gid) ||
9574 	    groupmember(vap->va_gid, cr) ||
9575 	    secpolicy_vnode_create_gid(cr) != 0)) {
9576 		newcp->c_attr.va_gid = vap->va_gid;
9577 	} else {
9578 		if (dcp->c_attr.va_mode & S_ISGID)
9579 			newcp->c_attr.va_gid = dcp->c_attr.va_gid;
9580 		else
9581 			newcp->c_attr.va_gid = crgetgid(cr);
9582 	}
9583 
9584 	/*
9585 	 * if we're creating a directory, and the parent directory has the
9586 	 * set-GID bit set, set it on the new directory.
9587 	 * Otherwise, if the user is neither privileged nor a member of the
9588 	 * file's new group, clear the file's set-GID bit.
9589 	 */
9590 	if (dcp->c_attr.va_mode & S_ISGID && newcp->c_attr.va_type == VDIR) {
9591 		newcp->c_attr.va_mode |= S_ISGID;
9592 	} else if ((newcp->c_attr.va_mode & S_ISGID) &&
9593 	    secpolicy_vnode_setids_setgids(cr, newcp->c_attr.va_gid) != 0)
9594 		newcp->c_attr.va_mode &= ~S_ISGID;
9595 }
9596 
9597 /*
9598  * create an acl for the newly created file.  should be called right
9599  * after cachefs_creategid.
9600  */
9601 
9602 static void
9603 cachefs_createacl(cnode_t *dcp, cnode_t *newcp)
9604 {
9605 	fscache_t *fscp = C_TO_FSCACHE(dcp);
9606 	vsecattr_t vsec;
9607 	int gotvsec = 0;
9608 	int error = 0; /* placeholder */
9609 	aclent_t *aclp;
9610 	o_mode_t *classp = NULL;
9611 	o_mode_t gunion = 0;
9612 	int i;
9613 
9614 	if ((fscp->fs_info.fi_mntflags & CFS_NOACL) ||
9615 	    (! cachefs_vtype_aclok(CTOV(newcp))))
9616 		return;
9617 
9618 	ASSERT(dcp->c_metadata.md_flags & MD_ACL);
9619 	ASSERT(MUTEX_HELD(&dcp->c_statelock));
9620 	ASSERT(MUTEX_HELD(&newcp->c_statelock));
9621 
9622 	/*
9623 	 * XXX should probably not do VSA_ACL and VSA_ACLCNT, but that
9624 	 * would hit code paths that isn't hit anywhere else.
9625 	 */
9626 
9627 	bzero(&vsec, sizeof (vsec));
9628 	vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9629 	error = cachefs_getaclfromcache(dcp, &vsec);
9630 	if (error != 0)
9631 		goto out;
9632 	gotvsec = 1;
9633 
9634 	if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL)) {
9635 		if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL))
9636 			kmem_free(vsec.vsa_aclentp,
9637 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9638 
9639 		vsec.vsa_aclcnt = vsec.vsa_dfaclcnt;
9640 		vsec.vsa_aclentp = vsec.vsa_dfaclentp;
9641 		vsec.vsa_dfaclcnt = 0;
9642 		vsec.vsa_dfaclentp = NULL;
9643 
9644 		if (newcp->c_attr.va_type == VDIR) {
9645 			vsec.vsa_dfaclentp = kmem_alloc(vsec.vsa_aclcnt *
9646 			    sizeof (aclent_t), KM_SLEEP);
9647 			vsec.vsa_dfaclcnt = vsec.vsa_aclcnt;
9648 			bcopy(vsec.vsa_aclentp, vsec.vsa_dfaclentp,
9649 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9650 		}
9651 
9652 		/*
9653 		 * this function should be called pretty much after
9654 		 * the rest of the file creation stuff is done.  so,
9655 		 * uid, gid, etc. should be `right'.  we'll go with
9656 		 * that, rather than trying to determine whether to
9657 		 * get stuff from cr or va.
9658 		 */
9659 
9660 		for (i = 0; i < vsec.vsa_aclcnt; i++) {
9661 			aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9662 			switch (aclp->a_type) {
9663 			case DEF_USER_OBJ:
9664 				aclp->a_type = USER_OBJ;
9665 				aclp->a_id = newcp->c_metadata.md_vattr.va_uid;
9666 				aclp->a_perm =
9667 				    newcp->c_metadata.md_vattr.va_mode;
9668 				aclp->a_perm &= 0700;
9669 				aclp->a_perm >>= 6;
9670 				break;
9671 
9672 			case DEF_GROUP_OBJ:
9673 				aclp->a_type = GROUP_OBJ;
9674 				aclp->a_id = newcp->c_metadata.md_vattr.va_gid;
9675 				aclp->a_perm =
9676 				    newcp->c_metadata.md_vattr.va_mode;
9677 				aclp->a_perm &= 070;
9678 				aclp->a_perm >>= 3;
9679 				gunion |= aclp->a_perm;
9680 				break;
9681 
9682 			case DEF_OTHER_OBJ:
9683 				aclp->a_type = OTHER_OBJ;
9684 				aclp->a_perm =
9685 				    newcp->c_metadata.md_vattr.va_mode & 07;
9686 				break;
9687 
9688 			case DEF_CLASS_OBJ:
9689 				aclp->a_type = CLASS_OBJ;
9690 				classp = &(aclp->a_perm);
9691 				break;
9692 
9693 			case DEF_USER:
9694 				aclp->a_type = USER;
9695 				gunion |= aclp->a_perm;
9696 				break;
9697 
9698 			case DEF_GROUP:
9699 				aclp->a_type = GROUP;
9700 				gunion |= aclp->a_perm;
9701 				break;
9702 			}
9703 		}
9704 
9705 		/* XXX is this the POSIX thing to do? */
9706 		if (classp != NULL)
9707 			*classp &= gunion;
9708 
9709 		/*
9710 		 * we don't need to log this; rather, we clear the
9711 		 * MD_ACL bit when we reconnect.
9712 		 */
9713 
9714 		error = cachefs_cacheacl(newcp, &vsec);
9715 		if (error != 0)
9716 			goto out;
9717 	}
9718 
9719 	newcp->c_metadata.md_aclclass = 07; /* XXX check posix */
9720 	newcp->c_metadata.md_flags |= MD_ACL;
9721 	newcp->c_flags |= CN_UPDATED;
9722 
9723 out:
9724 
9725 	if (gotvsec) {
9726 		if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL))
9727 			kmem_free(vsec.vsa_aclentp,
9728 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9729 		if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL))
9730 			kmem_free(vsec.vsa_dfaclentp,
9731 			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
9732 	}
9733 }
9734 
9735 /*
9736  * this is translated from the UFS code for access checking.
9737  */
9738 
9739 static int
9740 cachefs_access_local(void *vcp, int mode, cred_t *cr)
9741 {
9742 	cnode_t *cp = vcp;
9743 	fscache_t *fscp = C_TO_FSCACHE(cp);
9744 	int shift = 0;
9745 
9746 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9747 
9748 	if (mode & VWRITE) {
9749 		/*
9750 		 * Disallow write attempts on read-only
9751 		 * file systems, unless the file is special.
9752 		 */
9753 		struct vnode *vp = CTOV(cp);
9754 		if (vn_is_readonly(vp)) {
9755 			if (!IS_DEVVP(vp)) {
9756 				return (EROFS);
9757 			}
9758 		}
9759 	}
9760 
9761 	/*
9762 	 * if we need to do ACLs, do it.  this works whether anyone
9763 	 * has explicitly made an ACL or not.
9764 	 */
9765 
9766 	if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
9767 	    (cachefs_vtype_aclok(CTOV(cp))))
9768 		return (cachefs_acl_access(cp, mode, cr));
9769 
9770 	if (crgetuid(cr) != cp->c_attr.va_uid) {
9771 		shift += 3;
9772 		if (!groupmember(cp->c_attr.va_gid, cr))
9773 			shift += 3;
9774 	}
9775 
9776 	/* compute missing mode bits */
9777 	mode &= ~(cp->c_attr.va_mode << shift);
9778 
9779 	if (mode == 0)
9780 		return (0);
9781 
9782 	return (secpolicy_vnode_access(cr, CTOV(cp), cp->c_attr.va_uid, mode));
9783 }
9784 
9785 /*
9786  * This is transcribed from ufs_acl_access().  If that changes, then
9787  * this should, too.
9788  *
9789  * Check the cnode's ACL's to see if this mode of access is
9790  * allowed; return 0 if allowed, EACCES if not.
9791  *
9792  * We follow the procedure defined in Sec. 3.3.5, ACL Access
9793  * Check Algorithm, of the POSIX 1003.6 Draft Standard.
9794  */
9795 
9796 #define	ACL_MODE_CHECK(M, PERM, C, I) ((((M) & (PERM)) == (M)) ? 0 : \
9797 		    secpolicy_vnode_access(C, CTOV(I), owner, (M) & ~(PERM)))
9798 
9799 static int
9800 cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr)
9801 {
9802 	int error = 0;
9803 
9804 	fscache_t *fscp = C_TO_FSCACHE(cp);
9805 
9806 	int mask = ~0;
9807 	int ismask = 0;
9808 
9809 	int gperm = 0;
9810 	int ngroup = 0;
9811 
9812 	vsecattr_t vsec;
9813 	int gotvsec = 0;
9814 	aclent_t *aclp;
9815 
9816 	uid_t owner = cp->c_attr.va_uid;
9817 
9818 	int i;
9819 
9820 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9821 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
9822 
9823 	/*
9824 	 * strictly speaking, we shouldn't set VSA_DFACL and DFACLCNT,
9825 	 * but then i believe we'd be the only thing exercising those
9826 	 * code paths -- probably a bad thing.
9827 	 */
9828 
9829 	bzero(&vsec, sizeof (vsec));
9830 	vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9831 
9832 	/* XXX KLUDGE! correct insidious 0-class problem */
9833 	if (cp->c_metadata.md_aclclass == 0 &&
9834 	    fscp->fs_cdconnected == CFS_CD_CONNECTED)
9835 		cachefs_purgeacl(cp);
9836 again:
9837 	if (cp->c_metadata.md_flags & MD_ACL) {
9838 		error = cachefs_getaclfromcache(cp, &vsec);
9839 		if (error != 0) {
9840 #ifdef CFSDEBUG
9841 			if (error != ETIMEDOUT)
9842 				CFS_DEBUG(CFSDEBUG_VOPS)
9843 					printf("cachefs_acl_access():"
9844 					    "error %d from getaclfromcache()\n",
9845 					    error);
9846 #endif /* CFSDEBUG */
9847 			if ((cp->c_metadata.md_flags & MD_ACL) == 0) {
9848 				goto again;
9849 			} else {
9850 				goto out;
9851 			}
9852 		}
9853 	} else {
9854 		if (cp->c_backvp == NULL) {
9855 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
9856 				error = cachefs_getbackvp(fscp, cp);
9857 			else
9858 				error = ETIMEDOUT;
9859 		}
9860 		if (error == 0)
9861 			error = VOP_GETSECATTR(cp->c_backvp, &vsec, 0, cr);
9862 		if (error != 0) {
9863 #ifdef CFSDEBUG
9864 			CFS_DEBUG(CFSDEBUG_VOPS)
9865 				printf("cachefs_acl_access():"
9866 				    "error %d from getsecattr(backvp)\n",
9867 				    error);
9868 #endif /* CFSDEBUG */
9869 			goto out;
9870 		}
9871 		if ((cp->c_flags & CN_NOCACHE) == 0 &&
9872 		    !CFS_ISFS_BACKFS_NFSV4(fscp))
9873 			(void) cachefs_cacheacl(cp, &vsec);
9874 	}
9875 	gotvsec = 1;
9876 
9877 	ASSERT(error == 0);
9878 	for (i = 0; i < vsec.vsa_aclcnt; i++) {
9879 		aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9880 		switch (aclp->a_type) {
9881 		case USER_OBJ:
9882 			/*
9883 			 * this might look cleaner in the 2nd loop
9884 			 * below, but we do it here as an
9885 			 * optimization.
9886 			 */
9887 
9888 			owner = aclp->a_id;
9889 			if (crgetuid(cr) == owner) {
9890 				error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
9891 							cr, cp);
9892 				goto out;
9893 			}
9894 			break;
9895 
9896 		case CLASS_OBJ:
9897 			mask = aclp->a_perm;
9898 			ismask = 1;
9899 			break;
9900 		}
9901 	}
9902 
9903 	ASSERT(error == 0);
9904 	for (i = 0; i < vsec.vsa_aclcnt; i++) {
9905 		aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9906 		switch (aclp->a_type) {
9907 		case USER:
9908 			if (crgetuid(cr) == aclp->a_id) {
9909 				error = ACL_MODE_CHECK(mode,
9910 					(aclp->a_perm & mask) << 6, cr, cp);
9911 				goto out;
9912 			}
9913 			break;
9914 
9915 		case GROUP_OBJ:
9916 			if (groupmember(aclp->a_id, cr)) {
9917 				++ngroup;
9918 				gperm |= aclp->a_perm;
9919 				if (! ismask) {
9920 					error = ACL_MODE_CHECK(mode,
9921 							aclp->a_perm << 6,
9922 							cr, cp);
9923 					goto out;
9924 				}
9925 			}
9926 			break;
9927 
9928 		case GROUP:
9929 			if (groupmember(aclp->a_id, cr)) {
9930 				++ngroup;
9931 				gperm |= aclp->a_perm;
9932 			}
9933 			break;
9934 
9935 		case OTHER_OBJ:
9936 			if (ngroup == 0) {
9937 				error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
9938 						cr, cp);
9939 				goto out;
9940 			}
9941 			break;
9942 
9943 		default:
9944 			break;
9945 		}
9946 	}
9947 
9948 	ASSERT(ngroup > 0);
9949 	error = ACL_MODE_CHECK(mode, (gperm & mask) << 6, cr, cp);
9950 
9951 out:
9952 	if (gotvsec) {
9953 		if (vsec.vsa_aclcnt && vsec.vsa_aclentp)
9954 			kmem_free(vsec.vsa_aclentp,
9955 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9956 		if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp)
9957 			kmem_free(vsec.vsa_dfaclentp,
9958 			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
9959 	}
9960 
9961 	return (error);
9962 }
9963 
9964 /*
9965  * see if permissions allow for removal of the given file from
9966  * the given directory.
9967  */
9968 static int
9969 cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr)
9970 {
9971 	uid_t uid;
9972 	/*
9973 	 * If the containing directory is sticky, the user must:
9974 	 *  - own the directory, or
9975 	 *  - own the file, or
9976 	 *  - be able to write the file (if it's a plain file), or
9977 	 *  - be sufficiently privileged.
9978 	 */
9979 	if ((dcp->c_attr.va_mode & S_ISVTX) &&
9980 	    ((uid = crgetuid(cr)) != dcp->c_attr.va_uid) &&
9981 	    (uid != cp->c_attr.va_uid) &&
9982 	    (cp->c_attr.va_type != VREG ||
9983 	    cachefs_access_local(cp, VWRITE, cr) != 0))
9984 		return (secpolicy_vnode_remove(cr));
9985 
9986 	return (0);
9987 }
9988 
9989 /*
9990  * Returns a new name, may even be unique.
9991  * Stolen from nfs code.
9992  * Since now we will use renaming to .cfs* in place of .nfs*
9993  * for CacheFS. Both NFS and CacheFS will rename opened files.
9994  */
9995 static char cachefs_prefix[] = ".cfs";
9996 kmutex_t cachefs_newnum_lock;
9997 
9998 static char *
9999 cachefs_newname(void)
10000 {
10001 	static uint_t newnum = 0;
10002 	char *news;
10003 	char *s, *p;
10004 	uint_t id;
10005 
10006 	mutex_enter(&cachefs_newnum_lock);
10007 	if (newnum == 0) {
10008 		newnum = gethrestime_sec() & 0xfffff;
10009 		newnum |= 0x10000;
10010 	}
10011 	id = newnum++;
10012 	mutex_exit(&cachefs_newnum_lock);
10013 
10014 	news = cachefs_kmem_alloc(MAXNAMELEN, KM_SLEEP);
10015 	s = news;
10016 	p = cachefs_prefix;
10017 	while (*p != '\0')
10018 		*s++ = *p++;
10019 	while (id != 0) {
10020 		*s++ = "0123456789ABCDEF"[id & 0x0f];
10021 		id >>= 4;
10022 	}
10023 	*s = '\0';
10024 	return (news);
10025 }
10026 
10027 /*
10028  * Called to rename the specified file to a temporary file so
10029  * operations to the file after remove work.
10030  * Must call this routine with the dir c_rwlock held as a writer.
10031  */
10032 static int
10033 /*ARGSUSED*/
10034 cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm, cred_t *cr)
10035 {
10036 	cnode_t *cp = VTOC(vp);
10037 	char *tmpname;
10038 	fscache_t *fscp = C_TO_FSCACHE(cp);
10039 	int error;
10040 
10041 	ASSERT(RW_WRITE_HELD(&(VTOC(dvp)->c_rwlock)));
10042 
10043 	/* get the new name for the file */
10044 	tmpname = cachefs_newname();
10045 
10046 	/* do the link */
10047 	if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
10048 		error = cachefs_link_connected(dvp, vp, tmpname, cr);
10049 	else
10050 		error = cachefs_link_disconnected(dvp, vp, tmpname, cr);
10051 	if (error) {
10052 		cachefs_kmem_free(tmpname, MAXNAMELEN);
10053 		return (error);
10054 	}
10055 
10056 	mutex_enter(&cp->c_statelock);
10057 	if (cp->c_unldvp) {
10058 		VN_RELE(cp->c_unldvp);
10059 		cachefs_kmem_free(cp->c_unlname, MAXNAMELEN);
10060 		crfree(cp->c_unlcred);
10061 	}
10062 
10063 	VN_HOLD(dvp);
10064 	cp->c_unldvp = dvp;
10065 	crhold(cr);
10066 	cp->c_unlcred = cr;
10067 	cp->c_unlname = tmpname;
10068 
10069 	/* drop the backvp so NFS does not also do a rename */
10070 	mutex_exit(&cp->c_statelock);
10071 
10072 	return (0);
10073 }
10074 
10075 /*
10076  * Marks the cnode as modified.
10077  */
10078 static void
10079 cachefs_modified(cnode_t *cp)
10080 {
10081 	fscache_t *fscp = C_TO_FSCACHE(cp);
10082 	struct vattr va;
10083 	int error;
10084 
10085 	ASSERT(MUTEX_HELD(&cp->c_statelock));
10086 	ASSERT(cp->c_metadata.md_rlno);
10087 
10088 	/* if not on the modify list */
10089 	if (cp->c_metadata.md_rltype != CACHEFS_RL_MODIFIED) {
10090 		/* put on modified list, also marks the file as modified */
10091 		cachefs_rlent_moveto(fscp->fs_cache, CACHEFS_RL_MODIFIED,
10092 		    cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks);
10093 		cp->c_metadata.md_rltype = CACHEFS_RL_MODIFIED;
10094 		cp->c_flags |= CN_UPDATED;
10095 
10096 		/* if a modified regular file that is not local */
10097 		if (((cp->c_id.cid_flags & CFS_CID_LOCAL) == 0) &&
10098 		    (cp->c_metadata.md_flags & MD_FILE) &&
10099 		    (cp->c_attr.va_type == VREG)) {
10100 
10101 			if (cp->c_frontvp == NULL)
10102 				(void) cachefs_getfrontfile(cp);
10103 			if (cp->c_frontvp) {
10104 				/* identify file so fsck knows it is modified */
10105 				va.va_mode = 0766;
10106 				va.va_mask = AT_MODE;
10107 				error = VOP_SETATTR(cp->c_frontvp, &va,
10108 				    0, kcred, NULL);
10109 				if (error) {
10110 					cmn_err(CE_WARN,
10111 					    "Cannot change ff mode.\n");
10112 				}
10113 			}
10114 		}
10115 	}
10116 }
10117 
10118 /*
10119  * Marks the cnode as modified.
10120  * Allocates a rl slot for the cnode if necessary.
10121  * Returns 0 for success, !0 if cannot get an rl slot.
10122  */
10123 static int
10124 cachefs_modified_alloc(cnode_t *cp)
10125 {
10126 	fscache_t *fscp = C_TO_FSCACHE(cp);
10127 	filegrp_t *fgp = cp->c_filegrp;
10128 	int error;
10129 	rl_entry_t rl_ent;
10130 
10131 	ASSERT(MUTEX_HELD(&cp->c_statelock));
10132 
10133 	/* get the rl slot if needed */
10134 	if (cp->c_metadata.md_rlno == 0) {
10135 		/* get a metadata slot if we do not have one yet */
10136 		if (cp->c_flags & CN_ALLOC_PENDING) {
10137 			if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
10138 				(void) filegrp_allocattr(cp->c_filegrp);
10139 			}
10140 			error = filegrp_create_metadata(cp->c_filegrp,
10141 			    &cp->c_metadata, &cp->c_id);
10142 			if (error)
10143 				return (error);
10144 			cp->c_flags &= ~CN_ALLOC_PENDING;
10145 		}
10146 
10147 		/* get a free rl entry */
10148 		rl_ent.rl_fileno = cp->c_id.cid_fileno;
10149 		rl_ent.rl_local = (cp->c_id.cid_flags & CFS_CID_LOCAL) ? 1 : 0;
10150 		rl_ent.rl_fsid = fscp->fs_cfsid;
10151 		rl_ent.rl_attrc = 0;
10152 		error = cachefs_rl_alloc(fscp->fs_cache, &rl_ent,
10153 		    &cp->c_metadata.md_rlno);
10154 		if (error)
10155 			return (error);
10156 		cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
10157 
10158 		/* hold the filegrp so the attrcache file is not gc */
10159 		error = filegrp_ffhold(fgp);
10160 		if (error) {
10161 			cachefs_rlent_moveto(fscp->fs_cache,
10162 			    CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
10163 			cp->c_metadata.md_rlno = 0;
10164 			return (error);
10165 		}
10166 	}
10167 	cachefs_modified(cp);
10168 	return (0);
10169 }
10170 
10171 int
10172 cachefs_vtype_aclok(vnode_t *vp)
10173 {
10174 	vtype_t *vtp, oktypes[] = {VREG, VDIR, VFIFO, VNON};
10175 
10176 	if (vp->v_type == VNON)
10177 		return (0);
10178 
10179 	for (vtp = oktypes; *vtp != VNON; vtp++)
10180 		if (vp->v_type == *vtp)
10181 			break;
10182 
10183 	return (*vtp != VNON);
10184 }
10185 
10186 static int
10187 cachefs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr)
10188 {
10189 	int error = 0;
10190 	fscache_t *fscp = C_TO_FSCACHE(VTOC(vp));
10191 
10192 	/* Assert cachefs compatibility if NFSv4 is in use */
10193 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
10194 	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(vp));
10195 
10196 	if (cmd == _PC_FILESIZEBITS) {
10197 		u_offset_t maxsize = fscp->fs_offmax;
10198 		(*valp) = 0;
10199 		while (maxsize != 0) {
10200 			maxsize >>= 1;
10201 			(*valp)++;
10202 		}
10203 		(*valp)++;
10204 	} else
10205 		error = fs_pathconf(vp, cmd, valp, cr);
10206 
10207 	return (error);
10208 }
10209