xref: /titanic_44/usr/src/uts/common/fs/cachefs/cachefs_vnops.c (revision 4a6ec905b96eb96a398c346f59e034a90ce8ad37)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/cred.h>
32 #include <sys/proc.h>
33 #include <sys/user.h>
34 #include <sys/time.h>
35 #include <sys/vnode.h>
36 #include <sys/vfs.h>
37 #include <sys/vfs_opreg.h>
38 #include <sys/file.h>
39 #include <sys/filio.h>
40 #include <sys/uio.h>
41 #include <sys/buf.h>
42 #include <sys/mman.h>
43 #include <sys/tiuser.h>
44 #include <sys/pathname.h>
45 #include <sys/dirent.h>
46 #include <sys/conf.h>
47 #include <sys/debug.h>
48 #include <sys/vmsystm.h>
49 #include <sys/fcntl.h>
50 #include <sys/flock.h>
51 #include <sys/swap.h>
52 #include <sys/errno.h>
53 #include <sys/sysmacros.h>
54 #include <sys/disp.h>
55 #include <sys/kmem.h>
56 #include <sys/cmn_err.h>
57 #include <sys/vtrace.h>
58 #include <sys/mount.h>
59 #include <sys/bootconf.h>
60 #include <sys/dnlc.h>
61 #include <sys/stat.h>
62 #include <sys/acl.h>
63 #include <sys/policy.h>
64 #include <rpc/types.h>
65 
66 #include <vm/hat.h>
67 #include <vm/as.h>
68 #include <vm/page.h>
69 #include <vm/pvn.h>
70 #include <vm/seg.h>
71 #include <vm/seg_map.h>
72 #include <vm/seg_vn.h>
73 #include <vm/rm.h>
74 #include <sys/fs/cachefs_fs.h>
75 #include <sys/fs/cachefs_dir.h>
76 #include <sys/fs/cachefs_dlog.h>
77 #include <sys/fs/cachefs_ioctl.h>
78 #include <sys/fs/cachefs_log.h>
79 #include <fs/fs_subr.h>
80 
int cachefs_dnlc;	/* use dnlc, debugging */

/*
 * Forward declarations of file-local helper routines.  Many operations
 * come in a "_connected" and a "_disconnected" flavor.
 */
static void cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp,
    cred_t *cr);
static void cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap,
    cred_t *cr);
static void cachefs_createacl(cnode_t *dcp, cnode_t *newcp);
static int cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec);
static int cachefs_getacldirvp(cnode_t *cp);
static void cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec);
static int cachefs_access_local(void *cp, int mode, cred_t *cr);
static int cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr);
static int cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen,
    u_offset_t iooff, cred_t *cr);
static int cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen,
    u_offset_t iooff, cred_t *cr);
static int cachefs_setattr_connected(vnode_t *vp, vattr_t *vap, int flags,
    cred_t *cr, caller_context_t *ct);
static int cachefs_setattr_disconnected(vnode_t *vp, vattr_t *vap,
    int flags, cred_t *cr, caller_context_t *ct);
static int cachefs_access_connected(struct vnode *vp, int mode,
    int flags, cred_t *cr);
static int cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp,
    cred_t *cr);
static int cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva,
    char *tnm, cred_t *cr);
static int cachefs_symlink_disconnected(vnode_t *dvp, char *lnm,
    vattr_t *tva, char *tnm, cred_t *cr);
static int cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm,
    cred_t *cr);
static int cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp,
    char *tnm, cred_t *cr);
static int cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap,
    vnode_t **vpp, cred_t *cr);
static int cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
    vnode_t **vpp, cred_t *cr);
static int cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr);
static int cachefs_rmdir_connected(vnode_t *dvp, char *nm,
    vnode_t *cdir, cred_t *cr, vnode_t *vp);
static int cachefs_rmdir_disconnected(vnode_t *dvp, char *nm,
    vnode_t *cdir, cred_t *cr, vnode_t *vp);
static char *cachefs_newname(void);
static int cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm,
    cred_t *cr);
static int cachefs_rename_connected(vnode_t *odvp, char *onm,
    vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp);
static int cachefs_rename_disconnected(vnode_t *odvp, char *onm,
    vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp);
static int cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr,
    int *eofp);
static int cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop,
    cred_t *cr, int *eofp);
static int cachefs_readback_translate(cnode_t *cp, uio_t *uiop,
	cred_t *cr, int *eofp);

static int cachefs_setattr_common(vnode_t *vp, vattr_t *vap, int flags,
    cred_t *cr, caller_context_t *ct);

/*
 * Prototypes for the vnode operation entry points (see
 * cachefs_vnodeops_template) and for helpers closely tied to them.
 */
static	int	cachefs_open(struct vnode **, int, cred_t *,
			caller_context_t *);
static	int	cachefs_close(struct vnode *, int, int, offset_t,
			cred_t *, caller_context_t *);
static	int	cachefs_read(struct vnode *, struct uio *, int, cred_t *,
			caller_context_t *);
static	int	cachefs_write(struct vnode *, struct uio *, int, cred_t *,
			caller_context_t *);
static	int	cachefs_ioctl(struct vnode *, int, intptr_t, int, cred_t *,
			int *, caller_context_t *);
static	int	cachefs_getattr(struct vnode *, struct vattr *, int,
			cred_t *, caller_context_t *);
static	int	cachefs_setattr(struct vnode *, struct vattr *,
			int, cred_t *, caller_context_t *);
static	int	cachefs_access(struct vnode *, int, int, cred_t *,
			caller_context_t *);
static	int	cachefs_lookup(struct vnode *, char *, struct vnode **,
			struct pathname *, int, struct vnode *, cred_t *,
			caller_context_t *, int *, pathname_t *);
static	int	cachefs_create(struct vnode *, char *, struct vattr *,
			enum vcexcl, int, struct vnode **, cred_t *, int,
			caller_context_t *, vsecattr_t *);
static	int	cachefs_create_connected(vnode_t *dvp, char *nm,
			vattr_t *vap, enum vcexcl exclusive, int mode,
			vnode_t **vpp, cred_t *cr);
static	int	cachefs_create_disconnected(vnode_t *dvp, char *nm,
			vattr_t *vap, enum vcexcl exclusive, int mode,
			vnode_t **vpp, cred_t *cr);
static	int	cachefs_remove(struct vnode *, char *, cred_t *,
			caller_context_t *, int);
static	int	cachefs_link(struct vnode *, struct vnode *, char *,
			cred_t *, caller_context_t *, int);
static	int	cachefs_rename(struct vnode *, char *, struct vnode *,
			char *, cred_t *, caller_context_t *, int);
static	int	cachefs_mkdir(struct vnode *, char *, struct
			vattr *, struct vnode **, cred_t *, caller_context_t *,
			int, vsecattr_t *);
static	int	cachefs_rmdir(struct vnode *, char *, struct vnode *,
			cred_t *, caller_context_t *, int);
static	int	cachefs_readdir(struct vnode *, struct uio *,
			cred_t *, int *, caller_context_t *, int);
static	int	cachefs_symlink(struct vnode *, char *, struct vattr *,
			char *, cred_t *, caller_context_t *, int);
static	int	cachefs_readlink(struct vnode *, struct uio *, cred_t *,
			caller_context_t *);
static int cachefs_readlink_connected(vnode_t *vp, uio_t *uiop, cred_t *cr);
static int cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop);
static	int	cachefs_fsync(struct vnode *, int, cred_t *,
			caller_context_t *);
static	void	cachefs_inactive(struct vnode *, cred_t *, caller_context_t *);
static	int	cachefs_fid(struct vnode *, struct fid *, caller_context_t *);
static	int	cachefs_rwlock(struct vnode *, int, caller_context_t *);
static	void	cachefs_rwunlock(struct vnode *, int, caller_context_t *);
static	int	cachefs_seek(struct vnode *, offset_t, offset_t *,
			caller_context_t *);
static	int	cachefs_frlock(struct vnode *, int, struct flock64 *,
			int, offset_t, struct flk_callback *, cred_t *,
			caller_context_t *);
static	int	cachefs_space(struct vnode *, int, struct flock64 *, int,
			offset_t, cred_t *, caller_context_t *);
static	int	cachefs_realvp(struct vnode *, struct vnode **,
			caller_context_t *);
static	int	cachefs_getpage(struct vnode *, offset_t, size_t, uint_t *,
			struct page *[], size_t, struct seg *, caddr_t,
			enum seg_rw, cred_t *, caller_context_t *);
static	int	cachefs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
			struct page *[], size_t, struct seg *, caddr_t,
			enum seg_rw, cred_t *);
static	int	cachefs_getapage_back(struct vnode *, u_offset_t, size_t,
		uint_t *, struct page *[], size_t, struct seg *, caddr_t,
			enum seg_rw, cred_t *);
static	int	cachefs_putpage(struct vnode *, offset_t, size_t, int,
			cred_t *, caller_context_t *);
static	int	cachefs_map(struct vnode *, offset_t, struct as *,
			caddr_t *, size_t, uchar_t, uchar_t, uint_t, cred_t *,
			caller_context_t *);
static	int	cachefs_addmap(struct vnode *, offset_t, struct as *,
			caddr_t, size_t, uchar_t, uchar_t, uint_t, cred_t *,
			caller_context_t *);
static	int	cachefs_delmap(struct vnode *, offset_t, struct as *,
			caddr_t, size_t, uint_t, uint_t, uint_t, cred_t *,
			caller_context_t *);
static int	cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec,
			int flag, cred_t *cr, caller_context_t *);
static int	cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec,
			int flag, cred_t *cr, caller_context_t *);
static	int	cachefs_shrlock(vnode_t *, int, struct shrlock *, int,
			cred_t *, caller_context_t *);
static int cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
    cred_t *cr);
static int cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec,
    int flag, cred_t *cr);

static int	cachefs_dump(struct vnode *, caddr_t, offset_t, offset_t,
			caller_context_t *);
static int	cachefs_pageio(struct vnode *, page_t *,
		    u_offset_t, size_t, int, cred_t *, caller_context_t *);
static int	cachefs_writepage(struct vnode *vp, caddr_t base,
		    int tcount, struct uio *uiop);
static int	cachefs_pathconf(vnode_t *, int, ulong_t *, cred_t *,
			caller_context_t *);

/*
 * Helpers that hand an operation to the back file system when it is
 * NFSv4 (cachefs pass-through support for NFSv4).
 */
static int	cachefs_read_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
			cred_t *cr, caller_context_t *ct);
static int	cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
			cred_t *cr, caller_context_t *ct);
static int	cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap,
			int flags, cred_t *cr, caller_context_t *ct);
static int	cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr,
			vnode_t *vp);
static int	cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off,
			size_t len, uint_t *protp, struct page *pl[],
			size_t plsz, struct seg *seg, caddr_t addr,
			enum seg_rw rw, cred_t *cr);
static int	cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off,
			size_t len, int flags, cred_t *cr);
static int	cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off,
			struct as *as, caddr_t *addrp, size_t len, uchar_t prot,
			uchar_t maxprot, uint_t flags, cred_t *cr);
static int	cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd,
			struct flock64 *bfp, int flag, offset_t offset,
			cred_t *cr, caller_context_t *ct);
261 
/*
 * The cachefs vnode operations vector.  It is filled in by
 * cachefs_init_vnops() and handed out by cachefs_getvnodeops().
 */
struct vnodeops *cachefs_vnodeops;

/*
 * Template pairing each vnode operation name with the cachefs routine
 * that implements it.  cachefs_init_vnops() passes this table to
 * vn_make_ops().  The table ends with a NULL, NULL pair.
 */
static const fs_operation_def_t cachefs_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = cachefs_open },
	VOPNAME_CLOSE,		{ .vop_close = cachefs_close },
	VOPNAME_READ,		{ .vop_read = cachefs_read },
	VOPNAME_WRITE,		{ .vop_write = cachefs_write },
	VOPNAME_IOCTL,		{ .vop_ioctl = cachefs_ioctl },
	VOPNAME_GETATTR,	{ .vop_getattr = cachefs_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = cachefs_setattr },
	VOPNAME_ACCESS,		{ .vop_access = cachefs_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = cachefs_lookup },
	VOPNAME_CREATE,		{ .vop_create = cachefs_create },
	VOPNAME_REMOVE,		{ .vop_remove = cachefs_remove },
	VOPNAME_LINK,		{ .vop_link = cachefs_link },
	VOPNAME_RENAME,		{ .vop_rename = cachefs_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = cachefs_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = cachefs_rmdir },
	VOPNAME_READDIR,	{ .vop_readdir = cachefs_readdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = cachefs_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = cachefs_readlink },
	VOPNAME_FSYNC,		{ .vop_fsync = cachefs_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = cachefs_inactive },
	VOPNAME_FID,		{ .vop_fid = cachefs_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = cachefs_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = cachefs_rwunlock },
	VOPNAME_SEEK,		{ .vop_seek = cachefs_seek },
	VOPNAME_FRLOCK,		{ .vop_frlock = cachefs_frlock },
	VOPNAME_SPACE,		{ .vop_space = cachefs_space },
	VOPNAME_REALVP,		{ .vop_realvp = cachefs_realvp },
	VOPNAME_GETPAGE,	{ .vop_getpage = cachefs_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = cachefs_putpage },
	VOPNAME_MAP,		{ .vop_map = cachefs_map },
	VOPNAME_ADDMAP,		{ .vop_addmap = cachefs_addmap },
	VOPNAME_DELMAP,		{ .vop_delmap = cachefs_delmap },
	VOPNAME_DUMP,		{ .vop_dump = cachefs_dump },
	VOPNAME_PATHCONF,	{ .vop_pathconf = cachefs_pathconf },
	VOPNAME_PAGEIO,		{ .vop_pageio = cachefs_pageio },
	VOPNAME_SETSECATTR,	{ .vop_setsecattr = cachefs_setsecattr },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = cachefs_getsecattr },
	VOPNAME_SHRLOCK,	{ .vop_shrlock = cachefs_shrlock },
	NULL,			NULL
};

/* forward declarations of statics */
static void cachefs_modified(cnode_t *cp);
static int cachefs_modified_alloc(cnode_t *cp);
309 
310 int
311 cachefs_init_vnops(char *name)
312 {
313 	return (vn_make_ops(name,
314 	    cachefs_vnodeops_template, &cachefs_vnodeops));
315 }
316 
317 struct vnodeops *
318 cachefs_getvnodeops(void)
319 {
320 	return (cachefs_vnodeops);
321 }
322 
/*
 * cachefs_open - open entry point (installed as vop_open).
 *
 *	vpp	address of the vnode being opened
 *	flag	open flags (FREAD and/or FWRITE are examined here)
 *	cr	caller credentials; saved in the cnode if it has none yet
 *	ct	caller context, passed to the back file system VOP_OPEN
 *
 * Fails with EPERM outside the global zone and with EISDIR when a
 * directory or symbolic link is opened for writing.  Otherwise the
 * routine obtains access through cachefs_cd_access() and, holding
 * c_statelock, handles the disconnected and connected cases separately.
 * Timeouts reported by CFS_TIMEOUT() cause the whole sequence to be
 * retried.
 *
 * Returns 0 on success or an errno value.
 */
static int
cachefs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
{
	int error = 0;
	cnode_t *cp = VTOC(*vpp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int held = 0;		/* non-zero while cachefs_cd_access is held */
	int type;
	int connected = 0;	/* passed to cachefs_cd_access() on (re)try */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_open: ENTER vpp %p flag %x\n",
		    (void *)vpp, flag);
#endif
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}
	/* directories and symlinks may not be opened for writing */
	if ((flag & FWRITE) &&
	    ((*vpp)->v_type == VDIR || (*vpp)->v_type == VLNK)) {
		error = EISDIR;
		goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the open operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			goto out;
		held = 1;

		mutex_enter(&cp->c_statelock);

		/* grab creds if we do not have any yet */
		if (cp->c_cred == NULL) {
			crhold(cr);
			cp->c_cred = cr;
		}
		/*
		 * Flag the cnode as needing an open; the flag is cleared
		 * below if the open is passed to an existing back vnode.
		 */
		cp->c_flags |= CN_NEEDOPEN;

		/* if we are disconnected */
		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
			/* if we cannot write to the file system */
			if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp)) {
				mutex_exit(&cp->c_statelock);
				/* retry, this time asking for connected */
				connected = 1;
				continue;
			}
			/*
			 * Allow read only requests to continue
			 */
			if ((flag & (FWRITE|FREAD)) == FREAD) {
				/* track the flag for opening the backvp */
				cp->c_rdcnt++;
				mutex_exit(&cp->c_statelock);
				error = 0;
				break;
			}

			/*
			 * check credentials  - if this procs
			 * credentials don't match the creds in the
			 * cnode disallow writing while disconnected.
			 */
			if (crcmp(cp->c_cred, CRED()) != 0 &&
			    secpolicy_vnode_access(CRED(), *vpp,
			    cp->c_attr.va_uid, VWRITE) != 0) {
				mutex_exit(&cp->c_statelock);
				connected = 1;
				continue;
			}
			/* to get here, we know that the WRITE flag is on */
			cp->c_wrcnt++;
			if (flag & FREAD)
				cp->c_rdcnt++;
		}

		/* else if we are connected */
		else {
			/* if cannot use the cached copy of the file */
			if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp) &&
			    ((cp->c_flags & CN_NOCACHE) == 0))
				cachefs_nocache(cp);

			/* pass open to the back file */
			if (cp->c_backvp) {
				cp->c_flags &= ~CN_NEEDOPEN;
				CFS_DPRINT_BACKFS_NFSV4(fscp,
				    ("cachefs_open (nfsv4): cnode %p, "
				    "backvp %p\n", cp, cp->c_backvp));
				error = VOP_OPEN(&cp->c_backvp, flag, cr, ct);
				if (CFS_TIMEOUT(fscp, error)) {
					/*
					 * Timed out: drop the lock and our
					 * access, report it, and start over.
					 */
					mutex_exit(&cp->c_statelock);
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					continue;
				} else if (error) {
					/* any other error ends the open */
					mutex_exit(&cp->c_statelock);
					break;
				}
			} else {
				/* backvp will be VOP_OPEN'd later */
				if (flag & FREAD)
					cp->c_rdcnt++;
				if (flag & FWRITE)
					cp->c_wrcnt++;
			}

			/*
			 * Now perform a consistency check on the file.
			 * If strict consistency then force a check to
			 * the backfs even if the timeout has not expired
			 * for close-to-open consistency.
			 */
			type = 0;
			if (fscp->fs_consttype == CFS_FS_CONST_STRICT)
				type = C_BACK_CHECK;
			error = CFSOP_CHECK_COBJECT(fscp, cp, type, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				mutex_exit(&cp->c_statelock);
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				continue;
			}
		}
		mutex_exit(&cp->c_statelock);
		break;
	}
	/* every path that breaks out of the loop may still hold access */
	if (held)
		cachefs_cd_release(fscp);
out:
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_open: EXIT vpp %p error %d\n",
		    (void *)vpp, error);
#endif
	return (error);
}
485 
/*
 * cachefs_close - close entry point (installed as vop_close).
 *
 *	vp	vnode being closed
 *	flag	open flags; FWRITE on a regular file triggers a flush of
 *		the cachefs pages on the last close
 *	count	a value greater than 1 is treated as "not the last close"
 *	offset, cr, ct	handed to the back file system VOP_CLOSE
 *
 * When the closing process is the registered daemon (fs_cddaemonid),
 * the vnode is the file system root and count is 1, the daemon state is
 * reset and 0 is returned immediately.
 *
 * Otherwise access is obtained through cachefs_cd_access() and the
 * close is retried whenever CFS_TIMEOUT() reports a timeout.  On the
 * last-close path, if nothing else failed, the error saved in
 * cp->c_error is returned and c_error is cleared.
 *
 * Returns 0 on success or an errno value.
 */
/* ARGSUSED */
static int
cachefs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
	caller_context_t *ct)
{
	int error = 0;
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int held = 0;		/* non-zero while cachefs_cd_access is held */
	int connected = 0;	/* passed to cachefs_cd_access() on (re)try */
	int close_cnt = 1;	/* count handed to the last-close VOP_CLOSE */
	cachefscache_t *cachep;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_close: ENTER vp %p\n", (void *)vp);
#endif
	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the close operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	/*
	 * File could have been passed in or inherited from the global zone, so
	 * we don't want to flat out reject the request; we'll just leave things
	 * the way they are and let the backfs (NFS) deal with it.
	 */
	/* get rid of any local locks */
	if (CFS_ISFS_LLOCK(fscp)) {
		(void) cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
	}

	/* clean up if this is the daemon closing down */
	if ((fscp->fs_cddaemonid == ttoproc(curthread)->p_pid) &&
	    ((ttoproc(curthread)->p_pid) != 0) &&
	    (vp == fscp->fs_rootvp) &&
	    (count == 1)) {
		mutex_enter(&fscp->fs_cdlock);
		fscp->fs_cddaemonid = 0;
		/* with a dlog file present the fs is left disconnected */
		if (fscp->fs_dlogfile)
			fscp->fs_cdconnected = CFS_CD_DISCONNECTED;
		else
			fscp->fs_cdconnected = CFS_CD_CONNECTED;
		cv_broadcast(&fscp->fs_cdwaitcv);
		mutex_exit(&fscp->fs_cdlock);
		if (fscp->fs_flags & CFS_FS_ROOTFS) {
			cachep = fscp->fs_cache;
			mutex_enter(&cachep->c_contentslock);
			ASSERT(cachep->c_rootdaemonid != 0);
			cachep->c_rootdaemonid = 0;
			mutex_exit(&cachep->c_contentslock);
		}
		return (0);
	}

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			goto out;
		held = 1;
		/* request satisfied; later retries must ask again */
		connected = 0;

		/* if not the last close */
		if (count > 1) {
			/* nothing to pass to the back fs while disconnected */
			if (fscp->fs_cdconnected != CFS_CD_CONNECTED)
				goto out;
			mutex_enter(&cp->c_statelock);
			if (cp->c_backvp) {
				CFS_DPRINT_BACKFS_NFSV4(fscp,
				    ("cachefs_close (nfsv4): cnode %p, "
				    "backvp %p\n", cp, cp->c_backvp));
				error = VOP_CLOSE(cp->c_backvp, flag, count,
				    offset, cr, ct);
				if (CFS_TIMEOUT(fscp, error)) {
					mutex_exit(&cp->c_statelock);
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					continue;
				}
			}
			mutex_exit(&cp->c_statelock);
			goto out;
		}

		/*
		 * If the file is an unlinked file, then flush the lookup
		 * cache so that inactive will be called if this is
		 * the last reference.  It will invalidate all of the
		 * cached pages, without writing them out.  Writing them
		 * out is not required because they will be written to a
		 * file which will be immediately removed.
		 */
		if (cp->c_unldvp != NULL) {
			dnlc_purge_vp(vp);
			mutex_enter(&cp->c_statelock);
			error = cp->c_error;
			cp->c_error = 0;
			mutex_exit(&cp->c_statelock);
			/* always call VOP_CLOSE() for back fs vnode */
		}

		/* force dirty data to stable storage */
		else if ((vp->v_type == VREG) && (flag & FWRITE) &&
		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
			/* clean the cachefs pages synchronously */
			error = cachefs_putpage_common(vp, (offset_t)0,
			    0, 0, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					continue;
				} else {
					/* retry asking for connected access */
					connected = 1;
					continue;
				}
			}

			/* if no space left in cache, wait until connected */
			if ((error == ENOSPC) &&
			    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
				connected = 1;
				continue;
			}

			/* clear the cnode error if putpage worked */
			if ((error == 0) && cp->c_error) {
				mutex_enter(&cp->c_statelock);
				cp->c_error = 0;
				mutex_exit(&cp->c_statelock);
			}

			/* if any other important error */
			if (cp->c_error) {
				/* get rid of the pages */
				(void) cachefs_putpage_common(vp,
				    (offset_t)0, 0, B_INVAL | B_FORCE, cr);
				dnlc_purge_vp(vp);
			}
		}

		/* last close: pass it to the back vnode when connected */
		mutex_enter(&cp->c_statelock);
		if (cp->c_backvp &&
		    (fscp->fs_cdconnected == CFS_CD_CONNECTED)) {
			error = VOP_CLOSE(cp->c_backvp, flag, close_cnt,
			    offset, cr, ct);
			if (CFS_TIMEOUT(fscp, error)) {
				mutex_exit(&cp->c_statelock);
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				/* don't decrement the vnode counts again */
				close_cnt = 0;
				continue;
			}
		}
		mutex_exit(&cp->c_statelock);
		break;
	}

	/* report the saved cnode error if nothing else failed; clear it */
	mutex_enter(&cp->c_statelock);
	if (!error)
		error = cp->c_error;
	cp->c_error = 0;
	mutex_exit(&cp->c_statelock);

out:
	if (held)
		cachefs_cd_release(fscp);
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_close: EXIT vp %p\n", (void *)vp);
#endif
	return (error);
}
681 
/*
 * cachefs_read - read entry point (installed as vop_read).
 *
 *	vp	vnode to read from; must be a regular file
 *	uiop	describes the transfer (offset, residual count, target)
 *	ioflag	passed to the back file system on the NFSv4 path only
 *	cr	caller credentials, used for the consistency check
 *	ct	caller context, used by chklock() and the NFSv4 path
 *
 * The caller must hold cp->c_rwlock as a reader (asserted below).
 *
 * Returns EPERM outside the global zone, EISDIR if vp is not a regular
 * file, EINVAL for a negative offset, and 0 immediately when there is
 * nothing to transfer.  For an NFSv4 back file system the read is
 * handed to cachefs_read_backfs_nfsv4().  Otherwise data is moved in
 * chunks of at most MAXBSIZE through segkmap, after a consistency check
 * on each chunk; reading stops at end of file (cp->c_size).
 */
/*ARGSUSED*/
static int
cachefs_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
	caller_context_t *ct)
{
	struct cnode *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	register u_offset_t off;
	register int mapoff;
	register caddr_t base;
	int n;
	offset_t diff;		/* bytes between current offset and c_size */
	/*
	 * NOTE(review): flags is initialized once and only ever OR-ed
	 * into, so once SM_DONTNEED is set it is also passed to
	 * segmap_release() for every later chunk -- confirm intended.
	 */
	uint_t flags = 0;
	int error = 0;

#if 0
	if (vp->v_flag & VNOCACHE)
		flags = SM_INVAL;
#endif
	if (getzoneid() != GLOBAL_ZONEID)
		return (EPERM);
	if (vp->v_type != VREG)
		return (EISDIR);

	ASSERT(RW_READ_HELD(&cp->c_rwlock));

	if (uiop->uio_resid == 0)
		return (0);


	if (uiop->uio_loffset < (offset_t)0)
		return (EINVAL);

	/*
	 * Call backfilesystem to read if NFSv4, the cachefs code
	 * does the read from the back filesystem asynchronously
	 * which is not supported by pass-through functionality.
	 */
	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_read_backfs_nfsv4(vp, uiop, ioflag, cr, ct);
		goto out;
	}

	/* honor mandatory locks on the range being read */
	if (MANDLOCK(vp, cp->c_attr.va_mode)) {
		error = chklock(vp, FREAD, (offset_t)uiop->uio_loffset,
		    uiop->uio_resid, uiop->uio_fmode, ct);
		if (error)
			return (error);
	}

	/*
	 * Sit in a loop and transfer (uiomove) the data in up to
	 * MAXBSIZE chunks. Each chunk is mapped into the kernel's
	 * address space as needed and then released.
	 */
	do {
		/*
		 *	off	Offset of current MAXBSIZE chunk
		 *	mapoff	Offset within the current chunk
		 *	n	Number of bytes to move from this chunk
		 *	base	kernel address of mapped in chunk
		 */
		off = uiop->uio_loffset & (offset_t)MAXBMASK;
		mapoff = uiop->uio_loffset & MAXBOFFSET;
		n = MAXBSIZE - mapoff;
		if (n > uiop->uio_resid)
			n = (uint_t)uiop->uio_resid;

		/* perform consistency check */
		error = cachefs_cd_access(fscp, 0, 0);
		if (error)
			break;
		mutex_enter(&cp->c_statelock);
		error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
		/* sample the distance to end of file under the lock */
		diff = cp->c_size - uiop->uio_loffset;
		mutex_exit(&cp->c_statelock);
		if (CFS_TIMEOUT(fscp, error)) {
			cachefs_cd_release(fscp);
			cachefs_cd_timedout(fscp);
			/*
			 * Clear the error so the loop condition holds
			 * and this chunk is tried again.
			 */
			error = 0;
			continue;
		}
		cachefs_cd_release(fscp);

		if (error)
			break;

		/* at or past end of file: nothing more to read */
		if (diff <= (offset_t)0)
			break;
		/* do not read beyond end of file */
		if (diff < (offset_t)n)
			n = diff;

		base = segmap_getmapflt(segkmap, vp, off, (uint_t)n, 1, S_READ);

		error = segmap_fault(kas.a_hat, segkmap, base, n,
		    F_SOFTLOCK, S_READ);
		if (error) {
			(void) segmap_release(segkmap, base, 0);
			/* turn the fault code into an errno value */
			if (FC_CODE(error) == FC_OBJERR)
				error =  FC_ERRNO(error);
			else
				error = EIO;
			break;
		}
		error = uiomove(base+mapoff, n, UIO_READ, uiop);
		(void) segmap_fault(kas.a_hat, segkmap, base, n,
		    F_SOFTUNLOCK, S_READ);
		if (error == 0) {
			/*
			 * if we read a whole page(s), or to eof,
			 *  we won't need this page(s) again soon.
			 */
			if (n + mapoff == MAXBSIZE ||
			    uiop->uio_loffset == cp->c_size)
				flags |= SM_DONTNEED;
		}
		(void) segmap_release(segkmap, base, flags);
	} while (error == 0 && uiop->uio_resid > 0);

out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_read: EXIT error %d resid %ld\n", error,
		    uiop->uio_resid);
#endif
	return (error);
}
809 
810 /*
811  * cachefs_read_backfs_nfsv4
812  *
813  * Call NFSv4 back filesystem to handle the read (cachefs
814  * pass-through support for NFSv4).
815  */
816 static int
817 cachefs_read_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
818 			caller_context_t *ct)
819 {
820 	cnode_t *cp = VTOC(vp);
821 	fscache_t *fscp = C_TO_FSCACHE(cp);
822 	vnode_t *backvp;
823 	int error;
824 
825 	/*
826 	 * For NFSv4 pass-through to work, only connected operation
827 	 * is supported, the cnode backvp must exist, and cachefs
828 	 * optional (eg., disconnectable) flags are turned off. Assert
829 	 * these conditions for the read operation.
830 	 */
831 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
832 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
833 
834 	/* Call backfs vnode op after extracting backvp */
835 	mutex_enter(&cp->c_statelock);
836 	backvp = cp->c_backvp;
837 	mutex_exit(&cp->c_statelock);
838 
839 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_read_backfs_nfsv4: cnode %p, "
840 	    "backvp %p\n", cp, backvp));
841 
842 	(void) VOP_RWLOCK(backvp, V_WRITELOCK_FALSE, ct);
843 	error = VOP_READ(backvp, uiop, ioflag, cr, ct);
844 	VOP_RWUNLOCK(backvp, V_WRITELOCK_FALSE, ct);
845 
846 	/* Increment cache miss counter */
847 	fscp->fs_stats.st_misses++;
848 
849 	return (error);
850 }
851 
852 /*ARGSUSED*/
853 static int
854 cachefs_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
855 	caller_context_t *ct)
856 {
857 	struct cnode *cp = VTOC(vp);
858 	fscache_t *fscp = C_TO_FSCACHE(cp);
859 	int error = 0;
860 	u_offset_t off;
861 	caddr_t base;
862 	uint_t bsize;
863 	uint_t flags;
864 	int n, on;
865 	rlim64_t limit = uiop->uio_llimit;
866 	ssize_t resid;
867 	offset_t offset;
868 	offset_t remainder;
869 
870 #ifdef CFSDEBUG
871 	CFS_DEBUG(CFSDEBUG_VOPS)
872 		printf(
873 		"cachefs_write: ENTER vp %p offset %llu count %ld cflags %x\n",
874 		    (void *)vp, uiop->uio_loffset, uiop->uio_resid,
875 		    cp->c_flags);
876 #endif
877 	if (getzoneid() != GLOBAL_ZONEID) {
878 		error = EPERM;
879 		goto out;
880 	}
881 	if (vp->v_type != VREG) {
882 		error = EISDIR;
883 		goto out;
884 	}
885 
886 	ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
887 
888 	if (uiop->uio_resid == 0) {
889 		goto out;
890 	}
891 
892 	/* Call backfilesystem to write if NFSv4 */
893 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
894 		error = cachefs_write_backfs_nfsv4(vp, uiop, ioflag, cr, ct);
895 		goto out2;
896 	}
897 
898 	if (MANDLOCK(vp, cp->c_attr.va_mode)) {
899 		error = chklock(vp, FWRITE, (offset_t)uiop->uio_loffset,
900 		    uiop->uio_resid, uiop->uio_fmode, ct);
901 		if (error)
902 			goto out;
903 	}
904 
905 	if (ioflag & FAPPEND) {
906 		for (;;) {
907 			/* do consistency check to get correct file size */
908 			error = cachefs_cd_access(fscp, 0, 1);
909 			if (error)
910 				goto out;
911 			mutex_enter(&cp->c_statelock);
912 			error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
913 			uiop->uio_loffset = cp->c_size;
914 			mutex_exit(&cp->c_statelock);
915 			if (CFS_TIMEOUT(fscp, error)) {
916 				cachefs_cd_release(fscp);
917 				cachefs_cd_timedout(fscp);
918 				continue;
919 			}
920 			cachefs_cd_release(fscp);
921 			if (error)
922 				goto out;
923 			break;
924 		}
925 	}
926 
927 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
928 		limit = MAXOFFSET_T;
929 
930 	if (uiop->uio_loffset >= limit) {
931 		proc_t *p = ttoproc(curthread);
932 
933 		mutex_enter(&p->p_lock);
934 		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
935 		    p, RCA_UNSAFE_SIGINFO);
936 		mutex_exit(&p->p_lock);
937 		error = EFBIG;
938 		goto out;
939 	}
940 	if (uiop->uio_loffset > fscp->fs_offmax) {
941 		error = EFBIG;
942 		goto out;
943 	}
944 
945 	if (limit > fscp->fs_offmax)
946 		limit = fscp->fs_offmax;
947 
948 	if (uiop->uio_loffset < (offset_t)0) {
949 		error = EINVAL;
950 		goto out;
951 	}
952 
953 	offset = uiop->uio_loffset + uiop->uio_resid;
954 	/*
955 	 * Check to make sure that the process will not exceed
956 	 * its limit on file size.  It is okay to write up to
957 	 * the limit, but not beyond.  Thus, the write which
958 	 * reaches the limit will be short and the next write
959 	 * will return an error.
960 	 */
961 	remainder = 0;
962 	if (offset > limit) {
963 		remainder = (int)(offset - (u_offset_t)limit);
964 		uiop->uio_resid = limit - uiop->uio_loffset;
965 		if (uiop->uio_resid <= 0) {
966 			proc_t *p = ttoproc(curthread);
967 
968 			uiop->uio_resid += remainder;
969 			mutex_enter(&p->p_lock);
970 			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
971 			    p->p_rctls, p, RCA_UNSAFE_SIGINFO);
972 			mutex_exit(&p->p_lock);
973 			error = EFBIG;
974 			goto out;
975 		}
976 	}
977 
978 	resid = uiop->uio_resid;
979 	offset = uiop->uio_loffset;
980 	bsize = vp->v_vfsp->vfs_bsize;
981 
982 	/* loop around and do the write in MAXBSIZE chunks */
983 	do {
984 		/* mapping offset */
985 		off = uiop->uio_loffset & (offset_t)MAXBMASK;
986 		on = uiop->uio_loffset & MAXBOFFSET; /* Rel. offset */
987 		n = MAXBSIZE - on;
988 		if (n > uiop->uio_resid)
989 			n = (int)uiop->uio_resid;
990 		base = segmap_getmap(segkmap, vp, off);
991 		error = cachefs_writepage(vp, (base + on), n, uiop);
992 		if (error == 0) {
993 			flags = 0;
994 			/*
995 			 * Have written a whole block.Start an
996 			 * asynchronous write and mark the buffer to
997 			 * indicate that it won't be needed again
998 			 * soon.
999 			 */
1000 			if (n + on == bsize) {
1001 				flags = SM_WRITE |SM_ASYNC |SM_DONTNEED;
1002 			}
1003 #if 0
1004 			/* XXX need to understand this */
1005 			if ((ioflag & (FSYNC|FDSYNC)) ||
1006 			    (cp->c_backvp && vn_has_flocks(cp->c_backvp))) {
1007 				flags &= ~SM_ASYNC;
1008 				flags |= SM_WRITE;
1009 			}
1010 #else
1011 			if (ioflag & (FSYNC|FDSYNC)) {
1012 				flags &= ~SM_ASYNC;
1013 				flags |= SM_WRITE;
1014 			}
1015 #endif
1016 			error = segmap_release(segkmap, base, flags);
1017 		} else {
1018 			(void) segmap_release(segkmap, base, 0);
1019 		}
1020 	} while (error == 0 && uiop->uio_resid > 0);
1021 
1022 out:
1023 	if (error == EINTR && (ioflag & (FSYNC|FDSYNC))) {
1024 		uiop->uio_resid = resid;
1025 		uiop->uio_loffset = offset;
1026 	} else
1027 		uiop->uio_resid += remainder;
1028 
1029 out2:
1030 #ifdef CFSDEBUG
1031 	CFS_DEBUG(CFSDEBUG_VOPS)
1032 		printf("cachefs_write: EXIT error %d\n", error);
1033 #endif
1034 	return (error);
1035 }
1036 
1037 /*
1038  * cachefs_write_backfs_nfsv4
1039  *
1040  * Call NFSv4 back filesystem to handle the write (cachefs
1041  * pass-through support for NFSv4).
1042  */
1043 static int
1044 cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
1045 			caller_context_t *ct)
1046 {
1047 	cnode_t *cp = VTOC(vp);
1048 	fscache_t *fscp = C_TO_FSCACHE(cp);
1049 	vnode_t *backvp;
1050 	int error;
1051 
1052 	/*
1053 	 * For NFSv4 pass-through to work, only connected operation
1054 	 * is supported, the cnode backvp must exist, and cachefs
1055 	 * optional (eg., disconnectable) flags are turned off. Assert
1056 	 * these conditions for the read operation.
1057 	 */
1058 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
1059 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
1060 
1061 	/* Call backfs vnode op after extracting the backvp */
1062 	mutex_enter(&cp->c_statelock);
1063 	backvp = cp->c_backvp;
1064 	mutex_exit(&cp->c_statelock);
1065 
1066 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_write_backfs_nfsv4: cnode %p, "
1067 	    "backvp %p\n", cp, backvp));
1068 	(void) VOP_RWLOCK(backvp, V_WRITELOCK_TRUE, ct);
1069 	error = VOP_WRITE(backvp, uiop, ioflag, cr, ct);
1070 	VOP_RWUNLOCK(backvp, V_WRITELOCK_TRUE, ct);
1071 
1072 	return (error);
1073 }
1074 
1075 /*
1076  * see if we've charged ourselves for frontfile data at
1077  * the given offset.  If not, allocate a block for it now.
1078  */
1079 static int
1080 cachefs_charge_page(struct cnode *cp, u_offset_t offset)
1081 {
1082 	u_offset_t blockoff;
1083 	int error;
1084 	int inc;
1085 
1086 	ASSERT(MUTEX_HELD(&cp->c_statelock));
1087 	ASSERT(PAGESIZE <= MAXBSIZE);
1088 
1089 	error = 0;
1090 	blockoff = offset & (offset_t)MAXBMASK;
1091 
1092 	/* get the front file if necessary so allocblocks works */
1093 	if ((cp->c_frontvp == NULL) &&
1094 	    ((cp->c_flags & CN_NOCACHE) == 0)) {
1095 		(void) cachefs_getfrontfile(cp);
1096 	}
1097 	if (cp->c_flags & CN_NOCACHE)
1098 		return (1);
1099 
1100 	if (cachefs_check_allocmap(cp, blockoff))
1101 		return (0);
1102 
1103 	for (inc = PAGESIZE; inc < MAXBSIZE; inc += PAGESIZE)
1104 		if (cachefs_check_allocmap(cp, blockoff+inc))
1105 			return (0);
1106 
1107 	error = cachefs_allocblocks(C_TO_FSCACHE(cp)->fs_cache, 1,
1108 	    cp->c_metadata.md_rltype);
1109 	if (error == 0) {
1110 		cp->c_metadata.md_frontblks++;
1111 		cp->c_flags |= CN_UPDATED;
1112 	}
1113 	return (error);
1114 }
1115 
1116 /*
1117  * Called only by cachefs_write to move up to tcount bytes of user data
 * into the segkmap mapping at "base", one page-bounded piece at a time.
 *
 *	vp     - cachefs vnode being written
1118  *	base   - base address kernel addr space
1119  *	tcount - Total bytes to move - < MAXBSIZE
 *	uiop   - source of the data; uio_loffset/uio_resid advance as
 *		 uiomove() consumes it
 *
 * After each piece is copied, c_size and c_attr.va_size are raised to
 * cover the new data, the cnode is marked CDIRTY and, for pages that
 * were created rather than faulted in, the front file allocmap is
 * updated (or the file is nocached if no space could be charged).
 *
 * Returns 0 on success or an errno value from cachefs_allocblocks(),
 * segmap_fault(), uiomove() or cachefs_cd_access().
1120  */
1121 static int
1122 cachefs_writepage(vnode_t *vp, caddr_t base, int tcount, uio_t *uiop)
1123 {
1124 	struct cnode *cp =  VTOC(vp);
1125 	fscache_t *fscp = C_TO_FSCACHE(cp);
1126 	register int n;
1127 	register u_offset_t offset;
1128 	int error = 0, terror;
1129 	extern struct as kas;
1130 	u_offset_t lastpage_off;
	/*
	 * NOTE(review): pagecreate is set to 1 the first time a page is
	 * created and is never reset for later loop iterations, so a
	 * following iteration that takes the segmap_fault() path still
	 * sees pagecreate != 0 when deciding whether to update the
	 * allocmap.  Confirm whether that is intended.
	 */
1131 	int pagecreate = 0;
1132 	int newpage;
1133 
1134 #ifdef CFSDEBUG
1135 	CFS_DEBUG(CFSDEBUG_VOPS)
1136 		printf(
1137 		    "cachefs_writepage: ENTER vp %p offset %llu len %ld\\\n",
1138 		    (void *)vp, uiop->uio_loffset, uiop->uio_resid);
1139 #endif
1140 
1141 	/*
1142 	 * Move bytes in PAGESIZE chunks. We must avoid spanning pages in
1143 	 * uiomove() because page faults may cause the cache to be invalidated
1144 	 * out from under us.
1145 	 */
1146 	do {
1147 		offset = uiop->uio_loffset;
		/* start of the page holding the current last byte of the file */
1148 		lastpage_off = (cp->c_size - 1) & (offset_t)PAGEMASK;
1149 
1150 		/*
1151 		 * If not connected then need to make sure we have space
1152 		 * to perform the write.  We could make this check
1153 		 * a little tighter by only doing it if we are growing the file.
		 *
		 * The block is allocated and immediately freed again; this
		 * only probes that space is available.
1154 		 */
1155 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
1156 			error = cachefs_allocblocks(fscp->fs_cache, 1,
1157 			    cp->c_metadata.md_rltype);
1158 			if (error)
1159 				break;
1160 			cachefs_freeblocks(fscp->fs_cache, 1,
1161 			    cp->c_metadata.md_rltype);
1162 		}
1163 
1164 		/*
1165 		 * n is the number of bytes required to satisfy the request
1166 		 * or the number of bytes to fill out the page.
1167 		 */
1168 		n = (int)(PAGESIZE - ((uintptr_t)base & PAGEOFFSET));
1169 		if (n > tcount)
1170 			n = tcount;
1171 
1172 		/*
1173 		 * The number of bytes of data in the last page cannot
1174 		 * be accurately determined while the page is being
1175 		 * uiomove'd to and the size of the file being updated.
1176 		 * Thus, inform threads which need to know accurately
1177 		 * how much data is in the last page of the file.  They
1178 		 * will not do the i/o immediately, but will arrange for
1179 		 * the i/o to happen later when this modify operation
1180 		 * will have finished.
1181 		 *
1182 		 * in similar NFS code, this is done right before the
1183 		 * uiomove(), which is best.  but here in cachefs, we
1184 		 * have two uiomove()s, so we must do it here.
1185 		 */
1186 		ASSERT(!(cp->c_flags & CN_CMODINPROG));
1187 		mutex_enter(&cp->c_statelock);
1188 		cp->c_flags |= CN_CMODINPROG;
		/* c_modaddr records the MAXBSIZE-aligned region being modified */
1189 		cp->c_modaddr = (offset & (offset_t)MAXBMASK);
1190 		mutex_exit(&cp->c_statelock);
1191 
1192 		/*
1193 		 * Check to see if we can skip reading in the page
1194 		 * and just allocate the memory.  We can do this
1195 		 * if we are going to rewrite the entire mapping
1196 		 * or if we are going to write to or beyond the current
1197 		 * end of file from the beginning of the mapping.
1198 		 */
1199 		if ((offset > (lastpage_off + PAGEOFFSET)) ||
1200 		    ((cp->c_size == 0) && (offset < PAGESIZE)) ||
1201 		    ((uintptr_t)base & PAGEOFFSET) == 0 && (n == PAGESIZE ||
1202 		    ((offset + n) >= cp->c_size))) {
1203 			pagecreate = 1;
1204 
1205 			/*
1206 			 * segmap_pagecreate() returns 1 if it calls
1207 			 * page_create_va() to allocate any pages.
1208 			 */
1209 			newpage = segmap_pagecreate(segkmap,
1210 			    (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK),
1211 			    PAGESIZE, 0);
1212 			/* do not zero page if we are overwriting all of it */
1213 			if (!((((uintptr_t)base & PAGEOFFSET) == 0) &&
1214 			    (n == PAGESIZE))) {
1215 				(void) kzero((void *)
1216 				    ((uintptr_t)base & (uintptr_t)PAGEMASK),
1217 				    PAGESIZE);
1218 			}
1219 			error = uiomove(base, n, UIO_WRITE, uiop);
1220 
1221 			/*
1222 			 * Unlock the page allocated by page_create_va()
1223 			 * in segmap_pagecreate()
1224 			 */
1225 			if (newpage)
1226 				segmap_pageunlock(segkmap,
1227 				    (caddr_t)((uintptr_t)base &
1228 				    (uintptr_t)PAGEMASK),
1229 				    PAGESIZE, S_WRITE);
1230 		} else {
1231 			/*
1232 			 * KLUDGE ! Use segmap_fault instead of faulting and
1233 			 * using as_fault() to avoid a recursive readers lock
1234 			 * on kas.
1235 			 */
1236 			error = segmap_fault(kas.a_hat, segkmap, (caddr_t)
1237 			    ((uintptr_t)base & (uintptr_t)PAGEMASK),
1238 			    PAGESIZE, F_SOFTLOCK, S_WRITE);
1239 			if (error) {
				/* translate the fault code into an errno */
1240 				if (FC_CODE(error) == FC_OBJERR)
1241 					error =  FC_ERRNO(error);
1242 				else
1243 					error = EIO;
1244 				break;
1245 			}
1246 			error = uiomove(base, n, UIO_WRITE, uiop);
			/* drop the soft lock taken by the F_SOFTLOCK fault above */
1247 			(void) segmap_fault(kas.a_hat, segkmap, (caddr_t)
1248 			    ((uintptr_t)base & (uintptr_t)PAGEMASK),
1249 			    PAGESIZE, F_SOFTUNLOCK, S_WRITE);
1250 		}
1251 		n = (int)(uiop->uio_loffset - offset); /* n = # bytes written */
1252 		base += n;
1253 		tcount -= n;
1254 
1255 		/* get access to the file system */
1256 		if ((terror = cachefs_cd_access(fscp, 0, 1)) != 0) {
1257 			error = terror;
1258 			break;
1259 		}
1260 
1261 		/*
1262 		 * cp->c_attr.va_size is the maximum number of
1263 		 * bytes known to be in the file.
1264 		 * Make sure it is at least as high as the
1265 		 * last byte we just wrote into the buffer.
1266 		 */
1267 		mutex_enter(&cp->c_statelock);
1268 		if (cp->c_size < uiop->uio_loffset) {
1269 			cp->c_size = uiop->uio_loffset;
1270 		}
1271 		if (cp->c_size != cp->c_attr.va_size) {
1272 			cp->c_attr.va_size = cp->c_size;
1273 			cp->c_flags |= CN_UPDATED;
1274 		}
1275 		/* c_size is now correct, so we can clear modinprog */
1276 		cp->c_flags &= ~CN_CMODINPROG;
1277 		if (error == 0) {
1278 			cp->c_flags |= CDIRTY;
1279 			if (pagecreate && (cp->c_flags & CN_NOCACHE) == 0) {
1280 				/*
1281 				 * if we're not in NOCACHE mode
1282 				 * (i.e., single-writer), we update the
1283 				 * allocmap here rather than waiting until
1284 				 * cachefspush is called.  This prevents
1285 				 * getpage from clustering up pages from
1286 				 * the backfile and stomping over the changes
1287 				 * we make here.
1288 				 */
1289 				if (cachefs_charge_page(cp, offset) == 0) {
1290 					cachefs_update_allocmap(cp,
1291 					    offset & (offset_t)PAGEMASK,
1292 					    (size_t)PAGESIZE);
1293 				}
1294 
1295 				/* else we ran out of space */
1296 				else {
1297 					/* nocache file if connected */
1298 					if (fscp->fs_cdconnected ==
1299 					    CFS_CD_CONNECTED)
1300 						cachefs_nocache(cp);
1301 					/*
1302 					 * If disconnected then cannot
1303 					 * nocache the file.  Let it have
1304 					 * the space.
1305 					 */
1306 					else {
1307 						cp->c_metadata.md_frontblks++;
1308 						cp->c_flags |= CN_UPDATED;
1309 						cachefs_update_allocmap(cp,
1310 						    offset & (offset_t)PAGEMASK,
1311 						    (size_t)PAGESIZE);
1312 					}
1313 				}
1314 			}
1315 		}
1316 		mutex_exit(&cp->c_statelock);
1317 		cachefs_cd_release(fscp);
1318 	} while (tcount > 0 && error == 0);
1319 
	/*
	 * The loop can be left via "break" after CN_CMODINPROG was set
	 * but before it was cleared; make sure it does not stay set.
	 */
1320 	if (cp->c_flags & CN_CMODINPROG) {
1321 		/* XXX assert error != 0?  FC_ERRNO() makes this more risky. */
1322 		mutex_enter(&cp->c_statelock);
1323 		cp->c_flags &= ~CN_CMODINPROG;
1324 		mutex_exit(&cp->c_statelock);
1325 	}
1326 
1327 #ifdef CFS_CD_DEBUG
1328 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
1329 #endif
1330 
1331 #ifdef CFSDEBUG
1332 	CFS_DEBUG(CFSDEBUG_VOPS)
1333 		printf("cachefs_writepage: EXIT error %d\n", error);
1334 #endif
1335 
1336 	return (error);
1337 }
1338 
1339 /*
1340  * Pushes out pages to the back and/or front file system.
 *
 *	vp    - cachefs vnode owning the pages; must not be read-only
 *	pp    - a dirty page to write; a kluster around it is built here
 *	offp  - if non-NULL, set to the file offset of the i/o
 *	lenp  - if non-NULL, set to the length of the i/o
 *	flags - B_* flags for the write; B_ASYNC is not allowed
 *	cr    - credentials used for the back file system write
 *
 * When connected, the data is written to the back file first and then,
 * if that succeeded and the file system is non-shared and the cnode is
 * cached, to the front file as well (front file errors are ignored).
 * When disconnected only the front file is written.
 *
 * If a cachefs_writepage() is in progress on an overlapping region
 * (CN_CMODINPROG), nothing is written: the pages are re-marked modified
 * and 0 is returned so the push is retried later.
 *
 * Returns 0 on success or an errno value.
1341  */
1342 static int
1343 cachefs_push(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
1344     int flags, cred_t *cr)
1345 {
1346 	struct cnode *cp = VTOC(vp);
1347 	struct buf *bp;
1348 	int error;
1349 	fscache_t *fscp = C_TO_FSCACHE(cp);
1350 	u_offset_t iooff;
1351 	size_t iolen;
1352 	u_offset_t lbn;
1353 	u_offset_t lbn_off;
1354 	uint_t bsize;
1355 
1356 	ASSERT((flags & B_ASYNC) == 0);
1357 	ASSERT(!vn_is_readonly(vp));
1358 	ASSERT(pp != NULL);
1359 	ASSERT(cr != NULL);
1360 
	/* lbn_off is the offset of the block (or page) containing pp */
1361 	bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
1362 	lbn = pp->p_offset / bsize;
1363 	lbn_off = lbn * bsize;
1364 
1365 	/*
1366 	 * Find a kluster that fits in one block, or in
1367 	 * one page if pages are bigger than blocks.  If
1368 	 * there is less file space allocated than a whole
1369 	 * page, we'll shorten the i/o request below.
1370 	 */
1371 
1372 	pp = pvn_write_kluster(vp, pp, &iooff, &iolen, lbn_off,
1373 	    roundup(bsize, PAGESIZE), flags);
1374 
1375 	/*
1376 	 * The CN_CMODINPROG flag makes sure that we use a correct
1377 	 * value of c_size, below.  CN_CMODINPROG is set in
1378 	 * cachefs_writepage().  When CN_CMODINPROG is set it
1379 	 * indicates that a uiomove() is in progress and the c_size
1380 	 * has not been made consistent with the new size of the
1381 	 * file. When the uiomove() completes the c_size is updated
1382 	 * and the CN_CMODINPROG flag is cleared.
1383 	 *
1384 	 * The CN_CMODINPROG flag makes sure that cachefs_push_front
1385 	 * and cachefs_push_connected see a consistent value of
1386 	 * c_size.  Without this handshaking, it is possible that
1387 	 * these routines will pick up the old value of c_size before
1388 	 * the uiomove() in cachefs_writepage() completes.  This will
1389 	 * result in the vn_rdwr() being too small, and data loss.
1390 	 *
1391 	 * More precisely, there is a window between the time the
1392 	 * uiomove() completes and the time the c_size is updated. If
1393 	 * a VOP_PUTPAGE() operation intervenes in this window, the
1394 	 * page will be picked up, because it is dirty; it will be
1395 	 * unlocked, unless it was pagecreate'd. When the page is
1396 	 * picked up as dirty, the dirty bit is reset
1397 	 * (pvn_getdirty()). In cachefs_push_connected(), c_size is
1398 	 * checked.  This will still be the old size.  Therefore, the
1399 	 * page will not be written out to the correct length, and the
1400 	 * page will be clean, so the data may disappear.
1401 	 */
	/* unlocked peek first; the flag is re-checked under c_statelock */
1402 	if (cp->c_flags & CN_CMODINPROG) {
1403 		mutex_enter(&cp->c_statelock);
1404 		if ((cp->c_flags & CN_CMODINPROG) &&
1405 		    cp->c_modaddr + MAXBSIZE > iooff &&
1406 		    cp->c_modaddr < iooff + iolen) {
1407 			page_t *plist;
1408 
1409 			/*
1410 			 * A write is in progress for this region of
1411 			 * the file.  If we did not detect
1412 			 * CN_CMODINPROG here then this path through
1413 			 * cachefs_push_connected() would eventually
1414 			 * do the vn_rdwr() and may not write out all
1415 			 * of the data in the pages.  We end up losing
1416 			 * data. So we decide to set the modified bit
1417 			 * on each page in the page list and mark the
1418 			 * cnode with CDIRTY.  This push will be
1419 			 * restarted at some later time.
1420 			 */
1421 
1422 			plist = pp;
1423 			while (plist != NULL) {
1424 				pp = plist;
1425 				page_sub(&plist, pp);
1426 				hat_setmod(pp);
1427 				page_io_unlock(pp);
1428 				page_unlock(pp);
1429 			}
1430 			cp->c_flags |= CDIRTY;
1431 			mutex_exit(&cp->c_statelock);
1432 			if (offp)
1433 				*offp = iooff;
1434 			if (lenp)
1435 				*lenp = iolen;
1436 			return (0);
1437 		}
1438 		mutex_exit(&cp->c_statelock);
1439 	}
1440 
1441 	/*
1442 	 * Set the pages up for pageout.
1443 	 */
1444 	bp = pageio_setup(pp, iolen, CTOV(cp), B_WRITE | flags);
1445 	if (bp == NULL) {
1446 
1447 		/*
1448 		 * currently, there is no way for pageio_setup() to
1449 		 * return NULL, since it uses its own scheme for
1450 		 * kmem_alloc()ing that shouldn't return NULL, and
1451 		 * since pageio_setup() itself dereferences the thing
1452 		 * it's about to return.  still, we need to be ready
1453 		 * in case this ever does start happening.
1454 		 */
1455 
1456 		error = ENOMEM;
1457 		goto writedone;
1458 	}
1459 	/*
1460 	 * pageio_setup should have set b_addr to 0.  This
1461 	 * is correct since we want to do I/O on a page
1462 	 * boundary.  bp_mapin will use this addr to calculate
1463 	 * an offset, and then set b_addr to the kernel virtual
1464 	 * address it allocated for us.
1465 	 */
1466 	bp->b_edev = 0;
1467 	bp->b_dev = 0;
1468 	bp->b_lblkno = (diskaddr_t)lbtodb(iooff);
1469 	bp_mapin(bp);
1470 
	/* do not write past end of file: clip the i/o length to c_size */
1471 	iolen  = cp->c_size - ldbtob(bp->b_blkno);
1472 	if (iolen > bp->b_bcount)
1473 		iolen  = bp->b_bcount;
1474 
1475 	/* if connected */
1476 	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1477 		/* write to the back file first */
1478 		error = cachefs_push_connected(vp, bp, iolen, iooff, cr);
1479 
1480 		/* write to the front file if allowed */
1481 		if ((error == 0) && CFS_ISFS_NONSHARED(fscp) &&
1482 		    ((cp->c_flags & CN_NOCACHE) == 0)) {
1483 			/* try to write to the front file */
1484 			(void) cachefs_push_front(vp, bp, iolen, iooff, cr);
1485 		}
1486 	}
1487 
1488 	/* else if disconnected */
1489 	else {
1490 		/* try to write to the front file */
1491 		error = cachefs_push_front(vp, bp, iolen, iooff, cr);
1492 	}
1493 
1494 	bp_mapout(bp);
1495 	pageio_done(bp);
1496 
1497 writedone:
1498 
	/* finish the pages, flagging B_ERROR if the write failed */
1499 	pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
1500 	if (offp)
1501 		*offp = iooff;
1502 	if (lenp)
1503 		*lenp = iolen;
1504 
1505 	/* XXX ask bob mastors how to fix this someday */
	/*
	 * On failure, invalidate the cached object and record the error
	 * in c_error, except that: ENOSPC is only recorded when connected
	 * or the file system is soft, and timeouts and EINTR are not
	 * recorded at all.  On success while connected, note the
	 * modification with the consistency layer.
	 */
1506 	mutex_enter(&cp->c_statelock);
1507 	if (error) {
1508 		if (error == ENOSPC) {
1509 			if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) ||
1510 			    CFS_ISFS_SOFT(fscp)) {
1511 				CFSOP_INVALIDATE_COBJECT(fscp, cp, cr);
1512 				cp->c_error = error;
1513 			}
1514 		} else if ((CFS_TIMEOUT(fscp, error) == 0) &&
1515 		    (error != EINTR)) {
1516 			CFSOP_INVALIDATE_COBJECT(fscp, cp, cr);
1517 			cp->c_error = error;
1518 		}
1519 	} else if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1520 		CFSOP_MODIFY_COBJECT(fscp, cp, cr);
1521 	}
1522 	mutex_exit(&cp->c_statelock);
1523 
1524 	return (error);
1525 }
1526 
1527 /*
1528  * Pushes out pages to the back file system.
1529  */
1530 static int
1531 cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen,
1532     u_offset_t iooff, cred_t *cr)
1533 {
1534 	struct cnode *cp = VTOC(vp);
1535 	int error = 0;
1536 	int mode = 0;
1537 	fscache_t *fscp = C_TO_FSCACHE(cp);
1538 	ssize_t resid;
1539 	vnode_t *backvp;
1540 
1541 	/* get the back file if necessary */
1542 	mutex_enter(&cp->c_statelock);
1543 	if (cp->c_backvp == NULL) {
1544 		error = cachefs_getbackvp(fscp, cp);
1545 		if (error) {
1546 			mutex_exit(&cp->c_statelock);
1547 			goto out;
1548 		}
1549 	}
1550 	backvp = cp->c_backvp;
1551 	VN_HOLD(backvp);
1552 	mutex_exit(&cp->c_statelock);
1553 
1554 	if (CFS_ISFS_NONSHARED(fscp) && CFS_ISFS_SNR(fscp))
1555 		mode = FSYNC;
1556 
1557 	/* write to the back file */
1558 	error = bp->b_error = vn_rdwr(UIO_WRITE, backvp, bp->b_un.b_addr,
1559 	    iolen, iooff, UIO_SYSSPACE, mode,
1560 	    RLIM64_INFINITY, cr, &resid);
1561 	if (error) {
1562 #ifdef CFSDEBUG
1563 		CFS_DEBUG(CFSDEBUG_VOPS | CFSDEBUG_BACK)
1564 			printf("cachefspush: error %d cr %p\n",
1565 			    error, (void *)cr);
1566 #endif
1567 		bp->b_flags |= B_ERROR;
1568 	}
1569 	VN_RELE(backvp);
1570 out:
1571 	return (error);
1572 }
1573 
1574 /*
1575  * Pushes out pages to the front file system.
1576  * Called for both connected and disconnected states.
 *
 *	vp    - cachefs vnode being pushed
 *	bp    - buf whose b_un.b_addr holds the data; b_error is set
 *		from the front file write
 *	iolen - number of bytes to write
 *	iooff - file offset to write at
 *	cr    - credentials used when logging the modification while
 *		disconnected (replaced by cp->c_cred if that is set);
 *		the front file write itself uses kcred
 *
 * Returns:
 *	0	  - data written, or (when connected) the front file
 *		    could not be used and the cnode was nocached
 *	ETIMEDOUT - file system is not non-shared, cnode is CN_NOCACHE,
 *		    or disconnected and the file is not populated or
 *		    needs attributes
 *	ENOSPC	  - disconnected and out of cache or log space
 *	other	  - error from vn_rdwr() while disconnected
1577  */
1578 static int
1579 cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen,
1580     u_offset_t iooff, cred_t *cr)
1581 {
1582 	struct cnode *cp = VTOC(vp);
1583 	fscache_t *fscp = C_TO_FSCACHE(cp);
1584 	int error = 0;
1585 	ssize_t resid;
1586 	u_offset_t popoff;
	/* non-zero only once a dlog modify record has been started */
1587 	off_t commit = 0;
1588 	uint_t seq;
	/* only assigned (and only read) on the path where commit != 0 */
1589 	enum cachefs_rl_type type;
1590 	vnode_t *frontvp = NULL;
1591 
1592 	mutex_enter(&cp->c_statelock);
1593 
1594 	if (!CFS_ISFS_NONSHARED(fscp)) {
1595 		error = ETIMEDOUT;
1596 		goto out;
1597 	}
1598 
1599 	/* get the front file if necessary */
1600 	if ((cp->c_frontvp == NULL) &&
1601 	    ((cp->c_flags & CN_NOCACHE) == 0)) {
1602 		(void) cachefs_getfrontfile(cp);
1603 	}
1604 	if (cp->c_flags & CN_NOCACHE) {
1605 		error = ETIMEDOUT;
1606 		goto out;
1607 	}
1608 
1609 	/* if disconnected, needs to be populated and have good attributes */
1610 	if ((fscp->fs_cdconnected != CFS_CD_CONNECTED) &&
1611 	    (((cp->c_metadata.md_flags & MD_POPULATED) == 0) ||
1612 	    (cp->c_metadata.md_flags & MD_NEEDATTRS))) {
1613 		error = ETIMEDOUT;
1614 		goto out;
1615 	}
1616 
	/*
	 * Charge a front file block for each MAXBSIZE chunk touched.
	 * If that fails: connected -> nocache the file and return 0,
	 * disconnected -> fail with ENOSPC.
	 */
1617 	for (popoff = iooff; popoff < (iooff + iolen); popoff += MAXBSIZE) {
1618 		if (cachefs_charge_page(cp, popoff)) {
1619 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1620 				cachefs_nocache(cp);
1621 				goto out;
1622 			} else {
1623 				error = ENOSPC;
1624 				goto out;
1625 			}
1626 		}
1627 	}
1628 
1629 	if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
1630 		/* log the first putpage to a file */
1631 		if ((cp->c_metadata.md_flags & MD_PUTPAGE) == 0) {
1632 			/* uses open's creds if we have them */
1633 			if (cp->c_cred)
1634 				cr = cp->c_cred;
1635 
1636 			if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
1637 				error = cachefs_dlog_cidmap(fscp);
1638 				if (error) {
1639 					error = ENOSPC;
1640 					goto out;
1641 				}
1642 				cp->c_metadata.md_flags |= MD_MAPPING;
1643 			}
1644 
1645 			commit = cachefs_dlog_modify(fscp, cp, cr, &seq);
1646 			if (commit == 0) {
1647 				/* out of space */
1648 				error = ENOSPC;
1649 				goto out;
1650 			}
1651 
1652 			cp->c_metadata.md_seq = seq;
			/* remember the current rl type so it can be restored */
1653 			type = cp->c_metadata.md_rltype;
1654 			cachefs_modified(cp);
1655 			cp->c_metadata.md_flags |= MD_PUTPAGE;
1656 			cp->c_metadata.md_flags &= ~MD_PUSHDONE;
1657 			cp->c_flags |= CN_UPDATED;
1658 		}
1659 
1660 		/* subsequent putpages just get a new sequence number */
1661 		else {
1662 			/* but only if it matters */
1663 			if (cp->c_metadata.md_seq != fscp->fs_dlogseq) {
1664 				seq = cachefs_dlog_seqnext(fscp);
1665 				if (seq == 0) {
1666 					error = ENOSPC;
1667 					goto out;
1668 				}
1669 				cp->c_metadata.md_seq = seq;
1670 				cp->c_flags |= CN_UPDATED;
1671 				/* XXX maybe should do write_metadata here */
1672 			}
1673 		}
1674 	}
1675 
	/*
	 * Hold the front vnode and drop c_statelock across the write;
	 * the lock is retaken before the hold is released.
	 */
1676 	frontvp = cp->c_frontvp;
1677 	VN_HOLD(frontvp);
1678 	mutex_exit(&cp->c_statelock);
1679 	error = bp->b_error = vn_rdwr(UIO_WRITE, frontvp,
1680 	    bp->b_un.b_addr, iolen, iooff, UIO_SYSSPACE, 0,
1681 	    RLIM64_INFINITY, kcred, &resid);
1682 	mutex_enter(&cp->c_statelock);
1683 	VN_RELE(frontvp);
1684 	frontvp = NULL;
1685 	if (error) {
		/* connected: give up caching this file and report success */
1686 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1687 			cachefs_nocache(cp);
1688 			error = 0;
1689 			goto out;
1690 		} else {
1691 			goto out;
1692 		}
1693 	}
1694 
1695 	(void) cachefs_update_allocmap(cp, iooff, iolen);
1696 	cp->c_flags |= (CN_UPDATED | CN_NEED_FRONT_SYNC |
1697 	    CN_POPULATION_PENDING);
1698 	if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
1699 		gethrestime(&cp->c_metadata.md_localmtime);
1700 		cp->c_metadata.md_flags |= MD_LOCALMTIME;
1701 	}
1702 
1703 out:
1704 	if (commit) {
1705 		/* commit the log record */
1706 		ASSERT(fscp->fs_cdconnected == CFS_CD_DISCONNECTED);
1707 		if (cachefs_dlog_commit(fscp, commit, error)) {
1708 			/*EMPTY*/
1709 			/* XXX fix on panic */
1710 		}
1711 	}
1712 
	/*
	 * The logged putpage failed: undo MD_PUTPAGE and move the
	 * resource list entry back to the type saved before
	 * cachefs_modified() was called.
	 */
1713 	if (error && commit) {
1714 		cp->c_metadata.md_flags &= ~MD_PUTPAGE;
1715 		cachefs_rlent_moveto(fscp->fs_cache, type,
1716 		    cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks);
1717 		cp->c_metadata.md_rltype = type;
1718 		cp->c_flags |= CN_UPDATED;
1719 	}
1720 	mutex_exit(&cp->c_statelock);
1721 	return (error);
1722 }
1723 
1724 /*ARGSUSED*/
1725 static int
1726 cachefs_dump(struct vnode *vp, caddr_t foo1, offset_t foo2, offset_t foo3,
1727     caller_context_t *ct)
1728 {
1729 	return (ENOSYS); /* should we panic if we get here? */
1730 }
1731 
1732 /*ARGSUSED*/
1733 static int
1734 cachefs_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, cred_t *cred,
1735 	int *rvalp, caller_context_t *ct)
1736 {
1737 	int error;
1738 	struct cnode *cp = VTOC(vp);
1739 	struct fscache *fscp = C_TO_FSCACHE(cp);
1740 	struct cachefscache *cachep;
1741 	extern kmutex_t cachefs_cachelock;
1742 	extern cachefscache_t *cachefs_cachelist;
1743 	cachefsio_pack_t *packp;
1744 	STRUCT_DECL(cachefsio_dcmd, dcmd);
1745 	int	inlen, outlen;	/* LP64: generic int for struct in/out len */
1746 	void *dinp, *doutp;
1747 	int (*dcmd_routine)(vnode_t *, void *, void *);
1748 
1749 	if (getzoneid() != GLOBAL_ZONEID)
1750 		return (EPERM);
1751 
1752 	/*
1753 	 * Cachefs only provides pass-through support for NFSv4,
1754 	 * and all vnode operations are passed through to the
1755 	 * back file system. For NFSv4 pass-through to work, only
1756 	 * connected operation is supported, the cnode backvp must
1757 	 * exist, and cachefs optional (eg., disconnectable) flags
1758 	 * are turned off. Assert these conditions which ensure
1759 	 * that only a subset of the ioctls are "truly supported"
1760 	 * for NFSv4 (these are CFSDCMD_DAEMONID and CFSDCMD_GETSTATS.
1761 	 * The packing operations are meaningless since there is
1762 	 * no caching for NFSv4, and the called functions silently
1763 	 * return if the backfilesystem is NFSv4. The daemon
1764 	 * commands except for those above are essentially used
1765 	 * for disconnectable operation support (including log
1766 	 * rolling), so in each called function, we assert that
1767 	 * NFSv4 is not in use. The _FIO* calls (except _FIOCOD)
1768 	 * are from "cfsfstype" which is not a documented
1769 	 * command. However, the command is visible in
1770 	 * /usr/lib/fs/cachefs so the commands are simply let
1771 	 * through (don't seem to impact pass-through functionality).
1772 	 */
1773 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
1774 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
1775 
1776 	switch (cmd) {
1777 	case CACHEFSIO_PACK:
1778 		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1779 		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1780 		if (!error)
1781 			error = cachefs_pack(vp, packp->p_name, cred);
1782 		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1783 		break;
1784 
1785 	case CACHEFSIO_UNPACK:
1786 		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1787 		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1788 		if (!error)
1789 			error = cachefs_unpack(vp, packp->p_name, cred);
1790 		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1791 		break;
1792 
1793 	case CACHEFSIO_PACKINFO:
1794 		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1795 		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1796 		if (!error)
1797 			error = cachefs_packinfo(vp, packp->p_name,
1798 			    &packp->p_status, cred);
1799 		if (!error)
1800 			error = xcopyout(packp, (void *)arg,
1801 			    sizeof (cachefsio_pack_t));
1802 		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1803 		break;
1804 
1805 	case CACHEFSIO_UNPACKALL:
1806 		error = cachefs_unpackall(vp);
1807 		break;
1808 
1809 	case CACHEFSIO_DCMD:
1810 		/*
1811 		 * This is a private interface between the cachefsd and
1812 		 * this file system.
1813 		 */
1814 
1815 		/* must be root to use these commands */
1816 		if (secpolicy_fs_config(cred, vp->v_vfsp) != 0)
1817 			return (EPERM);
1818 
1819 		/* get the command packet */
1820 		STRUCT_INIT(dcmd, flag & DATAMODEL_MASK);
1821 		error = xcopyin((void *)arg, STRUCT_BUF(dcmd),
1822 		    SIZEOF_STRUCT(cachefsio_dcmd, DATAMODEL_NATIVE));
1823 		if (error)
1824 			return (error);
1825 
1826 		/* copy in the data for the operation */
1827 		dinp = NULL;
1828 		if ((inlen = STRUCT_FGET(dcmd, d_slen)) > 0) {
1829 			dinp = cachefs_kmem_alloc(inlen, KM_SLEEP);
1830 			error = xcopyin(STRUCT_FGETP(dcmd, d_sdata), dinp,
1831 			    inlen);
1832 			if (error)
1833 				return (error);
1834 		}
1835 
1836 		/* allocate space for the result */
1837 		doutp = NULL;
1838 		if ((outlen = STRUCT_FGET(dcmd, d_rlen)) > 0)
1839 			doutp = cachefs_kmem_alloc(outlen, KM_SLEEP);
1840 
1841 		/*
1842 		 * Assert NFSv4 only allows the daemonid and getstats
1843 		 * daemon requests
1844 		 */
1845 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0 ||
1846 		    STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_DAEMONID ||
1847 		    STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_GETSTATS);
1848 
1849 		/* get the routine to execute */
1850 		dcmd_routine = NULL;
1851 		switch (STRUCT_FGET(dcmd, d_cmd)) {
1852 		case CFSDCMD_DAEMONID:
1853 			dcmd_routine = cachefs_io_daemonid;
1854 			break;
1855 		case CFSDCMD_STATEGET:
1856 			dcmd_routine = cachefs_io_stateget;
1857 			break;
1858 		case CFSDCMD_STATESET:
1859 			dcmd_routine = cachefs_io_stateset;
1860 			break;
1861 		case CFSDCMD_XWAIT:
1862 			dcmd_routine = cachefs_io_xwait;
1863 			break;
1864 		case CFSDCMD_EXISTS:
1865 			dcmd_routine = cachefs_io_exists;
1866 			break;
1867 		case CFSDCMD_LOSTFOUND:
1868 			dcmd_routine = cachefs_io_lostfound;
1869 			break;
1870 		case CFSDCMD_GETINFO:
1871 			dcmd_routine = cachefs_io_getinfo;
1872 			break;
1873 		case CFSDCMD_CIDTOFID:
1874 			dcmd_routine = cachefs_io_cidtofid;
1875 			break;
1876 		case CFSDCMD_GETATTRFID:
1877 			dcmd_routine = cachefs_io_getattrfid;
1878 			break;
1879 		case CFSDCMD_GETATTRNAME:
1880 			dcmd_routine = cachefs_io_getattrname;
1881 			break;
1882 		case CFSDCMD_GETSTATS:
1883 			dcmd_routine = cachefs_io_getstats;
1884 			break;
1885 		case CFSDCMD_ROOTFID:
1886 			dcmd_routine = cachefs_io_rootfid;
1887 			break;
1888 		case CFSDCMD_CREATE:
1889 			dcmd_routine = cachefs_io_create;
1890 			break;
1891 		case CFSDCMD_REMOVE:
1892 			dcmd_routine = cachefs_io_remove;
1893 			break;
1894 		case CFSDCMD_LINK:
1895 			dcmd_routine = cachefs_io_link;
1896 			break;
1897 		case CFSDCMD_RENAME:
1898 			dcmd_routine = cachefs_io_rename;
1899 			break;
1900 		case CFSDCMD_MKDIR:
1901 			dcmd_routine = cachefs_io_mkdir;
1902 			break;
1903 		case CFSDCMD_RMDIR:
1904 			dcmd_routine = cachefs_io_rmdir;
1905 			break;
1906 		case CFSDCMD_SYMLINK:
1907 			dcmd_routine = cachefs_io_symlink;
1908 			break;
1909 		case CFSDCMD_SETATTR:
1910 			dcmd_routine = cachefs_io_setattr;
1911 			break;
1912 		case CFSDCMD_SETSECATTR:
1913 			dcmd_routine = cachefs_io_setsecattr;
1914 			break;
1915 		case CFSDCMD_PUSHBACK:
1916 			dcmd_routine = cachefs_io_pushback;
1917 			break;
1918 		default:
1919 			error = ENOTTY;
1920 			break;
1921 		}
1922 
1923 		/* execute the routine */
1924 		if (dcmd_routine)
1925 			error = (*dcmd_routine)(vp, dinp, doutp);
1926 
1927 		/* copy out the result */
1928 		if ((error == 0) && doutp)
1929 			error = xcopyout(doutp, STRUCT_FGETP(dcmd, d_rdata),
1930 			    outlen);
1931 
1932 		/* free allocated memory */
1933 		if (dinp)
1934 			cachefs_kmem_free(dinp, inlen);
1935 		if (doutp)
1936 			cachefs_kmem_free(doutp, outlen);
1937 
1938 		break;
1939 
1940 	case _FIOCOD:
1941 		if (secpolicy_fs_config(cred, vp->v_vfsp) != 0) {
1942 			error = EPERM;
1943 			break;
1944 		}
1945 
1946 		error = EBUSY;
1947 		if (arg) {
1948 			/* non-zero arg means do all filesystems */
1949 			mutex_enter(&cachefs_cachelock);
1950 			for (cachep = cachefs_cachelist; cachep != NULL;
1951 			    cachep = cachep->c_next) {
1952 				mutex_enter(&cachep->c_fslistlock);
1953 				for (fscp = cachep->c_fslist;
1954 				    fscp != NULL;
1955 				    fscp = fscp->fs_next) {
1956 					if (CFS_ISFS_CODCONST(fscp)) {
1957 						gethrestime(&fscp->fs_cod_time);
1958 						error = 0;
1959 					}
1960 				}
1961 				mutex_exit(&cachep->c_fslistlock);
1962 			}
1963 			mutex_exit(&cachefs_cachelock);
1964 		} else {
1965 			if (CFS_ISFS_CODCONST(fscp)) {
1966 				gethrestime(&fscp->fs_cod_time);
1967 				error = 0;
1968 			}
1969 		}
1970 		break;
1971 
1972 	case _FIOSTOPCACHE:
1973 		error = cachefs_stop_cache(cp);
1974 		break;
1975 
1976 	default:
1977 		error = ENOTTY;
1978 		break;
1979 	}
1980 
1981 	/* return the result */
1982 	return (error);
1983 }
1984 
1985 ino64_t
1986 cachefs_fileno_conflict(fscache_t *fscp, ino64_t old)
1987 {
1988 	ino64_t new;
1989 
1990 	ASSERT(MUTEX_HELD(&fscp->fs_fslock));
1991 
1992 	for (;;) {
1993 		fscp->fs_info.fi_localfileno++;
1994 		if (fscp->fs_info.fi_localfileno == 0)
1995 			fscp->fs_info.fi_localfileno = 3;
1996 		fscp->fs_flags |= CFS_FS_DIRTYINFO;
1997 
1998 		new = fscp->fs_info.fi_localfileno;
1999 		if (! cachefs_fileno_inuse(fscp, new))
2000 			break;
2001 	}
2002 
2003 	cachefs_inum_register(fscp, old, new);
2004 	cachefs_inum_register(fscp, new, 0);
2005 	return (new);
2006 }
2007 
2008 /*ARGSUSED*/
2009 static int
2010 cachefs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
2011 	caller_context_t *ct)
2012 {
2013 	struct cnode *cp = VTOC(vp);
2014 	fscache_t *fscp = C_TO_FSCACHE(cp);
2015 	int error = 0;
2016 	int held = 0;
2017 	int connected = 0;
2018 
2019 #ifdef CFSDEBUG
2020 	CFS_DEBUG(CFSDEBUG_VOPS)
2021 		printf("cachefs_getattr: ENTER vp %p\n", (void *)vp);
2022 #endif
2023 
2024 	if (getzoneid() != GLOBAL_ZONEID)
2025 		return (EPERM);
2026 
2027 	/* Call backfilesystem getattr if NFSv4 */
2028 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
2029 		error = cachefs_getattr_backfs_nfsv4(vp, vap, flags, cr, ct);
2030 		goto out;
2031 	}
2032 
2033 	/*
2034 	 * If it has been specified that the return value will
2035 	 * just be used as a hint, and we are only being asked
2036 	 * for size, fsid or rdevid, then return the client's
2037 	 * notion of these values without checking to make sure
2038 	 * that the attribute cache is up to date.
2039 	 * The whole point is to avoid an over the wire GETATTR
2040 	 * call.
2041 	 */
2042 	if (flags & ATTR_HINT) {
2043 		if (vap->va_mask ==
2044 		    (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
2045 			if (vap->va_mask | AT_SIZE)
2046 				vap->va_size = cp->c_size;
2047 			/*
2048 			 * Return the FSID of the cachefs filesystem,
2049 			 * not the back filesystem
2050 			 */
2051 			if (vap->va_mask | AT_FSID)
2052 				vap->va_fsid = vp->v_vfsp->vfs_dev;
2053 			if (vap->va_mask | AT_RDEV)
2054 				vap->va_rdev = cp->c_attr.va_rdev;
2055 			return (0);
2056 		}
2057 	}
2058 
2059 	/*
2060 	 * Only need to flush pages if asking for the mtime
2061 	 * and if there any dirty pages.
2062 	 */
2063 	if (vap->va_mask & AT_MTIME) {
2064 		/*EMPTY*/
2065 #if 0
2066 		/*
2067 		 * XXX bob: stolen from nfs code, need to do something similar
2068 		 */
2069 		rp = VTOR(vp);
2070 		if ((rp->r_flags & RDIRTY) || rp->r_iocnt > 0)
2071 			(void) nfs3_putpage(vp, (offset_t)0, 0, 0, cr);
2072 #endif
2073 	}
2074 
2075 	for (;;) {
2076 		/* get (or renew) access to the file system */
2077 		if (held) {
2078 			cachefs_cd_release(fscp);
2079 			held = 0;
2080 		}
2081 		error = cachefs_cd_access(fscp, connected, 0);
2082 		if (error)
2083 			goto out;
2084 		held = 1;
2085 
2086 		/*
2087 		 * If it has been specified that the return value will
2088 		 * just be used as a hint, and we are only being asked
2089 		 * for size, fsid or rdevid, then return the client's
2090 		 * notion of these values without checking to make sure
2091 		 * that the attribute cache is up to date.
2092 		 * The whole point is to avoid an over the wire GETATTR
2093 		 * call.
2094 		 */
2095 		if (flags & ATTR_HINT) {
2096 			if (vap->va_mask ==
2097 			    (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
2098 				if (vap->va_mask | AT_SIZE)
2099 					vap->va_size = cp->c_size;
2100 				/*
2101 				 * Return the FSID of the cachefs filesystem,
2102 				 * not the back filesystem
2103 				 */
2104 				if (vap->va_mask | AT_FSID)
2105 					vap->va_fsid = vp->v_vfsp->vfs_dev;
2106 				if (vap->va_mask | AT_RDEV)
2107 					vap->va_rdev = cp->c_attr.va_rdev;
2108 				goto out;
2109 			}
2110 		}
2111 
2112 		mutex_enter(&cp->c_statelock);
2113 		if ((cp->c_metadata.md_flags & MD_NEEDATTRS) &&
2114 		    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
2115 			mutex_exit(&cp->c_statelock);
2116 			connected = 1;
2117 			continue;
2118 		}
2119 
2120 		error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2121 		if (CFS_TIMEOUT(fscp, error)) {
2122 			mutex_exit(&cp->c_statelock);
2123 			cachefs_cd_release(fscp);
2124 			held = 0;
2125 			cachefs_cd_timedout(fscp);
2126 			continue;
2127 		}
2128 		if (error) {
2129 			mutex_exit(&cp->c_statelock);
2130 			break;
2131 		}
2132 
2133 		/* check for fileno conflict */
2134 		if ((fscp->fs_inum_size > 0) &&
2135 		    ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) {
2136 			ino64_t fakenum;
2137 
2138 			mutex_exit(&cp->c_statelock);
2139 			mutex_enter(&fscp->fs_fslock);
2140 			fakenum = cachefs_inum_real2fake(fscp,
2141 			    cp->c_attr.va_nodeid);
2142 			if (fakenum == 0) {
2143 				fakenum = cachefs_fileno_conflict(fscp,
2144 				    cp->c_attr.va_nodeid);
2145 			}
2146 			mutex_exit(&fscp->fs_fslock);
2147 
2148 			mutex_enter(&cp->c_statelock);
2149 			cp->c_metadata.md_flags |= MD_LOCALFILENO;
2150 			cp->c_metadata.md_localfileno = fakenum;
2151 			cp->c_flags |= CN_UPDATED;
2152 		}
2153 
2154 		/* copy out the attributes */
2155 		*vap = cp->c_attr;
2156 
2157 		/*
2158 		 * return the FSID of the cachefs filesystem,
2159 		 * not the back filesystem
2160 		 */
2161 		vap->va_fsid = vp->v_vfsp->vfs_dev;
2162 
2163 		/* return our idea of the size */
2164 		if (cp->c_size > vap->va_size)
2165 			vap->va_size = cp->c_size;
2166 
2167 		/* overwrite with our version of fileno and timestamps */
2168 		vap->va_nodeid = cp->c_metadata.md_localfileno;
2169 		vap->va_mtime = cp->c_metadata.md_localmtime;
2170 		vap->va_ctime = cp->c_metadata.md_localctime;
2171 
2172 		mutex_exit(&cp->c_statelock);
2173 		break;
2174 	}
2175 out:
2176 	if (held)
2177 		cachefs_cd_release(fscp);
2178 #ifdef CFS_CD_DEBUG
2179 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2180 #endif
2181 
2182 #ifdef CFSDEBUG
2183 	CFS_DEBUG(CFSDEBUG_VOPS)
2184 		printf("cachefs_getattr: EXIT error = %d\n", error);
2185 #endif
2186 	return (error);
2187 }
2188 
2189 /*
2190  * cachefs_getattr_backfs_nfsv4
2191  *
2192  * Call NFSv4 back filesystem to handle the getattr (cachefs
2193  * pass-through support for NFSv4).
2194  */
2195 static int
2196 cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap,
2197     int flags, cred_t *cr, caller_context_t *ct)
2198 {
2199 	cnode_t *cp = VTOC(vp);
2200 	fscache_t *fscp = C_TO_FSCACHE(cp);
2201 	vnode_t *backvp;
2202 	int error;
2203 
2204 	/*
2205 	 * For NFSv4 pass-through to work, only connected operation
2206 	 * is supported, the cnode backvp must exist, and cachefs
2207 	 * optional (eg., disconnectable) flags are turned off. Assert
2208 	 * these conditions for the getattr operation.
2209 	 */
2210 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2211 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2212 
2213 	/* Call backfs vnode op after extracting backvp */
2214 	mutex_enter(&cp->c_statelock);
2215 	backvp = cp->c_backvp;
2216 	mutex_exit(&cp->c_statelock);
2217 
2218 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_getattr_backfs_nfsv4: cnode %p,"
2219 	    " backvp %p\n", cp, backvp));
2220 	error = VOP_GETATTR(backvp, vap, flags, cr, ct);
2221 
2222 	/* Update attributes */
2223 	cp->c_attr = *vap;
2224 
2225 	/*
2226 	 * return the FSID of the cachefs filesystem,
2227 	 * not the back filesystem
2228 	 */
2229 	vap->va_fsid = vp->v_vfsp->vfs_dev;
2230 
2231 	return (error);
2232 }
2233 
2234 /*ARGSUSED4*/
2235 static int
2236 cachefs_setattr(
2237 	vnode_t *vp,
2238 	vattr_t *vap,
2239 	int flags,
2240 	cred_t *cr,
2241 	caller_context_t *ct)
2242 {
2243 	cnode_t *cp = VTOC(vp);
2244 	fscache_t *fscp = C_TO_FSCACHE(cp);
2245 	int error;
2246 	int connected;
2247 	int held = 0;
2248 
2249 	if (getzoneid() != GLOBAL_ZONEID)
2250 		return (EPERM);
2251 
2252 	/*
2253 	 * Cachefs only provides pass-through support for NFSv4,
2254 	 * and all vnode operations are passed through to the
2255 	 * back file system. For NFSv4 pass-through to work, only
2256 	 * connected operation is supported, the cnode backvp must
2257 	 * exist, and cachefs optional (eg., disconnectable) flags
2258 	 * are turned off. Assert these conditions to ensure that
2259 	 * the backfilesystem is called for the setattr operation.
2260 	 */
2261 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2262 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2263 
2264 	connected = 0;
2265 	for (;;) {
2266 		/* drop hold on file system */
2267 		if (held) {
2268 			/* Won't loop with NFSv4 connected behavior */
2269 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2270 			cachefs_cd_release(fscp);
2271 			held = 0;
2272 		}
2273 
2274 		/* acquire access to the file system */
2275 		error = cachefs_cd_access(fscp, connected, 1);
2276 		if (error)
2277 			break;
2278 		held = 1;
2279 
2280 		/* perform the setattr */
2281 		error = cachefs_setattr_common(vp, vap, flags, cr, ct);
2282 		if (error) {
2283 			/* if connected */
2284 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2285 				if (CFS_TIMEOUT(fscp, error)) {
2286 					cachefs_cd_release(fscp);
2287 					held = 0;
2288 					cachefs_cd_timedout(fscp);
2289 					connected = 0;
2290 					continue;
2291 				}
2292 			}
2293 
2294 			/* else must be disconnected */
2295 			else {
2296 				if (CFS_TIMEOUT(fscp, error)) {
2297 					connected = 1;
2298 					continue;
2299 				}
2300 			}
2301 		}
2302 		break;
2303 	}
2304 
2305 	if (held) {
2306 		cachefs_cd_release(fscp);
2307 	}
2308 #ifdef CFS_CD_DEBUG
2309 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2310 #endif
2311 	return (error);
2312 }
2313 
2314 static int
2315 cachefs_setattr_common(
2316 	vnode_t *vp,
2317 	vattr_t *vap,
2318 	int flags,
2319 	cred_t *cr,
2320 	caller_context_t *ct)
2321 {
2322 	cnode_t *cp = VTOC(vp);
2323 	fscache_t *fscp = C_TO_FSCACHE(cp);
2324 	cachefscache_t *cachep = fscp->fs_cache;
2325 	uint_t mask = vap->va_mask;
2326 	int error = 0;
2327 	uint_t bcnt;
2328 
2329 	/* Cannot set these attributes. */
2330 	if (mask & AT_NOSET)
2331 		return (EINVAL);
2332 
2333 	/*
2334 	 * Truncate file.  Must have write permission and not be a directory.
2335 	 */
2336 	if (mask & AT_SIZE) {
2337 		if (vp->v_type == VDIR) {
2338 			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE))
2339 				cachefs_log_truncate(cachep, EISDIR,
2340 				    fscp->fs_cfsvfsp,
2341 				    &cp->c_metadata.md_cookie,
2342 				    cp->c_id.cid_fileno,
2343 				    crgetuid(cr), vap->va_size);
2344 			return (EISDIR);
2345 		}
2346 	}
2347 
2348 	/*
2349 	 * Gotta deal with one special case here, where we're setting the
2350 	 * size of the file. First, we zero out part of the page after the
2351 	 * new size of the file. Then we toss (not write) all pages after
2352 	 * page in which the new offset occurs. Note that the NULL passed
2353 	 * in instead of a putapage() fn parameter is correct, since
2354 	 * no dirty pages will be found (B_TRUNC | B_INVAL).
2355 	 */
2356 
2357 	rw_enter(&cp->c_rwlock, RW_WRITER);
2358 
2359 	/* sync dirty pages */
2360 	if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
2361 		error = cachefs_putpage_common(vp, (offset_t)0, 0, 0, cr);
2362 		if (error == EINTR)
2363 			goto out;
2364 	}
2365 	error = 0;
2366 
2367 	/* if connected */
2368 	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2369 		error = cachefs_setattr_connected(vp, vap, flags, cr, ct);
2370 	}
2371 	/* else must be disconnected */
2372 	else {
2373 		error = cachefs_setattr_disconnected(vp, vap, flags, cr, ct);
2374 	}
2375 	if (error)
2376 		goto out;
2377 
2378 	/*
2379 	 * If the file size has been changed then
2380 	 * toss whole pages beyond the end of the file and zero
2381 	 * the portion of the last page that is beyond the end of the file.
2382 	 */
2383 	if (mask & AT_SIZE && !CFS_ISFS_BACKFS_NFSV4(fscp)) {
2384 		bcnt = (uint_t)(cp->c_size & PAGEOFFSET);
2385 		if (bcnt)
2386 			pvn_vpzero(vp, cp->c_size, PAGESIZE - bcnt);
2387 		(void) pvn_vplist_dirty(vp, cp->c_size, cachefs_push,
2388 		    B_TRUNC | B_INVAL, cr);
2389 	}
2390 
2391 out:
2392 	rw_exit(&cp->c_rwlock);
2393 
2394 	if ((mask & AT_SIZE) &&
2395 	    (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE)))
2396 		cachefs_log_truncate(cachep, error, fscp->fs_cfsvfsp,
2397 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
2398 		    crgetuid(cr), vap->va_size);
2399 
2400 	return (error);
2401 }
2402 
2403 static int
2404 cachefs_setattr_connected(
2405 	vnode_t *vp,
2406 	vattr_t *vap,
2407 	int flags,
2408 	cred_t *cr,
2409 	caller_context_t *ct)
2410 {
2411 	cnode_t *cp = VTOC(vp);
2412 	fscache_t *fscp = C_TO_FSCACHE(cp);
2413 	uint_t mask = vap->va_mask;
2414 	int error = 0;
2415 	int setsize;
2416 
2417 	mutex_enter(&cp->c_statelock);
2418 
2419 	if (cp->c_backvp == NULL) {
2420 		error = cachefs_getbackvp(fscp, cp);
2421 		if (error)
2422 			goto out;
2423 	}
2424 
2425 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2426 	if (error)
2427 		goto out;
2428 
2429 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_setattr (nfsv4): cnode %p, "
2430 	    "backvp %p\n", cp, cp->c_backvp));
2431 	error = VOP_SETATTR(cp->c_backvp, vap, flags, cr, ct);
2432 	if (error) {
2433 		goto out;
2434 	}
2435 
2436 	/* if the size of the file is being changed */
2437 	if (mask & AT_SIZE) {
2438 		cp->c_size = vap->va_size;
2439 		error = 0;
2440 		setsize = 0;
2441 
2442 		/* see if okay to try to set the file size */
2443 		if (((cp->c_flags & CN_NOCACHE) == 0) &&
2444 		    CFS_ISFS_NONSHARED(fscp)) {
2445 			/* okay to set size if file is populated */
2446 			if (cp->c_metadata.md_flags & MD_POPULATED)
2447 				setsize = 1;
2448 
2449 			/*
2450 			 * Okay to set size if front file exists and setting
2451 			 * file size to zero.
2452 			 */
2453 			if ((cp->c_metadata.md_flags & MD_FILE) &&
2454 			    (vap->va_size == 0))
2455 				setsize = 1;
2456 		}
2457 
2458 		/* if okay to try to set the file size */
2459 		if (setsize) {
2460 			error = 0;
2461 			if (cp->c_frontvp == NULL)
2462 				error = cachefs_getfrontfile(cp);
2463 			if (error == 0)
2464 				error = cachefs_frontfile_size(cp, cp->c_size);
2465 		} else if (cp->c_metadata.md_flags & MD_FILE) {
2466 			/* make sure file gets nocached */
2467 			error = EEXIST;
2468 		}
2469 
2470 		/* if we have to nocache the file */
2471 		if (error) {
2472 			if ((cp->c_flags & CN_NOCACHE) == 0 &&
2473 			    !CFS_ISFS_BACKFS_NFSV4(fscp))
2474 				cachefs_nocache(cp);
2475 			error = 0;
2476 		}
2477 	}
2478 
2479 	cp->c_flags |= CN_UPDATED;
2480 
2481 	/* XXX bob: given what modify_cobject does this seems unnecessary */
2482 	cp->c_attr.va_mask = AT_ALL;
2483 	error = VOP_GETATTR(cp->c_backvp, &cp->c_attr, 0, cr, ct);
2484 	if (error)
2485 		goto out;
2486 
2487 	cp->c_attr.va_size = MAX(cp->c_attr.va_size, cp->c_size);
2488 	cp->c_size = cp->c_attr.va_size;
2489 
2490 	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
2491 out:
2492 	mutex_exit(&cp->c_statelock);
2493 	return (error);
2494 }
2495 
2496 /*
2497  * perform the setattr on the local file system
2498  */
2499 /*ARGSUSED4*/
2500 static int
2501 cachefs_setattr_disconnected(
2502 	vnode_t *vp,
2503 	vattr_t *vap,
2504 	int flags,
2505 	cred_t *cr,
2506 	caller_context_t *ct)
2507 {
2508 	cnode_t *cp = VTOC(vp);
2509 	fscache_t *fscp = C_TO_FSCACHE(cp);
2510 	int mask;
2511 	int error;
2512 	int newfile;
2513 	off_t commit = 0;
2514 
2515 	if (CFS_ISFS_WRITE_AROUND(fscp))
2516 		return (ETIMEDOUT);
2517 
2518 	/* if we do not have good attributes */
2519 	if (cp->c_metadata.md_flags & MD_NEEDATTRS)
2520 		return (ETIMEDOUT);
2521 
2522 	/* primary concern is to keep this routine as much like ufs_setattr */
2523 
2524 	mutex_enter(&cp->c_statelock);
2525 
2526 	error = secpolicy_vnode_setattr(cr, vp, vap, &cp->c_attr, flags,
2527 	    cachefs_access_local, cp);
2528 
2529 	if (error)
2530 		goto out;
2531 
2532 	mask = vap->va_mask;
2533 
2534 	/* if changing the size of the file */
2535 	if (mask & AT_SIZE) {
2536 		if (vp->v_type == VDIR) {
2537 			error = EISDIR;
2538 			goto out;
2539 		}
2540 
2541 		if (vp->v_type == VFIFO) {
2542 			error = 0;
2543 			goto out;
2544 		}
2545 
2546 		if ((vp->v_type != VREG) &&
2547 		    !((vp->v_type == VLNK) && (vap->va_size == 0))) {
2548 			error = EINVAL;
2549 			goto out;
2550 		}
2551 
2552 		if (vap->va_size > fscp->fs_offmax) {
2553 			error = EFBIG;
2554 			goto out;
2555 		}
2556 
2557 		/* if the file is not populated and we are not truncating it */
2558 		if (((cp->c_metadata.md_flags & MD_POPULATED) == 0) &&
2559 		    (vap->va_size != 0)) {
2560 			error = ETIMEDOUT;
2561 			goto out;
2562 		}
2563 
2564 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2565 			error = cachefs_dlog_cidmap(fscp);
2566 			if (error) {
2567 				error = ENOSPC;
2568 				goto out;
2569 			}
2570 			cp->c_metadata.md_flags |= MD_MAPPING;
2571 		}
2572 
2573 		/* log the operation */
2574 		commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2575 		if (commit == 0) {
2576 			error = ENOSPC;
2577 			goto out;
2578 		}
2579 		cp->c_flags &= ~CN_NOCACHE;
2580 
2581 		/* special case truncating fast sym links */
2582 		if ((vp->v_type == VLNK) &&
2583 		    (cp->c_metadata.md_flags & MD_FASTSYMLNK)) {
2584 			/* XXX how can we get here */
2585 			/* XXX should update mtime */
2586 			cp->c_size = 0;
2587 			error = 0;
2588 			goto out;
2589 		}
2590 
2591 		/* get the front file, this may create one */
2592 		newfile = (cp->c_metadata.md_flags & MD_FILE) ? 0 : 1;
2593 		if (cp->c_frontvp == NULL) {
2594 			error = cachefs_getfrontfile(cp);
2595 			if (error)
2596 				goto out;
2597 		}
2598 		ASSERT(cp->c_frontvp);
2599 		if (newfile && (cp->c_flags & CN_UPDATED)) {
2600 			/* allocate space for the metadata */
2601 			ASSERT((cp->c_flags & CN_ALLOC_PENDING) == 0);
2602 			ASSERT((cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR)
2603 			    == 0);
2604 			error = filegrp_write_metadata(cp->c_filegrp,
2605 			    &cp->c_id, &cp->c_metadata);
2606 			if (error)
2607 				goto out;
2608 		}
2609 
2610 		/* change the size of the front file */
2611 		error = cachefs_frontfile_size(cp, vap->va_size);
2612 		if (error)
2613 			goto out;
2614 		cp->c_attr.va_size = cp->c_size = vap->va_size;
2615 		gethrestime(&cp->c_metadata.md_localmtime);
2616 		cp->c_metadata.md_flags |= MD_POPULATED | MD_LOCALMTIME;
2617 		cachefs_modified(cp);
2618 		cp->c_flags |= CN_UPDATED;
2619 	}
2620 
2621 	if (mask & AT_MODE) {
2622 		/* mark as modified */
2623 		if (cachefs_modified_alloc(cp)) {
2624 			error = ENOSPC;
2625 			goto out;
2626 		}
2627 
2628 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2629 			error = cachefs_dlog_cidmap(fscp);
2630 			if (error) {
2631 				error = ENOSPC;
2632 				goto out;
2633 			}
2634 			cp->c_metadata.md_flags |= MD_MAPPING;
2635 		}
2636 
2637 		/* log the operation if not already logged */
2638 		if (commit == 0) {
2639 			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2640 			if (commit == 0) {
2641 				error = ENOSPC;
2642 				goto out;
2643 			}
2644 		}
2645 
2646 		cp->c_attr.va_mode &= S_IFMT;
2647 		cp->c_attr.va_mode |= vap->va_mode & ~S_IFMT;
2648 		gethrestime(&cp->c_metadata.md_localctime);
2649 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
2650 		cp->c_flags |= CN_UPDATED;
2651 	}
2652 
2653 	if (mask & (AT_UID|AT_GID)) {
2654 
2655 		/* mark as modified */
2656 		if (cachefs_modified_alloc(cp)) {
2657 			error = ENOSPC;
2658 			goto out;
2659 		}
2660 
2661 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2662 			error = cachefs_dlog_cidmap(fscp);
2663 			if (error) {
2664 				error = ENOSPC;
2665 				goto out;
2666 			}
2667 			cp->c_metadata.md_flags |= MD_MAPPING;
2668 		}
2669 
2670 		/* log the operation if not already logged */
2671 		if (commit == 0) {
2672 			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2673 			if (commit == 0) {
2674 				error = ENOSPC;
2675 				goto out;
2676 			}
2677 		}
2678 
2679 		if (mask & AT_UID)
2680 			cp->c_attr.va_uid = vap->va_uid;
2681 
2682 		if (mask & AT_GID)
2683 			cp->c_attr.va_gid = vap->va_gid;
2684 		gethrestime(&cp->c_metadata.md_localctime);
2685 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
2686 		cp->c_flags |= CN_UPDATED;
2687 	}
2688 
2689 
2690 	if (mask & (AT_MTIME|AT_ATIME)) {
2691 		/* mark as modified */
2692 		if (cachefs_modified_alloc(cp)) {
2693 			error = ENOSPC;
2694 			goto out;
2695 		}
2696 
2697 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2698 			error = cachefs_dlog_cidmap(fscp);
2699 			if (error) {
2700 				error = ENOSPC;
2701 				goto out;
2702 			}
2703 			cp->c_metadata.md_flags |= MD_MAPPING;
2704 		}
2705 
2706 		/* log the operation if not already logged */
2707 		if (commit == 0) {
2708 			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2709 			if (commit == 0) {
2710 				error = ENOSPC;
2711 				goto out;
2712 			}
2713 		}
2714 
2715 		if (mask & AT_MTIME) {
2716 			cp->c_metadata.md_localmtime = vap->va_mtime;
2717 			cp->c_metadata.md_flags |= MD_LOCALMTIME;
2718 		}
2719 		if (mask & AT_ATIME)
2720 			cp->c_attr.va_atime = vap->va_atime;
2721 		gethrestime(&cp->c_metadata.md_localctime);
2722 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
2723 		cp->c_flags |= CN_UPDATED;
2724 	}
2725 
2726 out:
2727 	mutex_exit(&cp->c_statelock);
2728 
2729 	/* commit the log entry */
2730 	if (commit) {
2731 		if (cachefs_dlog_commit(fscp, commit, error)) {
2732 			/*EMPTY*/
2733 			/* XXX bob: fix on panic */
2734 		}
2735 	}
2736 	return (error);
2737 }
2738 
2739 /* ARGSUSED */
2740 static int
2741 cachefs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
2742 	caller_context_t *ct)
2743 {
2744 	cnode_t *cp = VTOC(vp);
2745 	fscache_t *fscp = C_TO_FSCACHE(cp);
2746 	int error;
2747 	int held = 0;
2748 	int connected = 0;
2749 
2750 #ifdef CFSDEBUG
2751 	CFS_DEBUG(CFSDEBUG_VOPS)
2752 		printf("cachefs_access: ENTER vp %p\n", (void *)vp);
2753 #endif
2754 	if (getzoneid() != GLOBAL_ZONEID) {
2755 		error = EPERM;
2756 		goto out;
2757 	}
2758 
2759 	/*
2760 	 * Cachefs only provides pass-through support for NFSv4,
2761 	 * and all vnode operations are passed through to the
2762 	 * back file system. For NFSv4 pass-through to work, only
2763 	 * connected operation is supported, the cnode backvp must
2764 	 * exist, and cachefs optional (eg., disconnectable) flags
2765 	 * are turned off. Assert these conditions to ensure that
2766 	 * the backfilesystem is called for the access operation.
2767 	 */
2768 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2769 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2770 
2771 	for (;;) {
2772 		/* get (or renew) access to the file system */
2773 		if (held) {
2774 			/* Won't loop with NFSv4 connected behavior */
2775 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2776 			cachefs_cd_release(fscp);
2777 			held = 0;
2778 		}
2779 		error = cachefs_cd_access(fscp, connected, 0);
2780 		if (error)
2781 			break;
2782 		held = 1;
2783 
2784 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2785 			error = cachefs_access_connected(vp, mode, flags,
2786 			    cr);
2787 			if (CFS_TIMEOUT(fscp, error)) {
2788 				cachefs_cd_release(fscp);
2789 				held = 0;
2790 				cachefs_cd_timedout(fscp);
2791 				connected = 0;
2792 				continue;
2793 			}
2794 		} else {
2795 			mutex_enter(&cp->c_statelock);
2796 			error = cachefs_access_local(cp, mode, cr);
2797 			mutex_exit(&cp->c_statelock);
2798 			if (CFS_TIMEOUT(fscp, error)) {
2799 				if (cachefs_cd_access_miss(fscp)) {
2800 					mutex_enter(&cp->c_statelock);
2801 					if (cp->c_backvp == NULL) {
2802 						(void) cachefs_getbackvp(fscp,
2803 						    cp);
2804 					}
2805 					mutex_exit(&cp->c_statelock);
2806 					error = cachefs_access_connected(vp,
2807 					    mode, flags, cr);
2808 					if (!CFS_TIMEOUT(fscp, error))
2809 						break;
2810 					delay(5*hz);
2811 					connected = 0;
2812 					continue;
2813 				}
2814 				connected = 1;
2815 				continue;
2816 			}
2817 		}
2818 		break;
2819 	}
2820 	if (held)
2821 		cachefs_cd_release(fscp);
2822 #ifdef CFS_CD_DEBUG
2823 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2824 #endif
2825 out:
2826 #ifdef CFSDEBUG
2827 	CFS_DEBUG(CFSDEBUG_VOPS)
2828 		printf("cachefs_access: EXIT error = %d\n", error);
2829 #endif
2830 	return (error);
2831 }
2832 
2833 static int
2834 cachefs_access_connected(struct vnode *vp, int mode, int flags, cred_t *cr)
2835 {
2836 	cnode_t *cp = VTOC(vp);
2837 	fscache_t *fscp = C_TO_FSCACHE(cp);
2838 	int error = 0;
2839 
2840 	mutex_enter(&cp->c_statelock);
2841 
2842 	/* Make sure the cnode attrs are valid first. */
2843 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2844 	if (error)
2845 		goto out;
2846 
2847 	/* see if can do a local file system check */
2848 	if ((fscp->fs_info.fi_mntflags & CFS_ACCESS_BACKFS) == 0 &&
2849 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
2850 		error = cachefs_access_local(cp, mode, cr);
2851 		goto out;
2852 	}
2853 
2854 	/* else do a remote file system check */
2855 	else {
2856 		if (cp->c_backvp == NULL) {
2857 			error = cachefs_getbackvp(fscp, cp);
2858 			if (error)
2859 				goto out;
2860 		}
2861 
2862 		CFS_DPRINT_BACKFS_NFSV4(fscp,
2863 		    ("cachefs_access (nfsv4): cnode %p, backvp %p\n",
2864 		    cp, cp->c_backvp));
2865 		error = VOP_ACCESS(cp->c_backvp, mode, flags, cr, NULL);
2866 
2867 		/*
2868 		 * even though we don't `need' the ACL to do access
2869 		 * via the backvp, we should cache it here to make our
2870 		 * behavior more reasonable if we go disconnected.
2871 		 */
2872 
2873 		if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
2874 		    (cachefs_vtype_aclok(vp)) &&
2875 		    ((cp->c_flags & CN_NOCACHE) == 0) &&
2876 		    (!CFS_ISFS_BACKFS_NFSV4(fscp)) &&
2877 		    ((cp->c_metadata.md_flags & MD_ACL) == 0))
2878 			(void) cachefs_cacheacl(cp, NULL);
2879 	}
2880 out:
2881 	/*
2882 	 * If NFS returned ESTALE, mark this cnode as stale, so that
2883 	 * the vn_open retry will read the file anew from backfs
2884 	 */
2885 	if (error == ESTALE)
2886 		cachefs_cnode_stale(cp);
2887 
2888 	mutex_exit(&cp->c_statelock);
2889 	return (error);
2890 }
2891 
2892 /*
2893  * CFS has a fastsymlink scheme. If the size of the link is < C_FSL_SIZE, then
2894  * the link is placed in the metadata itself (no front file is allocated).
2895  */
2896 /*ARGSUSED*/
2897 static int
2898 cachefs_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
2899 {
2900 	int error = 0;
2901 	cnode_t *cp = VTOC(vp);
2902 	fscache_t *fscp = C_TO_FSCACHE(cp);
2903 	cachefscache_t *cachep = fscp->fs_cache;
2904 	int held = 0;
2905 	int connected = 0;
2906 
2907 	if (getzoneid() != GLOBAL_ZONEID)
2908 		return (EPERM);
2909 
2910 	if (vp->v_type != VLNK)
2911 		return (EINVAL);
2912 
2913 	/*
2914 	 * Cachefs only provides pass-through support for NFSv4,
2915 	 * and all vnode operations are passed through to the
2916 	 * back file system. For NFSv4 pass-through to work, only
2917 	 * connected operation is supported, the cnode backvp must
2918 	 * exist, and cachefs optional (eg., disconnectable) flags
2919 	 * are turned off. Assert these conditions to ensure that
2920 	 * the backfilesystem is called for the readlink operation.
2921 	 */
2922 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2923 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2924 
2925 	for (;;) {
2926 		/* get (or renew) access to the file system */
2927 		if (held) {
2928 			/* Won't loop with NFSv4 connected behavior */
2929 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2930 			cachefs_cd_release(fscp);
2931 			held = 0;
2932 		}
2933 		error = cachefs_cd_access(fscp, connected, 0);
2934 		if (error)
2935 			break;
2936 		held = 1;
2937 
2938 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2939 			/*
2940 			 * since readlink_connected will call stuffsymlink
2941 			 * on success, have to serialize access
2942 			 */
2943 			if (!rw_tryenter(&cp->c_rwlock, RW_WRITER)) {
2944 				cachefs_cd_release(fscp);
2945 				rw_enter(&cp->c_rwlock, RW_WRITER);
2946 				error = cachefs_cd_access(fscp, connected, 0);
2947 				if (error) {
2948 					held = 0;
2949 					rw_exit(&cp->c_rwlock);
2950 					break;
2951 				}
2952 			}
2953 			error = cachefs_readlink_connected(vp, uiop, cr);
2954 			rw_exit(&cp->c_rwlock);
2955 			if (CFS_TIMEOUT(fscp, error)) {
2956 				cachefs_cd_release(fscp);
2957 				held = 0;
2958 				cachefs_cd_timedout(fscp);
2959 				connected = 0;
2960 				continue;
2961 			}
2962 		} else {
2963 			error = cachefs_readlink_disconnected(vp, uiop);
2964 			if (CFS_TIMEOUT(fscp, error)) {
2965 				if (cachefs_cd_access_miss(fscp)) {
2966 					/* as above */
2967 					if (!rw_tryenter(&cp->c_rwlock,
2968 					    RW_WRITER)) {
2969 						cachefs_cd_release(fscp);
2970 						rw_enter(&cp->c_rwlock,
2971 						    RW_WRITER);
2972 						error = cachefs_cd_access(fscp,
2973 						    connected, 0);
2974 						if (error) {
2975 							held = 0;
2976 							rw_exit(&cp->c_rwlock);
2977 							break;
2978 						}
2979 					}
2980 					error = cachefs_readlink_connected(vp,
2981 					    uiop, cr);
2982 					rw_exit(&cp->c_rwlock);
2983 					if (!CFS_TIMEOUT(fscp, error))
2984 						break;
2985 					delay(5*hz);
2986 					connected = 0;
2987 					continue;
2988 				}
2989 				connected = 1;
2990 				continue;
2991 			}
2992 		}
2993 		break;
2994 	}
2995 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READLINK))
2996 		cachefs_log_readlink(cachep, error, fscp->fs_cfsvfsp,
2997 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
2998 		    crgetuid(cr), cp->c_size);
2999 
3000 	if (held)
3001 		cachefs_cd_release(fscp);
3002 #ifdef CFS_CD_DEBUG
3003 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3004 #endif
3005 
3006 	/*
3007 	 * The over the wire error for attempting to readlink something
3008 	 * other than a symbolic link is ENXIO.  However, we need to
3009 	 * return EINVAL instead of ENXIO, so we map it here.
3010 	 */
3011 	return (error == ENXIO ? EINVAL : error);
3012 }
3013 
3014 static int
3015 cachefs_readlink_connected(vnode_t *vp, uio_t *uiop, cred_t *cr)
3016 {
3017 	int error;
3018 	cnode_t *cp = VTOC(vp);
3019 	fscache_t *fscp = C_TO_FSCACHE(cp);
3020 	caddr_t buf;
3021 	int buflen;
3022 	int readcache = 0;
3023 
3024 	mutex_enter(&cp->c_statelock);
3025 
3026 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
3027 	if (error)
3028 		goto out;
3029 
3030 	/* if the sym link is cached as a fast sym link */
3031 	if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
3032 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3033 		error = uiomove(cp->c_metadata.md_allocinfo,
3034 		    MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
3035 #ifdef CFSDEBUG
3036 		readcache = 1;
3037 		goto out;
3038 #else /* CFSDEBUG */
3039 		/* XXX KLUDGE! correct for insidious 0-len symlink */
3040 		if (cp->c_size != 0) {
3041 			readcache = 1;
3042 			goto out;
3043 		}
3044 #endif /* CFSDEBUG */
3045 	}
3046 
3047 	/* if the sym link is cached in a front file */
3048 	if (cp->c_metadata.md_flags & MD_POPULATED) {
3049 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3050 		ASSERT(cp->c_metadata.md_flags & MD_FILE);
3051 		if (cp->c_frontvp == NULL) {
3052 			(void) cachefs_getfrontfile(cp);
3053 		}
3054 		if (cp->c_metadata.md_flags & MD_POPULATED) {
3055 			/* read symlink data from frontfile */
3056 			uiop->uio_offset = 0;
3057 			(void) VOP_RWLOCK(cp->c_frontvp,
3058 			    V_WRITELOCK_FALSE, NULL);
3059 			error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
3060 			VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);
3061 
3062 			/* XXX KLUDGE! correct for insidious 0-len symlink */
3063 			if (cp->c_size != 0) {
3064 				readcache = 1;
3065 				goto out;
3066 			}
3067 		}
3068 	}
3069 
3070 	/* get the sym link contents from the back fs */
3071 	error = cachefs_readlink_back(cp, cr, &buf, &buflen);
3072 	if (error)
3073 		goto out;
3074 
3075 	/* copy the contents out to the user */
3076 	error = uiomove(buf, MIN(buflen, uiop->uio_resid), UIO_READ, uiop);
3077 
3078 	/*
3079 	 * try to cache the sym link, note that its a noop if NOCACHE is set
3080 	 * or if NFSv4 pass-through is enabled.
3081 	 */
3082 	if (cachefs_stuffsymlink(cp, buf, buflen)) {
3083 		cachefs_nocache(cp);
3084 	}
3085 
3086 	cachefs_kmem_free(buf, MAXPATHLEN);
3087 
3088 out:
3089 	mutex_exit(&cp->c_statelock);
3090 	if (error == 0) {
3091 		if (readcache)
3092 			fscp->fs_stats.st_hits++;
3093 		else
3094 			fscp->fs_stats.st_misses++;
3095 	}
3096 	return (error);
3097 }
3098 
3099 static int
3100 cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop)
3101 {
3102 	int error;
3103 	cnode_t *cp = VTOC(vp);
3104 	fscache_t *fscp = C_TO_FSCACHE(cp);
3105 	int readcache = 0;
3106 
3107 	mutex_enter(&cp->c_statelock);
3108 
3109 	/* if the sym link is cached as a fast sym link */
3110 	if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
3111 		error = uiomove(cp->c_metadata.md_allocinfo,
3112 		    MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
3113 		readcache = 1;
3114 		goto out;
3115 	}
3116 
3117 	/* if the sym link is cached in a front file */
3118 	if (cp->c_metadata.md_flags & MD_POPULATED) {
3119 		ASSERT(cp->c_metadata.md_flags & MD_FILE);
3120 		if (cp->c_frontvp == NULL) {
3121 			(void) cachefs_getfrontfile(cp);
3122 		}
3123 		if (cp->c_metadata.md_flags & MD_POPULATED) {
3124 			/* read symlink data from frontfile */
3125 			uiop->uio_offset = 0;
3126 			(void) VOP_RWLOCK(cp->c_frontvp,
3127 			    V_WRITELOCK_FALSE, NULL);
3128 			error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
3129 			VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);
3130 			readcache = 1;
3131 			goto out;
3132 		}
3133 	}
3134 	error = ETIMEDOUT;
3135 
3136 out:
3137 	mutex_exit(&cp->c_statelock);
3138 	if (error == 0) {
3139 		if (readcache)
3140 			fscp->fs_stats.st_hits++;
3141 		else
3142 			fscp->fs_stats.st_misses++;
3143 	}
3144 	return (error);
3145 }
3146 
3147 /*ARGSUSED*/
3148 static int
3149 cachefs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
3150 {
3151 	cnode_t *cp = VTOC(vp);
3152 	int error = 0;
3153 	fscache_t *fscp = C_TO_FSCACHE(cp);
3154 	int held = 0;
3155 	int connected = 0;
3156 
3157 #ifdef CFSDEBUG
3158 	CFS_DEBUG(CFSDEBUG_VOPS)
3159 		printf("cachefs_fsync: ENTER vp %p\n", (void *)vp);
3160 #endif
3161 
3162 	if (getzoneid() != GLOBAL_ZONEID) {
3163 		error = EPERM;
3164 		goto out;
3165 	}
3166 
3167 	if (fscp->fs_backvfsp && fscp->fs_backvfsp->vfs_flag & VFS_RDONLY)
3168 		goto out;
3169 
3170 	/*
3171 	 * Cachefs only provides pass-through support for NFSv4,
3172 	 * and all vnode operations are passed through to the
3173 	 * back file system. For NFSv4 pass-through to work, only
3174 	 * connected operation is supported, the cnode backvp must
3175 	 * exist, and cachefs optional (eg., disconnectable) flags
3176 	 * are turned off. Assert these conditions to ensure that
3177 	 * the backfilesystem is called for the fsync operation.
3178 	 */
3179 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3180 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
3181 
3182 	for (;;) {
3183 		/* get (or renew) access to the file system */
3184 		if (held) {
3185 			/* Won't loop with NFSv4 connected behavior */
3186 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3187 			cachefs_cd_release(fscp);
3188 			held = 0;
3189 		}
3190 		error = cachefs_cd_access(fscp, connected, 1);
3191 		if (error)
3192 			break;
3193 		held = 1;
3194 		connected = 0;
3195 
3196 		/* if a regular file, write out the pages */
3197 		if ((vp->v_type == VREG) && vn_has_cached_data(vp) &&
3198 		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
3199 			error = cachefs_putpage_common(vp, (offset_t)0,
3200 			    0, 0, cr);
3201 			if (CFS_TIMEOUT(fscp, error)) {
3202 				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3203 					cachefs_cd_release(fscp);
3204 					held = 0;
3205 					cachefs_cd_timedout(fscp);
3206 					continue;
3207 				} else {
3208 					connected = 1;
3209 					continue;
3210 				}
3211 			}
3212 
3213 			/* if no space left in cache, wait until connected */
3214 			if ((error == ENOSPC) &&
3215 			    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
3216 				connected = 1;
3217 				continue;
3218 			}
3219 
3220 			/* clear the cnode error if putpage worked */
3221 			if ((error == 0) && cp->c_error) {
3222 				mutex_enter(&cp->c_statelock);
3223 				cp->c_error = 0;
3224 				mutex_exit(&cp->c_statelock);
3225 			}
3226 
3227 			if (error)
3228 				break;
3229 		}
3230 
3231 		/* if connected, sync the backvp */
3232 		if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) &&
3233 		    cp->c_backvp) {
3234 			mutex_enter(&cp->c_statelock);
3235 			if (cp->c_backvp) {
3236 				CFS_DPRINT_BACKFS_NFSV4(fscp,
3237 				    ("cachefs_fsync (nfsv4): cnode %p, "
3238 				    "backvp %p\n", cp, cp->c_backvp));
3239 				error = VOP_FSYNC(cp->c_backvp, syncflag, cr,
3240 				    ct);
3241 				if (CFS_TIMEOUT(fscp, error)) {
3242 					mutex_exit(&cp->c_statelock);
3243 					cachefs_cd_release(fscp);
3244 					held = 0;
3245 					cachefs_cd_timedout(fscp);
3246 					continue;
3247 				} else if (error && (error != EINTR))
3248 					cp->c_error = error;
3249 			}
3250 			mutex_exit(&cp->c_statelock);
3251 		}
3252 
3253 		/* sync the metadata and the front file to the front fs */
3254 		if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
3255 			error = cachefs_sync_metadata(cp);
3256 			if (error &&
3257 			    (fscp->fs_cdconnected == CFS_CD_CONNECTED))
3258 				error = 0;
3259 		}
3260 		break;
3261 	}
3262 
3263 	if (error == 0)
3264 		error = cp->c_error;
3265 
3266 	if (held)
3267 		cachefs_cd_release(fscp);
3268 
3269 out:
3270 #ifdef CFS_CD_DEBUG
3271 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3272 #endif
3273 
3274 #ifdef CFSDEBUG
3275 	CFS_DEBUG(CFSDEBUG_VOPS)
3276 		printf("cachefs_fsync: EXIT vp %p\n", (void *)vp);
3277 #endif
3278 	return (error);
3279 }
3280 
3281 /*
3282  * Called from cachefs_inactive(), to make sure all the data goes out to disk.
3283  */
3284 int
3285 cachefs_sync_metadata(cnode_t *cp)
3286 {
3287 	int error = 0;
3288 	struct filegrp *fgp;
3289 	struct vattr va;
3290 	fscache_t *fscp = C_TO_FSCACHE(cp);
3291 
3292 #ifdef CFSDEBUG
3293 	CFS_DEBUG(CFSDEBUG_VOPS)
3294 		printf("c_sync_metadata: ENTER cp %p cflag %x\n",
3295 		    (void *)cp, cp->c_flags);
3296 #endif
3297 
3298 	mutex_enter(&cp->c_statelock);
3299 	if ((cp->c_flags & CN_UPDATED) == 0)
3300 		goto out;
3301 	if (cp->c_flags & (CN_STALE | CN_DESTROY))
3302 		goto out;
3303 	fgp = cp->c_filegrp;
3304 	if ((fgp->fg_flags & CFS_FG_WRITE) == 0)
3305 		goto out;
3306 	if (CFS_ISFS_BACKFS_NFSV4(fscp))
3307 		goto out;
3308 
3309 	if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
3310 		mutex_exit(&cp->c_statelock);
3311 		error = filegrp_allocattr(fgp);
3312 		mutex_enter(&cp->c_statelock);
3313 		if (error) {
3314 			error = 0;
3315 			goto out;
3316 		}
3317 	}
3318 
3319 	if (cp->c_flags & CN_ALLOC_PENDING) {
3320 		error = filegrp_create_metadata(fgp, &cp->c_metadata,
3321 		    &cp->c_id);
3322 		if (error)
3323 			goto out;
3324 		cp->c_flags &= ~CN_ALLOC_PENDING;
3325 	}
3326 
3327 	if (cp->c_flags & CN_NEED_FRONT_SYNC) {
3328 		if (cp->c_frontvp != NULL) {
3329 			error = VOP_FSYNC(cp->c_frontvp, FSYNC, kcred, NULL);
3330 			if (error) {
3331 				cp->c_metadata.md_timestamp.tv_sec = 0;
3332 			} else {
3333 				va.va_mask = AT_MTIME;
3334 				error = VOP_GETATTR(cp->c_frontvp, &va, 0,
3335 				    kcred, NULL);
3336 				if (error)
3337 					goto out;
3338 				cp->c_metadata.md_timestamp = va.va_mtime;
3339 				cp->c_flags &=
3340 				    ~(CN_NEED_FRONT_SYNC |
3341 				    CN_POPULATION_PENDING);
3342 			}
3343 		} else {
3344 			cp->c_flags &=
3345 			    ~(CN_NEED_FRONT_SYNC | CN_POPULATION_PENDING);
3346 		}
3347 	}
3348 
3349 	/*
3350 	 * XXX tony: How can CN_ALLOC_PENDING still be set??
3351 	 * XXX tony: How can CN_UPDATED not be set?????
3352 	 */
3353 	if ((cp->c_flags & CN_ALLOC_PENDING) == 0 &&
3354 	    (cp->c_flags & CN_UPDATED)) {
3355 		error = filegrp_write_metadata(fgp, &cp->c_id,
3356 		    &cp->c_metadata);
3357 		if (error)
3358 			goto out;
3359 	}
3360 out:
3361 	if (error) {
3362 		/* XXX modified files? */
3363 		if (cp->c_metadata.md_rlno) {
3364 			cachefs_removefrontfile(&cp->c_metadata,
3365 			    &cp->c_id, fgp);
3366 			cachefs_rlent_moveto(C_TO_FSCACHE(cp)->fs_cache,
3367 			    CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
3368 			cp->c_metadata.md_rlno = 0;
3369 			cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
3370 			if (cp->c_frontvp) {
3371 				VN_RELE(cp->c_frontvp);
3372 				cp->c_frontvp = NULL;
3373 			}
3374 		}
3375 		if ((cp->c_flags & CN_ALLOC_PENDING) == 0)
3376 			(void) filegrp_destroy_metadata(fgp, &cp->c_id);
3377 		cp->c_flags |= CN_ALLOC_PENDING;
3378 		cachefs_nocache(cp);
3379 	}
3380 	/*
3381 	 * we clear the updated bit even on errors because a retry
3382 	 * will probably fail also.
3383 	 */
3384 	cp->c_flags &= ~CN_UPDATED;
3385 	mutex_exit(&cp->c_statelock);
3386 
3387 #ifdef CFSDEBUG
3388 	CFS_DEBUG(CFSDEBUG_VOPS)
3389 		printf("c_sync_metadata: EXIT cp %p cflag %x\n",
3390 		    (void *)cp, cp->c_flags);
3391 #endif
3392 
3393 	return (error);
3394 }
3395 
3396 /*
3397  * This is the vop entry point for inactivating a vnode.
3398  * It just queues the request for the async thread which
3399  * calls cachefs_inactive.
3400  * Because of the dnlc, it is not safe to grab most locks here.
3401  */
3402 /*ARGSUSED*/
3403 static void
3404 cachefs_inactive(struct vnode *vp, cred_t *cr, caller_context_t *ct)
3405 {
3406 	cnode_t *cp;
3407 	struct cachefs_req *rp;
3408 	fscache_t *fscp;
3409 
3410 #ifdef CFSDEBUG
3411 	CFS_DEBUG(CFSDEBUG_VOPS)
3412 		printf("cachefs_inactive: ENTER vp %p\n", (void *)vp);
3413 #endif
3414 
3415 	cp = VTOC(vp);
3416 	fscp = C_TO_FSCACHE(cp);
3417 
3418 	ASSERT((cp->c_flags & CN_IDLE) == 0);
3419 
3420 	/*
3421 	 * Cachefs only provides pass-through support for NFSv4,
3422 	 * and all vnode operations are passed through to the
3423 	 * back file system. For NFSv4 pass-through to work, only
3424 	 * connected operation is supported, the cnode backvp must
3425 	 * exist, and cachefs optional (eg., disconnectable) flags
3426 	 * are turned off. Assert these conditions to ensure that
3427 	 * the backfilesystem is called for the inactive operation.
3428 	 */
3429 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3430 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
3431 
3432 	/* vn_rele() set the v_count == 1 */
3433 
3434 	cp->c_ipending = 1;
3435 
3436 	rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
3437 	rp->cfs_cmd = CFS_IDLE;
3438 	rp->cfs_cr = cr;
3439 	crhold(rp->cfs_cr);
3440 	rp->cfs_req_u.cu_idle.ci_vp = vp;
3441 	cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));
3442 
3443 #ifdef CFSDEBUG
3444 	CFS_DEBUG(CFSDEBUG_VOPS)
3445 		printf("cachefs_inactive: EXIT vp %p\n", (void *)vp);
3446 #endif
3447 }
3448 
3449 /* ARGSUSED */
3450 static int
3451 cachefs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp,
3452     struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr,
3453     caller_context_t *ct, int *direntflags, pathname_t *realpnp)
3454 
3455 {
3456 	int error = 0;
3457 	cnode_t *dcp = VTOC(dvp);
3458 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3459 	int held = 0;
3460 	int connected = 0;
3461 
3462 #ifdef CFSDEBUG
3463 	CFS_DEBUG(CFSDEBUG_VOPS)
3464 		printf("cachefs_lookup: ENTER dvp %p nm %s\n", (void *)dvp, nm);
3465 #endif
3466 
3467 	if (getzoneid() != GLOBAL_ZONEID) {
3468 		error = EPERM;
3469 		goto out;
3470 	}
3471 
3472 	/*
3473 	 * Cachefs only provides pass-through support for NFSv4,
3474 	 * and all vnode operations are passed through to the
3475 	 * back file system. For NFSv4 pass-through to work, only
3476 	 * connected operation is supported, the cnode backvp must
3477 	 * exist, and cachefs optional (eg., disconnectable) flags
3478 	 * are turned off. Assert these conditions to ensure that
3479 	 * the backfilesystem is called for the lookup operation.
3480 	 */
3481 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3482 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
3483 
3484 	for (;;) {
3485 		/* get (or renew) access to the file system */
3486 		if (held) {
3487 			/* Won't loop with NFSv4 connected behavior */
3488 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3489 			cachefs_cd_release(fscp);
3490 			held = 0;
3491 		}
3492 		error = cachefs_cd_access(fscp, connected, 0);
3493 		if (error)
3494 			break;
3495 		held = 1;
3496 
3497 		error = cachefs_lookup_common(dvp, nm, vpp, pnp,
3498 			flags, rdir, cr);
3499 		if (CFS_TIMEOUT(fscp, error)) {
3500 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3501 				cachefs_cd_release(fscp);
3502 				held = 0;
3503 				cachefs_cd_timedout(fscp);
3504 				connected = 0;
3505 				continue;
3506 			} else {
3507 				if (cachefs_cd_access_miss(fscp)) {
3508 					rw_enter(&dcp->c_rwlock, RW_READER);
3509 					error = cachefs_lookup_back(dvp, nm,
3510 					    vpp, cr);
3511 					rw_exit(&dcp->c_rwlock);
3512 					if (!CFS_TIMEOUT(fscp, error))
3513 						break;
3514 					delay(5*hz);
3515 					connected = 0;
3516 					continue;
3517 				}
3518 				connected = 1;
3519 				continue;
3520 			}
3521 		}
3522 		break;
3523 	}
3524 	if (held)
3525 		cachefs_cd_release(fscp);
3526 
3527 	if (error == 0 && IS_DEVVP(*vpp)) {
3528 		struct vnode *newvp;
3529 		newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
3530 		VN_RELE(*vpp);
3531 		if (newvp == NULL) {
3532 			error = ENOSYS;
3533 		} else {
3534 			*vpp = newvp;
3535 		}
3536 	}
3537 
3538 #ifdef CFS_CD_DEBUG
3539 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3540 #endif
3541 out:
3542 #ifdef CFSDEBUG
3543 	CFS_DEBUG(CFSDEBUG_VOPS)
3544 		printf("cachefs_lookup: EXIT error = %d\n", error);
3545 #endif
3546 
3547 	return (error);
3548 }
3549 
3550 /* ARGSUSED */
3551 int
3552 cachefs_lookup_common(vnode_t *dvp, char *nm, vnode_t **vpp,
3553     struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr)
3554 {
3555 	int error = 0;
3556 	cnode_t *cp, *dcp = VTOC(dvp);
3557 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3558 	struct fid cookie;
3559 	u_offset_t d_offset;
3560 	struct cachefs_req *rp;
3561 	cfs_cid_t cid, dircid;
3562 	uint_t flag;
3563 	uint_t uncached = 0;
3564 
3565 	*vpp = NULL;
3566 
3567 	/*
3568 	 * If lookup is for "", just return dvp.  Don't need
3569 	 * to send it over the wire, look it up in the dnlc,
3570 	 * or perform any access checks.
3571 	 */
3572 	if (*nm == '\0') {
3573 		VN_HOLD(dvp);
3574 		*vpp = dvp;
3575 		return (0);
3576 	}
3577 
3578 	/* can't do lookups in non-directories */
3579 	if (dvp->v_type != VDIR)
3580 		return (ENOTDIR);
3581 
3582 	/* perform access check, also does consistency check if connected */
3583 	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3584 		error = cachefs_access_connected(dvp, VEXEC, 0, cr);
3585 	} else {
3586 		mutex_enter(&dcp->c_statelock);
3587 		error = cachefs_access_local(dcp, VEXEC, cr);
3588 		mutex_exit(&dcp->c_statelock);
3589 	}
3590 	if (error)
3591 		return (error);
3592 
3593 	/*
3594 	 * If lookup is for ".", just return dvp.  Don't need
3595 	 * to send it over the wire or look it up in the dnlc,
3596 	 * just need to check access.
3597 	 */
3598 	if (strcmp(nm, ".") == 0) {
3599 		VN_HOLD(dvp);
3600 		*vpp = dvp;
3601 		return (0);
3602 	}
3603 
3604 	/* check the dnlc */
3605 	*vpp = (vnode_t *)dnlc_lookup(dvp, nm);
3606 	if (*vpp)
3607 		return (0);
3608 
3609 	/* read lock the dir before starting the search */
3610 	rw_enter(&dcp->c_rwlock, RW_READER);
3611 
3612 	mutex_enter(&dcp->c_statelock);
3613 	dircid = dcp->c_id;
3614 
3615 	dcp->c_usage++;
3616 
3617 	/* if front file is not usable, lookup on the back fs */
3618 	if ((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
3619 	    CFS_ISFS_BACKFS_NFSV4(fscp) ||
3620 	    ((dcp->c_filegrp->fg_flags & CFS_FG_READ) == 0)) {
3621 		mutex_exit(&dcp->c_statelock);
3622 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3623 			error = cachefs_lookup_back(dvp, nm, vpp, cr);
3624 		else
3625 			error = ETIMEDOUT;
3626 		goto out;
3627 	}
3628 
3629 	/* if the front file is not populated, try to populate it */
3630 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
3631 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
3632 			error = ETIMEDOUT;
3633 			mutex_exit(&dcp->c_statelock);
3634 			goto out;
3635 		}
3636 
3637 		if (cachefs_async_okay()) {
3638 			/* cannot populate if cache is not writable */
3639 			ASSERT((dcp->c_flags &
3640 			    (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0);
3641 			dcp->c_flags |= CN_ASYNC_POPULATE;
3642 
3643 			rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
3644 			rp->cfs_cmd = CFS_POPULATE;
3645 			rp->cfs_req_u.cu_populate.cpop_vp = dvp;
3646 			rp->cfs_cr = cr;
3647 
3648 			crhold(cr);
3649 			VN_HOLD(dvp);
3650 
3651 			cachefs_addqueue(rp, &fscp->fs_workq);
3652 		} else if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
3653 			error = cachefs_dir_fill(dcp, cr);
3654 			if (error != 0) {
3655 				mutex_exit(&dcp->c_statelock);
3656 				goto out;
3657 			}
3658 		}
3659 		/* no populate if too many asyncs and we have to cache ACLs */
3660 
3661 		mutex_exit(&dcp->c_statelock);
3662 
3663 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3664 			error = cachefs_lookup_back(dvp, nm, vpp, cr);
3665 		else
3666 			error = ETIMEDOUT;
3667 		goto out;
3668 	}
3669 
3670 	/* by now we have a valid cached front file that we can search */
3671 
3672 	ASSERT((dcp->c_flags & CN_ASYNC_POPULATE) == 0);
3673 	error = cachefs_dir_look(dcp, nm, &cookie, &flag,
3674 	    &d_offset, &cid);
3675 	mutex_exit(&dcp->c_statelock);
3676 
3677 	if (error) {
3678 		/* if the entry does not have the fid, go get it */
3679 		if (error == EINVAL) {
3680 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3681 				error = cachefs_lookup_back(dvp, nm, vpp, cr);
3682 			else
3683 				error = ETIMEDOUT;
3684 		}
3685 
3686 		/* errors other than does not exist */
3687 		else if (error != ENOENT) {
3688 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3689 				error = cachefs_lookup_back(dvp, nm, vpp, cr);
3690 			else
3691 				error = ETIMEDOUT;
3692 		}
3693 		goto out;
3694 	}
3695 
3696 	/*
3697 	 * Else we found the entry in the cached directory.
3698 	 * Make a cnode for it.
3699 	 */
3700 	error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
3701 	    cr, 0, &cp);
3702 	if (error == ESTALE) {
3703 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3704 		mutex_enter(&dcp->c_statelock);
3705 		cachefs_nocache(dcp);
3706 		mutex_exit(&dcp->c_statelock);
3707 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3708 			error = cachefs_lookup_back(dvp, nm, vpp, cr);
3709 			uncached = 1;
3710 		} else
3711 			error = ETIMEDOUT;
3712 	} else if (error == 0) {
3713 		*vpp = CTOV(cp);
3714 	}
3715 
3716 out:
3717 	if (error == 0) {
3718 		/* put the entry in the dnlc */
3719 		if (cachefs_dnlc)
3720 			dnlc_enter(dvp, nm, *vpp);
3721 
3722 		/* save the cid of the parent so can find the name */
3723 		cp = VTOC(*vpp);
3724 		if (bcmp(&cp->c_metadata.md_parent, &dircid,
3725 		    sizeof (cfs_cid_t)) != 0) {
3726 			mutex_enter(&cp->c_statelock);
3727 			cp->c_metadata.md_parent = dircid;
3728 			cp->c_flags |= CN_UPDATED;
3729 			mutex_exit(&cp->c_statelock);
3730 		}
3731 	}
3732 
3733 	rw_exit(&dcp->c_rwlock);
3734 	if (uncached && dcp->c_metadata.md_flags & MD_PACKED)
3735 		(void) cachefs_pack_common(dvp, cr);
3736 	return (error);
3737 }
3738 
3739 /*
3740  * Called from cachefs_lookup_common when the back file system needs to be
3741  * examined to perform the lookup.
3742  */
3743 static int
3744 cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp,
3745     cred_t *cr)
3746 {
3747 	int error = 0;
3748 	cnode_t *cp, *dcp = VTOC(dvp);
3749 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3750 	vnode_t *backvp = NULL;
3751 	struct vattr va;
3752 	struct fid cookie;
3753 	cfs_cid_t cid;
3754 	uint32_t valid_fid;
3755 
3756 	mutex_enter(&dcp->c_statelock);
3757 
3758 	/* do a lookup on the back FS to get the back vnode */
3759 	if (dcp->c_backvp == NULL) {
3760 		error = cachefs_getbackvp(fscp, dcp);
3761 		if (error)
3762 			goto out;
3763 	}
3764 
3765 	CFS_DPRINT_BACKFS_NFSV4(fscp,
3766 	    ("cachefs_lookup (nfsv4): dcp %p, dbackvp %p, name %s\n",
3767 	    dcp, dcp->c_backvp, nm));
3768 	error = VOP_LOOKUP(dcp->c_backvp, nm, &backvp, (struct pathname *)NULL,
3769 	    0, (vnode_t *)NULL, cr, NULL, NULL, NULL);
3770 	if (error)
3771 		goto out;
3772 	if (IS_DEVVP(backvp)) {
3773 		struct vnode *devvp = backvp;
3774 
3775 		if (VOP_REALVP(devvp, &backvp, NULL) == 0) {
3776 			VN_HOLD(backvp);
3777 			VN_RELE(devvp);
3778 		}
3779 	}
3780 
3781 	/* get the fid and attrs from the back fs */
3782 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
3783 	error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
3784 	if (error)
3785 		goto out;
3786 
3787 	cid.cid_fileno = va.va_nodeid;
3788 	cid.cid_flags = 0;
3789 
3790 #if 0
3791 	/* XXX bob: this is probably no longer necessary */
3792 	/* if the directory entry was incomplete, we can complete it now */
3793 	if ((dcp->c_metadata.md_flags & MD_POPULATED) &&
3794 	    ((dcp->c_flags & CN_ASYNC_POPULATE) == 0) &&
3795 	    (dcp->c_filegrp->fg_flags & CFS_FG_WRITE)) {
3796 		cachefs_dir_modentry(dcp, d_offset, &cookie, &cid);
3797 	}
3798 #endif
3799 
3800 out:
3801 	mutex_exit(&dcp->c_statelock);
3802 
3803 	/* create the cnode */
3804 	if (error == 0) {
3805 		error = cachefs_cnode_make(&cid, fscp,
3806 		    (valid_fid ? &cookie : NULL),
3807 		    &va, backvp, cr, 0, &cp);
3808 		if (error == 0) {
3809 			*vpp = CTOV(cp);
3810 		}
3811 	}
3812 
3813 	if (backvp)
3814 		VN_RELE(backvp);
3815 
3816 	return (error);
3817 }
3818 
3819 /*ARGSUSED7*/
3820 static int
3821 cachefs_create(vnode_t *dvp, char *nm, vattr_t *vap,
3822     vcexcl_t exclusive, int mode, vnode_t **vpp, cred_t *cr, int flag,
3823     caller_context_t *ct, vsecattr_t *vsecp)
3824 
3825 {
3826 	cnode_t *dcp = VTOC(dvp);
3827 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3828 	cachefscache_t *cachep = fscp->fs_cache;
3829 	int error;
3830 	int connected = 0;
3831 	int held = 0;
3832 
3833 #ifdef CFSDEBUG
3834 	CFS_DEBUG(CFSDEBUG_VOPS)
3835 		printf("cachefs_create: ENTER dvp %p, nm %s\n",
3836 		    (void *)dvp, nm);
3837 #endif
3838 	if (getzoneid() != GLOBAL_ZONEID) {
3839 		error = EPERM;
3840 		goto out;
3841 	}
3842 
3843 	/*
3844 	 * Cachefs only provides pass-through support for NFSv4,
3845 	 * and all vnode operations are passed through to the
3846 	 * back file system. For NFSv4 pass-through to work, only
3847 	 * connected operation is supported, the cnode backvp must
3848 	 * exist, and cachefs optional (eg., disconnectable) flags
3849 	 * are turned off. Assert these conditions to ensure that
3850 	 * the backfilesystem is called for the create operation.
3851 	 */
3852 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3853 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
3854 
3855 	for (;;) {
3856 		/* get (or renew) access to the file system */
3857 		if (held) {
3858 			/* Won't loop with NFSv4 connected behavior */
3859 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3860 			cachefs_cd_release(fscp);
3861 			held = 0;
3862 		}
3863 		error = cachefs_cd_access(fscp, connected, 1);
3864 		if (error)
3865 			break;
3866 		held = 1;
3867 
3868 		/*
3869 		 * if we are connected, perform the remote portion of the
3870 		 * create.
3871 		 */
3872 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3873 			error = cachefs_create_connected(dvp, nm, vap,
3874 			    exclusive, mode, vpp, cr);
3875 			if (CFS_TIMEOUT(fscp, error)) {
3876 				cachefs_cd_release(fscp);
3877 				held = 0;
3878 				cachefs_cd_timedout(fscp);
3879 				connected = 0;
3880 				continue;
3881 			} else if (error) {
3882 				break;
3883 			}
3884 		}
3885 
3886 		/* else we must be disconnected */
3887 		else {
3888 			error = cachefs_create_disconnected(dvp, nm, vap,
3889 			    exclusive, mode, vpp, cr);
3890 			if (CFS_TIMEOUT(fscp, error)) {
3891 				connected = 1;
3892 				continue;
3893 			} else if (error) {
3894 				break;
3895 			}
3896 		}
3897 		break;
3898 	}
3899 
3900 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_CREATE)) {
3901 		fid_t *fidp = NULL;
3902 		ino64_t fileno = 0;
3903 		cnode_t *cp = NULL;
3904 		if (error == 0)
3905 			cp = VTOC(*vpp);
3906 
3907 		if (cp != NULL) {
3908 			fidp = &cp->c_metadata.md_cookie;
3909 			fileno = cp->c_id.cid_fileno;
3910 		}
3911 		cachefs_log_create(cachep, error, fscp->fs_cfsvfsp,
3912 		    fidp, fileno, crgetuid(cr));
3913 	}
3914 
3915 	if (held)
3916 		cachefs_cd_release(fscp);
3917 
3918 	if (error == 0 && CFS_ISFS_NONSHARED(fscp))
3919 		(void) cachefs_pack(dvp, nm, cr);
3920 	if (error == 0 && IS_DEVVP(*vpp)) {
3921 		struct vnode *spcvp;
3922 
3923 		spcvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
3924 		VN_RELE(*vpp);
3925 		if (spcvp == NULL) {
3926 			error = ENOSYS;
3927 		} else {
3928 			*vpp = spcvp;
3929 		}
3930 	}
3931 
3932 #ifdef CFS_CD_DEBUG
3933 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3934 #endif
3935 out:
3936 #ifdef CFSDEBUG
3937 	CFS_DEBUG(CFSDEBUG_VOPS)
3938 		printf("cachefs_create: EXIT error %d\n", error);
3939 #endif
3940 	return (error);
3941 }
3942 
3943 
3944 static int
3945 cachefs_create_connected(vnode_t *dvp, char *nm, vattr_t *vap,
3946     enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr)
3947 {
3948 	cnode_t *dcp = VTOC(dvp);
3949 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3950 	int error;
3951 	vnode_t *tvp = NULL;
3952 	vnode_t *devvp;
3953 	fid_t cookie;
3954 	vattr_t va;
3955 	cnode_t *ncp;
3956 	cfs_cid_t cid;
3957 	vnode_t *vp;
3958 	uint32_t valid_fid;
3959 
3960 	/* special case if file already exists */
3961 	error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
3962 	if (CFS_TIMEOUT(fscp, error))
3963 		return (error);
3964 	if (error == 0) {
3965 		if (exclusive == EXCL)
3966 			error = EEXIST;
3967 		else if (vp->v_type == VDIR && (mode & VWRITE))
3968 			error = EISDIR;
3969 		else if ((error =
3970 		    cachefs_access_connected(vp, mode, 0, cr)) == 0) {
3971 			if ((vap->va_mask & AT_SIZE) && (vp->v_type == VREG)) {
3972 				vap->va_mask = AT_SIZE;
3973 				error = cachefs_setattr_common(vp, vap, 0,
3974 				    cr, NULL);
3975 			}
3976 		}
3977 		if (error) {
3978 			VN_RELE(vp);
3979 		} else
3980 			*vpp = vp;
3981 		return (error);
3982 	}
3983 
3984 	rw_enter(&dcp->c_rwlock, RW_WRITER);
3985 	mutex_enter(&dcp->c_statelock);
3986 
3987 	/* consistency check the directory */
3988 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
3989 	if (error) {
3990 		mutex_exit(&dcp->c_statelock);
3991 		goto out;
3992 	}
3993 
3994 	/* get the backvp if necessary */
3995 	if (dcp->c_backvp == NULL) {
3996 		error = cachefs_getbackvp(fscp, dcp);
3997 		if (error) {
3998 			mutex_exit(&dcp->c_statelock);
3999 			goto out;
4000 		}
4001 	}
4002 
4003 	/* create the file on the back fs */
4004 	CFS_DPRINT_BACKFS_NFSV4(fscp,
4005 	    ("cachefs_create (nfsv4): dcp %p, dbackvp %p,"
4006 	    "name %s\n", dcp, dcp->c_backvp, nm));
4007 	error = VOP_CREATE(dcp->c_backvp, nm, vap, exclusive, mode,
4008 	    &devvp, cr, 0, NULL, NULL);
4009 	mutex_exit(&dcp->c_statelock);
4010 	if (error)
4011 		goto out;
4012 	if (VOP_REALVP(devvp, &tvp, NULL) == 0) {
4013 		VN_HOLD(tvp);
4014 		VN_RELE(devvp);
4015 	} else {
4016 		tvp = devvp;
4017 	}
4018 
4019 	/* get the fid and attrs from the back fs */
4020 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
4021 	error = cachefs_getcookie(tvp, &cookie, &va, cr, valid_fid);
4022 	if (error)
4023 		goto out;
4024 
4025 	/* make the cnode */
4026 	cid.cid_fileno = va.va_nodeid;
4027 	cid.cid_flags = 0;
4028 	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
4029 	    &va, tvp, cr, 0, &ncp);
4030 	if (error)
4031 		goto out;
4032 
4033 	*vpp = CTOV(ncp);
4034 
4035 	/* enter it in the parent directory */
4036 	mutex_enter(&dcp->c_statelock);
4037 	if (CFS_ISFS_NONSHARED(fscp) &&
4038 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
4039 		/* see if entry already exists */
4040 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
4041 		error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL);
4042 		if (error == ENOENT) {
4043 			/* entry, does not exist, add the new file */
4044 			error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie,
4045 			    &ncp->c_id, SM_ASYNC);
4046 			if (error) {
4047 				cachefs_nocache(dcp);
4048 				error = 0;
4049 			}
4050 			/* XXX should this be done elsewhere, too? */
4051 			dnlc_enter(dvp, nm, *vpp);
4052 		} else {
4053 			/* entry exists or some other problem */
4054 			cachefs_nocache(dcp);
4055 			error = 0;
4056 		}
4057 	}
4058 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
4059 	mutex_exit(&dcp->c_statelock);
4060 
4061 out:
4062 	rw_exit(&dcp->c_rwlock);
4063 	if (tvp)
4064 		VN_RELE(tvp);
4065 
4066 	return (error);
4067 }
4068 
/*
 * cachefs_create_disconnected
 *
 * Create the file nm in directory dvp while operating disconnected.
 * No back file system vnode operation is issued directly from this
 * function; the work is done against the cached directory and recorded
 * with cachefs_dlog_create()/cachefs_dlog_commit().
 *
 * If nm is already present in the cached directory, no new file is
 * made: the existing cnode is looked up, the usual create-on-existing
 * checks are applied (EEXIST for EXCL, EISDIR for writing a directory,
 * local access check) and, when AT_SIZE is requested on a regular
 * file, the size is set through cachefs_setattr_common().  Note that
 * this path overwrites vap->va_mask with AT_SIZE.
 *
 * Otherwise a new cnode is created, its metadata is written out
 * immediately, the operation is logged, and the name is entered into
 * the parent directory and the dnlc.
 *
 * Returns 0 with *vpp set to the vnode, or an errno.  ETIMEDOUT is
 * returned whenever the cache cannot satisfy the request (directory
 * not populated, write-around file system, attributes needed, failure
 * to enter the name) -- presumably so that the caller retries in
 * connected mode, as cachefs_remove() and cachefs_link() do for their
 * disconnected helpers.
 */
static int
cachefs_create_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
	enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr)
{
	cnode_t *dcp = VTOC(dvp);
	cnode_t *cp;
	cnode_t *ncp = NULL;
	vnode_t *vp;
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error = 0;
	struct vattr va;
	timestruc_t current_time;
	off_t commit = 0;	/* non-zero once a log record exists */
	fid_t cookie;
	cfs_cid_t cid;

	rw_enter(&dcp->c_rwlock, RW_WRITER);
	mutex_enter(&dcp->c_statelock);

	/* give up if the directory is not populated */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		mutex_exit(&dcp->c_statelock);
		rw_exit(&dcp->c_rwlock);
		return (ETIMEDOUT);
	}

	/*
	 * special case if file already exists
	 *
	 * NOTE(review): cachefs_link_disconnected() tests the result of
	 * the same call for ENOTDIR rather than EINVAL; confirm which
	 * errno cachefs_dir_look() uses for an unusable directory.
	 */
	error = cachefs_dir_look(dcp, nm, &cookie, NULL, NULL, &cid);
	if (error == EINVAL) {
		mutex_exit(&dcp->c_statelock);
		rw_exit(&dcp->c_rwlock);
		return (ETIMEDOUT);
	}
	if (error == 0) {
		/*
		 * The name exists.  Both directory locks are dropped
		 * here, so this path returns directly instead of going
		 * through the "out" label (which releases c_rwlock).
		 */
		mutex_exit(&dcp->c_statelock);
		rw_exit(&dcp->c_rwlock);
		error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
		    cr, 0, &cp);
		if (error) {
			return (error);
		}
		vp = CTOV(cp);

		if (cp->c_metadata.md_flags & MD_NEEDATTRS)
			error = ETIMEDOUT;
		else if (exclusive == EXCL)
			error = EEXIST;
		else if (vp->v_type == VDIR && (mode & VWRITE))
			error = EISDIR;
		else {
			mutex_enter(&cp->c_statelock);
			error = cachefs_access_local(cp, mode, cr);
			mutex_exit(&cp->c_statelock);
			if (!error) {
				/* only the size is applied to an existing file */
				if ((vap->va_mask & AT_SIZE) &&
				    (vp->v_type == VREG)) {
					vap->va_mask = AT_SIZE;
					error = cachefs_setattr_common(vp,
					    vap, 0, cr, NULL);
				}
			}
		}
		if (error) {
			VN_RELE(vp);
		} else
			*vpp = vp;
		return (error);
	}

	/*
	 * From here on the name does not exist (or the lookup failed in
	 * some other way) and a new file is created.  c_rwlock stays
	 * held until "out"; c_statelock must be dropped before each
	 * "goto out".
	 */

	/* give up if cannot modify the cache */
	if (CFS_ISFS_WRITE_AROUND(fscp)) {
		mutex_exit(&dcp->c_statelock);
		error = ETIMEDOUT;
		goto out;
	}

	/* check access */
	if (error = cachefs_access_local(dcp, VWRITE, cr)) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* mark dir as modified */
	cachefs_modified(dcp);
	mutex_exit(&dcp->c_statelock);

	/* must be privileged to set sticky bit */
	if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
		vap->va_mode &= ~VSVTX;

	/* make up a reasonable set of attributes */
	cachefs_attr_setup(vap, &va, dcp, cr);

	/* create the cnode */
	error = cachefs_cnode_create(fscp, &va, 0, &ncp);
	if (error)
		goto out;

	/* ncp->c_statelock is held from here until just after logging */
	mutex_enter(&ncp->c_statelock);

	/* get the front file now instead of later */
	if (vap->va_type == VREG) {
		error = cachefs_getfrontfile(ncp);
		if (error) {
			mutex_exit(&ncp->c_statelock);
			goto out;
		}
		ASSERT(ncp->c_frontvp != NULL);
		ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
		ncp->c_metadata.md_flags |= MD_POPULATED;
	} else {
		/* no front file: allocate the metadata slot explicitly */
		ASSERT(ncp->c_flags & CN_ALLOC_PENDING);
		if (ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
			(void) filegrp_allocattr(ncp->c_filegrp);
		}
		error = filegrp_create_metadata(ncp->c_filegrp,
		    &ncp->c_metadata, &ncp->c_id);
		if (error) {
			mutex_exit(&ncp->c_statelock);
			goto out;
		}
		ncp->c_flags &= ~CN_ALLOC_PENDING;
	}
	/* gid and acl setup is done with both statelocks held */
	mutex_enter(&dcp->c_statelock);
	cachefs_creategid(dcp, ncp, vap, cr);
	cachefs_createacl(dcp, ncp);
	mutex_exit(&dcp->c_statelock);

	/* set times on the file */
	gethrestime(&current_time);
	ncp->c_metadata.md_vattr.va_atime = current_time;
	ncp->c_metadata.md_localctime = current_time;
	ncp->c_metadata.md_localmtime = current_time;
	ncp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME;

	/* reserve space for the daemon cid mapping */
	error = cachefs_dlog_cidmap(fscp);
	if (error) {
		mutex_exit(&ncp->c_statelock);
		goto out;
	}
	ncp->c_metadata.md_flags |= MD_MAPPING;

	/* mark the new file as modified */
	if (cachefs_modified_alloc(ncp)) {
		mutex_exit(&ncp->c_statelock);
		error = ENOSPC;
		goto out;
	}
	ncp->c_flags |= CN_UPDATED;

	/*
	 * write the metadata now rather than waiting until
	 * inactive so that if there's no space we can let
	 * the caller know.
	 */
	ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
	ASSERT((ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
	error = filegrp_write_metadata(ncp->c_filegrp,
	    &ncp->c_id, &ncp->c_metadata);
	if (error) {
		mutex_exit(&ncp->c_statelock);
		goto out;
	}

	/* log the operation; a zero return is treated as out of space */
	commit = cachefs_dlog_create(fscp, dcp, nm, vap, exclusive,
	    mode, ncp, 0, cr);
	if (commit == 0) {
		mutex_exit(&ncp->c_statelock);
		error = ENOSPC;
		goto out;
	}

	mutex_exit(&ncp->c_statelock);

	mutex_enter(&dcp->c_statelock);

	/* update parent dir times */
	dcp->c_metadata.md_localmtime = current_time;
	dcp->c_metadata.md_flags |= MD_LOCALMTIME;
	dcp->c_flags |= CN_UPDATED;

	/*
	 * enter new file name in the parent directory
	 *
	 * MD_POPULATED is tested again because c_statelock was dropped
	 * after the check at the top of the function.
	 */
	if (dcp->c_metadata.md_flags & MD_POPULATED) {
		error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie,
		    &ncp->c_id, 0);
		if (error) {
			cachefs_nocache(dcp);
			mutex_exit(&dcp->c_statelock);
			error = ETIMEDOUT;
			goto out;
		}
		dnlc_enter(dvp, nm, CTOV(ncp));
	} else {
		mutex_exit(&dcp->c_statelock);
		error = ETIMEDOUT;
		goto out;
	}
	mutex_exit(&dcp->c_statelock);

out:
	rw_exit(&dcp->c_rwlock);

	/* commit the log record (if one was written) with the final status */
	if (commit) {
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
	}
	if (error) {
		/* destroy the cnode we created */
		if (ncp) {
			mutex_enter(&ncp->c_statelock);
			ncp->c_flags |= CN_DESTROY;
			mutex_exit(&ncp->c_statelock);
			VN_RELE(CTOV(ncp));
		}
	} else {
		*vpp = CTOV(ncp);
	}
	return (error);
}
4292 
/*
 * cachefs_remove
 *
 * Remove the entry nm from directory dvp.  Fails with EPERM outside
 * the global zone.
 *
 * The body is a retry loop.  Each pass obtains access to the file
 * system with cachefs_cd_access(), looks up the victim vnode, and then
 * hands off to cachefs_remove_connected() or
 * cachefs_remove_disconnected() depending on fs_cdconnected.  When a
 * step fails with an error that CFS_TIMEOUT() recognizes, the loop is
 * restarted with "connected" flipped: a timeout while connected calls
 * cachefs_cd_timedout() and retries with connected = 0; a timeout
 * while disconnected retries with connected = 1.
 *
 * Local state carried across iterations:
 *	held	- cachefs_cd_access() succeeded and has not been released
 *	vp	- held vnode of the entry being removed, or NULL
 *	vfslock	- vn_vfswlock(vp) is held (directories only)
 *
 * Returns 0 or an errno.
 */
/*ARGSUSED*/
static int
cachefs_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
    int flags)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	cachefscache_t *cachep = fscp->fs_cache;
	int error = 0;
	int held = 0;
	int connected = 0;
	size_t namlen;
	vnode_t *vp = NULL;
	int vfslock = 0;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_remove: ENTER dvp %p name %s\n",
		    (void *)dvp, nm);
#endif
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
		ASSERT(dcp->c_flags & CN_NOCACHE);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the remove operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	for (;;) {
		/* undo whatever the previous pass left held before retrying */
		if (vfslock) {
			vn_vfsunlock(vp);
			vfslock = 0;
		}
		if (vp) {
			VN_RELE(vp);
			vp = NULL;
		}

		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;

		/* if disconnected, do some extra error checking */
		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
			/* check permissions */
			mutex_enter(&dcp->c_statelock);
			error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
			mutex_exit(&dcp->c_statelock);
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			}
			if (error)
				break;

			namlen = strlen(nm);
			if (namlen == 0) {
				error = EINVAL;
				break;
			}

			/* cannot remove . and .. */
			if (nm[0] == '.') {
				if (namlen == 1) {
					error = EINVAL;
					break;
				} else if (namlen == 2 && nm[1] == '.') {
					error = EEXIST;
					break;
				}
			}

		}

		/* get the cnode of the file to delete */
		error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
		if (error) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				if (CFS_TIMEOUT(fscp, error)) {
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					connected = 0;
					continue;
				}
			} else {
				if (CFS_TIMEOUT(fscp, error)) {
					connected = 1;
					continue;
				}
			}
			/*
			 * Lookup failed for good: log the failed remove
			 * with a zeroed fid and fileno 0, since there is
			 * no cnode to take them from.
			 */
			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE)) {
				struct fid foo;

				bzero(&foo, sizeof (foo));
				cachefs_log_remove(cachep, error,
				    fscp->fs_cfsvfsp, &foo, 0, crgetuid(cr));
			}
			break;
		}

		if (vp->v_type == VDIR) {
			/* must be privileged to remove dirs with unlink() */
			if ((error = secpolicy_fs_linkdir(cr, vp->v_vfsp)) != 0)
				break;

			/* see ufs_dirremove for why this is done, mount race */
			if (vn_vfswlock(vp)) {
				error = EBUSY;
				break;
			}
			vfslock = 1;
			if (vn_mountedvfs(vp) != NULL) {
				error = EBUSY;
				break;
			}
		}

		/* do the remove in whichever mode we are currently in */
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_remove_connected(dvp, nm, cr, vp);
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_remove_disconnected(dvp, nm, cr,
			    vp);
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			}
		}
		break;
	}

#if 0
	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE))
		cachefs_log_remove(cachep, error, fscp->fs_cfsvfsp,
		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
		    crgetuid(cr));
#endif

	/* release whatever the final pass through the loop still holds */
	if (held)
		cachefs_cd_release(fscp);

	if (vfslock)
		vn_vfsunlock(vp);

	if (vp)
		VN_RELE(vp);

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_remove: EXIT dvp %p\n", (void *)dvp);
#endif

	return (error);
}
4478 
4479 int
4480 cachefs_remove_connected(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
4481 {
4482 	cnode_t *dcp = VTOC(dvp);
4483 	cnode_t *cp = VTOC(vp);
4484 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4485 	int error = 0;
4486 
4487 	/*
4488 	 * Acquire the rwlock (WRITER) on the directory to prevent other
4489 	 * activity on the directory.
4490 	 */
4491 	rw_enter(&dcp->c_rwlock, RW_WRITER);
4492 
4493 	/* purge dnlc of this entry so can get accurate vnode count */
4494 	dnlc_purge_vp(vp);
4495 
4496 	/*
4497 	 * If the cnode is active, make a link to the file
4498 	 * so operations on the file will continue.
4499 	 */
4500 	if ((vp->v_type != VDIR) &&
4501 	    !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) {
4502 		error = cachefs_remove_dolink(dvp, vp, nm, cr);
4503 		if (error)
4504 			goto out;
4505 	}
4506 
4507 	/* else call backfs NFSv4 handler if NFSv4 */
4508 	else if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
4509 		error = cachefs_remove_backfs_nfsv4(dvp, nm, cr, vp);
4510 		goto out;
4511 	}
4512 
4513 	/* else drop the backvp so nfs does not do rename */
4514 	else if (cp->c_backvp) {
4515 		mutex_enter(&cp->c_statelock);
4516 		if (cp->c_backvp) {
4517 			VN_RELE(cp->c_backvp);
4518 			cp->c_backvp = NULL;
4519 		}
4520 		mutex_exit(&cp->c_statelock);
4521 	}
4522 
4523 	mutex_enter(&dcp->c_statelock);
4524 
4525 	/* get the backvp */
4526 	if (dcp->c_backvp == NULL) {
4527 		error = cachefs_getbackvp(fscp, dcp);
4528 		if (error) {
4529 			mutex_exit(&dcp->c_statelock);
4530 			goto out;
4531 		}
4532 	}
4533 
4534 	/* check directory consistency */
4535 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
4536 	if (error) {
4537 		mutex_exit(&dcp->c_statelock);
4538 		goto out;
4539 	}
4540 
4541 	/* perform the remove on the back fs */
4542 	error = VOP_REMOVE(dcp->c_backvp, nm, cr, NULL, 0);
4543 	if (error) {
4544 		mutex_exit(&dcp->c_statelock);
4545 		goto out;
4546 	}
4547 
4548 	/* the dir has been modified */
4549 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
4550 
4551 	/* remove the entry from the populated directory */
4552 	if (CFS_ISFS_NONSHARED(fscp) &&
4553 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
4554 		error = cachefs_dir_rmentry(dcp, nm);
4555 		if (error) {
4556 			cachefs_nocache(dcp);
4557 			error = 0;
4558 		}
4559 	}
4560 	mutex_exit(&dcp->c_statelock);
4561 
4562 	/* fix up the file we deleted */
4563 	mutex_enter(&cp->c_statelock);
4564 	if (cp->c_attr.va_nlink == 1)
4565 		cp->c_flags |= CN_DESTROY;
4566 	else
4567 		cp->c_flags |= CN_UPDATED;
4568 
4569 	cp->c_attr.va_nlink--;
4570 	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
4571 	mutex_exit(&cp->c_statelock);
4572 
4573 out:
4574 	rw_exit(&dcp->c_rwlock);
4575 	return (error);
4576 }
4577 
4578 /*
4579  * cachefs_remove_backfs_nfsv4
4580  *
4581  * Call NFSv4 back filesystem to handle the remove (cachefs
4582  * pass-through support for NFSv4).
4583  */
4584 int
4585 cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
4586 {
4587 	cnode_t *dcp = VTOC(dvp);
4588 	cnode_t *cp = VTOC(vp);
4589 	vnode_t *dbackvp;
4590 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4591 	int error = 0;
4592 
4593 	/*
4594 	 * For NFSv4 pass-through to work, only connected operation
4595 	 * is supported, the cnode backvp must exist, and cachefs
4596 	 * optional (eg., disconnectable) flags are turned off. Assert
4597 	 * these conditions for the getattr operation.
4598 	 */
4599 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4600 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
4601 
4602 	/* Should hold the directory readwrite lock to update directory */
4603 	ASSERT(RW_WRITE_HELD(&dcp->c_rwlock));
4604 
4605 	/*
4606 	 * Update attributes for directory. Note that
4607 	 * CFSOP_CHECK_COBJECT asserts for c_statelock being
4608 	 * held, so grab it before calling the routine.
4609 	 */
4610 	mutex_enter(&dcp->c_statelock);
4611 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
4612 	mutex_exit(&dcp->c_statelock);
4613 	if (error)
4614 		goto out;
4615 
4616 	/*
4617 	 * Update attributes for cp. Note that CFSOP_CHECK_COBJECT
4618 	 * asserts for c_statelock being held, so grab it before
4619 	 * calling the routine.
4620 	 */
4621 	mutex_enter(&cp->c_statelock);
4622 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
4623 	if (error) {
4624 		mutex_exit(&cp->c_statelock);
4625 		goto out;
4626 	}
4627 
4628 	/*
4629 	 * Drop the backvp so nfs if the link count is 1 so that
4630 	 * nfs does not do rename. Ensure that we will destroy the cnode
4631 	 * since this cnode no longer contains the backvp. Note that we
4632 	 * maintain lock on this cnode to prevent change till the remove
4633 	 * completes, otherwise other operations will encounter an ESTALE
4634 	 * if they try to use the cnode with CN_DESTROY set (see
4635 	 * cachefs_get_backvp()), or change the state of the cnode
4636 	 * while we're removing it.
4637 	 */
4638 	if (cp->c_attr.va_nlink == 1) {
4639 		/*
4640 		 * The unldvp information is created for the case
4641 		 * when there is more than one reference on the
4642 		 * vnode when a remove operation is called. If the
4643 		 * remove itself was holding a reference to the
4644 		 * vnode, then a subsequent remove will remove the
4645 		 * backvp, so we need to get rid of the unldvp
4646 		 * before removing the backvp. An alternate would
4647 		 * be to simply ignore the remove and let the
4648 		 * inactivation routine do the deletion of the
4649 		 * unldvp.
4650 		 */
4651 		if (cp->c_unldvp) {
4652 			VN_RELE(cp->c_unldvp);
4653 			cachefs_kmem_free(cp->c_unlname, MAXNAMELEN);
4654 			crfree(cp->c_unlcred);
4655 			cp->c_unldvp = NULL;
4656 			cp->c_unlcred = NULL;
4657 		}
4658 		cp->c_flags |= CN_DESTROY;
4659 		cp->c_attr.va_nlink = 0;
4660 		VN_RELE(cp->c_backvp);
4661 		cp->c_backvp = NULL;
4662 	}
4663 
4664 	/* perform the remove on back fs after extracting directory backvp */
4665 	mutex_enter(&dcp->c_statelock);
4666 	dbackvp = dcp->c_backvp;
4667 	mutex_exit(&dcp->c_statelock);
4668 
4669 	CFS_DPRINT_BACKFS_NFSV4(fscp,
4670 	    ("cachefs_remove (nfsv4): dcp %p, dbackvp %p, name %s\n",
4671 	    dcp, dbackvp, nm));
4672 	error = VOP_REMOVE(dbackvp, nm, cr, NULL, 0);
4673 	if (error) {
4674 		mutex_exit(&cp->c_statelock);
4675 		goto out;
4676 	}
4677 
4678 	/* fix up the file we deleted, if not destroying the cnode */
4679 	if ((cp->c_flags & CN_DESTROY) == 0) {
4680 		cp->c_attr.va_nlink--;
4681 		cp->c_flags |= CN_UPDATED;
4682 	}
4683 
4684 	mutex_exit(&cp->c_statelock);
4685 
4686 out:
4687 	return (error);
4688 }
4689 
/*
 * cachefs_remove_disconnected
 *
 * Remove nm (whose vnode is vp) from directory dvp while disconnected.
 * The removal is applied to the cached directory and recorded with
 * cachefs_dlog_remove(); the log record is committed with the final
 * status at "out".
 *
 * Returns 0 or an errno.  ETIMEDOUT is returned when the cache cannot
 * be used for the operation (write-around file system, attributes
 * needed, directory not populated, ENOTDIR from cachefs_dir_rmentry());
 * cachefs_remove() reacts to a CFS_TIMEOUT() error by retrying in
 * connected mode.  ENOSPC is returned when space for the modified
 * state, the cid mapping or the log record cannot be obtained.
 */
int
cachefs_remove_disconnected(vnode_t *dvp, char *nm, cred_t *cr,
    vnode_t *vp)
{
	cnode_t *dcp = VTOC(dvp);
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error = 0;
	off_t commit = 0;	/* non-zero once a log record exists */
	timestruc_t current_time;

	if (CFS_ISFS_WRITE_AROUND(fscp))
		return (ETIMEDOUT);

	if (cp->c_metadata.md_flags & MD_NEEDATTRS)
		return (ETIMEDOUT);

	/*
	 * Acquire the rwlock (WRITER) on the directory to prevent other
	 * activity on the directory.
	 */
	rw_enter(&dcp->c_rwlock, RW_WRITER);

	/*
	 * dir must be populated
	 *
	 * This first test is made without c_statelock; the flag is
	 * tested again under the lock just before the entry is removed.
	 */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		error = ETIMEDOUT;
		goto out;
	}

	/* sticky-bit check, with directory then file statelock held */
	mutex_enter(&dcp->c_statelock);
	mutex_enter(&cp->c_statelock);

	error = cachefs_stickyrmchk(dcp, cp, cr);

	mutex_exit(&cp->c_statelock);
	mutex_exit(&dcp->c_statelock);
	if (error)
		goto out;

	/* purge dnlc of this entry so can get accurate vnode count */
	dnlc_purge_vp(vp);

	/*
	 * If the cnode is active, make a link to the file
	 * so operations on the file will continue.
	 */
	if ((vp->v_type != VDIR) &&
	    !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) {
		error = cachefs_remove_dolink(dvp, vp, nm, cr);
		if (error)
			goto out;
	}

	/*
	 * The file survives this remove (other links remain), so mark
	 * it modified and make sure it has a cid mapping.
	 */
	if (cp->c_attr.va_nlink > 1) {
		mutex_enter(&cp->c_statelock);
		if (cachefs_modified_alloc(cp)) {
			mutex_exit(&cp->c_statelock);
			error = ENOSPC;
			goto out;
		}
		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
			error = cachefs_dlog_cidmap(fscp);
			if (error) {
				mutex_exit(&cp->c_statelock);
				error = ENOSPC;
				goto out;
			}
			cp->c_metadata.md_flags |= MD_MAPPING;
			cp->c_flags |= CN_UPDATED;
		}
		mutex_exit(&cp->c_statelock);
	}

	/* log the remove; a zero return is treated as out of space */
	commit = cachefs_dlog_remove(fscp, dcp, nm, cp, cr);
	if (commit == 0) {
		error = ENOSPC;
		goto out;
	}

	/* remove the file from the dir */
	mutex_enter(&dcp->c_statelock);
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		mutex_exit(&dcp->c_statelock);
		error = ETIMEDOUT;
		goto out;

	}
	cachefs_modified(dcp);
	error = cachefs_dir_rmentry(dcp, nm);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		if (error == ENOTDIR)
			error = ETIMEDOUT;
		goto out;
	}

	/* update parent dir times */
	gethrestime(&current_time);
	dcp->c_metadata.md_localctime = current_time;
	dcp->c_metadata.md_localmtime = current_time;
	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
	dcp->c_flags |= CN_UPDATED;
	mutex_exit(&dcp->c_statelock);

	/* adjust file we are deleting */
	mutex_enter(&cp->c_statelock);
	cp->c_attr.va_nlink--;
	cp->c_metadata.md_localctime = current_time;
	cp->c_metadata.md_flags |= MD_LOCALCTIME;
	if (cp->c_attr.va_nlink == 0) {
		/* that was the last link */
		cp->c_flags |= CN_DESTROY;
	} else {
		cp->c_flags |= CN_UPDATED;
	}
	mutex_exit(&cp->c_statelock);

out:
	if (commit) {
		/* commit the log entry */
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
	}

	rw_exit(&dcp->c_rwlock);
	return (error);
}
4819 
4820 /*ARGSUSED*/
4821 static int
4822 cachefs_link(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr,
4823     caller_context_t *ct, int flags)
4824 {
4825 	fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
4826 	cnode_t *tdcp = VTOC(tdvp);
4827 	struct vnode *realvp;
4828 	int error = 0;
4829 	int held = 0;
4830 	int connected = 0;
4831 
4832 #ifdef CFSDEBUG
4833 	CFS_DEBUG(CFSDEBUG_VOPS)
4834 		printf("cachefs_link: ENTER fvp %p tdvp %p tnm %s\n",
4835 		    (void *)fvp, (void *)tdvp, tnm);
4836 #endif
4837 
4838 	if (getzoneid() != GLOBAL_ZONEID) {
4839 		error = EPERM;
4840 		goto out;
4841 	}
4842 
4843 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
4844 		ASSERT(tdcp->c_flags & CN_NOCACHE);
4845 
4846 	if (VOP_REALVP(fvp, &realvp, ct) == 0) {
4847 		fvp = realvp;
4848 	}
4849 
4850 	/*
4851 	 * Cachefs only provides pass-through support for NFSv4,
4852 	 * and all vnode operations are passed through to the
4853 	 * back file system. For NFSv4 pass-through to work, only
4854 	 * connected operation is supported, the cnode backvp must
4855 	 * exist, and cachefs optional (eg., disconnectable) flags
4856 	 * are turned off. Assert these conditions to ensure that
4857 	 * the backfilesystem is called for the link operation.
4858 	 */
4859 
4860 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4861 	CFS_BACKFS_NFSV4_ASSERT_CNODE(tdcp);
4862 
4863 	for (;;) {
4864 		/* get (or renew) access to the file system */
4865 		if (held) {
4866 			/* Won't loop with NFSv4 connected behavior */
4867 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
4868 			rw_exit(&tdcp->c_rwlock);
4869 			cachefs_cd_release(fscp);
4870 			held = 0;
4871 		}
4872 		error = cachefs_cd_access(fscp, connected, 1);
4873 		if (error)
4874 			break;
4875 		rw_enter(&tdcp->c_rwlock, RW_WRITER);
4876 		held = 1;
4877 
4878 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
4879 			error = cachefs_link_connected(tdvp, fvp, tnm, cr);
4880 			if (CFS_TIMEOUT(fscp, error)) {
4881 				rw_exit(&tdcp->c_rwlock);
4882 				cachefs_cd_release(fscp);
4883 				held = 0;
4884 				cachefs_cd_timedout(fscp);
4885 				connected = 0;
4886 				continue;
4887 			}
4888 		} else {
4889 			error = cachefs_link_disconnected(tdvp, fvp, tnm,
4890 			    cr);
4891 			if (CFS_TIMEOUT(fscp, error)) {
4892 				connected = 1;
4893 				continue;
4894 			}
4895 		}
4896 		break;
4897 	}
4898 
4899 	if (held) {
4900 		rw_exit(&tdcp->c_rwlock);
4901 		cachefs_cd_release(fscp);
4902 	}
4903 
4904 #ifdef CFS_CD_DEBUG
4905 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
4906 #endif
4907 out:
4908 #ifdef CFSDEBUG
4909 	CFS_DEBUG(CFSDEBUG_VOPS)
4910 		printf("cachefs_link: EXIT fvp %p tdvp %p tnm %s\n",
4911 		    (void *)fvp, (void *)tdvp, tnm);
4912 #endif
4913 	return (error);
4914 }
4915 
4916 static int
4917 cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr)
4918 {
4919 	cnode_t *tdcp = VTOC(tdvp);
4920 	cnode_t *fcp = VTOC(fvp);
4921 	fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
4922 	int error = 0;
4923 	vnode_t *backvp = NULL;
4924 
4925 	if (tdcp != fcp) {
4926 		mutex_enter(&fcp->c_statelock);
4927 
4928 		if (fcp->c_backvp == NULL) {
4929 			error = cachefs_getbackvp(fscp, fcp);
4930 			if (error) {
4931 				mutex_exit(&fcp->c_statelock);
4932 				goto out;
4933 			}
4934 		}
4935 
4936 		error = CFSOP_CHECK_COBJECT(fscp, fcp, 0, cr);
4937 		if (error) {
4938 			mutex_exit(&fcp->c_statelock);
4939 			goto out;
4940 		}
4941 		backvp = fcp->c_backvp;
4942 		VN_HOLD(backvp);
4943 		mutex_exit(&fcp->c_statelock);
4944 	}
4945 
4946 	mutex_enter(&tdcp->c_statelock);
4947 
4948 	/* get backvp of target directory */
4949 	if (tdcp->c_backvp == NULL) {
4950 		error = cachefs_getbackvp(fscp, tdcp);
4951 		if (error) {
4952 			mutex_exit(&tdcp->c_statelock);
4953 			goto out;
4954 		}
4955 	}
4956 
4957 	/* consistency check target directory */
4958 	error = CFSOP_CHECK_COBJECT(fscp, tdcp, 0, cr);
4959 	if (error) {
4960 		mutex_exit(&tdcp->c_statelock);
4961 		goto out;
4962 	}
4963 	if (backvp == NULL) {
4964 		backvp = tdcp->c_backvp;
4965 		VN_HOLD(backvp);
4966 	}
4967 
4968 	/* perform the link on the back fs */
4969 	CFS_DPRINT_BACKFS_NFSV4(fscp,
4970 	    ("cachefs_link (nfsv4): tdcp %p, tdbackvp %p, "
4971 	    "name %s\n", tdcp, tdcp->c_backvp, tnm));
4972 	error = VOP_LINK(tdcp->c_backvp, backvp, tnm, cr, NULL, 0);
4973 	if (error) {
4974 		mutex_exit(&tdcp->c_statelock);
4975 		goto out;
4976 	}
4977 
4978 	CFSOP_MODIFY_COBJECT(fscp, tdcp, cr);
4979 
4980 	/* if the dir is populated, add the new link */
4981 	if (CFS_ISFS_NONSHARED(fscp) &&
4982 	    (tdcp->c_metadata.md_flags & MD_POPULATED)) {
4983 		error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie,
4984 		    &fcp->c_id, SM_ASYNC);
4985 		if (error) {
4986 			cachefs_nocache(tdcp);
4987 			error = 0;
4988 		}
4989 	}
4990 	mutex_exit(&tdcp->c_statelock);
4991 
4992 	/* get the new link count on the file */
4993 	mutex_enter(&fcp->c_statelock);
4994 	fcp->c_flags |= CN_UPDATED;
4995 	CFSOP_MODIFY_COBJECT(fscp, fcp, cr);
4996 	if (fcp->c_backvp == NULL) {
4997 		error = cachefs_getbackvp(fscp, fcp);
4998 		if (error) {
4999 			mutex_exit(&fcp->c_statelock);
5000 			goto out;
5001 		}
5002 	}
5003 
5004 	/* XXX bob: given what modify_cobject does this seems unnecessary */
5005 	fcp->c_attr.va_mask = AT_ALL;
5006 	error = VOP_GETATTR(fcp->c_backvp, &fcp->c_attr, 0, cr, NULL);
5007 	mutex_exit(&fcp->c_statelock);
5008 out:
5009 	if (backvp)
5010 		VN_RELE(backvp);
5011 
5012 	return (error);
5013 }
5014 
5015 static int
5016 cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp, char *tnm,
5017     cred_t *cr)
5018 {
5019 	cnode_t *tdcp = VTOC(tdvp);
5020 	cnode_t *fcp = VTOC(fvp);
5021 	fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
5022 	int error = 0;
5023 	timestruc_t current_time;
5024 	off_t commit = 0;
5025 
5026 	if (fvp->v_type == VDIR && secpolicy_fs_linkdir(cr, fvp->v_vfsp) != 0 ||
5027 	    fcp->c_attr.va_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)
5028 		return (EPERM);
5029 
5030 	if (CFS_ISFS_WRITE_AROUND(fscp))
5031 		return (ETIMEDOUT);
5032 
5033 	if (fcp->c_metadata.md_flags & MD_NEEDATTRS)
5034 		return (ETIMEDOUT);
5035 
5036 	mutex_enter(&tdcp->c_statelock);
5037 
5038 	/* check permissions */
5039 	if (error = cachefs_access_local(tdcp, (VEXEC|VWRITE), cr)) {
5040 		mutex_exit(&tdcp->c_statelock);
5041 		goto out;
5042 	}
5043 
5044 	/* the directory front file must be populated */
5045 	if ((tdcp->c_metadata.md_flags & MD_POPULATED) == 0) {
5046 		error = ETIMEDOUT;
5047 		mutex_exit(&tdcp->c_statelock);
5048 		goto out;
5049 	}
5050 
5051 	/* make sure tnm does not already exist in the directory */
5052 	error = cachefs_dir_look(tdcp, tnm, NULL, NULL, NULL, NULL);
5053 	if (error == ENOTDIR) {
5054 		error = ETIMEDOUT;
5055 		mutex_exit(&tdcp->c_statelock);
5056 		goto out;
5057 	}
5058 	if (error != ENOENT) {
5059 		error = EEXIST;
5060 		mutex_exit(&tdcp->c_statelock);
5061 		goto out;
5062 	}
5063 
5064 	mutex_enter(&fcp->c_statelock);
5065 
5066 	/* create a mapping for the file if necessary */
5067 	if ((fcp->c_metadata.md_flags & MD_MAPPING) == 0) {
5068 		error = cachefs_dlog_cidmap(fscp);
5069 		if (error) {
5070 			mutex_exit(&fcp->c_statelock);
5071 			mutex_exit(&tdcp->c_statelock);
5072 			error = ENOSPC;
5073 			goto out;
5074 		}
5075 		fcp->c_metadata.md_flags |= MD_MAPPING;
5076 		fcp->c_flags |= CN_UPDATED;
5077 	}
5078 
5079 	/* mark file as modified */
5080 	if (cachefs_modified_alloc(fcp)) {
5081 		mutex_exit(&fcp->c_statelock);
5082 		mutex_exit(&tdcp->c_statelock);
5083 		error = ENOSPC;
5084 		goto out;
5085 	}
5086 	mutex_exit(&fcp->c_statelock);
5087 
5088 	/* log the operation */
5089 	commit = cachefs_dlog_link(fscp, tdcp, tnm, fcp, cr);
5090 	if (commit == 0) {
5091 		mutex_exit(&tdcp->c_statelock);
5092 		error = ENOSPC;
5093 		goto out;
5094 	}
5095 
5096 	gethrestime(&current_time);
5097 
5098 	/* make the new link */
5099 	cachefs_modified(tdcp);
5100 	error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie,
5101 	    &fcp->c_id, SM_ASYNC);
5102 	if (error) {
5103 		error = 0;
5104 		mutex_exit(&tdcp->c_statelock);
5105 		goto out;
5106 	}
5107 
5108 	/* Update mtime/ctime of parent dir */
5109 	tdcp->c_metadata.md_localmtime = current_time;
5110 	tdcp->c_metadata.md_localctime = current_time;
5111 	tdcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5112 	tdcp->c_flags |= CN_UPDATED;
5113 	mutex_exit(&tdcp->c_statelock);
5114 
5115 	/* update the file we linked to */
5116 	mutex_enter(&fcp->c_statelock);
5117 	fcp->c_attr.va_nlink++;
5118 	fcp->c_metadata.md_localctime = current_time;
5119 	fcp->c_metadata.md_flags |= MD_LOCALCTIME;
5120 	fcp->c_flags |= CN_UPDATED;
5121 	mutex_exit(&fcp->c_statelock);
5122 
5123 out:
5124 	if (commit) {
5125 		/* commit the log entry */
5126 		if (cachefs_dlog_commit(fscp, commit, error)) {
5127 			/*EMPTY*/
5128 			/* XXX bob: fix on panic */
5129 		}
5130 	}
5131 
5132 	return (error);
5133 }
5134 
/*
 * Serialize all renames in CFS to avoid deadlocks: a rename has to
 * hold two cnodes atomically.
 */
kmutex_t cachefs_rename_lock;
5140 
5141 /*ARGSUSED*/
5142 static int
5143 cachefs_rename(vnode_t *odvp, char *onm, vnode_t *ndvp,
5144     char *nnm, cred_t *cr, caller_context_t *ct, int flags)
5145 {
5146 	fscache_t *fscp = C_TO_FSCACHE(VTOC(odvp));
5147 	cachefscache_t *cachep = fscp->fs_cache;
5148 	int error = 0;
5149 	int held = 0;
5150 	int connected = 0;
5151 	vnode_t *delvp = NULL;
5152 	vnode_t *tvp = NULL;
5153 	int vfslock = 0;
5154 	struct vnode *realvp;
5155 
5156 	if (getzoneid() != GLOBAL_ZONEID)
5157 		return (EPERM);
5158 
5159 	if (VOP_REALVP(ndvp, &realvp, ct) == 0)
5160 		ndvp = realvp;
5161 
5162 	/*
5163 	 * if the fs NOFILL or NOCACHE flags are on, then the old and new
5164 	 * directory cnodes better indicate NOCACHE mode as well.
5165 	 */
5166 	ASSERT(
5167 	    (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE)) == 0 ||
5168 	    ((VTOC(odvp)->c_flags & CN_NOCACHE) &&
5169 	    (VTOC(ndvp)->c_flags & CN_NOCACHE)));
5170 
5171 	/*
5172 	 * Cachefs only provides pass-through support for NFSv4,
5173 	 * and all vnode operations are passed through to the
5174 	 * back file system. For NFSv4 pass-through to work, only
5175 	 * connected operation is supported, the cnode backvp must
5176 	 * exist, and cachefs optional (eg., disconnectable) flags
5177 	 * are turned off. Assert these conditions to ensure that
5178 	 * the backfilesystem is called for the rename operation.
5179 	 */
5180 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
5181 	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(odvp));
5182 	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(ndvp));
5183 
5184 	for (;;) {
5185 		if (vfslock) {
5186 			vn_vfsunlock(delvp);
5187 			vfslock = 0;
5188 		}
5189 		if (delvp) {
5190 			VN_RELE(delvp);
5191 			delvp = NULL;
5192 		}
5193 
5194 		/* get (or renew) access to the file system */
5195 		if (held) {
5196 			/* Won't loop for NFSv4 connected support */
5197 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
5198 			cachefs_cd_release(fscp);
5199 			held = 0;
5200 		}
5201 		error = cachefs_cd_access(fscp, connected, 1);
5202 		if (error)
5203 			break;
5204 		held = 1;
5205 
5206 		/* sanity check */
5207 		if ((odvp->v_type != VDIR) || (ndvp->v_type != VDIR)) {
5208 			error = EINVAL;
5209 			break;
5210 		}
5211 
5212 		/* cannot rename from or to . or .. */
5213 		if (strcmp(onm, ".") == 0 || strcmp(onm, "..") == 0 ||
5214 		    strcmp(nnm, ".") == 0 || strcmp(nnm, "..") == 0) {
5215 			error = EINVAL;
5216 			break;
5217 		}
5218 
5219 		if (odvp != ndvp) {
5220 			/*
5221 			 * if moving a directory, its notion
5222 			 * of ".." will change
5223 			 */
5224 			error = cachefs_lookup_common(odvp, onm, &tvp,
5225 			    NULL, 0, NULL, cr);
5226 			if (error == 0) {
5227 				ASSERT(tvp != NULL);
5228 				if (tvp->v_type == VDIR) {
5229 					cnode_t *cp = VTOC(tvp);
5230 
5231 					dnlc_remove(tvp, "..");
5232 
5233 					mutex_enter(&cp->c_statelock);
5234 					CFSOP_MODIFY_COBJECT(fscp, cp, cr);
5235 					mutex_exit(&cp->c_statelock);
5236 				}
5237 			} else {
5238 				tvp = NULL;
5239 				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5240 					if (CFS_TIMEOUT(fscp, error)) {
5241 						cachefs_cd_release(fscp);
5242 						held = 0;
5243 						cachefs_cd_timedout(fscp);
5244 						connected = 0;
5245 						continue;
5246 					}
5247 				} else {
5248 					if (CFS_TIMEOUT(fscp, error)) {
5249 						connected = 1;
5250 						continue;
5251 					}
5252 				}
5253 				break;
5254 			}
5255 		}
5256 
5257 		/* get the cnode if file being deleted */
5258 		error = cachefs_lookup_common(ndvp, nnm, &delvp, NULL, 0,
5259 		    NULL, cr);
5260 		if (error) {
5261 			delvp = NULL;
5262 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5263 				if (CFS_TIMEOUT(fscp, error)) {
5264 					cachefs_cd_release(fscp);
5265 					held = 0;
5266 					cachefs_cd_timedout(fscp);
5267 					connected = 0;
5268 					continue;
5269 				}
5270 			} else {
5271 				if (CFS_TIMEOUT(fscp, error)) {
5272 					connected = 1;
5273 					continue;
5274 				}
5275 			}
5276 			if (error != ENOENT)
5277 				break;
5278 		}
5279 
5280 		if (delvp && delvp->v_type == VDIR) {
5281 			/* see ufs_dirremove for why this is done, mount race */
5282 			if (vn_vfswlock(delvp)) {
5283 				error = EBUSY;
5284 				break;
5285 			}
5286 			vfslock = 1;
5287 			if (vn_mountedvfs(delvp) != NULL) {
5288 				error = EBUSY;
5289 				break;
5290 			}
5291 		}
5292 
5293 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5294 			error = cachefs_rename_connected(odvp, onm,
5295 			    ndvp, nnm, cr, delvp);
5296 			if (CFS_TIMEOUT(fscp, error)) {
5297 				cachefs_cd_release(fscp);
5298 				held = 0;
5299 				cachefs_cd_timedout(fscp);
5300 				connected = 0;
5301 				continue;
5302 			}
5303 		} else {
5304 			error = cachefs_rename_disconnected(odvp, onm,
5305 			    ndvp, nnm, cr, delvp);
5306 			if (CFS_TIMEOUT(fscp, error)) {
5307 				connected = 1;
5308 				continue;
5309 			}
5310 		}
5311 		break;
5312 	}
5313 
5314 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RENAME)) {
5315 		struct fid gone;
5316 
5317 		bzero(&gone, sizeof (gone));
5318 		gone.fid_len = MAXFIDSZ;
5319 		if (delvp != NULL)
5320 			(void) VOP_FID(delvp, &gone, ct);
5321 
5322 		cachefs_log_rename(cachep, error, fscp->fs_cfsvfsp,
5323 		    &gone, 0, (delvp != NULL), crgetuid(cr));
5324 	}
5325 
5326 	if (held)
5327 		cachefs_cd_release(fscp);
5328 
5329 	if (vfslock)
5330 		vn_vfsunlock(delvp);
5331 
5332 	if (delvp)
5333 		VN_RELE(delvp);
5334 	if (tvp)
5335 		VN_RELE(tvp);
5336 
5337 #ifdef CFS_CD_DEBUG
5338 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
5339 #endif
5340 	return (error);
5341 }
5342 
5343 static int
5344 cachefs_rename_connected(vnode_t *odvp, char *onm, vnode_t *ndvp,
5345     char *nnm, cred_t *cr, vnode_t *delvp)
5346 {
5347 	cnode_t *odcp = VTOC(odvp);
5348 	cnode_t *ndcp = VTOC(ndvp);
5349 	vnode_t *revp = NULL;
5350 	cnode_t *recp;
5351 	cnode_t *delcp;
5352 	fscache_t *fscp = C_TO_FSCACHE(odcp);
5353 	int error = 0;
5354 	struct fid cookie;
5355 	struct fid *cookiep;
5356 	cfs_cid_t cid;
5357 	int gotdirent;
5358 
5359 	/* find the file we are renaming */
5360 	error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr);
5361 	if (error)
5362 		return (error);
5363 	recp = VTOC(revp);
5364 
5365 	/*
5366 	 * To avoid deadlock, we acquire this global rename lock before
5367 	 * we try to get the locks for the source and target directories.
5368 	 */
5369 	mutex_enter(&cachefs_rename_lock);
5370 	rw_enter(&odcp->c_rwlock, RW_WRITER);
5371 	if (odcp != ndcp) {
5372 		rw_enter(&ndcp->c_rwlock, RW_WRITER);
5373 	}
5374 	mutex_exit(&cachefs_rename_lock);
5375 
5376 	ASSERT((odcp->c_flags & CN_ASYNC_POP_WORKING) == 0);
5377 	ASSERT((ndcp->c_flags & CN_ASYNC_POP_WORKING) == 0);
5378 
5379 	mutex_enter(&odcp->c_statelock);
5380 	if (odcp->c_backvp == NULL) {
5381 		error = cachefs_getbackvp(fscp, odcp);
5382 		if (error) {
5383 			mutex_exit(&odcp->c_statelock);
5384 			goto out;
5385 		}
5386 	}
5387 
5388 	error = CFSOP_CHECK_COBJECT(fscp, odcp, 0, cr);
5389 	if (error) {
5390 		mutex_exit(&odcp->c_statelock);
5391 		goto out;
5392 	}
5393 	mutex_exit(&odcp->c_statelock);
5394 
5395 	if (odcp != ndcp) {
5396 		mutex_enter(&ndcp->c_statelock);
5397 		if (ndcp->c_backvp == NULL) {
5398 			error = cachefs_getbackvp(fscp, ndcp);
5399 			if (error) {
5400 				mutex_exit(&ndcp->c_statelock);
5401 				goto out;
5402 			}
5403 		}
5404 
5405 		error = CFSOP_CHECK_COBJECT(fscp, ndcp, 0, cr);
5406 		if (error) {
5407 			mutex_exit(&ndcp->c_statelock);
5408 			goto out;
5409 		}
5410 		mutex_exit(&ndcp->c_statelock);
5411 	}
5412 
5413 	/* if a file is being deleted because of this rename */
5414 	if (delvp) {
5415 		/* if src and dest file are same */
5416 		if (delvp == revp) {
5417 			error = 0;
5418 			goto out;
5419 		}
5420 
5421 		/*
5422 		 * If the cnode is active, make a link to the file
5423 		 * so operations on the file will continue.
5424 		 */
5425 		dnlc_purge_vp(delvp);
5426 		delcp = VTOC(delvp);
5427 		if ((delvp->v_type != VDIR) &&
5428 		    !((delvp->v_count == 1) ||
5429 		    ((delvp->v_count == 2) && delcp->c_ipending))) {
5430 			error = cachefs_remove_dolink(ndvp, delvp, nnm, cr);
5431 			if (error)
5432 				goto out;
5433 		}
5434 	}
5435 
5436 	/* do the rename on the back fs */
5437 	CFS_DPRINT_BACKFS_NFSV4(fscp,
5438 	    ("cachefs_rename (nfsv4): odcp %p, odbackvp %p, "
5439 	    " ndcp %p, ndbackvp %p, onm %s, nnm %s\n",
5440 	    odcp, odcp->c_backvp, ndcp, ndcp->c_backvp, onm, nnm));
5441 	error = VOP_RENAME(odcp->c_backvp, onm, ndcp->c_backvp, nnm, cr, NULL,
5442 	    0);
5443 	if (error)
5444 		goto out;
5445 
5446 	/* purge mappings to file in the old directory */
5447 	dnlc_purge_vp(odvp);
5448 
5449 	/* purge mappings in the new dir if we deleted a file */
5450 	if (delvp && (odvp != ndvp))
5451 		dnlc_purge_vp(ndvp);
5452 
5453 	/* update the file we just deleted */
5454 	if (delvp) {
5455 		mutex_enter(&delcp->c_statelock);
5456 		if (delcp->c_attr.va_nlink == 1) {
5457 			delcp->c_flags |= CN_DESTROY;
5458 		} else {
5459 			delcp->c_flags |= CN_UPDATED;
5460 		}
5461 		delcp->c_attr.va_nlink--;
5462 		CFSOP_MODIFY_COBJECT(fscp, delcp, cr);
5463 		mutex_exit(&delcp->c_statelock);
5464 	}
5465 
5466 	/* find the entry in the old directory */
5467 	mutex_enter(&odcp->c_statelock);
5468 	gotdirent = 0;
5469 	cookiep = NULL;
5470 	if (CFS_ISFS_NONSHARED(fscp) &&
5471 	    (odcp->c_metadata.md_flags & MD_POPULATED)) {
5472 		error = cachefs_dir_look(odcp, onm, &cookie,
5473 		    NULL, NULL, &cid);
5474 		if (error == 0 || error == EINVAL) {
5475 			gotdirent = 1;
5476 			if (error == 0)
5477 				cookiep = &cookie;
5478 		} else {
5479 			cachefs_inval_object(odcp);
5480 		}
5481 	}
5482 	error = 0;
5483 
5484 	/* remove the directory entry from the old directory */
5485 	if (gotdirent) {
5486 		error = cachefs_dir_rmentry(odcp, onm);
5487 		if (error) {
5488 			cachefs_nocache(odcp);
5489 			error = 0;
5490 		}
5491 	}
5492 	CFSOP_MODIFY_COBJECT(fscp, odcp, cr);
5493 	mutex_exit(&odcp->c_statelock);
5494 
5495 	/* install the directory entry in the new directory */
5496 	mutex_enter(&ndcp->c_statelock);
5497 	if (CFS_ISFS_NONSHARED(fscp) &&
5498 	    (ndcp->c_metadata.md_flags & MD_POPULATED)) {
5499 		error = 1;
5500 		if (gotdirent) {
5501 			ASSERT(cid.cid_fileno != 0);
5502 			error = 0;
5503 			if (delvp) {
5504 				error = cachefs_dir_rmentry(ndcp, nnm);
5505 			}
5506 			if (error == 0) {
5507 				error = cachefs_dir_enter(ndcp, nnm, cookiep,
5508 				    &cid, SM_ASYNC);
5509 			}
5510 		}
5511 		if (error) {
5512 			cachefs_nocache(ndcp);
5513 			error = 0;
5514 		}
5515 	}
5516 	if (odcp != ndcp)
5517 		CFSOP_MODIFY_COBJECT(fscp, ndcp, cr);
5518 	mutex_exit(&ndcp->c_statelock);
5519 
5520 	/* ctime of renamed file has changed */
5521 	mutex_enter(&recp->c_statelock);
5522 	CFSOP_MODIFY_COBJECT(fscp, recp, cr);
5523 	mutex_exit(&recp->c_statelock);
5524 
5525 out:
5526 	if (odcp != ndcp)
5527 		rw_exit(&ndcp->c_rwlock);
5528 	rw_exit(&odcp->c_rwlock);
5529 
5530 	VN_RELE(revp);
5531 
5532 	return (error);
5533 }
5534 
5535 static int
5536 cachefs_rename_disconnected(vnode_t *odvp, char *onm, vnode_t *ndvp,
5537     char *nnm, cred_t *cr, vnode_t *delvp)
5538 {
5539 	cnode_t *odcp = VTOC(odvp);
5540 	cnode_t *ndcp = VTOC(ndvp);
5541 	cnode_t *delcp = NULL;
5542 	vnode_t *revp = NULL;
5543 	cnode_t *recp;
5544 	fscache_t *fscp = C_TO_FSCACHE(odcp);
5545 	int error = 0;
5546 	struct fid cookie;
5547 	struct fid *cookiep;
5548 	cfs_cid_t cid;
5549 	off_t commit = 0;
5550 	timestruc_t current_time;
5551 
5552 	if (CFS_ISFS_WRITE_AROUND(fscp))
5553 		return (ETIMEDOUT);
5554 
5555 	/* find the file we are renaming */
5556 	error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr);
5557 	if (error)
5558 		return (error);
5559 	recp = VTOC(revp);
5560 
5561 	/*
5562 	 * To avoid deadlock, we acquire this global rename lock before
5563 	 * we try to get the locks for the source and target directories.
5564 	 */
5565 	mutex_enter(&cachefs_rename_lock);
5566 	rw_enter(&odcp->c_rwlock, RW_WRITER);
5567 	if (odcp != ndcp) {
5568 		rw_enter(&ndcp->c_rwlock, RW_WRITER);
5569 	}
5570 	mutex_exit(&cachefs_rename_lock);
5571 
5572 	if (recp->c_metadata.md_flags & MD_NEEDATTRS) {
5573 		error = ETIMEDOUT;
5574 		goto out;
5575 	}
5576 
5577 	if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) {
5578 		mutex_enter(&recp->c_statelock);
5579 		if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) {
5580 			error = cachefs_dlog_cidmap(fscp);
5581 			if (error) {
5582 				mutex_exit(&recp->c_statelock);
5583 				error = ENOSPC;
5584 				goto out;
5585 			}
5586 			recp->c_metadata.md_flags |= MD_MAPPING;
5587 			recp->c_flags |= CN_UPDATED;
5588 		}
5589 		mutex_exit(&recp->c_statelock);
5590 	}
5591 
5592 	/* check permissions */
5593 	/* XXX clean up this mutex junk sometime */
5594 	mutex_enter(&odcp->c_statelock);
5595 	error = cachefs_access_local(odcp, (VEXEC|VWRITE), cr);
5596 	mutex_exit(&odcp->c_statelock);
5597 	if (error != 0)
5598 		goto out;
5599 	mutex_enter(&ndcp->c_statelock);
5600 	error = cachefs_access_local(ndcp, (VEXEC|VWRITE), cr);
5601 	mutex_exit(&ndcp->c_statelock);
5602 	if (error != 0)
5603 		goto out;
5604 	mutex_enter(&odcp->c_statelock);
5605 	error = cachefs_stickyrmchk(odcp, recp, cr);
5606 	mutex_exit(&odcp->c_statelock);
5607 	if (error != 0)
5608 		goto out;
5609 
5610 	/* dirs must be populated */
5611 	if (((odcp->c_metadata.md_flags & MD_POPULATED) == 0) ||
5612 	    ((ndcp->c_metadata.md_flags & MD_POPULATED) == 0)) {
5613 		error = ETIMEDOUT;
5614 		goto out;
5615 	}
5616 
5617 	/* for now do not allow moving dirs because could cause cycles */
5618 	if ((((revp->v_type == VDIR) && (odvp != ndvp))) ||
5619 	    (revp == odvp)) {
5620 		error = ETIMEDOUT;
5621 		goto out;
5622 	}
5623 
5624 	/* if a file is being deleted because of this rename */
5625 	if (delvp) {
5626 		delcp = VTOC(delvp);
5627 
5628 		/* if src and dest file are the same */
5629 		if (delvp == revp) {
5630 			error = 0;
5631 			goto out;
5632 		}
5633 
5634 		if (delcp->c_metadata.md_flags & MD_NEEDATTRS) {
5635 			error = ETIMEDOUT;
5636 			goto out;
5637 		}
5638 
5639 		/* if there are hard links to this file */
5640 		if (delcp->c_attr.va_nlink > 1) {
5641 			mutex_enter(&delcp->c_statelock);
5642 			if (cachefs_modified_alloc(delcp)) {
5643 				mutex_exit(&delcp->c_statelock);
5644 				error = ENOSPC;
5645 				goto out;
5646 			}
5647 
5648 			if ((delcp->c_metadata.md_flags & MD_MAPPING) == 0) {
5649 				error = cachefs_dlog_cidmap(fscp);
5650 				if (error) {
5651 					mutex_exit(&delcp->c_statelock);
5652 					error = ENOSPC;
5653 					goto out;
5654 				}
5655 				delcp->c_metadata.md_flags |= MD_MAPPING;
5656 				delcp->c_flags |= CN_UPDATED;
5657 			}
5658 			mutex_exit(&delcp->c_statelock);
5659 		}
5660 
5661 		/* make sure we can delete file */
5662 		mutex_enter(&ndcp->c_statelock);
5663 		error = cachefs_stickyrmchk(ndcp, delcp, cr);
5664 		mutex_exit(&ndcp->c_statelock);
5665 		if (error != 0)
5666 			goto out;
5667 
5668 		/*
5669 		 * If the cnode is active, make a link to the file
5670 		 * so operations on the file will continue.
5671 		 */
5672 		dnlc_purge_vp(delvp);
5673 		if ((delvp->v_type != VDIR) &&
5674 		    !((delvp->v_count == 1) ||
5675 		    ((delvp->v_count == 2) && delcp->c_ipending))) {
5676 			error = cachefs_remove_dolink(ndvp, delvp, nnm, cr);
5677 			if (error)
5678 				goto out;
5679 		}
5680 	}
5681 
5682 	/* purge mappings to file in the old directory */
5683 	dnlc_purge_vp(odvp);
5684 
5685 	/* purge mappings in the new dir if we deleted a file */
5686 	if (delvp && (odvp != ndvp))
5687 		dnlc_purge_vp(ndvp);
5688 
5689 	/* find the entry in the old directory */
5690 	mutex_enter(&odcp->c_statelock);
5691 	if ((odcp->c_metadata.md_flags & MD_POPULATED) == 0) {
5692 		mutex_exit(&odcp->c_statelock);
5693 		error = ETIMEDOUT;
5694 		goto out;
5695 	}
5696 	cookiep = NULL;
5697 	error = cachefs_dir_look(odcp, onm, &cookie, NULL, NULL, &cid);
5698 	if (error == 0 || error == EINVAL) {
5699 		if (error == 0)
5700 			cookiep = &cookie;
5701 	} else {
5702 		mutex_exit(&odcp->c_statelock);
5703 		if (error == ENOTDIR)
5704 			error = ETIMEDOUT;
5705 		goto out;
5706 	}
5707 	error = 0;
5708 
5709 	/* write the log entry */
5710 	commit = cachefs_dlog_rename(fscp, odcp, onm, ndcp, nnm, cr,
5711 	    recp, delcp);
5712 	if (commit == 0) {
5713 		mutex_exit(&odcp->c_statelock);
5714 		error = ENOSPC;
5715 		goto out;
5716 	}
5717 
5718 	/* remove the directory entry from the old directory */
5719 	cachefs_modified(odcp);
5720 	error = cachefs_dir_rmentry(odcp, onm);
5721 	if (error) {
5722 		mutex_exit(&odcp->c_statelock);
5723 		if (error == ENOTDIR)
5724 			error = ETIMEDOUT;
5725 		goto out;
5726 	}
5727 	mutex_exit(&odcp->c_statelock);
5728 
5729 	/* install the directory entry in the new directory */
5730 	mutex_enter(&ndcp->c_statelock);
5731 	error = ENOTDIR;
5732 	if (ndcp->c_metadata.md_flags & MD_POPULATED) {
5733 		ASSERT(cid.cid_fileno != 0);
5734 		cachefs_modified(ndcp);
5735 		error = 0;
5736 		if (delvp) {
5737 			error = cachefs_dir_rmentry(ndcp, nnm);
5738 		}
5739 		if (error == 0) {
5740 			error = cachefs_dir_enter(ndcp, nnm, cookiep,
5741 			    &cid, SM_ASYNC);
5742 		}
5743 	}
5744 	if (error) {
5745 		cachefs_nocache(ndcp);
5746 		mutex_exit(&ndcp->c_statelock);
5747 		mutex_enter(&odcp->c_statelock);
5748 		cachefs_nocache(odcp);
5749 		mutex_exit(&odcp->c_statelock);
5750 		if (error == ENOTDIR)
5751 			error = ETIMEDOUT;
5752 		goto out;
5753 	}
5754 	mutex_exit(&ndcp->c_statelock);
5755 
5756 	gethrestime(&current_time);
5757 
5758 	/* update the file we just deleted */
5759 	if (delvp) {
5760 		mutex_enter(&delcp->c_statelock);
5761 		delcp->c_attr.va_nlink--;
5762 		delcp->c_metadata.md_localctime = current_time;
5763 		delcp->c_metadata.md_flags |= MD_LOCALCTIME;
5764 		if (delcp->c_attr.va_nlink == 0) {
5765 			delcp->c_flags |= CN_DESTROY;
5766 		} else {
5767 			delcp->c_flags |= CN_UPDATED;
5768 		}
5769 		mutex_exit(&delcp->c_statelock);
5770 	}
5771 
5772 	/* update the file we renamed */
5773 	mutex_enter(&recp->c_statelock);
5774 	recp->c_metadata.md_localctime = current_time;
5775 	recp->c_metadata.md_flags |= MD_LOCALCTIME;
5776 	recp->c_flags |= CN_UPDATED;
5777 	mutex_exit(&recp->c_statelock);
5778 
5779 	/* update the source directory */
5780 	mutex_enter(&odcp->c_statelock);
5781 	odcp->c_metadata.md_localctime = current_time;
5782 	odcp->c_metadata.md_localmtime = current_time;
5783 	odcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5784 	odcp->c_flags |= CN_UPDATED;
5785 	mutex_exit(&odcp->c_statelock);
5786 
5787 	/* update the destination directory */
5788 	if (odcp != ndcp) {
5789 		mutex_enter(&ndcp->c_statelock);
5790 		ndcp->c_metadata.md_localctime = current_time;
5791 		ndcp->c_metadata.md_localmtime = current_time;
5792 		ndcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5793 		ndcp->c_flags |= CN_UPDATED;
5794 		mutex_exit(&ndcp->c_statelock);
5795 	}
5796 
5797 out:
5798 	if (commit) {
5799 		/* commit the log entry */
5800 		if (cachefs_dlog_commit(fscp, commit, error)) {
5801 			/*EMPTY*/
5802 			/* XXX bob: fix on panic */
5803 		}
5804 	}
5805 
5806 	if (odcp != ndcp)
5807 		rw_exit(&ndcp->c_rwlock);
5808 	rw_exit(&odcp->c_rwlock);
5809 
5810 	VN_RELE(revp);
5811 
5812 	return (error);
5813 }
5814 
5815 /*ARGSUSED*/
5816 static int
5817 cachefs_mkdir(vnode_t *dvp, char *nm, vattr_t *vap, vnode_t **vpp,
5818     cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
5819 {
5820 	cnode_t *dcp = VTOC(dvp);
5821 	fscache_t *fscp = C_TO_FSCACHE(dcp);
5822 	cachefscache_t *cachep = fscp->fs_cache;
5823 	int error = 0;
5824 	int held = 0;
5825 	int connected = 0;
5826 
5827 #ifdef CFSDEBUG
5828 	CFS_DEBUG(CFSDEBUG_VOPS)
5829 		printf("cachefs_mkdir: ENTER dvp %p\n", (void *)dvp);
5830 #endif
5831 
5832 	if (getzoneid() != GLOBAL_ZONEID) {
5833 		error = EPERM;
5834 		goto out;
5835 	}
5836 
5837 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
5838 		ASSERT(dcp->c_flags & CN_NOCACHE);
5839 
5840 	/*
5841 	 * Cachefs only provides pass-through support for NFSv4,
5842 	 * and all vnode operations are passed through to the
5843 	 * back file system. For NFSv4 pass-through to work, only
5844 	 * connected operation is supported, the cnode backvp must
5845 	 * exist, and cachefs optional (eg., disconnectable) flags
5846 	 * are turned off. Assert these conditions to ensure that
5847 	 * the backfilesystem is called for the mkdir operation.
5848 	 */
5849 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
5850 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
5851 
5852 	for (;;) {
5853 		/* get (or renew) access to the file system */
5854 		if (held) {
5855 			/* Won't loop with NFSv4 connected behavior */
5856 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
5857 			rw_exit(&dcp->c_rwlock);
5858 			cachefs_cd_release(fscp);
5859 			held = 0;
5860 		}
5861 		error = cachefs_cd_access(fscp, connected, 1);
5862 		if (error)
5863 			break;
5864 		rw_enter(&dcp->c_rwlock, RW_WRITER);
5865 		held = 1;
5866 
5867 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5868 			error = cachefs_mkdir_connected(dvp, nm, vap,
5869 			    vpp, cr);
5870 			if (CFS_TIMEOUT(fscp, error)) {
5871 				rw_exit(&dcp->c_rwlock);
5872 				cachefs_cd_release(fscp);
5873 				held = 0;
5874 				cachefs_cd_timedout(fscp);
5875 				connected = 0;
5876 				continue;
5877 			}
5878 		} else {
5879 			error = cachefs_mkdir_disconnected(dvp, nm, vap,
5880 			    vpp, cr);
5881 			if (CFS_TIMEOUT(fscp, error)) {
5882 				connected = 1;
5883 				continue;
5884 			}
5885 		}
5886 		break;
5887 	}
5888 
5889 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_MKDIR)) {
5890 		fid_t *fidp = NULL;
5891 		ino64_t fileno = 0;
5892 		cnode_t *cp = NULL;
5893 		if (error == 0)
5894 			cp = VTOC(*vpp);
5895 
5896 		if (cp != NULL) {
5897 			fidp = &cp->c_metadata.md_cookie;
5898 			fileno = cp->c_id.cid_fileno;
5899 		}
5900 
5901 		cachefs_log_mkdir(cachep, error, fscp->fs_cfsvfsp,
5902 		    fidp, fileno, crgetuid(cr));
5903 	}
5904 
5905 	if (held) {
5906 		rw_exit(&dcp->c_rwlock);
5907 		cachefs_cd_release(fscp);
5908 	}
5909 	if (error == 0 && CFS_ISFS_NONSHARED(fscp))
5910 		(void) cachefs_pack(dvp, nm, cr);
5911 
5912 #ifdef CFS_CD_DEBUG
5913 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
5914 #endif
5915 out:
5916 #ifdef CFSDEBUG
5917 	CFS_DEBUG(CFSDEBUG_VOPS)
5918 		printf("cachefs_mkdir: EXIT error = %d\n", error);
5919 #endif
5920 	return (error);
5921 }
5922 
5923 static int
5924 cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap,
5925     vnode_t **vpp, cred_t *cr)
5926 {
5927 	cnode_t *newcp = NULL, *dcp = VTOC(dvp);
5928 	struct vnode *vp = NULL;
5929 	int error = 0;
5930 	fscache_t *fscp = C_TO_FSCACHE(dcp);
5931 	struct fid cookie;
5932 	struct vattr attr;
5933 	cfs_cid_t cid, dircid;
5934 	uint32_t valid_fid;
5935 
5936 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
5937 		ASSERT(dcp->c_flags & CN_NOCACHE);
5938 
5939 	mutex_enter(&dcp->c_statelock);
5940 
5941 	/* get backvp of dir */
5942 	if (dcp->c_backvp == NULL) {
5943 		error = cachefs_getbackvp(fscp, dcp);
5944 		if (error) {
5945 			mutex_exit(&dcp->c_statelock);
5946 			goto out;
5947 		}
5948 	}
5949 
5950 	/* consistency check the directory */
5951 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
5952 	if (error) {
5953 		mutex_exit(&dcp->c_statelock);
5954 		goto out;
5955 	}
5956 	dircid = dcp->c_id;
5957 
5958 	/* make the dir on the back fs */
5959 	CFS_DPRINT_BACKFS_NFSV4(fscp,
5960 	    ("cachefs_mkdir (nfsv4): dcp %p, dbackvp %p, "
5961 	    "name %s\n", dcp, dcp->c_backvp, nm));
5962 	error = VOP_MKDIR(dcp->c_backvp, nm, vap, &vp, cr, NULL, 0, NULL);
5963 	mutex_exit(&dcp->c_statelock);
5964 	if (error) {
5965 		goto out;
5966 	}
5967 
5968 	/* get the cookie and make the cnode */
5969 	attr.va_mask = AT_ALL;
5970 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
5971 	error = cachefs_getcookie(vp, &cookie, &attr, cr, valid_fid);
5972 	if (error) {
5973 		goto out;
5974 	}
5975 	cid.cid_flags = 0;
5976 	cid.cid_fileno = attr.va_nodeid;
5977 	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
5978 	    &attr, vp, cr, 0, &newcp);
5979 	if (error) {
5980 		goto out;
5981 	}
5982 	ASSERT(CTOV(newcp)->v_type == VDIR);
5983 	*vpp = CTOV(newcp);
5984 
5985 	/* if the dir is populated, add the new entry */
5986 	mutex_enter(&dcp->c_statelock);
5987 	if (CFS_ISFS_NONSHARED(fscp) &&
5988 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
5989 		error = cachefs_dir_enter(dcp, nm, &cookie, &newcp->c_id,
5990 		    SM_ASYNC);
5991 		if (error) {
5992 			cachefs_nocache(dcp);
5993 			error = 0;
5994 		}
5995 	}
5996 	dcp->c_attr.va_nlink++;
5997 	dcp->c_flags |= CN_UPDATED;
5998 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
5999 	mutex_exit(&dcp->c_statelock);
6000 
6001 	/* XXX bob: should we do a filldir here? or just add . and .. */
6002 	/* maybe should kick off an async filldir so caller does not wait */
6003 
6004 	/* put the entry in the dnlc */
6005 	if (cachefs_dnlc)
6006 		dnlc_enter(dvp, nm, *vpp);
6007 
6008 	/* save the fileno of the parent so can find the name */
6009 	if (bcmp(&newcp->c_metadata.md_parent, &dircid,
6010 	    sizeof (cfs_cid_t)) != 0) {
6011 		mutex_enter(&newcp->c_statelock);
6012 		newcp->c_metadata.md_parent = dircid;
6013 		newcp->c_flags |= CN_UPDATED;
6014 		mutex_exit(&newcp->c_statelock);
6015 	}
6016 out:
6017 	if (vp)
6018 		VN_RELE(vp);
6019 
6020 	return (error);
6021 }
6022 
6023 static int
6024 cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
6025     vnode_t **vpp, cred_t *cr)
6026 {
6027 	cnode_t *dcp = VTOC(dvp);
6028 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6029 	int error;
6030 	cnode_t *newcp = NULL;
6031 	struct vattr va;
6032 	timestruc_t current_time;
6033 	off_t commit = 0;
6034 	char *s;
6035 	int namlen;
6036 
6037 	/* don't allow '/' characters in pathname component */
6038 	for (s = nm, namlen = 0; *s; s++, namlen++)
6039 		if (*s == '/')
6040 			return (EACCES);
6041 	if (namlen == 0)
6042 		return (EINVAL);
6043 
6044 	if (CFS_ISFS_WRITE_AROUND(fscp))
6045 		return (ETIMEDOUT);
6046 
6047 	mutex_enter(&dcp->c_statelock);
6048 
6049 	/* check permissions */
6050 	if (error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) {
6051 		mutex_exit(&dcp->c_statelock);
6052 		goto out;
6053 	}
6054 
6055 	/* the directory front file must be populated */
6056 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6057 		error = ETIMEDOUT;
6058 		mutex_exit(&dcp->c_statelock);
6059 		goto out;
6060 	}
6061 
6062 	/* make sure nm does not already exist in the directory */
6063 	error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL);
6064 	if (error == ENOTDIR) {
6065 		error = ETIMEDOUT;
6066 		mutex_exit(&dcp->c_statelock);
6067 		goto out;
6068 	}
6069 	if (error != ENOENT) {
6070 		error = EEXIST;
6071 		mutex_exit(&dcp->c_statelock);
6072 		goto out;
6073 	}
6074 
6075 	/* make up a reasonable set of attributes */
6076 	cachefs_attr_setup(vap, &va, dcp, cr);
6077 	va.va_type = VDIR;
6078 	va.va_mode |= S_IFDIR;
6079 	va.va_nlink = 2;
6080 
6081 	mutex_exit(&dcp->c_statelock);
6082 
6083 	/* create the cnode */
6084 	error = cachefs_cnode_create(fscp, &va, 0, &newcp);
6085 	if (error)
6086 		goto out;
6087 
6088 	mutex_enter(&newcp->c_statelock);
6089 
6090 	error = cachefs_dlog_cidmap(fscp);
6091 	if (error) {
6092 		mutex_exit(&newcp->c_statelock);
6093 		goto out;
6094 	}
6095 
6096 	cachefs_creategid(dcp, newcp, vap, cr);
6097 	mutex_enter(&dcp->c_statelock);
6098 	cachefs_createacl(dcp, newcp);
6099 	mutex_exit(&dcp->c_statelock);
6100 	gethrestime(&current_time);
6101 	newcp->c_metadata.md_vattr.va_atime = current_time;
6102 	newcp->c_metadata.md_localctime = current_time;
6103 	newcp->c_metadata.md_localmtime = current_time;
6104 	newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME |
6105 	    MD_LOCALCTIME;
6106 	newcp->c_flags |= CN_UPDATED;
6107 
6108 	/* make a front file for the new directory, add . and .. */
6109 	error = cachefs_dir_new(dcp, newcp);
6110 	if (error) {
6111 		mutex_exit(&newcp->c_statelock);
6112 		goto out;
6113 	}
6114 	cachefs_modified(newcp);
6115 
6116 	/*
6117 	 * write the metadata now rather than waiting until
6118 	 * inactive so that if there's no space we can let
6119 	 * the caller know.
6120 	 */
6121 	ASSERT(newcp->c_frontvp);
6122 	ASSERT((newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
6123 	ASSERT((newcp->c_flags & CN_ALLOC_PENDING) == 0);
6124 	error = filegrp_write_metadata(newcp->c_filegrp,
6125 	    &newcp->c_id, &newcp->c_metadata);
6126 	if (error) {
6127 		mutex_exit(&newcp->c_statelock);
6128 		goto out;
6129 	}
6130 	mutex_exit(&newcp->c_statelock);
6131 
6132 	/* log the operation */
6133 	commit = cachefs_dlog_mkdir(fscp, dcp, newcp, nm, &va, cr);
6134 	if (commit == 0) {
6135 		error = ENOSPC;
6136 		goto out;
6137 	}
6138 
6139 	mutex_enter(&dcp->c_statelock);
6140 
6141 	/* make sure directory is still populated */
6142 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6143 		mutex_exit(&dcp->c_statelock);
6144 		error = ETIMEDOUT;
6145 		goto out;
6146 	}
6147 	cachefs_modified(dcp);
6148 
6149 	/* enter the new file in the directory */
6150 	error = cachefs_dir_enter(dcp, nm, &newcp->c_metadata.md_cookie,
6151 	    &newcp->c_id, SM_ASYNC);
6152 	if (error) {
6153 		mutex_exit(&dcp->c_statelock);
6154 		goto out;
6155 	}
6156 
6157 	/* update parent dir times */
6158 	dcp->c_metadata.md_localctime = current_time;
6159 	dcp->c_metadata.md_localmtime = current_time;
6160 	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
6161 	dcp->c_attr.va_nlink++;
6162 	dcp->c_flags |= CN_UPDATED;
6163 	mutex_exit(&dcp->c_statelock);
6164 
6165 out:
6166 	if (commit) {
6167 		/* commit the log entry */
6168 		if (cachefs_dlog_commit(fscp, commit, error)) {
6169 			/*EMPTY*/
6170 			/* XXX bob: fix on panic */
6171 		}
6172 	}
6173 	if (error) {
6174 		if (newcp) {
6175 			mutex_enter(&newcp->c_statelock);
6176 			newcp->c_flags |= CN_DESTROY;
6177 			mutex_exit(&newcp->c_statelock);
6178 			VN_RELE(CTOV(newcp));
6179 		}
6180 	} else {
6181 		*vpp = CTOV(newcp);
6182 	}
6183 	return (error);
6184 }
6185 
6186 /*ARGSUSED*/
6187 static int
6188 cachefs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
6189     caller_context_t *ct, int flags)
6190 {
6191 	cnode_t *dcp = VTOC(dvp);
6192 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6193 	cachefscache_t *cachep = fscp->fs_cache;
6194 	int error = 0;
6195 	int held = 0;
6196 	int connected = 0;
6197 	size_t namlen;
6198 	vnode_t *vp = NULL;
6199 	int vfslock = 0;
6200 
6201 #ifdef CFSDEBUG
6202 	CFS_DEBUG(CFSDEBUG_VOPS)
6203 		printf("cachefs_rmdir: ENTER vp %p\n", (void *)dvp);
6204 #endif
6205 
6206 	if (getzoneid() != GLOBAL_ZONEID) {
6207 		error = EPERM;
6208 		goto out;
6209 	}
6210 
6211 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
6212 		ASSERT(dcp->c_flags & CN_NOCACHE);
6213 
6214 	/*
6215 	 * Cachefs only provides pass-through support for NFSv4,
6216 	 * and all vnode operations are passed through to the
6217 	 * back file system. For NFSv4 pass-through to work, only
6218 	 * connected operation is supported, the cnode backvp must
6219 	 * exist, and cachefs optional (eg., disconnectable) flags
6220 	 * are turned off. Assert these conditions to ensure that
6221 	 * the backfilesystem is called for the rmdir operation.
6222 	 */
6223 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6224 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6225 
6226 	for (;;) {
6227 		if (vfslock) {
6228 			vn_vfsunlock(vp);
6229 			vfslock = 0;
6230 		}
6231 		if (vp) {
6232 			VN_RELE(vp);
6233 			vp = NULL;
6234 		}
6235 
6236 		/* get (or renew) access to the file system */
6237 		if (held) {
6238 			/* Won't loop with NFSv4 connected behavior */
6239 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6240 			cachefs_cd_release(fscp);
6241 			held = 0;
6242 		}
6243 		error = cachefs_cd_access(fscp, connected, 1);
6244 		if (error)
6245 			break;
6246 		held = 1;
6247 
6248 		/* if disconnected, do some extra error checking */
6249 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
6250 			/* check permissions */
6251 			mutex_enter(&dcp->c_statelock);
6252 			error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
6253 			mutex_exit(&dcp->c_statelock);
6254 			if (CFS_TIMEOUT(fscp, error)) {
6255 				connected = 1;
6256 				continue;
6257 			}
6258 			if (error)
6259 				break;
6260 
6261 			namlen = strlen(nm);
6262 			if (namlen == 0) {
6263 				error = EINVAL;
6264 				break;
6265 			}
6266 
6267 			/* cannot remove . and .. */
6268 			if (nm[0] == '.') {
6269 				if (namlen == 1) {
6270 					error = EINVAL;
6271 					break;
6272 				} else if (namlen == 2 && nm[1] == '.') {
6273 					error = EEXIST;
6274 					break;
6275 				}
6276 			}
6277 
6278 		}
6279 
6280 		/* get the cnode of the dir to remove */
6281 		error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
6282 		if (error) {
6283 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6284 				if (CFS_TIMEOUT(fscp, error)) {
6285 					cachefs_cd_release(fscp);
6286 					held = 0;
6287 					cachefs_cd_timedout(fscp);
6288 					connected = 0;
6289 					continue;
6290 				}
6291 			} else {
6292 				if (CFS_TIMEOUT(fscp, error)) {
6293 					connected = 1;
6294 					continue;
6295 				}
6296 			}
6297 			break;
6298 		}
6299 
6300 		/* must be a dir */
6301 		if (vp->v_type != VDIR) {
6302 			error = ENOTDIR;
6303 			break;
6304 		}
6305 
6306 		/* must not be current dir */
6307 		if (VOP_CMP(vp, cdir, ct)) {
6308 			error = EINVAL;
6309 			break;
6310 		}
6311 
6312 		/* see ufs_dirremove for why this is done, mount race */
6313 		if (vn_vfswlock(vp)) {
6314 			error = EBUSY;
6315 			break;
6316 		}
6317 		vfslock = 1;
6318 		if (vn_mountedvfs(vp) != NULL) {
6319 			error = EBUSY;
6320 			break;
6321 		}
6322 
6323 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6324 			error = cachefs_rmdir_connected(dvp, nm, cdir,
6325 			    cr, vp);
6326 			if (CFS_TIMEOUT(fscp, error)) {
6327 				cachefs_cd_release(fscp);
6328 				held = 0;
6329 				cachefs_cd_timedout(fscp);
6330 				connected = 0;
6331 				continue;
6332 			}
6333 		} else {
6334 			error = cachefs_rmdir_disconnected(dvp, nm, cdir,
6335 			    cr, vp);
6336 			if (CFS_TIMEOUT(fscp, error)) {
6337 				connected = 1;
6338 				continue;
6339 			}
6340 		}
6341 		break;
6342 	}
6343 
6344 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RMDIR)) {
6345 		ino64_t fileno = 0;
6346 		fid_t *fidp = NULL;
6347 		cnode_t *cp = NULL;
6348 		if (vp)
6349 			cp = VTOC(vp);
6350 
6351 		if (cp != NULL) {
6352 			fidp = &cp->c_metadata.md_cookie;
6353 			fileno = cp->c_id.cid_fileno;
6354 		}
6355 
6356 		cachefs_log_rmdir(cachep, error, fscp->fs_cfsvfsp,
6357 		    fidp, fileno, crgetuid(cr));
6358 	}
6359 
6360 	if (held) {
6361 		cachefs_cd_release(fscp);
6362 	}
6363 
6364 	if (vfslock)
6365 		vn_vfsunlock(vp);
6366 
6367 	if (vp)
6368 		VN_RELE(vp);
6369 
6370 #ifdef CFS_CD_DEBUG
6371 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
6372 #endif
6373 out:
6374 #ifdef CFSDEBUG
6375 	CFS_DEBUG(CFSDEBUG_VOPS)
6376 		printf("cachefs_rmdir: EXIT error = %d\n", error);
6377 #endif
6378 
6379 	return (error);
6380 }
6381 
6382 static int
6383 cachefs_rmdir_connected(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
6384     vnode_t *vp)
6385 {
6386 	cnode_t *dcp = VTOC(dvp);
6387 	cnode_t *cp = VTOC(vp);
6388 	int error = 0;
6389 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6390 
6391 	rw_enter(&dcp->c_rwlock, RW_WRITER);
6392 	mutex_enter(&dcp->c_statelock);
6393 	mutex_enter(&cp->c_statelock);
6394 
6395 	if (dcp->c_backvp == NULL) {
6396 		error = cachefs_getbackvp(fscp, dcp);
6397 		if (error) {
6398 			goto out;
6399 		}
6400 	}
6401 
6402 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
6403 	if (error)
6404 		goto out;
6405 
6406 	/* rmdir on the back fs */
6407 	CFS_DPRINT_BACKFS_NFSV4(fscp,
6408 	    ("cachefs_rmdir (nfsv4): dcp %p, dbackvp %p, "
6409 	    "name %s\n", dcp, dcp->c_backvp, nm));
6410 	error = VOP_RMDIR(dcp->c_backvp, nm, cdir, cr, NULL, 0);
6411 	if (error)
6412 		goto out;
6413 
6414 	/* if the dir is populated, remove the entry from it */
6415 	if (CFS_ISFS_NONSHARED(fscp) &&
6416 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
6417 		error = cachefs_dir_rmentry(dcp, nm);
6418 		if (error) {
6419 			cachefs_nocache(dcp);
6420 			error = 0;
6421 		}
6422 	}
6423 
6424 	/*
6425 	 * *if* the (hard) link count goes to 0, then we set the CDESTROY
6426 	 * flag on the cnode. The cached object will then be destroyed
6427 	 * at inactive time where the chickens come home to roost :-)
6428 	 * The link cnt for directories is bumped down by 2 'cause the "."
6429 	 * entry has to be elided too ! The link cnt for the parent goes down
6430 	 * by 1 (because of "..").
6431 	 */
6432 	cp->c_attr.va_nlink -= 2;
6433 	dcp->c_attr.va_nlink--;
6434 	if (cp->c_attr.va_nlink == 0) {
6435 		cp->c_flags |= CN_DESTROY;
6436 	} else {
6437 		cp->c_flags |= CN_UPDATED;
6438 	}
6439 	dcp->c_flags |= CN_UPDATED;
6440 
6441 	dnlc_purge_vp(vp);
6442 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
6443 
6444 out:
6445 	mutex_exit(&cp->c_statelock);
6446 	mutex_exit(&dcp->c_statelock);
6447 	rw_exit(&dcp->c_rwlock);
6448 
6449 	return (error);
6450 }
6451 
6452 static int
6453 /*ARGSUSED*/
6454 cachefs_rmdir_disconnected(vnode_t *dvp, char *nm, vnode_t *cdir,
6455     cred_t *cr, vnode_t *vp)
6456 {
6457 	cnode_t *dcp = VTOC(dvp);
6458 	cnode_t *cp = VTOC(vp);
6459 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6460 	int error = 0;
6461 	off_t commit = 0;
6462 	timestruc_t current_time;
6463 
6464 	if (CFS_ISFS_WRITE_AROUND(fscp))
6465 		return (ETIMEDOUT);
6466 
6467 	rw_enter(&dcp->c_rwlock, RW_WRITER);
6468 	mutex_enter(&dcp->c_statelock);
6469 	mutex_enter(&cp->c_statelock);
6470 
6471 	/* both directories must be populated */
6472 	if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) ||
6473 	    ((cp->c_metadata.md_flags & MD_POPULATED) == 0)) {
6474 		error = ETIMEDOUT;
6475 		goto out;
6476 	}
6477 
6478 	/* if sticky bit set on the dir, more access checks to perform */
6479 	if (error = cachefs_stickyrmchk(dcp, cp, cr)) {
6480 		goto out;
6481 	}
6482 
6483 	/* make sure dir is empty */
6484 	if (cp->c_attr.va_nlink > 2) {
6485 		error = cachefs_dir_empty(cp);
6486 		if (error) {
6487 			if (error == ENOTDIR)
6488 				error = ETIMEDOUT;
6489 			goto out;
6490 		}
6491 		cachefs_modified(cp);
6492 	}
6493 	cachefs_modified(dcp);
6494 
6495 	/* log the operation */
6496 	commit = cachefs_dlog_rmdir(fscp, dcp, nm, cp, cr);
6497 	if (commit == 0) {
6498 		error = ENOSPC;
6499 		goto out;
6500 	}
6501 
6502 	/* remove name from parent dir */
6503 	error = cachefs_dir_rmentry(dcp, nm);
6504 	if (error == ENOTDIR) {
6505 		error = ETIMEDOUT;
6506 		goto out;
6507 	}
6508 	if (error)
6509 		goto out;
6510 
6511 	gethrestime(&current_time);
6512 
6513 	/* update deleted dir values */
6514 	cp->c_attr.va_nlink -= 2;
6515 	if (cp->c_attr.va_nlink == 0)
6516 		cp->c_flags |= CN_DESTROY;
6517 	else {
6518 		cp->c_metadata.md_localctime = current_time;
6519 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
6520 		cp->c_flags |= CN_UPDATED;
6521 	}
6522 
6523 	/* update parent values */
6524 	dcp->c_metadata.md_localctime = current_time;
6525 	dcp->c_metadata.md_localmtime = current_time;
6526 	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
6527 	dcp->c_attr.va_nlink--;
6528 	dcp->c_flags |= CN_UPDATED;
6529 
6530 out:
6531 	mutex_exit(&cp->c_statelock);
6532 	mutex_exit(&dcp->c_statelock);
6533 	rw_exit(&dcp->c_rwlock);
6534 	if (commit) {
6535 		/* commit the log entry */
6536 		if (cachefs_dlog_commit(fscp, commit, error)) {
6537 			/*EMPTY*/
6538 			/* XXX bob: fix on panic */
6539 		}
6540 		dnlc_purge_vp(vp);
6541 	}
6542 	return (error);
6543 }
6544 
6545 /*ARGSUSED*/
6546 static int
6547 cachefs_symlink(vnode_t *dvp, char *lnm, vattr_t *tva,
6548     char *tnm, cred_t *cr, caller_context_t *ct, int flags)
6549 {
6550 	cnode_t *dcp = VTOC(dvp);
6551 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6552 	cachefscache_t *cachep = fscp->fs_cache;
6553 	int error = 0;
6554 	int held = 0;
6555 	int connected = 0;
6556 
6557 #ifdef CFSDEBUG
6558 	CFS_DEBUG(CFSDEBUG_VOPS)
6559 		printf("cachefs_symlink: ENTER dvp %p lnm %s tnm %s\n",
6560 		    (void *)dvp, lnm, tnm);
6561 #endif
6562 
6563 	if (getzoneid() != GLOBAL_ZONEID) {
6564 		error = EPERM;
6565 		goto out;
6566 	}
6567 
6568 	if (fscp->fs_cache->c_flags & CACHE_NOCACHE)
6569 		ASSERT(dcp->c_flags & CN_NOCACHE);
6570 
6571 	/*
6572 	 * Cachefs only provides pass-through support for NFSv4,
6573 	 * and all vnode operations are passed through to the
6574 	 * back file system. For NFSv4 pass-through to work, only
6575 	 * connected operation is supported, the cnode backvp must
6576 	 * exist, and cachefs optional (eg., disconnectable) flags
6577 	 * are turned off. Assert these conditions to ensure that
6578 	 * the backfilesystem is called for the symlink operation.
6579 	 */
6580 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6581 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6582 
6583 	for (;;) {
6584 		/* get (or renew) access to the file system */
6585 		if (held) {
6586 			/* Won't loop with NFSv4 connected behavior */
6587 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6588 			rw_exit(&dcp->c_rwlock);
6589 			cachefs_cd_release(fscp);
6590 			held = 0;
6591 		}
6592 		error = cachefs_cd_access(fscp, connected, 1);
6593 		if (error)
6594 			break;
6595 		rw_enter(&dcp->c_rwlock, RW_WRITER);
6596 		held = 1;
6597 
6598 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6599 			error = cachefs_symlink_connected(dvp, lnm, tva,
6600 			    tnm, cr);
6601 			if (CFS_TIMEOUT(fscp, error)) {
6602 				rw_exit(&dcp->c_rwlock);
6603 				cachefs_cd_release(fscp);
6604 				held = 0;
6605 				cachefs_cd_timedout(fscp);
6606 				connected = 0;
6607 				continue;
6608 			}
6609 		} else {
6610 			error = cachefs_symlink_disconnected(dvp, lnm, tva,
6611 			    tnm, cr);
6612 			if (CFS_TIMEOUT(fscp, error)) {
6613 				connected = 1;
6614 				continue;
6615 			}
6616 		}
6617 		break;
6618 	}
6619 
6620 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_SYMLINK))
6621 		cachefs_log_symlink(cachep, error, fscp->fs_cfsvfsp,
6622 		    &dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno,
6623 		    crgetuid(cr), (uint_t)strlen(tnm));
6624 
6625 	if (held) {
6626 		rw_exit(&dcp->c_rwlock);
6627 		cachefs_cd_release(fscp);
6628 	}
6629 
6630 #ifdef CFS_CD_DEBUG
6631 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
6632 #endif
6633 out:
6634 #ifdef CFSDEBUG
6635 	CFS_DEBUG(CFSDEBUG_VOPS)
6636 		printf("cachefs_symlink: EXIT error = %d\n", error);
6637 #endif
6638 	return (error);
6639 }
6640 
6641 static int
6642 cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva,
6643     char *tnm, cred_t *cr)
6644 {
6645 	cnode_t *dcp = VTOC(dvp);
6646 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6647 	int error = 0;
6648 	vnode_t *backvp = NULL;
6649 	cnode_t *newcp = NULL;
6650 	struct vattr va;
6651 	struct fid cookie;
6652 	cfs_cid_t cid;
6653 	uint32_t valid_fid;
6654 
6655 	mutex_enter(&dcp->c_statelock);
6656 
6657 	if (dcp->c_backvp == NULL) {
6658 		error = cachefs_getbackvp(fscp, dcp);
6659 		if (error) {
6660 			cachefs_nocache(dcp);
6661 			mutex_exit(&dcp->c_statelock);
6662 			goto out;
6663 		}
6664 	}
6665 
6666 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
6667 	if (error) {
6668 		mutex_exit(&dcp->c_statelock);
6669 		goto out;
6670 	}
6671 	CFS_DPRINT_BACKFS_NFSV4(fscp,
6672 	    ("cachefs_symlink (nfsv4): dcp %p, dbackvp %p, "
6673 	    "lnm %s, tnm %s\n", dcp, dcp->c_backvp, lnm, tnm));
6674 	error = VOP_SYMLINK(dcp->c_backvp, lnm, tva, tnm, cr, NULL, 0);
6675 	if (error) {
6676 		mutex_exit(&dcp->c_statelock);
6677 		goto out;
6678 	}
6679 	if ((dcp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
6680 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
6681 		cachefs_nocache(dcp);
6682 		mutex_exit(&dcp->c_statelock);
6683 		goto out;
6684 	}
6685 
6686 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
6687 
6688 	/* lookup the symlink we just created and get its fid and attrs */
6689 	(void) VOP_LOOKUP(dcp->c_backvp, lnm, &backvp, NULL, 0, NULL, cr,
6690 	    NULL, NULL, NULL);
6691 	if (backvp == NULL) {
6692 		if (CFS_ISFS_BACKFS_NFSV4(fscp) == 0)
6693 			cachefs_nocache(dcp);
6694 		mutex_exit(&dcp->c_statelock);
6695 		goto out;
6696 	}
6697 
6698 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
6699 	error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
6700 	if (error) {
6701 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6702 		error = 0;
6703 		cachefs_nocache(dcp);
6704 		mutex_exit(&dcp->c_statelock);
6705 		goto out;
6706 	}
6707 	cid.cid_fileno = va.va_nodeid;
6708 	cid.cid_flags = 0;
6709 
6710 	/* if the dir is cached, add the symlink to it */
6711 	if (CFS_ISFS_NONSHARED(fscp) &&
6712 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
6713 		error = cachefs_dir_enter(dcp, lnm, &cookie, &cid, SM_ASYNC);
6714 		if (error) {
6715 			cachefs_nocache(dcp);
6716 			error = 0;
6717 		}
6718 	}
6719 	mutex_exit(&dcp->c_statelock);
6720 
6721 	/* make the cnode for the sym link */
6722 	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
6723 	    &va, backvp, cr, 0, &newcp);
6724 	if (error) {
6725 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6726 		cachefs_nocache(dcp);
6727 		error = 0;
6728 		goto out;
6729 	}
6730 
6731 	/* try to cache the symlink contents */
6732 	rw_enter(&newcp->c_rwlock, RW_WRITER);
6733 	mutex_enter(&newcp->c_statelock);
6734 
6735 	/*
6736 	 * try to cache the sym link, note that its a noop if NOCACHE
6737 	 * or NFSv4 is set
6738 	 */
6739 	error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size);
6740 	if (error) {
6741 		cachefs_nocache(newcp);
6742 		error = 0;
6743 	}
6744 	mutex_exit(&newcp->c_statelock);
6745 	rw_exit(&newcp->c_rwlock);
6746 
6747 out:
6748 	if (backvp)
6749 		VN_RELE(backvp);
6750 	if (newcp)
6751 		VN_RELE(CTOV(newcp));
6752 	return (error);
6753 }
6754 
6755 static int
6756 cachefs_symlink_disconnected(vnode_t *dvp, char *lnm, vattr_t *tva,
6757     char *tnm, cred_t *cr)
6758 {
6759 	cnode_t *dcp = VTOC(dvp);
6760 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6761 	int error;
6762 	cnode_t *newcp = NULL;
6763 	struct vattr va;
6764 	timestruc_t current_time;
6765 	off_t commit = 0;
6766 
6767 	if (CFS_ISFS_WRITE_AROUND(fscp))
6768 		return (ETIMEDOUT);
6769 
6770 	mutex_enter(&dcp->c_statelock);
6771 
6772 	/* check permissions */
6773 	if (error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) {
6774 		mutex_exit(&dcp->c_statelock);
6775 		goto out;
6776 	}
6777 
6778 	/* the directory front file must be populated */
6779 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6780 		error = ETIMEDOUT;
6781 		mutex_exit(&dcp->c_statelock);
6782 		goto out;
6783 	}
6784 
6785 	/* make sure lnm does not already exist in the directory */
6786 	error = cachefs_dir_look(dcp, lnm, NULL, NULL, NULL, NULL);
6787 	if (error == ENOTDIR) {
6788 		error = ETIMEDOUT;
6789 		mutex_exit(&dcp->c_statelock);
6790 		goto out;
6791 	}
6792 	if (error != ENOENT) {
6793 		error = EEXIST;
6794 		mutex_exit(&dcp->c_statelock);
6795 		goto out;
6796 	}
6797 
6798 	/* make up a reasonable set of attributes */
6799 	cachefs_attr_setup(tva, &va, dcp, cr);
6800 	va.va_type = VLNK;
6801 	va.va_mode |= S_IFLNK;
6802 	va.va_size = strlen(tnm);
6803 
6804 	mutex_exit(&dcp->c_statelock);
6805 
6806 	/* create the cnode */
6807 	error = cachefs_cnode_create(fscp, &va, 0, &newcp);
6808 	if (error)
6809 		goto out;
6810 
6811 	rw_enter(&newcp->c_rwlock, RW_WRITER);
6812 	mutex_enter(&newcp->c_statelock);
6813 
6814 	error = cachefs_dlog_cidmap(fscp);
6815 	if (error) {
6816 		mutex_exit(&newcp->c_statelock);
6817 		rw_exit(&newcp->c_rwlock);
6818 		error = ENOSPC;
6819 		goto out;
6820 	}
6821 
6822 	cachefs_creategid(dcp, newcp, tva, cr);
6823 	mutex_enter(&dcp->c_statelock);
6824 	cachefs_createacl(dcp, newcp);
6825 	mutex_exit(&dcp->c_statelock);
6826 	gethrestime(&current_time);
6827 	newcp->c_metadata.md_vattr.va_atime = current_time;
6828 	newcp->c_metadata.md_localctime = current_time;
6829 	newcp->c_metadata.md_localmtime = current_time;
6830 	newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME |
6831 	    MD_LOCALCTIME;
6832 	newcp->c_flags |= CN_UPDATED;
6833 
6834 	/* log the operation */
6835 	commit = cachefs_dlog_symlink(fscp, dcp, newcp, lnm, tva, tnm, cr);
6836 	if (commit == 0) {
6837 		mutex_exit(&newcp->c_statelock);
6838 		rw_exit(&newcp->c_rwlock);
6839 		error = ENOSPC;
6840 		goto out;
6841 	}
6842 
6843 	/* store the symlink contents */
6844 	error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size);
6845 	if (error) {
6846 		mutex_exit(&newcp->c_statelock);
6847 		rw_exit(&newcp->c_rwlock);
6848 		goto out;
6849 	}
6850 	if (cachefs_modified_alloc(newcp)) {
6851 		mutex_exit(&newcp->c_statelock);
6852 		rw_exit(&newcp->c_rwlock);
6853 		error = ENOSPC;
6854 		goto out;
6855 	}
6856 
6857 	/*
6858 	 * write the metadata now rather than waiting until
6859 	 * inactive so that if there's no space we can let
6860 	 * the caller know.
6861 	 */
6862 	if (newcp->c_flags & CN_ALLOC_PENDING) {
6863 		if (newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
6864 			(void) filegrp_allocattr(newcp->c_filegrp);
6865 		}
6866 		error = filegrp_create_metadata(newcp->c_filegrp,
6867 		    &newcp->c_metadata, &newcp->c_id);
6868 		if (error) {
6869 			mutex_exit(&newcp->c_statelock);
6870 			rw_exit(&newcp->c_rwlock);
6871 			goto out;
6872 		}
6873 		newcp->c_flags &= ~CN_ALLOC_PENDING;
6874 	}
6875 	error = filegrp_write_metadata(newcp->c_filegrp,
6876 	    &newcp->c_id, &newcp->c_metadata);
6877 	if (error) {
6878 		mutex_exit(&newcp->c_statelock);
6879 		rw_exit(&newcp->c_rwlock);
6880 		goto out;
6881 	}
6882 	mutex_exit(&newcp->c_statelock);
6883 	rw_exit(&newcp->c_rwlock);
6884 
6885 	mutex_enter(&dcp->c_statelock);
6886 
6887 	/* enter the new file in the directory */
6888 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6889 		error = ETIMEDOUT;
6890 		mutex_exit(&dcp->c_statelock);
6891 		goto out;
6892 	}
6893 	cachefs_modified(dcp);
6894 	error = cachefs_dir_enter(dcp, lnm, &newcp->c_metadata.md_cookie,
6895 	    &newcp->c_id, SM_ASYNC);
6896 	if (error) {
6897 		mutex_exit(&dcp->c_statelock);
6898 		goto out;
6899 	}
6900 
6901 	/* update parent dir times */
6902 	dcp->c_metadata.md_localctime = current_time;
6903 	dcp->c_metadata.md_localmtime = current_time;
6904 	dcp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME;
6905 	dcp->c_flags |= CN_UPDATED;
6906 	mutex_exit(&dcp->c_statelock);
6907 
6908 out:
6909 	if (commit) {
6910 		/* commit the log entry */
6911 		if (cachefs_dlog_commit(fscp, commit, error)) {
6912 			/*EMPTY*/
6913 			/* XXX bob: fix on panic */
6914 		}
6915 	}
6916 
6917 	if (error) {
6918 		if (newcp) {
6919 			mutex_enter(&newcp->c_statelock);
6920 			newcp->c_flags |= CN_DESTROY;
6921 			mutex_exit(&newcp->c_statelock);
6922 		}
6923 	}
6924 	if (newcp) {
6925 		VN_RELE(CTOV(newcp));
6926 	}
6927 
6928 	return (error);
6929 }
6930 
6931 /*ARGSUSED*/
6932 static int
6933 cachefs_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
6934     caller_context_t *ct, int flags)
6935 {
6936 	cnode_t *dcp = VTOC(vp);
6937 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6938 	cachefscache_t *cachep = fscp->fs_cache;
6939 	int error = 0;
6940 	int held = 0;
6941 	int connected = 0;
6942 
6943 #ifdef CFSDEBUG
6944 	CFS_DEBUG(CFSDEBUG_VOPS)
6945 		printf("cachefs_readdir: ENTER vp %p\n", (void *)vp);
6946 #endif
6947 	if (getzoneid() != GLOBAL_ZONEID) {
6948 		error = EPERM;
6949 		goto out;
6950 	}
6951 
6952 	/*
6953 	 * Cachefs only provides pass-through support for NFSv4,
6954 	 * and all vnode operations are passed through to the
6955 	 * back file system. For NFSv4 pass-through to work, only
6956 	 * connected operation is supported, the cnode backvp must
6957 	 * exist, and cachefs optional (eg., disconnectable) flags
6958 	 * are turned off. Assert these conditions to ensure that
6959 	 * the backfilesystem is called for the readdir operation.
6960 	 */
6961 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6962 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6963 
6964 	for (;;) {
6965 		/* get (or renew) access to the file system */
6966 		if (held) {
6967 			/* Won't loop with NFSv4 connected behavior */
6968 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6969 			rw_exit(&dcp->c_rwlock);
6970 			cachefs_cd_release(fscp);
6971 			held = 0;
6972 		}
6973 		error = cachefs_cd_access(fscp, connected, 0);
6974 		if (error)
6975 			break;
6976 		rw_enter(&dcp->c_rwlock, RW_READER);
6977 		held = 1;
6978 
6979 		/* quit if link count of zero (posix) */
6980 		if (dcp->c_attr.va_nlink == 0) {
6981 			if (eofp)
6982 				*eofp = 1;
6983 			error = 0;
6984 			break;
6985 		}
6986 
6987 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6988 			error = cachefs_readdir_connected(vp, uiop, cr,
6989 			    eofp);
6990 			if (CFS_TIMEOUT(fscp, error)) {
6991 				rw_exit(&dcp->c_rwlock);
6992 				cachefs_cd_release(fscp);
6993 				held = 0;
6994 				cachefs_cd_timedout(fscp);
6995 				connected = 0;
6996 				continue;
6997 			}
6998 		} else {
6999 			error = cachefs_readdir_disconnected(vp, uiop, cr,
7000 			    eofp);
7001 			if (CFS_TIMEOUT(fscp, error)) {
7002 				if (cachefs_cd_access_miss(fscp)) {
7003 					error = cachefs_readdir_connected(vp,
7004 					    uiop, cr, eofp);
7005 					if (!CFS_TIMEOUT(fscp, error))
7006 						break;
7007 					delay(5*hz);
7008 					connected = 0;
7009 					continue;
7010 				}
7011 				connected = 1;
7012 				continue;
7013 			}
7014 		}
7015 		break;
7016 	}
7017 
7018 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READDIR))
7019 		cachefs_log_readdir(cachep, error, fscp->fs_cfsvfsp,
7020 		    &dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno,
7021 		    crgetuid(cr), uiop->uio_loffset, *eofp);
7022 
7023 	if (held) {
7024 		rw_exit(&dcp->c_rwlock);
7025 		cachefs_cd_release(fscp);
7026 	}
7027 
7028 #ifdef CFS_CD_DEBUG
7029 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7030 #endif
7031 out:
7032 #ifdef CFSDEBUG
7033 	CFS_DEBUG(CFSDEBUG_VOPS)
7034 		printf("cachefs_readdir: EXIT error = %d\n", error);
7035 #endif
7036 
7037 	return (error);
7038 }
7039 
7040 static int
7041 cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp)
7042 {
7043 	cnode_t *dcp = VTOC(vp);
7044 	int error;
7045 	fscache_t *fscp = C_TO_FSCACHE(dcp);
7046 	struct cachefs_req *rp;
7047 
7048 	mutex_enter(&dcp->c_statelock);
7049 
7050 	/* check directory consistency */
7051 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
7052 	if (error)
7053 		goto out;
7054 	dcp->c_usage++;
7055 
7056 	/* if dir was modified, toss old contents */
7057 	if (dcp->c_metadata.md_flags & MD_INVALREADDIR) {
7058 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
7059 		cachefs_inval_object(dcp);
7060 	}
7061 
7062 	error = 0;
7063 	if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) &&
7064 	    ((dcp->c_flags & (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0) &&
7065 	    !CFS_ISFS_BACKFS_NFSV4(fscp) &&
7066 	    (fscp->fs_cdconnected == CFS_CD_CONNECTED)) {
7067 
7068 		if (cachefs_async_okay()) {
7069 
7070 			/*
7071 			 * Set up asynchronous request to fill this
7072 			 * directory.
7073 			 */
7074 
7075 			dcp->c_flags |= CN_ASYNC_POPULATE;
7076 
7077 			rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
7078 			rp->cfs_cmd = CFS_POPULATE;
7079 			rp->cfs_req_u.cu_populate.cpop_vp = vp;
7080 			rp->cfs_cr = cr;
7081 
7082 			crhold(cr);
7083 			VN_HOLD(vp);
7084 
7085 			cachefs_addqueue(rp, &fscp->fs_workq);
7086 		} else {
7087 			error = cachefs_dir_fill(dcp, cr);
7088 			if (error != 0)
7089 				cachefs_nocache(dcp);
7090 		}
7091 	}
7092 
7093 	/* if front file is populated */
7094 	if (((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) == 0) &&
7095 	    !CFS_ISFS_BACKFS_NFSV4(fscp) &&
7096 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
7097 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
7098 		error = cachefs_dir_read(dcp, uiop, eofp);
7099 		if (error == 0)
7100 			fscp->fs_stats.st_hits++;
7101 	}
7102 
7103 	/* if front file could not be used */
7104 	if ((error != 0) ||
7105 	    CFS_ISFS_BACKFS_NFSV4(fscp) ||
7106 	    (dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
7107 	    ((dcp->c_metadata.md_flags & MD_POPULATED) == 0)) {
7108 
7109 		if (error && !(dcp->c_flags & CN_NOCACHE) &&
7110 		    !CFS_ISFS_BACKFS_NFSV4(fscp))
7111 			cachefs_nocache(dcp);
7112 
7113 		/* get the back vp */
7114 		if (dcp->c_backvp == NULL) {
7115 			error = cachefs_getbackvp(fscp, dcp);
7116 			if (error)
7117 				goto out;
7118 		}
7119 
7120 		if (fscp->fs_inum_size > 0) {
7121 			error = cachefs_readback_translate(dcp, uiop, cr, eofp);
7122 		} else {
7123 			/* do the dir read from the back fs */
7124 			(void) VOP_RWLOCK(dcp->c_backvp,
7125 			    V_WRITELOCK_FALSE, NULL);
7126 			CFS_DPRINT_BACKFS_NFSV4(fscp,
7127 			    ("cachefs_readdir (nfsv4): "
7128 			    "dcp %p, dbackvp %p\n", dcp, dcp->c_backvp));
7129 			error = VOP_READDIR(dcp->c_backvp, uiop, cr, eofp,
7130 			    NULL, 0);
7131 			VOP_RWUNLOCK(dcp->c_backvp, V_WRITELOCK_FALSE, NULL);
7132 		}
7133 
7134 		if (error == 0)
7135 			fscp->fs_stats.st_misses++;
7136 	}
7137 
7138 out:
7139 	mutex_exit(&dcp->c_statelock);
7140 
7141 	return (error);
7142 }
7143 
7144 static int
7145 cachefs_readback_translate(cnode_t *cp, uio_t *uiop, cred_t *cr, int *eofp)
7146 {
7147 	int error = 0;
7148 	fscache_t *fscp = C_TO_FSCACHE(cp);
7149 	caddr_t buffy = NULL;
7150 	int buffysize = MAXBSIZE;
7151 	caddr_t chrp, end;
7152 	ino64_t newinum;
7153 	struct dirent64 *de;
7154 	uio_t uioin;
7155 	iovec_t iov;
7156 
7157 	ASSERT(cp->c_backvp != NULL);
7158 	ASSERT(fscp->fs_inum_size > 0);
7159 
7160 	if (uiop->uio_resid < buffysize)
7161 		buffysize = (int)uiop->uio_resid;
7162 	buffy = cachefs_kmem_alloc(buffysize, KM_SLEEP);
7163 
7164 	iov.iov_base = buffy;
7165 	iov.iov_len = buffysize;
7166 	uioin.uio_iov = &iov;
7167 	uioin.uio_iovcnt = 1;
7168 	uioin.uio_segflg = UIO_SYSSPACE;
7169 	uioin.uio_fmode = 0;
7170 	uioin.uio_extflg = UIO_COPY_CACHED;
7171 	uioin.uio_loffset = uiop->uio_loffset;
7172 	uioin.uio_resid = buffysize;
7173 
7174 	(void) VOP_RWLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);
7175 	error = VOP_READDIR(cp->c_backvp, &uioin, cr, eofp, NULL, 0);
7176 	VOP_RWUNLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);
7177 
7178 	if (error != 0)
7179 		goto out;
7180 
7181 	end = buffy + buffysize - uioin.uio_resid;
7182 
7183 	mutex_exit(&cp->c_statelock);
7184 	mutex_enter(&fscp->fs_fslock);
7185 
7186 
7187 	for (chrp = buffy; chrp < end; chrp += de->d_reclen) {
7188 		de = (dirent64_t *)chrp;
7189 		newinum = cachefs_inum_real2fake(fscp, de->d_ino);
7190 		if (newinum == 0)
7191 			newinum = cachefs_fileno_conflict(fscp, de->d_ino);
7192 		de->d_ino = newinum;
7193 	}
7194 	mutex_exit(&fscp->fs_fslock);
7195 	mutex_enter(&cp->c_statelock);
7196 
7197 	error = uiomove(buffy, end - buffy, UIO_READ, uiop);
7198 	uiop->uio_loffset = uioin.uio_loffset;
7199 
7200 out:
7201 
7202 	if (buffy != NULL)
7203 		cachefs_kmem_free(buffy, buffysize);
7204 
7205 	return (error);
7206 }
7207 
7208 static int
7209 /*ARGSUSED*/
7210 cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop, cred_t *cr,
7211     int *eofp)
7212 {
7213 	cnode_t *dcp = VTOC(vp);
7214 	int error;
7215 
7216 	mutex_enter(&dcp->c_statelock);
7217 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
7218 		error = ETIMEDOUT;
7219 	} else {
7220 		error = cachefs_dir_read(dcp, uiop, eofp);
7221 		if (error == ENOTDIR)
7222 			error = ETIMEDOUT;
7223 	}
7224 	mutex_exit(&dcp->c_statelock);
7225 
7226 	return (error);
7227 }
7228 
7229 /*ARGSUSED*/
7230 static int
7231 cachefs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
7232 {
7233 	int error = 0;
7234 	struct cnode *cp = VTOC(vp);
7235 	fscache_t *fscp = C_TO_FSCACHE(cp);
7236 
7237 	/*
7238 	 * Cachefs only provides pass-through support for NFSv4,
7239 	 * and all vnode operations are passed through to the
7240 	 * back file system. For NFSv4 pass-through to work, only
7241 	 * connected operation is supported, the cnode backvp must
7242 	 * exist, and cachefs optional (eg., disconnectable) flags
7243 	 * are turned off. Assert these conditions, then bail
7244 	 * as  NFSv4 doesn't support VOP_FID.
7245 	 */
7246 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7247 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7248 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7249 		return (ENOTSUP);
7250 	}
7251 
7252 	mutex_enter(&cp->c_statelock);
7253 	if (fidp->fid_len < cp->c_metadata.md_cookie.fid_len) {
7254 		fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
7255 		error = ENOSPC;
7256 	} else {
7257 		bcopy(cp->c_metadata.md_cookie.fid_data, fidp->fid_data,
7258 		    cp->c_metadata.md_cookie.fid_len);
7259 		fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
7260 	}
7261 	mutex_exit(&cp->c_statelock);
7262 	return (error);
7263 }
7264 
7265 /* ARGSUSED2 */
7266 static int
7267 cachefs_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
7268 {
7269 	cnode_t *cp = VTOC(vp);
7270 
7271 	/*
7272 	 * XXX - This is ifdef'ed out for now. The problem -
7273 	 * getdents() acquires the read version of rwlock, then we come
7274 	 * into cachefs_readdir() and that wants to acquire the write version
7275 	 * of this lock (if its going to populate the directory). This is
7276 	 * a problem, this can be solved by introducing another lock in the
7277 	 * cnode.
7278 	 */
7279 /* XXX */
7280 	if (vp->v_type != VREG)
7281 		return (-1);
7282 	if (write_lock)
7283 		rw_enter(&cp->c_rwlock, RW_WRITER);
7284 	else
7285 		rw_enter(&cp->c_rwlock, RW_READER);
7286 	return (write_lock);
7287 }
7288 
7289 /* ARGSUSED */
7290 static void
7291 cachefs_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
7292 {
7293 	cnode_t *cp = VTOC(vp);
7294 	if (vp->v_type != VREG)
7295 		return;
7296 	rw_exit(&cp->c_rwlock);
7297 }
7298 
7299 /* ARGSUSED */
7300 static int
7301 cachefs_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
7302     caller_context_t *ct)
7303 {
7304 	return (0);
7305 }
7306 
7307 static int cachefs_lostpage = 0;
7308 /*
7309  * Return all the pages from [off..off+len] in file
7310  */
7311 /*ARGSUSED*/
7312 static int
7313 cachefs_getpage(struct vnode *vp, offset_t off, size_t len,
7314 	uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
7315 	caddr_t addr, enum seg_rw rw, cred_t *cr, caller_context_t *ct)
7316 {
7317 	cnode_t *cp = VTOC(vp);
7318 	int error;
7319 	fscache_t *fscp = C_TO_FSCACHE(cp);
7320 	cachefscache_t *cachep = fscp->fs_cache;
7321 	int held = 0;
7322 	int connected = 0;
7323 
7324 #ifdef CFSDEBUG
7325 	u_offset_t offx = (u_offset_t)off;
7326 
7327 	CFS_DEBUG(CFSDEBUG_VOPS)
7328 		printf("cachefs_getpage: ENTER vp %p off %lld len %lu rw %d\n",
7329 		    (void *)vp, offx, len, rw);
7330 #endif
7331 	if (getzoneid() != GLOBAL_ZONEID) {
7332 		error = EPERM;
7333 		goto out;
7334 	}
7335 
7336 	if (vp->v_flag & VNOMAP) {
7337 		error = ENOSYS;
7338 		goto out;
7339 	}
7340 
7341 	/* Call backfilesystem if NFSv4 */
7342 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7343 		error = cachefs_getpage_backfs_nfsv4(vp, off, len, protp, pl,
7344 		    plsz, seg, addr, rw, cr);
7345 		goto out;
7346 	}
7347 
7348 	/* XXX sam: make this do an async populate? */
7349 	if (pl == NULL) {
7350 		error = 0;
7351 		goto out;
7352 	}
7353 	if (protp != NULL)
7354 		*protp = PROT_ALL;
7355 
7356 	for (;;) {
7357 		/* get (or renew) access to the file system */
7358 		if (held) {
7359 			cachefs_cd_release(fscp);
7360 			held = 0;
7361 		}
7362 		error = cachefs_cd_access(fscp, connected, 0);
7363 		if (error)
7364 			break;
7365 		held = 1;
7366 
7367 		/*
7368 		 * If we are getting called as a side effect of a
7369 		 * cachefs_write()
7370 		 * operation the local file size might not be extended yet.
7371 		 * In this case we want to be able to return pages of zeroes.
7372 		 */
7373 		if ((u_offset_t)off + len >
7374 		    ((cp->c_size + PAGEOFFSET) & (offset_t)PAGEMASK)) {
7375 			if (seg != segkmap) {
7376 				error = EFAULT;
7377 				break;
7378 			}
7379 		}
7380 		if (len <= PAGESIZE)
7381 			error = cachefs_getapage(vp, (u_offset_t)off, len,
7382 			    protp, pl, plsz, seg, addr, rw, cr);
7383 		else
7384 			error = pvn_getpages(cachefs_getapage, vp,
7385 			    (u_offset_t)off, len, protp, pl, plsz, seg, addr,
7386 			    rw, cr);
7387 		if (error == 0)
7388 			break;
7389 
7390 		if (((cp->c_flags & CN_NOCACHE) && (error == ENOSPC)) ||
7391 		    error == EAGAIN) {
7392 			connected = 0;
7393 			continue;
7394 		}
7395 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
7396 			if (CFS_TIMEOUT(fscp, error)) {
7397 				cachefs_cd_release(fscp);
7398 				held = 0;
7399 				cachefs_cd_timedout(fscp);
7400 				connected = 0;
7401 				continue;
7402 			}
7403 		} else {
7404 			if (CFS_TIMEOUT(fscp, error)) {
7405 				if (cachefs_cd_access_miss(fscp)) {
7406 					if (len <= PAGESIZE)
7407 						error = cachefs_getapage_back(
7408 						    vp, (u_offset_t)off,
7409 						    len, protp, pl,
7410 						    plsz, seg, addr, rw, cr);
7411 					else
7412 						error = pvn_getpages(
7413 						    cachefs_getapage_back, vp,
7414 						    (u_offset_t)off, len,
7415 						    protp, pl,
7416 						    plsz, seg, addr, rw, cr);
7417 					if (!CFS_TIMEOUT(fscp, error) &&
7418 					    (error != EAGAIN))
7419 						break;
7420 					delay(5*hz);
7421 					connected = 0;
7422 					continue;
7423 				}
7424 				connected = 1;
7425 				continue;
7426 			}
7427 		}
7428 		break;
7429 	}
7430 
7431 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GETPAGE))
7432 		cachefs_log_getpage(cachep, error, vp->v_vfsp,
7433 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
7434 		    crgetuid(cr), off, len);
7435 
7436 	if (held) {
7437 		cachefs_cd_release(fscp);
7438 	}
7439 
7440 out:
7441 #ifdef CFS_CD_DEBUG
7442 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7443 #endif
7444 #ifdef CFSDEBUG
7445 	CFS_DEBUG(CFSDEBUG_VOPS)
7446 		printf("cachefs_getpage: EXIT vp %p error %d\n",
7447 		    (void *)vp, error);
7448 #endif
7449 	return (error);
7450 }
7451 
7452 /*
7453  * cachefs_getpage_backfs_nfsv4
7454  *
7455  * Call NFSv4 back filesystem to handle the getpage (cachefs
7456  * pass-through support for NFSv4).
7457  */
7458 static int
7459 cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off, size_t len,
7460 			uint_t *protp, struct page *pl[], size_t plsz,
7461 			struct seg *seg, caddr_t addr, enum seg_rw rw,
7462 			cred_t *cr)
7463 {
7464 	cnode_t *cp = VTOC(vp);
7465 	fscache_t *fscp = C_TO_FSCACHE(cp);
7466 	vnode_t *backvp;
7467 	int error;
7468 
7469 	/*
7470 	 * For NFSv4 pass-through to work, only connected operation is
7471 	 * supported, the cnode backvp must exist, and cachefs optional
7472 	 * (eg., disconnectable) flags are turned off. Assert these
7473 	 * conditions for the getpage operation.
7474 	 */
7475 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7476 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7477 
7478 	/* Call backfs vnode op after extracting backvp */
7479 	mutex_enter(&cp->c_statelock);
7480 	backvp = cp->c_backvp;
7481 	mutex_exit(&cp->c_statelock);
7482 
7483 	CFS_DPRINT_BACKFS_NFSV4(fscp,
7484 	    ("cachefs_getpage_backfs_nfsv4: cnode %p, backvp %p\n",
7485 	    cp, backvp));
7486 	error = VOP_GETPAGE(backvp, off, len, protp, pl, plsz, seg,
7487 	    addr, rw, cr, NULL);
7488 
7489 	return (error);
7490 }
7491 
7492 /*
7493  * Called from pvn_getpages or cachefs_getpage to get a particular page.
7494  */
7495 /*ARGSUSED*/
7496 static int
7497 cachefs_getapage(struct vnode *vp, u_offset_t off, size_t len, uint_t *protp,
7498 	struct page *pl[], size_t plsz, struct seg *seg, caddr_t addr,
7499 	enum seg_rw rw, cred_t *cr)
7500 {
7501 	cnode_t *cp = VTOC(vp);
7502 	page_t **ppp, *pp = NULL;
7503 	fscache_t *fscp = C_TO_FSCACHE(cp);
7504 	cachefscache_t *cachep = fscp->fs_cache;
7505 	int error = 0;
7506 	struct page **ourpl;
7507 	struct page *ourstackpl[17]; /* see ASSERT() below for 17 */
7508 	int index = 0;
7509 	int downgrade;
7510 	int have_statelock = 0;
7511 	u_offset_t popoff;
7512 	size_t popsize = 0;
7513 
7514 	ASSERT(((DEF_POP_SIZE / PAGESIZE) + 1) <= 17);
7515 
7516 	if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
7517 		ourpl = cachefs_kmem_alloc(sizeof (struct page *) *
7518 		    ((fscp->fs_info.fi_popsize / PAGESIZE) + 1), KM_SLEEP);
7519 	else
7520 		ourpl = ourstackpl;
7521 
7522 	ourpl[0] = NULL;
7523 	off = off & (offset_t)PAGEMASK;
7524 again:
7525 	/*
7526 	 * Look for the page
7527 	 */
7528 	if (page_exists(vp, off) == 0) {
7529 		/*
7530 		 * Need to do work to get the page.
7531 		 * Grab our lock because we are going to
7532 		 * modify the state of the cnode.
7533 		 */
7534 		if (! have_statelock) {
7535 			mutex_enter(&cp->c_statelock);
7536 			have_statelock = 1;
7537 		}
7538 		/*
7539 		 * If we're in NOCACHE mode, we will need a backvp
7540 		 */
7541 		if (cp->c_flags & CN_NOCACHE) {
7542 			if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
7543 				error = ETIMEDOUT;
7544 				goto out;
7545 			}
7546 			if (cp->c_backvp == NULL) {
7547 				error = cachefs_getbackvp(fscp, cp);
7548 				if (error)
7549 					goto out;
7550 			}
7551 			error = VOP_GETPAGE(cp->c_backvp, off,
7552 			    PAGESIZE, protp, ourpl, PAGESIZE, seg,
7553 			    addr, S_READ, cr, NULL);
7554 			/*
7555 			 * backfs returns EFAULT when we are trying for a
7556 			 * page beyond EOF but cachefs has the knowledge that
7557 			 * it is not beyond EOF be cause cp->c_size is
7558 			 * greater then the offset requested.
7559 			 */
7560 			if (error == EFAULT) {
7561 				error = 0;
7562 				pp = page_create_va(vp, off, PAGESIZE,
7563 				    PG_EXCL | PG_WAIT, seg, addr);
7564 				if (pp == NULL)
7565 					goto again;
7566 				pagezero(pp, 0, PAGESIZE);
7567 				pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
7568 				goto out;
7569 			}
7570 			if (error)
7571 				goto out;
7572 			goto getpages;
7573 		}
7574 		/*
7575 		 * We need a front file. If we can't get it,
7576 		 * put the cnode in NOCACHE mode and try again.
7577 		 */
7578 		if (cp->c_frontvp == NULL) {
7579 			error = cachefs_getfrontfile(cp);
7580 			if (error) {
7581 				cachefs_nocache(cp);
7582 				error = EAGAIN;
7583 				goto out;
7584 			}
7585 		}
7586 		/*
7587 		 * Check if the front file needs population.
7588 		 * If population is necessary, make sure we have a
7589 		 * backvp as well. We will get the page from the backvp.
7590 		 * bug 4152459-
7591 		 * But if the file system is in disconnected mode
7592 		 * and the file is a local file then do not check the
7593 		 * allocmap.
7594 		 */
7595 		if (((fscp->fs_cdconnected == CFS_CD_CONNECTED) ||
7596 		    ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) &&
7597 		    (cachefs_check_allocmap(cp, off) == 0)) {
7598 			if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
7599 				error = ETIMEDOUT;
7600 				goto out;
7601 			}
7602 			if (cp->c_backvp == NULL) {
7603 				error = cachefs_getbackvp(fscp, cp);
7604 				if (error)
7605 					goto out;
7606 			}
7607 			if (cp->c_filegrp->fg_flags & CFS_FG_WRITE) {
7608 				cachefs_cluster_allocmap(off, &popoff,
7609 				    &popsize,
7610 				    fscp->fs_info.fi_popsize, cp);
7611 				if (popsize != 0) {
7612 					error = cachefs_populate(cp,
7613 					    popoff, popsize,
7614 					    cp->c_frontvp, cp->c_backvp,
7615 					    cp->c_size, cr);
7616 					if (error) {
7617 						cachefs_nocache(cp);
7618 						error = EAGAIN;
7619 						goto out;
7620 					} else {
7621 						cp->c_flags |=
7622 						    CN_UPDATED |
7623 						    CN_NEED_FRONT_SYNC |
7624 						    CN_POPULATION_PENDING;
7625 					}
7626 					popsize = popsize - (off - popoff);
7627 				} else {
7628 					popsize = PAGESIZE;
7629 				}
7630 			}
7631 			/* else XXX assert CN_NOCACHE? */
7632 			error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
7633 			    PAGESIZE, protp, ourpl, popsize,
7634 			    seg, addr, S_READ, cr, NULL);
7635 			if (error)
7636 				goto out;
7637 			fscp->fs_stats.st_misses++;
7638 		} else {
7639 			if (cp->c_flags & CN_POPULATION_PENDING) {
7640 				error = VOP_FSYNC(cp->c_frontvp, FSYNC, cr,
7641 				    NULL);
7642 				cp->c_flags &= ~CN_POPULATION_PENDING;
7643 				if (error) {
7644 					cachefs_nocache(cp);
7645 					error = EAGAIN;
7646 					goto out;
7647 				}
7648 			}
7649 			/*
7650 			 * File was populated so we get the page from the
7651 			 * frontvp
7652 			 */
7653 			error = VOP_GETPAGE(cp->c_frontvp, (offset_t)off,
7654 			    PAGESIZE, protp, ourpl, PAGESIZE, seg, addr,
7655 			    rw, cr, NULL);
7656 			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GPFRONT))
7657 				cachefs_log_gpfront(cachep, error,
7658 				    fscp->fs_cfsvfsp,
7659 				    &cp->c_metadata.md_cookie, cp->c_fileno,
7660 				    crgetuid(cr), off, PAGESIZE);
7661 			if (error) {
7662 				cachefs_nocache(cp);
7663 				error = EAGAIN;
7664 				goto out;
7665 			}
7666 			fscp->fs_stats.st_hits++;
7667 		}
7668 getpages:
7669 		ASSERT(have_statelock);
7670 		if (have_statelock) {
7671 			mutex_exit(&cp->c_statelock);
7672 			have_statelock = 0;
7673 		}
7674 		downgrade = 0;
7675 		for (ppp = ourpl; *ppp; ppp++) {
7676 			if ((*ppp)->p_offset < off) {
7677 				index++;
7678 				page_unlock(*ppp);
7679 				continue;
7680 			}
7681 			if (PAGE_SHARED(*ppp)) {
7682 				if (page_tryupgrade(*ppp) == 0) {
7683 					for (ppp = &ourpl[index]; *ppp; ppp++)
7684 						page_unlock(*ppp);
7685 					error = EAGAIN;
7686 					goto out;
7687 				}
7688 				downgrade = 1;
7689 			}
7690 			ASSERT(PAGE_EXCL(*ppp));
7691 			(void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
7692 			page_rename(*ppp, vp, (*ppp)->p_offset);
7693 		}
7694 		pl[0] = ourpl[index];
7695 		pl[1] = NULL;
7696 		if (downgrade) {
7697 			page_downgrade(ourpl[index]);
7698 		}
7699 		/* Unlock the rest of the pages from the cluster */
7700 		for (ppp = &ourpl[index+1]; *ppp; ppp++)
7701 			page_unlock(*ppp);
7702 	} else {
7703 		ASSERT(! have_statelock);
7704 		if (have_statelock) {
7705 			mutex_exit(&cp->c_statelock);
7706 			have_statelock = 0;
7707 		}
7708 		/* XXX SE_SHARED probably isn't what we *always* want */
7709 		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
7710 			cachefs_lostpage++;
7711 			goto again;
7712 		}
7713 		pl[0] = pp;
7714 		pl[1] = NULL;
7715 		/* XXX increment st_hits?  i don't think so, but... */
7716 	}
7717 
7718 out:
7719 	if (have_statelock) {
7720 		mutex_exit(&cp->c_statelock);
7721 		have_statelock = 0;
7722 	}
7723 	if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
7724 		cachefs_kmem_free(ourpl, sizeof (struct page *) *
7725 		    ((fscp->fs_info.fi_popsize / PAGESIZE) + 1));
7726 	return (error);
7727 }
7728 
7729 /* gets a page but only from the back fs */
7730 /*ARGSUSED*/
7731 static int
7732 cachefs_getapage_back(struct vnode *vp, u_offset_t off, size_t len,
7733     uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
7734     caddr_t addr, enum seg_rw rw, cred_t *cr)
7735 {
7736 	cnode_t *cp = VTOC(vp);
7737 	page_t **ppp, *pp = NULL;
7738 	fscache_t *fscp = C_TO_FSCACHE(cp);
7739 	int error = 0;
7740 	struct page *ourpl[17];
7741 	int index = 0;
7742 	int have_statelock = 0;
7743 	int downgrade;
7744 
7745 	/*
7746 	 * Grab the cnode statelock so the cnode state won't change
7747 	 * while we're in here.
7748 	 */
7749 	ourpl[0] = NULL;
7750 	off = off & (offset_t)PAGEMASK;
7751 again:
7752 	if (page_exists(vp, off) == 0) {
7753 		if (! have_statelock) {
7754 			mutex_enter(&cp->c_statelock);
7755 			have_statelock = 1;
7756 		}
7757 
7758 		if (cp->c_backvp == NULL) {
7759 			error = cachefs_getbackvp(fscp, cp);
7760 			if (error)
7761 				goto out;
7762 		}
7763 		error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
7764 		    PAGESIZE, protp, ourpl, PAGESIZE, seg,
7765 		    addr, S_READ, cr, NULL);
7766 		if (error)
7767 			goto out;
7768 
7769 		if (have_statelock) {
7770 			mutex_exit(&cp->c_statelock);
7771 			have_statelock = 0;
7772 		}
7773 		downgrade = 0;
7774 		for (ppp = ourpl; *ppp; ppp++) {
7775 			if ((*ppp)->p_offset < off) {
7776 				index++;
7777 				page_unlock(*ppp);
7778 				continue;
7779 			}
7780 			if (PAGE_SHARED(*ppp)) {
7781 				if (page_tryupgrade(*ppp) == 0) {
7782 					for (ppp = &ourpl[index]; *ppp; ppp++)
7783 						page_unlock(*ppp);
7784 					error = EAGAIN;
7785 					goto out;
7786 				}
7787 				downgrade = 1;
7788 			}
7789 			ASSERT(PAGE_EXCL(*ppp));
7790 			(void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
7791 			page_rename(*ppp, vp, (*ppp)->p_offset);
7792 		}
7793 		pl[0] = ourpl[index];
7794 		pl[1] = NULL;
7795 		if (downgrade) {
7796 			page_downgrade(ourpl[index]);
7797 		}
7798 		/* Unlock the rest of the pages from the cluster */
7799 		for (ppp = &ourpl[index+1]; *ppp; ppp++)
7800 			page_unlock(*ppp);
7801 	} else {
7802 		ASSERT(! have_statelock);
7803 		if (have_statelock) {
7804 			mutex_exit(&cp->c_statelock);
7805 			have_statelock = 0;
7806 		}
7807 		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
7808 			cachefs_lostpage++;
7809 			goto again;
7810 		}
7811 		pl[0] = pp;
7812 		pl[1] = NULL;
7813 	}
7814 
7815 out:
7816 	if (have_statelock) {
7817 		mutex_exit(&cp->c_statelock);
7818 		have_statelock = 0;
7819 	}
7820 	return (error);
7821 }
7822 
7823 /*ARGSUSED*/
7824 static int
7825 cachefs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
7826     caller_context_t *ct)
7827 {
7828 	cnode_t *cp = VTOC(vp);
7829 	int error = 0;
7830 	fscache_t *fscp = C_TO_FSCACHE(cp);
7831 	int held = 0;
7832 	int connected = 0;
7833 
7834 	if (getzoneid() != GLOBAL_ZONEID)
7835 		return (EPERM);
7836 
7837 	/* Call backfilesytem if NFSv4 */
7838 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7839 		error = cachefs_putpage_backfs_nfsv4(vp, off, len, flags, cr);
7840 		goto out;
7841 	}
7842 
7843 	for (;;) {
7844 		/* get (or renew) access to the file system */
7845 		if (held) {
7846 			cachefs_cd_release(fscp);
7847 			held = 0;
7848 		}
7849 		error = cachefs_cd_access(fscp, connected, 1);
7850 		if (error)
7851 			break;
7852 		held = 1;
7853 
7854 		error = cachefs_putpage_common(vp, off, len, flags, cr);
7855 		if (error == 0)
7856 			break;
7857 
7858 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
7859 			if (CFS_TIMEOUT(fscp, error)) {
7860 				cachefs_cd_release(fscp);
7861 				held = 0;
7862 				cachefs_cd_timedout(fscp);
7863 				connected = 0;
7864 				continue;
7865 			}
7866 		} else {
7867 			if (NOMEMWAIT()) {
7868 				error = 0;
7869 				goto out;
7870 			}
7871 			if (CFS_TIMEOUT(fscp, error)) {
7872 				connected = 1;
7873 				continue;
7874 			}
7875 		}
7876 		break;
7877 	}
7878 
7879 out:
7880 
7881 	if (held) {
7882 		cachefs_cd_release(fscp);
7883 	}
7884 
7885 #ifdef CFS_CD_DEBUG
7886 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7887 #endif
7888 	return (error);
7889 }
7890 
7891 /*
7892  * cachefs_putpage_backfs_nfsv4
7893  *
7894  * Call NFSv4 back filesystem to handle the putpage (cachefs
7895  * pass-through support for NFSv4).
7896  */
7897 static int
7898 cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off, size_t len, int flags,
7899 			cred_t *cr)
7900 {
7901 	cnode_t *cp = VTOC(vp);
7902 	fscache_t *fscp = C_TO_FSCACHE(cp);
7903 	vnode_t *backvp;
7904 	int error;
7905 
7906 	/*
7907 	 * For NFSv4 pass-through to work, only connected operation is
7908 	 * supported, the cnode backvp must exist, and cachefs optional
7909 	 * (eg., disconnectable) flags are turned off. Assert these
7910 	 * conditions for the putpage operation.
7911 	 */
7912 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7913 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7914 
7915 	/* Call backfs vnode op after extracting backvp */
7916 	mutex_enter(&cp->c_statelock);
7917 	backvp = cp->c_backvp;
7918 	mutex_exit(&cp->c_statelock);
7919 
7920 	CFS_DPRINT_BACKFS_NFSV4(fscp,
7921 	    ("cachefs_putpage_backfs_nfsv4: cnode %p, backvp %p\n",
7922 	    cp, backvp));
7923 	error = VOP_PUTPAGE(backvp, off, len, flags, cr, NULL);
7924 
7925 	return (error);
7926 }
7927 
7928 /*
7929  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
7930  * If len == 0, do from off to EOF.
7931  *
7932  * The normal cases should be len == 0 & off == 0 (entire vp list),
7933  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
7934  * (from pageout).
7935  */
7936 
7937 /*ARGSUSED*/
7938 int
7939 cachefs_putpage_common(struct vnode *vp, offset_t off, size_t len,
7940     int flags, cred_t *cr)
7941 {
7942 	struct cnode *cp  = VTOC(vp);
7943 	struct page *pp;
7944 	size_t io_len;
7945 	u_offset_t eoff, io_off;
7946 	int error = 0;
7947 	fscache_t *fscp = C_TO_FSCACHE(cp);
7948 	cachefscache_t *cachep = fscp->fs_cache;
7949 
7950 	if (len == 0 && (flags & B_INVAL) == 0 && vn_is_readonly(vp)) {
7951 		return (0);
7952 	}
7953 	if (!vn_has_cached_data(vp) || (off >= cp->c_size &&
7954 	    (flags & B_INVAL) == 0))
7955 		return (0);
7956 
7957 	/*
7958 	 * Should never have cached data for the cachefs vnode
7959 	 * if NFSv4 is in use.
7960 	 */
7961 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
7962 
7963 	/*
7964 	 * If this is an async putpage let a thread handle it.
7965 	 */
7966 	if (flags & B_ASYNC) {
7967 		struct cachefs_req *rp;
7968 		int tflags = (flags & ~(B_ASYNC|B_DONTNEED));
7969 
7970 		if (ttoproc(curthread) == proc_pageout) {
7971 			/*
7972 			 * If this is the page daemon we
7973 			 * do the push synchronously (Dangerous!) and hope
7974 			 * we can free enough to keep running...
7975 			 */
7976 			flags &= ~B_ASYNC;
7977 			goto again;
7978 		}
7979 
7980 		if (! cachefs_async_okay()) {
7981 
7982 			/*
7983 			 * this is somewhat like NFS's behavior.  keep
7984 			 * the system from thrashing.  we've seen
7985 			 * cases where async queues get out of
7986 			 * control, especially if
7987 			 * madvise(MADV_SEQUENTIAL) is done on a large
7988 			 * mmap()ed file that is read sequentially.
7989 			 */
7990 
7991 			flags &= ~B_ASYNC;
7992 			goto again;
7993 		}
7994 
7995 		/*
7996 		 * if no flags other than B_ASYNC were set,
7997 		 * we coalesce putpage requests into a single one for the
7998 		 * whole file (len = off = 0).  If such a request is
7999 		 * already queued, we're done.
8000 		 *
8001 		 * If there are other flags set (e.g., B_INVAL), we don't
8002 		 * attempt to coalesce and we use the specified length and
8003 		 * offset.
8004 		 */
8005 		rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
8006 		mutex_enter(&cp->c_iomutex);
8007 		if ((cp->c_ioflags & CIO_PUTPAGES) == 0 || tflags != 0) {
8008 			rp->cfs_cmd = CFS_PUTPAGE;
8009 			rp->cfs_req_u.cu_putpage.cp_vp = vp;
8010 			if (tflags == 0) {
8011 				off = len = 0;
8012 				cp->c_ioflags |= CIO_PUTPAGES;
8013 			}
8014 			rp->cfs_req_u.cu_putpage.cp_off = off;
8015 			rp->cfs_req_u.cu_putpage.cp_len = (uint_t)len;
8016 			rp->cfs_req_u.cu_putpage.cp_flags = flags & ~B_ASYNC;
8017 			rp->cfs_cr = cr;
8018 			crhold(rp->cfs_cr);
8019 			VN_HOLD(vp);
8020 			cp->c_nio++;
8021 			cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));
8022 		} else {
8023 			kmem_cache_free(cachefs_req_cache, rp);
8024 		}
8025 
8026 		mutex_exit(&cp->c_iomutex);
8027 		return (0);
8028 	}
8029 
8030 
8031 again:
8032 	if (len == 0) {
8033 		/*
8034 		 * Search the entire vp list for pages >= off
8035 		 */
8036 		error = pvn_vplist_dirty(vp, off, cachefs_push, flags, cr);
8037 	} else {
8038 		/*
8039 		 * Do a range from [off...off + len] looking for pages
8040 		 * to deal with.
8041 		 */
8042 		eoff = (u_offset_t)off + len;
8043 		for (io_off = off; io_off < eoff && io_off < cp->c_size;
8044 		    io_off += io_len) {
8045 			/*
8046 			 * If we are not invalidating, synchronously
8047 			 * freeing or writing pages use the routine
8048 			 * page_lookup_nowait() to prevent reclaiming
8049 			 * them from the free list.
8050 			 */
8051 			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
8052 				pp = page_lookup(vp, io_off,
8053 				    (flags & (B_INVAL | B_FREE)) ?
8054 				    SE_EXCL : SE_SHARED);
8055 			} else {
8056 				/* XXX this looks like dead code */
8057 				pp = page_lookup_nowait(vp, io_off,
8058 				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
8059 			}
8060 
8061 			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
8062 				io_len = PAGESIZE;
8063 			else {
8064 				error = cachefs_push(vp, pp, &io_off,
8065 				    &io_len, flags, cr);
8066 				if (error != 0)
8067 					break;
8068 				/*
8069 				 * "io_off" and "io_len" are returned as
8070 				 * the range of pages we actually wrote.
8071 				 * This allows us to skip ahead more quickly
8072 				 * since several pages may've been dealt
8073 				 * with by this iteration of the loop.
8074 				 */
8075 			}
8076 		}
8077 	}
8078 
8079 	if (error == 0 && off == 0 && (len == 0 || len >= cp->c_size)) {
8080 		cp->c_flags &= ~CDIRTY;
8081 	}
8082 
8083 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_PUTPAGE))
8084 		cachefs_log_putpage(cachep, error, fscp->fs_cfsvfsp,
8085 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
8086 		    crgetuid(cr), off, len);
8087 
8088 	return (error);
8089 
8090 }
8091 
8092 /*ARGSUSED*/
8093 static int
8094 cachefs_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
8095     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
8096     caller_context_t *ct)
8097 {
8098 	cnode_t *cp = VTOC(vp);
8099 	fscache_t *fscp = C_TO_FSCACHE(cp);
8100 	struct segvn_crargs vn_a;
8101 	int error;
8102 	int held = 0;
8103 	int writing;
8104 	int connected = 0;
8105 
8106 #ifdef CFSDEBUG
8107 	u_offset_t offx = (u_offset_t)off;
8108 
8109 	CFS_DEBUG(CFSDEBUG_VOPS)
8110 		printf("cachefs_map: ENTER vp %p off %lld len %lu flags %d\n",
8111 		    (void *)vp, offx, len, flags);
8112 #endif
8113 	if (getzoneid() != GLOBAL_ZONEID) {
8114 		error = EPERM;
8115 		goto out;
8116 	}
8117 
8118 	if (vp->v_flag & VNOMAP) {
8119 		error = ENOSYS;
8120 		goto out;
8121 	}
8122 	if (off < 0 || (offset_t)(off + len) < 0) {
8123 		error = ENXIO;
8124 		goto out;
8125 	}
8126 	if (vp->v_type != VREG) {
8127 		error = ENODEV;
8128 		goto out;
8129 	}
8130 
8131 	/*
8132 	 * Check to see if the vnode is currently marked as not cachable.
8133 	 * If so, we have to refuse the map request as this violates the
8134 	 * don't cache attribute.
8135 	 */
8136 	if (vp->v_flag & VNOCACHE)
8137 		return (EAGAIN);
8138 
8139 #ifdef OBSOLETE
8140 	/*
8141 	 * If file is being locked, disallow mapping.
8142 	 */
8143 	if (vn_has_flocks(vp)) {
8144 		error = EAGAIN;
8145 		goto out;
8146 	}
8147 #endif
8148 
8149 	/* call backfilesystem if NFSv4 */
8150 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
8151 		error = cachefs_map_backfs_nfsv4(vp, off, as, addrp, len, prot,
8152 		    maxprot, flags, cr);
8153 		goto out;
8154 	}
8155 
8156 	writing = (prot & PROT_WRITE && ((flags & MAP_PRIVATE) == 0));
8157 
8158 	for (;;) {
8159 		/* get (or renew) access to the file system */
8160 		if (held) {
8161 			cachefs_cd_release(fscp);
8162 			held = 0;
8163 		}
8164 		error = cachefs_cd_access(fscp, connected, writing);
8165 		if (error)
8166 			break;
8167 		held = 1;
8168 
8169 		if (writing) {
8170 			mutex_enter(&cp->c_statelock);
8171 			if (CFS_ISFS_WRITE_AROUND(fscp)) {
8172 				if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
8173 					connected = 1;
8174 					continue;
8175 				} else {
8176 					cachefs_nocache(cp);
8177 				}
8178 			}
8179 
8180 			/*
8181 			 * CN_MAPWRITE is for an optimization in cachefs_delmap.
8182 			 * If CN_MAPWRITE is not set then cachefs_delmap does
8183 			 * not need to try to push out any pages.
8184 			 * This bit gets cleared when the cnode goes inactive.
8185 			 */
8186 			cp->c_flags |= CN_MAPWRITE;
8187 
8188 			mutex_exit(&cp->c_statelock);
8189 		}
8190 		break;
8191 	}
8192 
8193 	if (held) {
8194 		cachefs_cd_release(fscp);
8195 	}
8196 
8197 	as_rangelock(as);
8198 	error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
8199 	if (error != 0) {
8200 		as_rangeunlock(as);
8201 		goto out;
8202 	}
8203 
8204 	/*
8205 	 * package up all the data passed in into a segvn_args struct and
8206 	 * call as_map with segvn_create function to create a new segment
8207 	 * in the address space.
8208 	 */
8209 	vn_a.vp = vp;
8210 	vn_a.offset = off;
8211 	vn_a.type = flags & MAP_TYPE;
8212 	vn_a.prot = (uchar_t)prot;
8213 	vn_a.maxprot = (uchar_t)maxprot;
8214 	vn_a.cred = cr;
8215 	vn_a.amp = NULL;
8216 	vn_a.flags = flags & ~MAP_TYPE;
8217 	vn_a.szc = 0;
8218 	vn_a.lgrp_mem_policy_flags = 0;
8219 	error = as_map(as, *addrp, len, segvn_create, &vn_a);
8220 	as_rangeunlock(as);
8221 out:
8222 
8223 #ifdef CFS_CD_DEBUG
8224 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8225 #endif
8226 #ifdef CFSDEBUG
8227 	CFS_DEBUG(CFSDEBUG_VOPS)
8228 		printf("cachefs_map: EXIT vp %p error %d\n", (void *)vp, error);
8229 #endif
8230 	return (error);
8231 }
8232 
8233 /*
8234  * cachefs_map_backfs_nfsv4
8235  *
8236  * Call NFSv4 back filesystem to handle the map (cachefs
8237  * pass-through support for NFSv4).
8238  */
8239 static int
8240 cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off, struct as *as,
8241 			caddr_t *addrp, size_t len, uchar_t prot,
8242 			uchar_t maxprot, uint_t flags, cred_t *cr)
8243 {
8244 	cnode_t *cp = VTOC(vp);
8245 	fscache_t *fscp = C_TO_FSCACHE(cp);
8246 	vnode_t *backvp;
8247 	int error;
8248 
8249 	/*
8250 	 * For NFSv4 pass-through to work, only connected operation is
8251 	 * supported, the cnode backvp must exist, and cachefs optional
8252 	 * (eg., disconnectable) flags are turned off. Assert these
8253 	 * conditions for the map operation.
8254 	 */
8255 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8256 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8257 
8258 	/* Call backfs vnode op after extracting backvp */
8259 	mutex_enter(&cp->c_statelock);
8260 	backvp = cp->c_backvp;
8261 	mutex_exit(&cp->c_statelock);
8262 
8263 	CFS_DPRINT_BACKFS_NFSV4(fscp,
8264 	    ("cachefs_map_backfs_nfsv4: cnode %p, backvp %p\n",
8265 	    cp, backvp));
8266 	error = VOP_MAP(backvp, off, as, addrp, len, prot, maxprot, flags, cr,
8267 	    NULL);
8268 
8269 	return (error);
8270 }
8271 
8272 /*ARGSUSED*/
8273 static int
8274 cachefs_addmap(struct vnode *vp, offset_t off, struct as *as,
8275     caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
8276     cred_t *cr, caller_context_t *ct)
8277 {
8278 	cnode_t *cp = VTOC(vp);
8279 	fscache_t *fscp = C_TO_FSCACHE(cp);
8280 
8281 	if (getzoneid() != GLOBAL_ZONEID)
8282 		return (EPERM);
8283 
8284 	if (vp->v_flag & VNOMAP)
8285 		return (ENOSYS);
8286 
8287 	/*
8288 	 * Check this is not an NFSv4 filesystem, as the mapping
8289 	 * is not done on the cachefs filesystem if NFSv4 is in
8290 	 * use.
8291 	 */
8292 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8293 
8294 	mutex_enter(&cp->c_statelock);
8295 	cp->c_mapcnt += btopr(len);
8296 	mutex_exit(&cp->c_statelock);
8297 	return (0);
8298 }
8299 
8300 /*ARGSUSED*/
8301 static int
8302 cachefs_delmap(struct vnode *vp, offset_t off, struct as *as,
8303 	caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
8304 	cred_t *cr, caller_context_t *ct)
8305 {
8306 	cnode_t *cp = VTOC(vp);
8307 	fscache_t *fscp = C_TO_FSCACHE(cp);
8308 	int error;
8309 	int connected = 0;
8310 	int held = 0;
8311 
8312 	/*
8313 	 * The file may be passed in to (or inherited into) the zone, so we
8314 	 * need to let this operation go through since it happens as part of
8315 	 * exiting.
8316 	 */
8317 	if (vp->v_flag & VNOMAP)
8318 		return (ENOSYS);
8319 
8320 	/*
8321 	 * Check this is not an NFSv4 filesystem, as the mapping
8322 	 * is not done on the cachefs filesystem if NFSv4 is in
8323 	 * use.
8324 	 */
8325 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8326 
8327 	mutex_enter(&cp->c_statelock);
8328 	cp->c_mapcnt -= btopr(len);
8329 	ASSERT(cp->c_mapcnt >= 0);
8330 	mutex_exit(&cp->c_statelock);
8331 
8332 	if (cp->c_mapcnt || !vn_has_cached_data(vp) ||
8333 	    ((cp->c_flags & CN_MAPWRITE) == 0))
8334 		return (0);
8335 
8336 	for (;;) {
8337 		/* get (or renew) access to the file system */
8338 		if (held) {
8339 			cachefs_cd_release(fscp);
8340 			held = 0;
8341 		}
8342 		error = cachefs_cd_access(fscp, connected, 1);
8343 		if (error)
8344 			break;
8345 		held = 1;
8346 		connected = 0;
8347 
8348 		error = cachefs_putpage_common(vp, (offset_t)0,
8349 		    (uint_t)0, 0, cr);
8350 		if (CFS_TIMEOUT(fscp, error)) {
8351 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8352 				cachefs_cd_release(fscp);
8353 				held = 0;
8354 				cachefs_cd_timedout(fscp);
8355 				continue;
8356 			} else {
8357 				connected = 1;
8358 				continue;
8359 			}
8360 		}
8361 
8362 		/* if no space left in cache, wait until connected */
8363 		if ((error == ENOSPC) &&
8364 		    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
8365 			connected = 1;
8366 			continue;
8367 		}
8368 
8369 		mutex_enter(&cp->c_statelock);
8370 		if (!error)
8371 			error = cp->c_error;
8372 		cp->c_error = 0;
8373 		mutex_exit(&cp->c_statelock);
8374 		break;
8375 	}
8376 
8377 	if (held)
8378 		cachefs_cd_release(fscp);
8379 
8380 #ifdef CFS_CD_DEBUG
8381 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8382 #endif
8383 	return (error);
8384 }
8385 
8386 /* ARGSUSED */
8387 static int
8388 cachefs_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
8389 	offset_t offset, struct flk_callback *flk_cbp, cred_t *cr,
8390 	caller_context_t *ct)
8391 {
8392 	struct cnode *cp = VTOC(vp);
8393 	int error;
8394 	struct fscache *fscp = C_TO_FSCACHE(cp);
8395 	vnode_t *backvp;
8396 	int held = 0;
8397 	int connected = 0;
8398 
8399 	if (getzoneid() != GLOBAL_ZONEID)
8400 		return (EPERM);
8401 
8402 	if ((cmd != F_GETLK) && (cmd != F_SETLK) && (cmd != F_SETLKW))
8403 		return (EINVAL);
8404 
8405 	/* Disallow locking of files that are currently mapped */
8406 	if (((cmd == F_SETLK) || (cmd == F_SETLKW)) && (cp->c_mapcnt > 0)) {
8407 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8408 		return (EAGAIN);
8409 	}
8410 
8411 	/*
8412 	 * Cachefs only provides pass-through support for NFSv4,
8413 	 * and all vnode operations are passed through to the
8414 	 * back file system. For NFSv4 pass-through to work, only
8415 	 * connected operation is supported, the cnode backvp must
8416 	 * exist, and cachefs optional (eg., disconnectable) flags
8417 	 * are turned off. Assert these conditions to ensure that
8418 	 * the backfilesystem is called for the frlock operation.
8419 	 */
8420 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8421 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8422 
8423 	/* XXX bob: nfs does a bunch more checks than we do */
8424 	if (CFS_ISFS_LLOCK(fscp)) {
8425 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8426 		return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
8427 	}
8428 
8429 	for (;;) {
8430 		/* get (or renew) access to the file system */
8431 		if (held) {
8432 			/* Won't loop with NFSv4 connected behavior */
8433 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8434 			cachefs_cd_release(fscp);
8435 			held = 0;
8436 		}
8437 		error = cachefs_cd_access(fscp, connected, 0);
8438 		if (error)
8439 			break;
8440 		held = 1;
8441 
8442 		/* if not connected, quit or wait */
8443 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
8444 			connected = 1;
8445 			continue;
8446 		}
8447 
8448 		/* nocache the file */
8449 		if ((cp->c_flags & CN_NOCACHE) == 0 &&
8450 		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8451 			mutex_enter(&cp->c_statelock);
8452 			cachefs_nocache(cp);
8453 			mutex_exit(&cp->c_statelock);
8454 		}
8455 
8456 		/*
8457 		 * XXX bob: probably should do a consistency check
8458 		 * Pass arguments unchanged if NFSv4 is the backfs.
8459 		 */
8460 		if (bfp->l_whence == 2 && CFS_ISFS_BACKFS_NFSV4(fscp) == 0) {
8461 			bfp->l_start += cp->c_size;
8462 			bfp->l_whence = 0;
8463 		}
8464 
8465 		/* get the back vp */
8466 		mutex_enter(&cp->c_statelock);
8467 		if (cp->c_backvp == NULL) {
8468 			error = cachefs_getbackvp(fscp, cp);
8469 			if (error) {
8470 				mutex_exit(&cp->c_statelock);
8471 				break;
8472 			}
8473 		}
8474 		backvp = cp->c_backvp;
8475 		VN_HOLD(backvp);
8476 		mutex_exit(&cp->c_statelock);
8477 
8478 		/*
8479 		 * make sure we can flush currently dirty pages before
8480 		 * allowing the lock
8481 		 */
8482 		if (bfp->l_type != F_UNLCK && cmd != F_GETLK &&
8483 		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8484 			error = cachefs_putpage(
8485 			    vp, (offset_t)0, 0, B_INVAL, cr, ct);
8486 			if (error) {
8487 				error = ENOLCK;
8488 				VN_RELE(backvp);
8489 				break;
8490 			}
8491 		}
8492 
8493 		/* do lock on the back file */
8494 		CFS_DPRINT_BACKFS_NFSV4(fscp,
8495 		    ("cachefs_frlock (nfsv4): cp %p, backvp %p\n",
8496 		    cp, backvp));
8497 		error = VOP_FRLOCK(backvp, cmd, bfp, flag, offset, NULL, cr,
8498 		    ct);
8499 		VN_RELE(backvp);
8500 		if (CFS_TIMEOUT(fscp, error)) {
8501 			connected = 1;
8502 			continue;
8503 		}
8504 		break;
8505 	}
8506 
8507 	if (held) {
8508 		cachefs_cd_release(fscp);
8509 	}
8510 
8511 	/*
8512 	 * If we are setting a lock mark the vnode VNOCACHE so the page
8513 	 * cache does not give inconsistent results on locked files shared
8514 	 * between clients.  The VNOCACHE flag is never turned off as long
8515 	 * as the vnode is active because it is hard to figure out when the
8516 	 * last lock is gone.
8517 	 * XXX - what if some already has the vnode mapped in?
8518 	 * XXX bob: see nfs3_frlock, do not allow locking if vnode mapped in.
8519 	 */
8520 	if ((error == 0) && (bfp->l_type != F_UNLCK) && (cmd != F_GETLK) &&
8521 	    !CFS_ISFS_BACKFS_NFSV4(fscp))
8522 		vp->v_flag |= VNOCACHE;
8523 
8524 #ifdef CFS_CD_DEBUG
8525 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8526 #endif
8527 	return (error);
8528 }
8529 
8530 /*
8531  * Free storage space associated with the specified vnode.  The portion
8532  * to be freed is specified by bfp->l_start and bfp->l_len (already
8533  * normalized to a "whence" of 0).
8534  *
8535  * This is an experimental facility whose continued existence is not
8536  * guaranteed.  Currently, we only support the special case
8537  * of l_len == 0, meaning free to end of file.
8538  */
8539 /* ARGSUSED */
8540 static int
8541 cachefs_space(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
8542 	offset_t offset, cred_t *cr, caller_context_t *ct)
8543 {
8544 	cnode_t *cp = VTOC(vp);
8545 	fscache_t *fscp = C_TO_FSCACHE(cp);
8546 	int error;
8547 
8548 	ASSERT(vp->v_type == VREG);
8549 	if (getzoneid() != GLOBAL_ZONEID)
8550 		return (EPERM);
8551 	if (cmd != F_FREESP)
8552 		return (EINVAL);
8553 
8554 	/* call backfilesystem if NFSv4 */
8555 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
8556 		error = cachefs_space_backfs_nfsv4(vp, cmd, bfp, flag,
8557 		    offset, cr, ct);
8558 		goto out;
8559 	}
8560 
8561 	if ((error = convoff(vp, bfp, 0, offset)) == 0) {
8562 		ASSERT(bfp->l_start >= 0);
8563 		if (bfp->l_len == 0) {
8564 			struct vattr va;
8565 
8566 			va.va_size = bfp->l_start;
8567 			va.va_mask = AT_SIZE;
8568 			error = cachefs_setattr(vp, &va, 0, cr, ct);
8569 		} else
8570 			error = EINVAL;
8571 	}
8572 
8573 out:
8574 	return (error);
8575 }
8576 
8577 /*
8578  * cachefs_space_backfs_nfsv4
8579  *
8580  * Call NFSv4 back filesystem to handle the space (cachefs
8581  * pass-through support for NFSv4).
8582  */
8583 static int
8584 cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd, struct flock64 *bfp,
8585 		int flag, offset_t offset, cred_t *cr, caller_context_t *ct)
8586 {
8587 	cnode_t *cp = VTOC(vp);
8588 	fscache_t *fscp = C_TO_FSCACHE(cp);
8589 	vnode_t *backvp;
8590 	int error;
8591 
8592 	/*
8593 	 * For NFSv4 pass-through to work, only connected operation is
8594 	 * supported, the cnode backvp must exist, and cachefs optional
8595 	 * (eg., disconnectable) flags are turned off. Assert these
8596 	 * conditions for the space operation.
8597 	 */
8598 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8599 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8600 
8601 	/* Call backfs vnode op after extracting backvp */
8602 	mutex_enter(&cp->c_statelock);
8603 	backvp = cp->c_backvp;
8604 	mutex_exit(&cp->c_statelock);
8605 
8606 	CFS_DPRINT_BACKFS_NFSV4(fscp,
8607 	    ("cachefs_space_backfs_nfsv4: cnode %p, backvp %p\n",
8608 	    cp, backvp));
8609 	error = VOP_SPACE(backvp, cmd, bfp, flag, offset, cr, ct);
8610 
8611 	return (error);
8612 }
8613 
/*
 * cachefs_realvp
 *
 * Stub: every argument is ignored, *vpp is never written, and the call
 * always fails with EINVAL.
 */
/*ARGSUSED*/
static int
cachefs_realvp(struct vnode *vp, struct vnode **vpp, caller_context_t *ct)
{
	return (EINVAL);
}
8620 
/*
 * cachefs_pageio
 *
 * Stub: page I/O is not implemented here.  Every argument is ignored and
 * the call always fails with ENOSYS.
 */
/*ARGSUSED*/
static int
cachefs_pageio(struct vnode *vp, page_t *pp, u_offset_t io_off, size_t io_len,
	int flags, cred_t *cr, caller_context_t *ct)
{
	return (ENOSYS);
}
8628 
8629 static int
8630 cachefs_setsecattr_connected(cnode_t *cp,
8631     vsecattr_t *vsec, int flag, cred_t *cr)
8632 {
8633 	fscache_t *fscp = C_TO_FSCACHE(cp);
8634 	int error = 0;
8635 
8636 	ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
8637 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
8638 
8639 	mutex_enter(&cp->c_statelock);
8640 
8641 	if (cp->c_backvp == NULL) {
8642 		error = cachefs_getbackvp(fscp, cp);
8643 		if (error) {
8644 			cachefs_nocache(cp);
8645 			goto out;
8646 		}
8647 	}
8648 
8649 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
8650 	if (error)
8651 		goto out;
8652 
8653 	/* only owner can set acl */
8654 	if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
8655 		error = EINVAL;
8656 		goto out;
8657 	}
8658 
8659 
8660 	CFS_DPRINT_BACKFS_NFSV4(fscp,
8661 	    ("cachefs_setsecattr (nfsv4): cp %p, backvp %p",
8662 	    cp, cp->c_backvp));
8663 	error = VOP_SETSECATTR(cp->c_backvp, vsec, flag, cr, NULL);
8664 	if (error) {
8665 		goto out;
8666 	}
8667 
8668 	if ((cp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
8669 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8670 		cachefs_nocache(cp);
8671 		goto out;
8672 	}
8673 
8674 	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
8675 
8676 	/* acl may have changed permissions -- handle this. */
8677 	if (!CFS_ISFS_BACKFS_NFSV4(fscp))
8678 		cachefs_acl2perm(cp, vsec);
8679 
8680 	if ((cp->c_flags & CN_NOCACHE) == 0 &&
8681 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8682 		error = cachefs_cacheacl(cp, vsec);
8683 		if (error != 0) {
8684 #ifdef CFSDEBUG
8685 			CFS_DEBUG(CFSDEBUG_VOPS)
8686 				printf("cachefs_setacl: cacheacl: error %d\n",
8687 				    error);
8688 #endif /* CFSDEBUG */
8689 			error = 0;
8690 			cachefs_nocache(cp);
8691 		}
8692 	}
8693 
8694 out:
8695 	mutex_exit(&cp->c_statelock);
8696 
8697 	return (error);
8698 }
8699 
8700 static int
8701 cachefs_setsecattr_disconnected(cnode_t *cp,
8702     vsecattr_t *vsec, int flag, cred_t *cr)
8703 {
8704 	fscache_t *fscp = C_TO_FSCACHE(cp);
8705 	mode_t failmode = cp->c_metadata.md_vattr.va_mode;
8706 	off_t commit = 0;
8707 	int error = 0;
8708 
8709 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
8710 
8711 	if (CFS_ISFS_WRITE_AROUND(fscp))
8712 		return (ETIMEDOUT);
8713 
8714 	mutex_enter(&cp->c_statelock);
8715 
8716 	/* only owner can set acl */
8717 	if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
8718 		error = EINVAL;
8719 		goto out;
8720 	}
8721 
8722 	if (cp->c_metadata.md_flags & MD_NEEDATTRS) {
8723 		error = ETIMEDOUT;
8724 		goto out;
8725 	}
8726 
8727 	/* XXX do i need this?  is this right? */
8728 	if (cp->c_flags & CN_ALLOC_PENDING) {
8729 		if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
8730 			(void) filegrp_allocattr(cp->c_filegrp);
8731 		}
8732 		error = filegrp_create_metadata(cp->c_filegrp,
8733 		    &cp->c_metadata, &cp->c_id);
8734 		if (error) {
8735 			goto out;
8736 		}
8737 		cp->c_flags &= ~CN_ALLOC_PENDING;
8738 	}
8739 
8740 	/* XXX is this right? */
8741 	if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
8742 		error = cachefs_dlog_cidmap(fscp);
8743 		if (error) {
8744 			error = ENOSPC;
8745 			goto out;
8746 		}
8747 		cp->c_metadata.md_flags |= MD_MAPPING;
8748 		cp->c_flags |= CN_UPDATED;
8749 	}
8750 
8751 	commit = cachefs_dlog_setsecattr(fscp, vsec, flag, cp, cr);
8752 	if (commit == 0)
8753 		goto out;
8754 
8755 	/* fix modes in metadata */
8756 	cachefs_acl2perm(cp, vsec);
8757 
8758 	if ((cp->c_flags & CN_NOCACHE) == 0) {
8759 		error = cachefs_cacheacl(cp, vsec);
8760 		if (error != 0) {
8761 			goto out;
8762 		}
8763 	}
8764 
8765 	/* XXX is this right? */
8766 	if (cachefs_modified_alloc(cp)) {
8767 		error = ENOSPC;
8768 		goto out;
8769 	}
8770 
8771 out:
8772 	if (error != 0)
8773 		cp->c_metadata.md_vattr.va_mode = failmode;
8774 
8775 	mutex_exit(&cp->c_statelock);
8776 
8777 	if (commit) {
8778 		if (cachefs_dlog_commit(fscp, commit, error)) {
8779 			/*EMPTY*/
8780 			/* XXX fix on panic? */
8781 		}
8782 	}
8783 
8784 	return (error);
8785 }
8786 
8787 /*ARGSUSED*/
8788 static int
8789 cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr,
8790     caller_context_t *ct)
8791 {
8792 	cnode_t *cp = VTOC(vp);
8793 	fscache_t *fscp = C_TO_FSCACHE(cp);
8794 	int connected = 0;
8795 	int held = 0;
8796 	int error = 0;
8797 
8798 #ifdef CFSDEBUG
8799 	CFS_DEBUG(CFSDEBUG_VOPS)
8800 		printf("cachefs_setsecattr: ENTER vp %p\n", (void *)vp);
8801 #endif
8802 	if (getzoneid() != GLOBAL_ZONEID) {
8803 		error = EPERM;
8804 		goto out;
8805 	}
8806 
8807 	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
8808 		error = ENOSYS;
8809 		goto out;
8810 	}
8811 
8812 	if (! cachefs_vtype_aclok(vp)) {
8813 		error = EINVAL;
8814 		goto out;
8815 	}
8816 
8817 	/*
8818 	 * Cachefs only provides pass-through support for NFSv4,
8819 	 * and all vnode operations are passed through to the
8820 	 * back file system. For NFSv4 pass-through to work, only
8821 	 * connected operation is supported, the cnode backvp must
8822 	 * exist, and cachefs optional (eg., disconnectable) flags
8823 	 * are turned off. Assert these conditions to ensure that
8824 	 * the backfilesystem is called for the setsecattr operation.
8825 	 */
8826 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8827 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8828 
8829 	for (;;) {
8830 		/* drop hold on file system */
8831 		if (held) {
8832 			/* Won't loop with NFSv4 connected operation */
8833 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8834 			cachefs_cd_release(fscp);
8835 			held = 0;
8836 		}
8837 
8838 		/* acquire access to the file system */
8839 		error = cachefs_cd_access(fscp, connected, 1);
8840 		if (error)
8841 			break;
8842 		held = 1;
8843 
8844 		/* perform the setattr */
8845 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
8846 			error = cachefs_setsecattr_connected(cp,
8847 			    vsec, flag, cr);
8848 		else
8849 			error = cachefs_setsecattr_disconnected(cp,
8850 			    vsec, flag, cr);
8851 		if (error) {
8852 			/* if connected */
8853 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8854 				if (CFS_TIMEOUT(fscp, error)) {
8855 					cachefs_cd_release(fscp);
8856 					held = 0;
8857 					cachefs_cd_timedout(fscp);
8858 					connected = 0;
8859 					continue;
8860 				}
8861 			}
8862 
8863 			/* else must be disconnected */
8864 			else {
8865 				if (CFS_TIMEOUT(fscp, error)) {
8866 					connected = 1;
8867 					continue;
8868 				}
8869 			}
8870 		}
8871 		break;
8872 	}
8873 
8874 	if (held) {
8875 		cachefs_cd_release(fscp);
8876 	}
8877 	return (error);
8878 
8879 out:
8880 #ifdef CFS_CD_DEBUG
8881 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8882 #endif
8883 
8884 #ifdef CFSDEBUG
8885 	CFS_DEBUG(CFSDEBUG_VOPS)
8886 		printf("cachefs_setsecattr: EXIT error = %d\n", error);
8887 #endif
8888 	return (error);
8889 }
8890 
8891 /*
8892  * call this BEFORE calling cachefs_cacheacl(), as the latter will
8893  * sanitize the acl.
8894  */
8895 
8896 static void
8897 cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec)
8898 {
8899 	aclent_t *aclp;
8900 	int i;
8901 
8902 	for (i = 0; i < vsec->vsa_aclcnt; i++) {
8903 		aclp = ((aclent_t *)vsec->vsa_aclentp) + i;
8904 		switch (aclp->a_type) {
8905 		case USER_OBJ:
8906 			cp->c_metadata.md_vattr.va_mode &= (~0700);
8907 			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 6);
8908 			break;
8909 
8910 		case GROUP_OBJ:
8911 			cp->c_metadata.md_vattr.va_mode &= (~070);
8912 			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 3);
8913 			break;
8914 
8915 		case OTHER_OBJ:
8916 			cp->c_metadata.md_vattr.va_mode &= (~07);
8917 			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm);
8918 			break;
8919 
8920 		case CLASS_OBJ:
8921 			cp->c_metadata.md_aclclass = aclp->a_perm;
8922 			break;
8923 		}
8924 	}
8925 
8926 	cp->c_flags |= CN_UPDATED;
8927 }
8928 
/*
 * cachefs_getsecattr
 *
 * Fetch the ACL of vp into vsec.
 *
 * Outside the global zone the request fails with EPERM.  When the file
 * system is mounted with CFS_NOACL the result of fs_fab_acl() is returned
 * instead of a real ACL.  Otherwise the ACL is obtained through
 * cachefs_getsecattr_connected() or cachefs_getsecattr_disconnected(),
 * depending on the current connection state, retrying in the other state
 * whenever an attempt ends in a timeout (CFS_TIMEOUT).
 *
 * Returns 0 on success or an errno value.
 */
static int
cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr,
    caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	/* held: nonzero while a cachefs_cd_access() hold is outstanding */
	int held = 0, connected = 0;
	int error = 0;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_getsecattr: ENTER vp %p\n", (void *)vp);
#endif

	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the getsecattr operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	/* ACLs disabled for this mount: hand back fs_fab_acl()'s result */
	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
		error = fs_fab_acl(vp, vsec, flag, cr, ct);
		goto out;
	}

	for (;;) {
		/* drop the hold left over from the previous attempt */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			break;
		held = 1;

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_getsecattr_connected(vp, vsec, flag,
			    cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/* timed out: retry disconnected */
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_getsecattr_disconnected(vp, vsec, flag,
			    cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/*
				 * The cache could not satisfy the request.
				 * NOTE(review): cachefs_cd_access_miss()
				 * presumably reports whether the back fs
				 * may be tried despite the disconnected
				 * state -- confirm against its definition.
				 */
				if (cachefs_cd_access_miss(fscp)) {
					error = cachefs_getsecattr_connected(vp,
					    vsec, flag, cr);
					if (!CFS_TIMEOUT(fscp, error))
						break;
					/* still timing out: pause, retry */
					delay(5*hz);
					connected = 0;
					continue;
				}
				/* ask for connected access next time around */
				connected = 1;
				continue;
			}
		}
		break;
	}

out:
	if (held)
		cachefs_cd_release(fscp);

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_getsecattr: EXIT error = %d\n", error);
#endif
	return (error);
}
9020 
9021 static int
9022 cachefs_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
9023     caller_context_t *ct)
9024 {
9025 	cnode_t *cp = VTOC(vp);
9026 	fscache_t *fscp = C_TO_FSCACHE(cp);
9027 	int error = 0;
9028 	vnode_t *backvp;
9029 
9030 #ifdef CFSDEBUG
9031 	CFS_DEBUG(CFSDEBUG_VOPS)
9032 		printf("cachefs_shrlock: ENTER vp %p\n", (void *)vp);
9033 #endif
9034 
9035 	if (getzoneid() != GLOBAL_ZONEID) {
9036 		error = EPERM;
9037 		goto out;
9038 	}
9039 
9040 	/*
9041 	 * Cachefs only provides pass-through support for NFSv4,
9042 	 * and all vnode operations are passed through to the
9043 	 * back file system. For NFSv4 pass-through to work, only
9044 	 * connected operation is supported, the cnode backvp must
9045 	 * exist, and cachefs optional (eg., disconnectable) flags
9046 	 * are turned off. Assert these conditions to ensure that
9047 	 * the backfilesystem is called for the shrlock operation.
9048 	 */
9049 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
9050 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
9051 
9052 	mutex_enter(&cp->c_statelock);
9053 	if (cp->c_backvp == NULL)
9054 		error = cachefs_getbackvp(fscp, cp);
9055 	backvp = cp->c_backvp;
9056 	mutex_exit(&cp->c_statelock);
9057 	ASSERT((error != 0) || (backvp != NULL));
9058 
9059 	if (error == 0) {
9060 		CFS_DPRINT_BACKFS_NFSV4(fscp,
9061 		    ("cachefs_shrlock (nfsv4): cp %p, backvp %p",
9062 		    cp, backvp));
9063 		error = VOP_SHRLOCK(backvp, cmd, shr, flag, cr, ct);
9064 	}
9065 
9066 out:
9067 #ifdef CFSDEBUG
9068 	CFS_DEBUG(CFSDEBUG_VOPS)
9069 		printf("cachefs_shrlock: EXIT error = %d\n", error);
9070 #endif
9071 	return (error);
9072 }
9073 
9074 static int
9075 cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
9076     cred_t *cr)
9077 {
9078 	cnode_t *cp = VTOC(vp);
9079 	fscache_t *fscp = C_TO_FSCACHE(cp);
9080 	int hit = 0;
9081 	int error = 0;
9082 
9083 
9084 	mutex_enter(&cp->c_statelock);
9085 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
9086 	if (error)
9087 		goto out;
9088 
9089 	/* read from the cache if we can */
9090 	if ((cp->c_metadata.md_flags & MD_ACL) &&
9091 	    ((cp->c_flags & CN_NOCACHE) == 0) &&
9092 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
9093 		ASSERT((cp->c_flags & CN_NOCACHE) == 0);
9094 		error = cachefs_getaclfromcache(cp, vsec);
9095 		if (error) {
9096 			cachefs_nocache(cp);
9097 			ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
9098 			error = 0;
9099 		} else {
9100 			hit = 1;
9101 			goto out;
9102 		}
9103 	}
9104 
9105 	ASSERT(error == 0);
9106 	if (cp->c_backvp == NULL)
9107 		error = cachefs_getbackvp(fscp, cp);
9108 	if (error)
9109 		goto out;
9110 
9111 	CFS_DPRINT_BACKFS_NFSV4(fscp,
9112 	    ("cachefs_getsecattr (nfsv4): cp %p, backvp %p",
9113 	    cp, cp->c_backvp));
9114 	error = VOP_GETSECATTR(cp->c_backvp, vsec, flag, cr, NULL);
9115 	if (error)
9116 		goto out;
9117 
9118 	if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
9119 	    (cachefs_vtype_aclok(vp)) &&
9120 	    ((cp->c_flags & CN_NOCACHE) == 0) &&
9121 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
9122 		error = cachefs_cacheacl(cp, vsec);
9123 		if (error) {
9124 			error = 0;
9125 			cachefs_nocache(cp);
9126 		}
9127 	}
9128 
9129 out:
9130 	if (error == 0) {
9131 		if (hit)
9132 			fscp->fs_stats.st_hits++;
9133 		else
9134 			fscp->fs_stats.st_misses++;
9135 	}
9136 	mutex_exit(&cp->c_statelock);
9137 
9138 	return (error);
9139 }
9140 
9141 static int
9142 /*ARGSUSED*/
9143 cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec, int flag,
9144     cred_t *cr)
9145 {
9146 	cnode_t *cp = VTOC(vp);
9147 	fscache_t *fscp = C_TO_FSCACHE(cp);
9148 	int hit = 0;
9149 	int error = 0;
9150 
9151 
9152 	mutex_enter(&cp->c_statelock);
9153 
9154 	/* read from the cache if we can */
9155 	if (((cp->c_flags & CN_NOCACHE) == 0) &&
9156 	    (cp->c_metadata.md_flags & MD_ACL)) {
9157 		error = cachefs_getaclfromcache(cp, vsec);
9158 		if (error) {
9159 			cachefs_nocache(cp);
9160 			ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
9161 			error = 0;
9162 		} else {
9163 			hit = 1;
9164 			goto out;
9165 		}
9166 	}
9167 	error = ETIMEDOUT;
9168 
9169 out:
9170 	if (error == 0) {
9171 		if (hit)
9172 			fscp->fs_stats.st_hits++;
9173 		else
9174 			fscp->fs_stats.st_misses++;
9175 	}
9176 	mutex_exit(&cp->c_statelock);
9177 
9178 	return (error);
9179 }
9180 
/*
 * cachefs_cacheacl() -- cache an ACL, which we do by applying it to
 * the frontfile if possible; otherwise, the adjunct directory.
 *
 * inputs:
 * cp - the cnode, with its statelock already held
 * vsecp - a pointer to a vsecattr_t you'd like us to cache as-is,
 *  or NULL if you want us to do the VOP_GETSECATTR(backvp).
 *
 * A caller-supplied vsecp is left as it was found: the sanitizing done
 * below is applied to a private copy of the vsa_aclentp array, and the
 * caller's pointer is put back before returning.
 *
 * Must not be called for a CN_NOCACHE cnode, an NFSv4 back file system,
 * a CFS_NOACL mount, or a vnode type rejected by cachefs_vtype_aclok()
 * (all asserted).
 *
 * returns:
 * 0 - all is well (this includes the case where applying the ACL to the
 *  front file system failed and the cached ACL was purged instead)
 * nonzero - errno
 */

int
cachefs_cacheacl(cnode_t *cp, vsecattr_t *vsecp)
{
	fscache_t *fscp = C_TO_FSCACHE(cp);
	vsecattr_t vsec;
	aclent_t *aclp;
	int gotvsec = 0;	/* set once vsec holds an ACL we must free */
	int error = 0;
	vnode_t *vp = NULL;
	void *aclkeep = NULL;	/* caller's vsa_aclentp while we use a copy */
	int i;

	ASSERT(MUTEX_HELD(&cp->c_statelock));
	ASSERT((cp->c_flags & CN_NOCACHE) == 0);
	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
	ASSERT(cachefs_vtype_aclok(CTOV(cp)));

	/* same condition as the ASSERT above, kept for non-DEBUG builds */
	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
		error = ENOSYS;
		goto out;
	}

	if (vsecp == NULL) {
		/* no ACL supplied: read the full ACL from the back vnode */
		if (cp->c_backvp == NULL)
			error = cachefs_getbackvp(fscp, cp);
		if (error != 0)
			goto out;
		vsecp = &vsec;
		bzero(&vsec, sizeof (vsec));
		vsecp->vsa_mask =
		    VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
		error = VOP_GETSECATTR(cp->c_backvp, vsecp, 0, kcred, NULL);
		if (error != 0) {
			goto out;
		}
		gotvsec = 1;
	} else if (vsecp->vsa_mask & VSA_ACL) {
		/*
		 * Work on a copy of the caller's entries; the original
		 * pointer is restored at "out".
		 */
		aclkeep = vsecp->vsa_aclentp;
		vsecp->vsa_aclentp = cachefs_kmem_alloc(vsecp->vsa_aclcnt *
		    sizeof (aclent_t), KM_SLEEP);
		bcopy(aclkeep, vsecp->vsa_aclentp, vsecp->vsa_aclcnt *
		    sizeof (aclent_t));
	} else if ((vsecp->vsa_mask & (VSA_ACL | VSA_DFACL)) == 0) {
		/* unless there's real data, we can cache nothing. */
		/* nothing was allocated yet, so skipping "out" is safe */
		return (0);
	}

	/*
	 * prevent the ACL from chmoding our frontfile, and
	 * snarf the class info
	 */

	if ((vsecp->vsa_mask & (VSA_ACL | VSA_ACLCNT)) ==
	    (VSA_ACL | VSA_ACLCNT)) {
		for (i = 0; i < vsecp->vsa_aclcnt; i++) {
			aclp = ((aclent_t *)vsecp->vsa_aclentp) + i;
			switch (aclp->a_type) {
			case CLASS_OBJ:
				cp->c_metadata.md_aclclass =
				    aclp->a_perm;
				/*FALLTHROUGH*/
			case USER_OBJ:
			case GROUP_OBJ:
			case OTHER_OBJ:
				/* fixed permissions for the stored entry */
				aclp->a_perm = 06;
			}
		}
	}

	/*
	 * if the frontfile exists, then we always do the work.  but,
	 * if there's no frontfile, and the ACL isn't a `real' ACL,
	 * then we don't want to do the work.  otherwise, an `ls -l'
	 * will create tons of empty frontfiles.
	 */

	if (((cp->c_metadata.md_flags & MD_FILE) == 0) &&
	    ((vsecp->vsa_aclcnt + vsecp->vsa_dfaclcnt)
	    <= MIN_ACL_ENTRIES)) {
		/* record that an ACL is cached without storing anything */
		cp->c_metadata.md_flags |= MD_ACL;
		cp->c_flags |= CN_UPDATED;
		goto out;
	}

	/*
	 * if we have a default ACL, then we need a
	 * real live directory in the frontfs that we
	 * can apply the ACL to.  if not, then we just
	 * use the frontfile.  we get the frontfile
	 * regardless -- that way, we know the
	 * directory for the frontfile exists.
	 */

	if (vsecp->vsa_dfaclcnt > 0) {
		if (cp->c_acldirvp == NULL)
			error = cachefs_getacldirvp(cp);
		if (error != 0)
			goto out;
		vp = cp->c_acldirvp;
	} else {
		if (cp->c_frontvp == NULL)
			error = cachefs_getfrontfile(cp);
		if (error != 0)
			goto out;
		vp = cp->c_frontvp;
	}
	ASSERT(vp != NULL);

	/* store the sanitized ACL on the chosen front-fs vnode */
	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
	error = VOP_SETSECATTR(vp, vsecp, 0, kcred, NULL);
	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
	if (error != 0) {
#ifdef CFSDEBUG
		CFS_DEBUG(CFSDEBUG_VOPS)
			printf("cachefs_cacheacl: setsecattr: error %d\n",
			    error);
#endif /* CFSDEBUG */
		/*
		 * If there was an error, we don't want to call
		 * cachefs_nocache(); so, set error to 0.
		 * We will call cachefs_purgeacl(), in order to
		 * clean such things as adjunct ACL directories.
		 */
		cachefs_purgeacl(cp);
		error = 0;
		goto out;
	}
	if (vp == cp->c_frontvp)
		cp->c_flags |= CN_NEED_FRONT_SYNC;

	cp->c_metadata.md_flags |= MD_ACL;
	cp->c_flags |= CN_UPDATED;

out:
	if ((error) && (fscp->fs_cdconnected == CFS_CD_CONNECTED))
		cachefs_nocache(cp);

	if (gotvsec) {
		/* free the ACL arrays returned by VOP_GETSECATTR above */
		if (vsec.vsa_aclcnt)
			kmem_free(vsec.vsa_aclentp,
			    vsec.vsa_aclcnt * sizeof (aclent_t));
		if (vsec.vsa_dfaclcnt)
			kmem_free(vsec.vsa_dfaclentp,
			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
	} else if (aclkeep != NULL) {
		/* discard our working copy, give the caller's array back */
		cachefs_kmem_free(vsecp->vsa_aclentp,
		    vsecp->vsa_aclcnt * sizeof (aclent_t));
		vsecp->vsa_aclentp = aclkeep;
	}

	return (error);
}
9348 
9349 void
9350 cachefs_purgeacl(cnode_t *cp)
9351 {
9352 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9353 
9354 	ASSERT(!CFS_ISFS_BACKFS_NFSV4(C_TO_FSCACHE(cp)));
9355 
9356 	if (cp->c_acldirvp != NULL) {
9357 		VN_RELE(cp->c_acldirvp);
9358 		cp->c_acldirvp = NULL;
9359 	}
9360 
9361 	if (cp->c_metadata.md_flags & MD_ACLDIR) {
9362 		char name[CFS_FRONTFILE_NAME_SIZE + 2];
9363 
9364 		ASSERT(cp->c_filegrp->fg_dirvp != NULL);
9365 		make_ascii_name(&cp->c_id, name);
9366 		(void) strcat(name, ".d");
9367 
9368 		(void) VOP_RMDIR(cp->c_filegrp->fg_dirvp, name,
9369 		    cp->c_filegrp->fg_dirvp, kcred, NULL, 0);
9370 	}
9371 
9372 	cp->c_metadata.md_flags &= ~(MD_ACL | MD_ACLDIR);
9373 	cp->c_flags |= CN_UPDATED;
9374 }
9375 
9376 static int
9377 cachefs_getacldirvp(cnode_t *cp)
9378 {
9379 	char name[CFS_FRONTFILE_NAME_SIZE + 2];
9380 	int error = 0;
9381 
9382 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9383 	ASSERT(cp->c_acldirvp == NULL);
9384 
9385 	if (cp->c_frontvp == NULL)
9386 		error = cachefs_getfrontfile(cp);
9387 	if (error != 0)
9388 		goto out;
9389 
9390 	ASSERT(cp->c_filegrp->fg_dirvp != NULL);
9391 	make_ascii_name(&cp->c_id, name);
9392 	(void) strcat(name, ".d");
9393 	error = VOP_LOOKUP(cp->c_filegrp->fg_dirvp,
9394 	    name, &cp->c_acldirvp, NULL, 0, NULL, kcred, NULL, NULL, NULL);
9395 	if ((error != 0) && (error != ENOENT))
9396 		goto out;
9397 
9398 	if (error != 0) {
9399 		vattr_t va;
9400 
9401 		va.va_mode = S_IFDIR | 0777;
9402 		va.va_uid = 0;
9403 		va.va_gid = 0;
9404 		va.va_type = VDIR;
9405 		va.va_mask = AT_TYPE | AT_MODE |
9406 		    AT_UID | AT_GID;
9407 		error =
9408 		    VOP_MKDIR(cp->c_filegrp->fg_dirvp,
9409 		    name, &va, &cp->c_acldirvp, kcred, NULL, 0, NULL);
9410 		if (error != 0)
9411 			goto out;
9412 	}
9413 
9414 	ASSERT(cp->c_acldirvp != NULL);
9415 	cp->c_metadata.md_flags |= MD_ACLDIR;
9416 	cp->c_flags |= CN_UPDATED;
9417 
9418 out:
9419 	if (error != 0)
9420 		cp->c_acldirvp = NULL;
9421 	return (error);
9422 }
9423 
9424 static int
9425 cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec)
9426 {
9427 	aclent_t *aclp;
9428 	int error = 0;
9429 	vnode_t *vp = NULL;
9430 	int i;
9431 
9432 	ASSERT(cp->c_metadata.md_flags & MD_ACL);
9433 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9434 	ASSERT(vsec->vsa_aclentp == NULL);
9435 
9436 	if (cp->c_metadata.md_flags & MD_ACLDIR) {
9437 		if (cp->c_acldirvp == NULL)
9438 			error = cachefs_getacldirvp(cp);
9439 		if (error != 0)
9440 			goto out;
9441 		vp = cp->c_acldirvp;
9442 	} else if (cp->c_metadata.md_flags & MD_FILE) {
9443 		if (cp->c_frontvp == NULL)
9444 			error = cachefs_getfrontfile(cp);
9445 		if (error != 0)
9446 			goto out;
9447 		vp = cp->c_frontvp;
9448 	} else {
9449 
9450 		/*
9451 		 * if we get here, then we know that MD_ACL is on,
9452 		 * meaning an ACL was successfully cached.  we also
9453 		 * know that neither MD_ACLDIR nor MD_FILE are on, so
9454 		 * this has to be an entry without a `real' ACL.
9455 		 * thus, we forge whatever is necessary.
9456 		 */
9457 
9458 		if (vsec->vsa_mask & VSA_ACLCNT)
9459 			vsec->vsa_aclcnt = MIN_ACL_ENTRIES;
9460 
9461 		if (vsec->vsa_mask & VSA_ACL) {
9462 			vsec->vsa_aclentp =
9463 			    kmem_zalloc(MIN_ACL_ENTRIES *
9464 			    sizeof (aclent_t), KM_SLEEP);
9465 			aclp = (aclent_t *)vsec->vsa_aclentp;
9466 			aclp->a_type = USER_OBJ;
9467 			++aclp;
9468 			aclp->a_type = GROUP_OBJ;
9469 			++aclp;
9470 			aclp->a_type = OTHER_OBJ;
9471 			++aclp;
9472 			aclp->a_type = CLASS_OBJ;
9473 			ksort((caddr_t)vsec->vsa_aclentp, MIN_ACL_ENTRIES,
9474 			    sizeof (aclent_t), cmp2acls);
9475 		}
9476 
9477 		ASSERT(vp == NULL);
9478 	}
9479 
9480 	if (vp != NULL) {
9481 		if ((error = VOP_GETSECATTR(vp, vsec, 0, kcred, NULL)) != 0) {
9482 #ifdef CFSDEBUG
9483 			CFS_DEBUG(CFSDEBUG_VOPS)
9484 				printf("cachefs_getaclfromcache: error %d\n",
9485 				    error);
9486 #endif /* CFSDEBUG */
9487 			goto out;
9488 		}
9489 	}
9490 
9491 	if (vsec->vsa_aclentp != NULL) {
9492 		for (i = 0; i < vsec->vsa_aclcnt; i++) {
9493 			aclp = ((aclent_t *)vsec->vsa_aclentp) + i;
9494 			switch (aclp->a_type) {
9495 			case USER_OBJ:
9496 				aclp->a_id = cp->c_metadata.md_vattr.va_uid;
9497 				aclp->a_perm =
9498 				    cp->c_metadata.md_vattr.va_mode & 0700;
9499 				aclp->a_perm >>= 6;
9500 				break;
9501 
9502 			case GROUP_OBJ:
9503 				aclp->a_id = cp->c_metadata.md_vattr.va_gid;
9504 				aclp->a_perm =
9505 				    cp->c_metadata.md_vattr.va_mode & 070;
9506 				aclp->a_perm >>= 3;
9507 				break;
9508 
9509 			case OTHER_OBJ:
9510 				aclp->a_perm =
9511 				    cp->c_metadata.md_vattr.va_mode & 07;
9512 				break;
9513 
9514 			case CLASS_OBJ:
9515 				aclp->a_perm =
9516 				    cp->c_metadata.md_aclclass;
9517 				break;
9518 			}
9519 		}
9520 	}
9521 
9522 out:
9523 
9524 	if (error != 0)
9525 		cachefs_nocache(cp);
9526 
9527 	return (error);
9528 }
9529 
9530 /*
9531  * Fills in targp with attribute information from srcp, cp
9532  * and if necessary the system.
9533  */
9534 static void
9535 cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp, cred_t *cr)
9536 {
9537 	time_t	now;
9538 
9539 	ASSERT((srcp->va_mask & (AT_TYPE | AT_MODE)) == (AT_TYPE | AT_MODE));
9540 
9541 	/*
9542 	 * Add code to fill in the va struct.  We use the fields from
9543 	 * the srcp struct if they are populated, otherwise we guess
9544 	 */
9545 
9546 	targp->va_mask = 0;	/* initialize all fields */
9547 	targp->va_mode = srcp->va_mode;
9548 	targp->va_type = srcp->va_type;
9549 	targp->va_nlink = 1;
9550 	targp->va_nodeid = 0;
9551 
9552 	if (srcp->va_mask & AT_UID)
9553 		targp->va_uid = srcp->va_uid;
9554 	else
9555 		targp->va_uid = crgetuid(cr);
9556 
9557 	if (srcp->va_mask & AT_GID)
9558 		targp->va_gid = srcp->va_gid;
9559 	else
9560 		targp->va_gid = crgetgid(cr);
9561 
9562 	if (srcp->va_mask & AT_FSID)
9563 		targp->va_fsid = srcp->va_fsid;
9564 	else
9565 		targp->va_fsid = 0;	/* initialize all fields */
9566 
9567 	now = gethrestime_sec();
9568 	if (srcp->va_mask & AT_ATIME)
9569 		targp->va_atime = srcp->va_atime;
9570 	else
9571 		targp->va_atime.tv_sec = now;
9572 
9573 	if (srcp->va_mask & AT_MTIME)
9574 		targp->va_mtime = srcp->va_mtime;
9575 	else
9576 		targp->va_mtime.tv_sec = now;
9577 
9578 	if (srcp->va_mask & AT_CTIME)
9579 		targp->va_ctime = srcp->va_ctime;
9580 	else
9581 		targp->va_ctime.tv_sec = now;
9582 
9583 
9584 	if (srcp->va_mask & AT_SIZE)
9585 		targp->va_size = srcp->va_size;
9586 	else
9587 		targp->va_size = 0;
9588 
9589 	/*
9590 	 * the remaing fields are set by the fs and not changable.
9591 	 * we populate these entries useing the parent directory
9592 	 * values.  It's a small hack, but should work.
9593 	 */
9594 	targp->va_blksize = cp->c_metadata.md_vattr.va_blksize;
9595 	targp->va_rdev = cp->c_metadata.md_vattr.va_rdev;
9596 	targp->va_nblocks = cp->c_metadata.md_vattr.va_nblocks;
9597 	targp->va_seq = 0; /* Never keep the sequence number */
9598 }
9599 
9600 /*
9601  * set the gid for a newly created file.  The algorithm is as follows:
9602  *
9603  *	1) If the gid is set in the attribute list, then use it if
9604  *	   the caller is privileged, belongs to the target group, or
9605  *	   the group is the same as the parent directory.
9606  *
9607  *	2) If the parent directory's set-gid bit is clear, then use
9608  *	   the process gid
9609  *
9610  *	3) Otherwise, use the gid of the parent directory.
9611  *
9612  * Note: newcp->c_attr.va_{mode,type} must already be set before calling
9613  * this routine.
9614  */
9615 static void
9616 cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap, cred_t *cr)
9617 {
9618 	if ((vap->va_mask & AT_GID) &&
9619 	    ((vap->va_gid == dcp->c_attr.va_gid) ||
9620 	    groupmember(vap->va_gid, cr) ||
9621 	    secpolicy_vnode_create_gid(cr) != 0)) {
9622 		newcp->c_attr.va_gid = vap->va_gid;
9623 	} else {
9624 		if (dcp->c_attr.va_mode & S_ISGID)
9625 			newcp->c_attr.va_gid = dcp->c_attr.va_gid;
9626 		else
9627 			newcp->c_attr.va_gid = crgetgid(cr);
9628 	}
9629 
9630 	/*
9631 	 * if we're creating a directory, and the parent directory has the
9632 	 * set-GID bit set, set it on the new directory.
9633 	 * Otherwise, if the user is neither privileged nor a member of the
9634 	 * file's new group, clear the file's set-GID bit.
9635 	 */
9636 	if (dcp->c_attr.va_mode & S_ISGID && newcp->c_attr.va_type == VDIR) {
9637 		newcp->c_attr.va_mode |= S_ISGID;
9638 	} else if ((newcp->c_attr.va_mode & S_ISGID) &&
9639 	    secpolicy_vnode_setids_setgids(cr, newcp->c_attr.va_gid) != 0)
9640 		newcp->c_attr.va_mode &= ~S_ISGID;
9641 }
9642 
9643 /*
9644  * create an acl for the newly created file.  should be called right
9645  * after cachefs_creategid.
9646  */
9647 
9648 static void
9649 cachefs_createacl(cnode_t *dcp, cnode_t *newcp)
9650 {
9651 	fscache_t *fscp = C_TO_FSCACHE(dcp);
9652 	vsecattr_t vsec;
9653 	int gotvsec = 0;
9654 	int error = 0; /* placeholder */
9655 	aclent_t *aclp;
9656 	o_mode_t *classp = NULL;
9657 	o_mode_t gunion = 0;
9658 	int i;
9659 
9660 	if ((fscp->fs_info.fi_mntflags & CFS_NOACL) ||
9661 	    (! cachefs_vtype_aclok(CTOV(newcp))))
9662 		return;
9663 
9664 	ASSERT(dcp->c_metadata.md_flags & MD_ACL);
9665 	ASSERT(MUTEX_HELD(&dcp->c_statelock));
9666 	ASSERT(MUTEX_HELD(&newcp->c_statelock));
9667 
9668 	/*
9669 	 * XXX should probably not do VSA_ACL and VSA_ACLCNT, but that
9670 	 * would hit code paths that isn't hit anywhere else.
9671 	 */
9672 
9673 	bzero(&vsec, sizeof (vsec));
9674 	vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9675 	error = cachefs_getaclfromcache(dcp, &vsec);
9676 	if (error != 0)
9677 		goto out;
9678 	gotvsec = 1;
9679 
9680 	if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL)) {
9681 		if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL))
9682 			kmem_free(vsec.vsa_aclentp,
9683 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9684 
9685 		vsec.vsa_aclcnt = vsec.vsa_dfaclcnt;
9686 		vsec.vsa_aclentp = vsec.vsa_dfaclentp;
9687 		vsec.vsa_dfaclcnt = 0;
9688 		vsec.vsa_dfaclentp = NULL;
9689 
9690 		if (newcp->c_attr.va_type == VDIR) {
9691 			vsec.vsa_dfaclentp = kmem_alloc(vsec.vsa_aclcnt *
9692 			    sizeof (aclent_t), KM_SLEEP);
9693 			vsec.vsa_dfaclcnt = vsec.vsa_aclcnt;
9694 			bcopy(vsec.vsa_aclentp, vsec.vsa_dfaclentp,
9695 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9696 		}
9697 
9698 		/*
9699 		 * this function should be called pretty much after
9700 		 * the rest of the file creation stuff is done.  so,
9701 		 * uid, gid, etc. should be `right'.  we'll go with
9702 		 * that, rather than trying to determine whether to
9703 		 * get stuff from cr or va.
9704 		 */
9705 
9706 		for (i = 0; i < vsec.vsa_aclcnt; i++) {
9707 			aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9708 			switch (aclp->a_type) {
9709 			case DEF_USER_OBJ:
9710 				aclp->a_type = USER_OBJ;
9711 				aclp->a_id = newcp->c_metadata.md_vattr.va_uid;
9712 				aclp->a_perm =
9713 				    newcp->c_metadata.md_vattr.va_mode;
9714 				aclp->a_perm &= 0700;
9715 				aclp->a_perm >>= 6;
9716 				break;
9717 
9718 			case DEF_GROUP_OBJ:
9719 				aclp->a_type = GROUP_OBJ;
9720 				aclp->a_id = newcp->c_metadata.md_vattr.va_gid;
9721 				aclp->a_perm =
9722 				    newcp->c_metadata.md_vattr.va_mode;
9723 				aclp->a_perm &= 070;
9724 				aclp->a_perm >>= 3;
9725 				gunion |= aclp->a_perm;
9726 				break;
9727 
9728 			case DEF_OTHER_OBJ:
9729 				aclp->a_type = OTHER_OBJ;
9730 				aclp->a_perm =
9731 				    newcp->c_metadata.md_vattr.va_mode & 07;
9732 				break;
9733 
9734 			case DEF_CLASS_OBJ:
9735 				aclp->a_type = CLASS_OBJ;
9736 				classp = &(aclp->a_perm);
9737 				break;
9738 
9739 			case DEF_USER:
9740 				aclp->a_type = USER;
9741 				gunion |= aclp->a_perm;
9742 				break;
9743 
9744 			case DEF_GROUP:
9745 				aclp->a_type = GROUP;
9746 				gunion |= aclp->a_perm;
9747 				break;
9748 			}
9749 		}
9750 
9751 		/* XXX is this the POSIX thing to do? */
9752 		if (classp != NULL)
9753 			*classp &= gunion;
9754 
9755 		/*
9756 		 * we don't need to log this; rather, we clear the
9757 		 * MD_ACL bit when we reconnect.
9758 		 */
9759 
9760 		error = cachefs_cacheacl(newcp, &vsec);
9761 		if (error != 0)
9762 			goto out;
9763 	}
9764 
9765 	newcp->c_metadata.md_aclclass = 07; /* XXX check posix */
9766 	newcp->c_metadata.md_flags |= MD_ACL;
9767 	newcp->c_flags |= CN_UPDATED;
9768 
9769 out:
9770 
9771 	if (gotvsec) {
9772 		if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL))
9773 			kmem_free(vsec.vsa_aclentp,
9774 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9775 		if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL))
9776 			kmem_free(vsec.vsa_dfaclentp,
9777 			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
9778 	}
9779 }
9780 
9781 /*
9782  * this is translated from the UFS code for access checking.
9783  */
9784 
9785 static int
9786 cachefs_access_local(void *vcp, int mode, cred_t *cr)
9787 {
9788 	cnode_t *cp = vcp;
9789 	fscache_t *fscp = C_TO_FSCACHE(cp);
9790 	int shift = 0;
9791 
9792 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9793 
9794 	if (mode & VWRITE) {
9795 		/*
9796 		 * Disallow write attempts on read-only
9797 		 * file systems, unless the file is special.
9798 		 */
9799 		struct vnode *vp = CTOV(cp);
9800 		if (vn_is_readonly(vp)) {
9801 			if (!IS_DEVVP(vp)) {
9802 				return (EROFS);
9803 			}
9804 		}
9805 	}
9806 
9807 	/*
9808 	 * if we need to do ACLs, do it.  this works whether anyone
9809 	 * has explicitly made an ACL or not.
9810 	 */
9811 
9812 	if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
9813 	    (cachefs_vtype_aclok(CTOV(cp))))
9814 		return (cachefs_acl_access(cp, mode, cr));
9815 
9816 	if (crgetuid(cr) != cp->c_attr.va_uid) {
9817 		shift += 3;
9818 		if (!groupmember(cp->c_attr.va_gid, cr))
9819 			shift += 3;
9820 	}
9821 
9822 	/* compute missing mode bits */
9823 	mode &= ~(cp->c_attr.va_mode << shift);
9824 
9825 	if (mode == 0)
9826 		return (0);
9827 
9828 	return (secpolicy_vnode_access(cr, CTOV(cp), cp->c_attr.va_uid, mode));
9829 }
9830 
9831 /*
9832  * This is transcribed from ufs_acl_access().  If that changes, then
9833  * this should, too.
9834  *
9835  * Check the cnode's ACL's to see if this mode of access is
9836  * allowed; return 0 if allowed, EACCES if not.
9837  *
9838  * We follow the procedure defined in Sec. 3.3.5, ACL Access
9839  * Check Algorithm, of the POSIX 1003.6 Draft Standard.
9840  */
9841 
9842 #define	ACL_MODE_CHECK(M, PERM, C, I) ((((M) & (PERM)) == (M)) ? 0 : \
9843 		    secpolicy_vnode_access(C, CTOV(I), owner, (M) & ~(PERM)))
9844 
9845 static int
9846 cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr)
9847 {
9848 	int error = 0;
9849 
9850 	fscache_t *fscp = C_TO_FSCACHE(cp);
9851 
9852 	int mask = ~0;
9853 	int ismask = 0;
9854 
9855 	int gperm = 0;
9856 	int ngroup = 0;
9857 
9858 	vsecattr_t vsec;
9859 	int gotvsec = 0;
9860 	aclent_t *aclp;
9861 
9862 	uid_t owner = cp->c_attr.va_uid;
9863 
9864 	int i;
9865 
9866 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9867 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
9868 
9869 	/*
9870 	 * strictly speaking, we shouldn't set VSA_DFACL and DFACLCNT,
9871 	 * but then i believe we'd be the only thing exercising those
9872 	 * code paths -- probably a bad thing.
9873 	 */
9874 
9875 	bzero(&vsec, sizeof (vsec));
9876 	vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9877 
9878 	/* XXX KLUDGE! correct insidious 0-class problem */
9879 	if (cp->c_metadata.md_aclclass == 0 &&
9880 	    fscp->fs_cdconnected == CFS_CD_CONNECTED)
9881 		cachefs_purgeacl(cp);
9882 again:
9883 	if (cp->c_metadata.md_flags & MD_ACL) {
9884 		error = cachefs_getaclfromcache(cp, &vsec);
9885 		if (error != 0) {
9886 #ifdef CFSDEBUG
9887 			if (error != ETIMEDOUT)
9888 				CFS_DEBUG(CFSDEBUG_VOPS)
9889 					printf("cachefs_acl_access():"
9890 					    "error %d from getaclfromcache()\n",
9891 					    error);
9892 #endif /* CFSDEBUG */
9893 			if ((cp->c_metadata.md_flags & MD_ACL) == 0) {
9894 				goto again;
9895 			} else {
9896 				goto out;
9897 			}
9898 		}
9899 	} else {
9900 		if (cp->c_backvp == NULL) {
9901 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
9902 				error = cachefs_getbackvp(fscp, cp);
9903 			else
9904 				error = ETIMEDOUT;
9905 		}
9906 		if (error == 0)
9907 			error = VOP_GETSECATTR(cp->c_backvp, &vsec, 0, cr,
9908 			    NULL);
9909 		if (error != 0) {
9910 #ifdef CFSDEBUG
9911 			CFS_DEBUG(CFSDEBUG_VOPS)
9912 				printf("cachefs_acl_access():"
9913 				    "error %d from getsecattr(backvp)\n",
9914 				    error);
9915 #endif /* CFSDEBUG */
9916 			goto out;
9917 		}
9918 		if ((cp->c_flags & CN_NOCACHE) == 0 &&
9919 		    !CFS_ISFS_BACKFS_NFSV4(fscp))
9920 			(void) cachefs_cacheacl(cp, &vsec);
9921 	}
9922 	gotvsec = 1;
9923 
9924 	ASSERT(error == 0);
9925 	for (i = 0; i < vsec.vsa_aclcnt; i++) {
9926 		aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9927 		switch (aclp->a_type) {
9928 		case USER_OBJ:
9929 			/*
9930 			 * this might look cleaner in the 2nd loop
9931 			 * below, but we do it here as an
9932 			 * optimization.
9933 			 */
9934 
9935 			owner = aclp->a_id;
9936 			if (crgetuid(cr) == owner) {
9937 				error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
9938 				    cr, cp);
9939 				goto out;
9940 			}
9941 			break;
9942 
9943 		case CLASS_OBJ:
9944 			mask = aclp->a_perm;
9945 			ismask = 1;
9946 			break;
9947 		}
9948 	}
9949 
9950 	ASSERT(error == 0);
9951 	for (i = 0; i < vsec.vsa_aclcnt; i++) {
9952 		aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9953 		switch (aclp->a_type) {
9954 		case USER:
9955 			if (crgetuid(cr) == aclp->a_id) {
9956 				error = ACL_MODE_CHECK(mode,
9957 				    (aclp->a_perm & mask) << 6, cr, cp);
9958 				goto out;
9959 			}
9960 			break;
9961 
9962 		case GROUP_OBJ:
9963 			if (groupmember(aclp->a_id, cr)) {
9964 				++ngroup;
9965 				gperm |= aclp->a_perm;
9966 				if (! ismask) {
9967 					error = ACL_MODE_CHECK(mode,
9968 					    aclp->a_perm << 6,
9969 					    cr, cp);
9970 					goto out;
9971 				}
9972 			}
9973 			break;
9974 
9975 		case GROUP:
9976 			if (groupmember(aclp->a_id, cr)) {
9977 				++ngroup;
9978 				gperm |= aclp->a_perm;
9979 			}
9980 			break;
9981 
9982 		case OTHER_OBJ:
9983 			if (ngroup == 0) {
9984 				error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
9985 				    cr, cp);
9986 				goto out;
9987 			}
9988 			break;
9989 
9990 		default:
9991 			break;
9992 		}
9993 	}
9994 
9995 	ASSERT(ngroup > 0);
9996 	error = ACL_MODE_CHECK(mode, (gperm & mask) << 6, cr, cp);
9997 
9998 out:
9999 	if (gotvsec) {
10000 		if (vsec.vsa_aclcnt && vsec.vsa_aclentp)
10001 			kmem_free(vsec.vsa_aclentp,
10002 			    vsec.vsa_aclcnt * sizeof (aclent_t));
10003 		if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp)
10004 			kmem_free(vsec.vsa_dfaclentp,
10005 			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
10006 	}
10007 
10008 	return (error);
10009 }
10010 
10011 /*
10012  * see if permissions allow for removal of the given file from
10013  * the given directory.
10014  */
10015 static int
10016 cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr)
10017 {
10018 	uid_t uid;
10019 	/*
10020 	 * If the containing directory is sticky, the user must:
10021 	 *  - own the directory, or
10022 	 *  - own the file, or
10023 	 *  - be able to write the file (if it's a plain file), or
10024 	 *  - be sufficiently privileged.
10025 	 */
10026 	if ((dcp->c_attr.va_mode & S_ISVTX) &&
10027 	    ((uid = crgetuid(cr)) != dcp->c_attr.va_uid) &&
10028 	    (uid != cp->c_attr.va_uid) &&
10029 	    (cp->c_attr.va_type != VREG ||
10030 	    cachefs_access_local(cp, VWRITE, cr) != 0))
10031 		return (secpolicy_vnode_remove(cr));
10032 
10033 	return (0);
10034 }
10035 
10036 /*
10037  * Returns a new name, may even be unique.
10038  * Stolen from nfs code.
10039  * Since now we will use renaming to .cfs* in place of .nfs*
10040  * for CacheFS. Both NFS and CacheFS will rename opened files.
10041  */
10042 static char cachefs_prefix[] = ".cfs";
10043 kmutex_t cachefs_newnum_lock;
10044 
10045 static char *
10046 cachefs_newname(void)
10047 {
10048 	static uint_t newnum = 0;
10049 	char *news;
10050 	char *s, *p;
10051 	uint_t id;
10052 
10053 	mutex_enter(&cachefs_newnum_lock);
10054 	if (newnum == 0) {
10055 		newnum = gethrestime_sec() & 0xfffff;
10056 		newnum |= 0x10000;
10057 	}
10058 	id = newnum++;
10059 	mutex_exit(&cachefs_newnum_lock);
10060 
10061 	news = cachefs_kmem_alloc(MAXNAMELEN, KM_SLEEP);
10062 	s = news;
10063 	p = cachefs_prefix;
10064 	while (*p != '\0')
10065 		*s++ = *p++;
10066 	while (id != 0) {
10067 		*s++ = "0123456789ABCDEF"[id & 0x0f];
10068 		id >>= 4;
10069 	}
10070 	*s = '\0';
10071 	return (news);
10072 }
10073 
10074 /*
10075  * Called to rename the specified file to a temporary file so
10076  * operations to the file after remove work.
10077  * Must call this routine with the dir c_rwlock held as a writer.
10078  */
10079 static int
10080 /*ARGSUSED*/
10081 cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm, cred_t *cr)
10082 {
10083 	cnode_t *cp = VTOC(vp);
10084 	char *tmpname;
10085 	fscache_t *fscp = C_TO_FSCACHE(cp);
10086 	int error;
10087 
10088 	ASSERT(RW_WRITE_HELD(&(VTOC(dvp)->c_rwlock)));
10089 
10090 	/* get the new name for the file */
10091 	tmpname = cachefs_newname();
10092 
10093 	/* do the link */
10094 	if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
10095 		error = cachefs_link_connected(dvp, vp, tmpname, cr);
10096 	else
10097 		error = cachefs_link_disconnected(dvp, vp, tmpname, cr);
10098 	if (error) {
10099 		cachefs_kmem_free(tmpname, MAXNAMELEN);
10100 		return (error);
10101 	}
10102 
10103 	mutex_enter(&cp->c_statelock);
10104 	if (cp->c_unldvp) {
10105 		VN_RELE(cp->c_unldvp);
10106 		cachefs_kmem_free(cp->c_unlname, MAXNAMELEN);
10107 		crfree(cp->c_unlcred);
10108 	}
10109 
10110 	VN_HOLD(dvp);
10111 	cp->c_unldvp = dvp;
10112 	crhold(cr);
10113 	cp->c_unlcred = cr;
10114 	cp->c_unlname = tmpname;
10115 
10116 	/* drop the backvp so NFS does not also do a rename */
10117 	mutex_exit(&cp->c_statelock);
10118 
10119 	return (0);
10120 }
10121 
10122 /*
10123  * Marks the cnode as modified.
10124  */
10125 static void
10126 cachefs_modified(cnode_t *cp)
10127 {
10128 	fscache_t *fscp = C_TO_FSCACHE(cp);
10129 	struct vattr va;
10130 	int error;
10131 
10132 	ASSERT(MUTEX_HELD(&cp->c_statelock));
10133 	ASSERT(cp->c_metadata.md_rlno);
10134 
10135 	/* if not on the modify list */
10136 	if (cp->c_metadata.md_rltype != CACHEFS_RL_MODIFIED) {
10137 		/* put on modified list, also marks the file as modified */
10138 		cachefs_rlent_moveto(fscp->fs_cache, CACHEFS_RL_MODIFIED,
10139 		    cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks);
10140 		cp->c_metadata.md_rltype = CACHEFS_RL_MODIFIED;
10141 		cp->c_flags |= CN_UPDATED;
10142 
10143 		/* if a modified regular file that is not local */
10144 		if (((cp->c_id.cid_flags & CFS_CID_LOCAL) == 0) &&
10145 		    (cp->c_metadata.md_flags & MD_FILE) &&
10146 		    (cp->c_attr.va_type == VREG)) {
10147 
10148 			if (cp->c_frontvp == NULL)
10149 				(void) cachefs_getfrontfile(cp);
10150 			if (cp->c_frontvp) {
10151 				/* identify file so fsck knows it is modified */
10152 				va.va_mode = 0766;
10153 				va.va_mask = AT_MODE;
10154 				error = VOP_SETATTR(cp->c_frontvp,
10155 				    &va, 0, kcred, NULL);
10156 				if (error) {
10157 					cmn_err(CE_WARN,
10158 					    "Cannot change ff mode.\n");
10159 				}
10160 			}
10161 		}
10162 	}
10163 }
10164 
10165 /*
10166  * Marks the cnode as modified.
10167  * Allocates a rl slot for the cnode if necessary.
10168  * Returns 0 for success, !0 if cannot get an rl slot.
10169  */
10170 static int
10171 cachefs_modified_alloc(cnode_t *cp)
10172 {
10173 	fscache_t *fscp = C_TO_FSCACHE(cp);
10174 	filegrp_t *fgp = cp->c_filegrp;
10175 	int error;
10176 	rl_entry_t rl_ent;
10177 
10178 	ASSERT(MUTEX_HELD(&cp->c_statelock));
10179 
10180 	/* get the rl slot if needed */
10181 	if (cp->c_metadata.md_rlno == 0) {
10182 		/* get a metadata slot if we do not have one yet */
10183 		if (cp->c_flags & CN_ALLOC_PENDING) {
10184 			if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
10185 				(void) filegrp_allocattr(cp->c_filegrp);
10186 			}
10187 			error = filegrp_create_metadata(cp->c_filegrp,
10188 			    &cp->c_metadata, &cp->c_id);
10189 			if (error)
10190 				return (error);
10191 			cp->c_flags &= ~CN_ALLOC_PENDING;
10192 		}
10193 
10194 		/* get a free rl entry */
10195 		rl_ent.rl_fileno = cp->c_id.cid_fileno;
10196 		rl_ent.rl_local = (cp->c_id.cid_flags & CFS_CID_LOCAL) ? 1 : 0;
10197 		rl_ent.rl_fsid = fscp->fs_cfsid;
10198 		rl_ent.rl_attrc = 0;
10199 		error = cachefs_rl_alloc(fscp->fs_cache, &rl_ent,
10200 		    &cp->c_metadata.md_rlno);
10201 		if (error)
10202 			return (error);
10203 		cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
10204 
10205 		/* hold the filegrp so the attrcache file is not gc */
10206 		error = filegrp_ffhold(fgp);
10207 		if (error) {
10208 			cachefs_rlent_moveto(fscp->fs_cache,
10209 			    CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
10210 			cp->c_metadata.md_rlno = 0;
10211 			return (error);
10212 		}
10213 	}
10214 	cachefs_modified(cp);
10215 	return (0);
10216 }
10217 
10218 int
10219 cachefs_vtype_aclok(vnode_t *vp)
10220 {
10221 	vtype_t *vtp, oktypes[] = {VREG, VDIR, VFIFO, VNON};
10222 
10223 	if (vp->v_type == VNON)
10224 		return (0);
10225 
10226 	for (vtp = oktypes; *vtp != VNON; vtp++)
10227 		if (vp->v_type == *vtp)
10228 			break;
10229 
10230 	return (*vtp != VNON);
10231 }
10232 
10233 static int
10234 cachefs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
10235     caller_context_t *ct)
10236 {
10237 	int error = 0;
10238 	fscache_t *fscp = C_TO_FSCACHE(VTOC(vp));
10239 
10240 	/* Assert cachefs compatibility if NFSv4 is in use */
10241 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
10242 	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(vp));
10243 
10244 	if (cmd == _PC_FILESIZEBITS) {
10245 		u_offset_t maxsize = fscp->fs_offmax;
10246 		(*valp) = 0;
10247 		while (maxsize != 0) {
10248 			maxsize >>= 1;
10249 			(*valp)++;
10250 		}
10251 		(*valp)++;
10252 	} else
10253 		error = fs_pathconf(vp, cmd, valp, cr, ct);
10254 
10255 	return (error);
10256 }
10257