xref: /titanic_44/usr/src/uts/common/fs/cachefs/cachefs_vnops.c (revision e7619b69689d6a40a07425ef14002f2ab4d724d9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
24  */
25 
26 #include <sys/param.h>
27 #include <sys/types.h>
28 #include <sys/systm.h>
29 #include <sys/cred.h>
30 #include <sys/proc.h>
31 #include <sys/user.h>
32 #include <sys/time.h>
33 #include <sys/vnode.h>
34 #include <sys/vfs.h>
35 #include <sys/vfs_opreg.h>
36 #include <sys/file.h>
37 #include <sys/filio.h>
38 #include <sys/uio.h>
39 #include <sys/buf.h>
40 #include <sys/mman.h>
41 #include <sys/tiuser.h>
42 #include <sys/pathname.h>
43 #include <sys/dirent.h>
44 #include <sys/conf.h>
45 #include <sys/debug.h>
46 #include <sys/vmsystm.h>
47 #include <sys/fcntl.h>
48 #include <sys/flock.h>
49 #include <sys/swap.h>
50 #include <sys/errno.h>
51 #include <sys/sysmacros.h>
52 #include <sys/disp.h>
53 #include <sys/kmem.h>
54 #include <sys/cmn_err.h>
55 #include <sys/vtrace.h>
56 #include <sys/mount.h>
57 #include <sys/bootconf.h>
58 #include <sys/dnlc.h>
59 #include <sys/stat.h>
60 #include <sys/acl.h>
61 #include <sys/policy.h>
62 #include <rpc/types.h>
63 
64 #include <vm/hat.h>
65 #include <vm/as.h>
66 #include <vm/page.h>
67 #include <vm/pvn.h>
68 #include <vm/seg.h>
69 #include <vm/seg_map.h>
70 #include <vm/seg_vn.h>
71 #include <vm/rm.h>
72 #include <sys/fs/cachefs_fs.h>
73 #include <sys/fs/cachefs_dir.h>
74 #include <sys/fs/cachefs_dlog.h>
75 #include <sys/fs/cachefs_ioctl.h>
76 #include <sys/fs/cachefs_log.h>
77 #include <fs/fs_subr.h>
78 
79 int cachefs_dnlc;	/* use dnlc, debugging */
80 
81 static void cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp,
82     cred_t *cr);
83 static void cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap,
84     cred_t *cr);
85 static void cachefs_createacl(cnode_t *dcp, cnode_t *newcp);
86 static int cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec);
87 static int cachefs_getacldirvp(cnode_t *cp);
88 static void cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec);
89 static int cachefs_access_local(void *cp, int mode, cred_t *cr);
90 static int cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr);
91 static int cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen,
92     u_offset_t iooff, cred_t *cr);
93 static int cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen,
94     u_offset_t iooff, cred_t *cr);
95 static int cachefs_setattr_connected(vnode_t *vp, vattr_t *vap, int flags,
96     cred_t *cr, caller_context_t *ct);
97 static int cachefs_setattr_disconnected(vnode_t *vp, vattr_t *vap,
98     int flags, cred_t *cr, caller_context_t *ct);
99 static int cachefs_access_connected(struct vnode *vp, int mode,
100     int flags, cred_t *cr);
101 static int cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp,
102     cred_t *cr);
103 static int cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva,
104     char *tnm, cred_t *cr);
105 static int cachefs_symlink_disconnected(vnode_t *dvp, char *lnm,
106     vattr_t *tva, char *tnm, cred_t *cr);
107 static int cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm,
108     cred_t *cr);
109 static int cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp,
110     char *tnm, cred_t *cr);
111 static int cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap,
112     vnode_t **vpp, cred_t *cr);
113 static int cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
114     vnode_t **vpp, cred_t *cr);
115 static int cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr);
116 static int cachefs_rmdir_connected(vnode_t *dvp, char *nm,
117     vnode_t *cdir, cred_t *cr, vnode_t *vp);
118 static int cachefs_rmdir_disconnected(vnode_t *dvp, char *nm,
119     vnode_t *cdir, cred_t *cr, vnode_t *vp);
120 static char *cachefs_newname(void);
121 static int cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm,
122     cred_t *cr);
123 static int cachefs_rename_connected(vnode_t *odvp, char *onm,
124     vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp);
125 static int cachefs_rename_disconnected(vnode_t *odvp, char *onm,
126     vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp);
127 static int cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr,
128     int *eofp);
129 static int cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop,
130     cred_t *cr, int *eofp);
131 static int cachefs_readback_translate(cnode_t *cp, uio_t *uiop,
132 	cred_t *cr, int *eofp);
133 
134 static int cachefs_setattr_common(vnode_t *vp, vattr_t *vap, int flags,
135     cred_t *cr, caller_context_t *ct);
136 
137 static	int	cachefs_open(struct vnode **, int, cred_t *,
138 			caller_context_t *);
139 static	int	cachefs_close(struct vnode *, int, int, offset_t,
140 			cred_t *, caller_context_t *);
141 static	int	cachefs_read(struct vnode *, struct uio *, int, cred_t *,
142 			caller_context_t *);
143 static	int	cachefs_write(struct vnode *, struct uio *, int, cred_t *,
144 			caller_context_t *);
145 static	int	cachefs_ioctl(struct vnode *, int, intptr_t, int, cred_t *,
146 			int *, caller_context_t *);
147 static	int	cachefs_getattr(struct vnode *, struct vattr *, int,
148 			cred_t *, caller_context_t *);
149 static	int	cachefs_setattr(struct vnode *, struct vattr *,
150 			int, cred_t *, caller_context_t *);
151 static	int	cachefs_access(struct vnode *, int, int, cred_t *,
152 			caller_context_t *);
153 static	int	cachefs_lookup(struct vnode *, char *, struct vnode **,
154 			struct pathname *, int, struct vnode *, cred_t *,
155 			caller_context_t *, int *, pathname_t *);
156 static	int	cachefs_create(struct vnode *, char *, struct vattr *,
157 			enum vcexcl, int, struct vnode **, cred_t *, int,
158 			caller_context_t *, vsecattr_t *);
159 static	int	cachefs_create_connected(vnode_t *dvp, char *nm,
160 			vattr_t *vap, enum vcexcl exclusive, int mode,
161 			vnode_t **vpp, cred_t *cr);
162 static	int	cachefs_create_disconnected(vnode_t *dvp, char *nm,
163 			vattr_t *vap, enum vcexcl exclusive, int mode,
164 			vnode_t **vpp, cred_t *cr);
165 static	int	cachefs_remove(struct vnode *, char *, cred_t *,
166 			caller_context_t *, int);
167 static	int	cachefs_link(struct vnode *, struct vnode *, char *,
168 			cred_t *, caller_context_t *, int);
169 static	int	cachefs_rename(struct vnode *, char *, struct vnode *,
170 			char *, cred_t *, caller_context_t *, int);
171 static	int	cachefs_mkdir(struct vnode *, char *, struct
172 			vattr *, struct vnode **, cred_t *, caller_context_t *,
173 			int, vsecattr_t *);
174 static	int	cachefs_rmdir(struct vnode *, char *, struct vnode *,
175 			cred_t *, caller_context_t *, int);
176 static	int	cachefs_readdir(struct vnode *, struct uio *,
177 			cred_t *, int *, caller_context_t *, int);
178 static	int	cachefs_symlink(struct vnode *, char *, struct vattr *,
179 			char *, cred_t *, caller_context_t *, int);
180 static	int	cachefs_readlink(struct vnode *, struct uio *, cred_t *,
181 			caller_context_t *);
182 static int cachefs_readlink_connected(vnode_t *vp, uio_t *uiop, cred_t *cr);
183 static int cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop);
184 static	int	cachefs_fsync(struct vnode *, int, cred_t *,
185 			caller_context_t *);
186 static	void	cachefs_inactive(struct vnode *, cred_t *, caller_context_t *);
187 static	int	cachefs_fid(struct vnode *, struct fid *, caller_context_t *);
188 static	int	cachefs_rwlock(struct vnode *, int, caller_context_t *);
189 static	void	cachefs_rwunlock(struct vnode *, int, caller_context_t *);
190 static	int	cachefs_seek(struct vnode *, offset_t, offset_t *,
191 			caller_context_t *);
192 static	int	cachefs_frlock(struct vnode *, int, struct flock64 *,
193 			int, offset_t, struct flk_callback *, cred_t *,
194 			caller_context_t *);
195 static	int	cachefs_space(struct vnode *, int, struct flock64 *, int,
196 			offset_t, cred_t *, caller_context_t *);
197 static	int	cachefs_realvp(struct vnode *, struct vnode **,
198 			caller_context_t *);
199 static	int	cachefs_getpage(struct vnode *, offset_t, size_t, uint_t *,
200 			struct page *[], size_t, struct seg *, caddr_t,
201 			enum seg_rw, cred_t *, caller_context_t *);
202 static	int	cachefs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
203 			struct page *[], size_t, struct seg *, caddr_t,
204 			enum seg_rw, cred_t *);
205 static	int	cachefs_getapage_back(struct vnode *, u_offset_t, size_t,
206 		uint_t *, struct page *[], size_t, struct seg *, caddr_t,
207 			enum seg_rw, cred_t *);
208 static	int	cachefs_putpage(struct vnode *, offset_t, size_t, int,
209 			cred_t *, caller_context_t *);
210 static	int	cachefs_map(struct vnode *, offset_t, struct as *,
211 			caddr_t *, size_t, uchar_t, uchar_t, uint_t, cred_t *,
212 			caller_context_t *);
213 static	int	cachefs_addmap(struct vnode *, offset_t, struct as *,
214 			caddr_t, size_t, uchar_t, uchar_t, uint_t, cred_t *,
215 			caller_context_t *);
216 static	int	cachefs_delmap(struct vnode *, offset_t, struct as *,
217 			caddr_t, size_t, uint_t, uint_t, uint_t, cred_t *,
218 			caller_context_t *);
219 static int	cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec,
220 			int flag, cred_t *cr, caller_context_t *);
221 static int	cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec,
222 			int flag, cred_t *cr, caller_context_t *);
223 static	int	cachefs_shrlock(vnode_t *, int, struct shrlock *, int,
224 			cred_t *, caller_context_t *);
225 static int cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
226     cred_t *cr);
227 static int cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec,
228     int flag, cred_t *cr);
229 
230 static int	cachefs_dump(struct vnode *, caddr_t, offset_t, offset_t,
231 			caller_context_t *);
232 static int	cachefs_pageio(struct vnode *, page_t *,
233 		    u_offset_t, size_t, int, cred_t *, caller_context_t *);
234 static int	cachefs_writepage(struct vnode *vp, caddr_t base,
235 		    int tcount, struct uio *uiop);
236 static int	cachefs_pathconf(vnode_t *, int, ulong_t *, cred_t *,
237 			caller_context_t *);
238 
239 static int	cachefs_read_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
240 			cred_t *cr, caller_context_t *ct);
241 static int	cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
242 			cred_t *cr, caller_context_t *ct);
243 static int	cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap,
244 			int flags, cred_t *cr, caller_context_t *ct);
245 static int	cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr,
246 			vnode_t *vp);
247 static int	cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off,
248 			size_t len, uint_t *protp, struct page *pl[],
249 			size_t plsz, struct seg *seg, caddr_t addr,
250 			enum seg_rw rw, cred_t *cr);
251 static int	cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off,
252 			size_t len, int flags, cred_t *cr);
253 static int	cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off,
254 			struct as *as, caddr_t *addrp, size_t len, uchar_t prot,
255 			uchar_t maxprot, uint_t flags, cred_t *cr);
256 static int	cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd,
257 			struct flock64 *bfp, int flag, offset_t offset,
258 			cred_t *cr, caller_context_t *ct);
259 
260 struct vnodeops *cachefs_vnodeops;
261 
262 static const fs_operation_def_t cachefs_vnodeops_template[] = {
263 	VOPNAME_OPEN,		{ .vop_open = cachefs_open },
264 	VOPNAME_CLOSE,		{ .vop_close = cachefs_close },
265 	VOPNAME_READ,		{ .vop_read = cachefs_read },
266 	VOPNAME_WRITE,		{ .vop_write = cachefs_write },
267 	VOPNAME_IOCTL,		{ .vop_ioctl = cachefs_ioctl },
268 	VOPNAME_GETATTR,	{ .vop_getattr = cachefs_getattr },
269 	VOPNAME_SETATTR,	{ .vop_setattr = cachefs_setattr },
270 	VOPNAME_ACCESS,		{ .vop_access = cachefs_access },
271 	VOPNAME_LOOKUP,		{ .vop_lookup = cachefs_lookup },
272 	VOPNAME_CREATE,		{ .vop_create = cachefs_create },
273 	VOPNAME_REMOVE,		{ .vop_remove = cachefs_remove },
274 	VOPNAME_LINK,		{ .vop_link = cachefs_link },
275 	VOPNAME_RENAME,		{ .vop_rename = cachefs_rename },
276 	VOPNAME_MKDIR,		{ .vop_mkdir = cachefs_mkdir },
277 	VOPNAME_RMDIR,		{ .vop_rmdir = cachefs_rmdir },
278 	VOPNAME_READDIR,	{ .vop_readdir = cachefs_readdir },
279 	VOPNAME_SYMLINK,	{ .vop_symlink = cachefs_symlink },
280 	VOPNAME_READLINK,	{ .vop_readlink = cachefs_readlink },
281 	VOPNAME_FSYNC,		{ .vop_fsync = cachefs_fsync },
282 	VOPNAME_INACTIVE,	{ .vop_inactive = cachefs_inactive },
283 	VOPNAME_FID,		{ .vop_fid = cachefs_fid },
284 	VOPNAME_RWLOCK,		{ .vop_rwlock = cachefs_rwlock },
285 	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = cachefs_rwunlock },
286 	VOPNAME_SEEK,		{ .vop_seek = cachefs_seek },
287 	VOPNAME_FRLOCK,		{ .vop_frlock = cachefs_frlock },
288 	VOPNAME_SPACE,		{ .vop_space = cachefs_space },
289 	VOPNAME_REALVP,		{ .vop_realvp = cachefs_realvp },
290 	VOPNAME_GETPAGE,	{ .vop_getpage = cachefs_getpage },
291 	VOPNAME_PUTPAGE,	{ .vop_putpage = cachefs_putpage },
292 	VOPNAME_MAP,		{ .vop_map = cachefs_map },
293 	VOPNAME_ADDMAP,		{ .vop_addmap = cachefs_addmap },
294 	VOPNAME_DELMAP,		{ .vop_delmap = cachefs_delmap },
295 	VOPNAME_DUMP,		{ .vop_dump = cachefs_dump },
296 	VOPNAME_PATHCONF,	{ .vop_pathconf = cachefs_pathconf },
297 	VOPNAME_PAGEIO,		{ .vop_pageio = cachefs_pageio },
298 	VOPNAME_SETSECATTR,	{ .vop_setsecattr = cachefs_setsecattr },
299 	VOPNAME_GETSECATTR,	{ .vop_getsecattr = cachefs_getsecattr },
300 	VOPNAME_SHRLOCK,	{ .vop_shrlock = cachefs_shrlock },
301 	NULL,			NULL
302 };
303 
304 /* forward declarations of statics */
305 static void cachefs_modified(cnode_t *cp);
306 static int cachefs_modified_alloc(cnode_t *cp);
307 
308 int
309 cachefs_init_vnops(char *name)
310 {
311 	return (vn_make_ops(name,
312 	    cachefs_vnodeops_template, &cachefs_vnodeops));
313 }
314 
315 struct vnodeops *
316 cachefs_getvnodeops(void)
317 {
318 	return (cachefs_vnodeops);
319 }
320 
321 static int
322 cachefs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
323 {
324 	int error = 0;
325 	cnode_t *cp = VTOC(*vpp);
326 	fscache_t *fscp = C_TO_FSCACHE(cp);
327 	int held = 0;
328 	int type;
329 	int connected = 0;
330 
331 #ifdef CFSDEBUG
332 	CFS_DEBUG(CFSDEBUG_VOPS)
333 		printf("cachefs_open: ENTER vpp %p flag %x\n",
334 		    (void *)vpp, flag);
335 #endif
336 	if (getzoneid() != GLOBAL_ZONEID) {
337 		error = EPERM;
338 		goto out;
339 	}
340 	if ((flag & FWRITE) &&
341 	    ((*vpp)->v_type == VDIR || (*vpp)->v_type == VLNK)) {
342 		error = EISDIR;
343 		goto out;
344 	}
345 
346 	/*
347 	 * Cachefs only provides pass-through support for NFSv4,
348 	 * and all vnode operations are passed through to the
349 	 * back file system. For NFSv4 pass-through to work, only
350 	 * connected operation is supported, the cnode backvp must
351 	 * exist, and cachefs optional (eg., disconnectable) flags
352 	 * are turned off. Assert these conditions to ensure that
353 	 * the backfilesystem is called for the open operation.
354 	 */
355 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
356 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
357 
358 	for (;;) {
359 		/* get (or renew) access to the file system */
360 		if (held) {
361 			/* Won't loop with NFSv4 connected behavior */
362 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
363 			cachefs_cd_release(fscp);
364 			held = 0;
365 		}
366 		error = cachefs_cd_access(fscp, connected, 0);
367 		if (error)
368 			goto out;
369 		held = 1;
370 
371 		mutex_enter(&cp->c_statelock);
372 
373 		/* grab creds if we do not have any yet */
374 		if (cp->c_cred == NULL) {
375 			crhold(cr);
376 			cp->c_cred = cr;
377 		}
378 		cp->c_flags |= CN_NEEDOPEN;
379 
380 		/* if we are disconnected */
381 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
382 			/* if we cannot write to the file system */
383 			if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp)) {
384 				mutex_exit(&cp->c_statelock);
385 				connected = 1;
386 				continue;
387 			}
388 			/*
389 			 * Allow read only requests to continue
390 			 */
391 			if ((flag & (FWRITE|FREAD)) == FREAD) {
392 				/* track the flag for opening the backvp */
393 				cp->c_rdcnt++;
394 				mutex_exit(&cp->c_statelock);
395 				error = 0;
396 				break;
397 			}
398 
399 			/*
400 			 * check credentials  - if this procs
401 			 * credentials don't match the creds in the
402 			 * cnode disallow writing while disconnected.
403 			 */
404 			if (crcmp(cp->c_cred, CRED()) != 0 &&
405 			    secpolicy_vnode_access2(CRED(), *vpp,
406 			    cp->c_attr.va_uid, 0, VWRITE) != 0) {
407 				mutex_exit(&cp->c_statelock);
408 				connected = 1;
409 				continue;
410 			}
411 			/* to get here, we know that the WRITE flag is on */
412 			cp->c_wrcnt++;
413 			if (flag & FREAD)
414 				cp->c_rdcnt++;
415 		}
416 
417 		/* else if we are connected */
418 		else {
419 			/* if cannot use the cached copy of the file */
420 			if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp) &&
421 			    ((cp->c_flags & CN_NOCACHE) == 0))
422 				cachefs_nocache(cp);
423 
424 			/* pass open to the back file */
425 			if (cp->c_backvp) {
426 				cp->c_flags &= ~CN_NEEDOPEN;
427 				CFS_DPRINT_BACKFS_NFSV4(fscp,
428 				    ("cachefs_open (nfsv4): cnode %p, "
429 				    "backvp %p\n", cp, cp->c_backvp));
430 				error = VOP_OPEN(&cp->c_backvp, flag, cr, ct);
431 				if (CFS_TIMEOUT(fscp, error)) {
432 					mutex_exit(&cp->c_statelock);
433 					cachefs_cd_release(fscp);
434 					held = 0;
435 					cachefs_cd_timedout(fscp);
436 					continue;
437 				} else if (error) {
438 					mutex_exit(&cp->c_statelock);
439 					break;
440 				}
441 			} else {
442 				/* backvp will be VOP_OPEN'd later */
443 				if (flag & FREAD)
444 					cp->c_rdcnt++;
445 				if (flag & FWRITE)
446 					cp->c_wrcnt++;
447 			}
448 
449 			/*
450 			 * Now perform a consistency check on the file.
451 			 * If strict consistency then force a check to
452 			 * the backfs even if the timeout has not expired
453 			 * for close-to-open consistency.
454 			 */
455 			type = 0;
456 			if (fscp->fs_consttype == CFS_FS_CONST_STRICT)
457 				type = C_BACK_CHECK;
458 			error = CFSOP_CHECK_COBJECT(fscp, cp, type, cr);
459 			if (CFS_TIMEOUT(fscp, error)) {
460 				mutex_exit(&cp->c_statelock);
461 				cachefs_cd_release(fscp);
462 				held = 0;
463 				cachefs_cd_timedout(fscp);
464 				continue;
465 			}
466 		}
467 		mutex_exit(&cp->c_statelock);
468 		break;
469 	}
470 	if (held)
471 		cachefs_cd_release(fscp);
472 out:
473 #ifdef CFS_CD_DEBUG
474 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
475 #endif
476 #ifdef CFSDEBUG
477 	CFS_DEBUG(CFSDEBUG_VOPS)
478 		printf("cachefs_open: EXIT vpp %p error %d\n",
479 		    (void *)vpp, error);
480 #endif
481 	return (error);
482 }
483 
484 /* ARGSUSED */
485 static int
486 cachefs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
487 	caller_context_t *ct)
488 {
489 	int error = 0;
490 	cnode_t *cp = VTOC(vp);
491 	fscache_t *fscp = C_TO_FSCACHE(cp);
492 	int held = 0;
493 	int connected = 0;
494 	int close_cnt = 1;
495 	cachefscache_t *cachep;
496 
497 #ifdef CFSDEBUG
498 	CFS_DEBUG(CFSDEBUG_VOPS)
499 		printf("cachefs_close: ENTER vp %p\n", (void *)vp);
500 #endif
501 	/*
502 	 * Cachefs only provides pass-through support for NFSv4,
503 	 * and all vnode operations are passed through to the
504 	 * back file system. For NFSv4 pass-through to work, only
505 	 * connected operation is supported, the cnode backvp must
506 	 * exist, and cachefs optional (eg., disconnectable) flags
507 	 * are turned off. Assert these conditions to ensure that
508 	 * the backfilesystem is called for the close operation.
509 	 */
510 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
511 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
512 
513 	/*
514 	 * File could have been passed in or inherited from the global zone, so
515 	 * we don't want to flat out reject the request; we'll just leave things
516 	 * the way they are and let the backfs (NFS) deal with it.
517 	 */
518 	/* get rid of any local locks */
519 	if (CFS_ISFS_LLOCK(fscp)) {
520 		(void) cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
521 	}
522 
523 	/* clean up if this is the daemon closing down */
524 	if ((fscp->fs_cddaemonid == ttoproc(curthread)->p_pid) &&
525 	    ((ttoproc(curthread)->p_pid) != 0) &&
526 	    (vp == fscp->fs_rootvp) &&
527 	    (count == 1)) {
528 		mutex_enter(&fscp->fs_cdlock);
529 		fscp->fs_cddaemonid = 0;
530 		if (fscp->fs_dlogfile)
531 			fscp->fs_cdconnected = CFS_CD_DISCONNECTED;
532 		else
533 			fscp->fs_cdconnected = CFS_CD_CONNECTED;
534 		cv_broadcast(&fscp->fs_cdwaitcv);
535 		mutex_exit(&fscp->fs_cdlock);
536 		if (fscp->fs_flags & CFS_FS_ROOTFS) {
537 			cachep = fscp->fs_cache;
538 			mutex_enter(&cachep->c_contentslock);
539 			ASSERT(cachep->c_rootdaemonid != 0);
540 			cachep->c_rootdaemonid = 0;
541 			mutex_exit(&cachep->c_contentslock);
542 		}
543 		return (0);
544 	}
545 
546 	for (;;) {
547 		/* get (or renew) access to the file system */
548 		if (held) {
549 			/* Won't loop with NFSv4 connected behavior */
550 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
551 			cachefs_cd_release(fscp);
552 			held = 0;
553 		}
554 		error = cachefs_cd_access(fscp, connected, 0);
555 		if (error)
556 			goto out;
557 		held = 1;
558 		connected = 0;
559 
560 		/* if not the last close */
561 		if (count > 1) {
562 			if (fscp->fs_cdconnected != CFS_CD_CONNECTED)
563 				goto out;
564 			mutex_enter(&cp->c_statelock);
565 			if (cp->c_backvp) {
566 				CFS_DPRINT_BACKFS_NFSV4(fscp,
567 				    ("cachefs_close (nfsv4): cnode %p, "
568 				    "backvp %p\n", cp, cp->c_backvp));
569 				error = VOP_CLOSE(cp->c_backvp, flag, count,
570 				    offset, cr, ct);
571 				if (CFS_TIMEOUT(fscp, error)) {
572 					mutex_exit(&cp->c_statelock);
573 					cachefs_cd_release(fscp);
574 					held = 0;
575 					cachefs_cd_timedout(fscp);
576 					continue;
577 				}
578 			}
579 			mutex_exit(&cp->c_statelock);
580 			goto out;
581 		}
582 
583 		/*
584 		 * If the file is an unlinked file, then flush the lookup
585 		 * cache so that inactive will be called if this is
586 		 * the last reference.  It will invalidate all of the
587 		 * cached pages, without writing them out.  Writing them
588 		 * out is not required because they will be written to a
589 		 * file which will be immediately removed.
590 		 */
591 		if (cp->c_unldvp != NULL) {
592 			dnlc_purge_vp(vp);
593 			mutex_enter(&cp->c_statelock);
594 			error = cp->c_error;
595 			cp->c_error = 0;
596 			mutex_exit(&cp->c_statelock);
597 			/* always call VOP_CLOSE() for back fs vnode */
598 		}
599 
600 		/* force dirty data to stable storage */
601 		else if ((vp->v_type == VREG) && (flag & FWRITE) &&
602 		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
603 			/* clean the cachefs pages synchronously */
604 			error = cachefs_putpage_common(vp, (offset_t)0,
605 			    0, 0, cr);
606 			if (CFS_TIMEOUT(fscp, error)) {
607 				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
608 					cachefs_cd_release(fscp);
609 					held = 0;
610 					cachefs_cd_timedout(fscp);
611 					continue;
612 				} else {
613 					connected = 1;
614 					continue;
615 				}
616 			}
617 
618 			/* if no space left in cache, wait until connected */
619 			if ((error == ENOSPC) &&
620 			    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
621 				connected = 1;
622 				continue;
623 			}
624 
625 			/* clear the cnode error if putpage worked */
626 			if ((error == 0) && cp->c_error) {
627 				mutex_enter(&cp->c_statelock);
628 				cp->c_error = 0;
629 				mutex_exit(&cp->c_statelock);
630 			}
631 
632 			/* if any other important error */
633 			if (cp->c_error) {
634 				/* get rid of the pages */
635 				(void) cachefs_putpage_common(vp,
636 				    (offset_t)0, 0, B_INVAL | B_FORCE, cr);
637 				dnlc_purge_vp(vp);
638 			}
639 		}
640 
641 		mutex_enter(&cp->c_statelock);
642 		if (cp->c_backvp &&
643 		    (fscp->fs_cdconnected == CFS_CD_CONNECTED)) {
644 			error = VOP_CLOSE(cp->c_backvp, flag, close_cnt,
645 			    offset, cr, ct);
646 			if (CFS_TIMEOUT(fscp, error)) {
647 				mutex_exit(&cp->c_statelock);
648 				cachefs_cd_release(fscp);
649 				held = 0;
650 				cachefs_cd_timedout(fscp);
651 				/* don't decrement the vnode counts again */
652 				close_cnt = 0;
653 				continue;
654 			}
655 		}
656 		mutex_exit(&cp->c_statelock);
657 		break;
658 	}
659 
660 	mutex_enter(&cp->c_statelock);
661 	if (!error)
662 		error = cp->c_error;
663 	cp->c_error = 0;
664 	mutex_exit(&cp->c_statelock);
665 
666 out:
667 	if (held)
668 		cachefs_cd_release(fscp);
669 #ifdef CFS_CD_DEBUG
670 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
671 #endif
672 
673 #ifdef CFSDEBUG
674 	CFS_DEBUG(CFSDEBUG_VOPS)
675 		printf("cachefs_close: EXIT vp %p\n", (void *)vp);
676 #endif
677 	return (error);
678 }
679 
680 /*ARGSUSED*/
681 static int
682 cachefs_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
683 	caller_context_t *ct)
684 {
685 	struct cnode *cp = VTOC(vp);
686 	fscache_t *fscp = C_TO_FSCACHE(cp);
687 	register u_offset_t off;
688 	register int mapoff;
689 	register caddr_t base;
690 	int n;
691 	offset_t diff;
692 	uint_t flags = 0;
693 	int error = 0;
694 
695 #if 0
696 	if (vp->v_flag & VNOCACHE)
697 		flags = SM_INVAL;
698 #endif
699 	if (getzoneid() != GLOBAL_ZONEID)
700 		return (EPERM);
701 	if (vp->v_type != VREG)
702 		return (EISDIR);
703 
704 	ASSERT(RW_READ_HELD(&cp->c_rwlock));
705 
706 	if (uiop->uio_resid == 0)
707 		return (0);
708 
709 
710 	if (uiop->uio_loffset < (offset_t)0)
711 		return (EINVAL);
712 
713 	/*
714 	 * Call backfilesystem to read if NFSv4, the cachefs code
715 	 * does the read from the back filesystem asynchronously
716 	 * which is not supported by pass-through functionality.
717 	 */
718 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
719 		error = cachefs_read_backfs_nfsv4(vp, uiop, ioflag, cr, ct);
720 		goto out;
721 	}
722 
723 	if (MANDLOCK(vp, cp->c_attr.va_mode)) {
724 		error = chklock(vp, FREAD, (offset_t)uiop->uio_loffset,
725 		    uiop->uio_resid, uiop->uio_fmode, ct);
726 		if (error)
727 			return (error);
728 	}
729 
730 	/*
731 	 * Sit in a loop and transfer (uiomove) the data in up to
732 	 * MAXBSIZE chunks. Each chunk is mapped into the kernel's
733 	 * address space as needed and then released.
734 	 */
735 	do {
736 		/*
737 		 *	off	Offset of current MAXBSIZE chunk
738 		 *	mapoff	Offset within the current chunk
739 		 *	n	Number of bytes to move from this chunk
740 		 *	base	kernel address of mapped in chunk
741 		 */
742 		off = uiop->uio_loffset & (offset_t)MAXBMASK;
743 		mapoff = uiop->uio_loffset & MAXBOFFSET;
744 		n = MAXBSIZE - mapoff;
745 		if (n > uiop->uio_resid)
746 			n = (uint_t)uiop->uio_resid;
747 
748 		/* perform consistency check */
749 		error = cachefs_cd_access(fscp, 0, 0);
750 		if (error)
751 			break;
752 		mutex_enter(&cp->c_statelock);
753 		error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
754 		diff = cp->c_size - uiop->uio_loffset;
755 		mutex_exit(&cp->c_statelock);
756 		if (CFS_TIMEOUT(fscp, error)) {
757 			cachefs_cd_release(fscp);
758 			cachefs_cd_timedout(fscp);
759 			error = 0;
760 			continue;
761 		}
762 		cachefs_cd_release(fscp);
763 
764 		if (error)
765 			break;
766 
767 		if (diff <= (offset_t)0)
768 			break;
769 		if (diff < (offset_t)n)
770 			n = diff;
771 
772 		base = segmap_getmapflt(segkmap, vp, off, (uint_t)n, 1, S_READ);
773 
774 		error = segmap_fault(kas.a_hat, segkmap, base, n,
775 		    F_SOFTLOCK, S_READ);
776 		if (error) {
777 			(void) segmap_release(segkmap, base, 0);
778 			if (FC_CODE(error) == FC_OBJERR)
779 				error =  FC_ERRNO(error);
780 			else
781 				error = EIO;
782 			break;
783 		}
784 		error = uiomove(base+mapoff, n, UIO_READ, uiop);
785 		(void) segmap_fault(kas.a_hat, segkmap, base, n,
786 		    F_SOFTUNLOCK, S_READ);
787 		if (error == 0) {
788 			/*
789 			 * if we read a whole page(s), or to eof,
790 			 *  we won't need this page(s) again soon.
791 			 */
792 			if (n + mapoff == MAXBSIZE ||
793 			    uiop->uio_loffset == cp->c_size)
794 				flags |= SM_DONTNEED;
795 		}
796 		(void) segmap_release(segkmap, base, flags);
797 	} while (error == 0 && uiop->uio_resid > 0);
798 
799 out:
800 #ifdef CFSDEBUG
801 	CFS_DEBUG(CFSDEBUG_VOPS)
802 		printf("cachefs_read: EXIT error %d resid %ld\n", error,
803 		    uiop->uio_resid);
804 #endif
805 	return (error);
806 }
807 
808 /*
809  * cachefs_read_backfs_nfsv4
810  *
811  * Call NFSv4 back filesystem to handle the read (cachefs
812  * pass-through support for NFSv4).
813  */
814 static int
815 cachefs_read_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
816 			caller_context_t *ct)
817 {
818 	cnode_t *cp = VTOC(vp);
819 	fscache_t *fscp = C_TO_FSCACHE(cp);
820 	vnode_t *backvp;
821 	int error;
822 
823 	/*
824 	 * For NFSv4 pass-through to work, only connected operation
825 	 * is supported, the cnode backvp must exist, and cachefs
826 	 * optional (eg., disconnectable) flags are turned off. Assert
827 	 * these conditions for the read operation.
828 	 */
829 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
830 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
831 
832 	/* Call backfs vnode op after extracting backvp */
833 	mutex_enter(&cp->c_statelock);
834 	backvp = cp->c_backvp;
835 	mutex_exit(&cp->c_statelock);
836 
837 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_read_backfs_nfsv4: cnode %p, "
838 	    "backvp %p\n", cp, backvp));
839 
840 	(void) VOP_RWLOCK(backvp, V_WRITELOCK_FALSE, ct);
841 	error = VOP_READ(backvp, uiop, ioflag, cr, ct);
842 	VOP_RWUNLOCK(backvp, V_WRITELOCK_FALSE, ct);
843 
844 	/* Increment cache miss counter */
845 	fscp->fs_stats.st_misses++;
846 
847 	return (error);
848 }
849 
850 /*ARGSUSED*/
851 static int
852 cachefs_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
853 	caller_context_t *ct)
854 {
855 	struct cnode *cp = VTOC(vp);
856 	fscache_t *fscp = C_TO_FSCACHE(cp);
857 	int error = 0;
858 	u_offset_t off;
859 	caddr_t base;
860 	uint_t bsize;
861 	uint_t flags;
862 	int n, on;
863 	rlim64_t limit = uiop->uio_llimit;
864 	ssize_t resid;
865 	offset_t offset;
866 	offset_t remainder;
867 
868 #ifdef CFSDEBUG
869 	CFS_DEBUG(CFSDEBUG_VOPS)
870 		printf(
871 		"cachefs_write: ENTER vp %p offset %llu count %ld cflags %x\n",
872 		    (void *)vp, uiop->uio_loffset, uiop->uio_resid,
873 		    cp->c_flags);
874 #endif
875 	if (getzoneid() != GLOBAL_ZONEID) {
876 		error = EPERM;
877 		goto out;
878 	}
879 	if (vp->v_type != VREG) {
880 		error = EISDIR;
881 		goto out;
882 	}
883 
884 	ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
885 
886 	if (uiop->uio_resid == 0) {
887 		goto out;
888 	}
889 
890 	/* Call backfilesystem to write if NFSv4 */
891 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
892 		error = cachefs_write_backfs_nfsv4(vp, uiop, ioflag, cr, ct);
893 		goto out2;
894 	}
895 
896 	if (MANDLOCK(vp, cp->c_attr.va_mode)) {
897 		error = chklock(vp, FWRITE, (offset_t)uiop->uio_loffset,
898 		    uiop->uio_resid, uiop->uio_fmode, ct);
899 		if (error)
900 			goto out;
901 	}
902 
903 	if (ioflag & FAPPEND) {
904 		for (;;) {
905 			/* do consistency check to get correct file size */
906 			error = cachefs_cd_access(fscp, 0, 1);
907 			if (error)
908 				goto out;
909 			mutex_enter(&cp->c_statelock);
910 			error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
911 			uiop->uio_loffset = cp->c_size;
912 			mutex_exit(&cp->c_statelock);
913 			if (CFS_TIMEOUT(fscp, error)) {
914 				cachefs_cd_release(fscp);
915 				cachefs_cd_timedout(fscp);
916 				continue;
917 			}
918 			cachefs_cd_release(fscp);
919 			if (error)
920 				goto out;
921 			break;
922 		}
923 	}
924 
925 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
926 		limit = MAXOFFSET_T;
927 
928 	if (uiop->uio_loffset >= limit) {
929 		proc_t *p = ttoproc(curthread);
930 
931 		mutex_enter(&p->p_lock);
932 		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
933 		    p, RCA_UNSAFE_SIGINFO);
934 		mutex_exit(&p->p_lock);
935 		error = EFBIG;
936 		goto out;
937 	}
938 	if (uiop->uio_loffset > fscp->fs_offmax) {
939 		error = EFBIG;
940 		goto out;
941 	}
942 
943 	if (limit > fscp->fs_offmax)
944 		limit = fscp->fs_offmax;
945 
946 	if (uiop->uio_loffset < (offset_t)0) {
947 		error = EINVAL;
948 		goto out;
949 	}
950 
951 	offset = uiop->uio_loffset + uiop->uio_resid;
952 	/*
953 	 * Check to make sure that the process will not exceed
954 	 * its limit on file size.  It is okay to write up to
955 	 * the limit, but not beyond.  Thus, the write which
956 	 * reaches the limit will be short and the next write
957 	 * will return an error.
958 	 */
959 	remainder = 0;
960 	if (offset > limit) {
961 		remainder = (int)(offset - (u_offset_t)limit);
962 		uiop->uio_resid = limit - uiop->uio_loffset;
963 		if (uiop->uio_resid <= 0) {
964 			proc_t *p = ttoproc(curthread);
965 
966 			uiop->uio_resid += remainder;
967 			mutex_enter(&p->p_lock);
968 			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
969 			    p->p_rctls, p, RCA_UNSAFE_SIGINFO);
970 			mutex_exit(&p->p_lock);
971 			error = EFBIG;
972 			goto out;
973 		}
974 	}
975 
976 	resid = uiop->uio_resid;
977 	offset = uiop->uio_loffset;
978 	bsize = vp->v_vfsp->vfs_bsize;
979 
980 	/* loop around and do the write in MAXBSIZE chunks */
981 	do {
982 		/* mapping offset */
983 		off = uiop->uio_loffset & (offset_t)MAXBMASK;
984 		on = uiop->uio_loffset & MAXBOFFSET; /* Rel. offset */
985 		n = MAXBSIZE - on;
986 		if (n > uiop->uio_resid)
987 			n = (int)uiop->uio_resid;
988 
989 		/*
990 		 * Touch the page and fault it in if it is not in
991 		 * core before segmap_getmapflt can lock it. This
992 		 * is to avoid the deadlock if the buffer is mapped
993 		 * to the same file through mmap which we want to
994 		 * write to.
995 		 */
996 		uio_prefaultpages((long)n, uiop);
997 
998 		base = segmap_getmap(segkmap, vp, off);
999 		error = cachefs_writepage(vp, (base + on), n, uiop);
1000 		if (error == 0) {
1001 			flags = 0;
1002 			/*
1003 			 * Have written a whole block.Start an
1004 			 * asynchronous write and mark the buffer to
1005 			 * indicate that it won't be needed again
1006 			 * soon.
1007 			 */
1008 			if (n + on == bsize) {
1009 				flags = SM_WRITE |SM_ASYNC |SM_DONTNEED;
1010 			}
1011 #if 0
1012 			/* XXX need to understand this */
1013 			if ((ioflag & (FSYNC|FDSYNC)) ||
1014 			    (cp->c_backvp && vn_has_flocks(cp->c_backvp))) {
1015 				flags &= ~SM_ASYNC;
1016 				flags |= SM_WRITE;
1017 			}
1018 #else
1019 			if (ioflag & (FSYNC|FDSYNC)) {
1020 				flags &= ~SM_ASYNC;
1021 				flags |= SM_WRITE;
1022 			}
1023 #endif
1024 			error = segmap_release(segkmap, base, flags);
1025 		} else {
1026 			(void) segmap_release(segkmap, base, 0);
1027 		}
1028 	} while (error == 0 && uiop->uio_resid > 0);
1029 
1030 out:
1031 	if (error == EINTR && (ioflag & (FSYNC|FDSYNC))) {
1032 		uiop->uio_resid = resid;
1033 		uiop->uio_loffset = offset;
1034 	} else
1035 		uiop->uio_resid += remainder;
1036 
1037 out2:
1038 #ifdef CFSDEBUG
1039 	CFS_DEBUG(CFSDEBUG_VOPS)
1040 		printf("cachefs_write: EXIT error %d\n", error);
1041 #endif
1042 	return (error);
1043 }
1044 
1045 /*
1046  * cachefs_write_backfs_nfsv4
1047  *
1048  * Call NFSv4 back filesystem to handle the write (cachefs
1049  * pass-through support for NFSv4).
1050  */
1051 static int
1052 cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
1053 			caller_context_t *ct)
1054 {
1055 	cnode_t *cp = VTOC(vp);
1056 	fscache_t *fscp = C_TO_FSCACHE(cp);
1057 	vnode_t *backvp;
1058 	int error;
1059 
1060 	/*
1061 	 * For NFSv4 pass-through to work, only connected operation
1062 	 * is supported, the cnode backvp must exist, and cachefs
1063 	 * optional (eg., disconnectable) flags are turned off. Assert
1064 	 * these conditions for the read operation.
1065 	 */
1066 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
1067 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
1068 
1069 	/* Call backfs vnode op after extracting the backvp */
1070 	mutex_enter(&cp->c_statelock);
1071 	backvp = cp->c_backvp;
1072 	mutex_exit(&cp->c_statelock);
1073 
1074 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_write_backfs_nfsv4: cnode %p, "
1075 	    "backvp %p\n", cp, backvp));
1076 	(void) VOP_RWLOCK(backvp, V_WRITELOCK_TRUE, ct);
1077 	error = VOP_WRITE(backvp, uiop, ioflag, cr, ct);
1078 	VOP_RWUNLOCK(backvp, V_WRITELOCK_TRUE, ct);
1079 
1080 	return (error);
1081 }
1082 
1083 /*
1084  * see if we've charged ourselves for frontfile data at
1085  * the given offset.  If not, allocate a block for it now.
1086  */
1087 static int
1088 cachefs_charge_page(struct cnode *cp, u_offset_t offset)
1089 {
1090 	u_offset_t blockoff;
1091 	int error;
1092 	int inc;
1093 
1094 	ASSERT(MUTEX_HELD(&cp->c_statelock));
1095 	/*LINTED*/
1096 	ASSERT(PAGESIZE <= MAXBSIZE);
1097 
1098 	error = 0;
1099 	blockoff = offset & (offset_t)MAXBMASK;
1100 
1101 	/* get the front file if necessary so allocblocks works */
1102 	if ((cp->c_frontvp == NULL) &&
1103 	    ((cp->c_flags & CN_NOCACHE) == 0)) {
1104 		(void) cachefs_getfrontfile(cp);
1105 	}
1106 	if (cp->c_flags & CN_NOCACHE)
1107 		return (1);
1108 
1109 	if (cachefs_check_allocmap(cp, blockoff))
1110 		return (0);
1111 
1112 	for (inc = PAGESIZE; inc < MAXBSIZE; inc += PAGESIZE)
1113 		if (cachefs_check_allocmap(cp, blockoff+inc))
1114 			return (0);
1115 
1116 	error = cachefs_allocblocks(C_TO_FSCACHE(cp)->fs_cache, 1,
1117 	    cp->c_metadata.md_rltype);
1118 	if (error == 0) {
1119 		cp->c_metadata.md_frontblks++;
1120 		cp->c_flags |= CN_UPDATED;
1121 	}
1122 	return (error);
1123 }
1124 
1125 /*
1126  * Called only by cachefs_write to write 1 page or less of data.
1127  *	base   - base address kernel addr space
1128  *	tcount - Total bytes to move - < MAXBSIZE
1129  */
1130 static int
1131 cachefs_writepage(vnode_t *vp, caddr_t base, int tcount, uio_t *uiop)
1132 {
1133 	struct cnode *cp =  VTOC(vp);
1134 	fscache_t *fscp = C_TO_FSCACHE(cp);
1135 	register int n;
1136 	register u_offset_t offset;
1137 	int error = 0, terror;
1138 	extern struct as kas;
1139 	u_offset_t lastpage_off;
1140 	int pagecreate = 0;
1141 	int newpage;
1142 
1143 #ifdef CFSDEBUG
1144 	CFS_DEBUG(CFSDEBUG_VOPS)
1145 		printf(
1146 		    "cachefs_writepage: ENTER vp %p offset %llu len %ld\\\n",
1147 		    (void *)vp, uiop->uio_loffset, uiop->uio_resid);
1148 #endif
1149 
1150 	/*
1151 	 * Move bytes in PAGESIZE chunks. We must avoid spanning pages in
1152 	 * uiomove() because page faults may cause the cache to be invalidated
1153 	 * out from under us.
1154 	 */
1155 	do {
1156 		offset = uiop->uio_loffset;
1157 		lastpage_off = (cp->c_size - 1) & (offset_t)PAGEMASK;
1158 
1159 		/*
1160 		 * If not connected then need to make sure we have space
1161 		 * to perform the write.  We could make this check
1162 		 * a little tighter by only doing it if we are growing the file.
1163 		 */
1164 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
1165 			error = cachefs_allocblocks(fscp->fs_cache, 1,
1166 			    cp->c_metadata.md_rltype);
1167 			if (error)
1168 				break;
1169 			cachefs_freeblocks(fscp->fs_cache, 1,
1170 			    cp->c_metadata.md_rltype);
1171 		}
1172 
1173 		/*
1174 		 * n is the number of bytes required to satisfy the request
1175 		 * or the number of bytes to fill out the page.
1176 		 */
1177 		n = (int)(PAGESIZE - ((uintptr_t)base & PAGEOFFSET));
1178 		if (n > tcount)
1179 			n = tcount;
1180 
1181 		/*
1182 		 * The number of bytes of data in the last page can not
1183 		 * be accurately be determined while page is being
1184 		 * uiomove'd to and the size of the file being updated.
1185 		 * Thus, inform threads which need to know accurately
1186 		 * how much data is in the last page of the file.  They
1187 		 * will not do the i/o immediately, but will arrange for
1188 		 * the i/o to happen later when this modify operation
1189 		 * will have finished.
1190 		 *
1191 		 * in similar NFS code, this is done right before the
1192 		 * uiomove(), which is best.  but here in cachefs, we
1193 		 * have two uiomove()s, so we must do it here.
1194 		 */
1195 		ASSERT(!(cp->c_flags & CN_CMODINPROG));
1196 		mutex_enter(&cp->c_statelock);
1197 		cp->c_flags |= CN_CMODINPROG;
1198 		cp->c_modaddr = (offset & (offset_t)MAXBMASK);
1199 		mutex_exit(&cp->c_statelock);
1200 
1201 		/*
1202 		 * Check to see if we can skip reading in the page
1203 		 * and just allocate the memory.  We can do this
1204 		 * if we are going to rewrite the entire mapping
1205 		 * or if we are going to write to or beyond the current
1206 		 * end of file from the beginning of the mapping.
1207 		 */
1208 		if ((offset > (lastpage_off + PAGEOFFSET)) ||
1209 		    ((cp->c_size == 0) && (offset < PAGESIZE)) ||
1210 		    ((uintptr_t)base & PAGEOFFSET) == 0 && (n == PAGESIZE ||
1211 		    ((offset + n) >= cp->c_size))) {
1212 			pagecreate = 1;
1213 
1214 			/*
1215 			 * segmap_pagecreate() returns 1 if it calls
1216 			 * page_create_va() to allocate any pages.
1217 			 */
1218 			newpage = segmap_pagecreate(segkmap,
1219 			    (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK),
1220 			    PAGESIZE, 0);
1221 			/* do not zero page if we are overwriting all of it */
1222 			if (!((((uintptr_t)base & PAGEOFFSET) == 0) &&
1223 			    (n == PAGESIZE))) {
1224 				(void) kzero((void *)
1225 				    ((uintptr_t)base & (uintptr_t)PAGEMASK),
1226 				    PAGESIZE);
1227 			}
1228 			error = uiomove(base, n, UIO_WRITE, uiop);
1229 
1230 			/*
1231 			 * Unlock the page allocated by page_create_va()
1232 			 * in segmap_pagecreate()
1233 			 */
1234 			if (newpage)
1235 				segmap_pageunlock(segkmap,
1236 				    (caddr_t)((uintptr_t)base &
1237 				    (uintptr_t)PAGEMASK),
1238 				    PAGESIZE, S_WRITE);
1239 		} else {
1240 			/*
1241 			 * KLUDGE ! Use segmap_fault instead of faulting and
1242 			 * using as_fault() to avoid a recursive readers lock
1243 			 * on kas.
1244 			 */
1245 			error = segmap_fault(kas.a_hat, segkmap, (caddr_t)
1246 			    ((uintptr_t)base & (uintptr_t)PAGEMASK),
1247 			    PAGESIZE, F_SOFTLOCK, S_WRITE);
1248 			if (error) {
1249 				if (FC_CODE(error) == FC_OBJERR)
1250 					error =  FC_ERRNO(error);
1251 				else
1252 					error = EIO;
1253 				break;
1254 			}
1255 			error = uiomove(base, n, UIO_WRITE, uiop);
1256 			(void) segmap_fault(kas.a_hat, segkmap, (caddr_t)
1257 			    ((uintptr_t)base & (uintptr_t)PAGEMASK),
1258 			    PAGESIZE, F_SOFTUNLOCK, S_WRITE);
1259 		}
1260 		n = (int)(uiop->uio_loffset - offset); /* n = # bytes written */
1261 		base += n;
1262 		tcount -= n;
1263 
1264 		/* get access to the file system */
1265 		if ((terror = cachefs_cd_access(fscp, 0, 1)) != 0) {
1266 			error = terror;
1267 			break;
1268 		}
1269 
1270 		/*
1271 		 * cp->c_attr.va_size is the maximum number of
1272 		 * bytes known to be in the file.
1273 		 * Make sure it is at least as high as the
1274 		 * last byte we just wrote into the buffer.
1275 		 */
1276 		mutex_enter(&cp->c_statelock);
1277 		if (cp->c_size < uiop->uio_loffset) {
1278 			cp->c_size = uiop->uio_loffset;
1279 		}
1280 		if (cp->c_size != cp->c_attr.va_size) {
1281 			cp->c_attr.va_size = cp->c_size;
1282 			cp->c_flags |= CN_UPDATED;
1283 		}
1284 		/* c_size is now correct, so we can clear modinprog */
1285 		cp->c_flags &= ~CN_CMODINPROG;
1286 		if (error == 0) {
1287 			cp->c_flags |= CDIRTY;
1288 			if (pagecreate && (cp->c_flags & CN_NOCACHE) == 0) {
1289 				/*
1290 				 * if we're not in NOCACHE mode
1291 				 * (i.e., single-writer), we update the
1292 				 * allocmap here rather than waiting until
1293 				 * cachefspush is called.  This prevents
1294 				 * getpage from clustering up pages from
1295 				 * the backfile and stomping over the changes
1296 				 * we make here.
1297 				 */
1298 				if (cachefs_charge_page(cp, offset) == 0) {
1299 					cachefs_update_allocmap(cp,
1300 					    offset & (offset_t)PAGEMASK,
1301 					    (size_t)PAGESIZE);
1302 				}
1303 
1304 				/* else we ran out of space */
1305 				else {
1306 					/* nocache file if connected */
1307 					if (fscp->fs_cdconnected ==
1308 					    CFS_CD_CONNECTED)
1309 						cachefs_nocache(cp);
1310 					/*
1311 					 * If disconnected then cannot
1312 					 * nocache the file.  Let it have
1313 					 * the space.
1314 					 */
1315 					else {
1316 						cp->c_metadata.md_frontblks++;
1317 						cp->c_flags |= CN_UPDATED;
1318 						cachefs_update_allocmap(cp,
1319 						    offset & (offset_t)PAGEMASK,
1320 						    (size_t)PAGESIZE);
1321 					}
1322 				}
1323 			}
1324 		}
1325 		mutex_exit(&cp->c_statelock);
1326 		cachefs_cd_release(fscp);
1327 	} while (tcount > 0 && error == 0);
1328 
1329 	if (cp->c_flags & CN_CMODINPROG) {
1330 		/* XXX assert error != 0?  FC_ERRNO() makes this more risky. */
1331 		mutex_enter(&cp->c_statelock);
1332 		cp->c_flags &= ~CN_CMODINPROG;
1333 		mutex_exit(&cp->c_statelock);
1334 	}
1335 
1336 #ifdef CFS_CD_DEBUG
1337 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
1338 #endif
1339 
1340 #ifdef CFSDEBUG
1341 	CFS_DEBUG(CFSDEBUG_VOPS)
1342 		printf("cachefs_writepage: EXIT error %d\n", error);
1343 #endif
1344 
1345 	return (error);
1346 }
1347 
1348 /*
1349  * Pushes out pages to the back and/or front file system.
1350  */
1351 static int
1352 cachefs_push(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
1353     int flags, cred_t *cr)
1354 {
1355 	struct cnode *cp = VTOC(vp);
1356 	struct buf *bp;
1357 	int error;
1358 	fscache_t *fscp = C_TO_FSCACHE(cp);
1359 	u_offset_t iooff;
1360 	size_t iolen;
1361 	u_offset_t lbn;
1362 	u_offset_t lbn_off;
1363 	uint_t bsize;
1364 
1365 	ASSERT((flags & B_ASYNC) == 0);
1366 	ASSERT(!vn_is_readonly(vp));
1367 	ASSERT(pp != NULL);
1368 	ASSERT(cr != NULL);
1369 
1370 	bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
1371 	lbn = pp->p_offset / bsize;
1372 	lbn_off = lbn * bsize;
1373 
1374 	/*
1375 	 * Find a kluster that fits in one block, or in
1376 	 * one page if pages are bigger than blocks.  If
1377 	 * there is less file space allocated than a whole
1378 	 * page, we'll shorten the i/o request below.
1379 	 */
1380 
1381 	pp = pvn_write_kluster(vp, pp, &iooff, &iolen, lbn_off,
1382 	    roundup(bsize, PAGESIZE), flags);
1383 
1384 	/*
1385 	 * The CN_CMODINPROG flag makes sure that we use a correct
1386 	 * value of c_size, below.  CN_CMODINPROG is set in
1387 	 * cachefs_writepage().  When CN_CMODINPROG is set it
1388 	 * indicates that a uiomove() is in progress and the c_size
1389 	 * has not been made consistent with the new size of the
1390 	 * file. When the uiomove() completes the c_size is updated
1391 	 * and the CN_CMODINPROG flag is cleared.
1392 	 *
1393 	 * The CN_CMODINPROG flag makes sure that cachefs_push_front
1394 	 * and cachefs_push_connected see a consistent value of
1395 	 * c_size.  Without this handshaking, it is possible that
1396 	 * these routines will pick up the old value of c_size before
1397 	 * the uiomove() in cachefs_writepage() completes.  This will
1398 	 * result in the vn_rdwr() being too small, and data loss.
1399 	 *
1400 	 * More precisely, there is a window between the time the
1401 	 * uiomove() completes and the time the c_size is updated. If
1402 	 * a VOP_PUTPAGE() operation intervenes in this window, the
1403 	 * page will be picked up, because it is dirty; it will be
1404 	 * unlocked, unless it was pagecreate'd. When the page is
1405 	 * picked up as dirty, the dirty bit is reset
1406 	 * (pvn_getdirty()). In cachefs_push_connected(), c_size is
1407 	 * checked.  This will still be the old size.  Therefore, the
1408 	 * page will not be written out to the correct length, and the
1409 	 * page will be clean, so the data may disappear.
1410 	 */
1411 	if (cp->c_flags & CN_CMODINPROG) {
1412 		mutex_enter(&cp->c_statelock);
1413 		if ((cp->c_flags & CN_CMODINPROG) &&
1414 		    cp->c_modaddr + MAXBSIZE > iooff &&
1415 		    cp->c_modaddr < iooff + iolen) {
1416 			page_t *plist;
1417 
1418 			/*
1419 			 * A write is in progress for this region of
1420 			 * the file.  If we did not detect
1421 			 * CN_CMODINPROG here then this path through
1422 			 * cachefs_push_connected() would eventually
1423 			 * do the vn_rdwr() and may not write out all
1424 			 * of the data in the pages.  We end up losing
1425 			 * data. So we decide to set the modified bit
1426 			 * on each page in the page list and mark the
1427 			 * cnode with CDIRTY.  This push will be
1428 			 * restarted at some later time.
1429 			 */
1430 
1431 			plist = pp;
1432 			while (plist != NULL) {
1433 				pp = plist;
1434 				page_sub(&plist, pp);
1435 				hat_setmod(pp);
1436 				page_io_unlock(pp);
1437 				page_unlock(pp);
1438 			}
1439 			cp->c_flags |= CDIRTY;
1440 			mutex_exit(&cp->c_statelock);
1441 			if (offp)
1442 				*offp = iooff;
1443 			if (lenp)
1444 				*lenp = iolen;
1445 			return (0);
1446 		}
1447 		mutex_exit(&cp->c_statelock);
1448 	}
1449 
1450 	/*
1451 	 * Set the pages up for pageout.
1452 	 */
1453 	bp = pageio_setup(pp, iolen, CTOV(cp), B_WRITE | flags);
1454 	if (bp == NULL) {
1455 
1456 		/*
1457 		 * currently, there is no way for pageio_setup() to
1458 		 * return NULL, since it uses its own scheme for
1459 		 * kmem_alloc()ing that shouldn't return NULL, and
1460 		 * since pageio_setup() itself dereferences the thing
1461 		 * it's about to return.  still, we need to be ready
1462 		 * in case this ever does start happening.
1463 		 */
1464 
1465 		error = ENOMEM;
1466 		goto writedone;
1467 	}
1468 	/*
1469 	 * pageio_setup should have set b_addr to 0.  This
1470 	 * is correct since we want to do I/O on a page
1471 	 * boundary.  bp_mapin will use this addr to calculate
1472 	 * an offset, and then set b_addr to the kernel virtual
1473 	 * address it allocated for us.
1474 	 */
1475 	bp->b_edev = 0;
1476 	bp->b_dev = 0;
1477 	bp->b_lblkno = (diskaddr_t)lbtodb(iooff);
1478 	bp_mapin(bp);
1479 
1480 	iolen  = cp->c_size - ldbtob(bp->b_blkno);
1481 	if (iolen > bp->b_bcount)
1482 		iolen  = bp->b_bcount;
1483 
1484 	/* if connected */
1485 	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1486 		/* write to the back file first */
1487 		error = cachefs_push_connected(vp, bp, iolen, iooff, cr);
1488 
1489 		/* write to the front file if allowed */
1490 		if ((error == 0) && CFS_ISFS_NONSHARED(fscp) &&
1491 		    ((cp->c_flags & CN_NOCACHE) == 0)) {
1492 			/* try to write to the front file */
1493 			(void) cachefs_push_front(vp, bp, iolen, iooff, cr);
1494 		}
1495 	}
1496 
1497 	/* else if disconnected */
1498 	else {
1499 		/* try to write to the front file */
1500 		error = cachefs_push_front(vp, bp, iolen, iooff, cr);
1501 	}
1502 
1503 	bp_mapout(bp);
1504 	pageio_done(bp);
1505 
1506 writedone:
1507 
1508 	pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
1509 	if (offp)
1510 		*offp = iooff;
1511 	if (lenp)
1512 		*lenp = iolen;
1513 
1514 	/* XXX ask bob mastors how to fix this someday */
1515 	mutex_enter(&cp->c_statelock);
1516 	if (error) {
1517 		if (error == ENOSPC) {
1518 			if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) ||
1519 			    CFS_ISFS_SOFT(fscp)) {
1520 				CFSOP_INVALIDATE_COBJECT(fscp, cp, cr);
1521 				cp->c_error = error;
1522 			}
1523 		} else if ((CFS_TIMEOUT(fscp, error) == 0) &&
1524 		    (error != EINTR)) {
1525 			CFSOP_INVALIDATE_COBJECT(fscp, cp, cr);
1526 			cp->c_error = error;
1527 		}
1528 	} else if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1529 		CFSOP_MODIFY_COBJECT(fscp, cp, cr);
1530 	}
1531 	mutex_exit(&cp->c_statelock);
1532 
1533 	return (error);
1534 }
1535 
1536 /*
1537  * Pushes out pages to the back file system.
1538  */
1539 static int
1540 cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen,
1541     u_offset_t iooff, cred_t *cr)
1542 {
1543 	struct cnode *cp = VTOC(vp);
1544 	int error = 0;
1545 	int mode = 0;
1546 	fscache_t *fscp = C_TO_FSCACHE(cp);
1547 	ssize_t resid;
1548 	vnode_t *backvp;
1549 
1550 	/* get the back file if necessary */
1551 	mutex_enter(&cp->c_statelock);
1552 	if (cp->c_backvp == NULL) {
1553 		error = cachefs_getbackvp(fscp, cp);
1554 		if (error) {
1555 			mutex_exit(&cp->c_statelock);
1556 			goto out;
1557 		}
1558 	}
1559 	backvp = cp->c_backvp;
1560 	VN_HOLD(backvp);
1561 	mutex_exit(&cp->c_statelock);
1562 
1563 	if (CFS_ISFS_NONSHARED(fscp) && CFS_ISFS_SNR(fscp))
1564 		mode = FSYNC;
1565 
1566 	/* write to the back file */
1567 	error = bp->b_error = vn_rdwr(UIO_WRITE, backvp, bp->b_un.b_addr,
1568 	    iolen, iooff, UIO_SYSSPACE, mode,
1569 	    RLIM64_INFINITY, cr, &resid);
1570 	if (error) {
1571 #ifdef CFSDEBUG
1572 		CFS_DEBUG(CFSDEBUG_VOPS | CFSDEBUG_BACK)
1573 			printf("cachefspush: error %d cr %p\n",
1574 			    error, (void *)cr);
1575 #endif
1576 		bp->b_flags |= B_ERROR;
1577 	}
1578 	VN_RELE(backvp);
1579 out:
1580 	return (error);
1581 }
1582 
1583 /*
1584  * Pushes out pages to the front file system.
1585  * Called for both connected and disconnected states.
1586  */
1587 static int
1588 cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen,
1589     u_offset_t iooff, cred_t *cr)
1590 {
1591 	struct cnode *cp = VTOC(vp);
1592 	fscache_t *fscp = C_TO_FSCACHE(cp);
1593 	int error = 0;
1594 	ssize_t resid;
1595 	u_offset_t popoff;
1596 	off_t commit = 0;
1597 	uint_t seq;
1598 	enum cachefs_rl_type type;
1599 	vnode_t *frontvp = NULL;
1600 
1601 	mutex_enter(&cp->c_statelock);
1602 
1603 	if (!CFS_ISFS_NONSHARED(fscp)) {
1604 		error = ETIMEDOUT;
1605 		goto out;
1606 	}
1607 
1608 	/* get the front file if necessary */
1609 	if ((cp->c_frontvp == NULL) &&
1610 	    ((cp->c_flags & CN_NOCACHE) == 0)) {
1611 		(void) cachefs_getfrontfile(cp);
1612 	}
1613 	if (cp->c_flags & CN_NOCACHE) {
1614 		error = ETIMEDOUT;
1615 		goto out;
1616 	}
1617 
1618 	/* if disconnected, needs to be populated and have good attributes */
1619 	if ((fscp->fs_cdconnected != CFS_CD_CONNECTED) &&
1620 	    (((cp->c_metadata.md_flags & MD_POPULATED) == 0) ||
1621 	    (cp->c_metadata.md_flags & MD_NEEDATTRS))) {
1622 		error = ETIMEDOUT;
1623 		goto out;
1624 	}
1625 
1626 	for (popoff = iooff; popoff < (iooff + iolen); popoff += MAXBSIZE) {
1627 		if (cachefs_charge_page(cp, popoff)) {
1628 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1629 				cachefs_nocache(cp);
1630 				goto out;
1631 			} else {
1632 				error = ENOSPC;
1633 				goto out;
1634 			}
1635 		}
1636 	}
1637 
1638 	if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
1639 		/* log the first putpage to a file */
1640 		if ((cp->c_metadata.md_flags & MD_PUTPAGE) == 0) {
1641 			/* uses open's creds if we have them */
1642 			if (cp->c_cred)
1643 				cr = cp->c_cred;
1644 
1645 			if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
1646 				error = cachefs_dlog_cidmap(fscp);
1647 				if (error) {
1648 					error = ENOSPC;
1649 					goto out;
1650 				}
1651 				cp->c_metadata.md_flags |= MD_MAPPING;
1652 			}
1653 
1654 			commit = cachefs_dlog_modify(fscp, cp, cr, &seq);
1655 			if (commit == 0) {
1656 				/* out of space */
1657 				error = ENOSPC;
1658 				goto out;
1659 			}
1660 
1661 			cp->c_metadata.md_seq = seq;
1662 			type = cp->c_metadata.md_rltype;
1663 			cachefs_modified(cp);
1664 			cp->c_metadata.md_flags |= MD_PUTPAGE;
1665 			cp->c_metadata.md_flags &= ~MD_PUSHDONE;
1666 			cp->c_flags |= CN_UPDATED;
1667 		}
1668 
1669 		/* subsequent putpages just get a new sequence number */
1670 		else {
1671 			/* but only if it matters */
1672 			if (cp->c_metadata.md_seq != fscp->fs_dlogseq) {
1673 				seq = cachefs_dlog_seqnext(fscp);
1674 				if (seq == 0) {
1675 					error = ENOSPC;
1676 					goto out;
1677 				}
1678 				cp->c_metadata.md_seq = seq;
1679 				cp->c_flags |= CN_UPDATED;
1680 				/* XXX maybe should do write_metadata here */
1681 			}
1682 		}
1683 	}
1684 
1685 	frontvp = cp->c_frontvp;
1686 	VN_HOLD(frontvp);
1687 	mutex_exit(&cp->c_statelock);
1688 	error = bp->b_error = vn_rdwr(UIO_WRITE, frontvp,
1689 	    bp->b_un.b_addr, iolen, iooff, UIO_SYSSPACE, 0,
1690 	    RLIM64_INFINITY, kcred, &resid);
1691 	mutex_enter(&cp->c_statelock);
1692 	VN_RELE(frontvp);
1693 	frontvp = NULL;
1694 	if (error) {
1695 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1696 			cachefs_nocache(cp);
1697 			error = 0;
1698 			goto out;
1699 		} else {
1700 			goto out;
1701 		}
1702 	}
1703 
1704 	(void) cachefs_update_allocmap(cp, iooff, iolen);
1705 	cp->c_flags |= (CN_UPDATED | CN_NEED_FRONT_SYNC |
1706 	    CN_POPULATION_PENDING);
1707 	if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
1708 		gethrestime(&cp->c_metadata.md_localmtime);
1709 		cp->c_metadata.md_flags |= MD_LOCALMTIME;
1710 	}
1711 
1712 out:
1713 	if (commit) {
1714 		/* commit the log record */
1715 		ASSERT(fscp->fs_cdconnected == CFS_CD_DISCONNECTED);
1716 		if (cachefs_dlog_commit(fscp, commit, error)) {
1717 			/*EMPTY*/
1718 			/* XXX fix on panic */
1719 		}
1720 	}
1721 
1722 	if (error && commit) {
1723 		cp->c_metadata.md_flags &= ~MD_PUTPAGE;
1724 		cachefs_rlent_moveto(fscp->fs_cache, type,
1725 		    cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks);
1726 		cp->c_metadata.md_rltype = type;
1727 		cp->c_flags |= CN_UPDATED;
1728 	}
1729 	mutex_exit(&cp->c_statelock);
1730 	return (error);
1731 }
1732 
1733 /*ARGSUSED*/
1734 static int
1735 cachefs_dump(struct vnode *vp, caddr_t foo1, offset_t foo2, offset_t foo3,
1736     caller_context_t *ct)
1737 {
1738 	return (ENOSYS); /* should we panic if we get here? */
1739 }
1740 
1741 /*ARGSUSED*/
1742 static int
1743 cachefs_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, cred_t *cred,
1744 	int *rvalp, caller_context_t *ct)
1745 {
1746 	int error;
1747 	struct cnode *cp = VTOC(vp);
1748 	struct fscache *fscp = C_TO_FSCACHE(cp);
1749 	struct cachefscache *cachep;
1750 	extern kmutex_t cachefs_cachelock;
1751 	extern cachefscache_t *cachefs_cachelist;
1752 	cachefsio_pack_t *packp;
1753 	STRUCT_DECL(cachefsio_dcmd, dcmd);
1754 	int	inlen, outlen;	/* LP64: generic int for struct in/out len */
1755 	void *dinp, *doutp;
1756 	int (*dcmd_routine)(vnode_t *, void *, void *);
1757 
1758 	if (getzoneid() != GLOBAL_ZONEID)
1759 		return (EPERM);
1760 
1761 	/*
1762 	 * Cachefs only provides pass-through support for NFSv4,
1763 	 * and all vnode operations are passed through to the
1764 	 * back file system. For NFSv4 pass-through to work, only
1765 	 * connected operation is supported, the cnode backvp must
1766 	 * exist, and cachefs optional (eg., disconnectable) flags
1767 	 * are turned off. Assert these conditions which ensure
1768 	 * that only a subset of the ioctls are "truly supported"
1769 	 * for NFSv4 (these are CFSDCMD_DAEMONID and CFSDCMD_GETSTATS.
1770 	 * The packing operations are meaningless since there is
1771 	 * no caching for NFSv4, and the called functions silently
1772 	 * return if the backfilesystem is NFSv4. The daemon
1773 	 * commands except for those above are essentially used
1774 	 * for disconnectable operation support (including log
1775 	 * rolling), so in each called function, we assert that
1776 	 * NFSv4 is not in use. The _FIO* calls (except _FIOCOD)
1777 	 * are from "cfsfstype" which is not a documented
1778 	 * command. However, the command is visible in
1779 	 * /usr/lib/fs/cachefs so the commands are simply let
1780 	 * through (don't seem to impact pass-through functionality).
1781 	 */
1782 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
1783 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
1784 
1785 	switch (cmd) {
1786 	case CACHEFSIO_PACK:
1787 		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1788 		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1789 		if (!error)
1790 			error = cachefs_pack(vp, packp->p_name, cred);
1791 		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1792 		break;
1793 
1794 	case CACHEFSIO_UNPACK:
1795 		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1796 		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1797 		if (!error)
1798 			error = cachefs_unpack(vp, packp->p_name, cred);
1799 		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1800 		break;
1801 
1802 	case CACHEFSIO_PACKINFO:
1803 		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1804 		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1805 		if (!error)
1806 			error = cachefs_packinfo(vp, packp->p_name,
1807 			    &packp->p_status, cred);
1808 		if (!error)
1809 			error = xcopyout(packp, (void *)arg,
1810 			    sizeof (cachefsio_pack_t));
1811 		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1812 		break;
1813 
1814 	case CACHEFSIO_UNPACKALL:
1815 		error = cachefs_unpackall(vp);
1816 		break;
1817 
1818 	case CACHEFSIO_DCMD:
1819 		/*
1820 		 * This is a private interface between the cachefsd and
1821 		 * this file system.
1822 		 */
1823 
1824 		/* must be root to use these commands */
1825 		if (secpolicy_fs_config(cred, vp->v_vfsp) != 0)
1826 			return (EPERM);
1827 
1828 		/* get the command packet */
1829 		STRUCT_INIT(dcmd, flag & DATAMODEL_MASK);
1830 		error = xcopyin((void *)arg, STRUCT_BUF(dcmd),
1831 		    SIZEOF_STRUCT(cachefsio_dcmd, DATAMODEL_NATIVE));
1832 		if (error)
1833 			return (error);
1834 
1835 		/* copy in the data for the operation */
1836 		dinp = NULL;
1837 		if ((inlen = STRUCT_FGET(dcmd, d_slen)) > 0) {
1838 			dinp = cachefs_kmem_alloc(inlen, KM_SLEEP);
1839 			error = xcopyin(STRUCT_FGETP(dcmd, d_sdata), dinp,
1840 			    inlen);
1841 			if (error)
1842 				return (error);
1843 		}
1844 
1845 		/* allocate space for the result */
1846 		doutp = NULL;
1847 		if ((outlen = STRUCT_FGET(dcmd, d_rlen)) > 0)
1848 			doutp = cachefs_kmem_alloc(outlen, KM_SLEEP);
1849 
1850 		/*
1851 		 * Assert NFSv4 only allows the daemonid and getstats
1852 		 * daemon requests
1853 		 */
1854 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0 ||
1855 		    STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_DAEMONID ||
1856 		    STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_GETSTATS);
1857 
1858 		/* get the routine to execute */
1859 		dcmd_routine = NULL;
1860 		switch (STRUCT_FGET(dcmd, d_cmd)) {
1861 		case CFSDCMD_DAEMONID:
1862 			dcmd_routine = cachefs_io_daemonid;
1863 			break;
1864 		case CFSDCMD_STATEGET:
1865 			dcmd_routine = cachefs_io_stateget;
1866 			break;
1867 		case CFSDCMD_STATESET:
1868 			dcmd_routine = cachefs_io_stateset;
1869 			break;
1870 		case CFSDCMD_XWAIT:
1871 			dcmd_routine = cachefs_io_xwait;
1872 			break;
1873 		case CFSDCMD_EXISTS:
1874 			dcmd_routine = cachefs_io_exists;
1875 			break;
1876 		case CFSDCMD_LOSTFOUND:
1877 			dcmd_routine = cachefs_io_lostfound;
1878 			break;
1879 		case CFSDCMD_GETINFO:
1880 			dcmd_routine = cachefs_io_getinfo;
1881 			break;
1882 		case CFSDCMD_CIDTOFID:
1883 			dcmd_routine = cachefs_io_cidtofid;
1884 			break;
1885 		case CFSDCMD_GETATTRFID:
1886 			dcmd_routine = cachefs_io_getattrfid;
1887 			break;
1888 		case CFSDCMD_GETATTRNAME:
1889 			dcmd_routine = cachefs_io_getattrname;
1890 			break;
1891 		case CFSDCMD_GETSTATS:
1892 			dcmd_routine = cachefs_io_getstats;
1893 			break;
1894 		case CFSDCMD_ROOTFID:
1895 			dcmd_routine = cachefs_io_rootfid;
1896 			break;
1897 		case CFSDCMD_CREATE:
1898 			dcmd_routine = cachefs_io_create;
1899 			break;
1900 		case CFSDCMD_REMOVE:
1901 			dcmd_routine = cachefs_io_remove;
1902 			break;
1903 		case CFSDCMD_LINK:
1904 			dcmd_routine = cachefs_io_link;
1905 			break;
1906 		case CFSDCMD_RENAME:
1907 			dcmd_routine = cachefs_io_rename;
1908 			break;
1909 		case CFSDCMD_MKDIR:
1910 			dcmd_routine = cachefs_io_mkdir;
1911 			break;
1912 		case CFSDCMD_RMDIR:
1913 			dcmd_routine = cachefs_io_rmdir;
1914 			break;
1915 		case CFSDCMD_SYMLINK:
1916 			dcmd_routine = cachefs_io_symlink;
1917 			break;
1918 		case CFSDCMD_SETATTR:
1919 			dcmd_routine = cachefs_io_setattr;
1920 			break;
1921 		case CFSDCMD_SETSECATTR:
1922 			dcmd_routine = cachefs_io_setsecattr;
1923 			break;
1924 		case CFSDCMD_PUSHBACK:
1925 			dcmd_routine = cachefs_io_pushback;
1926 			break;
1927 		default:
1928 			error = ENOTTY;
1929 			break;
1930 		}
1931 
1932 		/* execute the routine */
1933 		if (dcmd_routine)
1934 			error = (*dcmd_routine)(vp, dinp, doutp);
1935 
1936 		/* copy out the result */
1937 		if ((error == 0) && doutp)
1938 			error = xcopyout(doutp, STRUCT_FGETP(dcmd, d_rdata),
1939 			    outlen);
1940 
1941 		/* free allocated memory */
1942 		if (dinp)
1943 			cachefs_kmem_free(dinp, inlen);
1944 		if (doutp)
1945 			cachefs_kmem_free(doutp, outlen);
1946 
1947 		break;
1948 
1949 	case _FIOCOD:
1950 		if (secpolicy_fs_config(cred, vp->v_vfsp) != 0) {
1951 			error = EPERM;
1952 			break;
1953 		}
1954 
1955 		error = EBUSY;
1956 		if (arg) {
1957 			/* non-zero arg means do all filesystems */
1958 			mutex_enter(&cachefs_cachelock);
1959 			for (cachep = cachefs_cachelist; cachep != NULL;
1960 			    cachep = cachep->c_next) {
1961 				mutex_enter(&cachep->c_fslistlock);
1962 				for (fscp = cachep->c_fslist;
1963 				    fscp != NULL;
1964 				    fscp = fscp->fs_next) {
1965 					if (CFS_ISFS_CODCONST(fscp)) {
1966 						gethrestime(&fscp->fs_cod_time);
1967 						error = 0;
1968 					}
1969 				}
1970 				mutex_exit(&cachep->c_fslistlock);
1971 			}
1972 			mutex_exit(&cachefs_cachelock);
1973 		} else {
1974 			if (CFS_ISFS_CODCONST(fscp)) {
1975 				gethrestime(&fscp->fs_cod_time);
1976 				error = 0;
1977 			}
1978 		}
1979 		break;
1980 
1981 	case _FIOSTOPCACHE:
1982 		error = cachefs_stop_cache(cp);
1983 		break;
1984 
1985 	default:
1986 		error = ENOTTY;
1987 		break;
1988 	}
1989 
1990 	/* return the result */
1991 	return (error);
1992 }
1993 
1994 ino64_t
1995 cachefs_fileno_conflict(fscache_t *fscp, ino64_t old)
1996 {
1997 	ino64_t new;
1998 
1999 	ASSERT(MUTEX_HELD(&fscp->fs_fslock));
2000 
2001 	for (;;) {
2002 		fscp->fs_info.fi_localfileno++;
2003 		if (fscp->fs_info.fi_localfileno == 0)
2004 			fscp->fs_info.fi_localfileno = 3;
2005 		fscp->fs_flags |= CFS_FS_DIRTYINFO;
2006 
2007 		new = fscp->fs_info.fi_localfileno;
2008 		if (! cachefs_fileno_inuse(fscp, new))
2009 			break;
2010 	}
2011 
2012 	cachefs_inum_register(fscp, old, new);
2013 	cachefs_inum_register(fscp, new, 0);
2014 	return (new);
2015 }
2016 
2017 /*ARGSUSED*/
2018 static int
2019 cachefs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
2020 	caller_context_t *ct)
2021 {
2022 	struct cnode *cp = VTOC(vp);
2023 	fscache_t *fscp = C_TO_FSCACHE(cp);
2024 	int error = 0;
2025 	int held = 0;
2026 	int connected = 0;
2027 
2028 #ifdef CFSDEBUG
2029 	CFS_DEBUG(CFSDEBUG_VOPS)
2030 		printf("cachefs_getattr: ENTER vp %p\n", (void *)vp);
2031 #endif
2032 
2033 	if (getzoneid() != GLOBAL_ZONEID)
2034 		return (EPERM);
2035 
2036 	/* Call backfilesystem getattr if NFSv4 */
2037 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
2038 		error = cachefs_getattr_backfs_nfsv4(vp, vap, flags, cr, ct);
2039 		goto out;
2040 	}
2041 
2042 	/*
2043 	 * If it has been specified that the return value will
2044 	 * just be used as a hint, and we are only being asked
2045 	 * for size, fsid or rdevid, then return the client's
2046 	 * notion of these values without checking to make sure
2047 	 * that the attribute cache is up to date.
2048 	 * The whole point is to avoid an over the wire GETATTR
2049 	 * call.
2050 	 */
2051 	if (flags & ATTR_HINT) {
2052 		if (vap->va_mask ==
2053 		    (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
2054 			if (vap->va_mask | AT_SIZE)
2055 				vap->va_size = cp->c_size;
2056 			/*
2057 			 * Return the FSID of the cachefs filesystem,
2058 			 * not the back filesystem
2059 			 */
2060 			if (vap->va_mask | AT_FSID)
2061 				vap->va_fsid = vp->v_vfsp->vfs_dev;
2062 			if (vap->va_mask | AT_RDEV)
2063 				vap->va_rdev = cp->c_attr.va_rdev;
2064 			return (0);
2065 		}
2066 	}
2067 
2068 	/*
2069 	 * Only need to flush pages if asking for the mtime
2070 	 * and if there any dirty pages.
2071 	 */
2072 	if (vap->va_mask & AT_MTIME) {
2073 		/*EMPTY*/
2074 #if 0
2075 		/*
2076 		 * XXX bob: stolen from nfs code, need to do something similar
2077 		 */
2078 		rp = VTOR(vp);
2079 		if ((rp->r_flags & RDIRTY) || rp->r_iocnt > 0)
2080 			(void) nfs3_putpage(vp, (offset_t)0, 0, 0, cr);
2081 #endif
2082 	}
2083 
2084 	for (;;) {
2085 		/* get (or renew) access to the file system */
2086 		if (held) {
2087 			cachefs_cd_release(fscp);
2088 			held = 0;
2089 		}
2090 		error = cachefs_cd_access(fscp, connected, 0);
2091 		if (error)
2092 			goto out;
2093 		held = 1;
2094 
2095 		/*
2096 		 * If it has been specified that the return value will
2097 		 * just be used as a hint, and we are only being asked
2098 		 * for size, fsid or rdevid, then return the client's
2099 		 * notion of these values without checking to make sure
2100 		 * that the attribute cache is up to date.
2101 		 * The whole point is to avoid an over the wire GETATTR
2102 		 * call.
2103 		 */
2104 		if (flags & ATTR_HINT) {
2105 			if (vap->va_mask ==
2106 			    (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
2107 				if (vap->va_mask | AT_SIZE)
2108 					vap->va_size = cp->c_size;
2109 				/*
2110 				 * Return the FSID of the cachefs filesystem,
2111 				 * not the back filesystem
2112 				 */
2113 				if (vap->va_mask | AT_FSID)
2114 					vap->va_fsid = vp->v_vfsp->vfs_dev;
2115 				if (vap->va_mask | AT_RDEV)
2116 					vap->va_rdev = cp->c_attr.va_rdev;
2117 				goto out;
2118 			}
2119 		}
2120 
2121 		mutex_enter(&cp->c_statelock);
2122 		if ((cp->c_metadata.md_flags & MD_NEEDATTRS) &&
2123 		    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
2124 			mutex_exit(&cp->c_statelock);
2125 			connected = 1;
2126 			continue;
2127 		}
2128 
2129 		error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2130 		if (CFS_TIMEOUT(fscp, error)) {
2131 			mutex_exit(&cp->c_statelock);
2132 			cachefs_cd_release(fscp);
2133 			held = 0;
2134 			cachefs_cd_timedout(fscp);
2135 			continue;
2136 		}
2137 		if (error) {
2138 			mutex_exit(&cp->c_statelock);
2139 			break;
2140 		}
2141 
2142 		/* check for fileno conflict */
2143 		if ((fscp->fs_inum_size > 0) &&
2144 		    ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) {
2145 			ino64_t fakenum;
2146 
2147 			mutex_exit(&cp->c_statelock);
2148 			mutex_enter(&fscp->fs_fslock);
2149 			fakenum = cachefs_inum_real2fake(fscp,
2150 			    cp->c_attr.va_nodeid);
2151 			if (fakenum == 0) {
2152 				fakenum = cachefs_fileno_conflict(fscp,
2153 				    cp->c_attr.va_nodeid);
2154 			}
2155 			mutex_exit(&fscp->fs_fslock);
2156 
2157 			mutex_enter(&cp->c_statelock);
2158 			cp->c_metadata.md_flags |= MD_LOCALFILENO;
2159 			cp->c_metadata.md_localfileno = fakenum;
2160 			cp->c_flags |= CN_UPDATED;
2161 		}
2162 
2163 		/* copy out the attributes */
2164 		*vap = cp->c_attr;
2165 
2166 		/*
2167 		 * return the FSID of the cachefs filesystem,
2168 		 * not the back filesystem
2169 		 */
2170 		vap->va_fsid = vp->v_vfsp->vfs_dev;
2171 
2172 		/* return our idea of the size */
2173 		if (cp->c_size > vap->va_size)
2174 			vap->va_size = cp->c_size;
2175 
2176 		/* overwrite with our version of fileno and timestamps */
2177 		vap->va_nodeid = cp->c_metadata.md_localfileno;
2178 		vap->va_mtime = cp->c_metadata.md_localmtime;
2179 		vap->va_ctime = cp->c_metadata.md_localctime;
2180 
2181 		mutex_exit(&cp->c_statelock);
2182 		break;
2183 	}
2184 out:
2185 	if (held)
2186 		cachefs_cd_release(fscp);
2187 #ifdef CFS_CD_DEBUG
2188 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2189 #endif
2190 
2191 #ifdef CFSDEBUG
2192 	CFS_DEBUG(CFSDEBUG_VOPS)
2193 		printf("cachefs_getattr: EXIT error = %d\n", error);
2194 #endif
2195 	return (error);
2196 }
2197 
2198 /*
2199  * cachefs_getattr_backfs_nfsv4
2200  *
2201  * Call NFSv4 back filesystem to handle the getattr (cachefs
2202  * pass-through support for NFSv4).
2203  */
2204 static int
2205 cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap,
2206     int flags, cred_t *cr, caller_context_t *ct)
2207 {
2208 	cnode_t *cp = VTOC(vp);
2209 	fscache_t *fscp = C_TO_FSCACHE(cp);
2210 	vnode_t *backvp;
2211 	int error;
2212 
2213 	/*
2214 	 * For NFSv4 pass-through to work, only connected operation
2215 	 * is supported, the cnode backvp must exist, and cachefs
2216 	 * optional (eg., disconnectable) flags are turned off. Assert
2217 	 * these conditions for the getattr operation.
2218 	 */
2219 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2220 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2221 
2222 	/* Call backfs vnode op after extracting backvp */
2223 	mutex_enter(&cp->c_statelock);
2224 	backvp = cp->c_backvp;
2225 	mutex_exit(&cp->c_statelock);
2226 
2227 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_getattr_backfs_nfsv4: cnode %p,"
2228 	    " backvp %p\n", cp, backvp));
2229 	error = VOP_GETATTR(backvp, vap, flags, cr, ct);
2230 
2231 	/* Update attributes */
2232 	cp->c_attr = *vap;
2233 
2234 	/*
2235 	 * return the FSID of the cachefs filesystem,
2236 	 * not the back filesystem
2237 	 */
2238 	vap->va_fsid = vp->v_vfsp->vfs_dev;
2239 
2240 	return (error);
2241 }
2242 
2243 /*ARGSUSED4*/
2244 static int
2245 cachefs_setattr(
2246 	vnode_t *vp,
2247 	vattr_t *vap,
2248 	int flags,
2249 	cred_t *cr,
2250 	caller_context_t *ct)
2251 {
2252 	cnode_t *cp = VTOC(vp);
2253 	fscache_t *fscp = C_TO_FSCACHE(cp);
2254 	int error;
2255 	int connected;
2256 	int held = 0;
2257 
2258 	if (getzoneid() != GLOBAL_ZONEID)
2259 		return (EPERM);
2260 
2261 	/*
2262 	 * Cachefs only provides pass-through support for NFSv4,
2263 	 * and all vnode operations are passed through to the
2264 	 * back file system. For NFSv4 pass-through to work, only
2265 	 * connected operation is supported, the cnode backvp must
2266 	 * exist, and cachefs optional (eg., disconnectable) flags
2267 	 * are turned off. Assert these conditions to ensure that
2268 	 * the backfilesystem is called for the setattr operation.
2269 	 */
2270 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2271 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2272 
2273 	connected = 0;
2274 	for (;;) {
2275 		/* drop hold on file system */
2276 		if (held) {
2277 			/* Won't loop with NFSv4 connected behavior */
2278 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2279 			cachefs_cd_release(fscp);
2280 			held = 0;
2281 		}
2282 
2283 		/* acquire access to the file system */
2284 		error = cachefs_cd_access(fscp, connected, 1);
2285 		if (error)
2286 			break;
2287 		held = 1;
2288 
2289 		/* perform the setattr */
2290 		error = cachefs_setattr_common(vp, vap, flags, cr, ct);
2291 		if (error) {
2292 			/* if connected */
2293 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2294 				if (CFS_TIMEOUT(fscp, error)) {
2295 					cachefs_cd_release(fscp);
2296 					held = 0;
2297 					cachefs_cd_timedout(fscp);
2298 					connected = 0;
2299 					continue;
2300 				}
2301 			}
2302 
2303 			/* else must be disconnected */
2304 			else {
2305 				if (CFS_TIMEOUT(fscp, error)) {
2306 					connected = 1;
2307 					continue;
2308 				}
2309 			}
2310 		}
2311 		break;
2312 	}
2313 
2314 	if (held) {
2315 		cachefs_cd_release(fscp);
2316 	}
2317 #ifdef CFS_CD_DEBUG
2318 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2319 #endif
2320 	return (error);
2321 }
2322 
2323 static int
2324 cachefs_setattr_common(
2325 	vnode_t *vp,
2326 	vattr_t *vap,
2327 	int flags,
2328 	cred_t *cr,
2329 	caller_context_t *ct)
2330 {
2331 	cnode_t *cp = VTOC(vp);
2332 	fscache_t *fscp = C_TO_FSCACHE(cp);
2333 	cachefscache_t *cachep = fscp->fs_cache;
2334 	uint_t mask = vap->va_mask;
2335 	int error = 0;
2336 	uint_t bcnt;
2337 
2338 	/* Cannot set these attributes. */
2339 	if (mask & AT_NOSET)
2340 		return (EINVAL);
2341 
2342 	/*
2343 	 * Truncate file.  Must have write permission and not be a directory.
2344 	 */
2345 	if (mask & AT_SIZE) {
2346 		if (vp->v_type == VDIR) {
2347 			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE))
2348 				cachefs_log_truncate(cachep, EISDIR,
2349 				    fscp->fs_cfsvfsp,
2350 				    &cp->c_metadata.md_cookie,
2351 				    cp->c_id.cid_fileno,
2352 				    crgetuid(cr), vap->va_size);
2353 			return (EISDIR);
2354 		}
2355 	}
2356 
2357 	/*
2358 	 * Gotta deal with one special case here, where we're setting the
2359 	 * size of the file. First, we zero out part of the page after the
2360 	 * new size of the file. Then we toss (not write) all pages after
2361 	 * page in which the new offset occurs. Note that the NULL passed
2362 	 * in instead of a putapage() fn parameter is correct, since
2363 	 * no dirty pages will be found (B_TRUNC | B_INVAL).
2364 	 */
2365 
2366 	rw_enter(&cp->c_rwlock, RW_WRITER);
2367 
2368 	/* sync dirty pages */
2369 	if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
2370 		error = cachefs_putpage_common(vp, (offset_t)0, 0, 0, cr);
2371 		if (error == EINTR)
2372 			goto out;
2373 	}
2374 	error = 0;
2375 
2376 	/* if connected */
2377 	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2378 		error = cachefs_setattr_connected(vp, vap, flags, cr, ct);
2379 	}
2380 	/* else must be disconnected */
2381 	else {
2382 		error = cachefs_setattr_disconnected(vp, vap, flags, cr, ct);
2383 	}
2384 	if (error)
2385 		goto out;
2386 
2387 	/*
2388 	 * If the file size has been changed then
2389 	 * toss whole pages beyond the end of the file and zero
2390 	 * the portion of the last page that is beyond the end of the file.
2391 	 */
2392 	if (mask & AT_SIZE && !CFS_ISFS_BACKFS_NFSV4(fscp)) {
2393 		bcnt = (uint_t)(cp->c_size & PAGEOFFSET);
2394 		if (bcnt)
2395 			pvn_vpzero(vp, cp->c_size, PAGESIZE - bcnt);
2396 		(void) pvn_vplist_dirty(vp, cp->c_size, cachefs_push,
2397 		    B_TRUNC | B_INVAL, cr);
2398 	}
2399 
2400 out:
2401 	rw_exit(&cp->c_rwlock);
2402 
2403 	if ((mask & AT_SIZE) &&
2404 	    (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE)))
2405 		cachefs_log_truncate(cachep, error, fscp->fs_cfsvfsp,
2406 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
2407 		    crgetuid(cr), vap->va_size);
2408 
2409 	return (error);
2410 }
2411 
2412 static int
2413 cachefs_setattr_connected(
2414 	vnode_t *vp,
2415 	vattr_t *vap,
2416 	int flags,
2417 	cred_t *cr,
2418 	caller_context_t *ct)
2419 {
2420 	cnode_t *cp = VTOC(vp);
2421 	fscache_t *fscp = C_TO_FSCACHE(cp);
2422 	uint_t mask = vap->va_mask;
2423 	int error = 0;
2424 	int setsize;
2425 
2426 	mutex_enter(&cp->c_statelock);
2427 
2428 	if (cp->c_backvp == NULL) {
2429 		error = cachefs_getbackvp(fscp, cp);
2430 		if (error)
2431 			goto out;
2432 	}
2433 
2434 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2435 	if (error)
2436 		goto out;
2437 
2438 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_setattr (nfsv4): cnode %p, "
2439 	    "backvp %p\n", cp, cp->c_backvp));
2440 	error = VOP_SETATTR(cp->c_backvp, vap, flags, cr, ct);
2441 	if (error) {
2442 		goto out;
2443 	}
2444 
2445 	/* if the size of the file is being changed */
2446 	if (mask & AT_SIZE) {
2447 		cp->c_size = vap->va_size;
2448 		error = 0;
2449 		setsize = 0;
2450 
2451 		/* see if okay to try to set the file size */
2452 		if (((cp->c_flags & CN_NOCACHE) == 0) &&
2453 		    CFS_ISFS_NONSHARED(fscp)) {
2454 			/* okay to set size if file is populated */
2455 			if (cp->c_metadata.md_flags & MD_POPULATED)
2456 				setsize = 1;
2457 
2458 			/*
2459 			 * Okay to set size if front file exists and setting
2460 			 * file size to zero.
2461 			 */
2462 			if ((cp->c_metadata.md_flags & MD_FILE) &&
2463 			    (vap->va_size == 0))
2464 				setsize = 1;
2465 		}
2466 
2467 		/* if okay to try to set the file size */
2468 		if (setsize) {
2469 			error = 0;
2470 			if (cp->c_frontvp == NULL)
2471 				error = cachefs_getfrontfile(cp);
2472 			if (error == 0)
2473 				error = cachefs_frontfile_size(cp, cp->c_size);
2474 		} else if (cp->c_metadata.md_flags & MD_FILE) {
2475 			/* make sure file gets nocached */
2476 			error = EEXIST;
2477 		}
2478 
2479 		/* if we have to nocache the file */
2480 		if (error) {
2481 			if ((cp->c_flags & CN_NOCACHE) == 0 &&
2482 			    !CFS_ISFS_BACKFS_NFSV4(fscp))
2483 				cachefs_nocache(cp);
2484 			error = 0;
2485 		}
2486 	}
2487 
2488 	cp->c_flags |= CN_UPDATED;
2489 
2490 	/* XXX bob: given what modify_cobject does this seems unnecessary */
2491 	cp->c_attr.va_mask = AT_ALL;
2492 	error = VOP_GETATTR(cp->c_backvp, &cp->c_attr, 0, cr, ct);
2493 	if (error)
2494 		goto out;
2495 
2496 	cp->c_attr.va_size = MAX(cp->c_attr.va_size, cp->c_size);
2497 	cp->c_size = cp->c_attr.va_size;
2498 
2499 	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
2500 out:
2501 	mutex_exit(&cp->c_statelock);
2502 	return (error);
2503 }
2504 
2505 /*
2506  * perform the setattr on the local file system
2507  */
2508 /*ARGSUSED4*/
2509 static int
2510 cachefs_setattr_disconnected(
2511 	vnode_t *vp,
2512 	vattr_t *vap,
2513 	int flags,
2514 	cred_t *cr,
2515 	caller_context_t *ct)
2516 {
2517 	cnode_t *cp = VTOC(vp);
2518 	fscache_t *fscp = C_TO_FSCACHE(cp);
2519 	int mask;
2520 	int error;
2521 	int newfile;
2522 	off_t commit = 0;
2523 
2524 	if (CFS_ISFS_WRITE_AROUND(fscp))
2525 		return (ETIMEDOUT);
2526 
2527 	/* if we do not have good attributes */
2528 	if (cp->c_metadata.md_flags & MD_NEEDATTRS)
2529 		return (ETIMEDOUT);
2530 
2531 	/* primary concern is to keep this routine as much like ufs_setattr */
2532 
2533 	mutex_enter(&cp->c_statelock);
2534 
2535 	error = secpolicy_vnode_setattr(cr, vp, vap, &cp->c_attr, flags,
2536 	    cachefs_access_local, cp);
2537 
2538 	if (error)
2539 		goto out;
2540 
2541 	mask = vap->va_mask;
2542 
2543 	/* if changing the size of the file */
2544 	if (mask & AT_SIZE) {
2545 		if (vp->v_type == VDIR) {
2546 			error = EISDIR;
2547 			goto out;
2548 		}
2549 
2550 		if (vp->v_type == VFIFO) {
2551 			error = 0;
2552 			goto out;
2553 		}
2554 
2555 		if ((vp->v_type != VREG) &&
2556 		    !((vp->v_type == VLNK) && (vap->va_size == 0))) {
2557 			error = EINVAL;
2558 			goto out;
2559 		}
2560 
2561 		if (vap->va_size > fscp->fs_offmax) {
2562 			error = EFBIG;
2563 			goto out;
2564 		}
2565 
2566 		/* if the file is not populated and we are not truncating it */
2567 		if (((cp->c_metadata.md_flags & MD_POPULATED) == 0) &&
2568 		    (vap->va_size != 0)) {
2569 			error = ETIMEDOUT;
2570 			goto out;
2571 		}
2572 
2573 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2574 			error = cachefs_dlog_cidmap(fscp);
2575 			if (error) {
2576 				error = ENOSPC;
2577 				goto out;
2578 			}
2579 			cp->c_metadata.md_flags |= MD_MAPPING;
2580 		}
2581 
2582 		/* log the operation */
2583 		commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2584 		if (commit == 0) {
2585 			error = ENOSPC;
2586 			goto out;
2587 		}
2588 		cp->c_flags &= ~CN_NOCACHE;
2589 
2590 		/* special case truncating fast sym links */
2591 		if ((vp->v_type == VLNK) &&
2592 		    (cp->c_metadata.md_flags & MD_FASTSYMLNK)) {
2593 			/* XXX how can we get here */
2594 			/* XXX should update mtime */
2595 			cp->c_size = 0;
2596 			error = 0;
2597 			goto out;
2598 		}
2599 
2600 		/* get the front file, this may create one */
2601 		newfile = (cp->c_metadata.md_flags & MD_FILE) ? 0 : 1;
2602 		if (cp->c_frontvp == NULL) {
2603 			error = cachefs_getfrontfile(cp);
2604 			if (error)
2605 				goto out;
2606 		}
2607 		ASSERT(cp->c_frontvp);
2608 		if (newfile && (cp->c_flags & CN_UPDATED)) {
2609 			/* allocate space for the metadata */
2610 			ASSERT((cp->c_flags & CN_ALLOC_PENDING) == 0);
2611 			ASSERT((cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR)
2612 			    == 0);
2613 			error = filegrp_write_metadata(cp->c_filegrp,
2614 			    &cp->c_id, &cp->c_metadata);
2615 			if (error)
2616 				goto out;
2617 		}
2618 
2619 		/* change the size of the front file */
2620 		error = cachefs_frontfile_size(cp, vap->va_size);
2621 		if (error)
2622 			goto out;
2623 		cp->c_attr.va_size = cp->c_size = vap->va_size;
2624 		gethrestime(&cp->c_metadata.md_localmtime);
2625 		cp->c_metadata.md_flags |= MD_POPULATED | MD_LOCALMTIME;
2626 		cachefs_modified(cp);
2627 		cp->c_flags |= CN_UPDATED;
2628 	}
2629 
2630 	if (mask & AT_MODE) {
2631 		/* mark as modified */
2632 		if (cachefs_modified_alloc(cp)) {
2633 			error = ENOSPC;
2634 			goto out;
2635 		}
2636 
2637 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2638 			error = cachefs_dlog_cidmap(fscp);
2639 			if (error) {
2640 				error = ENOSPC;
2641 				goto out;
2642 			}
2643 			cp->c_metadata.md_flags |= MD_MAPPING;
2644 		}
2645 
2646 		/* log the operation if not already logged */
2647 		if (commit == 0) {
2648 			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2649 			if (commit == 0) {
2650 				error = ENOSPC;
2651 				goto out;
2652 			}
2653 		}
2654 
2655 		cp->c_attr.va_mode &= S_IFMT;
2656 		cp->c_attr.va_mode |= vap->va_mode & ~S_IFMT;
2657 		gethrestime(&cp->c_metadata.md_localctime);
2658 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
2659 		cp->c_flags |= CN_UPDATED;
2660 	}
2661 
2662 	if (mask & (AT_UID|AT_GID)) {
2663 
2664 		/* mark as modified */
2665 		if (cachefs_modified_alloc(cp)) {
2666 			error = ENOSPC;
2667 			goto out;
2668 		}
2669 
2670 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2671 			error = cachefs_dlog_cidmap(fscp);
2672 			if (error) {
2673 				error = ENOSPC;
2674 				goto out;
2675 			}
2676 			cp->c_metadata.md_flags |= MD_MAPPING;
2677 		}
2678 
2679 		/* log the operation if not already logged */
2680 		if (commit == 0) {
2681 			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2682 			if (commit == 0) {
2683 				error = ENOSPC;
2684 				goto out;
2685 			}
2686 		}
2687 
2688 		if (mask & AT_UID)
2689 			cp->c_attr.va_uid = vap->va_uid;
2690 
2691 		if (mask & AT_GID)
2692 			cp->c_attr.va_gid = vap->va_gid;
2693 		gethrestime(&cp->c_metadata.md_localctime);
2694 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
2695 		cp->c_flags |= CN_UPDATED;
2696 	}
2697 
2698 
2699 	if (mask & (AT_MTIME|AT_ATIME)) {
2700 		/* mark as modified */
2701 		if (cachefs_modified_alloc(cp)) {
2702 			error = ENOSPC;
2703 			goto out;
2704 		}
2705 
2706 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2707 			error = cachefs_dlog_cidmap(fscp);
2708 			if (error) {
2709 				error = ENOSPC;
2710 				goto out;
2711 			}
2712 			cp->c_metadata.md_flags |= MD_MAPPING;
2713 		}
2714 
2715 		/* log the operation if not already logged */
2716 		if (commit == 0) {
2717 			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2718 			if (commit == 0) {
2719 				error = ENOSPC;
2720 				goto out;
2721 			}
2722 		}
2723 
2724 		if (mask & AT_MTIME) {
2725 			cp->c_metadata.md_localmtime = vap->va_mtime;
2726 			cp->c_metadata.md_flags |= MD_LOCALMTIME;
2727 		}
2728 		if (mask & AT_ATIME)
2729 			cp->c_attr.va_atime = vap->va_atime;
2730 		gethrestime(&cp->c_metadata.md_localctime);
2731 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
2732 		cp->c_flags |= CN_UPDATED;
2733 	}
2734 
2735 out:
2736 	mutex_exit(&cp->c_statelock);
2737 
2738 	/* commit the log entry */
2739 	if (commit) {
2740 		if (cachefs_dlog_commit(fscp, commit, error)) {
2741 			/*EMPTY*/
2742 			/* XXX bob: fix on panic */
2743 		}
2744 	}
2745 	return (error);
2746 }
2747 
2748 /* ARGSUSED */
2749 static int
2750 cachefs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
2751 	caller_context_t *ct)
2752 {
2753 	cnode_t *cp = VTOC(vp);
2754 	fscache_t *fscp = C_TO_FSCACHE(cp);
2755 	int error;
2756 	int held = 0;
2757 	int connected = 0;
2758 
2759 #ifdef CFSDEBUG
2760 	CFS_DEBUG(CFSDEBUG_VOPS)
2761 		printf("cachefs_access: ENTER vp %p\n", (void *)vp);
2762 #endif
2763 	if (getzoneid() != GLOBAL_ZONEID) {
2764 		error = EPERM;
2765 		goto out;
2766 	}
2767 
2768 	/*
2769 	 * Cachefs only provides pass-through support for NFSv4,
2770 	 * and all vnode operations are passed through to the
2771 	 * back file system. For NFSv4 pass-through to work, only
2772 	 * connected operation is supported, the cnode backvp must
2773 	 * exist, and cachefs optional (eg., disconnectable) flags
2774 	 * are turned off. Assert these conditions to ensure that
2775 	 * the backfilesystem is called for the access operation.
2776 	 */
2777 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2778 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2779 
2780 	for (;;) {
2781 		/* get (or renew) access to the file system */
2782 		if (held) {
2783 			/* Won't loop with NFSv4 connected behavior */
2784 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2785 			cachefs_cd_release(fscp);
2786 			held = 0;
2787 		}
2788 		error = cachefs_cd_access(fscp, connected, 0);
2789 		if (error)
2790 			break;
2791 		held = 1;
2792 
2793 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2794 			error = cachefs_access_connected(vp, mode, flags,
2795 			    cr);
2796 			if (CFS_TIMEOUT(fscp, error)) {
2797 				cachefs_cd_release(fscp);
2798 				held = 0;
2799 				cachefs_cd_timedout(fscp);
2800 				connected = 0;
2801 				continue;
2802 			}
2803 		} else {
2804 			mutex_enter(&cp->c_statelock);
2805 			error = cachefs_access_local(cp, mode, cr);
2806 			mutex_exit(&cp->c_statelock);
2807 			if (CFS_TIMEOUT(fscp, error)) {
2808 				if (cachefs_cd_access_miss(fscp)) {
2809 					mutex_enter(&cp->c_statelock);
2810 					if (cp->c_backvp == NULL) {
2811 						(void) cachefs_getbackvp(fscp,
2812 						    cp);
2813 					}
2814 					mutex_exit(&cp->c_statelock);
2815 					error = cachefs_access_connected(vp,
2816 					    mode, flags, cr);
2817 					if (!CFS_TIMEOUT(fscp, error))
2818 						break;
2819 					delay(5*hz);
2820 					connected = 0;
2821 					continue;
2822 				}
2823 				connected = 1;
2824 				continue;
2825 			}
2826 		}
2827 		break;
2828 	}
2829 	if (held)
2830 		cachefs_cd_release(fscp);
2831 #ifdef CFS_CD_DEBUG
2832 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2833 #endif
2834 out:
2835 #ifdef CFSDEBUG
2836 	CFS_DEBUG(CFSDEBUG_VOPS)
2837 		printf("cachefs_access: EXIT error = %d\n", error);
2838 #endif
2839 	return (error);
2840 }
2841 
2842 static int
2843 cachefs_access_connected(struct vnode *vp, int mode, int flags, cred_t *cr)
2844 {
2845 	cnode_t *cp = VTOC(vp);
2846 	fscache_t *fscp = C_TO_FSCACHE(cp);
2847 	int error = 0;
2848 
2849 	mutex_enter(&cp->c_statelock);
2850 
2851 	/* Make sure the cnode attrs are valid first. */
2852 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2853 	if (error)
2854 		goto out;
2855 
2856 	/* see if can do a local file system check */
2857 	if ((fscp->fs_info.fi_mntflags & CFS_ACCESS_BACKFS) == 0 &&
2858 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
2859 		error = cachefs_access_local(cp, mode, cr);
2860 		goto out;
2861 	}
2862 
2863 	/* else do a remote file system check */
2864 	else {
2865 		if (cp->c_backvp == NULL) {
2866 			error = cachefs_getbackvp(fscp, cp);
2867 			if (error)
2868 				goto out;
2869 		}
2870 
2871 		CFS_DPRINT_BACKFS_NFSV4(fscp,
2872 		    ("cachefs_access (nfsv4): cnode %p, backvp %p\n",
2873 		    cp, cp->c_backvp));
2874 		error = VOP_ACCESS(cp->c_backvp, mode, flags, cr, NULL);
2875 
2876 		/*
2877 		 * even though we don't `need' the ACL to do access
2878 		 * via the backvp, we should cache it here to make our
2879 		 * behavior more reasonable if we go disconnected.
2880 		 */
2881 
2882 		if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
2883 		    (cachefs_vtype_aclok(vp)) &&
2884 		    ((cp->c_flags & CN_NOCACHE) == 0) &&
2885 		    (!CFS_ISFS_BACKFS_NFSV4(fscp)) &&
2886 		    ((cp->c_metadata.md_flags & MD_ACL) == 0))
2887 			(void) cachefs_cacheacl(cp, NULL);
2888 	}
2889 out:
2890 	/*
2891 	 * If NFS returned ESTALE, mark this cnode as stale, so that
2892 	 * the vn_open retry will read the file anew from backfs
2893 	 */
2894 	if (error == ESTALE)
2895 		cachefs_cnode_stale(cp);
2896 
2897 	mutex_exit(&cp->c_statelock);
2898 	return (error);
2899 }
2900 
2901 /*
2902  * CFS has a fastsymlink scheme. If the size of the link is < C_FSL_SIZE, then
2903  * the link is placed in the metadata itself (no front file is allocated).
2904  */
2905 /*ARGSUSED*/
2906 static int
2907 cachefs_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
2908 {
2909 	int error = 0;
2910 	cnode_t *cp = VTOC(vp);
2911 	fscache_t *fscp = C_TO_FSCACHE(cp);
2912 	cachefscache_t *cachep = fscp->fs_cache;
2913 	int held = 0;
2914 	int connected = 0;
2915 
2916 	if (getzoneid() != GLOBAL_ZONEID)
2917 		return (EPERM);
2918 
2919 	if (vp->v_type != VLNK)
2920 		return (EINVAL);
2921 
2922 	/*
2923 	 * Cachefs only provides pass-through support for NFSv4,
2924 	 * and all vnode operations are passed through to the
2925 	 * back file system. For NFSv4 pass-through to work, only
2926 	 * connected operation is supported, the cnode backvp must
2927 	 * exist, and cachefs optional (eg., disconnectable) flags
2928 	 * are turned off. Assert these conditions to ensure that
2929 	 * the backfilesystem is called for the readlink operation.
2930 	 */
2931 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2932 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2933 
2934 	for (;;) {
2935 		/* get (or renew) access to the file system */
2936 		if (held) {
2937 			/* Won't loop with NFSv4 connected behavior */
2938 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2939 			cachefs_cd_release(fscp);
2940 			held = 0;
2941 		}
2942 		error = cachefs_cd_access(fscp, connected, 0);
2943 		if (error)
2944 			break;
2945 		held = 1;
2946 
2947 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2948 			/*
2949 			 * since readlink_connected will call stuffsymlink
2950 			 * on success, have to serialize access
2951 			 */
2952 			if (!rw_tryenter(&cp->c_rwlock, RW_WRITER)) {
2953 				cachefs_cd_release(fscp);
2954 				rw_enter(&cp->c_rwlock, RW_WRITER);
2955 				error = cachefs_cd_access(fscp, connected, 0);
2956 				if (error) {
2957 					held = 0;
2958 					rw_exit(&cp->c_rwlock);
2959 					break;
2960 				}
2961 			}
2962 			error = cachefs_readlink_connected(vp, uiop, cr);
2963 			rw_exit(&cp->c_rwlock);
2964 			if (CFS_TIMEOUT(fscp, error)) {
2965 				cachefs_cd_release(fscp);
2966 				held = 0;
2967 				cachefs_cd_timedout(fscp);
2968 				connected = 0;
2969 				continue;
2970 			}
2971 		} else {
2972 			error = cachefs_readlink_disconnected(vp, uiop);
2973 			if (CFS_TIMEOUT(fscp, error)) {
2974 				if (cachefs_cd_access_miss(fscp)) {
2975 					/* as above */
2976 					if (!rw_tryenter(&cp->c_rwlock,
2977 					    RW_WRITER)) {
2978 						cachefs_cd_release(fscp);
2979 						rw_enter(&cp->c_rwlock,
2980 						    RW_WRITER);
2981 						error = cachefs_cd_access(fscp,
2982 						    connected, 0);
2983 						if (error) {
2984 							held = 0;
2985 							rw_exit(&cp->c_rwlock);
2986 							break;
2987 						}
2988 					}
2989 					error = cachefs_readlink_connected(vp,
2990 					    uiop, cr);
2991 					rw_exit(&cp->c_rwlock);
2992 					if (!CFS_TIMEOUT(fscp, error))
2993 						break;
2994 					delay(5*hz);
2995 					connected = 0;
2996 					continue;
2997 				}
2998 				connected = 1;
2999 				continue;
3000 			}
3001 		}
3002 		break;
3003 	}
3004 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READLINK))
3005 		cachefs_log_readlink(cachep, error, fscp->fs_cfsvfsp,
3006 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
3007 		    crgetuid(cr), cp->c_size);
3008 
3009 	if (held)
3010 		cachefs_cd_release(fscp);
3011 #ifdef CFS_CD_DEBUG
3012 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3013 #endif
3014 
3015 	/*
3016 	 * The over the wire error for attempting to readlink something
3017 	 * other than a symbolic link is ENXIO.  However, we need to
3018 	 * return EINVAL instead of ENXIO, so we map it here.
3019 	 */
3020 	return (error == ENXIO ? EINVAL : error);
3021 }
3022 
/*
 * Connected-mode worker for readlink.
 *
 * Copies the symlink target into uiop from the first usable source, tried
 * in this order:
 *   1. the fast symlink bytes kept in the cnode metadata (MD_FASTSYMLNK),
 *   2. the front file (MD_POPULATED),
 *   3. the back file system, via cachefs_readlink_back().
 * Data fetched from the back file system is also handed to
 * cachefs_stuffsymlink() so that it can be cached.
 *
 * c_statelock is held from entry until the "out" label.  The caller that
 * is visible in this file takes c_rwlock as writer around this call.
 *
 * Returns 0 or an errno value.  On success fs_stats.st_hits is incremented
 * when the data came from the cache and st_misses when it did not.
 */
static int
cachefs_readlink_connected(vnode_t *vp, uio_t *uiop, cred_t *cr)
{
	int error;
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	caddr_t buf;
	int buflen;
	int readcache = 0;	/* nonzero once the cache satisfied the read */

	mutex_enter(&cp->c_statelock);

	/* consistency check the cached object before trusting it */
	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
	if (error)
		goto out;

	/* if the sym link is cached as a fast sym link */
	if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		/* copy at most c_size bytes, bounded by the caller's buffer */
		error = uiomove(cp->c_metadata.md_allocinfo,
		    MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
#ifdef CFSDEBUG
		/* debug kernels always accept the cached copy */
		readcache = 1;
		goto out;
#else /* CFSDEBUG */
		/*
		 * XXX KLUDGE! correct for insidious 0-len symlink
		 * A cached length of zero is not accepted here; fall
		 * through and try the remaining sources instead.
		 */
		if (cp->c_size != 0) {
			readcache = 1;
			goto out;
		}
#endif /* CFSDEBUG */
	}

	/* if the sym link is cached in a front file */
	if (cp->c_metadata.md_flags & MD_POPULATED) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		ASSERT(cp->c_metadata.md_flags & MD_FILE);
		if (cp->c_frontvp == NULL) {
			(void) cachefs_getfrontfile(cp);
		}
		/*
		 * MD_POPULATED is tested again after the getfrontfile
		 * attempt, whose return value is ignored.
		 */
		if (cp->c_metadata.md_flags & MD_POPULATED) {
			/* read symlink data from frontfile */
			uiop->uio_offset = 0;
			(void) VOP_RWLOCK(cp->c_frontvp,
			    V_WRITELOCK_FALSE, NULL);
			error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
			VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);

			/*
			 * XXX KLUDGE! correct for insidious 0-len symlink
			 * As above: a zero cached length falls through to
			 * the back file system.
			 */
			if (cp->c_size != 0) {
				readcache = 1;
				goto out;
			}
		}
	}

	/* get the sym link contents from the back fs */
	error = cachefs_readlink_back(cp, cr, &buf, &buflen);
	if (error)
		goto out;

	/* copy the contents out to the user */
	error = uiomove(buf, MIN(buflen, uiop->uio_resid), UIO_READ, uiop);

	/*
	 * try to cache the sym link, note that its a noop if NOCACHE is set
	 * or if NFSv4 pass-through is enabled.
	 * This is attempted regardless of the uiomove() result above; if
	 * caching fails the cnode is switched to nocache mode.
	 */
	if (cachefs_stuffsymlink(cp, buf, buflen)) {
		cachefs_nocache(cp);
	}

	/* the buffer from cachefs_readlink_back() is freed as MAXPATHLEN */
	cachefs_kmem_free(buf, MAXPATHLEN);

out:
	mutex_exit(&cp->c_statelock);
	/* statistics are only updated for successful reads */
	if (error == 0) {
		if (readcache)
			fscp->fs_stats.st_hits++;
		else
			fscp->fs_stats.st_misses++;
	}
	return (error);
}
3107 
3108 static int
3109 cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop)
3110 {
3111 	int error;
3112 	cnode_t *cp = VTOC(vp);
3113 	fscache_t *fscp = C_TO_FSCACHE(cp);
3114 	int readcache = 0;
3115 
3116 	mutex_enter(&cp->c_statelock);
3117 
3118 	/* if the sym link is cached as a fast sym link */
3119 	if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
3120 		error = uiomove(cp->c_metadata.md_allocinfo,
3121 		    MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
3122 		readcache = 1;
3123 		goto out;
3124 	}
3125 
3126 	/* if the sym link is cached in a front file */
3127 	if (cp->c_metadata.md_flags & MD_POPULATED) {
3128 		ASSERT(cp->c_metadata.md_flags & MD_FILE);
3129 		if (cp->c_frontvp == NULL) {
3130 			(void) cachefs_getfrontfile(cp);
3131 		}
3132 		if (cp->c_metadata.md_flags & MD_POPULATED) {
3133 			/* read symlink data from frontfile */
3134 			uiop->uio_offset = 0;
3135 			(void) VOP_RWLOCK(cp->c_frontvp,
3136 			    V_WRITELOCK_FALSE, NULL);
3137 			error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
3138 			VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);
3139 			readcache = 1;
3140 			goto out;
3141 		}
3142 	}
3143 	error = ETIMEDOUT;
3144 
3145 out:
3146 	mutex_exit(&cp->c_statelock);
3147 	if (error == 0) {
3148 		if (readcache)
3149 			fscp->fs_stats.st_hits++;
3150 		else
3151 			fscp->fs_stats.st_misses++;
3152 	}
3153 	return (error);
3154 }
3155 
3156 /*ARGSUSED*/
3157 static int
3158 cachefs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
3159 {
3160 	cnode_t *cp = VTOC(vp);
3161 	int error = 0;
3162 	fscache_t *fscp = C_TO_FSCACHE(cp);
3163 	int held = 0;
3164 	int connected = 0;
3165 
3166 #ifdef CFSDEBUG
3167 	CFS_DEBUG(CFSDEBUG_VOPS)
3168 		printf("cachefs_fsync: ENTER vp %p\n", (void *)vp);
3169 #endif
3170 
3171 	if (getzoneid() != GLOBAL_ZONEID) {
3172 		error = EPERM;
3173 		goto out;
3174 	}
3175 
3176 	if (fscp->fs_backvfsp && fscp->fs_backvfsp->vfs_flag & VFS_RDONLY)
3177 		goto out;
3178 
3179 	/*
3180 	 * Cachefs only provides pass-through support for NFSv4,
3181 	 * and all vnode operations are passed through to the
3182 	 * back file system. For NFSv4 pass-through to work, only
3183 	 * connected operation is supported, the cnode backvp must
3184 	 * exist, and cachefs optional (eg., disconnectable) flags
3185 	 * are turned off. Assert these conditions to ensure that
3186 	 * the backfilesystem is called for the fsync operation.
3187 	 */
3188 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3189 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
3190 
3191 	for (;;) {
3192 		/* get (or renew) access to the file system */
3193 		if (held) {
3194 			/* Won't loop with NFSv4 connected behavior */
3195 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3196 			cachefs_cd_release(fscp);
3197 			held = 0;
3198 		}
3199 		error = cachefs_cd_access(fscp, connected, 1);
3200 		if (error)
3201 			break;
3202 		held = 1;
3203 		connected = 0;
3204 
3205 		/* if a regular file, write out the pages */
3206 		if ((vp->v_type == VREG) && vn_has_cached_data(vp) &&
3207 		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
3208 			error = cachefs_putpage_common(vp, (offset_t)0,
3209 			    0, 0, cr);
3210 			if (CFS_TIMEOUT(fscp, error)) {
3211 				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3212 					cachefs_cd_release(fscp);
3213 					held = 0;
3214 					cachefs_cd_timedout(fscp);
3215 					continue;
3216 				} else {
3217 					connected = 1;
3218 					continue;
3219 				}
3220 			}
3221 
3222 			/* if no space left in cache, wait until connected */
3223 			if ((error == ENOSPC) &&
3224 			    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
3225 				connected = 1;
3226 				continue;
3227 			}
3228 
3229 			/* clear the cnode error if putpage worked */
3230 			if ((error == 0) && cp->c_error) {
3231 				mutex_enter(&cp->c_statelock);
3232 				cp->c_error = 0;
3233 				mutex_exit(&cp->c_statelock);
3234 			}
3235 
3236 			if (error)
3237 				break;
3238 		}
3239 
3240 		/* if connected, sync the backvp */
3241 		if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) &&
3242 		    cp->c_backvp) {
3243 			mutex_enter(&cp->c_statelock);
3244 			if (cp->c_backvp) {
3245 				CFS_DPRINT_BACKFS_NFSV4(fscp,
3246 				    ("cachefs_fsync (nfsv4): cnode %p, "
3247 				    "backvp %p\n", cp, cp->c_backvp));
3248 				error = VOP_FSYNC(cp->c_backvp, syncflag, cr,
3249 				    ct);
3250 				if (CFS_TIMEOUT(fscp, error)) {
3251 					mutex_exit(&cp->c_statelock);
3252 					cachefs_cd_release(fscp);
3253 					held = 0;
3254 					cachefs_cd_timedout(fscp);
3255 					continue;
3256 				} else if (error && (error != EINTR))
3257 					cp->c_error = error;
3258 			}
3259 			mutex_exit(&cp->c_statelock);
3260 		}
3261 
3262 		/* sync the metadata and the front file to the front fs */
3263 		if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
3264 			error = cachefs_sync_metadata(cp);
3265 			if (error &&
3266 			    (fscp->fs_cdconnected == CFS_CD_CONNECTED))
3267 				error = 0;
3268 		}
3269 		break;
3270 	}
3271 
3272 	if (error == 0)
3273 		error = cp->c_error;
3274 
3275 	if (held)
3276 		cachefs_cd_release(fscp);
3277 
3278 out:
3279 #ifdef CFS_CD_DEBUG
3280 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3281 #endif
3282 
3283 #ifdef CFSDEBUG
3284 	CFS_DEBUG(CFSDEBUG_VOPS)
3285 		printf("cachefs_fsync: EXIT vp %p\n", (void *)vp);
3286 #endif
3287 	return (error);
3288 }
3289 
/*
 * Called from cachefs_inactive(), to make sure all the data goes out to disk.
 * It is also called from cachefs_fsync() in this file.
 *
 * Writes the cnode's metadata to its file group and, when
 * CN_NEED_FRONT_SYNC is set, fsyncs the front file first and records the
 * front file's mtime as the metadata timestamp.
 *
 * Nothing is done (and 0 is returned) when the cnode is not CN_UPDATED, is
 * CN_STALE or CN_DESTROY, when the file group lacks CFS_FG_WRITE, or when
 * the back file system is NFSv4.
 *
 * On failure the front file and metadata slot are released, the cnode is
 * put in nocache mode and the error is returned.  CN_UPDATED is cleared on
 * every return path.  c_statelock is taken here, so the caller must not
 * hold it.
 */
int
cachefs_sync_metadata(cnode_t *cp)
{
	int error = 0;
	/*
	 * fgp is only assigned after the first two early exits; those exits
	 * leave error == 0, and the error path below is the only code after
	 * "out" that reads fgp.
	 */
	struct filegrp *fgp;
	struct vattr va;
	fscache_t *fscp = C_TO_FSCACHE(cp);

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("c_sync_metadata: ENTER cp %p cflag %x\n",
		    (void *)cp, cp->c_flags);
#endif

	mutex_enter(&cp->c_statelock);
	/* nothing to write if the metadata was not modified */
	if ((cp->c_flags & CN_UPDATED) == 0)
		goto out;
	if (cp->c_flags & (CN_STALE | CN_DESTROY))
		goto out;
	fgp = cp->c_filegrp;
	if ((fgp->fg_flags & CFS_FG_WRITE) == 0)
		goto out;
	if (CFS_ISFS_BACKFS_NFSV4(fscp))
		goto out;

	if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
		/* c_statelock is dropped around filegrp_allocattr() */
		mutex_exit(&cp->c_statelock);
		error = filegrp_allocattr(fgp);
		mutex_enter(&cp->c_statelock);
		if (error) {
			/* give up quietly: reported to the caller as success */
			error = 0;
			goto out;
		}
	}

	/* allocate the metadata slot if that is still outstanding */
	if (cp->c_flags & CN_ALLOC_PENDING) {
		error = filegrp_create_metadata(fgp, &cp->c_metadata,
		    &cp->c_id);
		if (error)
			goto out;
		cp->c_flags &= ~CN_ALLOC_PENDING;
	}

	if (cp->c_flags & CN_NEED_FRONT_SYNC) {
		if (cp->c_frontvp != NULL) {
			error = VOP_FSYNC(cp->c_frontvp, FSYNC, kcred, NULL);
			if (error) {
				/*
				 * The timestamp is zeroed and execution
				 * continues; this error value is replaced
				 * by the result of the metadata write below.
				 */
				cp->c_metadata.md_timestamp.tv_sec = 0;
			} else {
				/* remember the front file's mtime */
				va.va_mask = AT_MTIME;
				error = VOP_GETATTR(cp->c_frontvp, &va, 0,
				    kcred, NULL);
				if (error)
					goto out;
				cp->c_metadata.md_timestamp = va.va_mtime;
				cp->c_flags &=
				    ~(CN_NEED_FRONT_SYNC |
				    CN_POPULATION_PENDING);
			}
		} else {
			/* no front file to sync, just clear the requests */
			cp->c_flags &=
			    ~(CN_NEED_FRONT_SYNC | CN_POPULATION_PENDING);
		}
	}

	/*
	 * XXX tony: How can CN_ALLOC_PENDING still be set??
	 * XXX tony: How can CN_UPDATED not be set?????
	 */
	if ((cp->c_flags & CN_ALLOC_PENDING) == 0 &&
	    (cp->c_flags & CN_UPDATED)) {
		error = filegrp_write_metadata(fgp, &cp->c_id,
		    &cp->c_metadata);
		if (error)
			goto out;
	}
out:
	if (error) {
		/* XXX modified files? */
		/* release the front file and its resource list entry */
		if (cp->c_metadata.md_rlno) {
			cachefs_removefrontfile(&cp->c_metadata,
			    &cp->c_id, fgp);
			cachefs_rlent_moveto(C_TO_FSCACHE(cp)->fs_cache,
			    CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
			cp->c_metadata.md_rlno = 0;
			cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
			if (cp->c_frontvp) {
				VN_RELE(cp->c_frontvp);
				cp->c_frontvp = NULL;
			}
		}
		/* destroy the metadata slot if one had been allocated */
		if ((cp->c_flags & CN_ALLOC_PENDING) == 0)
			(void) filegrp_destroy_metadata(fgp, &cp->c_id);
		cp->c_flags |= CN_ALLOC_PENDING;
		cachefs_nocache(cp);
	}
	/*
	 * we clear the updated bit even on errors because a retry
	 * will probably fail also.
	 */
	cp->c_flags &= ~CN_UPDATED;
	mutex_exit(&cp->c_statelock);

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("c_sync_metadata: EXIT cp %p cflag %x\n",
		    (void *)cp, cp->c_flags);
#endif

	return (error);
}
3404 
3405 /*
3406  * This is the vop entry point for inactivating a vnode.
3407  * It just queues the request for the async thread which
3408  * calls cachefs_inactive.
3409  * Because of the dnlc, it is not safe to grab most locks here.
3410  */
3411 /*ARGSUSED*/
3412 static void
3413 cachefs_inactive(struct vnode *vp, cred_t *cr, caller_context_t *ct)
3414 {
3415 	cnode_t *cp;
3416 	struct cachefs_req *rp;
3417 	fscache_t *fscp;
3418 
3419 #ifdef CFSDEBUG
3420 	CFS_DEBUG(CFSDEBUG_VOPS)
3421 		printf("cachefs_inactive: ENTER vp %p\n", (void *)vp);
3422 #endif
3423 
3424 	cp = VTOC(vp);
3425 	fscp = C_TO_FSCACHE(cp);
3426 
3427 	ASSERT((cp->c_flags & CN_IDLE) == 0);
3428 
3429 	/*
3430 	 * Cachefs only provides pass-through support for NFSv4,
3431 	 * and all vnode operations are passed through to the
3432 	 * back file system. For NFSv4 pass-through to work, only
3433 	 * connected operation is supported, the cnode backvp must
3434 	 * exist, and cachefs optional (eg., disconnectable) flags
3435 	 * are turned off. Assert these conditions to ensure that
3436 	 * the backfilesystem is called for the inactive operation.
3437 	 */
3438 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3439 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
3440 
3441 	/* vn_rele() set the v_count == 1 */
3442 
3443 	cp->c_ipending = 1;
3444 
3445 	rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
3446 	rp->cfs_cmd = CFS_IDLE;
3447 	rp->cfs_cr = cr;
3448 	crhold(rp->cfs_cr);
3449 	rp->cfs_req_u.cu_idle.ci_vp = vp;
3450 	cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));
3451 
3452 #ifdef CFSDEBUG
3453 	CFS_DEBUG(CFSDEBUG_VOPS)
3454 		printf("cachefs_inactive: EXIT vp %p\n", (void *)vp);
3455 #endif
3456 }
3457 
3458 /* ARGSUSED */
3459 static int
3460 cachefs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp,
3461     struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr,
3462     caller_context_t *ct, int *direntflags, pathname_t *realpnp)
3463 
3464 {
3465 	int error = 0;
3466 	cnode_t *dcp = VTOC(dvp);
3467 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3468 	int held = 0;
3469 	int connected = 0;
3470 
3471 #ifdef CFSDEBUG
3472 	CFS_DEBUG(CFSDEBUG_VOPS)
3473 		printf("cachefs_lookup: ENTER dvp %p nm %s\n", (void *)dvp, nm);
3474 #endif
3475 
3476 	if (getzoneid() != GLOBAL_ZONEID) {
3477 		error = EPERM;
3478 		goto out;
3479 	}
3480 
3481 	/*
3482 	 * Cachefs only provides pass-through support for NFSv4,
3483 	 * and all vnode operations are passed through to the
3484 	 * back file system. For NFSv4 pass-through to work, only
3485 	 * connected operation is supported, the cnode backvp must
3486 	 * exist, and cachefs optional (eg., disconnectable) flags
3487 	 * are turned off. Assert these conditions to ensure that
3488 	 * the backfilesystem is called for the lookup operation.
3489 	 */
3490 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3491 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
3492 
3493 	for (;;) {
3494 		/* get (or renew) access to the file system */
3495 		if (held) {
3496 			/* Won't loop with NFSv4 connected behavior */
3497 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3498 			cachefs_cd_release(fscp);
3499 			held = 0;
3500 		}
3501 		error = cachefs_cd_access(fscp, connected, 0);
3502 		if (error)
3503 			break;
3504 		held = 1;
3505 
3506 		error = cachefs_lookup_common(dvp, nm, vpp, pnp,
3507 			flags, rdir, cr);
3508 		if (CFS_TIMEOUT(fscp, error)) {
3509 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3510 				cachefs_cd_release(fscp);
3511 				held = 0;
3512 				cachefs_cd_timedout(fscp);
3513 				connected = 0;
3514 				continue;
3515 			} else {
3516 				if (cachefs_cd_access_miss(fscp)) {
3517 					rw_enter(&dcp->c_rwlock, RW_READER);
3518 					error = cachefs_lookup_back(dvp, nm,
3519 					    vpp, cr);
3520 					rw_exit(&dcp->c_rwlock);
3521 					if (!CFS_TIMEOUT(fscp, error))
3522 						break;
3523 					delay(5*hz);
3524 					connected = 0;
3525 					continue;
3526 				}
3527 				connected = 1;
3528 				continue;
3529 			}
3530 		}
3531 		break;
3532 	}
3533 	if (held)
3534 		cachefs_cd_release(fscp);
3535 
3536 	if (error == 0 && IS_DEVVP(*vpp)) {
3537 		struct vnode *newvp;
3538 		newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
3539 		VN_RELE(*vpp);
3540 		if (newvp == NULL) {
3541 			error = ENOSYS;
3542 		} else {
3543 			*vpp = newvp;
3544 		}
3545 	}
3546 
3547 #ifdef CFS_CD_DEBUG
3548 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3549 #endif
3550 out:
3551 #ifdef CFSDEBUG
3552 	CFS_DEBUG(CFSDEBUG_VOPS)
3553 		printf("cachefs_lookup: EXIT error = %d\n", error);
3554 #endif
3555 
3556 	return (error);
3557 }
3558 
/*
 * Common lookup code, used by cachefs_lookup() and
 * cachefs_create_connected() in this file.
 *
 * Resolves the name nm in directory dvp.  The empty name and "." resolve
 * to dvp itself.  Otherwise the dnlc is consulted, then the cached
 * directory (front file); the back file system is used, when connected,
 * whenever the cached directory cannot answer.  While disconnected those
 * cases return ETIMEDOUT instead.
 *
 * Returns 0 with a held vnode in *vpp, or an errno value.  *vpp is set to
 * NULL on entry.  pnp, flags and rdir are not used.
 */
/* ARGSUSED */
int
cachefs_lookup_common(vnode_t *dvp, char *nm, vnode_t **vpp,
    struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr)
{
	int error = 0;
	cnode_t *cp, *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	struct fid cookie;
	u_offset_t d_offset;
	struct cachefs_req *rp;
	cfs_cid_t cid, dircid;
	uint_t flag;
	uint_t uncached = 0;	/* set when a stale entry forced a back lookup */

	*vpp = NULL;

	/*
	 * If lookup is for "", just return dvp.  Don't need
	 * to send it over the wire, look it up in the dnlc,
	 * or perform any access checks.
	 */
	if (*nm == '\0') {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	/* can't do lookups in non-directories */
	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	/* perform access check, also does consistency check if connected */
	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
		error = cachefs_access_connected(dvp, VEXEC, 0, cr);
	} else {
		mutex_enter(&dcp->c_statelock);
		error = cachefs_access_local(dcp, VEXEC, cr);
		mutex_exit(&dcp->c_statelock);
	}
	if (error)
		return (error);

	/*
	 * If lookup is for ".", just return dvp.  Don't need
	 * to send it over the wire or look it up in the dnlc,
	 * just need to check access.
	 */
	if (strcmp(nm, ".") == 0) {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	/* check the dnlc */
	*vpp = (vnode_t *)dnlc_lookup(dvp, nm);
	if (*vpp)
		return (0);

	/*
	 * read lock the dir before starting the search
	 * From here on every exit goes through "out", which drops it.
	 */
	rw_enter(&dcp->c_rwlock, RW_READER);

	mutex_enter(&dcp->c_statelock);
	/* snapshot of the directory's id, stored as the child's parent below */
	dircid = dcp->c_id;

	dcp->c_usage++;

	/* if front file is not usable, lookup on the back fs */
	if ((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
	    CFS_ISFS_BACKFS_NFSV4(fscp) ||
	    ((dcp->c_filegrp->fg_flags & CFS_FG_READ) == 0)) {
		mutex_exit(&dcp->c_statelock);
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
			error = cachefs_lookup_back(dvp, nm, vpp, cr);
		else
			error = ETIMEDOUT;
		goto out;
	}

	/* if the front file is not populated, try to populate it */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
			error = ETIMEDOUT;
			mutex_exit(&dcp->c_statelock);
			goto out;
		}

		if (cachefs_async_okay()) {
			/* cannot populate if cache is not writable */
			ASSERT((dcp->c_flags &
			    (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0);
			dcp->c_flags |= CN_ASYNC_POPULATE;

			/*
			 * Queue a populate request for the async thread;
			 * the request takes its own holds on cr and dvp.
			 */
			rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
			rp->cfs_cmd = CFS_POPULATE;
			rp->cfs_req_u.cu_populate.cpop_vp = dvp;
			rp->cfs_cr = cr;

			crhold(cr);
			VN_HOLD(dvp);

			cachefs_addqueue(rp, &fscp->fs_workq);
		} else if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
			/* populate synchronously instead */
			error = cachefs_dir_fill(dcp, cr);
			if (error != 0) {
				mutex_exit(&dcp->c_statelock);
				goto out;
			}
		}
		/* no populate if too many asyncs and we have to cache ACLs */

		mutex_exit(&dcp->c_statelock);

		/* this lookup itself is still answered by the back fs */
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
			error = cachefs_lookup_back(dvp, nm, vpp, cr);
		else
			error = ETIMEDOUT;
		goto out;
	}

	/* by now we have a valid cached front file that we can search */

	ASSERT((dcp->c_flags & CN_ASYNC_POPULATE) == 0);
	error = cachefs_dir_look(dcp, nm, &cookie, &flag,
	    &d_offset, &cid);
	mutex_exit(&dcp->c_statelock);

	if (error) {
		/*
		 * ENOENT is returned to the caller unchanged.  EINVAL and
		 * every other error take the same action: ask the back fs
		 * when connected, otherwise report ETIMEDOUT.
		 */
		/* if the entry does not have the fid, go get it */
		if (error == EINVAL) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
				error = cachefs_lookup_back(dvp, nm, vpp, cr);
			else
				error = ETIMEDOUT;
		}

		/* errors other than does not exist */
		else if (error != ENOENT) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
				error = cachefs_lookup_back(dvp, nm, vpp, cr);
			else
				error = ETIMEDOUT;
		}
		goto out;
	}

	/*
	 * Else we found the entry in the cached directory.
	 * Make a cnode for it.
	 */
	error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
	    cr, 0, &cp);
	if (error == ESTALE) {
		/*
		 * The cached entry is stale: stop caching this directory
		 * and fall back to the back fs.
		 */
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		mutex_enter(&dcp->c_statelock);
		cachefs_nocache(dcp);
		mutex_exit(&dcp->c_statelock);
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_lookup_back(dvp, nm, vpp, cr);
			uncached = 1;
		} else
			error = ETIMEDOUT;
	} else if (error == 0) {
		*vpp = CTOV(cp);
	}

out:
	if (error == 0) {
		/* put the entry in the dnlc */
		if (cachefs_dnlc)
			dnlc_enter(dvp, nm, *vpp);

		/* save the cid of the parent so can find the name */
		cp = VTOC(*vpp);
		/* only dirty the child's metadata when the parent changed */
		if (bcmp(&cp->c_metadata.md_parent, &dircid,
		    sizeof (cfs_cid_t)) != 0) {
			mutex_enter(&cp->c_statelock);
			cp->c_metadata.md_parent = dircid;
			cp->c_flags |= CN_UPDATED;
			mutex_exit(&cp->c_statelock);
		}
	}

	rw_exit(&dcp->c_rwlock);
	/*
	 * After a stale entry forced the directory out of the cache, a
	 * packed directory is handed to cachefs_pack_common().  This is
	 * done after c_rwlock has been dropped.
	 */
	if (uncached && dcp->c_metadata.md_flags & MD_PACKED)
		(void) cachefs_pack_common(dvp, cr);
	return (error);
}
3747 
3748 /*
3749  * Called from cachefs_lookup_common when the back file system needs to be
3750  * examined to perform the lookup.
3751  */
3752 static int
3753 cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp,
3754     cred_t *cr)
3755 {
3756 	int error = 0;
3757 	cnode_t *cp, *dcp = VTOC(dvp);
3758 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3759 	vnode_t *backvp = NULL;
3760 	struct vattr va;
3761 	struct fid cookie;
3762 	cfs_cid_t cid;
3763 	uint32_t valid_fid;
3764 
3765 	mutex_enter(&dcp->c_statelock);
3766 
3767 	/* do a lookup on the back FS to get the back vnode */
3768 	if (dcp->c_backvp == NULL) {
3769 		error = cachefs_getbackvp(fscp, dcp);
3770 		if (error)
3771 			goto out;
3772 	}
3773 
3774 	CFS_DPRINT_BACKFS_NFSV4(fscp,
3775 	    ("cachefs_lookup (nfsv4): dcp %p, dbackvp %p, name %s\n",
3776 	    dcp, dcp->c_backvp, nm));
3777 	error = VOP_LOOKUP(dcp->c_backvp, nm, &backvp, (struct pathname *)NULL,
3778 	    0, (vnode_t *)NULL, cr, NULL, NULL, NULL);
3779 	if (error)
3780 		goto out;
3781 	if (IS_DEVVP(backvp)) {
3782 		struct vnode *devvp = backvp;
3783 
3784 		if (VOP_REALVP(devvp, &backvp, NULL) == 0) {
3785 			VN_HOLD(backvp);
3786 			VN_RELE(devvp);
3787 		}
3788 	}
3789 
3790 	/* get the fid and attrs from the back fs */
3791 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
3792 	error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
3793 	if (error)
3794 		goto out;
3795 
3796 	cid.cid_fileno = va.va_nodeid;
3797 	cid.cid_flags = 0;
3798 
3799 #if 0
3800 	/* XXX bob: this is probably no longer necessary */
3801 	/* if the directory entry was incomplete, we can complete it now */
3802 	if ((dcp->c_metadata.md_flags & MD_POPULATED) &&
3803 	    ((dcp->c_flags & CN_ASYNC_POPULATE) == 0) &&
3804 	    (dcp->c_filegrp->fg_flags & CFS_FG_WRITE)) {
3805 		cachefs_dir_modentry(dcp, d_offset, &cookie, &cid);
3806 	}
3807 #endif
3808 
3809 out:
3810 	mutex_exit(&dcp->c_statelock);
3811 
3812 	/* create the cnode */
3813 	if (error == 0) {
3814 		error = cachefs_cnode_make(&cid, fscp,
3815 		    (valid_fid ? &cookie : NULL),
3816 		    &va, backvp, cr, 0, &cp);
3817 		if (error == 0) {
3818 			*vpp = CTOV(cp);
3819 		}
3820 	}
3821 
3822 	if (backvp)
3823 		VN_RELE(backvp);
3824 
3825 	return (error);
3826 }
3827 
3828 /*ARGSUSED7*/
3829 static int
3830 cachefs_create(vnode_t *dvp, char *nm, vattr_t *vap,
3831     vcexcl_t exclusive, int mode, vnode_t **vpp, cred_t *cr, int flag,
3832     caller_context_t *ct, vsecattr_t *vsecp)
3833 
3834 {
3835 	cnode_t *dcp = VTOC(dvp);
3836 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3837 	cachefscache_t *cachep = fscp->fs_cache;
3838 	int error;
3839 	int connected = 0;
3840 	int held = 0;
3841 
3842 #ifdef CFSDEBUG
3843 	CFS_DEBUG(CFSDEBUG_VOPS)
3844 		printf("cachefs_create: ENTER dvp %p, nm %s\n",
3845 		    (void *)dvp, nm);
3846 #endif
3847 	if (getzoneid() != GLOBAL_ZONEID) {
3848 		error = EPERM;
3849 		goto out;
3850 	}
3851 
3852 	/*
3853 	 * Cachefs only provides pass-through support for NFSv4,
3854 	 * and all vnode operations are passed through to the
3855 	 * back file system. For NFSv4 pass-through to work, only
3856 	 * connected operation is supported, the cnode backvp must
3857 	 * exist, and cachefs optional (eg., disconnectable) flags
3858 	 * are turned off. Assert these conditions to ensure that
3859 	 * the backfilesystem is called for the create operation.
3860 	 */
3861 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3862 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
3863 
3864 	for (;;) {
3865 		/* get (or renew) access to the file system */
3866 		if (held) {
3867 			/* Won't loop with NFSv4 connected behavior */
3868 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3869 			cachefs_cd_release(fscp);
3870 			held = 0;
3871 		}
3872 		error = cachefs_cd_access(fscp, connected, 1);
3873 		if (error)
3874 			break;
3875 		held = 1;
3876 
3877 		/*
3878 		 * if we are connected, perform the remote portion of the
3879 		 * create.
3880 		 */
3881 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3882 			error = cachefs_create_connected(dvp, nm, vap,
3883 			    exclusive, mode, vpp, cr);
3884 			if (CFS_TIMEOUT(fscp, error)) {
3885 				cachefs_cd_release(fscp);
3886 				held = 0;
3887 				cachefs_cd_timedout(fscp);
3888 				connected = 0;
3889 				continue;
3890 			} else if (error) {
3891 				break;
3892 			}
3893 		}
3894 
3895 		/* else we must be disconnected */
3896 		else {
3897 			error = cachefs_create_disconnected(dvp, nm, vap,
3898 			    exclusive, mode, vpp, cr);
3899 			if (CFS_TIMEOUT(fscp, error)) {
3900 				connected = 1;
3901 				continue;
3902 			} else if (error) {
3903 				break;
3904 			}
3905 		}
3906 		break;
3907 	}
3908 
3909 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_CREATE)) {
3910 		fid_t *fidp = NULL;
3911 		ino64_t fileno = 0;
3912 		cnode_t *cp = NULL;
3913 		if (error == 0)
3914 			cp = VTOC(*vpp);
3915 
3916 		if (cp != NULL) {
3917 			fidp = &cp->c_metadata.md_cookie;
3918 			fileno = cp->c_id.cid_fileno;
3919 		}
3920 		cachefs_log_create(cachep, error, fscp->fs_cfsvfsp,
3921 		    fidp, fileno, crgetuid(cr));
3922 	}
3923 
3924 	if (held)
3925 		cachefs_cd_release(fscp);
3926 
3927 	if (error == 0 && CFS_ISFS_NONSHARED(fscp))
3928 		(void) cachefs_pack(dvp, nm, cr);
3929 	if (error == 0 && IS_DEVVP(*vpp)) {
3930 		struct vnode *spcvp;
3931 
3932 		spcvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
3933 		VN_RELE(*vpp);
3934 		if (spcvp == NULL) {
3935 			error = ENOSYS;
3936 		} else {
3937 			*vpp = spcvp;
3938 		}
3939 	}
3940 
3941 #ifdef CFS_CD_DEBUG
3942 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3943 #endif
3944 out:
3945 #ifdef CFSDEBUG
3946 	CFS_DEBUG(CFSDEBUG_VOPS)
3947 		printf("cachefs_create: EXIT error %d\n", error);
3948 #endif
3949 	return (error);
3950 }
3951 
3952 
/*
 * Connected-mode worker for cachefs_create().
 *
 * If nm already exists in dvp the existing vnode is returned, subject to
 * the exclusive/mode checks and an optional truncate via
 * cachefs_setattr_common().  Otherwise the file is created on the back
 * file system, a cnode is made for it and, for a non-shared file system
 * with a populated directory, the name is entered in the cached directory.
 *
 * Returns 0 with a held vnode in *vpp, or an errno value.  A timeout from
 * the initial lookup is returned unchanged.
 */
static int
cachefs_create_connected(vnode_t *dvp, char *nm, vattr_t *vap,
    enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error;
	vnode_t *tvp = NULL;	/* back vnode of the new file, released at out */
	vnode_t *devvp;
	fid_t cookie;
	vattr_t va;
	cnode_t *ncp;
	cfs_cid_t cid;
	vnode_t *vp;
	uint32_t valid_fid;

	/* special case if file already exists */
	error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
	if (CFS_TIMEOUT(fscp, error))
		return (error);
	if (error == 0) {
		if (exclusive == EXCL)
			error = EEXIST;
		else if (vp->v_type == VDIR && (mode & VWRITE))
			error = EISDIR;
		else if ((error =
		    cachefs_access_connected(vp, mode, 0, cr)) == 0) {
			/* a size in the create attributes is applied here */
			if ((vap->va_mask & AT_SIZE) && (vp->v_type == VREG)) {
				/* note: the caller's va_mask is overwritten */
				vap->va_mask = AT_SIZE;
				error = cachefs_setattr_common(vp, vap, 0,
				    cr, NULL);
			}
		}
		/* drop the hold taken by the lookup unless vp is returned */
		if (error) {
			VN_RELE(vp);
		} else
			*vpp = vp;
		return (error);
	}

	/*
	 * Any other lookup error falls through to the create below.
	 * The directory is write locked until "out".
	 */
	rw_enter(&dcp->c_rwlock, RW_WRITER);
	mutex_enter(&dcp->c_statelock);

	/* consistency check the directory */
	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* get the backvp if necessary */
	if (dcp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, dcp);
		if (error) {
			mutex_exit(&dcp->c_statelock);
			goto out;
		}
	}

	/* create the file on the back fs */
	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_create (nfsv4): dcp %p, dbackvp %p,"
	    "name %s\n", dcp, dcp->c_backvp, nm));
	error = VOP_CREATE(dcp->c_backvp, nm, vap, exclusive, mode,
	    &devvp, cr, 0, NULL, NULL);
	mutex_exit(&dcp->c_statelock);
	if (error)
		goto out;
	/* use the real vnode when there is one, else the vnode as created */
	if (VOP_REALVP(devvp, &tvp, NULL) == 0) {
		VN_HOLD(tvp);
		VN_RELE(devvp);
	} else {
		tvp = devvp;
	}

	/* get the fid and attrs from the back fs */
	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
	error = cachefs_getcookie(tvp, &cookie, &va, cr, valid_fid);
	if (error)
		goto out;

	/* make the cnode */
	cid.cid_fileno = va.va_nodeid;
	cid.cid_flags = 0;
	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
	    &va, tvp, cr, 0, &ncp);
	if (error)
		goto out;

	*vpp = CTOV(ncp);

	/* enter it in the parent directory */
	mutex_enter(&dcp->c_statelock);
	if (CFS_ISFS_NONSHARED(fscp) &&
	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
		/* see if entry already exists */
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL);
		if (error == ENOENT) {
			/* entry, does not exist, add the new file */
			error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie,
			    &ncp->c_id, SM_ASYNC);
			/*
			 * A failure to update the cached directory does not
			 * fail the create; the directory is just dropped
			 * from the cache.
			 */
			if (error) {
				cachefs_nocache(dcp);
				error = 0;
			}
			/* XXX should this be done elsewhere, too? */
			dnlc_enter(dvp, nm, *vpp);
		} else {
			/* entry exists or some other problem */
			cachefs_nocache(dcp);
			error = 0;
		}
	}
	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
	mutex_exit(&dcp->c_statelock);

out:
	rw_exit(&dcp->c_rwlock);
	/* release this function's reference on the back vnode, if any */
	if (tvp)
		VN_RELE(tvp);

	return (error);
}
4077 
4078 static int
4079 cachefs_create_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
4080 	enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr)
4081 {
4082 	cnode_t *dcp = VTOC(dvp);
4083 	cnode_t *cp;
4084 	cnode_t *ncp = NULL;
4085 	vnode_t *vp;
4086 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4087 	int error = 0;
4088 	struct vattr va;
4089 	timestruc_t current_time;
4090 	off_t commit = 0;
4091 	fid_t cookie;
4092 	cfs_cid_t cid;
4093 
4094 	rw_enter(&dcp->c_rwlock, RW_WRITER);
4095 	mutex_enter(&dcp->c_statelock);
4096 
4097 	/* give up if the directory is not populated */
4098 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
4099 		mutex_exit(&dcp->c_statelock);
4100 		rw_exit(&dcp->c_rwlock);
4101 		return (ETIMEDOUT);
4102 	}
4103 
4104 	/* special case if file already exists */
4105 	error = cachefs_dir_look(dcp, nm, &cookie, NULL, NULL, &cid);
4106 	if (error == EINVAL) {
4107 		mutex_exit(&dcp->c_statelock);
4108 		rw_exit(&dcp->c_rwlock);
4109 		return (ETIMEDOUT);
4110 	}
4111 	if (error == 0) {
4112 		mutex_exit(&dcp->c_statelock);
4113 		rw_exit(&dcp->c_rwlock);
4114 		error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
4115 		    cr, 0, &cp);
4116 		if (error) {
4117 			return (error);
4118 		}
4119 		vp = CTOV(cp);
4120 
4121 		if (cp->c_metadata.md_flags & MD_NEEDATTRS)
4122 			error = ETIMEDOUT;
4123 		else if (exclusive == EXCL)
4124 			error = EEXIST;
4125 		else if (vp->v_type == VDIR && (mode & VWRITE))
4126 			error = EISDIR;
4127 		else {
4128 			mutex_enter(&cp->c_statelock);
4129 			error = cachefs_access_local(cp, mode, cr);
4130 			mutex_exit(&cp->c_statelock);
4131 			if (!error) {
4132 				if ((vap->va_mask & AT_SIZE) &&
4133 				    (vp->v_type == VREG)) {
4134 					vap->va_mask = AT_SIZE;
4135 					error = cachefs_setattr_common(vp,
4136 					    vap, 0, cr, NULL);
4137 				}
4138 			}
4139 		}
4140 		if (error) {
4141 			VN_RELE(vp);
4142 		} else
4143 			*vpp = vp;
4144 		return (error);
4145 	}
4146 
4147 	/* give up if cannot modify the cache */
4148 	if (CFS_ISFS_WRITE_AROUND(fscp)) {
4149 		mutex_exit(&dcp->c_statelock);
4150 		error = ETIMEDOUT;
4151 		goto out;
4152 	}
4153 
4154 	/* check access */
4155 	if (error = cachefs_access_local(dcp, VWRITE, cr)) {
4156 		mutex_exit(&dcp->c_statelock);
4157 		goto out;
4158 	}
4159 
4160 	/* mark dir as modified */
4161 	cachefs_modified(dcp);
4162 	mutex_exit(&dcp->c_statelock);
4163 
4164 	/* must be privileged to set sticky bit */
4165 	if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
4166 		vap->va_mode &= ~VSVTX;
4167 
4168 	/* make up a reasonable set of attributes */
4169 	cachefs_attr_setup(vap, &va, dcp, cr);
4170 
4171 	/* create the cnode */
4172 	error = cachefs_cnode_create(fscp, &va, 0, &ncp);
4173 	if (error)
4174 		goto out;
4175 
4176 	mutex_enter(&ncp->c_statelock);
4177 
4178 	/* get the front file now instead of later */
4179 	if (vap->va_type == VREG) {
4180 		error = cachefs_getfrontfile(ncp);
4181 		if (error) {
4182 			mutex_exit(&ncp->c_statelock);
4183 			goto out;
4184 		}
4185 		ASSERT(ncp->c_frontvp != NULL);
4186 		ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
4187 		ncp->c_metadata.md_flags |= MD_POPULATED;
4188 	} else {
4189 		ASSERT(ncp->c_flags & CN_ALLOC_PENDING);
4190 		if (ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
4191 			(void) filegrp_allocattr(ncp->c_filegrp);
4192 		}
4193 		error = filegrp_create_metadata(ncp->c_filegrp,
4194 		    &ncp->c_metadata, &ncp->c_id);
4195 		if (error) {
4196 			mutex_exit(&ncp->c_statelock);
4197 			goto out;
4198 		}
4199 		ncp->c_flags &= ~CN_ALLOC_PENDING;
4200 	}
4201 	mutex_enter(&dcp->c_statelock);
4202 	cachefs_creategid(dcp, ncp, vap, cr);
4203 	cachefs_createacl(dcp, ncp);
4204 	mutex_exit(&dcp->c_statelock);
4205 
4206 	/* set times on the file */
4207 	gethrestime(&current_time);
4208 	ncp->c_metadata.md_vattr.va_atime = current_time;
4209 	ncp->c_metadata.md_localctime = current_time;
4210 	ncp->c_metadata.md_localmtime = current_time;
4211 	ncp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME;
4212 
4213 	/* reserve space for the daemon cid mapping */
4214 	error = cachefs_dlog_cidmap(fscp);
4215 	if (error) {
4216 		mutex_exit(&ncp->c_statelock);
4217 		goto out;
4218 	}
4219 	ncp->c_metadata.md_flags |= MD_MAPPING;
4220 
4221 	/* mark the new file as modified */
4222 	if (cachefs_modified_alloc(ncp)) {
4223 		mutex_exit(&ncp->c_statelock);
4224 		error = ENOSPC;
4225 		goto out;
4226 	}
4227 	ncp->c_flags |= CN_UPDATED;
4228 
4229 	/*
4230 	 * write the metadata now rather than waiting until
4231 	 * inactive so that if there's no space we can let
4232 	 * the caller know.
4233 	 */
4234 	ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
4235 	ASSERT((ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
4236 	error = filegrp_write_metadata(ncp->c_filegrp,
4237 	    &ncp->c_id, &ncp->c_metadata);
4238 	if (error) {
4239 		mutex_exit(&ncp->c_statelock);
4240 		goto out;
4241 	}
4242 
4243 	/* log the operation */
4244 	commit = cachefs_dlog_create(fscp, dcp, nm, vap, exclusive,
4245 	    mode, ncp, 0, cr);
4246 	if (commit == 0) {
4247 		mutex_exit(&ncp->c_statelock);
4248 		error = ENOSPC;
4249 		goto out;
4250 	}
4251 
4252 	mutex_exit(&ncp->c_statelock);
4253 
4254 	mutex_enter(&dcp->c_statelock);
4255 
4256 	/* update parent dir times */
4257 	dcp->c_metadata.md_localmtime = current_time;
4258 	dcp->c_metadata.md_flags |= MD_LOCALMTIME;
4259 	dcp->c_flags |= CN_UPDATED;
4260 
4261 	/* enter new file name in the parent directory */
4262 	if (dcp->c_metadata.md_flags & MD_POPULATED) {
4263 		error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie,
4264 		    &ncp->c_id, 0);
4265 		if (error) {
4266 			cachefs_nocache(dcp);
4267 			mutex_exit(&dcp->c_statelock);
4268 			error = ETIMEDOUT;
4269 			goto out;
4270 		}
4271 		dnlc_enter(dvp, nm, CTOV(ncp));
4272 	} else {
4273 		mutex_exit(&dcp->c_statelock);
4274 		error = ETIMEDOUT;
4275 		goto out;
4276 	}
4277 	mutex_exit(&dcp->c_statelock);
4278 
4279 out:
4280 	rw_exit(&dcp->c_rwlock);
4281 
4282 	if (commit) {
4283 		if (cachefs_dlog_commit(fscp, commit, error)) {
4284 			/*EMPTY*/
4285 			/* XXX bob: fix on panic */
4286 		}
4287 	}
4288 	if (error) {
4289 		/* destroy the cnode we created */
4290 		if (ncp) {
4291 			mutex_enter(&ncp->c_statelock);
4292 			ncp->c_flags |= CN_DESTROY;
4293 			mutex_exit(&ncp->c_statelock);
4294 			VN_RELE(CTOV(ncp));
4295 		}
4296 	} else {
4297 		*vpp = CTOV(ncp);
4298 	}
4299 	return (error);
4300 }
4301 
4302 /*ARGSUSED*/
4303 static int
4304 cachefs_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
4305     int flags)
4306 {
4307 	cnode_t *dcp = VTOC(dvp);
4308 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4309 	cachefscache_t *cachep = fscp->fs_cache;
4310 	int error = 0;
4311 	int held = 0;
4312 	int connected = 0;
4313 	size_t namlen;
4314 	vnode_t *vp = NULL;
4315 	int vfslock = 0;
4316 
4317 #ifdef CFSDEBUG
4318 	CFS_DEBUG(CFSDEBUG_VOPS)
4319 		printf("cachefs_remove: ENTER dvp %p name %s\n",
4320 		    (void *)dvp, nm);
4321 #endif
4322 	if (getzoneid() != GLOBAL_ZONEID) {
4323 		error = EPERM;
4324 		goto out;
4325 	}
4326 
4327 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
4328 		ASSERT(dcp->c_flags & CN_NOCACHE);
4329 
4330 	/*
4331 	 * Cachefs only provides pass-through support for NFSv4,
4332 	 * and all vnode operations are passed through to the
4333 	 * back file system. For NFSv4 pass-through to work, only
4334 	 * connected operation is supported, the cnode backvp must
4335 	 * exist, and cachefs optional (eg., disconnectable) flags
4336 	 * are turned off. Assert these conditions to ensure that
4337 	 * the backfilesystem is called for the remove operation.
4338 	 */
4339 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4340 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
4341 
4342 	for (;;) {
4343 		if (vfslock) {
4344 			vn_vfsunlock(vp);
4345 			vfslock = 0;
4346 		}
4347 		if (vp) {
4348 			VN_RELE(vp);
4349 			vp = NULL;
4350 		}
4351 
4352 		/* get (or renew) access to the file system */
4353 		if (held) {
4354 			/* Won't loop with NFSv4 connected behavior */
4355 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
4356 			cachefs_cd_release(fscp);
4357 			held = 0;
4358 		}
4359 		error = cachefs_cd_access(fscp, connected, 1);
4360 		if (error)
4361 			break;
4362 		held = 1;
4363 
4364 		/* if disconnected, do some extra error checking */
4365 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
4366 			/* check permissions */
4367 			mutex_enter(&dcp->c_statelock);
4368 			error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
4369 			mutex_exit(&dcp->c_statelock);
4370 			if (CFS_TIMEOUT(fscp, error)) {
4371 				connected = 1;
4372 				continue;
4373 			}
4374 			if (error)
4375 				break;
4376 
4377 			namlen = strlen(nm);
4378 			if (namlen == 0) {
4379 				error = EINVAL;
4380 				break;
4381 			}
4382 
4383 			/* cannot remove . and .. */
4384 			if (nm[0] == '.') {
4385 				if (namlen == 1) {
4386 					error = EINVAL;
4387 					break;
4388 				} else if (namlen == 2 && nm[1] == '.') {
4389 					error = EEXIST;
4390 					break;
4391 				}
4392 			}
4393 
4394 		}
4395 
4396 		/* get the cnode of the file to delete */
4397 		error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
4398 		if (error) {
4399 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
4400 				if (CFS_TIMEOUT(fscp, error)) {
4401 					cachefs_cd_release(fscp);
4402 					held = 0;
4403 					cachefs_cd_timedout(fscp);
4404 					connected = 0;
4405 					continue;
4406 				}
4407 			} else {
4408 				if (CFS_TIMEOUT(fscp, error)) {
4409 					connected = 1;
4410 					continue;
4411 				}
4412 			}
4413 			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE)) {
4414 				struct fid foo;
4415 
4416 				bzero(&foo, sizeof (foo));
4417 				cachefs_log_remove(cachep, error,
4418 				    fscp->fs_cfsvfsp, &foo, 0, crgetuid(cr));
4419 			}
4420 			break;
4421 		}
4422 
4423 		if (vp->v_type == VDIR) {
4424 			/* must be privileged to remove dirs with unlink() */
4425 			if ((error = secpolicy_fs_linkdir(cr, vp->v_vfsp)) != 0)
4426 				break;
4427 
4428 			/* see ufs_dirremove for why this is done, mount race */
4429 			if (vn_vfswlock(vp)) {
4430 				error = EBUSY;
4431 				break;
4432 			}
4433 			vfslock = 1;
4434 			if (vn_mountedvfs(vp) != NULL) {
4435 				error = EBUSY;
4436 				break;
4437 			}
4438 		}
4439 
4440 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
4441 			error = cachefs_remove_connected(dvp, nm, cr, vp);
4442 			if (CFS_TIMEOUT(fscp, error)) {
4443 				cachefs_cd_release(fscp);
4444 				held = 0;
4445 				cachefs_cd_timedout(fscp);
4446 				connected = 0;
4447 				continue;
4448 			}
4449 		} else {
4450 			error = cachefs_remove_disconnected(dvp, nm, cr,
4451 			    vp);
4452 			if (CFS_TIMEOUT(fscp, error)) {
4453 				connected = 1;
4454 				continue;
4455 			}
4456 		}
4457 		break;
4458 	}
4459 
4460 #if 0
4461 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE))
4462 		cachefs_log_remove(cachep, error, fscp->fs_cfsvfsp,
4463 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
4464 		    crgetuid(cr));
4465 #endif
4466 
4467 	if (held)
4468 		cachefs_cd_release(fscp);
4469 
4470 	if (vfslock)
4471 		vn_vfsunlock(vp);
4472 
4473 	if (vp)
4474 		VN_RELE(vp);
4475 
4476 #ifdef CFS_CD_DEBUG
4477 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
4478 #endif
4479 out:
4480 #ifdef CFSDEBUG
4481 	CFS_DEBUG(CFSDEBUG_VOPS)
4482 		printf("cachefs_remove: EXIT dvp %p\n", (void *)dvp);
4483 #endif
4484 
4485 	return (error);
4486 }
4487 
4488 int
4489 cachefs_remove_connected(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
4490 {
4491 	cnode_t *dcp = VTOC(dvp);
4492 	cnode_t *cp = VTOC(vp);
4493 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4494 	int error = 0;
4495 
4496 	/*
4497 	 * Acquire the rwlock (WRITER) on the directory to prevent other
4498 	 * activity on the directory.
4499 	 */
4500 	rw_enter(&dcp->c_rwlock, RW_WRITER);
4501 
4502 	/* purge dnlc of this entry so can get accurate vnode count */
4503 	dnlc_purge_vp(vp);
4504 
4505 	/*
4506 	 * If the cnode is active, make a link to the file
4507 	 * so operations on the file will continue.
4508 	 */
4509 	if ((vp->v_type != VDIR) &&
4510 	    !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) {
4511 		error = cachefs_remove_dolink(dvp, vp, nm, cr);
4512 		if (error)
4513 			goto out;
4514 	}
4515 
4516 	/* else call backfs NFSv4 handler if NFSv4 */
4517 	else if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
4518 		error = cachefs_remove_backfs_nfsv4(dvp, nm, cr, vp);
4519 		goto out;
4520 	}
4521 
4522 	/* else drop the backvp so nfs does not do rename */
4523 	else if (cp->c_backvp) {
4524 		mutex_enter(&cp->c_statelock);
4525 		if (cp->c_backvp) {
4526 			VN_RELE(cp->c_backvp);
4527 			cp->c_backvp = NULL;
4528 		}
4529 		mutex_exit(&cp->c_statelock);
4530 	}
4531 
4532 	mutex_enter(&dcp->c_statelock);
4533 
4534 	/* get the backvp */
4535 	if (dcp->c_backvp == NULL) {
4536 		error = cachefs_getbackvp(fscp, dcp);
4537 		if (error) {
4538 			mutex_exit(&dcp->c_statelock);
4539 			goto out;
4540 		}
4541 	}
4542 
4543 	/* check directory consistency */
4544 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
4545 	if (error) {
4546 		mutex_exit(&dcp->c_statelock);
4547 		goto out;
4548 	}
4549 
4550 	/* perform the remove on the back fs */
4551 	error = VOP_REMOVE(dcp->c_backvp, nm, cr, NULL, 0);
4552 	if (error) {
4553 		mutex_exit(&dcp->c_statelock);
4554 		goto out;
4555 	}
4556 
4557 	/* the dir has been modified */
4558 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
4559 
4560 	/* remove the entry from the populated directory */
4561 	if (CFS_ISFS_NONSHARED(fscp) &&
4562 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
4563 		error = cachefs_dir_rmentry(dcp, nm);
4564 		if (error) {
4565 			cachefs_nocache(dcp);
4566 			error = 0;
4567 		}
4568 	}
4569 	mutex_exit(&dcp->c_statelock);
4570 
4571 	/* fix up the file we deleted */
4572 	mutex_enter(&cp->c_statelock);
4573 	if (cp->c_attr.va_nlink == 1)
4574 		cp->c_flags |= CN_DESTROY;
4575 	else
4576 		cp->c_flags |= CN_UPDATED;
4577 
4578 	cp->c_attr.va_nlink--;
4579 	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
4580 	mutex_exit(&cp->c_statelock);
4581 
4582 out:
4583 	rw_exit(&dcp->c_rwlock);
4584 	return (error);
4585 }
4586 
4587 /*
4588  * cachefs_remove_backfs_nfsv4
4589  *
4590  * Call NFSv4 back filesystem to handle the remove (cachefs
4591  * pass-through support for NFSv4).
4592  */
4593 int
4594 cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
4595 {
4596 	cnode_t *dcp = VTOC(dvp);
4597 	cnode_t *cp = VTOC(vp);
4598 	vnode_t *dbackvp;
4599 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4600 	int error = 0;
4601 
4602 	/*
4603 	 * For NFSv4 pass-through to work, only connected operation
4604 	 * is supported, the cnode backvp must exist, and cachefs
4605 	 * optional (eg., disconnectable) flags are turned off. Assert
4606 	 * these conditions for the getattr operation.
4607 	 */
4608 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4609 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
4610 
4611 	/* Should hold the directory readwrite lock to update directory */
4612 	ASSERT(RW_WRITE_HELD(&dcp->c_rwlock));
4613 
4614 	/*
4615 	 * Update attributes for directory. Note that
4616 	 * CFSOP_CHECK_COBJECT asserts for c_statelock being
4617 	 * held, so grab it before calling the routine.
4618 	 */
4619 	mutex_enter(&dcp->c_statelock);
4620 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
4621 	mutex_exit(&dcp->c_statelock);
4622 	if (error)
4623 		goto out;
4624 
4625 	/*
4626 	 * Update attributes for cp. Note that CFSOP_CHECK_COBJECT
4627 	 * asserts for c_statelock being held, so grab it before
4628 	 * calling the routine.
4629 	 */
4630 	mutex_enter(&cp->c_statelock);
4631 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
4632 	if (error) {
4633 		mutex_exit(&cp->c_statelock);
4634 		goto out;
4635 	}
4636 
4637 	/*
4638 	 * Drop the backvp so nfs if the link count is 1 so that
4639 	 * nfs does not do rename. Ensure that we will destroy the cnode
4640 	 * since this cnode no longer contains the backvp. Note that we
4641 	 * maintain lock on this cnode to prevent change till the remove
4642 	 * completes, otherwise other operations will encounter an ESTALE
4643 	 * if they try to use the cnode with CN_DESTROY set (see
4644 	 * cachefs_get_backvp()), or change the state of the cnode
4645 	 * while we're removing it.
4646 	 */
4647 	if (cp->c_attr.va_nlink == 1) {
4648 		/*
4649 		 * The unldvp information is created for the case
4650 		 * when there is more than one reference on the
4651 		 * vnode when a remove operation is called. If the
4652 		 * remove itself was holding a reference to the
4653 		 * vnode, then a subsequent remove will remove the
4654 		 * backvp, so we need to get rid of the unldvp
4655 		 * before removing the backvp. An alternate would
4656 		 * be to simply ignore the remove and let the
4657 		 * inactivation routine do the deletion of the
4658 		 * unldvp.
4659 		 */
4660 		if (cp->c_unldvp) {
4661 			VN_RELE(cp->c_unldvp);
4662 			cachefs_kmem_free(cp->c_unlname, MAXNAMELEN);
4663 			crfree(cp->c_unlcred);
4664 			cp->c_unldvp = NULL;
4665 			cp->c_unlcred = NULL;
4666 		}
4667 		cp->c_flags |= CN_DESTROY;
4668 		cp->c_attr.va_nlink = 0;
4669 		VN_RELE(cp->c_backvp);
4670 		cp->c_backvp = NULL;
4671 	}
4672 
4673 	/* perform the remove on back fs after extracting directory backvp */
4674 	mutex_enter(&dcp->c_statelock);
4675 	dbackvp = dcp->c_backvp;
4676 	mutex_exit(&dcp->c_statelock);
4677 
4678 	CFS_DPRINT_BACKFS_NFSV4(fscp,
4679 	    ("cachefs_remove (nfsv4): dcp %p, dbackvp %p, name %s\n",
4680 	    dcp, dbackvp, nm));
4681 	error = VOP_REMOVE(dbackvp, nm, cr, NULL, 0);
4682 	if (error) {
4683 		mutex_exit(&cp->c_statelock);
4684 		goto out;
4685 	}
4686 
4687 	/* fix up the file we deleted, if not destroying the cnode */
4688 	if ((cp->c_flags & CN_DESTROY) == 0) {
4689 		cp->c_attr.va_nlink--;
4690 		cp->c_flags |= CN_UPDATED;
4691 	}
4692 
4693 	mutex_exit(&cp->c_statelock);
4694 
4695 out:
4696 	return (error);
4697 }
4698 
4699 int
4700 cachefs_remove_disconnected(vnode_t *dvp, char *nm, cred_t *cr,
4701     vnode_t *vp)
4702 {
4703 	cnode_t *dcp = VTOC(dvp);
4704 	cnode_t *cp = VTOC(vp);
4705 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4706 	int error = 0;
4707 	off_t commit = 0;
4708 	timestruc_t current_time;
4709 
4710 	if (CFS_ISFS_WRITE_AROUND(fscp))
4711 		return (ETIMEDOUT);
4712 
4713 	if (cp->c_metadata.md_flags & MD_NEEDATTRS)
4714 		return (ETIMEDOUT);
4715 
4716 	/*
4717 	 * Acquire the rwlock (WRITER) on the directory to prevent other
4718 	 * activity on the directory.
4719 	 */
4720 	rw_enter(&dcp->c_rwlock, RW_WRITER);
4721 
4722 	/* dir must be populated */
4723 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
4724 		error = ETIMEDOUT;
4725 		goto out;
4726 	}
4727 
4728 	mutex_enter(&dcp->c_statelock);
4729 	mutex_enter(&cp->c_statelock);
4730 
4731 	error = cachefs_stickyrmchk(dcp, cp, cr);
4732 
4733 	mutex_exit(&cp->c_statelock);
4734 	mutex_exit(&dcp->c_statelock);
4735 	if (error)
4736 		goto out;
4737 
4738 	/* purge dnlc of this entry so can get accurate vnode count */
4739 	dnlc_purge_vp(vp);
4740 
4741 	/*
4742 	 * If the cnode is active, make a link to the file
4743 	 * so operations on the file will continue.
4744 	 */
4745 	if ((vp->v_type != VDIR) &&
4746 	    !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) {
4747 		error = cachefs_remove_dolink(dvp, vp, nm, cr);
4748 		if (error)
4749 			goto out;
4750 	}
4751 
4752 	if (cp->c_attr.va_nlink > 1) {
4753 		mutex_enter(&cp->c_statelock);
4754 		if (cachefs_modified_alloc(cp)) {
4755 			mutex_exit(&cp->c_statelock);
4756 			error = ENOSPC;
4757 			goto out;
4758 		}
4759 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
4760 			error = cachefs_dlog_cidmap(fscp);
4761 			if (error) {
4762 				mutex_exit(&cp->c_statelock);
4763 				error = ENOSPC;
4764 				goto out;
4765 			}
4766 			cp->c_metadata.md_flags |= MD_MAPPING;
4767 			cp->c_flags |= CN_UPDATED;
4768 		}
4769 		mutex_exit(&cp->c_statelock);
4770 	}
4771 
4772 	/* log the remove */
4773 	commit = cachefs_dlog_remove(fscp, dcp, nm, cp, cr);
4774 	if (commit == 0) {
4775 		error = ENOSPC;
4776 		goto out;
4777 	}
4778 
4779 	/* remove the file from the dir */
4780 	mutex_enter(&dcp->c_statelock);
4781 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
4782 		mutex_exit(&dcp->c_statelock);
4783 		error = ETIMEDOUT;
4784 		goto out;
4785 
4786 	}
4787 	cachefs_modified(dcp);
4788 	error = cachefs_dir_rmentry(dcp, nm);
4789 	if (error) {
4790 		mutex_exit(&dcp->c_statelock);
4791 		if (error == ENOTDIR)
4792 			error = ETIMEDOUT;
4793 		goto out;
4794 	}
4795 
4796 	/* update parent dir times */
4797 	gethrestime(&current_time);
4798 	dcp->c_metadata.md_localctime = current_time;
4799 	dcp->c_metadata.md_localmtime = current_time;
4800 	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
4801 	dcp->c_flags |= CN_UPDATED;
4802 	mutex_exit(&dcp->c_statelock);
4803 
4804 	/* adjust file we are deleting */
4805 	mutex_enter(&cp->c_statelock);
4806 	cp->c_attr.va_nlink--;
4807 	cp->c_metadata.md_localctime = current_time;
4808 	cp->c_metadata.md_flags |= MD_LOCALCTIME;
4809 	if (cp->c_attr.va_nlink == 0) {
4810 		cp->c_flags |= CN_DESTROY;
4811 	} else {
4812 		cp->c_flags |= CN_UPDATED;
4813 	}
4814 	mutex_exit(&cp->c_statelock);
4815 
4816 out:
4817 	if (commit) {
4818 		/* commit the log entry */
4819 		if (cachefs_dlog_commit(fscp, commit, error)) {
4820 			/*EMPTY*/
4821 			/* XXX bob: fix on panic */
4822 		}
4823 	}
4824 
4825 	rw_exit(&dcp->c_rwlock);
4826 	return (error);
4827 }
4828 
4829 /*ARGSUSED*/
4830 static int
4831 cachefs_link(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr,
4832     caller_context_t *ct, int flags)
4833 {
4834 	fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
4835 	cnode_t *tdcp = VTOC(tdvp);
4836 	struct vnode *realvp;
4837 	int error = 0;
4838 	int held = 0;
4839 	int connected = 0;
4840 
4841 #ifdef CFSDEBUG
4842 	CFS_DEBUG(CFSDEBUG_VOPS)
4843 		printf("cachefs_link: ENTER fvp %p tdvp %p tnm %s\n",
4844 		    (void *)fvp, (void *)tdvp, tnm);
4845 #endif
4846 
4847 	if (getzoneid() != GLOBAL_ZONEID) {
4848 		error = EPERM;
4849 		goto out;
4850 	}
4851 
4852 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
4853 		ASSERT(tdcp->c_flags & CN_NOCACHE);
4854 
4855 	if (VOP_REALVP(fvp, &realvp, ct) == 0) {
4856 		fvp = realvp;
4857 	}
4858 
4859 	/*
4860 	 * Cachefs only provides pass-through support for NFSv4,
4861 	 * and all vnode operations are passed through to the
4862 	 * back file system. For NFSv4 pass-through to work, only
4863 	 * connected operation is supported, the cnode backvp must
4864 	 * exist, and cachefs optional (eg., disconnectable) flags
4865 	 * are turned off. Assert these conditions to ensure that
4866 	 * the backfilesystem is called for the link operation.
4867 	 */
4868 
4869 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4870 	CFS_BACKFS_NFSV4_ASSERT_CNODE(tdcp);
4871 
4872 	for (;;) {
4873 		/* get (or renew) access to the file system */
4874 		if (held) {
4875 			/* Won't loop with NFSv4 connected behavior */
4876 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
4877 			rw_exit(&tdcp->c_rwlock);
4878 			cachefs_cd_release(fscp);
4879 			held = 0;
4880 		}
4881 		error = cachefs_cd_access(fscp, connected, 1);
4882 		if (error)
4883 			break;
4884 		rw_enter(&tdcp->c_rwlock, RW_WRITER);
4885 		held = 1;
4886 
4887 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
4888 			error = cachefs_link_connected(tdvp, fvp, tnm, cr);
4889 			if (CFS_TIMEOUT(fscp, error)) {
4890 				rw_exit(&tdcp->c_rwlock);
4891 				cachefs_cd_release(fscp);
4892 				held = 0;
4893 				cachefs_cd_timedout(fscp);
4894 				connected = 0;
4895 				continue;
4896 			}
4897 		} else {
4898 			error = cachefs_link_disconnected(tdvp, fvp, tnm,
4899 			    cr);
4900 			if (CFS_TIMEOUT(fscp, error)) {
4901 				connected = 1;
4902 				continue;
4903 			}
4904 		}
4905 		break;
4906 	}
4907 
4908 	if (held) {
4909 		rw_exit(&tdcp->c_rwlock);
4910 		cachefs_cd_release(fscp);
4911 	}
4912 
4913 #ifdef CFS_CD_DEBUG
4914 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
4915 #endif
4916 out:
4917 #ifdef CFSDEBUG
4918 	CFS_DEBUG(CFSDEBUG_VOPS)
4919 		printf("cachefs_link: EXIT fvp %p tdvp %p tnm %s\n",
4920 		    (void *)fvp, (void *)tdvp, tnm);
4921 #endif
4922 	return (error);
4923 }
4924 
4925 static int
4926 cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr)
4927 {
4928 	cnode_t *tdcp = VTOC(tdvp);
4929 	cnode_t *fcp = VTOC(fvp);
4930 	fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
4931 	int error = 0;
4932 	vnode_t *backvp = NULL;
4933 
4934 	if (tdcp != fcp) {
4935 		mutex_enter(&fcp->c_statelock);
4936 
4937 		if (fcp->c_backvp == NULL) {
4938 			error = cachefs_getbackvp(fscp, fcp);
4939 			if (error) {
4940 				mutex_exit(&fcp->c_statelock);
4941 				goto out;
4942 			}
4943 		}
4944 
4945 		error = CFSOP_CHECK_COBJECT(fscp, fcp, 0, cr);
4946 		if (error) {
4947 			mutex_exit(&fcp->c_statelock);
4948 			goto out;
4949 		}
4950 		backvp = fcp->c_backvp;
4951 		VN_HOLD(backvp);
4952 		mutex_exit(&fcp->c_statelock);
4953 	}
4954 
4955 	mutex_enter(&tdcp->c_statelock);
4956 
4957 	/* get backvp of target directory */
4958 	if (tdcp->c_backvp == NULL) {
4959 		error = cachefs_getbackvp(fscp, tdcp);
4960 		if (error) {
4961 			mutex_exit(&tdcp->c_statelock);
4962 			goto out;
4963 		}
4964 	}
4965 
4966 	/* consistency check target directory */
4967 	error = CFSOP_CHECK_COBJECT(fscp, tdcp, 0, cr);
4968 	if (error) {
4969 		mutex_exit(&tdcp->c_statelock);
4970 		goto out;
4971 	}
4972 	if (backvp == NULL) {
4973 		backvp = tdcp->c_backvp;
4974 		VN_HOLD(backvp);
4975 	}
4976 
4977 	/* perform the link on the back fs */
4978 	CFS_DPRINT_BACKFS_NFSV4(fscp,
4979 	    ("cachefs_link (nfsv4): tdcp %p, tdbackvp %p, "
4980 	    "name %s\n", tdcp, tdcp->c_backvp, tnm));
4981 	error = VOP_LINK(tdcp->c_backvp, backvp, tnm, cr, NULL, 0);
4982 	if (error) {
4983 		mutex_exit(&tdcp->c_statelock);
4984 		goto out;
4985 	}
4986 
4987 	CFSOP_MODIFY_COBJECT(fscp, tdcp, cr);
4988 
4989 	/* if the dir is populated, add the new link */
4990 	if (CFS_ISFS_NONSHARED(fscp) &&
4991 	    (tdcp->c_metadata.md_flags & MD_POPULATED)) {
4992 		error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie,
4993 		    &fcp->c_id, SM_ASYNC);
4994 		if (error) {
4995 			cachefs_nocache(tdcp);
4996 			error = 0;
4997 		}
4998 	}
4999 	mutex_exit(&tdcp->c_statelock);
5000 
5001 	/* get the new link count on the file */
5002 	mutex_enter(&fcp->c_statelock);
5003 	fcp->c_flags |= CN_UPDATED;
5004 	CFSOP_MODIFY_COBJECT(fscp, fcp, cr);
5005 	if (fcp->c_backvp == NULL) {
5006 		error = cachefs_getbackvp(fscp, fcp);
5007 		if (error) {
5008 			mutex_exit(&fcp->c_statelock);
5009 			goto out;
5010 		}
5011 	}
5012 
5013 	/* XXX bob: given what modify_cobject does this seems unnecessary */
5014 	fcp->c_attr.va_mask = AT_ALL;
5015 	error = VOP_GETATTR(fcp->c_backvp, &fcp->c_attr, 0, cr, NULL);
5016 	mutex_exit(&fcp->c_statelock);
5017 out:
5018 	if (backvp)
5019 		VN_RELE(backvp);
5020 
5021 	return (error);
5022 }
5023 
5024 static int
5025 cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp, char *tnm,
5026     cred_t *cr)
5027 {
5028 	cnode_t *tdcp = VTOC(tdvp);
5029 	cnode_t *fcp = VTOC(fvp);
5030 	fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
5031 	int error = 0;
5032 	timestruc_t current_time;
5033 	off_t commit = 0;
5034 
5035 	if (fvp->v_type == VDIR && secpolicy_fs_linkdir(cr, fvp->v_vfsp) != 0 ||
5036 	    fcp->c_attr.va_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)
5037 		return (EPERM);
5038 
5039 	if (CFS_ISFS_WRITE_AROUND(fscp))
5040 		return (ETIMEDOUT);
5041 
5042 	if (fcp->c_metadata.md_flags & MD_NEEDATTRS)
5043 		return (ETIMEDOUT);
5044 
5045 	mutex_enter(&tdcp->c_statelock);
5046 
5047 	/* check permissions */
5048 	if (error = cachefs_access_local(tdcp, (VEXEC|VWRITE), cr)) {
5049 		mutex_exit(&tdcp->c_statelock);
5050 		goto out;
5051 	}
5052 
5053 	/* the directory front file must be populated */
5054 	if ((tdcp->c_metadata.md_flags & MD_POPULATED) == 0) {
5055 		error = ETIMEDOUT;
5056 		mutex_exit(&tdcp->c_statelock);
5057 		goto out;
5058 	}
5059 
5060 	/* make sure tnm does not already exist in the directory */
5061 	error = cachefs_dir_look(tdcp, tnm, NULL, NULL, NULL, NULL);
5062 	if (error == ENOTDIR) {
5063 		error = ETIMEDOUT;
5064 		mutex_exit(&tdcp->c_statelock);
5065 		goto out;
5066 	}
5067 	if (error != ENOENT) {
5068 		error = EEXIST;
5069 		mutex_exit(&tdcp->c_statelock);
5070 		goto out;
5071 	}
5072 
5073 	mutex_enter(&fcp->c_statelock);
5074 
5075 	/* create a mapping for the file if necessary */
5076 	if ((fcp->c_metadata.md_flags & MD_MAPPING) == 0) {
5077 		error = cachefs_dlog_cidmap(fscp);
5078 		if (error) {
5079 			mutex_exit(&fcp->c_statelock);
5080 			mutex_exit(&tdcp->c_statelock);
5081 			error = ENOSPC;
5082 			goto out;
5083 		}
5084 		fcp->c_metadata.md_flags |= MD_MAPPING;
5085 		fcp->c_flags |= CN_UPDATED;
5086 	}
5087 
5088 	/* mark file as modified */
5089 	if (cachefs_modified_alloc(fcp)) {
5090 		mutex_exit(&fcp->c_statelock);
5091 		mutex_exit(&tdcp->c_statelock);
5092 		error = ENOSPC;
5093 		goto out;
5094 	}
5095 	mutex_exit(&fcp->c_statelock);
5096 
5097 	/* log the operation */
5098 	commit = cachefs_dlog_link(fscp, tdcp, tnm, fcp, cr);
5099 	if (commit == 0) {
5100 		mutex_exit(&tdcp->c_statelock);
5101 		error = ENOSPC;
5102 		goto out;
5103 	}
5104 
5105 	gethrestime(&current_time);
5106 
5107 	/* make the new link */
5108 	cachefs_modified(tdcp);
5109 	error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie,
5110 	    &fcp->c_id, SM_ASYNC);
5111 	if (error) {
5112 		error = 0;
5113 		mutex_exit(&tdcp->c_statelock);
5114 		goto out;
5115 	}
5116 
5117 	/* Update mtime/ctime of parent dir */
5118 	tdcp->c_metadata.md_localmtime = current_time;
5119 	tdcp->c_metadata.md_localctime = current_time;
5120 	tdcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5121 	tdcp->c_flags |= CN_UPDATED;
5122 	mutex_exit(&tdcp->c_statelock);
5123 
5124 	/* update the file we linked to */
5125 	mutex_enter(&fcp->c_statelock);
5126 	fcp->c_attr.va_nlink++;
5127 	fcp->c_metadata.md_localctime = current_time;
5128 	fcp->c_metadata.md_flags |= MD_LOCALCTIME;
5129 	fcp->c_flags |= CN_UPDATED;
5130 	mutex_exit(&fcp->c_statelock);
5131 
5132 out:
5133 	if (commit) {
5134 		/* commit the log entry */
5135 		if (cachefs_dlog_commit(fscp, commit, error)) {
5136 			/*EMPTY*/
5137 			/* XXX bob: fix on panic */
5138 		}
5139 	}
5140 
5141 	return (error);
5142 }
5143 
/*
 * Serialize all renames in CFS to avoid deadlocks: a rename has to
 * hold two cnodes atomically.
 */
kmutex_t cachefs_rename_lock;
5149 
5150 /*ARGSUSED*/
5151 static int
5152 cachefs_rename(vnode_t *odvp, char *onm, vnode_t *ndvp,
5153     char *nnm, cred_t *cr, caller_context_t *ct, int flags)
5154 {
5155 	fscache_t *fscp = C_TO_FSCACHE(VTOC(odvp));
5156 	cachefscache_t *cachep = fscp->fs_cache;
5157 	int error = 0;
5158 	int held = 0;
5159 	int connected = 0;
5160 	vnode_t *delvp = NULL;
5161 	vnode_t *tvp = NULL;
5162 	int vfslock = 0;
5163 	struct vnode *realvp;
5164 
5165 	if (getzoneid() != GLOBAL_ZONEID)
5166 		return (EPERM);
5167 
5168 	if (VOP_REALVP(ndvp, &realvp, ct) == 0)
5169 		ndvp = realvp;
5170 
5171 	/*
5172 	 * if the fs NOFILL or NOCACHE flags are on, then the old and new
5173 	 * directory cnodes better indicate NOCACHE mode as well.
5174 	 */
5175 	ASSERT(
5176 	    (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE)) == 0 ||
5177 	    ((VTOC(odvp)->c_flags & CN_NOCACHE) &&
5178 	    (VTOC(ndvp)->c_flags & CN_NOCACHE)));
5179 
5180 	/*
5181 	 * Cachefs only provides pass-through support for NFSv4,
5182 	 * and all vnode operations are passed through to the
5183 	 * back file system. For NFSv4 pass-through to work, only
5184 	 * connected operation is supported, the cnode backvp must
5185 	 * exist, and cachefs optional (eg., disconnectable) flags
5186 	 * are turned off. Assert these conditions to ensure that
5187 	 * the backfilesystem is called for the rename operation.
5188 	 */
5189 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
5190 	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(odvp));
5191 	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(ndvp));
5192 
5193 	for (;;) {
5194 		if (vfslock) {
5195 			vn_vfsunlock(delvp);
5196 			vfslock = 0;
5197 		}
5198 		if (delvp) {
5199 			VN_RELE(delvp);
5200 			delvp = NULL;
5201 		}
5202 
5203 		/* get (or renew) access to the file system */
5204 		if (held) {
5205 			/* Won't loop for NFSv4 connected support */
5206 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
5207 			cachefs_cd_release(fscp);
5208 			held = 0;
5209 		}
5210 		error = cachefs_cd_access(fscp, connected, 1);
5211 		if (error)
5212 			break;
5213 		held = 1;
5214 
5215 		/* sanity check */
5216 		if ((odvp->v_type != VDIR) || (ndvp->v_type != VDIR)) {
5217 			error = EINVAL;
5218 			break;
5219 		}
5220 
5221 		/* cannot rename from or to . or .. */
5222 		if (strcmp(onm, ".") == 0 || strcmp(onm, "..") == 0 ||
5223 		    strcmp(nnm, ".") == 0 || strcmp(nnm, "..") == 0) {
5224 			error = EINVAL;
5225 			break;
5226 		}
5227 
5228 		if (odvp != ndvp) {
5229 			/*
5230 			 * if moving a directory, its notion
5231 			 * of ".." will change
5232 			 */
5233 			error = cachefs_lookup_common(odvp, onm, &tvp,
5234 			    NULL, 0, NULL, cr);
5235 			if (error == 0) {
5236 				ASSERT(tvp != NULL);
5237 				if (tvp->v_type == VDIR) {
5238 					cnode_t *cp = VTOC(tvp);
5239 
5240 					dnlc_remove(tvp, "..");
5241 
5242 					mutex_enter(&cp->c_statelock);
5243 					CFSOP_MODIFY_COBJECT(fscp, cp, cr);
5244 					mutex_exit(&cp->c_statelock);
5245 				}
5246 			} else {
5247 				tvp = NULL;
5248 				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5249 					if (CFS_TIMEOUT(fscp, error)) {
5250 						cachefs_cd_release(fscp);
5251 						held = 0;
5252 						cachefs_cd_timedout(fscp);
5253 						connected = 0;
5254 						continue;
5255 					}
5256 				} else {
5257 					if (CFS_TIMEOUT(fscp, error)) {
5258 						connected = 1;
5259 						continue;
5260 					}
5261 				}
5262 				break;
5263 			}
5264 		}
5265 
5266 		/* get the cnode if file being deleted */
5267 		error = cachefs_lookup_common(ndvp, nnm, &delvp, NULL, 0,
5268 		    NULL, cr);
5269 		if (error) {
5270 			delvp = NULL;
5271 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5272 				if (CFS_TIMEOUT(fscp, error)) {
5273 					cachefs_cd_release(fscp);
5274 					held = 0;
5275 					cachefs_cd_timedout(fscp);
5276 					connected = 0;
5277 					continue;
5278 				}
5279 			} else {
5280 				if (CFS_TIMEOUT(fscp, error)) {
5281 					connected = 1;
5282 					continue;
5283 				}
5284 			}
5285 			if (error != ENOENT)
5286 				break;
5287 		}
5288 
5289 		if (delvp && delvp->v_type == VDIR) {
5290 			/* see ufs_dirremove for why this is done, mount race */
5291 			if (vn_vfswlock(delvp)) {
5292 				error = EBUSY;
5293 				break;
5294 			}
5295 			vfslock = 1;
5296 			if (vn_mountedvfs(delvp) != NULL) {
5297 				error = EBUSY;
5298 				break;
5299 			}
5300 		}
5301 
5302 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5303 			error = cachefs_rename_connected(odvp, onm,
5304 			    ndvp, nnm, cr, delvp);
5305 			if (CFS_TIMEOUT(fscp, error)) {
5306 				cachefs_cd_release(fscp);
5307 				held = 0;
5308 				cachefs_cd_timedout(fscp);
5309 				connected = 0;
5310 				continue;
5311 			}
5312 		} else {
5313 			error = cachefs_rename_disconnected(odvp, onm,
5314 			    ndvp, nnm, cr, delvp);
5315 			if (CFS_TIMEOUT(fscp, error)) {
5316 				connected = 1;
5317 				continue;
5318 			}
5319 		}
5320 		break;
5321 	}
5322 
5323 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RENAME)) {
5324 		struct fid gone;
5325 
5326 		bzero(&gone, sizeof (gone));
5327 		gone.fid_len = MAXFIDSZ;
5328 		if (delvp != NULL)
5329 			(void) VOP_FID(delvp, &gone, ct);
5330 
5331 		cachefs_log_rename(cachep, error, fscp->fs_cfsvfsp,
5332 		    &gone, 0, (delvp != NULL), crgetuid(cr));
5333 	}
5334 
5335 	if (held)
5336 		cachefs_cd_release(fscp);
5337 
5338 	if (vfslock)
5339 		vn_vfsunlock(delvp);
5340 
5341 	if (delvp)
5342 		VN_RELE(delvp);
5343 	if (tvp)
5344 		VN_RELE(tvp);
5345 
5346 #ifdef CFS_CD_DEBUG
5347 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
5348 #endif
5349 	return (error);
5350 }
5351 
5352 static int
5353 cachefs_rename_connected(vnode_t *odvp, char *onm, vnode_t *ndvp,
5354     char *nnm, cred_t *cr, vnode_t *delvp)
5355 {
5356 	cnode_t *odcp = VTOC(odvp);
5357 	cnode_t *ndcp = VTOC(ndvp);
5358 	vnode_t *revp = NULL;
5359 	cnode_t *recp;
5360 	cnode_t *delcp;
5361 	fscache_t *fscp = C_TO_FSCACHE(odcp);
5362 	int error = 0;
5363 	struct fid cookie;
5364 	struct fid *cookiep;
5365 	cfs_cid_t cid;
5366 	int gotdirent;
5367 
5368 	/* find the file we are renaming */
5369 	error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr);
5370 	if (error)
5371 		return (error);
5372 	recp = VTOC(revp);
5373 
5374 	/*
5375 	 * To avoid deadlock, we acquire this global rename lock before
5376 	 * we try to get the locks for the source and target directories.
5377 	 */
5378 	mutex_enter(&cachefs_rename_lock);
5379 	rw_enter(&odcp->c_rwlock, RW_WRITER);
5380 	if (odcp != ndcp) {
5381 		rw_enter(&ndcp->c_rwlock, RW_WRITER);
5382 	}
5383 	mutex_exit(&cachefs_rename_lock);
5384 
5385 	ASSERT((odcp->c_flags & CN_ASYNC_POP_WORKING) == 0);
5386 	ASSERT((ndcp->c_flags & CN_ASYNC_POP_WORKING) == 0);
5387 
5388 	mutex_enter(&odcp->c_statelock);
5389 	if (odcp->c_backvp == NULL) {
5390 		error = cachefs_getbackvp(fscp, odcp);
5391 		if (error) {
5392 			mutex_exit(&odcp->c_statelock);
5393 			goto out;
5394 		}
5395 	}
5396 
5397 	error = CFSOP_CHECK_COBJECT(fscp, odcp, 0, cr);
5398 	if (error) {
5399 		mutex_exit(&odcp->c_statelock);
5400 		goto out;
5401 	}
5402 	mutex_exit(&odcp->c_statelock);
5403 
5404 	if (odcp != ndcp) {
5405 		mutex_enter(&ndcp->c_statelock);
5406 		if (ndcp->c_backvp == NULL) {
5407 			error = cachefs_getbackvp(fscp, ndcp);
5408 			if (error) {
5409 				mutex_exit(&ndcp->c_statelock);
5410 				goto out;
5411 			}
5412 		}
5413 
5414 		error = CFSOP_CHECK_COBJECT(fscp, ndcp, 0, cr);
5415 		if (error) {
5416 			mutex_exit(&ndcp->c_statelock);
5417 			goto out;
5418 		}
5419 		mutex_exit(&ndcp->c_statelock);
5420 	}
5421 
5422 	/* if a file is being deleted because of this rename */
5423 	if (delvp) {
5424 		/* if src and dest file are same */
5425 		if (delvp == revp) {
5426 			error = 0;
5427 			goto out;
5428 		}
5429 
5430 		/*
5431 		 * If the cnode is active, make a link to the file
5432 		 * so operations on the file will continue.
5433 		 */
5434 		dnlc_purge_vp(delvp);
5435 		delcp = VTOC(delvp);
5436 		if ((delvp->v_type != VDIR) &&
5437 		    !((delvp->v_count == 1) ||
5438 		    ((delvp->v_count == 2) && delcp->c_ipending))) {
5439 			error = cachefs_remove_dolink(ndvp, delvp, nnm, cr);
5440 			if (error)
5441 				goto out;
5442 		}
5443 	}
5444 
5445 	/* do the rename on the back fs */
5446 	CFS_DPRINT_BACKFS_NFSV4(fscp,
5447 	    ("cachefs_rename (nfsv4): odcp %p, odbackvp %p, "
5448 	    " ndcp %p, ndbackvp %p, onm %s, nnm %s\n",
5449 	    odcp, odcp->c_backvp, ndcp, ndcp->c_backvp, onm, nnm));
5450 	error = VOP_RENAME(odcp->c_backvp, onm, ndcp->c_backvp, nnm, cr, NULL,
5451 	    0);
5452 	if (error)
5453 		goto out;
5454 
5455 	/* purge mappings to file in the old directory */
5456 	dnlc_purge_vp(odvp);
5457 
5458 	/* purge mappings in the new dir if we deleted a file */
5459 	if (delvp && (odvp != ndvp))
5460 		dnlc_purge_vp(ndvp);
5461 
5462 	/* update the file we just deleted */
5463 	if (delvp) {
5464 		mutex_enter(&delcp->c_statelock);
5465 		if (delcp->c_attr.va_nlink == 1) {
5466 			delcp->c_flags |= CN_DESTROY;
5467 		} else {
5468 			delcp->c_flags |= CN_UPDATED;
5469 		}
5470 		delcp->c_attr.va_nlink--;
5471 		CFSOP_MODIFY_COBJECT(fscp, delcp, cr);
5472 		mutex_exit(&delcp->c_statelock);
5473 	}
5474 
5475 	/* find the entry in the old directory */
5476 	mutex_enter(&odcp->c_statelock);
5477 	gotdirent = 0;
5478 	cookiep = NULL;
5479 	if (CFS_ISFS_NONSHARED(fscp) &&
5480 	    (odcp->c_metadata.md_flags & MD_POPULATED)) {
5481 		error = cachefs_dir_look(odcp, onm, &cookie,
5482 		    NULL, NULL, &cid);
5483 		if (error == 0 || error == EINVAL) {
5484 			gotdirent = 1;
5485 			if (error == 0)
5486 				cookiep = &cookie;
5487 		} else {
5488 			cachefs_inval_object(odcp);
5489 		}
5490 	}
5491 	error = 0;
5492 
5493 	/* remove the directory entry from the old directory */
5494 	if (gotdirent) {
5495 		error = cachefs_dir_rmentry(odcp, onm);
5496 		if (error) {
5497 			cachefs_nocache(odcp);
5498 			error = 0;
5499 		}
5500 	}
5501 	CFSOP_MODIFY_COBJECT(fscp, odcp, cr);
5502 	mutex_exit(&odcp->c_statelock);
5503 
5504 	/* install the directory entry in the new directory */
5505 	mutex_enter(&ndcp->c_statelock);
5506 	if (CFS_ISFS_NONSHARED(fscp) &&
5507 	    (ndcp->c_metadata.md_flags & MD_POPULATED)) {
5508 		error = 1;
5509 		if (gotdirent) {
5510 			ASSERT(cid.cid_fileno != 0);
5511 			error = 0;
5512 			if (delvp) {
5513 				error = cachefs_dir_rmentry(ndcp, nnm);
5514 			}
5515 			if (error == 0) {
5516 				error = cachefs_dir_enter(ndcp, nnm, cookiep,
5517 				    &cid, SM_ASYNC);
5518 			}
5519 		}
5520 		if (error) {
5521 			cachefs_nocache(ndcp);
5522 			error = 0;
5523 		}
5524 	}
5525 	if (odcp != ndcp)
5526 		CFSOP_MODIFY_COBJECT(fscp, ndcp, cr);
5527 	mutex_exit(&ndcp->c_statelock);
5528 
5529 	/* ctime of renamed file has changed */
5530 	mutex_enter(&recp->c_statelock);
5531 	CFSOP_MODIFY_COBJECT(fscp, recp, cr);
5532 	mutex_exit(&recp->c_statelock);
5533 
5534 out:
5535 	if (odcp != ndcp)
5536 		rw_exit(&ndcp->c_rwlock);
5537 	rw_exit(&odcp->c_rwlock);
5538 
5539 	VN_RELE(revp);
5540 
5541 	return (error);
5542 }
5543 
5544 static int
5545 cachefs_rename_disconnected(vnode_t *odvp, char *onm, vnode_t *ndvp,
5546     char *nnm, cred_t *cr, vnode_t *delvp)
5547 {
5548 	cnode_t *odcp = VTOC(odvp);
5549 	cnode_t *ndcp = VTOC(ndvp);
5550 	cnode_t *delcp = NULL;
5551 	vnode_t *revp = NULL;
5552 	cnode_t *recp;
5553 	fscache_t *fscp = C_TO_FSCACHE(odcp);
5554 	int error = 0;
5555 	struct fid cookie;
5556 	struct fid *cookiep;
5557 	cfs_cid_t cid;
5558 	off_t commit = 0;
5559 	timestruc_t current_time;
5560 
5561 	if (CFS_ISFS_WRITE_AROUND(fscp))
5562 		return (ETIMEDOUT);
5563 
5564 	/* find the file we are renaming */
5565 	error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr);
5566 	if (error)
5567 		return (error);
5568 	recp = VTOC(revp);
5569 
5570 	/*
5571 	 * To avoid deadlock, we acquire this global rename lock before
5572 	 * we try to get the locks for the source and target directories.
5573 	 */
5574 	mutex_enter(&cachefs_rename_lock);
5575 	rw_enter(&odcp->c_rwlock, RW_WRITER);
5576 	if (odcp != ndcp) {
5577 		rw_enter(&ndcp->c_rwlock, RW_WRITER);
5578 	}
5579 	mutex_exit(&cachefs_rename_lock);
5580 
5581 	if (recp->c_metadata.md_flags & MD_NEEDATTRS) {
5582 		error = ETIMEDOUT;
5583 		goto out;
5584 	}
5585 
5586 	if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) {
5587 		mutex_enter(&recp->c_statelock);
5588 		if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) {
5589 			error = cachefs_dlog_cidmap(fscp);
5590 			if (error) {
5591 				mutex_exit(&recp->c_statelock);
5592 				error = ENOSPC;
5593 				goto out;
5594 			}
5595 			recp->c_metadata.md_flags |= MD_MAPPING;
5596 			recp->c_flags |= CN_UPDATED;
5597 		}
5598 		mutex_exit(&recp->c_statelock);
5599 	}
5600 
5601 	/* check permissions */
5602 	/* XXX clean up this mutex junk sometime */
5603 	mutex_enter(&odcp->c_statelock);
5604 	error = cachefs_access_local(odcp, (VEXEC|VWRITE), cr);
5605 	mutex_exit(&odcp->c_statelock);
5606 	if (error != 0)
5607 		goto out;
5608 	mutex_enter(&ndcp->c_statelock);
5609 	error = cachefs_access_local(ndcp, (VEXEC|VWRITE), cr);
5610 	mutex_exit(&ndcp->c_statelock);
5611 	if (error != 0)
5612 		goto out;
5613 	mutex_enter(&odcp->c_statelock);
5614 	error = cachefs_stickyrmchk(odcp, recp, cr);
5615 	mutex_exit(&odcp->c_statelock);
5616 	if (error != 0)
5617 		goto out;
5618 
5619 	/* dirs must be populated */
5620 	if (((odcp->c_metadata.md_flags & MD_POPULATED) == 0) ||
5621 	    ((ndcp->c_metadata.md_flags & MD_POPULATED) == 0)) {
5622 		error = ETIMEDOUT;
5623 		goto out;
5624 	}
5625 
5626 	/* for now do not allow moving dirs because could cause cycles */
5627 	if ((((revp->v_type == VDIR) && (odvp != ndvp))) ||
5628 	    (revp == odvp)) {
5629 		error = ETIMEDOUT;
5630 		goto out;
5631 	}
5632 
5633 	/* if a file is being deleted because of this rename */
5634 	if (delvp) {
5635 		delcp = VTOC(delvp);
5636 
5637 		/* if src and dest file are the same */
5638 		if (delvp == revp) {
5639 			error = 0;
5640 			goto out;
5641 		}
5642 
5643 		if (delcp->c_metadata.md_flags & MD_NEEDATTRS) {
5644 			error = ETIMEDOUT;
5645 			goto out;
5646 		}
5647 
5648 		/* if there are hard links to this file */
5649 		if (delcp->c_attr.va_nlink > 1) {
5650 			mutex_enter(&delcp->c_statelock);
5651 			if (cachefs_modified_alloc(delcp)) {
5652 				mutex_exit(&delcp->c_statelock);
5653 				error = ENOSPC;
5654 				goto out;
5655 			}
5656 
5657 			if ((delcp->c_metadata.md_flags & MD_MAPPING) == 0) {
5658 				error = cachefs_dlog_cidmap(fscp);
5659 				if (error) {
5660 					mutex_exit(&delcp->c_statelock);
5661 					error = ENOSPC;
5662 					goto out;
5663 				}
5664 				delcp->c_metadata.md_flags |= MD_MAPPING;
5665 				delcp->c_flags |= CN_UPDATED;
5666 			}
5667 			mutex_exit(&delcp->c_statelock);
5668 		}
5669 
5670 		/* make sure we can delete file */
5671 		mutex_enter(&ndcp->c_statelock);
5672 		error = cachefs_stickyrmchk(ndcp, delcp, cr);
5673 		mutex_exit(&ndcp->c_statelock);
5674 		if (error != 0)
5675 			goto out;
5676 
5677 		/*
5678 		 * If the cnode is active, make a link to the file
5679 		 * so operations on the file will continue.
5680 		 */
5681 		dnlc_purge_vp(delvp);
5682 		if ((delvp->v_type != VDIR) &&
5683 		    !((delvp->v_count == 1) ||
5684 		    ((delvp->v_count == 2) && delcp->c_ipending))) {
5685 			error = cachefs_remove_dolink(ndvp, delvp, nnm, cr);
5686 			if (error)
5687 				goto out;
5688 		}
5689 	}
5690 
5691 	/* purge mappings to file in the old directory */
5692 	dnlc_purge_vp(odvp);
5693 
5694 	/* purge mappings in the new dir if we deleted a file */
5695 	if (delvp && (odvp != ndvp))
5696 		dnlc_purge_vp(ndvp);
5697 
5698 	/* find the entry in the old directory */
5699 	mutex_enter(&odcp->c_statelock);
5700 	if ((odcp->c_metadata.md_flags & MD_POPULATED) == 0) {
5701 		mutex_exit(&odcp->c_statelock);
5702 		error = ETIMEDOUT;
5703 		goto out;
5704 	}
5705 	cookiep = NULL;
5706 	error = cachefs_dir_look(odcp, onm, &cookie, NULL, NULL, &cid);
5707 	if (error == 0 || error == EINVAL) {
5708 		if (error == 0)
5709 			cookiep = &cookie;
5710 	} else {
5711 		mutex_exit(&odcp->c_statelock);
5712 		if (error == ENOTDIR)
5713 			error = ETIMEDOUT;
5714 		goto out;
5715 	}
5716 	error = 0;
5717 
5718 	/* write the log entry */
5719 	commit = cachefs_dlog_rename(fscp, odcp, onm, ndcp, nnm, cr,
5720 	    recp, delcp);
5721 	if (commit == 0) {
5722 		mutex_exit(&odcp->c_statelock);
5723 		error = ENOSPC;
5724 		goto out;
5725 	}
5726 
5727 	/* remove the directory entry from the old directory */
5728 	cachefs_modified(odcp);
5729 	error = cachefs_dir_rmentry(odcp, onm);
5730 	if (error) {
5731 		mutex_exit(&odcp->c_statelock);
5732 		if (error == ENOTDIR)
5733 			error = ETIMEDOUT;
5734 		goto out;
5735 	}
5736 	mutex_exit(&odcp->c_statelock);
5737 
5738 	/* install the directory entry in the new directory */
5739 	mutex_enter(&ndcp->c_statelock);
5740 	error = ENOTDIR;
5741 	if (ndcp->c_metadata.md_flags & MD_POPULATED) {
5742 		ASSERT(cid.cid_fileno != 0);
5743 		cachefs_modified(ndcp);
5744 		error = 0;
5745 		if (delvp) {
5746 			error = cachefs_dir_rmentry(ndcp, nnm);
5747 		}
5748 		if (error == 0) {
5749 			error = cachefs_dir_enter(ndcp, nnm, cookiep,
5750 			    &cid, SM_ASYNC);
5751 		}
5752 	}
5753 	if (error) {
5754 		cachefs_nocache(ndcp);
5755 		mutex_exit(&ndcp->c_statelock);
5756 		mutex_enter(&odcp->c_statelock);
5757 		cachefs_nocache(odcp);
5758 		mutex_exit(&odcp->c_statelock);
5759 		if (error == ENOTDIR)
5760 			error = ETIMEDOUT;
5761 		goto out;
5762 	}
5763 	mutex_exit(&ndcp->c_statelock);
5764 
5765 	gethrestime(&current_time);
5766 
5767 	/* update the file we just deleted */
5768 	if (delvp) {
5769 		mutex_enter(&delcp->c_statelock);
5770 		delcp->c_attr.va_nlink--;
5771 		delcp->c_metadata.md_localctime = current_time;
5772 		delcp->c_metadata.md_flags |= MD_LOCALCTIME;
5773 		if (delcp->c_attr.va_nlink == 0) {
5774 			delcp->c_flags |= CN_DESTROY;
5775 		} else {
5776 			delcp->c_flags |= CN_UPDATED;
5777 		}
5778 		mutex_exit(&delcp->c_statelock);
5779 	}
5780 
5781 	/* update the file we renamed */
5782 	mutex_enter(&recp->c_statelock);
5783 	recp->c_metadata.md_localctime = current_time;
5784 	recp->c_metadata.md_flags |= MD_LOCALCTIME;
5785 	recp->c_flags |= CN_UPDATED;
5786 	mutex_exit(&recp->c_statelock);
5787 
5788 	/* update the source directory */
5789 	mutex_enter(&odcp->c_statelock);
5790 	odcp->c_metadata.md_localctime = current_time;
5791 	odcp->c_metadata.md_localmtime = current_time;
5792 	odcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5793 	odcp->c_flags |= CN_UPDATED;
5794 	mutex_exit(&odcp->c_statelock);
5795 
5796 	/* update the destination directory */
5797 	if (odcp != ndcp) {
5798 		mutex_enter(&ndcp->c_statelock);
5799 		ndcp->c_metadata.md_localctime = current_time;
5800 		ndcp->c_metadata.md_localmtime = current_time;
5801 		ndcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5802 		ndcp->c_flags |= CN_UPDATED;
5803 		mutex_exit(&ndcp->c_statelock);
5804 	}
5805 
5806 out:
5807 	if (commit) {
5808 		/* commit the log entry */
5809 		if (cachefs_dlog_commit(fscp, commit, error)) {
5810 			/*EMPTY*/
5811 			/* XXX bob: fix on panic */
5812 		}
5813 	}
5814 
5815 	if (odcp != ndcp)
5816 		rw_exit(&ndcp->c_rwlock);
5817 	rw_exit(&odcp->c_rwlock);
5818 
5819 	VN_RELE(revp);
5820 
5821 	return (error);
5822 }
5823 
5824 /*ARGSUSED*/
5825 static int
5826 cachefs_mkdir(vnode_t *dvp, char *nm, vattr_t *vap, vnode_t **vpp,
5827     cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
5828 {
5829 	cnode_t *dcp = VTOC(dvp);
5830 	fscache_t *fscp = C_TO_FSCACHE(dcp);
5831 	cachefscache_t *cachep = fscp->fs_cache;
5832 	int error = 0;
5833 	int held = 0;
5834 	int connected = 0;
5835 
5836 #ifdef CFSDEBUG
5837 	CFS_DEBUG(CFSDEBUG_VOPS)
5838 		printf("cachefs_mkdir: ENTER dvp %p\n", (void *)dvp);
5839 #endif
5840 
5841 	if (getzoneid() != GLOBAL_ZONEID) {
5842 		error = EPERM;
5843 		goto out;
5844 	}
5845 
5846 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
5847 		ASSERT(dcp->c_flags & CN_NOCACHE);
5848 
5849 	/*
5850 	 * Cachefs only provides pass-through support for NFSv4,
5851 	 * and all vnode operations are passed through to the
5852 	 * back file system. For NFSv4 pass-through to work, only
5853 	 * connected operation is supported, the cnode backvp must
5854 	 * exist, and cachefs optional (eg., disconnectable) flags
5855 	 * are turned off. Assert these conditions to ensure that
5856 	 * the backfilesystem is called for the mkdir operation.
5857 	 */
5858 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
5859 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
5860 
5861 	for (;;) {
5862 		/* get (or renew) access to the file system */
5863 		if (held) {
5864 			/* Won't loop with NFSv4 connected behavior */
5865 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
5866 			rw_exit(&dcp->c_rwlock);
5867 			cachefs_cd_release(fscp);
5868 			held = 0;
5869 		}
5870 		error = cachefs_cd_access(fscp, connected, 1);
5871 		if (error)
5872 			break;
5873 		rw_enter(&dcp->c_rwlock, RW_WRITER);
5874 		held = 1;
5875 
5876 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5877 			error = cachefs_mkdir_connected(dvp, nm, vap,
5878 			    vpp, cr);
5879 			if (CFS_TIMEOUT(fscp, error)) {
5880 				rw_exit(&dcp->c_rwlock);
5881 				cachefs_cd_release(fscp);
5882 				held = 0;
5883 				cachefs_cd_timedout(fscp);
5884 				connected = 0;
5885 				continue;
5886 			}
5887 		} else {
5888 			error = cachefs_mkdir_disconnected(dvp, nm, vap,
5889 			    vpp, cr);
5890 			if (CFS_TIMEOUT(fscp, error)) {
5891 				connected = 1;
5892 				continue;
5893 			}
5894 		}
5895 		break;
5896 	}
5897 
5898 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_MKDIR)) {
5899 		fid_t *fidp = NULL;
5900 		ino64_t fileno = 0;
5901 		cnode_t *cp = NULL;
5902 		if (error == 0)
5903 			cp = VTOC(*vpp);
5904 
5905 		if (cp != NULL) {
5906 			fidp = &cp->c_metadata.md_cookie;
5907 			fileno = cp->c_id.cid_fileno;
5908 		}
5909 
5910 		cachefs_log_mkdir(cachep, error, fscp->fs_cfsvfsp,
5911 		    fidp, fileno, crgetuid(cr));
5912 	}
5913 
5914 	if (held) {
5915 		rw_exit(&dcp->c_rwlock);
5916 		cachefs_cd_release(fscp);
5917 	}
5918 	if (error == 0 && CFS_ISFS_NONSHARED(fscp))
5919 		(void) cachefs_pack(dvp, nm, cr);
5920 
5921 #ifdef CFS_CD_DEBUG
5922 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
5923 #endif
5924 out:
5925 #ifdef CFSDEBUG
5926 	CFS_DEBUG(CFSDEBUG_VOPS)
5927 		printf("cachefs_mkdir: EXIT error = %d\n", error);
5928 #endif
5929 	return (error);
5930 }
5931 
5932 static int
5933 cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap,
5934     vnode_t **vpp, cred_t *cr)
5935 {
5936 	cnode_t *newcp = NULL, *dcp = VTOC(dvp);
5937 	struct vnode *vp = NULL;
5938 	int error = 0;
5939 	fscache_t *fscp = C_TO_FSCACHE(dcp);
5940 	struct fid cookie;
5941 	struct vattr attr;
5942 	cfs_cid_t cid, dircid;
5943 	uint32_t valid_fid;
5944 
5945 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
5946 		ASSERT(dcp->c_flags & CN_NOCACHE);
5947 
5948 	mutex_enter(&dcp->c_statelock);
5949 
5950 	/* get backvp of dir */
5951 	if (dcp->c_backvp == NULL) {
5952 		error = cachefs_getbackvp(fscp, dcp);
5953 		if (error) {
5954 			mutex_exit(&dcp->c_statelock);
5955 			goto out;
5956 		}
5957 	}
5958 
5959 	/* consistency check the directory */
5960 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
5961 	if (error) {
5962 		mutex_exit(&dcp->c_statelock);
5963 		goto out;
5964 	}
5965 	dircid = dcp->c_id;
5966 
5967 	/* make the dir on the back fs */
5968 	CFS_DPRINT_BACKFS_NFSV4(fscp,
5969 	    ("cachefs_mkdir (nfsv4): dcp %p, dbackvp %p, "
5970 	    "name %s\n", dcp, dcp->c_backvp, nm));
5971 	error = VOP_MKDIR(dcp->c_backvp, nm, vap, &vp, cr, NULL, 0, NULL);
5972 	mutex_exit(&dcp->c_statelock);
5973 	if (error) {
5974 		goto out;
5975 	}
5976 
5977 	/* get the cookie and make the cnode */
5978 	attr.va_mask = AT_ALL;
5979 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
5980 	error = cachefs_getcookie(vp, &cookie, &attr, cr, valid_fid);
5981 	if (error) {
5982 		goto out;
5983 	}
5984 	cid.cid_flags = 0;
5985 	cid.cid_fileno = attr.va_nodeid;
5986 	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
5987 	    &attr, vp, cr, 0, &newcp);
5988 	if (error) {
5989 		goto out;
5990 	}
5991 	ASSERT(CTOV(newcp)->v_type == VDIR);
5992 	*vpp = CTOV(newcp);
5993 
5994 	/* if the dir is populated, add the new entry */
5995 	mutex_enter(&dcp->c_statelock);
5996 	if (CFS_ISFS_NONSHARED(fscp) &&
5997 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
5998 		error = cachefs_dir_enter(dcp, nm, &cookie, &newcp->c_id,
5999 		    SM_ASYNC);
6000 		if (error) {
6001 			cachefs_nocache(dcp);
6002 			error = 0;
6003 		}
6004 	}
6005 	dcp->c_attr.va_nlink++;
6006 	dcp->c_flags |= CN_UPDATED;
6007 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
6008 	mutex_exit(&dcp->c_statelock);
6009 
6010 	/* XXX bob: should we do a filldir here? or just add . and .. */
6011 	/* maybe should kick off an async filldir so caller does not wait */
6012 
6013 	/* put the entry in the dnlc */
6014 	if (cachefs_dnlc)
6015 		dnlc_enter(dvp, nm, *vpp);
6016 
6017 	/* save the fileno of the parent so can find the name */
6018 	if (bcmp(&newcp->c_metadata.md_parent, &dircid,
6019 	    sizeof (cfs_cid_t)) != 0) {
6020 		mutex_enter(&newcp->c_statelock);
6021 		newcp->c_metadata.md_parent = dircid;
6022 		newcp->c_flags |= CN_UPDATED;
6023 		mutex_exit(&newcp->c_statelock);
6024 	}
6025 out:
6026 	if (vp)
6027 		VN_RELE(vp);
6028 
6029 	return (error);
6030 }
6031 
6032 static int
6033 cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
6034     vnode_t **vpp, cred_t *cr)
6035 {
6036 	cnode_t *dcp = VTOC(dvp);
6037 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6038 	int error;
6039 	cnode_t *newcp = NULL;
6040 	struct vattr va;
6041 	timestruc_t current_time;
6042 	off_t commit = 0;
6043 	char *s;
6044 	int namlen;
6045 
6046 	/* don't allow '/' characters in pathname component */
6047 	for (s = nm, namlen = 0; *s; s++, namlen++)
6048 		if (*s == '/')
6049 			return (EACCES);
6050 	if (namlen == 0)
6051 		return (EINVAL);
6052 
6053 	if (CFS_ISFS_WRITE_AROUND(fscp))
6054 		return (ETIMEDOUT);
6055 
6056 	mutex_enter(&dcp->c_statelock);
6057 
6058 	/* check permissions */
6059 	if (error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) {
6060 		mutex_exit(&dcp->c_statelock);
6061 		goto out;
6062 	}
6063 
6064 	/* the directory front file must be populated */
6065 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6066 		error = ETIMEDOUT;
6067 		mutex_exit(&dcp->c_statelock);
6068 		goto out;
6069 	}
6070 
6071 	/* make sure nm does not already exist in the directory */
6072 	error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL);
6073 	if (error == ENOTDIR) {
6074 		error = ETIMEDOUT;
6075 		mutex_exit(&dcp->c_statelock);
6076 		goto out;
6077 	}
6078 	if (error != ENOENT) {
6079 		error = EEXIST;
6080 		mutex_exit(&dcp->c_statelock);
6081 		goto out;
6082 	}
6083 
6084 	/* make up a reasonable set of attributes */
6085 	cachefs_attr_setup(vap, &va, dcp, cr);
6086 	va.va_type = VDIR;
6087 	va.va_mode |= S_IFDIR;
6088 	va.va_nlink = 2;
6089 
6090 	mutex_exit(&dcp->c_statelock);
6091 
6092 	/* create the cnode */
6093 	error = cachefs_cnode_create(fscp, &va, 0, &newcp);
6094 	if (error)
6095 		goto out;
6096 
6097 	mutex_enter(&newcp->c_statelock);
6098 
6099 	error = cachefs_dlog_cidmap(fscp);
6100 	if (error) {
6101 		mutex_exit(&newcp->c_statelock);
6102 		goto out;
6103 	}
6104 
6105 	cachefs_creategid(dcp, newcp, vap, cr);
6106 	mutex_enter(&dcp->c_statelock);
6107 	cachefs_createacl(dcp, newcp);
6108 	mutex_exit(&dcp->c_statelock);
6109 	gethrestime(&current_time);
6110 	newcp->c_metadata.md_vattr.va_atime = current_time;
6111 	newcp->c_metadata.md_localctime = current_time;
6112 	newcp->c_metadata.md_localmtime = current_time;
6113 	newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME |
6114 	    MD_LOCALCTIME;
6115 	newcp->c_flags |= CN_UPDATED;
6116 
6117 	/* make a front file for the new directory, add . and .. */
6118 	error = cachefs_dir_new(dcp, newcp);
6119 	if (error) {
6120 		mutex_exit(&newcp->c_statelock);
6121 		goto out;
6122 	}
6123 	cachefs_modified(newcp);
6124 
6125 	/*
6126 	 * write the metadata now rather than waiting until
6127 	 * inactive so that if there's no space we can let
6128 	 * the caller know.
6129 	 */
6130 	ASSERT(newcp->c_frontvp);
6131 	ASSERT((newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
6132 	ASSERT((newcp->c_flags & CN_ALLOC_PENDING) == 0);
6133 	error = filegrp_write_metadata(newcp->c_filegrp,
6134 	    &newcp->c_id, &newcp->c_metadata);
6135 	if (error) {
6136 		mutex_exit(&newcp->c_statelock);
6137 		goto out;
6138 	}
6139 	mutex_exit(&newcp->c_statelock);
6140 
6141 	/* log the operation */
6142 	commit = cachefs_dlog_mkdir(fscp, dcp, newcp, nm, &va, cr);
6143 	if (commit == 0) {
6144 		error = ENOSPC;
6145 		goto out;
6146 	}
6147 
6148 	mutex_enter(&dcp->c_statelock);
6149 
6150 	/* make sure directory is still populated */
6151 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6152 		mutex_exit(&dcp->c_statelock);
6153 		error = ETIMEDOUT;
6154 		goto out;
6155 	}
6156 	cachefs_modified(dcp);
6157 
6158 	/* enter the new file in the directory */
6159 	error = cachefs_dir_enter(dcp, nm, &newcp->c_metadata.md_cookie,
6160 	    &newcp->c_id, SM_ASYNC);
6161 	if (error) {
6162 		mutex_exit(&dcp->c_statelock);
6163 		goto out;
6164 	}
6165 
6166 	/* update parent dir times */
6167 	dcp->c_metadata.md_localctime = current_time;
6168 	dcp->c_metadata.md_localmtime = current_time;
6169 	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
6170 	dcp->c_attr.va_nlink++;
6171 	dcp->c_flags |= CN_UPDATED;
6172 	mutex_exit(&dcp->c_statelock);
6173 
6174 out:
6175 	if (commit) {
6176 		/* commit the log entry */
6177 		if (cachefs_dlog_commit(fscp, commit, error)) {
6178 			/*EMPTY*/
6179 			/* XXX bob: fix on panic */
6180 		}
6181 	}
6182 	if (error) {
6183 		if (newcp) {
6184 			mutex_enter(&newcp->c_statelock);
6185 			newcp->c_flags |= CN_DESTROY;
6186 			mutex_exit(&newcp->c_statelock);
6187 			VN_RELE(CTOV(newcp));
6188 		}
6189 	} else {
6190 		*vpp = CTOV(newcp);
6191 	}
6192 	return (error);
6193 }
6194 
6195 /*ARGSUSED*/
6196 static int
6197 cachefs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
6198     caller_context_t *ct, int flags)
6199 {
6200 	cnode_t *dcp = VTOC(dvp);
6201 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6202 	cachefscache_t *cachep = fscp->fs_cache;
6203 	int error = 0;
6204 	int held = 0;
6205 	int connected = 0;
6206 	size_t namlen;
6207 	vnode_t *vp = NULL;
6208 	int vfslock = 0;
6209 
6210 #ifdef CFSDEBUG
6211 	CFS_DEBUG(CFSDEBUG_VOPS)
6212 		printf("cachefs_rmdir: ENTER vp %p\n", (void *)dvp);
6213 #endif
6214 
6215 	if (getzoneid() != GLOBAL_ZONEID) {
6216 		error = EPERM;
6217 		goto out;
6218 	}
6219 
6220 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
6221 		ASSERT(dcp->c_flags & CN_NOCACHE);
6222 
6223 	/*
6224 	 * Cachefs only provides pass-through support for NFSv4,
6225 	 * and all vnode operations are passed through to the
6226 	 * back file system. For NFSv4 pass-through to work, only
6227 	 * connected operation is supported, the cnode backvp must
6228 	 * exist, and cachefs optional (eg., disconnectable) flags
6229 	 * are turned off. Assert these conditions to ensure that
6230 	 * the backfilesystem is called for the rmdir operation.
6231 	 */
6232 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6233 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6234 
6235 	for (;;) {
6236 		if (vfslock) {
6237 			vn_vfsunlock(vp);
6238 			vfslock = 0;
6239 		}
6240 		if (vp) {
6241 			VN_RELE(vp);
6242 			vp = NULL;
6243 		}
6244 
6245 		/* get (or renew) access to the file system */
6246 		if (held) {
6247 			/* Won't loop with NFSv4 connected behavior */
6248 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6249 			cachefs_cd_release(fscp);
6250 			held = 0;
6251 		}
6252 		error = cachefs_cd_access(fscp, connected, 1);
6253 		if (error)
6254 			break;
6255 		held = 1;
6256 
6257 		/* if disconnected, do some extra error checking */
6258 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
6259 			/* check permissions */
6260 			mutex_enter(&dcp->c_statelock);
6261 			error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
6262 			mutex_exit(&dcp->c_statelock);
6263 			if (CFS_TIMEOUT(fscp, error)) {
6264 				connected = 1;
6265 				continue;
6266 			}
6267 			if (error)
6268 				break;
6269 
6270 			namlen = strlen(nm);
6271 			if (namlen == 0) {
6272 				error = EINVAL;
6273 				break;
6274 			}
6275 
6276 			/* cannot remove . and .. */
6277 			if (nm[0] == '.') {
6278 				if (namlen == 1) {
6279 					error = EINVAL;
6280 					break;
6281 				} else if (namlen == 2 && nm[1] == '.') {
6282 					error = EEXIST;
6283 					break;
6284 				}
6285 			}
6286 
6287 		}
6288 
6289 		/* get the cnode of the dir to remove */
6290 		error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
6291 		if (error) {
6292 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6293 				if (CFS_TIMEOUT(fscp, error)) {
6294 					cachefs_cd_release(fscp);
6295 					held = 0;
6296 					cachefs_cd_timedout(fscp);
6297 					connected = 0;
6298 					continue;
6299 				}
6300 			} else {
6301 				if (CFS_TIMEOUT(fscp, error)) {
6302 					connected = 1;
6303 					continue;
6304 				}
6305 			}
6306 			break;
6307 		}
6308 
6309 		/* must be a dir */
6310 		if (vp->v_type != VDIR) {
6311 			error = ENOTDIR;
6312 			break;
6313 		}
6314 
6315 		/* must not be current dir */
6316 		if (VOP_CMP(vp, cdir, ct)) {
6317 			error = EINVAL;
6318 			break;
6319 		}
6320 
6321 		/* see ufs_dirremove for why this is done, mount race */
6322 		if (vn_vfswlock(vp)) {
6323 			error = EBUSY;
6324 			break;
6325 		}
6326 		vfslock = 1;
6327 		if (vn_mountedvfs(vp) != NULL) {
6328 			error = EBUSY;
6329 			break;
6330 		}
6331 
6332 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6333 			error = cachefs_rmdir_connected(dvp, nm, cdir,
6334 			    cr, vp);
6335 			if (CFS_TIMEOUT(fscp, error)) {
6336 				cachefs_cd_release(fscp);
6337 				held = 0;
6338 				cachefs_cd_timedout(fscp);
6339 				connected = 0;
6340 				continue;
6341 			}
6342 		} else {
6343 			error = cachefs_rmdir_disconnected(dvp, nm, cdir,
6344 			    cr, vp);
6345 			if (CFS_TIMEOUT(fscp, error)) {
6346 				connected = 1;
6347 				continue;
6348 			}
6349 		}
6350 		break;
6351 	}
6352 
6353 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RMDIR)) {
6354 		ino64_t fileno = 0;
6355 		fid_t *fidp = NULL;
6356 		cnode_t *cp = NULL;
6357 		if (vp)
6358 			cp = VTOC(vp);
6359 
6360 		if (cp != NULL) {
6361 			fidp = &cp->c_metadata.md_cookie;
6362 			fileno = cp->c_id.cid_fileno;
6363 		}
6364 
6365 		cachefs_log_rmdir(cachep, error, fscp->fs_cfsvfsp,
6366 		    fidp, fileno, crgetuid(cr));
6367 	}
6368 
6369 	if (held) {
6370 		cachefs_cd_release(fscp);
6371 	}
6372 
6373 	if (vfslock)
6374 		vn_vfsunlock(vp);
6375 
6376 	if (vp)
6377 		VN_RELE(vp);
6378 
6379 #ifdef CFS_CD_DEBUG
6380 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
6381 #endif
6382 out:
6383 #ifdef CFSDEBUG
6384 	CFS_DEBUG(CFSDEBUG_VOPS)
6385 		printf("cachefs_rmdir: EXIT error = %d\n", error);
6386 #endif
6387 
6388 	return (error);
6389 }
6390 
6391 static int
6392 cachefs_rmdir_connected(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
6393     vnode_t *vp)
6394 {
6395 	cnode_t *dcp = VTOC(dvp);
6396 	cnode_t *cp = VTOC(vp);
6397 	int error = 0;
6398 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6399 
6400 	rw_enter(&dcp->c_rwlock, RW_WRITER);
6401 	mutex_enter(&dcp->c_statelock);
6402 	mutex_enter(&cp->c_statelock);
6403 
6404 	if (dcp->c_backvp == NULL) {
6405 		error = cachefs_getbackvp(fscp, dcp);
6406 		if (error) {
6407 			goto out;
6408 		}
6409 	}
6410 
6411 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
6412 	if (error)
6413 		goto out;
6414 
6415 	/* rmdir on the back fs */
6416 	CFS_DPRINT_BACKFS_NFSV4(fscp,
6417 	    ("cachefs_rmdir (nfsv4): dcp %p, dbackvp %p, "
6418 	    "name %s\n", dcp, dcp->c_backvp, nm));
6419 	error = VOP_RMDIR(dcp->c_backvp, nm, cdir, cr, NULL, 0);
6420 	if (error)
6421 		goto out;
6422 
6423 	/* if the dir is populated, remove the entry from it */
6424 	if (CFS_ISFS_NONSHARED(fscp) &&
6425 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
6426 		error = cachefs_dir_rmentry(dcp, nm);
6427 		if (error) {
6428 			cachefs_nocache(dcp);
6429 			error = 0;
6430 		}
6431 	}
6432 
6433 	/*
6434 	 * *if* the (hard) link count goes to 0, then we set the CDESTROY
6435 	 * flag on the cnode. The cached object will then be destroyed
6436 	 * at inactive time where the chickens come home to roost :-)
6437 	 * The link cnt for directories is bumped down by 2 'cause the "."
6438 	 * entry has to be elided too ! The link cnt for the parent goes down
6439 	 * by 1 (because of "..").
6440 	 */
6441 	cp->c_attr.va_nlink -= 2;
6442 	dcp->c_attr.va_nlink--;
6443 	if (cp->c_attr.va_nlink == 0) {
6444 		cp->c_flags |= CN_DESTROY;
6445 	} else {
6446 		cp->c_flags |= CN_UPDATED;
6447 	}
6448 	dcp->c_flags |= CN_UPDATED;
6449 
6450 	dnlc_purge_vp(vp);
6451 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
6452 
6453 out:
6454 	mutex_exit(&cp->c_statelock);
6455 	mutex_exit(&dcp->c_statelock);
6456 	rw_exit(&dcp->c_rwlock);
6457 
6458 	return (error);
6459 }
6460 
6461 static int
6462 /*ARGSUSED*/
6463 cachefs_rmdir_disconnected(vnode_t *dvp, char *nm, vnode_t *cdir,
6464     cred_t *cr, vnode_t *vp)
6465 {
6466 	cnode_t *dcp = VTOC(dvp);
6467 	cnode_t *cp = VTOC(vp);
6468 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6469 	int error = 0;
6470 	off_t commit = 0;
6471 	timestruc_t current_time;
6472 
6473 	if (CFS_ISFS_WRITE_AROUND(fscp))
6474 		return (ETIMEDOUT);
6475 
6476 	rw_enter(&dcp->c_rwlock, RW_WRITER);
6477 	mutex_enter(&dcp->c_statelock);
6478 	mutex_enter(&cp->c_statelock);
6479 
6480 	/* both directories must be populated */
6481 	if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) ||
6482 	    ((cp->c_metadata.md_flags & MD_POPULATED) == 0)) {
6483 		error = ETIMEDOUT;
6484 		goto out;
6485 	}
6486 
6487 	/* if sticky bit set on the dir, more access checks to perform */
6488 	if (error = cachefs_stickyrmchk(dcp, cp, cr)) {
6489 		goto out;
6490 	}
6491 
6492 	/* make sure dir is empty */
6493 	if (cp->c_attr.va_nlink > 2) {
6494 		error = cachefs_dir_empty(cp);
6495 		if (error) {
6496 			if (error == ENOTDIR)
6497 				error = ETIMEDOUT;
6498 			goto out;
6499 		}
6500 		cachefs_modified(cp);
6501 	}
6502 	cachefs_modified(dcp);
6503 
6504 	/* log the operation */
6505 	commit = cachefs_dlog_rmdir(fscp, dcp, nm, cp, cr);
6506 	if (commit == 0) {
6507 		error = ENOSPC;
6508 		goto out;
6509 	}
6510 
6511 	/* remove name from parent dir */
6512 	error = cachefs_dir_rmentry(dcp, nm);
6513 	if (error == ENOTDIR) {
6514 		error = ETIMEDOUT;
6515 		goto out;
6516 	}
6517 	if (error)
6518 		goto out;
6519 
6520 	gethrestime(&current_time);
6521 
6522 	/* update deleted dir values */
6523 	cp->c_attr.va_nlink -= 2;
6524 	if (cp->c_attr.va_nlink == 0)
6525 		cp->c_flags |= CN_DESTROY;
6526 	else {
6527 		cp->c_metadata.md_localctime = current_time;
6528 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
6529 		cp->c_flags |= CN_UPDATED;
6530 	}
6531 
6532 	/* update parent values */
6533 	dcp->c_metadata.md_localctime = current_time;
6534 	dcp->c_metadata.md_localmtime = current_time;
6535 	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
6536 	dcp->c_attr.va_nlink--;
6537 	dcp->c_flags |= CN_UPDATED;
6538 
6539 out:
6540 	mutex_exit(&cp->c_statelock);
6541 	mutex_exit(&dcp->c_statelock);
6542 	rw_exit(&dcp->c_rwlock);
6543 	if (commit) {
6544 		/* commit the log entry */
6545 		if (cachefs_dlog_commit(fscp, commit, error)) {
6546 			/*EMPTY*/
6547 			/* XXX bob: fix on panic */
6548 		}
6549 		dnlc_purge_vp(vp);
6550 	}
6551 	return (error);
6552 }
6553 
6554 /*ARGSUSED*/
6555 static int
6556 cachefs_symlink(vnode_t *dvp, char *lnm, vattr_t *tva,
6557     char *tnm, cred_t *cr, caller_context_t *ct, int flags)
6558 {
6559 	cnode_t *dcp = VTOC(dvp);
6560 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6561 	cachefscache_t *cachep = fscp->fs_cache;
6562 	int error = 0;
6563 	int held = 0;
6564 	int connected = 0;
6565 
6566 #ifdef CFSDEBUG
6567 	CFS_DEBUG(CFSDEBUG_VOPS)
6568 		printf("cachefs_symlink: ENTER dvp %p lnm %s tnm %s\n",
6569 		    (void *)dvp, lnm, tnm);
6570 #endif
6571 
6572 	if (getzoneid() != GLOBAL_ZONEID) {
6573 		error = EPERM;
6574 		goto out;
6575 	}
6576 
6577 	if (fscp->fs_cache->c_flags & CACHE_NOCACHE)
6578 		ASSERT(dcp->c_flags & CN_NOCACHE);
6579 
6580 	/*
6581 	 * Cachefs only provides pass-through support for NFSv4,
6582 	 * and all vnode operations are passed through to the
6583 	 * back file system. For NFSv4 pass-through to work, only
6584 	 * connected operation is supported, the cnode backvp must
6585 	 * exist, and cachefs optional (eg., disconnectable) flags
6586 	 * are turned off. Assert these conditions to ensure that
6587 	 * the backfilesystem is called for the symlink operation.
6588 	 */
6589 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6590 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6591 
6592 	for (;;) {
6593 		/* get (or renew) access to the file system */
6594 		if (held) {
6595 			/* Won't loop with NFSv4 connected behavior */
6596 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6597 			rw_exit(&dcp->c_rwlock);
6598 			cachefs_cd_release(fscp);
6599 			held = 0;
6600 		}
6601 		error = cachefs_cd_access(fscp, connected, 1);
6602 		if (error)
6603 			break;
6604 		rw_enter(&dcp->c_rwlock, RW_WRITER);
6605 		held = 1;
6606 
6607 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6608 			error = cachefs_symlink_connected(dvp, lnm, tva,
6609 			    tnm, cr);
6610 			if (CFS_TIMEOUT(fscp, error)) {
6611 				rw_exit(&dcp->c_rwlock);
6612 				cachefs_cd_release(fscp);
6613 				held = 0;
6614 				cachefs_cd_timedout(fscp);
6615 				connected = 0;
6616 				continue;
6617 			}
6618 		} else {
6619 			error = cachefs_symlink_disconnected(dvp, lnm, tva,
6620 			    tnm, cr);
6621 			if (CFS_TIMEOUT(fscp, error)) {
6622 				connected = 1;
6623 				continue;
6624 			}
6625 		}
6626 		break;
6627 	}
6628 
6629 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_SYMLINK))
6630 		cachefs_log_symlink(cachep, error, fscp->fs_cfsvfsp,
6631 		    &dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno,
6632 		    crgetuid(cr), (uint_t)strlen(tnm));
6633 
6634 	if (held) {
6635 		rw_exit(&dcp->c_rwlock);
6636 		cachefs_cd_release(fscp);
6637 	}
6638 
6639 #ifdef CFS_CD_DEBUG
6640 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
6641 #endif
6642 out:
6643 #ifdef CFSDEBUG
6644 	CFS_DEBUG(CFSDEBUG_VOPS)
6645 		printf("cachefs_symlink: EXIT error = %d\n", error);
6646 #endif
6647 	return (error);
6648 }
6649 
6650 static int
6651 cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva,
6652     char *tnm, cred_t *cr)
6653 {
6654 	cnode_t *dcp = VTOC(dvp);
6655 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6656 	int error = 0;
6657 	vnode_t *backvp = NULL;
6658 	cnode_t *newcp = NULL;
6659 	struct vattr va;
6660 	struct fid cookie;
6661 	cfs_cid_t cid;
6662 	uint32_t valid_fid;
6663 
6664 	mutex_enter(&dcp->c_statelock);
6665 
6666 	if (dcp->c_backvp == NULL) {
6667 		error = cachefs_getbackvp(fscp, dcp);
6668 		if (error) {
6669 			cachefs_nocache(dcp);
6670 			mutex_exit(&dcp->c_statelock);
6671 			goto out;
6672 		}
6673 	}
6674 
6675 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
6676 	if (error) {
6677 		mutex_exit(&dcp->c_statelock);
6678 		goto out;
6679 	}
6680 	CFS_DPRINT_BACKFS_NFSV4(fscp,
6681 	    ("cachefs_symlink (nfsv4): dcp %p, dbackvp %p, "
6682 	    "lnm %s, tnm %s\n", dcp, dcp->c_backvp, lnm, tnm));
6683 	error = VOP_SYMLINK(dcp->c_backvp, lnm, tva, tnm, cr, NULL, 0);
6684 	if (error) {
6685 		mutex_exit(&dcp->c_statelock);
6686 		goto out;
6687 	}
6688 	if ((dcp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
6689 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
6690 		cachefs_nocache(dcp);
6691 		mutex_exit(&dcp->c_statelock);
6692 		goto out;
6693 	}
6694 
6695 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
6696 
6697 	/* lookup the symlink we just created and get its fid and attrs */
6698 	(void) VOP_LOOKUP(dcp->c_backvp, lnm, &backvp, NULL, 0, NULL, cr,
6699 	    NULL, NULL, NULL);
6700 	if (backvp == NULL) {
6701 		if (CFS_ISFS_BACKFS_NFSV4(fscp) == 0)
6702 			cachefs_nocache(dcp);
6703 		mutex_exit(&dcp->c_statelock);
6704 		goto out;
6705 	}
6706 
6707 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
6708 	error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
6709 	if (error) {
6710 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6711 		error = 0;
6712 		cachefs_nocache(dcp);
6713 		mutex_exit(&dcp->c_statelock);
6714 		goto out;
6715 	}
6716 	cid.cid_fileno = va.va_nodeid;
6717 	cid.cid_flags = 0;
6718 
6719 	/* if the dir is cached, add the symlink to it */
6720 	if (CFS_ISFS_NONSHARED(fscp) &&
6721 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
6722 		error = cachefs_dir_enter(dcp, lnm, &cookie, &cid, SM_ASYNC);
6723 		if (error) {
6724 			cachefs_nocache(dcp);
6725 			error = 0;
6726 		}
6727 	}
6728 	mutex_exit(&dcp->c_statelock);
6729 
6730 	/* make the cnode for the sym link */
6731 	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
6732 	    &va, backvp, cr, 0, &newcp);
6733 	if (error) {
6734 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6735 		cachefs_nocache(dcp);
6736 		error = 0;
6737 		goto out;
6738 	}
6739 
6740 	/* try to cache the symlink contents */
6741 	rw_enter(&newcp->c_rwlock, RW_WRITER);
6742 	mutex_enter(&newcp->c_statelock);
6743 
6744 	/*
6745 	 * try to cache the sym link, note that its a noop if NOCACHE
6746 	 * or NFSv4 is set
6747 	 */
6748 	error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size);
6749 	if (error) {
6750 		cachefs_nocache(newcp);
6751 		error = 0;
6752 	}
6753 	mutex_exit(&newcp->c_statelock);
6754 	rw_exit(&newcp->c_rwlock);
6755 
6756 out:
6757 	if (backvp)
6758 		VN_RELE(backvp);
6759 	if (newcp)
6760 		VN_RELE(CTOV(newcp));
6761 	return (error);
6762 }
6763 
6764 static int
6765 cachefs_symlink_disconnected(vnode_t *dvp, char *lnm, vattr_t *tva,
6766     char *tnm, cred_t *cr)
6767 {
6768 	cnode_t *dcp = VTOC(dvp);
6769 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6770 	int error;
6771 	cnode_t *newcp = NULL;
6772 	struct vattr va;
6773 	timestruc_t current_time;
6774 	off_t commit = 0;
6775 
6776 	if (CFS_ISFS_WRITE_AROUND(fscp))
6777 		return (ETIMEDOUT);
6778 
6779 	mutex_enter(&dcp->c_statelock);
6780 
6781 	/* check permissions */
6782 	if (error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) {
6783 		mutex_exit(&dcp->c_statelock);
6784 		goto out;
6785 	}
6786 
6787 	/* the directory front file must be populated */
6788 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6789 		error = ETIMEDOUT;
6790 		mutex_exit(&dcp->c_statelock);
6791 		goto out;
6792 	}
6793 
6794 	/* make sure lnm does not already exist in the directory */
6795 	error = cachefs_dir_look(dcp, lnm, NULL, NULL, NULL, NULL);
6796 	if (error == ENOTDIR) {
6797 		error = ETIMEDOUT;
6798 		mutex_exit(&dcp->c_statelock);
6799 		goto out;
6800 	}
6801 	if (error != ENOENT) {
6802 		error = EEXIST;
6803 		mutex_exit(&dcp->c_statelock);
6804 		goto out;
6805 	}
6806 
6807 	/* make up a reasonable set of attributes */
6808 	cachefs_attr_setup(tva, &va, dcp, cr);
6809 	va.va_type = VLNK;
6810 	va.va_mode |= S_IFLNK;
6811 	va.va_size = strlen(tnm);
6812 
6813 	mutex_exit(&dcp->c_statelock);
6814 
6815 	/* create the cnode */
6816 	error = cachefs_cnode_create(fscp, &va, 0, &newcp);
6817 	if (error)
6818 		goto out;
6819 
6820 	rw_enter(&newcp->c_rwlock, RW_WRITER);
6821 	mutex_enter(&newcp->c_statelock);
6822 
6823 	error = cachefs_dlog_cidmap(fscp);
6824 	if (error) {
6825 		mutex_exit(&newcp->c_statelock);
6826 		rw_exit(&newcp->c_rwlock);
6827 		error = ENOSPC;
6828 		goto out;
6829 	}
6830 
6831 	cachefs_creategid(dcp, newcp, tva, cr);
6832 	mutex_enter(&dcp->c_statelock);
6833 	cachefs_createacl(dcp, newcp);
6834 	mutex_exit(&dcp->c_statelock);
6835 	gethrestime(&current_time);
6836 	newcp->c_metadata.md_vattr.va_atime = current_time;
6837 	newcp->c_metadata.md_localctime = current_time;
6838 	newcp->c_metadata.md_localmtime = current_time;
6839 	newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME |
6840 	    MD_LOCALCTIME;
6841 	newcp->c_flags |= CN_UPDATED;
6842 
6843 	/* log the operation */
6844 	commit = cachefs_dlog_symlink(fscp, dcp, newcp, lnm, tva, tnm, cr);
6845 	if (commit == 0) {
6846 		mutex_exit(&newcp->c_statelock);
6847 		rw_exit(&newcp->c_rwlock);
6848 		error = ENOSPC;
6849 		goto out;
6850 	}
6851 
6852 	/* store the symlink contents */
6853 	error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size);
6854 	if (error) {
6855 		mutex_exit(&newcp->c_statelock);
6856 		rw_exit(&newcp->c_rwlock);
6857 		goto out;
6858 	}
6859 	if (cachefs_modified_alloc(newcp)) {
6860 		mutex_exit(&newcp->c_statelock);
6861 		rw_exit(&newcp->c_rwlock);
6862 		error = ENOSPC;
6863 		goto out;
6864 	}
6865 
6866 	/*
6867 	 * write the metadata now rather than waiting until
6868 	 * inactive so that if there's no space we can let
6869 	 * the caller know.
6870 	 */
6871 	if (newcp->c_flags & CN_ALLOC_PENDING) {
6872 		if (newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
6873 			(void) filegrp_allocattr(newcp->c_filegrp);
6874 		}
6875 		error = filegrp_create_metadata(newcp->c_filegrp,
6876 		    &newcp->c_metadata, &newcp->c_id);
6877 		if (error) {
6878 			mutex_exit(&newcp->c_statelock);
6879 			rw_exit(&newcp->c_rwlock);
6880 			goto out;
6881 		}
6882 		newcp->c_flags &= ~CN_ALLOC_PENDING;
6883 	}
6884 	error = filegrp_write_metadata(newcp->c_filegrp,
6885 	    &newcp->c_id, &newcp->c_metadata);
6886 	if (error) {
6887 		mutex_exit(&newcp->c_statelock);
6888 		rw_exit(&newcp->c_rwlock);
6889 		goto out;
6890 	}
6891 	mutex_exit(&newcp->c_statelock);
6892 	rw_exit(&newcp->c_rwlock);
6893 
6894 	mutex_enter(&dcp->c_statelock);
6895 
6896 	/* enter the new file in the directory */
6897 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6898 		error = ETIMEDOUT;
6899 		mutex_exit(&dcp->c_statelock);
6900 		goto out;
6901 	}
6902 	cachefs_modified(dcp);
6903 	error = cachefs_dir_enter(dcp, lnm, &newcp->c_metadata.md_cookie,
6904 	    &newcp->c_id, SM_ASYNC);
6905 	if (error) {
6906 		mutex_exit(&dcp->c_statelock);
6907 		goto out;
6908 	}
6909 
6910 	/* update parent dir times */
6911 	dcp->c_metadata.md_localctime = current_time;
6912 	dcp->c_metadata.md_localmtime = current_time;
6913 	dcp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME;
6914 	dcp->c_flags |= CN_UPDATED;
6915 	mutex_exit(&dcp->c_statelock);
6916 
6917 out:
6918 	if (commit) {
6919 		/* commit the log entry */
6920 		if (cachefs_dlog_commit(fscp, commit, error)) {
6921 			/*EMPTY*/
6922 			/* XXX bob: fix on panic */
6923 		}
6924 	}
6925 
6926 	if (error) {
6927 		if (newcp) {
6928 			mutex_enter(&newcp->c_statelock);
6929 			newcp->c_flags |= CN_DESTROY;
6930 			mutex_exit(&newcp->c_statelock);
6931 		}
6932 	}
6933 	if (newcp) {
6934 		VN_RELE(CTOV(newcp));
6935 	}
6936 
6937 	return (error);
6938 }
6939 
6940 /*ARGSUSED*/
6941 static int
6942 cachefs_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
6943     caller_context_t *ct, int flags)
6944 {
6945 	cnode_t *dcp = VTOC(vp);
6946 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6947 	cachefscache_t *cachep = fscp->fs_cache;
6948 	int error = 0;
6949 	int held = 0;
6950 	int connected = 0;
6951 
6952 #ifdef CFSDEBUG
6953 	CFS_DEBUG(CFSDEBUG_VOPS)
6954 		printf("cachefs_readdir: ENTER vp %p\n", (void *)vp);
6955 #endif
6956 	if (getzoneid() != GLOBAL_ZONEID) {
6957 		error = EPERM;
6958 		goto out;
6959 	}
6960 
6961 	/*
6962 	 * Cachefs only provides pass-through support for NFSv4,
6963 	 * and all vnode operations are passed through to the
6964 	 * back file system. For NFSv4 pass-through to work, only
6965 	 * connected operation is supported, the cnode backvp must
6966 	 * exist, and cachefs optional (eg., disconnectable) flags
6967 	 * are turned off. Assert these conditions to ensure that
6968 	 * the backfilesystem is called for the readdir operation.
6969 	 */
6970 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6971 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6972 
6973 	for (;;) {
6974 		/* get (or renew) access to the file system */
6975 		if (held) {
6976 			/* Won't loop with NFSv4 connected behavior */
6977 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6978 			rw_exit(&dcp->c_rwlock);
6979 			cachefs_cd_release(fscp);
6980 			held = 0;
6981 		}
6982 		error = cachefs_cd_access(fscp, connected, 0);
6983 		if (error)
6984 			break;
6985 		rw_enter(&dcp->c_rwlock, RW_READER);
6986 		held = 1;
6987 
6988 		/* quit if link count of zero (posix) */
6989 		if (dcp->c_attr.va_nlink == 0) {
6990 			if (eofp)
6991 				*eofp = 1;
6992 			error = 0;
6993 			break;
6994 		}
6995 
6996 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6997 			error = cachefs_readdir_connected(vp, uiop, cr,
6998 			    eofp);
6999 			if (CFS_TIMEOUT(fscp, error)) {
7000 				rw_exit(&dcp->c_rwlock);
7001 				cachefs_cd_release(fscp);
7002 				held = 0;
7003 				cachefs_cd_timedout(fscp);
7004 				connected = 0;
7005 				continue;
7006 			}
7007 		} else {
7008 			error = cachefs_readdir_disconnected(vp, uiop, cr,
7009 			    eofp);
7010 			if (CFS_TIMEOUT(fscp, error)) {
7011 				if (cachefs_cd_access_miss(fscp)) {
7012 					error = cachefs_readdir_connected(vp,
7013 					    uiop, cr, eofp);
7014 					if (!CFS_TIMEOUT(fscp, error))
7015 						break;
7016 					delay(5*hz);
7017 					connected = 0;
7018 					continue;
7019 				}
7020 				connected = 1;
7021 				continue;
7022 			}
7023 		}
7024 		break;
7025 	}
7026 
7027 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READDIR))
7028 		cachefs_log_readdir(cachep, error, fscp->fs_cfsvfsp,
7029 		    &dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno,
7030 		    crgetuid(cr), uiop->uio_loffset, *eofp);
7031 
7032 	if (held) {
7033 		rw_exit(&dcp->c_rwlock);
7034 		cachefs_cd_release(fscp);
7035 	}
7036 
7037 #ifdef CFS_CD_DEBUG
7038 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7039 #endif
7040 out:
7041 #ifdef CFSDEBUG
7042 	CFS_DEBUG(CFSDEBUG_VOPS)
7043 		printf("cachefs_readdir: EXIT error = %d\n", error);
7044 #endif
7045 
7046 	return (error);
7047 }
7048 
7049 static int
7050 cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp)
7051 {
7052 	cnode_t *dcp = VTOC(vp);
7053 	int error;
7054 	fscache_t *fscp = C_TO_FSCACHE(dcp);
7055 	struct cachefs_req *rp;
7056 
7057 	mutex_enter(&dcp->c_statelock);
7058 
7059 	/* check directory consistency */
7060 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
7061 	if (error)
7062 		goto out;
7063 	dcp->c_usage++;
7064 
7065 	/* if dir was modified, toss old contents */
7066 	if (dcp->c_metadata.md_flags & MD_INVALREADDIR) {
7067 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
7068 		cachefs_inval_object(dcp);
7069 	}
7070 
7071 	error = 0;
7072 	if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) &&
7073 	    ((dcp->c_flags & (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0) &&
7074 	    !CFS_ISFS_BACKFS_NFSV4(fscp) &&
7075 	    (fscp->fs_cdconnected == CFS_CD_CONNECTED)) {
7076 
7077 		if (cachefs_async_okay()) {
7078 
7079 			/*
7080 			 * Set up asynchronous request to fill this
7081 			 * directory.
7082 			 */
7083 
7084 			dcp->c_flags |= CN_ASYNC_POPULATE;
7085 
7086 			rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
7087 			rp->cfs_cmd = CFS_POPULATE;
7088 			rp->cfs_req_u.cu_populate.cpop_vp = vp;
7089 			rp->cfs_cr = cr;
7090 
7091 			crhold(cr);
7092 			VN_HOLD(vp);
7093 
7094 			cachefs_addqueue(rp, &fscp->fs_workq);
7095 		} else {
7096 			error = cachefs_dir_fill(dcp, cr);
7097 			if (error != 0)
7098 				cachefs_nocache(dcp);
7099 		}
7100 	}
7101 
7102 	/* if front file is populated */
7103 	if (((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) == 0) &&
7104 	    !CFS_ISFS_BACKFS_NFSV4(fscp) &&
7105 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
7106 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
7107 		error = cachefs_dir_read(dcp, uiop, eofp);
7108 		if (error == 0)
7109 			fscp->fs_stats.st_hits++;
7110 	}
7111 
7112 	/* if front file could not be used */
7113 	if ((error != 0) ||
7114 	    CFS_ISFS_BACKFS_NFSV4(fscp) ||
7115 	    (dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
7116 	    ((dcp->c_metadata.md_flags & MD_POPULATED) == 0)) {
7117 
7118 		if (error && !(dcp->c_flags & CN_NOCACHE) &&
7119 		    !CFS_ISFS_BACKFS_NFSV4(fscp))
7120 			cachefs_nocache(dcp);
7121 
7122 		/* get the back vp */
7123 		if (dcp->c_backvp == NULL) {
7124 			error = cachefs_getbackvp(fscp, dcp);
7125 			if (error)
7126 				goto out;
7127 		}
7128 
7129 		if (fscp->fs_inum_size > 0) {
7130 			error = cachefs_readback_translate(dcp, uiop, cr, eofp);
7131 		} else {
7132 			/* do the dir read from the back fs */
7133 			(void) VOP_RWLOCK(dcp->c_backvp,
7134 			    V_WRITELOCK_FALSE, NULL);
7135 			CFS_DPRINT_BACKFS_NFSV4(fscp,
7136 			    ("cachefs_readdir (nfsv4): "
7137 			    "dcp %p, dbackvp %p\n", dcp, dcp->c_backvp));
7138 			error = VOP_READDIR(dcp->c_backvp, uiop, cr, eofp,
7139 			    NULL, 0);
7140 			VOP_RWUNLOCK(dcp->c_backvp, V_WRITELOCK_FALSE, NULL);
7141 		}
7142 
7143 		if (error == 0)
7144 			fscp->fs_stats.st_misses++;
7145 	}
7146 
7147 out:
7148 	mutex_exit(&dcp->c_statelock);
7149 
7150 	return (error);
7151 }
7152 
7153 static int
7154 cachefs_readback_translate(cnode_t *cp, uio_t *uiop, cred_t *cr, int *eofp)
7155 {
7156 	int error = 0;
7157 	fscache_t *fscp = C_TO_FSCACHE(cp);
7158 	caddr_t buffy = NULL;
7159 	int buffysize = MAXBSIZE;
7160 	caddr_t chrp, end;
7161 	ino64_t newinum;
7162 	struct dirent64 *de;
7163 	uio_t uioin;
7164 	iovec_t iov;
7165 
7166 	ASSERT(cp->c_backvp != NULL);
7167 	ASSERT(fscp->fs_inum_size > 0);
7168 
7169 	if (uiop->uio_resid < buffysize)
7170 		buffysize = (int)uiop->uio_resid;
7171 	buffy = cachefs_kmem_alloc(buffysize, KM_SLEEP);
7172 
7173 	iov.iov_base = buffy;
7174 	iov.iov_len = buffysize;
7175 	uioin.uio_iov = &iov;
7176 	uioin.uio_iovcnt = 1;
7177 	uioin.uio_segflg = UIO_SYSSPACE;
7178 	uioin.uio_fmode = 0;
7179 	uioin.uio_extflg = UIO_COPY_CACHED;
7180 	uioin.uio_loffset = uiop->uio_loffset;
7181 	uioin.uio_resid = buffysize;
7182 
7183 	(void) VOP_RWLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);
7184 	error = VOP_READDIR(cp->c_backvp, &uioin, cr, eofp, NULL, 0);
7185 	VOP_RWUNLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);
7186 
7187 	if (error != 0)
7188 		goto out;
7189 
7190 	end = buffy + buffysize - uioin.uio_resid;
7191 
7192 	mutex_exit(&cp->c_statelock);
7193 	mutex_enter(&fscp->fs_fslock);
7194 
7195 
7196 	for (chrp = buffy; chrp < end; chrp += de->d_reclen) {
7197 		de = (dirent64_t *)chrp;
7198 		newinum = cachefs_inum_real2fake(fscp, de->d_ino);
7199 		if (newinum == 0)
7200 			newinum = cachefs_fileno_conflict(fscp, de->d_ino);
7201 		de->d_ino = newinum;
7202 	}
7203 	mutex_exit(&fscp->fs_fslock);
7204 	mutex_enter(&cp->c_statelock);
7205 
7206 	error = uiomove(buffy, end - buffy, UIO_READ, uiop);
7207 	uiop->uio_loffset = uioin.uio_loffset;
7208 
7209 out:
7210 
7211 	if (buffy != NULL)
7212 		cachefs_kmem_free(buffy, buffysize);
7213 
7214 	return (error);
7215 }
7216 
7217 static int
7218 /*ARGSUSED*/
7219 cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop, cred_t *cr,
7220     int *eofp)
7221 {
7222 	cnode_t *dcp = VTOC(vp);
7223 	int error;
7224 
7225 	mutex_enter(&dcp->c_statelock);
7226 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
7227 		error = ETIMEDOUT;
7228 	} else {
7229 		error = cachefs_dir_read(dcp, uiop, eofp);
7230 		if (error == ENOTDIR)
7231 			error = ETIMEDOUT;
7232 	}
7233 	mutex_exit(&dcp->c_statelock);
7234 
7235 	return (error);
7236 }
7237 
7238 /*ARGSUSED*/
7239 static int
7240 cachefs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
7241 {
7242 	int error = 0;
7243 	struct cnode *cp = VTOC(vp);
7244 	fscache_t *fscp = C_TO_FSCACHE(cp);
7245 
7246 	/*
7247 	 * Cachefs only provides pass-through support for NFSv4,
7248 	 * and all vnode operations are passed through to the
7249 	 * back file system. For NFSv4 pass-through to work, only
7250 	 * connected operation is supported, the cnode backvp must
7251 	 * exist, and cachefs optional (eg., disconnectable) flags
7252 	 * are turned off. Assert these conditions, then bail
7253 	 * as  NFSv4 doesn't support VOP_FID.
7254 	 */
7255 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7256 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7257 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7258 		return (ENOTSUP);
7259 	}
7260 
7261 	mutex_enter(&cp->c_statelock);
7262 	if (fidp->fid_len < cp->c_metadata.md_cookie.fid_len) {
7263 		fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
7264 		error = ENOSPC;
7265 	} else {
7266 		bcopy(cp->c_metadata.md_cookie.fid_data, fidp->fid_data,
7267 		    cp->c_metadata.md_cookie.fid_len);
7268 		fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
7269 	}
7270 	mutex_exit(&cp->c_statelock);
7271 	return (error);
7272 }
7273 
7274 /* ARGSUSED2 */
7275 static int
7276 cachefs_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
7277 {
7278 	cnode_t *cp = VTOC(vp);
7279 
7280 	/*
7281 	 * XXX - This is ifdef'ed out for now. The problem -
7282 	 * getdents() acquires the read version of rwlock, then we come
7283 	 * into cachefs_readdir() and that wants to acquire the write version
7284 	 * of this lock (if its going to populate the directory). This is
7285 	 * a problem, this can be solved by introducing another lock in the
7286 	 * cnode.
7287 	 */
7288 /* XXX */
7289 	if (vp->v_type != VREG)
7290 		return (-1);
7291 	if (write_lock)
7292 		rw_enter(&cp->c_rwlock, RW_WRITER);
7293 	else
7294 		rw_enter(&cp->c_rwlock, RW_READER);
7295 	return (write_lock);
7296 }
7297 
7298 /* ARGSUSED */
7299 static void
7300 cachefs_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
7301 {
7302 	cnode_t *cp = VTOC(vp);
7303 	if (vp->v_type != VREG)
7304 		return;
7305 	rw_exit(&cp->c_rwlock);
7306 }
7307 
7308 /* ARGSUSED */
7309 static int
7310 cachefs_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
7311     caller_context_t *ct)
7312 {
7313 	return (0);
7314 }
7315 
7316 static int cachefs_lostpage = 0;
7317 /*
7318  * Return all the pages from [off..off+len] in file
7319  */
7320 /*ARGSUSED*/
7321 static int
7322 cachefs_getpage(struct vnode *vp, offset_t off, size_t len,
7323 	uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
7324 	caddr_t addr, enum seg_rw rw, cred_t *cr, caller_context_t *ct)
7325 {
7326 	cnode_t *cp = VTOC(vp);
7327 	int error;
7328 	fscache_t *fscp = C_TO_FSCACHE(cp);
7329 	cachefscache_t *cachep = fscp->fs_cache;
7330 	int held = 0;
7331 	int connected = 0;
7332 
7333 #ifdef CFSDEBUG
7334 	u_offset_t offx = (u_offset_t)off;
7335 
7336 	CFS_DEBUG(CFSDEBUG_VOPS)
7337 		printf("cachefs_getpage: ENTER vp %p off %lld len %lu rw %d\n",
7338 		    (void *)vp, offx, len, rw);
7339 #endif
7340 	if (getzoneid() != GLOBAL_ZONEID) {
7341 		error = EPERM;
7342 		goto out;
7343 	}
7344 
7345 	if (vp->v_flag & VNOMAP) {
7346 		error = ENOSYS;
7347 		goto out;
7348 	}
7349 
7350 	/* Call backfilesystem if NFSv4 */
7351 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7352 		error = cachefs_getpage_backfs_nfsv4(vp, off, len, protp, pl,
7353 		    plsz, seg, addr, rw, cr);
7354 		goto out;
7355 	}
7356 
7357 	/* XXX sam: make this do an async populate? */
7358 	if (pl == NULL) {
7359 		error = 0;
7360 		goto out;
7361 	}
7362 	if (protp != NULL)
7363 		*protp = PROT_ALL;
7364 
7365 	for (;;) {
7366 		/* get (or renew) access to the file system */
7367 		if (held) {
7368 			cachefs_cd_release(fscp);
7369 			held = 0;
7370 		}
7371 		error = cachefs_cd_access(fscp, connected, 0);
7372 		if (error)
7373 			break;
7374 		held = 1;
7375 
7376 		/*
7377 		 * If we are getting called as a side effect of a
7378 		 * cachefs_write()
7379 		 * operation the local file size might not be extended yet.
7380 		 * In this case we want to be able to return pages of zeroes.
7381 		 */
7382 		if ((u_offset_t)off + len >
7383 		    ((cp->c_size + PAGEOFFSET) & (offset_t)PAGEMASK)) {
7384 			if (seg != segkmap) {
7385 				error = EFAULT;
7386 				break;
7387 			}
7388 		}
7389 		error = pvn_getpages(cachefs_getapage, vp, (u_offset_t)off,
7390 		    len, protp, pl, plsz, seg, addr, rw, cr);
7391 		if (error == 0)
7392 			break;
7393 
7394 		if (((cp->c_flags & CN_NOCACHE) && (error == ENOSPC)) ||
7395 		    error == EAGAIN) {
7396 			connected = 0;
7397 			continue;
7398 		}
7399 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
7400 			if (CFS_TIMEOUT(fscp, error)) {
7401 				cachefs_cd_release(fscp);
7402 				held = 0;
7403 				cachefs_cd_timedout(fscp);
7404 				connected = 0;
7405 				continue;
7406 			}
7407 		} else {
7408 			if (CFS_TIMEOUT(fscp, error)) {
7409 				if (cachefs_cd_access_miss(fscp)) {
7410 					error = pvn_getpages(
7411 					    cachefs_getapage_back, vp,
7412 					    (u_offset_t)off, len, protp, pl,
7413 					    plsz, seg, addr, rw, cr);
7414 					if (!CFS_TIMEOUT(fscp, error) &&
7415 					    (error != EAGAIN))
7416 						break;
7417 					delay(5*hz);
7418 					connected = 0;
7419 					continue;
7420 				}
7421 				connected = 1;
7422 				continue;
7423 			}
7424 		}
7425 		break;
7426 	}
7427 
7428 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GETPAGE))
7429 		cachefs_log_getpage(cachep, error, vp->v_vfsp,
7430 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
7431 		    crgetuid(cr), off, len);
7432 
7433 	if (held) {
7434 		cachefs_cd_release(fscp);
7435 	}
7436 
7437 out:
7438 #ifdef CFS_CD_DEBUG
7439 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7440 #endif
7441 #ifdef CFSDEBUG
7442 	CFS_DEBUG(CFSDEBUG_VOPS)
7443 		printf("cachefs_getpage: EXIT vp %p error %d\n",
7444 		    (void *)vp, error);
7445 #endif
7446 	return (error);
7447 }
7448 
7449 /*
7450  * cachefs_getpage_backfs_nfsv4
7451  *
7452  * Call NFSv4 back filesystem to handle the getpage (cachefs
7453  * pass-through support for NFSv4).
7454  */
7455 static int
7456 cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off, size_t len,
7457 			uint_t *protp, struct page *pl[], size_t plsz,
7458 			struct seg *seg, caddr_t addr, enum seg_rw rw,
7459 			cred_t *cr)
7460 {
7461 	cnode_t *cp = VTOC(vp);
7462 	fscache_t *fscp = C_TO_FSCACHE(cp);
7463 	vnode_t *backvp;
7464 	int error;
7465 
7466 	/*
7467 	 * For NFSv4 pass-through to work, only connected operation is
7468 	 * supported, the cnode backvp must exist, and cachefs optional
7469 	 * (eg., disconnectable) flags are turned off. Assert these
7470 	 * conditions for the getpage operation.
7471 	 */
7472 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7473 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7474 
7475 	/* Call backfs vnode op after extracting backvp */
7476 	mutex_enter(&cp->c_statelock);
7477 	backvp = cp->c_backvp;
7478 	mutex_exit(&cp->c_statelock);
7479 
7480 	CFS_DPRINT_BACKFS_NFSV4(fscp,
7481 	    ("cachefs_getpage_backfs_nfsv4: cnode %p, backvp %p\n",
7482 	    cp, backvp));
7483 	error = VOP_GETPAGE(backvp, off, len, protp, pl, plsz, seg,
7484 	    addr, rw, cr, NULL);
7485 
7486 	return (error);
7487 }
7488 
7489 /*
7490  * Called from pvn_getpages to get a particular page.
7491  */
7492 /*ARGSUSED*/
7493 static int
7494 cachefs_getapage(struct vnode *vp, u_offset_t off, size_t len, uint_t *protp,
7495 	struct page *pl[], size_t plsz, struct seg *seg, caddr_t addr,
7496 	enum seg_rw rw, cred_t *cr)
7497 {
7498 	cnode_t *cp = VTOC(vp);
7499 	page_t **ppp, *pp = NULL;
7500 	fscache_t *fscp = C_TO_FSCACHE(cp);
7501 	cachefscache_t *cachep = fscp->fs_cache;
7502 	int error = 0;
7503 	struct page **ourpl;
7504 	struct page *ourstackpl[17]; /* see ASSERT() below for 17 */
7505 	int index = 0;
7506 	int downgrade;
7507 	int have_statelock = 0;
7508 	u_offset_t popoff;
7509 	size_t popsize = 0;
7510 
7511 	/*LINTED*/
7512 	ASSERT(((DEF_POP_SIZE / PAGESIZE) + 1) <= 17);
7513 
7514 	if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
7515 		ourpl = cachefs_kmem_alloc(sizeof (struct page *) *
7516 		    ((fscp->fs_info.fi_popsize / PAGESIZE) + 1), KM_SLEEP);
7517 	else
7518 		ourpl = ourstackpl;
7519 
7520 	ourpl[0] = NULL;
7521 	off = off & (offset_t)PAGEMASK;
7522 again:
7523 	/*
7524 	 * Look for the page
7525 	 */
7526 	if (page_exists(vp, off) == 0) {
7527 		/*
7528 		 * Need to do work to get the page.
7529 		 * Grab our lock because we are going to
7530 		 * modify the state of the cnode.
7531 		 */
7532 		if (! have_statelock) {
7533 			mutex_enter(&cp->c_statelock);
7534 			have_statelock = 1;
7535 		}
7536 		/*
7537 		 * If we're in NOCACHE mode, we will need a backvp
7538 		 */
7539 		if (cp->c_flags & CN_NOCACHE) {
7540 			if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
7541 				error = ETIMEDOUT;
7542 				goto out;
7543 			}
7544 			if (cp->c_backvp == NULL) {
7545 				error = cachefs_getbackvp(fscp, cp);
7546 				if (error)
7547 					goto out;
7548 			}
7549 			error = VOP_GETPAGE(cp->c_backvp, off,
7550 			    PAGESIZE, protp, ourpl, PAGESIZE, seg,
7551 			    addr, S_READ, cr, NULL);
7552 			/*
7553 			 * backfs returns EFAULT when we are trying for a
7554 			 * page beyond EOF but cachefs has the knowledge that
7555 			 * it is not beyond EOF be cause cp->c_size is
7556 			 * greater then the offset requested.
7557 			 */
7558 			if (error == EFAULT) {
7559 				error = 0;
7560 				pp = page_create_va(vp, off, PAGESIZE,
7561 				    PG_EXCL | PG_WAIT, seg, addr);
7562 				if (pp == NULL)
7563 					goto again;
7564 				pagezero(pp, 0, PAGESIZE);
7565 				pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
7566 				goto out;
7567 			}
7568 			if (error)
7569 				goto out;
7570 			goto getpages;
7571 		}
7572 		/*
7573 		 * We need a front file. If we can't get it,
7574 		 * put the cnode in NOCACHE mode and try again.
7575 		 */
7576 		if (cp->c_frontvp == NULL) {
7577 			error = cachefs_getfrontfile(cp);
7578 			if (error) {
7579 				cachefs_nocache(cp);
7580 				error = EAGAIN;
7581 				goto out;
7582 			}
7583 		}
7584 		/*
7585 		 * Check if the front file needs population.
7586 		 * If population is necessary, make sure we have a
7587 		 * backvp as well. We will get the page from the backvp.
7588 		 * bug 4152459-
7589 		 * But if the file system is in disconnected mode
7590 		 * and the file is a local file then do not check the
7591 		 * allocmap.
7592 		 */
7593 		if (((fscp->fs_cdconnected == CFS_CD_CONNECTED) ||
7594 		    ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) &&
7595 		    (cachefs_check_allocmap(cp, off) == 0)) {
7596 			if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
7597 				error = ETIMEDOUT;
7598 				goto out;
7599 			}
7600 			if (cp->c_backvp == NULL) {
7601 				error = cachefs_getbackvp(fscp, cp);
7602 				if (error)
7603 					goto out;
7604 			}
7605 			if (cp->c_filegrp->fg_flags & CFS_FG_WRITE) {
7606 				cachefs_cluster_allocmap(off, &popoff,
7607 				    &popsize,
7608 				    fscp->fs_info.fi_popsize, cp);
7609 				if (popsize != 0) {
7610 					error = cachefs_populate(cp,
7611 					    popoff, popsize,
7612 					    cp->c_frontvp, cp->c_backvp,
7613 					    cp->c_size, cr);
7614 					if (error) {
7615 						cachefs_nocache(cp);
7616 						error = EAGAIN;
7617 						goto out;
7618 					} else {
7619 						cp->c_flags |=
7620 						    CN_UPDATED |
7621 						    CN_NEED_FRONT_SYNC |
7622 						    CN_POPULATION_PENDING;
7623 					}
7624 					popsize = popsize - (off - popoff);
7625 				} else {
7626 					popsize = PAGESIZE;
7627 				}
7628 			}
7629 			/* else XXX assert CN_NOCACHE? */
7630 			error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
7631 			    PAGESIZE, protp, ourpl, popsize,
7632 			    seg, addr, S_READ, cr, NULL);
7633 			if (error)
7634 				goto out;
7635 			fscp->fs_stats.st_misses++;
7636 		} else {
7637 			if (cp->c_flags & CN_POPULATION_PENDING) {
7638 				error = VOP_FSYNC(cp->c_frontvp, FSYNC, cr,
7639 				    NULL);
7640 				cp->c_flags &= ~CN_POPULATION_PENDING;
7641 				if (error) {
7642 					cachefs_nocache(cp);
7643 					error = EAGAIN;
7644 					goto out;
7645 				}
7646 			}
7647 			/*
7648 			 * File was populated so we get the page from the
7649 			 * frontvp
7650 			 */
7651 			error = VOP_GETPAGE(cp->c_frontvp, (offset_t)off,
7652 			    PAGESIZE, protp, ourpl, PAGESIZE, seg, addr,
7653 			    rw, cr, NULL);
7654 			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GPFRONT))
7655 				cachefs_log_gpfront(cachep, error,
7656 				    fscp->fs_cfsvfsp,
7657 				    &cp->c_metadata.md_cookie, cp->c_fileno,
7658 				    crgetuid(cr), off, PAGESIZE);
7659 			if (error) {
7660 				cachefs_nocache(cp);
7661 				error = EAGAIN;
7662 				goto out;
7663 			}
7664 			fscp->fs_stats.st_hits++;
7665 		}
7666 getpages:
7667 		ASSERT(have_statelock);
7668 		if (have_statelock) {
7669 			mutex_exit(&cp->c_statelock);
7670 			have_statelock = 0;
7671 		}
7672 		downgrade = 0;
7673 		for (ppp = ourpl; *ppp; ppp++) {
7674 			if ((*ppp)->p_offset < off) {
7675 				index++;
7676 				page_unlock(*ppp);
7677 				continue;
7678 			}
7679 			if (PAGE_SHARED(*ppp)) {
7680 				if (page_tryupgrade(*ppp) == 0) {
7681 					for (ppp = &ourpl[index]; *ppp; ppp++)
7682 						page_unlock(*ppp);
7683 					error = EAGAIN;
7684 					goto out;
7685 				}
7686 				downgrade = 1;
7687 			}
7688 			ASSERT(PAGE_EXCL(*ppp));
7689 			(void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
7690 			page_rename(*ppp, vp, (*ppp)->p_offset);
7691 		}
7692 		pl[0] = ourpl[index];
7693 		pl[1] = NULL;
7694 		if (downgrade) {
7695 			page_downgrade(ourpl[index]);
7696 		}
7697 		/* Unlock the rest of the pages from the cluster */
7698 		for (ppp = &ourpl[index+1]; *ppp; ppp++)
7699 			page_unlock(*ppp);
7700 	} else {
7701 		ASSERT(! have_statelock);
7702 		if (have_statelock) {
7703 			mutex_exit(&cp->c_statelock);
7704 			have_statelock = 0;
7705 		}
7706 		/* XXX SE_SHARED probably isn't what we *always* want */
7707 		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
7708 			cachefs_lostpage++;
7709 			goto again;
7710 		}
7711 		pl[0] = pp;
7712 		pl[1] = NULL;
7713 		/* XXX increment st_hits?  i don't think so, but... */
7714 	}
7715 
7716 out:
7717 	if (have_statelock) {
7718 		mutex_exit(&cp->c_statelock);
7719 		have_statelock = 0;
7720 	}
7721 	if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
7722 		cachefs_kmem_free(ourpl, sizeof (struct page *) *
7723 		    ((fscp->fs_info.fi_popsize / PAGESIZE) + 1));
7724 	return (error);
7725 }
7726 
7727 /* gets a page but only from the back fs */
7728 /*ARGSUSED*/
7729 static int
7730 cachefs_getapage_back(struct vnode *vp, u_offset_t off, size_t len,
7731     uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
7732     caddr_t addr, enum seg_rw rw, cred_t *cr)
7733 {
7734 	cnode_t *cp = VTOC(vp);
7735 	page_t **ppp, *pp = NULL;
7736 	fscache_t *fscp = C_TO_FSCACHE(cp);
7737 	int error = 0;
7738 	struct page *ourpl[17];
7739 	int index = 0;
7740 	int have_statelock = 0;
7741 	int downgrade;
7742 
7743 	/*
7744 	 * Grab the cnode statelock so the cnode state won't change
7745 	 * while we're in here.
7746 	 */
7747 	ourpl[0] = NULL;
7748 	off = off & (offset_t)PAGEMASK;
7749 again:
7750 	if (page_exists(vp, off) == 0) {
7751 		if (! have_statelock) {
7752 			mutex_enter(&cp->c_statelock);
7753 			have_statelock = 1;
7754 		}
7755 
7756 		if (cp->c_backvp == NULL) {
7757 			error = cachefs_getbackvp(fscp, cp);
7758 			if (error)
7759 				goto out;
7760 		}
7761 		error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
7762 		    PAGESIZE, protp, ourpl, PAGESIZE, seg,
7763 		    addr, S_READ, cr, NULL);
7764 		if (error)
7765 			goto out;
7766 
7767 		if (have_statelock) {
7768 			mutex_exit(&cp->c_statelock);
7769 			have_statelock = 0;
7770 		}
7771 		downgrade = 0;
7772 		for (ppp = ourpl; *ppp; ppp++) {
7773 			if ((*ppp)->p_offset < off) {
7774 				index++;
7775 				page_unlock(*ppp);
7776 				continue;
7777 			}
7778 			if (PAGE_SHARED(*ppp)) {
7779 				if (page_tryupgrade(*ppp) == 0) {
7780 					for (ppp = &ourpl[index]; *ppp; ppp++)
7781 						page_unlock(*ppp);
7782 					error = EAGAIN;
7783 					goto out;
7784 				}
7785 				downgrade = 1;
7786 			}
7787 			ASSERT(PAGE_EXCL(*ppp));
7788 			(void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
7789 			page_rename(*ppp, vp, (*ppp)->p_offset);
7790 		}
7791 		pl[0] = ourpl[index];
7792 		pl[1] = NULL;
7793 		if (downgrade) {
7794 			page_downgrade(ourpl[index]);
7795 		}
7796 		/* Unlock the rest of the pages from the cluster */
7797 		for (ppp = &ourpl[index+1]; *ppp; ppp++)
7798 			page_unlock(*ppp);
7799 	} else {
7800 		ASSERT(! have_statelock);
7801 		if (have_statelock) {
7802 			mutex_exit(&cp->c_statelock);
7803 			have_statelock = 0;
7804 		}
7805 		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
7806 			cachefs_lostpage++;
7807 			goto again;
7808 		}
7809 		pl[0] = pp;
7810 		pl[1] = NULL;
7811 	}
7812 
7813 out:
7814 	if (have_statelock) {
7815 		mutex_exit(&cp->c_statelock);
7816 		have_statelock = 0;
7817 	}
7818 	return (error);
7819 }
7820 
7821 /*ARGSUSED*/
7822 static int
7823 cachefs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
7824     caller_context_t *ct)
7825 {
7826 	cnode_t *cp = VTOC(vp);
7827 	int error = 0;
7828 	fscache_t *fscp = C_TO_FSCACHE(cp);
7829 	int held = 0;
7830 	int connected = 0;
7831 
7832 	if (getzoneid() != GLOBAL_ZONEID)
7833 		return (EPERM);
7834 
7835 	/* Call backfilesytem if NFSv4 */
7836 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7837 		error = cachefs_putpage_backfs_nfsv4(vp, off, len, flags, cr);
7838 		goto out;
7839 	}
7840 
7841 	for (;;) {
7842 		/* get (or renew) access to the file system */
7843 		if (held) {
7844 			cachefs_cd_release(fscp);
7845 			held = 0;
7846 		}
7847 		error = cachefs_cd_access(fscp, connected, 1);
7848 		if (error)
7849 			break;
7850 		held = 1;
7851 
7852 		error = cachefs_putpage_common(vp, off, len, flags, cr);
7853 		if (error == 0)
7854 			break;
7855 
7856 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
7857 			if (CFS_TIMEOUT(fscp, error)) {
7858 				cachefs_cd_release(fscp);
7859 				held = 0;
7860 				cachefs_cd_timedout(fscp);
7861 				connected = 0;
7862 				continue;
7863 			}
7864 		} else {
7865 			if (NOMEMWAIT()) {
7866 				error = 0;
7867 				goto out;
7868 			}
7869 			if (CFS_TIMEOUT(fscp, error)) {
7870 				connected = 1;
7871 				continue;
7872 			}
7873 		}
7874 		break;
7875 	}
7876 
7877 out:
7878 
7879 	if (held) {
7880 		cachefs_cd_release(fscp);
7881 	}
7882 
7883 #ifdef CFS_CD_DEBUG
7884 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7885 #endif
7886 	return (error);
7887 }
7888 
7889 /*
7890  * cachefs_putpage_backfs_nfsv4
7891  *
7892  * Call NFSv4 back filesystem to handle the putpage (cachefs
7893  * pass-through support for NFSv4).
7894  */
7895 static int
7896 cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off, size_t len, int flags,
7897 			cred_t *cr)
7898 {
7899 	cnode_t *cp = VTOC(vp);
7900 	fscache_t *fscp = C_TO_FSCACHE(cp);
7901 	vnode_t *backvp;
7902 	int error;
7903 
7904 	/*
7905 	 * For NFSv4 pass-through to work, only connected operation is
7906 	 * supported, the cnode backvp must exist, and cachefs optional
7907 	 * (eg., disconnectable) flags are turned off. Assert these
7908 	 * conditions for the putpage operation.
7909 	 */
7910 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7911 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7912 
7913 	/* Call backfs vnode op after extracting backvp */
7914 	mutex_enter(&cp->c_statelock);
7915 	backvp = cp->c_backvp;
7916 	mutex_exit(&cp->c_statelock);
7917 
7918 	CFS_DPRINT_BACKFS_NFSV4(fscp,
7919 	    ("cachefs_putpage_backfs_nfsv4: cnode %p, backvp %p\n",
7920 	    cp, backvp));
7921 	error = VOP_PUTPAGE(backvp, off, len, flags, cr, NULL);
7922 
7923 	return (error);
7924 }
7925 
7926 /*
7927  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
7928  * If len == 0, do from off to EOF.
7929  *
7930  * The normal cases should be len == 0 & off == 0 (entire vp list),
7931  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
7932  * (from pageout).
7933  */
7934 
7935 /*ARGSUSED*/
7936 int
7937 cachefs_putpage_common(struct vnode *vp, offset_t off, size_t len,
7938     int flags, cred_t *cr)
7939 {
7940 	struct cnode *cp  = VTOC(vp);
7941 	struct page *pp;
7942 	size_t io_len;
7943 	u_offset_t eoff, io_off;
7944 	int error = 0;
7945 	fscache_t *fscp = C_TO_FSCACHE(cp);
7946 	cachefscache_t *cachep = fscp->fs_cache;
7947 
7948 	if (len == 0 && (flags & B_INVAL) == 0 && vn_is_readonly(vp)) {
7949 		return (0);
7950 	}
7951 	if (!vn_has_cached_data(vp) || (off >= cp->c_size &&
7952 	    (flags & B_INVAL) == 0))
7953 		return (0);
7954 
7955 	/*
7956 	 * Should never have cached data for the cachefs vnode
7957 	 * if NFSv4 is in use.
7958 	 */
7959 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
7960 
7961 	/*
7962 	 * If this is an async putpage let a thread handle it.
7963 	 */
7964 	if (flags & B_ASYNC) {
7965 		struct cachefs_req *rp;
7966 		int tflags = (flags & ~(B_ASYNC|B_DONTNEED));
7967 
7968 		if (ttoproc(curthread) == proc_pageout) {
7969 			/*
7970 			 * If this is the page daemon we
7971 			 * do the push synchronously (Dangerous!) and hope
7972 			 * we can free enough to keep running...
7973 			 */
7974 			flags &= ~B_ASYNC;
7975 			goto again;
7976 		}
7977 
7978 		if (! cachefs_async_okay()) {
7979 
7980 			/*
7981 			 * this is somewhat like NFS's behavior.  keep
7982 			 * the system from thrashing.  we've seen
7983 			 * cases where async queues get out of
7984 			 * control, especially if
7985 			 * madvise(MADV_SEQUENTIAL) is done on a large
7986 			 * mmap()ed file that is read sequentially.
7987 			 */
7988 
7989 			flags &= ~B_ASYNC;
7990 			goto again;
7991 		}
7992 
7993 		/*
7994 		 * if no flags other than B_ASYNC were set,
7995 		 * we coalesce putpage requests into a single one for the
7996 		 * whole file (len = off = 0).  If such a request is
7997 		 * already queued, we're done.
7998 		 *
7999 		 * If there are other flags set (e.g., B_INVAL), we don't
8000 		 * attempt to coalesce and we use the specified length and
8001 		 * offset.
8002 		 */
8003 		rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
8004 		mutex_enter(&cp->c_iomutex);
8005 		if ((cp->c_ioflags & CIO_PUTPAGES) == 0 || tflags != 0) {
8006 			rp->cfs_cmd = CFS_PUTPAGE;
8007 			rp->cfs_req_u.cu_putpage.cp_vp = vp;
8008 			if (tflags == 0) {
8009 				off = len = 0;
8010 				cp->c_ioflags |= CIO_PUTPAGES;
8011 			}
8012 			rp->cfs_req_u.cu_putpage.cp_off = off;
8013 			rp->cfs_req_u.cu_putpage.cp_len = (uint_t)len;
8014 			rp->cfs_req_u.cu_putpage.cp_flags = flags & ~B_ASYNC;
8015 			rp->cfs_cr = cr;
8016 			crhold(rp->cfs_cr);
8017 			VN_HOLD(vp);
8018 			cp->c_nio++;
8019 			cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));
8020 		} else {
8021 			kmem_cache_free(cachefs_req_cache, rp);
8022 		}
8023 
8024 		mutex_exit(&cp->c_iomutex);
8025 		return (0);
8026 	}
8027 
8028 
8029 again:
8030 	if (len == 0) {
8031 		/*
8032 		 * Search the entire vp list for pages >= off
8033 		 */
8034 		error = pvn_vplist_dirty(vp, off, cachefs_push, flags, cr);
8035 	} else {
8036 		/*
8037 		 * Do a range from [off...off + len] looking for pages
8038 		 * to deal with.
8039 		 */
8040 		eoff = (u_offset_t)off + len;
8041 		for (io_off = off; io_off < eoff && io_off < cp->c_size;
8042 		    io_off += io_len) {
8043 			/*
8044 			 * If we are not invalidating, synchronously
8045 			 * freeing or writing pages use the routine
8046 			 * page_lookup_nowait() to prevent reclaiming
8047 			 * them from the free list.
8048 			 */
8049 			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
8050 				pp = page_lookup(vp, io_off,
8051 				    (flags & (B_INVAL | B_FREE)) ?
8052 				    SE_EXCL : SE_SHARED);
8053 			} else {
8054 				/* XXX this looks like dead code */
8055 				pp = page_lookup_nowait(vp, io_off,
8056 				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
8057 			}
8058 
8059 			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
8060 				io_len = PAGESIZE;
8061 			else {
8062 				error = cachefs_push(vp, pp, &io_off,
8063 				    &io_len, flags, cr);
8064 				if (error != 0)
8065 					break;
8066 				/*
8067 				 * "io_off" and "io_len" are returned as
8068 				 * the range of pages we actually wrote.
8069 				 * This allows us to skip ahead more quickly
8070 				 * since several pages may've been dealt
8071 				 * with by this iteration of the loop.
8072 				 */
8073 			}
8074 		}
8075 	}
8076 
8077 	if (error == 0 && off == 0 && (len == 0 || len >= cp->c_size)) {
8078 		cp->c_flags &= ~CDIRTY;
8079 	}
8080 
8081 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_PUTPAGE))
8082 		cachefs_log_putpage(cachep, error, fscp->fs_cfsvfsp,
8083 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
8084 		    crgetuid(cr), off, len);
8085 
8086 	return (error);
8087 
8088 }
8089 
8090 /*ARGSUSED*/
8091 static int
8092 cachefs_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
8093     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
8094     caller_context_t *ct)
8095 {
8096 	cnode_t *cp = VTOC(vp);
8097 	fscache_t *fscp = C_TO_FSCACHE(cp);
8098 	struct segvn_crargs vn_a;
8099 	int error;
8100 	int held = 0;
8101 	int writing;
8102 	int connected = 0;
8103 
8104 #ifdef CFSDEBUG
8105 	u_offset_t offx = (u_offset_t)off;
8106 
8107 	CFS_DEBUG(CFSDEBUG_VOPS)
8108 		printf("cachefs_map: ENTER vp %p off %lld len %lu flags %d\n",
8109 		    (void *)vp, offx, len, flags);
8110 #endif
8111 	if (getzoneid() != GLOBAL_ZONEID) {
8112 		error = EPERM;
8113 		goto out;
8114 	}
8115 
8116 	if (vp->v_flag & VNOMAP) {
8117 		error = ENOSYS;
8118 		goto out;
8119 	}
8120 	if (off < 0 || (offset_t)(off + len) < 0) {
8121 		error = ENXIO;
8122 		goto out;
8123 	}
8124 	if (vp->v_type != VREG) {
8125 		error = ENODEV;
8126 		goto out;
8127 	}
8128 
8129 	/*
8130 	 * Check to see if the vnode is currently marked as not cachable.
8131 	 * If so, we have to refuse the map request as this violates the
8132 	 * don't cache attribute.
8133 	 */
8134 	if (vp->v_flag & VNOCACHE)
8135 		return (EAGAIN);
8136 
8137 #ifdef OBSOLETE
8138 	/*
8139 	 * If file is being locked, disallow mapping.
8140 	 */
8141 	if (vn_has_flocks(vp)) {
8142 		error = EAGAIN;
8143 		goto out;
8144 	}
8145 #endif
8146 
8147 	/* call backfilesystem if NFSv4 */
8148 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
8149 		error = cachefs_map_backfs_nfsv4(vp, off, as, addrp, len, prot,
8150 		    maxprot, flags, cr);
8151 		goto out;
8152 	}
8153 
8154 	writing = (prot & PROT_WRITE && ((flags & MAP_PRIVATE) == 0));
8155 
8156 	for (;;) {
8157 		/* get (or renew) access to the file system */
8158 		if (held) {
8159 			cachefs_cd_release(fscp);
8160 			held = 0;
8161 		}
8162 		error = cachefs_cd_access(fscp, connected, writing);
8163 		if (error)
8164 			break;
8165 		held = 1;
8166 
8167 		if (writing) {
8168 			mutex_enter(&cp->c_statelock);
8169 			if (CFS_ISFS_WRITE_AROUND(fscp)) {
8170 				if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
8171 					connected = 1;
8172 					continue;
8173 				} else {
8174 					cachefs_nocache(cp);
8175 				}
8176 			}
8177 
8178 			/*
8179 			 * CN_MAPWRITE is for an optimization in cachefs_delmap.
8180 			 * If CN_MAPWRITE is not set then cachefs_delmap does
8181 			 * not need to try to push out any pages.
8182 			 * This bit gets cleared when the cnode goes inactive.
8183 			 */
8184 			cp->c_flags |= CN_MAPWRITE;
8185 
8186 			mutex_exit(&cp->c_statelock);
8187 		}
8188 		break;
8189 	}
8190 
8191 	if (held) {
8192 		cachefs_cd_release(fscp);
8193 	}
8194 
8195 	as_rangelock(as);
8196 	error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
8197 	if (error != 0) {
8198 		as_rangeunlock(as);
8199 		goto out;
8200 	}
8201 
8202 	/*
8203 	 * package up all the data passed in into a segvn_args struct and
8204 	 * call as_map with segvn_create function to create a new segment
8205 	 * in the address space.
8206 	 */
8207 	vn_a.vp = vp;
8208 	vn_a.offset = off;
8209 	vn_a.type = flags & MAP_TYPE;
8210 	vn_a.prot = (uchar_t)prot;
8211 	vn_a.maxprot = (uchar_t)maxprot;
8212 	vn_a.cred = cr;
8213 	vn_a.amp = NULL;
8214 	vn_a.flags = flags & ~MAP_TYPE;
8215 	vn_a.szc = 0;
8216 	vn_a.lgrp_mem_policy_flags = 0;
8217 	error = as_map(as, *addrp, len, segvn_create, &vn_a);
8218 	as_rangeunlock(as);
8219 out:
8220 
8221 #ifdef CFS_CD_DEBUG
8222 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8223 #endif
8224 #ifdef CFSDEBUG
8225 	CFS_DEBUG(CFSDEBUG_VOPS)
8226 		printf("cachefs_map: EXIT vp %p error %d\n", (void *)vp, error);
8227 #endif
8228 	return (error);
8229 }
8230 
8231 /*
8232  * cachefs_map_backfs_nfsv4
8233  *
8234  * Call NFSv4 back filesystem to handle the map (cachefs
8235  * pass-through support for NFSv4).
8236  */
8237 static int
8238 cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off, struct as *as,
8239 			caddr_t *addrp, size_t len, uchar_t prot,
8240 			uchar_t maxprot, uint_t flags, cred_t *cr)
8241 {
8242 	cnode_t *cp = VTOC(vp);
8243 	fscache_t *fscp = C_TO_FSCACHE(cp);
8244 	vnode_t *backvp;
8245 	int error;
8246 
8247 	/*
8248 	 * For NFSv4 pass-through to work, only connected operation is
8249 	 * supported, the cnode backvp must exist, and cachefs optional
8250 	 * (eg., disconnectable) flags are turned off. Assert these
8251 	 * conditions for the map operation.
8252 	 */
8253 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8254 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8255 
8256 	/* Call backfs vnode op after extracting backvp */
8257 	mutex_enter(&cp->c_statelock);
8258 	backvp = cp->c_backvp;
8259 	mutex_exit(&cp->c_statelock);
8260 
8261 	CFS_DPRINT_BACKFS_NFSV4(fscp,
8262 	    ("cachefs_map_backfs_nfsv4: cnode %p, backvp %p\n",
8263 	    cp, backvp));
8264 	error = VOP_MAP(backvp, off, as, addrp, len, prot, maxprot, flags, cr,
8265 	    NULL);
8266 
8267 	return (error);
8268 }
8269 
8270 /*ARGSUSED*/
8271 static int
8272 cachefs_addmap(struct vnode *vp, offset_t off, struct as *as,
8273     caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
8274     cred_t *cr, caller_context_t *ct)
8275 {
8276 	cnode_t *cp = VTOC(vp);
8277 	fscache_t *fscp = C_TO_FSCACHE(cp);
8278 
8279 	if (getzoneid() != GLOBAL_ZONEID)
8280 		return (EPERM);
8281 
8282 	if (vp->v_flag & VNOMAP)
8283 		return (ENOSYS);
8284 
8285 	/*
8286 	 * Check this is not an NFSv4 filesystem, as the mapping
8287 	 * is not done on the cachefs filesystem if NFSv4 is in
8288 	 * use.
8289 	 */
8290 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8291 
8292 	mutex_enter(&cp->c_statelock);
8293 	cp->c_mapcnt += btopr(len);
8294 	mutex_exit(&cp->c_statelock);
8295 	return (0);
8296 }
8297 
8298 /*ARGSUSED*/
8299 static int
8300 cachefs_delmap(struct vnode *vp, offset_t off, struct as *as,
8301 	caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
8302 	cred_t *cr, caller_context_t *ct)
8303 {
8304 	cnode_t *cp = VTOC(vp);
8305 	fscache_t *fscp = C_TO_FSCACHE(cp);
8306 	int error;
8307 	int connected = 0;
8308 	int held = 0;
8309 
8310 	/*
8311 	 * The file may be passed in to (or inherited into) the zone, so we
8312 	 * need to let this operation go through since it happens as part of
8313 	 * exiting.
8314 	 */
8315 	if (vp->v_flag & VNOMAP)
8316 		return (ENOSYS);
8317 
8318 	/*
8319 	 * Check this is not an NFSv4 filesystem, as the mapping
8320 	 * is not done on the cachefs filesystem if NFSv4 is in
8321 	 * use.
8322 	 */
8323 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8324 
8325 	mutex_enter(&cp->c_statelock);
8326 	cp->c_mapcnt -= btopr(len);
8327 	ASSERT(cp->c_mapcnt >= 0);
8328 	mutex_exit(&cp->c_statelock);
8329 
8330 	if (cp->c_mapcnt || !vn_has_cached_data(vp) ||
8331 	    ((cp->c_flags & CN_MAPWRITE) == 0))
8332 		return (0);
8333 
8334 	for (;;) {
8335 		/* get (or renew) access to the file system */
8336 		if (held) {
8337 			cachefs_cd_release(fscp);
8338 			held = 0;
8339 		}
8340 		error = cachefs_cd_access(fscp, connected, 1);
8341 		if (error)
8342 			break;
8343 		held = 1;
8344 		connected = 0;
8345 
8346 		error = cachefs_putpage_common(vp, (offset_t)0,
8347 		    (uint_t)0, 0, cr);
8348 		if (CFS_TIMEOUT(fscp, error)) {
8349 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8350 				cachefs_cd_release(fscp);
8351 				held = 0;
8352 				cachefs_cd_timedout(fscp);
8353 				continue;
8354 			} else {
8355 				connected = 1;
8356 				continue;
8357 			}
8358 		}
8359 
8360 		/* if no space left in cache, wait until connected */
8361 		if ((error == ENOSPC) &&
8362 		    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
8363 			connected = 1;
8364 			continue;
8365 		}
8366 
8367 		mutex_enter(&cp->c_statelock);
8368 		if (!error)
8369 			error = cp->c_error;
8370 		cp->c_error = 0;
8371 		mutex_exit(&cp->c_statelock);
8372 		break;
8373 	}
8374 
8375 	if (held)
8376 		cachefs_cd_release(fscp);
8377 
8378 #ifdef CFS_CD_DEBUG
8379 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8380 #endif
8381 	return (error);
8382 }
8383 
8384 /* ARGSUSED */
8385 static int
8386 cachefs_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
8387 	offset_t offset, struct flk_callback *flk_cbp, cred_t *cr,
8388 	caller_context_t *ct)
8389 {
8390 	struct cnode *cp = VTOC(vp);
8391 	int error;
8392 	struct fscache *fscp = C_TO_FSCACHE(cp);
8393 	vnode_t *backvp;
8394 	int held = 0;
8395 	int connected = 0;
8396 
8397 	if (getzoneid() != GLOBAL_ZONEID)
8398 		return (EPERM);
8399 
8400 	if ((cmd != F_GETLK) && (cmd != F_SETLK) && (cmd != F_SETLKW))
8401 		return (EINVAL);
8402 
8403 	/* Disallow locking of files that are currently mapped */
8404 	if (((cmd == F_SETLK) || (cmd == F_SETLKW)) && (cp->c_mapcnt > 0)) {
8405 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8406 		return (EAGAIN);
8407 	}
8408 
8409 	/*
8410 	 * Cachefs only provides pass-through support for NFSv4,
8411 	 * and all vnode operations are passed through to the
8412 	 * back file system. For NFSv4 pass-through to work, only
8413 	 * connected operation is supported, the cnode backvp must
8414 	 * exist, and cachefs optional (eg., disconnectable) flags
8415 	 * are turned off. Assert these conditions to ensure that
8416 	 * the backfilesystem is called for the frlock operation.
8417 	 */
8418 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8419 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8420 
8421 	/* XXX bob: nfs does a bunch more checks than we do */
8422 	if (CFS_ISFS_LLOCK(fscp)) {
8423 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8424 		return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
8425 	}
8426 
8427 	for (;;) {
8428 		/* get (or renew) access to the file system */
8429 		if (held) {
8430 			/* Won't loop with NFSv4 connected behavior */
8431 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8432 			cachefs_cd_release(fscp);
8433 			held = 0;
8434 		}
8435 		error = cachefs_cd_access(fscp, connected, 0);
8436 		if (error)
8437 			break;
8438 		held = 1;
8439 
8440 		/* if not connected, quit or wait */
8441 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
8442 			connected = 1;
8443 			continue;
8444 		}
8445 
8446 		/* nocache the file */
8447 		if ((cp->c_flags & CN_NOCACHE) == 0 &&
8448 		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8449 			mutex_enter(&cp->c_statelock);
8450 			cachefs_nocache(cp);
8451 			mutex_exit(&cp->c_statelock);
8452 		}
8453 
8454 		/*
8455 		 * XXX bob: probably should do a consistency check
8456 		 * Pass arguments unchanged if NFSv4 is the backfs.
8457 		 */
8458 		if (bfp->l_whence == 2 && CFS_ISFS_BACKFS_NFSV4(fscp) == 0) {
8459 			bfp->l_start += cp->c_size;
8460 			bfp->l_whence = 0;
8461 		}
8462 
8463 		/* get the back vp */
8464 		mutex_enter(&cp->c_statelock);
8465 		if (cp->c_backvp == NULL) {
8466 			error = cachefs_getbackvp(fscp, cp);
8467 			if (error) {
8468 				mutex_exit(&cp->c_statelock);
8469 				break;
8470 			}
8471 		}
8472 		backvp = cp->c_backvp;
8473 		VN_HOLD(backvp);
8474 		mutex_exit(&cp->c_statelock);
8475 
8476 		/*
8477 		 * make sure we can flush currently dirty pages before
8478 		 * allowing the lock
8479 		 */
8480 		if (bfp->l_type != F_UNLCK && cmd != F_GETLK &&
8481 		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8482 			error = cachefs_putpage(
8483 			    vp, (offset_t)0, 0, B_INVAL, cr, ct);
8484 			if (error) {
8485 				error = ENOLCK;
8486 				VN_RELE(backvp);
8487 				break;
8488 			}
8489 		}
8490 
8491 		/* do lock on the back file */
8492 		CFS_DPRINT_BACKFS_NFSV4(fscp,
8493 		    ("cachefs_frlock (nfsv4): cp %p, backvp %p\n",
8494 		    cp, backvp));
8495 		error = VOP_FRLOCK(backvp, cmd, bfp, flag, offset, NULL, cr,
8496 		    ct);
8497 		VN_RELE(backvp);
8498 		if (CFS_TIMEOUT(fscp, error)) {
8499 			connected = 1;
8500 			continue;
8501 		}
8502 		break;
8503 	}
8504 
8505 	if (held) {
8506 		cachefs_cd_release(fscp);
8507 	}
8508 
8509 	/*
8510 	 * If we are setting a lock mark the vnode VNOCACHE so the page
8511 	 * cache does not give inconsistent results on locked files shared
8512 	 * between clients.  The VNOCACHE flag is never turned off as long
8513 	 * as the vnode is active because it is hard to figure out when the
8514 	 * last lock is gone.
8515 	 * XXX - what if some already has the vnode mapped in?
8516 	 * XXX bob: see nfs3_frlock, do not allow locking if vnode mapped in.
8517 	 */
8518 	if ((error == 0) && (bfp->l_type != F_UNLCK) && (cmd != F_GETLK) &&
8519 	    !CFS_ISFS_BACKFS_NFSV4(fscp))
8520 		vp->v_flag |= VNOCACHE;
8521 
8522 #ifdef CFS_CD_DEBUG
8523 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8524 #endif
8525 	return (error);
8526 }
8527 
8528 /*
8529  * Free storage space associated with the specified vnode.  The portion
8530  * to be freed is specified by bfp->l_start and bfp->l_len (already
8531  * normalized to a "whence" of 0).
8532  *
8533  * This is an experimental facility whose continued existence is not
8534  * guaranteed.  Currently, we only support the special case
8535  * of l_len == 0, meaning free to end of file.
8536  */
8537 /* ARGSUSED */
8538 static int
8539 cachefs_space(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
8540 	offset_t offset, cred_t *cr, caller_context_t *ct)
8541 {
8542 	cnode_t *cp = VTOC(vp);
8543 	fscache_t *fscp = C_TO_FSCACHE(cp);
8544 	int error;
8545 
8546 	ASSERT(vp->v_type == VREG);
8547 	if (getzoneid() != GLOBAL_ZONEID)
8548 		return (EPERM);
8549 	if (cmd != F_FREESP)
8550 		return (EINVAL);
8551 
8552 	/* call backfilesystem if NFSv4 */
8553 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
8554 		error = cachefs_space_backfs_nfsv4(vp, cmd, bfp, flag,
8555 		    offset, cr, ct);
8556 		goto out;
8557 	}
8558 
8559 	if ((error = convoff(vp, bfp, 0, offset)) == 0) {
8560 		ASSERT(bfp->l_start >= 0);
8561 		if (bfp->l_len == 0) {
8562 			struct vattr va;
8563 
8564 			va.va_size = bfp->l_start;
8565 			va.va_mask = AT_SIZE;
8566 			error = cachefs_setattr(vp, &va, 0, cr, ct);
8567 		} else
8568 			error = EINVAL;
8569 	}
8570 
8571 out:
8572 	return (error);
8573 }
8574 
8575 /*
8576  * cachefs_space_backfs_nfsv4
8577  *
8578  * Call NFSv4 back filesystem to handle the space (cachefs
8579  * pass-through support for NFSv4).
8580  */
8581 static int
8582 cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd, struct flock64 *bfp,
8583 		int flag, offset_t offset, cred_t *cr, caller_context_t *ct)
8584 {
8585 	cnode_t *cp = VTOC(vp);
8586 	fscache_t *fscp = C_TO_FSCACHE(cp);
8587 	vnode_t *backvp;
8588 	int error;
8589 
8590 	/*
8591 	 * For NFSv4 pass-through to work, only connected operation is
8592 	 * supported, the cnode backvp must exist, and cachefs optional
8593 	 * (eg., disconnectable) flags are turned off. Assert these
8594 	 * conditions for the space operation.
8595 	 */
8596 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8597 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8598 
8599 	/* Call backfs vnode op after extracting backvp */
8600 	mutex_enter(&cp->c_statelock);
8601 	backvp = cp->c_backvp;
8602 	mutex_exit(&cp->c_statelock);
8603 
8604 	CFS_DPRINT_BACKFS_NFSV4(fscp,
8605 	    ("cachefs_space_backfs_nfsv4: cnode %p, backvp %p\n",
8606 	    cp, backvp));
8607 	error = VOP_SPACE(backvp, cmd, bfp, flag, offset, cr, ct);
8608 
8609 	return (error);
8610 }
8611 
8612 /*ARGSUSED*/
8613 static int
8614 cachefs_realvp(struct vnode *vp, struct vnode **vpp, caller_context_t *ct)
8615 {
8616 	return (EINVAL);
8617 }
8618 
8619 /*ARGSUSED*/
8620 static int
8621 cachefs_pageio(struct vnode *vp, page_t *pp, u_offset_t io_off, size_t io_len,
8622 	int flags, cred_t *cr, caller_context_t *ct)
8623 {
8624 	return (ENOSYS);
8625 }
8626 
8627 static int
8628 cachefs_setsecattr_connected(cnode_t *cp,
8629     vsecattr_t *vsec, int flag, cred_t *cr)
8630 {
8631 	fscache_t *fscp = C_TO_FSCACHE(cp);
8632 	int error = 0;
8633 
8634 	ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
8635 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
8636 
8637 	mutex_enter(&cp->c_statelock);
8638 
8639 	if (cp->c_backvp == NULL) {
8640 		error = cachefs_getbackvp(fscp, cp);
8641 		if (error) {
8642 			cachefs_nocache(cp);
8643 			goto out;
8644 		}
8645 	}
8646 
8647 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
8648 	if (error)
8649 		goto out;
8650 
8651 	/* only owner can set acl */
8652 	if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
8653 		error = EINVAL;
8654 		goto out;
8655 	}
8656 
8657 
8658 	CFS_DPRINT_BACKFS_NFSV4(fscp,
8659 	    ("cachefs_setsecattr (nfsv4): cp %p, backvp %p",
8660 	    cp, cp->c_backvp));
8661 	error = VOP_SETSECATTR(cp->c_backvp, vsec, flag, cr, NULL);
8662 	if (error) {
8663 		goto out;
8664 	}
8665 
8666 	if ((cp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
8667 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8668 		cachefs_nocache(cp);
8669 		goto out;
8670 	}
8671 
8672 	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
8673 
8674 	/* acl may have changed permissions -- handle this. */
8675 	if (!CFS_ISFS_BACKFS_NFSV4(fscp))
8676 		cachefs_acl2perm(cp, vsec);
8677 
8678 	if ((cp->c_flags & CN_NOCACHE) == 0 &&
8679 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8680 		error = cachefs_cacheacl(cp, vsec);
8681 		if (error != 0) {
8682 #ifdef CFSDEBUG
8683 			CFS_DEBUG(CFSDEBUG_VOPS)
8684 				printf("cachefs_setacl: cacheacl: error %d\n",
8685 				    error);
8686 #endif /* CFSDEBUG */
8687 			error = 0;
8688 			cachefs_nocache(cp);
8689 		}
8690 	}
8691 
8692 out:
8693 	mutex_exit(&cp->c_statelock);
8694 
8695 	return (error);
8696 }
8697 
8698 static int
8699 cachefs_setsecattr_disconnected(cnode_t *cp,
8700     vsecattr_t *vsec, int flag, cred_t *cr)
8701 {
8702 	fscache_t *fscp = C_TO_FSCACHE(cp);
8703 	mode_t failmode = cp->c_metadata.md_vattr.va_mode;
8704 	off_t commit = 0;
8705 	int error = 0;
8706 
8707 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
8708 
8709 	if (CFS_ISFS_WRITE_AROUND(fscp))
8710 		return (ETIMEDOUT);
8711 
8712 	mutex_enter(&cp->c_statelock);
8713 
8714 	/* only owner can set acl */
8715 	if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
8716 		error = EINVAL;
8717 		goto out;
8718 	}
8719 
8720 	if (cp->c_metadata.md_flags & MD_NEEDATTRS) {
8721 		error = ETIMEDOUT;
8722 		goto out;
8723 	}
8724 
8725 	/* XXX do i need this?  is this right? */
8726 	if (cp->c_flags & CN_ALLOC_PENDING) {
8727 		if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
8728 			(void) filegrp_allocattr(cp->c_filegrp);
8729 		}
8730 		error = filegrp_create_metadata(cp->c_filegrp,
8731 		    &cp->c_metadata, &cp->c_id);
8732 		if (error) {
8733 			goto out;
8734 		}
8735 		cp->c_flags &= ~CN_ALLOC_PENDING;
8736 	}
8737 
8738 	/* XXX is this right? */
8739 	if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
8740 		error = cachefs_dlog_cidmap(fscp);
8741 		if (error) {
8742 			error = ENOSPC;
8743 			goto out;
8744 		}
8745 		cp->c_metadata.md_flags |= MD_MAPPING;
8746 		cp->c_flags |= CN_UPDATED;
8747 	}
8748 
8749 	commit = cachefs_dlog_setsecattr(fscp, vsec, flag, cp, cr);
8750 	if (commit == 0)
8751 		goto out;
8752 
8753 	/* fix modes in metadata */
8754 	cachefs_acl2perm(cp, vsec);
8755 
8756 	if ((cp->c_flags & CN_NOCACHE) == 0) {
8757 		error = cachefs_cacheacl(cp, vsec);
8758 		if (error != 0) {
8759 			goto out;
8760 		}
8761 	}
8762 
8763 	/* XXX is this right? */
8764 	if (cachefs_modified_alloc(cp)) {
8765 		error = ENOSPC;
8766 		goto out;
8767 	}
8768 
8769 out:
8770 	if (error != 0)
8771 		cp->c_metadata.md_vattr.va_mode = failmode;
8772 
8773 	mutex_exit(&cp->c_statelock);
8774 
8775 	if (commit) {
8776 		if (cachefs_dlog_commit(fscp, commit, error)) {
8777 			/*EMPTY*/
8778 			/* XXX fix on panic? */
8779 		}
8780 	}
8781 
8782 	return (error);
8783 }
8784 
8785 /*ARGSUSED*/
8786 static int
8787 cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr,
8788     caller_context_t *ct)
8789 {
8790 	cnode_t *cp = VTOC(vp);
8791 	fscache_t *fscp = C_TO_FSCACHE(cp);
8792 	int connected = 0;
8793 	int held = 0;
8794 	int error = 0;
8795 
8796 #ifdef CFSDEBUG
8797 	CFS_DEBUG(CFSDEBUG_VOPS)
8798 		printf("cachefs_setsecattr: ENTER vp %p\n", (void *)vp);
8799 #endif
8800 	if (getzoneid() != GLOBAL_ZONEID) {
8801 		error = EPERM;
8802 		goto out;
8803 	}
8804 
8805 	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
8806 		error = ENOSYS;
8807 		goto out;
8808 	}
8809 
8810 	if (! cachefs_vtype_aclok(vp)) {
8811 		error = EINVAL;
8812 		goto out;
8813 	}
8814 
8815 	/*
8816 	 * Cachefs only provides pass-through support for NFSv4,
8817 	 * and all vnode operations are passed through to the
8818 	 * back file system. For NFSv4 pass-through to work, only
8819 	 * connected operation is supported, the cnode backvp must
8820 	 * exist, and cachefs optional (eg., disconnectable) flags
8821 	 * are turned off. Assert these conditions to ensure that
8822 	 * the backfilesystem is called for the setsecattr operation.
8823 	 */
8824 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8825 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8826 
8827 	for (;;) {
8828 		/* drop hold on file system */
8829 		if (held) {
8830 			/* Won't loop with NFSv4 connected operation */
8831 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8832 			cachefs_cd_release(fscp);
8833 			held = 0;
8834 		}
8835 
8836 		/* acquire access to the file system */
8837 		error = cachefs_cd_access(fscp, connected, 1);
8838 		if (error)
8839 			break;
8840 		held = 1;
8841 
8842 		/* perform the setattr */
8843 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
8844 			error = cachefs_setsecattr_connected(cp,
8845 			    vsec, flag, cr);
8846 		else
8847 			error = cachefs_setsecattr_disconnected(cp,
8848 			    vsec, flag, cr);
8849 		if (error) {
8850 			/* if connected */
8851 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8852 				if (CFS_TIMEOUT(fscp, error)) {
8853 					cachefs_cd_release(fscp);
8854 					held = 0;
8855 					cachefs_cd_timedout(fscp);
8856 					connected = 0;
8857 					continue;
8858 				}
8859 			}
8860 
8861 			/* else must be disconnected */
8862 			else {
8863 				if (CFS_TIMEOUT(fscp, error)) {
8864 					connected = 1;
8865 					continue;
8866 				}
8867 			}
8868 		}
8869 		break;
8870 	}
8871 
8872 	if (held) {
8873 		cachefs_cd_release(fscp);
8874 	}
8875 	return (error);
8876 
8877 out:
8878 #ifdef CFS_CD_DEBUG
8879 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8880 #endif
8881 
8882 #ifdef CFSDEBUG
8883 	CFS_DEBUG(CFSDEBUG_VOPS)
8884 		printf("cachefs_setsecattr: EXIT error = %d\n", error);
8885 #endif
8886 	return (error);
8887 }
8888 
8889 /*
8890  * call this BEFORE calling cachefs_cacheacl(), as the latter will
8891  * sanitize the acl.
8892  */
8893 
8894 static void
8895 cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec)
8896 {
8897 	aclent_t *aclp;
8898 	int i;
8899 
8900 	for (i = 0; i < vsec->vsa_aclcnt; i++) {
8901 		aclp = ((aclent_t *)vsec->vsa_aclentp) + i;
8902 		switch (aclp->a_type) {
8903 		case USER_OBJ:
8904 			cp->c_metadata.md_vattr.va_mode &= (~0700);
8905 			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 6);
8906 			break;
8907 
8908 		case GROUP_OBJ:
8909 			cp->c_metadata.md_vattr.va_mode &= (~070);
8910 			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 3);
8911 			break;
8912 
8913 		case OTHER_OBJ:
8914 			cp->c_metadata.md_vattr.va_mode &= (~07);
8915 			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm);
8916 			break;
8917 
8918 		case CLASS_OBJ:
8919 			cp->c_metadata.md_aclclass = aclp->a_perm;
8920 			break;
8921 		}
8922 	}
8923 
8924 	cp->c_flags |= CN_UPDATED;
8925 }
8926 
8927 static int
8928 cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr,
8929     caller_context_t *ct)
8930 {
8931 	cnode_t *cp = VTOC(vp);
8932 	fscache_t *fscp = C_TO_FSCACHE(cp);
8933 	int held = 0, connected = 0;
8934 	int error = 0;
8935 
8936 #ifdef CFSDEBUG
8937 	CFS_DEBUG(CFSDEBUG_VOPS)
8938 		printf("cachefs_getsecattr: ENTER vp %p\n", (void *)vp);
8939 #endif
8940 
8941 	if (getzoneid() != GLOBAL_ZONEID) {
8942 		error = EPERM;
8943 		goto out;
8944 	}
8945 
8946 	/*
8947 	 * Cachefs only provides pass-through support for NFSv4,
8948 	 * and all vnode operations are passed through to the
8949 	 * back file system. For NFSv4 pass-through to work, only
8950 	 * connected operation is supported, the cnode backvp must
8951 	 * exist, and cachefs optional (eg., disconnectable) flags
8952 	 * are turned off. Assert these conditions to ensure that
8953 	 * the backfilesystem is called for the getsecattr operation.
8954 	 */
8955 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8956 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8957 
8958 	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
8959 		error = fs_fab_acl(vp, vsec, flag, cr, ct);
8960 		goto out;
8961 	}
8962 
8963 	for (;;) {
8964 		if (held) {
8965 			/* Won't loop with NFSv4 connected behavior */
8966 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8967 			cachefs_cd_release(fscp);
8968 			held = 0;
8969 		}
8970 		error = cachefs_cd_access(fscp, connected, 0);
8971 		if (error)
8972 			break;
8973 		held = 1;
8974 
8975 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8976 			error = cachefs_getsecattr_connected(vp, vsec, flag,
8977 			    cr);
8978 			if (CFS_TIMEOUT(fscp, error)) {
8979 				cachefs_cd_release(fscp);
8980 				held = 0;
8981 				cachefs_cd_timedout(fscp);
8982 				connected = 0;
8983 				continue;
8984 			}
8985 		} else {
8986 			error = cachefs_getsecattr_disconnected(vp, vsec, flag,
8987 			    cr);
8988 			if (CFS_TIMEOUT(fscp, error)) {
8989 				if (cachefs_cd_access_miss(fscp)) {
8990 					error = cachefs_getsecattr_connected(vp,
8991 					    vsec, flag, cr);
8992 					if (!CFS_TIMEOUT(fscp, error))
8993 						break;
8994 					delay(5*hz);
8995 					connected = 0;
8996 					continue;
8997 				}
8998 				connected = 1;
8999 				continue;
9000 			}
9001 		}
9002 		break;
9003 	}
9004 
9005 out:
9006 	if (held)
9007 		cachefs_cd_release(fscp);
9008 
9009 #ifdef CFS_CD_DEBUG
9010 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
9011 #endif
9012 #ifdef CFSDEBUG
9013 	CFS_DEBUG(CFSDEBUG_VOPS)
9014 		printf("cachefs_getsecattr: EXIT error = %d\n", error);
9015 #endif
9016 	return (error);
9017 }
9018 
9019 static int
9020 cachefs_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
9021     caller_context_t *ct)
9022 {
9023 	cnode_t *cp = VTOC(vp);
9024 	fscache_t *fscp = C_TO_FSCACHE(cp);
9025 	int error = 0;
9026 	vnode_t *backvp;
9027 
9028 #ifdef CFSDEBUG
9029 	CFS_DEBUG(CFSDEBUG_VOPS)
9030 		printf("cachefs_shrlock: ENTER vp %p\n", (void *)vp);
9031 #endif
9032 
9033 	if (getzoneid() != GLOBAL_ZONEID) {
9034 		error = EPERM;
9035 		goto out;
9036 	}
9037 
9038 	/*
9039 	 * Cachefs only provides pass-through support for NFSv4,
9040 	 * and all vnode operations are passed through to the
9041 	 * back file system. For NFSv4 pass-through to work, only
9042 	 * connected operation is supported, the cnode backvp must
9043 	 * exist, and cachefs optional (eg., disconnectable) flags
9044 	 * are turned off. Assert these conditions to ensure that
9045 	 * the backfilesystem is called for the shrlock operation.
9046 	 */
9047 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
9048 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
9049 
9050 	mutex_enter(&cp->c_statelock);
9051 	if (cp->c_backvp == NULL)
9052 		error = cachefs_getbackvp(fscp, cp);
9053 	backvp = cp->c_backvp;
9054 	mutex_exit(&cp->c_statelock);
9055 	ASSERT((error != 0) || (backvp != NULL));
9056 
9057 	if (error == 0) {
9058 		CFS_DPRINT_BACKFS_NFSV4(fscp,
9059 		    ("cachefs_shrlock (nfsv4): cp %p, backvp %p",
9060 		    cp, backvp));
9061 		error = VOP_SHRLOCK(backvp, cmd, shr, flag, cr, ct);
9062 	}
9063 
9064 out:
9065 #ifdef CFSDEBUG
9066 	CFS_DEBUG(CFSDEBUG_VOPS)
9067 		printf("cachefs_shrlock: EXIT error = %d\n", error);
9068 #endif
9069 	return (error);
9070 }
9071 
9072 static int
9073 cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
9074     cred_t *cr)
9075 {
9076 	cnode_t *cp = VTOC(vp);
9077 	fscache_t *fscp = C_TO_FSCACHE(cp);
9078 	int hit = 0;
9079 	int error = 0;
9080 
9081 
9082 	mutex_enter(&cp->c_statelock);
9083 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
9084 	if (error)
9085 		goto out;
9086 
9087 	/* read from the cache if we can */
9088 	if ((cp->c_metadata.md_flags & MD_ACL) &&
9089 	    ((cp->c_flags & CN_NOCACHE) == 0) &&
9090 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
9091 		ASSERT((cp->c_flags & CN_NOCACHE) == 0);
9092 		error = cachefs_getaclfromcache(cp, vsec);
9093 		if (error) {
9094 			cachefs_nocache(cp);
9095 			ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
9096 			error = 0;
9097 		} else {
9098 			hit = 1;
9099 			goto out;
9100 		}
9101 	}
9102 
9103 	ASSERT(error == 0);
9104 	if (cp->c_backvp == NULL)
9105 		error = cachefs_getbackvp(fscp, cp);
9106 	if (error)
9107 		goto out;
9108 
9109 	CFS_DPRINT_BACKFS_NFSV4(fscp,
9110 	    ("cachefs_getsecattr (nfsv4): cp %p, backvp %p",
9111 	    cp, cp->c_backvp));
9112 	error = VOP_GETSECATTR(cp->c_backvp, vsec, flag, cr, NULL);
9113 	if (error)
9114 		goto out;
9115 
9116 	if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
9117 	    (cachefs_vtype_aclok(vp)) &&
9118 	    ((cp->c_flags & CN_NOCACHE) == 0) &&
9119 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
9120 		error = cachefs_cacheacl(cp, vsec);
9121 		if (error) {
9122 			error = 0;
9123 			cachefs_nocache(cp);
9124 		}
9125 	}
9126 
9127 out:
9128 	if (error == 0) {
9129 		if (hit)
9130 			fscp->fs_stats.st_hits++;
9131 		else
9132 			fscp->fs_stats.st_misses++;
9133 	}
9134 	mutex_exit(&cp->c_statelock);
9135 
9136 	return (error);
9137 }
9138 
9139 static int
9140 /*ARGSUSED*/
9141 cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec, int flag,
9142     cred_t *cr)
9143 {
9144 	cnode_t *cp = VTOC(vp);
9145 	fscache_t *fscp = C_TO_FSCACHE(cp);
9146 	int hit = 0;
9147 	int error = 0;
9148 
9149 
9150 	mutex_enter(&cp->c_statelock);
9151 
9152 	/* read from the cache if we can */
9153 	if (((cp->c_flags & CN_NOCACHE) == 0) &&
9154 	    (cp->c_metadata.md_flags & MD_ACL)) {
9155 		error = cachefs_getaclfromcache(cp, vsec);
9156 		if (error) {
9157 			cachefs_nocache(cp);
9158 			ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
9159 			error = 0;
9160 		} else {
9161 			hit = 1;
9162 			goto out;
9163 		}
9164 	}
9165 	error = ETIMEDOUT;
9166 
9167 out:
9168 	if (error == 0) {
9169 		if (hit)
9170 			fscp->fs_stats.st_hits++;
9171 		else
9172 			fscp->fs_stats.st_misses++;
9173 	}
9174 	mutex_exit(&cp->c_statelock);
9175 
9176 	return (error);
9177 }
9178 
9179 /*
9180  * cachefs_cacheacl() -- cache an ACL, which we do by applying it to
9181  * the frontfile if possible; otherwise, the adjunct directory.
9182  *
9183  * inputs:
9184  * cp - the cnode, with its statelock already held
9185  * vsecp - a pointer to a vsecattr_t you'd like us to cache as-is,
9186  *  or NULL if you want us to do the VOP_GETSECATTR(backvp).
9187  *
9188  * returns:
9189  * 0 - all is well
9190  * nonzero - errno
9191  */
9192 
9193 int
9194 cachefs_cacheacl(cnode_t *cp, vsecattr_t *vsecp)
9195 {
9196 	fscache_t *fscp = C_TO_FSCACHE(cp);
9197 	vsecattr_t vsec;
9198 	aclent_t *aclp;
9199 	int gotvsec = 0;
9200 	int error = 0;
9201 	vnode_t *vp = NULL;
9202 	void *aclkeep = NULL;
9203 	int i;
9204 
9205 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9206 	ASSERT((cp->c_flags & CN_NOCACHE) == 0);
9207 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
9208 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
9209 	ASSERT(cachefs_vtype_aclok(CTOV(cp)));
9210 
9211 	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
9212 		error = ENOSYS;
9213 		goto out;
9214 	}
9215 
9216 	if (vsecp == NULL) {
9217 		if (cp->c_backvp == NULL)
9218 			error = cachefs_getbackvp(fscp, cp);
9219 		if (error != 0)
9220 			goto out;
9221 		vsecp = &vsec;
9222 		bzero(&vsec, sizeof (vsec));
9223 		vsecp->vsa_mask =
9224 		    VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9225 		error = VOP_GETSECATTR(cp->c_backvp, vsecp, 0, kcred, NULL);
9226 		if (error != 0) {
9227 			goto out;
9228 		}
9229 		gotvsec = 1;
9230 	} else if (vsecp->vsa_mask & VSA_ACL) {
9231 		aclkeep = vsecp->vsa_aclentp;
9232 		vsecp->vsa_aclentp = cachefs_kmem_alloc(vsecp->vsa_aclcnt *
9233 		    sizeof (aclent_t), KM_SLEEP);
9234 		bcopy(aclkeep, vsecp->vsa_aclentp, vsecp->vsa_aclcnt *
9235 		    sizeof (aclent_t));
9236 	} else if ((vsecp->vsa_mask & (VSA_ACL | VSA_DFACL)) == 0) {
9237 		/* unless there's real data, we can cache nothing. */
9238 		return (0);
9239 	}
9240 
9241 	/*
9242 	 * prevent the ACL from chmoding our frontfile, and
9243 	 * snarf the class info
9244 	 */
9245 
9246 	if ((vsecp->vsa_mask & (VSA_ACL | VSA_ACLCNT)) ==
9247 	    (VSA_ACL | VSA_ACLCNT)) {
9248 		for (i = 0; i < vsecp->vsa_aclcnt; i++) {
9249 			aclp = ((aclent_t *)vsecp->vsa_aclentp) + i;
9250 			switch (aclp->a_type) {
9251 			case CLASS_OBJ:
9252 				cp->c_metadata.md_aclclass =
9253 				    aclp->a_perm;
9254 				/*FALLTHROUGH*/
9255 			case USER_OBJ:
9256 			case GROUP_OBJ:
9257 			case OTHER_OBJ:
9258 				aclp->a_perm = 06;
9259 			}
9260 		}
9261 	}
9262 
9263 	/*
9264 	 * if the frontfile exists, then we always do the work.  but,
9265 	 * if there's no frontfile, and the ACL isn't a `real' ACL,
9266 	 * then we don't want to do the work.  otherwise, an `ls -l'
9267 	 * will create tons of emtpy frontfiles.
9268 	 */
9269 
9270 	if (((cp->c_metadata.md_flags & MD_FILE) == 0) &&
9271 	    ((vsecp->vsa_aclcnt + vsecp->vsa_dfaclcnt)
9272 	    <= MIN_ACL_ENTRIES)) {
9273 		cp->c_metadata.md_flags |= MD_ACL;
9274 		cp->c_flags |= CN_UPDATED;
9275 		goto out;
9276 	}
9277 
9278 	/*
9279 	 * if we have a default ACL, then we need a
9280 	 * real live directory in the frontfs that we
9281 	 * can apply the ACL to.  if not, then we just
9282 	 * use the frontfile.  we get the frontfile
9283 	 * regardless -- that way, we know the
9284 	 * directory for the frontfile exists.
9285 	 */
9286 
9287 	if (vsecp->vsa_dfaclcnt > 0) {
9288 		if (cp->c_acldirvp == NULL)
9289 			error = cachefs_getacldirvp(cp);
9290 		if (error != 0)
9291 			goto out;
9292 		vp = cp->c_acldirvp;
9293 	} else {
9294 		if (cp->c_frontvp == NULL)
9295 			error = cachefs_getfrontfile(cp);
9296 		if (error != 0)
9297 			goto out;
9298 		vp = cp->c_frontvp;
9299 	}
9300 	ASSERT(vp != NULL);
9301 
9302 	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
9303 	error = VOP_SETSECATTR(vp, vsecp, 0, kcred, NULL);
9304 	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
9305 	if (error != 0) {
9306 #ifdef CFSDEBUG
9307 		CFS_DEBUG(CFSDEBUG_VOPS)
9308 			printf("cachefs_cacheacl: setsecattr: error %d\n",
9309 			    error);
9310 #endif /* CFSDEBUG */
9311 		/*
9312 		 * If there was an error, we don't want to call
9313 		 * cachefs_nocache(); so, set error to 0.
9314 		 * We will call cachefs_purgeacl(), in order to
9315 		 * clean such things as adjunct ACL directories.
9316 		 */
9317 		cachefs_purgeacl(cp);
9318 		error = 0;
9319 		goto out;
9320 	}
9321 	if (vp == cp->c_frontvp)
9322 		cp->c_flags |= CN_NEED_FRONT_SYNC;
9323 
9324 	cp->c_metadata.md_flags |= MD_ACL;
9325 	cp->c_flags |= CN_UPDATED;
9326 
9327 out:
9328 	if ((error) && (fscp->fs_cdconnected == CFS_CD_CONNECTED))
9329 		cachefs_nocache(cp);
9330 
9331 	if (gotvsec) {
9332 		if (vsec.vsa_aclcnt)
9333 			kmem_free(vsec.vsa_aclentp,
9334 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9335 		if (vsec.vsa_dfaclcnt)
9336 			kmem_free(vsec.vsa_dfaclentp,
9337 			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
9338 	} else if (aclkeep != NULL) {
9339 		cachefs_kmem_free(vsecp->vsa_aclentp,
9340 		    vsecp->vsa_aclcnt * sizeof (aclent_t));
9341 		vsecp->vsa_aclentp = aclkeep;
9342 	}
9343 
9344 	return (error);
9345 }
9346 
9347 void
9348 cachefs_purgeacl(cnode_t *cp)
9349 {
9350 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9351 
9352 	ASSERT(!CFS_ISFS_BACKFS_NFSV4(C_TO_FSCACHE(cp)));
9353 
9354 	if (cp->c_acldirvp != NULL) {
9355 		VN_RELE(cp->c_acldirvp);
9356 		cp->c_acldirvp = NULL;
9357 	}
9358 
9359 	if (cp->c_metadata.md_flags & MD_ACLDIR) {
9360 		char name[CFS_FRONTFILE_NAME_SIZE + 2];
9361 
9362 		ASSERT(cp->c_filegrp->fg_dirvp != NULL);
9363 		make_ascii_name(&cp->c_id, name);
9364 		(void) strcat(name, ".d");
9365 
9366 		(void) VOP_RMDIR(cp->c_filegrp->fg_dirvp, name,
9367 		    cp->c_filegrp->fg_dirvp, kcred, NULL, 0);
9368 	}
9369 
9370 	cp->c_metadata.md_flags &= ~(MD_ACL | MD_ACLDIR);
9371 	cp->c_flags |= CN_UPDATED;
9372 }
9373 
9374 static int
9375 cachefs_getacldirvp(cnode_t *cp)
9376 {
9377 	char name[CFS_FRONTFILE_NAME_SIZE + 2];
9378 	int error = 0;
9379 
9380 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9381 	ASSERT(cp->c_acldirvp == NULL);
9382 
9383 	if (cp->c_frontvp == NULL)
9384 		error = cachefs_getfrontfile(cp);
9385 	if (error != 0)
9386 		goto out;
9387 
9388 	ASSERT(cp->c_filegrp->fg_dirvp != NULL);
9389 	make_ascii_name(&cp->c_id, name);
9390 	(void) strcat(name, ".d");
9391 	error = VOP_LOOKUP(cp->c_filegrp->fg_dirvp,
9392 	    name, &cp->c_acldirvp, NULL, 0, NULL, kcred, NULL, NULL, NULL);
9393 	if ((error != 0) && (error != ENOENT))
9394 		goto out;
9395 
9396 	if (error != 0) {
9397 		vattr_t va;
9398 
9399 		va.va_mode = S_IFDIR | 0777;
9400 		va.va_uid = 0;
9401 		va.va_gid = 0;
9402 		va.va_type = VDIR;
9403 		va.va_mask = AT_TYPE | AT_MODE |
9404 		    AT_UID | AT_GID;
9405 		error =
9406 		    VOP_MKDIR(cp->c_filegrp->fg_dirvp,
9407 		    name, &va, &cp->c_acldirvp, kcred, NULL, 0, NULL);
9408 		if (error != 0)
9409 			goto out;
9410 	}
9411 
9412 	ASSERT(cp->c_acldirvp != NULL);
9413 	cp->c_metadata.md_flags |= MD_ACLDIR;
9414 	cp->c_flags |= CN_UPDATED;
9415 
9416 out:
9417 	if (error != 0)
9418 		cp->c_acldirvp = NULL;
9419 	return (error);
9420 }
9421 
9422 static int
9423 cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec)
9424 {
9425 	aclent_t *aclp;
9426 	int error = 0;
9427 	vnode_t *vp = NULL;
9428 	int i;
9429 
9430 	ASSERT(cp->c_metadata.md_flags & MD_ACL);
9431 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9432 	ASSERT(vsec->vsa_aclentp == NULL);
9433 
9434 	if (cp->c_metadata.md_flags & MD_ACLDIR) {
9435 		if (cp->c_acldirvp == NULL)
9436 			error = cachefs_getacldirvp(cp);
9437 		if (error != 0)
9438 			goto out;
9439 		vp = cp->c_acldirvp;
9440 	} else if (cp->c_metadata.md_flags & MD_FILE) {
9441 		if (cp->c_frontvp == NULL)
9442 			error = cachefs_getfrontfile(cp);
9443 		if (error != 0)
9444 			goto out;
9445 		vp = cp->c_frontvp;
9446 	} else {
9447 
9448 		/*
9449 		 * if we get here, then we know that MD_ACL is on,
9450 		 * meaning an ACL was successfully cached.  we also
9451 		 * know that neither MD_ACLDIR nor MD_FILE are on, so
9452 		 * this has to be an entry without a `real' ACL.
9453 		 * thus, we forge whatever is necessary.
9454 		 */
9455 
9456 		if (vsec->vsa_mask & VSA_ACLCNT)
9457 			vsec->vsa_aclcnt = MIN_ACL_ENTRIES;
9458 
9459 		if (vsec->vsa_mask & VSA_ACL) {
9460 			vsec->vsa_aclentp =
9461 			    kmem_zalloc(MIN_ACL_ENTRIES *
9462 			    sizeof (aclent_t), KM_SLEEP);
9463 			aclp = (aclent_t *)vsec->vsa_aclentp;
9464 			aclp->a_type = USER_OBJ;
9465 			++aclp;
9466 			aclp->a_type = GROUP_OBJ;
9467 			++aclp;
9468 			aclp->a_type = OTHER_OBJ;
9469 			++aclp;
9470 			aclp->a_type = CLASS_OBJ;
9471 			ksort((caddr_t)vsec->vsa_aclentp, MIN_ACL_ENTRIES,
9472 			    sizeof (aclent_t), cmp2acls);
9473 		}
9474 
9475 		ASSERT(vp == NULL);
9476 	}
9477 
9478 	if (vp != NULL) {
9479 		if ((error = VOP_GETSECATTR(vp, vsec, 0, kcred, NULL)) != 0) {
9480 #ifdef CFSDEBUG
9481 			CFS_DEBUG(CFSDEBUG_VOPS)
9482 				printf("cachefs_getaclfromcache: error %d\n",
9483 				    error);
9484 #endif /* CFSDEBUG */
9485 			goto out;
9486 		}
9487 	}
9488 
9489 	if (vsec->vsa_aclentp != NULL) {
9490 		for (i = 0; i < vsec->vsa_aclcnt; i++) {
9491 			aclp = ((aclent_t *)vsec->vsa_aclentp) + i;
9492 			switch (aclp->a_type) {
9493 			case USER_OBJ:
9494 				aclp->a_id = cp->c_metadata.md_vattr.va_uid;
9495 				aclp->a_perm =
9496 				    cp->c_metadata.md_vattr.va_mode & 0700;
9497 				aclp->a_perm >>= 6;
9498 				break;
9499 
9500 			case GROUP_OBJ:
9501 				aclp->a_id = cp->c_metadata.md_vattr.va_gid;
9502 				aclp->a_perm =
9503 				    cp->c_metadata.md_vattr.va_mode & 070;
9504 				aclp->a_perm >>= 3;
9505 				break;
9506 
9507 			case OTHER_OBJ:
9508 				aclp->a_perm =
9509 				    cp->c_metadata.md_vattr.va_mode & 07;
9510 				break;
9511 
9512 			case CLASS_OBJ:
9513 				aclp->a_perm =
9514 				    cp->c_metadata.md_aclclass;
9515 				break;
9516 			}
9517 		}
9518 	}
9519 
9520 out:
9521 
9522 	if (error != 0)
9523 		cachefs_nocache(cp);
9524 
9525 	return (error);
9526 }
9527 
9528 /*
9529  * Fills in targp with attribute information from srcp, cp
9530  * and if necessary the system.
9531  */
9532 static void
9533 cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp, cred_t *cr)
9534 {
9535 	time_t	now;
9536 
9537 	ASSERT((srcp->va_mask & (AT_TYPE | AT_MODE)) == (AT_TYPE | AT_MODE));
9538 
9539 	/*
9540 	 * Add code to fill in the va struct.  We use the fields from
9541 	 * the srcp struct if they are populated, otherwise we guess
9542 	 */
9543 
9544 	targp->va_mask = 0;	/* initialize all fields */
9545 	targp->va_mode = srcp->va_mode;
9546 	targp->va_type = srcp->va_type;
9547 	targp->va_nlink = 1;
9548 	targp->va_nodeid = 0;
9549 
9550 	if (srcp->va_mask & AT_UID)
9551 		targp->va_uid = srcp->va_uid;
9552 	else
9553 		targp->va_uid = crgetuid(cr);
9554 
9555 	if (srcp->va_mask & AT_GID)
9556 		targp->va_gid = srcp->va_gid;
9557 	else
9558 		targp->va_gid = crgetgid(cr);
9559 
9560 	if (srcp->va_mask & AT_FSID)
9561 		targp->va_fsid = srcp->va_fsid;
9562 	else
9563 		targp->va_fsid = 0;	/* initialize all fields */
9564 
9565 	now = gethrestime_sec();
9566 	if (srcp->va_mask & AT_ATIME)
9567 		targp->va_atime = srcp->va_atime;
9568 	else
9569 		targp->va_atime.tv_sec = now;
9570 
9571 	if (srcp->va_mask & AT_MTIME)
9572 		targp->va_mtime = srcp->va_mtime;
9573 	else
9574 		targp->va_mtime.tv_sec = now;
9575 
9576 	if (srcp->va_mask & AT_CTIME)
9577 		targp->va_ctime = srcp->va_ctime;
9578 	else
9579 		targp->va_ctime.tv_sec = now;
9580 
9581 
9582 	if (srcp->va_mask & AT_SIZE)
9583 		targp->va_size = srcp->va_size;
9584 	else
9585 		targp->va_size = 0;
9586 
9587 	/*
9588 	 * the remaing fields are set by the fs and not changable.
9589 	 * we populate these entries useing the parent directory
9590 	 * values.  It's a small hack, but should work.
9591 	 */
9592 	targp->va_blksize = cp->c_metadata.md_vattr.va_blksize;
9593 	targp->va_rdev = cp->c_metadata.md_vattr.va_rdev;
9594 	targp->va_nblocks = cp->c_metadata.md_vattr.va_nblocks;
9595 	targp->va_seq = 0; /* Never keep the sequence number */
9596 }
9597 
9598 /*
9599  * set the gid for a newly created file.  The algorithm is as follows:
9600  *
9601  *	1) If the gid is set in the attribute list, then use it if
9602  *	   the caller is privileged, belongs to the target group, or
9603  *	   the group is the same as the parent directory.
9604  *
9605  *	2) If the parent directory's set-gid bit is clear, then use
9606  *	   the process gid
9607  *
9608  *	3) Otherwise, use the gid of the parent directory.
9609  *
9610  * Note: newcp->c_attr.va_{mode,type} must already be set before calling
9611  * this routine.
9612  */
9613 static void
9614 cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap, cred_t *cr)
9615 {
9616 	if ((vap->va_mask & AT_GID) &&
9617 	    ((vap->va_gid == dcp->c_attr.va_gid) ||
9618 	    groupmember(vap->va_gid, cr) ||
9619 	    secpolicy_vnode_create_gid(cr) != 0)) {
9620 		newcp->c_attr.va_gid = vap->va_gid;
9621 	} else {
9622 		if (dcp->c_attr.va_mode & S_ISGID)
9623 			newcp->c_attr.va_gid = dcp->c_attr.va_gid;
9624 		else
9625 			newcp->c_attr.va_gid = crgetgid(cr);
9626 	}
9627 
9628 	/*
9629 	 * if we're creating a directory, and the parent directory has the
9630 	 * set-GID bit set, set it on the new directory.
9631 	 * Otherwise, if the user is neither privileged nor a member of the
9632 	 * file's new group, clear the file's set-GID bit.
9633 	 */
9634 	if (dcp->c_attr.va_mode & S_ISGID && newcp->c_attr.va_type == VDIR) {
9635 		newcp->c_attr.va_mode |= S_ISGID;
9636 	} else if ((newcp->c_attr.va_mode & S_ISGID) &&
9637 	    secpolicy_vnode_setids_setgids(cr, newcp->c_attr.va_gid) != 0)
9638 		newcp->c_attr.va_mode &= ~S_ISGID;
9639 }
9640 
9641 /*
9642  * create an acl for the newly created file.  should be called right
9643  * after cachefs_creategid.
9644  */
9645 
9646 static void
9647 cachefs_createacl(cnode_t *dcp, cnode_t *newcp)
9648 {
9649 	fscache_t *fscp = C_TO_FSCACHE(dcp);
9650 	vsecattr_t vsec;
9651 	int gotvsec = 0;
9652 	int error = 0; /* placeholder */
9653 	aclent_t *aclp;
9654 	o_mode_t *classp = NULL;
9655 	o_mode_t gunion = 0;
9656 	int i;
9657 
9658 	if ((fscp->fs_info.fi_mntflags & CFS_NOACL) ||
9659 	    (! cachefs_vtype_aclok(CTOV(newcp))))
9660 		return;
9661 
9662 	ASSERT(dcp->c_metadata.md_flags & MD_ACL);
9663 	ASSERT(MUTEX_HELD(&dcp->c_statelock));
9664 	ASSERT(MUTEX_HELD(&newcp->c_statelock));
9665 
9666 	/*
9667 	 * XXX should probably not do VSA_ACL and VSA_ACLCNT, but that
9668 	 * would hit code paths that isn't hit anywhere else.
9669 	 */
9670 
9671 	bzero(&vsec, sizeof (vsec));
9672 	vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9673 	error = cachefs_getaclfromcache(dcp, &vsec);
9674 	if (error != 0)
9675 		goto out;
9676 	gotvsec = 1;
9677 
9678 	if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL)) {
9679 		if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL))
9680 			kmem_free(vsec.vsa_aclentp,
9681 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9682 
9683 		vsec.vsa_aclcnt = vsec.vsa_dfaclcnt;
9684 		vsec.vsa_aclentp = vsec.vsa_dfaclentp;
9685 		vsec.vsa_dfaclcnt = 0;
9686 		vsec.vsa_dfaclentp = NULL;
9687 
9688 		if (newcp->c_attr.va_type == VDIR) {
9689 			vsec.vsa_dfaclentp = kmem_alloc(vsec.vsa_aclcnt *
9690 			    sizeof (aclent_t), KM_SLEEP);
9691 			vsec.vsa_dfaclcnt = vsec.vsa_aclcnt;
9692 			bcopy(vsec.vsa_aclentp, vsec.vsa_dfaclentp,
9693 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9694 		}
9695 
9696 		/*
9697 		 * this function should be called pretty much after
9698 		 * the rest of the file creation stuff is done.  so,
9699 		 * uid, gid, etc. should be `right'.  we'll go with
9700 		 * that, rather than trying to determine whether to
9701 		 * get stuff from cr or va.
9702 		 */
9703 
9704 		for (i = 0; i < vsec.vsa_aclcnt; i++) {
9705 			aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9706 			switch (aclp->a_type) {
9707 			case DEF_USER_OBJ:
9708 				aclp->a_type = USER_OBJ;
9709 				aclp->a_id = newcp->c_metadata.md_vattr.va_uid;
9710 				aclp->a_perm =
9711 				    newcp->c_metadata.md_vattr.va_mode;
9712 				aclp->a_perm &= 0700;
9713 				aclp->a_perm >>= 6;
9714 				break;
9715 
9716 			case DEF_GROUP_OBJ:
9717 				aclp->a_type = GROUP_OBJ;
9718 				aclp->a_id = newcp->c_metadata.md_vattr.va_gid;
9719 				aclp->a_perm =
9720 				    newcp->c_metadata.md_vattr.va_mode;
9721 				aclp->a_perm &= 070;
9722 				aclp->a_perm >>= 3;
9723 				gunion |= aclp->a_perm;
9724 				break;
9725 
9726 			case DEF_OTHER_OBJ:
9727 				aclp->a_type = OTHER_OBJ;
9728 				aclp->a_perm =
9729 				    newcp->c_metadata.md_vattr.va_mode & 07;
9730 				break;
9731 
9732 			case DEF_CLASS_OBJ:
9733 				aclp->a_type = CLASS_OBJ;
9734 				classp = &(aclp->a_perm);
9735 				break;
9736 
9737 			case DEF_USER:
9738 				aclp->a_type = USER;
9739 				gunion |= aclp->a_perm;
9740 				break;
9741 
9742 			case DEF_GROUP:
9743 				aclp->a_type = GROUP;
9744 				gunion |= aclp->a_perm;
9745 				break;
9746 			}
9747 		}
9748 
9749 		/* XXX is this the POSIX thing to do? */
9750 		if (classp != NULL)
9751 			*classp &= gunion;
9752 
9753 		/*
9754 		 * we don't need to log this; rather, we clear the
9755 		 * MD_ACL bit when we reconnect.
9756 		 */
9757 
9758 		error = cachefs_cacheacl(newcp, &vsec);
9759 		if (error != 0)
9760 			goto out;
9761 	}
9762 
9763 	newcp->c_metadata.md_aclclass = 07; /* XXX check posix */
9764 	newcp->c_metadata.md_flags |= MD_ACL;
9765 	newcp->c_flags |= CN_UPDATED;
9766 
9767 out:
9768 
9769 	if (gotvsec) {
9770 		if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL))
9771 			kmem_free(vsec.vsa_aclentp,
9772 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9773 		if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL))
9774 			kmem_free(vsec.vsa_dfaclentp,
9775 			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
9776 	}
9777 }
9778 
9779 /*
9780  * this is translated from the UFS code for access checking.
9781  */
9782 
9783 static int
9784 cachefs_access_local(void *vcp, int mode, cred_t *cr)
9785 {
9786 	cnode_t *cp = vcp;
9787 	fscache_t *fscp = C_TO_FSCACHE(cp);
9788 	int shift = 0;
9789 
9790 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9791 
9792 	if (mode & VWRITE) {
9793 		/*
9794 		 * Disallow write attempts on read-only
9795 		 * file systems, unless the file is special.
9796 		 */
9797 		struct vnode *vp = CTOV(cp);
9798 		if (vn_is_readonly(vp)) {
9799 			if (!IS_DEVVP(vp)) {
9800 				return (EROFS);
9801 			}
9802 		}
9803 	}
9804 
9805 	/*
9806 	 * if we need to do ACLs, do it.  this works whether anyone
9807 	 * has explicitly made an ACL or not.
9808 	 */
9809 
9810 	if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
9811 	    (cachefs_vtype_aclok(CTOV(cp))))
9812 		return (cachefs_acl_access(cp, mode, cr));
9813 
9814 	if (crgetuid(cr) != cp->c_attr.va_uid) {
9815 		shift += 3;
9816 		if (!groupmember(cp->c_attr.va_gid, cr))
9817 			shift += 3;
9818 	}
9819 
9820 	return (secpolicy_vnode_access2(cr, CTOV(cp), cp->c_attr.va_uid,
9821 	    cp->c_attr.va_mode << shift, mode));
9822 }
9823 
9824 /*
9825  * This is transcribed from ufs_acl_access().  If that changes, then
9826  * this should, too.
9827  *
9828  * Check the cnode's ACL's to see if this mode of access is
9829  * allowed; return 0 if allowed, EACCES if not.
9830  *
9831  * We follow the procedure defined in Sec. 3.3.5, ACL Access
9832  * Check Algorithm, of the POSIX 1003.6 Draft Standard.
9833  */
9834 
9835 #define	ACL_MODE_CHECK(M, PERM, C, I) \
9836     secpolicy_vnode_access2(C, CTOV(I), owner, (PERM), (M))
9837 
9838 static int
9839 cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr)
9840 {
9841 	int error = 0;
9842 
9843 	fscache_t *fscp = C_TO_FSCACHE(cp);
9844 
9845 	int mask = ~0;
9846 	int ismask = 0;
9847 
9848 	int gperm = 0;
9849 	int ngroup = 0;
9850 
9851 	vsecattr_t vsec;
9852 	int gotvsec = 0;
9853 	aclent_t *aclp;
9854 
9855 	uid_t owner = cp->c_attr.va_uid;
9856 
9857 	int i;
9858 
9859 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9860 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
9861 
9862 	/*
9863 	 * strictly speaking, we shouldn't set VSA_DFACL and DFACLCNT,
9864 	 * but then i believe we'd be the only thing exercising those
9865 	 * code paths -- probably a bad thing.
9866 	 */
9867 
9868 	bzero(&vsec, sizeof (vsec));
9869 	vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9870 
9871 	/* XXX KLUDGE! correct insidious 0-class problem */
9872 	if (cp->c_metadata.md_aclclass == 0 &&
9873 	    fscp->fs_cdconnected == CFS_CD_CONNECTED)
9874 		cachefs_purgeacl(cp);
9875 again:
9876 	if (cp->c_metadata.md_flags & MD_ACL) {
9877 		error = cachefs_getaclfromcache(cp, &vsec);
9878 		if (error != 0) {
9879 #ifdef CFSDEBUG
9880 			if (error != ETIMEDOUT)
9881 				CFS_DEBUG(CFSDEBUG_VOPS)
9882 					printf("cachefs_acl_access():"
9883 					    "error %d from getaclfromcache()\n",
9884 					    error);
9885 #endif /* CFSDEBUG */
9886 			if ((cp->c_metadata.md_flags & MD_ACL) == 0) {
9887 				goto again;
9888 			} else {
9889 				goto out;
9890 			}
9891 		}
9892 	} else {
9893 		if (cp->c_backvp == NULL) {
9894 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
9895 				error = cachefs_getbackvp(fscp, cp);
9896 			else
9897 				error = ETIMEDOUT;
9898 		}
9899 		if (error == 0)
9900 			error = VOP_GETSECATTR(cp->c_backvp, &vsec, 0, cr,
9901 			    NULL);
9902 		if (error != 0) {
9903 #ifdef CFSDEBUG
9904 			CFS_DEBUG(CFSDEBUG_VOPS)
9905 				printf("cachefs_acl_access():"
9906 				    "error %d from getsecattr(backvp)\n",
9907 				    error);
9908 #endif /* CFSDEBUG */
9909 			goto out;
9910 		}
9911 		if ((cp->c_flags & CN_NOCACHE) == 0 &&
9912 		    !CFS_ISFS_BACKFS_NFSV4(fscp))
9913 			(void) cachefs_cacheacl(cp, &vsec);
9914 	}
9915 	gotvsec = 1;
9916 
9917 	ASSERT(error == 0);
9918 	for (i = 0; i < vsec.vsa_aclcnt; i++) {
9919 		aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9920 		switch (aclp->a_type) {
9921 		case USER_OBJ:
9922 			/*
9923 			 * this might look cleaner in the 2nd loop
9924 			 * below, but we do it here as an
9925 			 * optimization.
9926 			 */
9927 
9928 			owner = aclp->a_id;
9929 			if (crgetuid(cr) == owner) {
9930 				error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
9931 				    cr, cp);
9932 				goto out;
9933 			}
9934 			break;
9935 
9936 		case CLASS_OBJ:
9937 			mask = aclp->a_perm;
9938 			ismask = 1;
9939 			break;
9940 		}
9941 	}
9942 
9943 	ASSERT(error == 0);
9944 	for (i = 0; i < vsec.vsa_aclcnt; i++) {
9945 		aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9946 		switch (aclp->a_type) {
9947 		case USER:
9948 			if (crgetuid(cr) == aclp->a_id) {
9949 				error = ACL_MODE_CHECK(mode,
9950 				    (aclp->a_perm & mask) << 6, cr, cp);
9951 				goto out;
9952 			}
9953 			break;
9954 
9955 		case GROUP_OBJ:
9956 			if (groupmember(aclp->a_id, cr)) {
9957 				++ngroup;
9958 				gperm |= aclp->a_perm;
9959 				if (! ismask) {
9960 					error = ACL_MODE_CHECK(mode,
9961 					    aclp->a_perm << 6,
9962 					    cr, cp);
9963 					goto out;
9964 				}
9965 			}
9966 			break;
9967 
9968 		case GROUP:
9969 			if (groupmember(aclp->a_id, cr)) {
9970 				++ngroup;
9971 				gperm |= aclp->a_perm;
9972 			}
9973 			break;
9974 
9975 		case OTHER_OBJ:
9976 			if (ngroup == 0) {
9977 				error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
9978 				    cr, cp);
9979 				goto out;
9980 			}
9981 			break;
9982 
9983 		default:
9984 			break;
9985 		}
9986 	}
9987 
9988 	ASSERT(ngroup > 0);
9989 	error = ACL_MODE_CHECK(mode, (gperm & mask) << 6, cr, cp);
9990 
9991 out:
9992 	if (gotvsec) {
9993 		if (vsec.vsa_aclcnt && vsec.vsa_aclentp)
9994 			kmem_free(vsec.vsa_aclentp,
9995 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9996 		if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp)
9997 			kmem_free(vsec.vsa_dfaclentp,
9998 			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
9999 	}
10000 
10001 	return (error);
10002 }
10003 
10004 /*
10005  * see if permissions allow for removal of the given file from
10006  * the given directory.
10007  */
10008 static int
10009 cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr)
10010 {
10011 	uid_t uid;
10012 	/*
10013 	 * If the containing directory is sticky, the user must:
10014 	 *  - own the directory, or
10015 	 *  - own the file, or
10016 	 *  - be able to write the file (if it's a plain file), or
10017 	 *  - be sufficiently privileged.
10018 	 */
10019 	if ((dcp->c_attr.va_mode & S_ISVTX) &&
10020 	    ((uid = crgetuid(cr)) != dcp->c_attr.va_uid) &&
10021 	    (uid != cp->c_attr.va_uid) &&
10022 	    (cp->c_attr.va_type != VREG ||
10023 	    cachefs_access_local(cp, VWRITE, cr) != 0))
10024 		return (secpolicy_vnode_remove(cr));
10025 
10026 	return (0);
10027 }
10028 
10029 /*
10030  * Returns a new name, may even be unique.
10031  * Stolen from nfs code.
10032  * Since now we will use renaming to .cfs* in place of .nfs*
10033  * for CacheFS. Both NFS and CacheFS will rename opened files.
10034  */
10035 static char cachefs_prefix[] = ".cfs";
10036 kmutex_t cachefs_newnum_lock;
10037 
10038 static char *
10039 cachefs_newname(void)
10040 {
10041 	static uint_t newnum = 0;
10042 	char *news;
10043 	char *s, *p;
10044 	uint_t id;
10045 
10046 	mutex_enter(&cachefs_newnum_lock);
10047 	if (newnum == 0) {
10048 		newnum = gethrestime_sec() & 0xfffff;
10049 		newnum |= 0x10000;
10050 	}
10051 	id = newnum++;
10052 	mutex_exit(&cachefs_newnum_lock);
10053 
10054 	news = cachefs_kmem_alloc(MAXNAMELEN, KM_SLEEP);
10055 	s = news;
10056 	p = cachefs_prefix;
10057 	while (*p != '\0')
10058 		*s++ = *p++;
10059 	while (id != 0) {
10060 		*s++ = "0123456789ABCDEF"[id & 0x0f];
10061 		id >>= 4;
10062 	}
10063 	*s = '\0';
10064 	return (news);
10065 }
10066 
10067 /*
10068  * Called to rename the specified file to a temporary file so
10069  * operations to the file after remove work.
10070  * Must call this routine with the dir c_rwlock held as a writer.
10071  */
10072 static int
10073 /*ARGSUSED*/
10074 cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm, cred_t *cr)
10075 {
10076 	cnode_t *cp = VTOC(vp);
10077 	char *tmpname;
10078 	fscache_t *fscp = C_TO_FSCACHE(cp);
10079 	int error;
10080 
10081 	ASSERT(RW_WRITE_HELD(&(VTOC(dvp)->c_rwlock)));
10082 
10083 	/* get the new name for the file */
10084 	tmpname = cachefs_newname();
10085 
10086 	/* do the link */
10087 	if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
10088 		error = cachefs_link_connected(dvp, vp, tmpname, cr);
10089 	else
10090 		error = cachefs_link_disconnected(dvp, vp, tmpname, cr);
10091 	if (error) {
10092 		cachefs_kmem_free(tmpname, MAXNAMELEN);
10093 		return (error);
10094 	}
10095 
10096 	mutex_enter(&cp->c_statelock);
10097 	if (cp->c_unldvp) {
10098 		VN_RELE(cp->c_unldvp);
10099 		cachefs_kmem_free(cp->c_unlname, MAXNAMELEN);
10100 		crfree(cp->c_unlcred);
10101 	}
10102 
10103 	VN_HOLD(dvp);
10104 	cp->c_unldvp = dvp;
10105 	crhold(cr);
10106 	cp->c_unlcred = cr;
10107 	cp->c_unlname = tmpname;
10108 
10109 	/* drop the backvp so NFS does not also do a rename */
10110 	mutex_exit(&cp->c_statelock);
10111 
10112 	return (0);
10113 }
10114 
10115 /*
10116  * Marks the cnode as modified.
10117  */
10118 static void
10119 cachefs_modified(cnode_t *cp)
10120 {
10121 	fscache_t *fscp = C_TO_FSCACHE(cp);
10122 	struct vattr va;
10123 	int error;
10124 
10125 	ASSERT(MUTEX_HELD(&cp->c_statelock));
10126 	ASSERT(cp->c_metadata.md_rlno);
10127 
10128 	/* if not on the modify list */
10129 	if (cp->c_metadata.md_rltype != CACHEFS_RL_MODIFIED) {
10130 		/* put on modified list, also marks the file as modified */
10131 		cachefs_rlent_moveto(fscp->fs_cache, CACHEFS_RL_MODIFIED,
10132 		    cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks);
10133 		cp->c_metadata.md_rltype = CACHEFS_RL_MODIFIED;
10134 		cp->c_flags |= CN_UPDATED;
10135 
10136 		/* if a modified regular file that is not local */
10137 		if (((cp->c_id.cid_flags & CFS_CID_LOCAL) == 0) &&
10138 		    (cp->c_metadata.md_flags & MD_FILE) &&
10139 		    (cp->c_attr.va_type == VREG)) {
10140 
10141 			if (cp->c_frontvp == NULL)
10142 				(void) cachefs_getfrontfile(cp);
10143 			if (cp->c_frontvp) {
10144 				/* identify file so fsck knows it is modified */
10145 				va.va_mode = 0766;
10146 				va.va_mask = AT_MODE;
10147 				error = VOP_SETATTR(cp->c_frontvp,
10148 				    &va, 0, kcred, NULL);
10149 				if (error) {
10150 					cmn_err(CE_WARN,
10151 					    "Cannot change ff mode.\n");
10152 				}
10153 			}
10154 		}
10155 	}
10156 }
10157 
10158 /*
10159  * Marks the cnode as modified.
10160  * Allocates a rl slot for the cnode if necessary.
10161  * Returns 0 for success, !0 if cannot get an rl slot.
10162  */
10163 static int
10164 cachefs_modified_alloc(cnode_t *cp)
10165 {
10166 	fscache_t *fscp = C_TO_FSCACHE(cp);
10167 	filegrp_t *fgp = cp->c_filegrp;
10168 	int error;
10169 	rl_entry_t rl_ent;
10170 
10171 	ASSERT(MUTEX_HELD(&cp->c_statelock));
10172 
10173 	/* get the rl slot if needed */
10174 	if (cp->c_metadata.md_rlno == 0) {
10175 		/* get a metadata slot if we do not have one yet */
10176 		if (cp->c_flags & CN_ALLOC_PENDING) {
10177 			if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
10178 				(void) filegrp_allocattr(cp->c_filegrp);
10179 			}
10180 			error = filegrp_create_metadata(cp->c_filegrp,
10181 			    &cp->c_metadata, &cp->c_id);
10182 			if (error)
10183 				return (error);
10184 			cp->c_flags &= ~CN_ALLOC_PENDING;
10185 		}
10186 
10187 		/* get a free rl entry */
10188 		rl_ent.rl_fileno = cp->c_id.cid_fileno;
10189 		rl_ent.rl_local = (cp->c_id.cid_flags & CFS_CID_LOCAL) ? 1 : 0;
10190 		rl_ent.rl_fsid = fscp->fs_cfsid;
10191 		rl_ent.rl_attrc = 0;
10192 		error = cachefs_rl_alloc(fscp->fs_cache, &rl_ent,
10193 		    &cp->c_metadata.md_rlno);
10194 		if (error)
10195 			return (error);
10196 		cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
10197 
10198 		/* hold the filegrp so the attrcache file is not gc */
10199 		error = filegrp_ffhold(fgp);
10200 		if (error) {
10201 			cachefs_rlent_moveto(fscp->fs_cache,
10202 			    CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
10203 			cp->c_metadata.md_rlno = 0;
10204 			return (error);
10205 		}
10206 	}
10207 	cachefs_modified(cp);
10208 	return (0);
10209 }
10210 
10211 int
10212 cachefs_vtype_aclok(vnode_t *vp)
10213 {
10214 	vtype_t *vtp, oktypes[] = {VREG, VDIR, VFIFO, VNON};
10215 
10216 	if (vp->v_type == VNON)
10217 		return (0);
10218 
10219 	for (vtp = oktypes; *vtp != VNON; vtp++)
10220 		if (vp->v_type == *vtp)
10221 			break;
10222 
10223 	return (*vtp != VNON);
10224 }
10225 
10226 static int
10227 cachefs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
10228     caller_context_t *ct)
10229 {
10230 	int error = 0;
10231 	fscache_t *fscp = C_TO_FSCACHE(VTOC(vp));
10232 
10233 	/* Assert cachefs compatibility if NFSv4 is in use */
10234 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
10235 	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(vp));
10236 
10237 	if (cmd == _PC_FILESIZEBITS) {
10238 		u_offset_t maxsize = fscp->fs_offmax;
10239 		(*valp) = 0;
10240 		while (maxsize != 0) {
10241 			maxsize >>= 1;
10242 			(*valp)++;
10243 		}
10244 		(*valp)++;
10245 	} else
10246 		error = fs_pathconf(vp, cmd, valp, cr, ct);
10247 
10248 	return (error);
10249 }
10250