xref: /titanic_41/usr/src/uts/common/fs/cachefs/cachefs_vnops.c (revision 9622934a862fa39a8e90c816c4136e293d75629d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/cred.h>
32 #include <sys/proc.h>
33 #include <sys/user.h>
34 #include <sys/time.h>
35 #include <sys/vnode.h>
36 #include <sys/vfs.h>
37 #include <sys/vfs_opreg.h>
38 #include <sys/file.h>
39 #include <sys/filio.h>
40 #include <sys/uio.h>
41 #include <sys/buf.h>
42 #include <sys/mman.h>
43 #include <sys/tiuser.h>
44 #include <sys/pathname.h>
45 #include <sys/dirent.h>
46 #include <sys/conf.h>
47 #include <sys/debug.h>
48 #include <sys/vmsystm.h>
49 #include <sys/fcntl.h>
50 #include <sys/flock.h>
51 #include <sys/swap.h>
52 #include <sys/errno.h>
53 #include <sys/sysmacros.h>
54 #include <sys/disp.h>
55 #include <sys/kmem.h>
56 #include <sys/cmn_err.h>
57 #include <sys/vtrace.h>
58 #include <sys/mount.h>
59 #include <sys/bootconf.h>
60 #include <sys/dnlc.h>
61 #include <sys/stat.h>
62 #include <sys/acl.h>
63 #include <sys/policy.h>
64 #include <rpc/types.h>
65 
66 #include <vm/hat.h>
67 #include <vm/as.h>
68 #include <vm/page.h>
69 #include <vm/pvn.h>
70 #include <vm/seg.h>
71 #include <vm/seg_map.h>
72 #include <vm/seg_vn.h>
73 #include <vm/rm.h>
74 #include <sys/fs/cachefs_fs.h>
75 #include <sys/fs/cachefs_dir.h>
76 #include <sys/fs/cachefs_dlog.h>
77 #include <sys/fs/cachefs_ioctl.h>
78 #include <sys/fs/cachefs_log.h>
79 #include <fs/fs_subr.h>
80 
81 int cachefs_dnlc;	/* use dnlc, debugging */
82 
83 static void cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp,
84     cred_t *cr);
85 static void cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap,
86     cred_t *cr);
87 static void cachefs_createacl(cnode_t *dcp, cnode_t *newcp);
88 static int cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec);
89 static int cachefs_getacldirvp(cnode_t *cp);
90 static void cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec);
91 static int cachefs_access_local(void *cp, int mode, cred_t *cr);
92 static int cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr);
93 static int cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen,
94     u_offset_t iooff, cred_t *cr);
95 static int cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen,
96     u_offset_t iooff, cred_t *cr);
97 static int cachefs_setattr_connected(vnode_t *vp, vattr_t *vap, int flags,
98     cred_t *cr, caller_context_t *ct);
99 static int cachefs_setattr_disconnected(vnode_t *vp, vattr_t *vap,
100     int flags, cred_t *cr, caller_context_t *ct);
101 static int cachefs_access_connected(struct vnode *vp, int mode,
102     int flags, cred_t *cr);
103 static int cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp,
104     cred_t *cr);
105 static int cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva,
106     char *tnm, cred_t *cr);
107 static int cachefs_symlink_disconnected(vnode_t *dvp, char *lnm,
108     vattr_t *tva, char *tnm, cred_t *cr);
109 static int cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm,
110     cred_t *cr);
111 static int cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp,
112     char *tnm, cred_t *cr);
113 static int cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap,
114     vnode_t **vpp, cred_t *cr);
115 static int cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
116     vnode_t **vpp, cred_t *cr);
117 static int cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr);
118 static int cachefs_rmdir_connected(vnode_t *dvp, char *nm,
119     vnode_t *cdir, cred_t *cr, vnode_t *vp);
120 static int cachefs_rmdir_disconnected(vnode_t *dvp, char *nm,
121     vnode_t *cdir, cred_t *cr, vnode_t *vp);
122 static char *cachefs_newname(void);
123 static int cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm,
124     cred_t *cr);
125 static int cachefs_rename_connected(vnode_t *odvp, char *onm,
126     vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp);
127 static int cachefs_rename_disconnected(vnode_t *odvp, char *onm,
128     vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp);
129 static int cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr,
130     int *eofp);
131 static int cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop,
132     cred_t *cr, int *eofp);
133 static int cachefs_readback_translate(cnode_t *cp, uio_t *uiop,
134 	cred_t *cr, int *eofp);
135 
136 static int cachefs_setattr_common(vnode_t *vp, vattr_t *vap, int flags,
137     cred_t *cr, caller_context_t *ct);
138 
139 static	int	cachefs_open(struct vnode **, int, cred_t *,
140 			caller_context_t *);
141 static	int	cachefs_close(struct vnode *, int, int, offset_t,
142 			cred_t *, caller_context_t *);
143 static	int	cachefs_read(struct vnode *, struct uio *, int, cred_t *,
144 			caller_context_t *);
145 static	int	cachefs_write(struct vnode *, struct uio *, int, cred_t *,
146 			caller_context_t *);
147 static	int	cachefs_ioctl(struct vnode *, int, intptr_t, int, cred_t *,
148 			int *, caller_context_t *);
149 static	int	cachefs_getattr(struct vnode *, struct vattr *, int,
150 			cred_t *, caller_context_t *);
151 static	int	cachefs_setattr(struct vnode *, struct vattr *,
152 			int, cred_t *, caller_context_t *);
153 static	int	cachefs_access(struct vnode *, int, int, cred_t *,
154 			caller_context_t *);
155 static	int	cachefs_lookup(struct vnode *, char *, struct vnode **,
156 			struct pathname *, int, struct vnode *, cred_t *,
157 			caller_context_t *, int *, pathname_t *);
158 static	int	cachefs_create(struct vnode *, char *, struct vattr *,
159 			enum vcexcl, int, struct vnode **, cred_t *, int,
160 			caller_context_t *, vsecattr_t *);
161 static	int	cachefs_create_connected(vnode_t *dvp, char *nm,
162 			vattr_t *vap, enum vcexcl exclusive, int mode,
163 			vnode_t **vpp, cred_t *cr);
164 static	int	cachefs_create_disconnected(vnode_t *dvp, char *nm,
165 			vattr_t *vap, enum vcexcl exclusive, int mode,
166 			vnode_t **vpp, cred_t *cr);
167 static	int	cachefs_remove(struct vnode *, char *, cred_t *,
168 			caller_context_t *, int);
169 static	int	cachefs_link(struct vnode *, struct vnode *, char *,
170 			cred_t *, caller_context_t *, int);
171 static	int	cachefs_rename(struct vnode *, char *, struct vnode *,
172 			char *, cred_t *, caller_context_t *, int);
173 static	int	cachefs_mkdir(struct vnode *, char *, struct
174 			vattr *, struct vnode **, cred_t *, caller_context_t *,
175 			int, vsecattr_t *);
176 static	int	cachefs_rmdir(struct vnode *, char *, struct vnode *,
177 			cred_t *, caller_context_t *, int);
178 static	int	cachefs_readdir(struct vnode *, struct uio *,
179 			cred_t *, int *, caller_context_t *, int);
180 static	int	cachefs_symlink(struct vnode *, char *, struct vattr *,
181 			char *, cred_t *, caller_context_t *, int);
182 static	int	cachefs_readlink(struct vnode *, struct uio *, cred_t *,
183 			caller_context_t *);
184 static int cachefs_readlink_connected(vnode_t *vp, uio_t *uiop, cred_t *cr);
185 static int cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop);
186 static	int	cachefs_fsync(struct vnode *, int, cred_t *,
187 			caller_context_t *);
188 static	void	cachefs_inactive(struct vnode *, cred_t *, caller_context_t *);
189 static	int	cachefs_fid(struct vnode *, struct fid *, caller_context_t *);
190 static	int	cachefs_rwlock(struct vnode *, int, caller_context_t *);
191 static	void	cachefs_rwunlock(struct vnode *, int, caller_context_t *);
192 static	int	cachefs_seek(struct vnode *, offset_t, offset_t *,
193 			caller_context_t *);
194 static	int	cachefs_frlock(struct vnode *, int, struct flock64 *,
195 			int, offset_t, struct flk_callback *, cred_t *,
196 			caller_context_t *);
197 static	int	cachefs_space(struct vnode *, int, struct flock64 *, int,
198 			offset_t, cred_t *, caller_context_t *);
199 static	int	cachefs_realvp(struct vnode *, struct vnode **,
200 			caller_context_t *);
201 static	int	cachefs_getpage(struct vnode *, offset_t, size_t, uint_t *,
202 			struct page *[], size_t, struct seg *, caddr_t,
203 			enum seg_rw, cred_t *, caller_context_t *);
204 static	int	cachefs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
205 			struct page *[], size_t, struct seg *, caddr_t,
206 			enum seg_rw, cred_t *);
207 static	int	cachefs_getapage_back(struct vnode *, u_offset_t, size_t,
208 		uint_t *, struct page *[], size_t, struct seg *, caddr_t,
209 			enum seg_rw, cred_t *);
210 static	int	cachefs_putpage(struct vnode *, offset_t, size_t, int,
211 			cred_t *, caller_context_t *);
212 static	int	cachefs_map(struct vnode *, offset_t, struct as *,
213 			caddr_t *, size_t, uchar_t, uchar_t, uint_t, cred_t *,
214 			caller_context_t *);
215 static	int	cachefs_addmap(struct vnode *, offset_t, struct as *,
216 			caddr_t, size_t, uchar_t, uchar_t, uint_t, cred_t *,
217 			caller_context_t *);
218 static	int	cachefs_delmap(struct vnode *, offset_t, struct as *,
219 			caddr_t, size_t, uint_t, uint_t, uint_t, cred_t *,
220 			caller_context_t *);
221 static int	cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec,
222 			int flag, cred_t *cr, caller_context_t *);
223 static int	cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec,
224 			int flag, cred_t *cr, caller_context_t *);
225 static	int	cachefs_shrlock(vnode_t *, int, struct shrlock *, int,
226 			cred_t *, caller_context_t *);
227 static int cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
228     cred_t *cr);
229 static int cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec,
230     int flag, cred_t *cr);
231 
232 static int	cachefs_dump(struct vnode *, caddr_t, int, int,
233 			caller_context_t *);
234 static int	cachefs_pageio(struct vnode *, page_t *,
235 		    u_offset_t, size_t, int, cred_t *, caller_context_t *);
236 static int	cachefs_writepage(struct vnode *vp, caddr_t base,
237 		    int tcount, struct uio *uiop);
238 static int	cachefs_pathconf(vnode_t *, int, ulong_t *, cred_t *,
239 			caller_context_t *);
240 
241 static int	cachefs_read_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
242 			cred_t *cr, caller_context_t *ct);
243 static int	cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
244 			cred_t *cr, caller_context_t *ct);
245 static int	cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap,
246 			int flags, cred_t *cr, caller_context_t *ct);
247 static int	cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr,
248 			vnode_t *vp);
249 static int	cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off,
250 			size_t len, uint_t *protp, struct page *pl[],
251 			size_t plsz, struct seg *seg, caddr_t addr,
252 			enum seg_rw rw, cred_t *cr);
253 static int	cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off,
254 			size_t len, int flags, cred_t *cr);
255 static int	cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off,
256 			struct as *as, caddr_t *addrp, size_t len, uchar_t prot,
257 			uchar_t maxprot, uint_t flags, cred_t *cr);
258 static int	cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd,
259 			struct flock64 *bfp, int flag, offset_t offset,
260 			cred_t *cr, caller_context_t *ct);
261 
262 struct vnodeops *cachefs_vnodeops;
263 
264 static const fs_operation_def_t cachefs_vnodeops_template[] = {
265 	VOPNAME_OPEN,		{ .vop_open = cachefs_open },
266 	VOPNAME_CLOSE,		{ .vop_close = cachefs_close },
267 	VOPNAME_READ,		{ .vop_read = cachefs_read },
268 	VOPNAME_WRITE,		{ .vop_write = cachefs_write },
269 	VOPNAME_IOCTL,		{ .vop_ioctl = cachefs_ioctl },
270 	VOPNAME_GETATTR,	{ .vop_getattr = cachefs_getattr },
271 	VOPNAME_SETATTR,	{ .vop_setattr = cachefs_setattr },
272 	VOPNAME_ACCESS,		{ .vop_access = cachefs_access },
273 	VOPNAME_LOOKUP,		{ .vop_lookup = cachefs_lookup },
274 	VOPNAME_CREATE,		{ .vop_create = cachefs_create },
275 	VOPNAME_REMOVE,		{ .vop_remove = cachefs_remove },
276 	VOPNAME_LINK,		{ .vop_link = cachefs_link },
277 	VOPNAME_RENAME,		{ .vop_rename = cachefs_rename },
278 	VOPNAME_MKDIR,		{ .vop_mkdir = cachefs_mkdir },
279 	VOPNAME_RMDIR,		{ .vop_rmdir = cachefs_rmdir },
280 	VOPNAME_READDIR,	{ .vop_readdir = cachefs_readdir },
281 	VOPNAME_SYMLINK,	{ .vop_symlink = cachefs_symlink },
282 	VOPNAME_READLINK,	{ .vop_readlink = cachefs_readlink },
283 	VOPNAME_FSYNC,		{ .vop_fsync = cachefs_fsync },
284 	VOPNAME_INACTIVE,	{ .vop_inactive = cachefs_inactive },
285 	VOPNAME_FID,		{ .vop_fid = cachefs_fid },
286 	VOPNAME_RWLOCK,		{ .vop_rwlock = cachefs_rwlock },
287 	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = cachefs_rwunlock },
288 	VOPNAME_SEEK,		{ .vop_seek = cachefs_seek },
289 	VOPNAME_FRLOCK,		{ .vop_frlock = cachefs_frlock },
290 	VOPNAME_SPACE,		{ .vop_space = cachefs_space },
291 	VOPNAME_REALVP,		{ .vop_realvp = cachefs_realvp },
292 	VOPNAME_GETPAGE,	{ .vop_getpage = cachefs_getpage },
293 	VOPNAME_PUTPAGE,	{ .vop_putpage = cachefs_putpage },
294 	VOPNAME_MAP,		{ .vop_map = cachefs_map },
295 	VOPNAME_ADDMAP,		{ .vop_addmap = cachefs_addmap },
296 	VOPNAME_DELMAP,		{ .vop_delmap = cachefs_delmap },
297 	VOPNAME_DUMP,		{ .vop_dump = cachefs_dump },
298 	VOPNAME_PATHCONF,	{ .vop_pathconf = cachefs_pathconf },
299 	VOPNAME_PAGEIO,		{ .vop_pageio = cachefs_pageio },
300 	VOPNAME_SETSECATTR,	{ .vop_setsecattr = cachefs_setsecattr },
301 	VOPNAME_GETSECATTR,	{ .vop_getsecattr = cachefs_getsecattr },
302 	VOPNAME_SHRLOCK,	{ .vop_shrlock = cachefs_shrlock },
303 	NULL,			NULL
304 };
305 
306 /* forward declarations of statics */
307 static void cachefs_modified(cnode_t *cp);
308 static int cachefs_modified_alloc(cnode_t *cp);
309 
310 int
311 cachefs_init_vnops(char *name)
312 {
313 	return (vn_make_ops(name,
314 		    cachefs_vnodeops_template, &cachefs_vnodeops));
315 }
316 
317 struct vnodeops *
318 cachefs_getvnodeops(void)
319 {
320 	return (cachefs_vnodeops);
321 }
322 
323 static int
324 cachefs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
325 {
326 	int error = 0;
327 	cnode_t *cp = VTOC(*vpp);
328 	fscache_t *fscp = C_TO_FSCACHE(cp);
329 	int held = 0;
330 	int type;
331 	int connected = 0;
332 
333 #ifdef CFSDEBUG
334 	CFS_DEBUG(CFSDEBUG_VOPS)
335 		printf("cachefs_open: ENTER vpp %p flag %x\n",
336 		    (void *)vpp, flag);
337 #endif
338 	if (getzoneid() != GLOBAL_ZONEID) {
339 		error = EPERM;
340 		goto out;
341 	}
342 	if ((flag & FWRITE) &&
343 	    ((*vpp)->v_type == VDIR || (*vpp)->v_type == VLNK)) {
344 		error = EISDIR;
345 		goto out;
346 	}
347 
348 	/*
349 	 * Cachefs only provides pass-through support for NFSv4,
350 	 * and all vnode operations are passed through to the
351 	 * back file system. For NFSv4 pass-through to work, only
352 	 * connected operation is supported, the cnode backvp must
353 	 * exist, and cachefs optional (eg., disconnectable) flags
354 	 * are turned off. Assert these conditions to ensure that
355 	 * the backfilesystem is called for the open operation.
356 	 */
357 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
358 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
359 
360 	for (;;) {
361 		/* get (or renew) access to the file system */
362 		if (held) {
363 			/* Won't loop with NFSv4 connected behavior */
364 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
365 			cachefs_cd_release(fscp);
366 			held = 0;
367 		}
368 		error = cachefs_cd_access(fscp, connected, 0);
369 		if (error)
370 			goto out;
371 		held = 1;
372 
373 		mutex_enter(&cp->c_statelock);
374 
375 		/* grab creds if we do not have any yet */
376 		if (cp->c_cred == NULL) {
377 			crhold(cr);
378 			cp->c_cred = cr;
379 		}
380 		cp->c_flags |= CN_NEEDOPEN;
381 
382 		/* if we are disconnected */
383 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
384 			/* if we cannot write to the file system */
385 			if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp)) {
386 				mutex_exit(&cp->c_statelock);
387 				connected = 1;
388 				continue;
389 			}
390 			/*
391 			 * Allow read only requests to continue
392 			 */
393 			if ((flag & (FWRITE|FREAD)) == FREAD) {
394 				/* track the flag for opening the backvp */
395 				cp->c_rdcnt++;
396 				mutex_exit(&cp->c_statelock);
397 				error = 0;
398 				break;
399 			}
400 
401 			/*
402 			 * check credentials  - if this procs
403 			 * credentials don't match the creds in the
404 			 * cnode disallow writing while disconnected.
405 			 */
406 			if (crcmp(cp->c_cred, CRED()) != 0 &&
407 			    secpolicy_vnode_access(CRED(), *vpp,
408 					    cp->c_attr.va_uid, VWRITE) != 0) {
409 				mutex_exit(&cp->c_statelock);
410 				connected = 1;
411 				continue;
412 			}
413 			/* to get here, we know that the WRITE flag is on */
414 			cp->c_wrcnt++;
415 			if (flag & FREAD)
416 				cp->c_rdcnt++;
417 		}
418 
419 		/* else if we are connected */
420 		else {
421 			/* if cannot use the cached copy of the file */
422 			if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp) &&
423 			    ((cp->c_flags & CN_NOCACHE) == 0))
424 				cachefs_nocache(cp);
425 
426 			/* pass open to the back file */
427 			if (cp->c_backvp) {
428 				cp->c_flags &= ~CN_NEEDOPEN;
429 				CFS_DPRINT_BACKFS_NFSV4(fscp,
430 					("cachefs_open (nfsv4): cnode %p, "
431 					"backvp %p\n", cp, cp->c_backvp));
432 				error = VOP_OPEN(&cp->c_backvp, flag, cr, ct);
433 				if (CFS_TIMEOUT(fscp, error)) {
434 					mutex_exit(&cp->c_statelock);
435 					cachefs_cd_release(fscp);
436 					held = 0;
437 					cachefs_cd_timedout(fscp);
438 					continue;
439 				} else if (error) {
440 					mutex_exit(&cp->c_statelock);
441 					break;
442 				}
443 			} else {
444 				/* backvp will be VOP_OPEN'd later */
445 				if (flag & FREAD)
446 					cp->c_rdcnt++;
447 				if (flag & FWRITE)
448 					cp->c_wrcnt++;
449 			}
450 
451 			/*
452 			 * Now perform a consistency check on the file.
453 			 * If strict consistency then force a check to
454 			 * the backfs even if the timeout has not expired
455 			 * for close-to-open consistency.
456 			 */
457 			type = 0;
458 			if (fscp->fs_consttype == CFS_FS_CONST_STRICT)
459 				type = C_BACK_CHECK;
460 			error = CFSOP_CHECK_COBJECT(fscp, cp, type, cr);
461 			if (CFS_TIMEOUT(fscp, error)) {
462 				mutex_exit(&cp->c_statelock);
463 				cachefs_cd_release(fscp);
464 				held = 0;
465 				cachefs_cd_timedout(fscp);
466 				continue;
467 			}
468 		}
469 		mutex_exit(&cp->c_statelock);
470 		break;
471 	}
472 	if (held)
473 		cachefs_cd_release(fscp);
474 out:
475 #ifdef CFS_CD_DEBUG
476 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
477 #endif
478 #ifdef CFSDEBUG
479 	CFS_DEBUG(CFSDEBUG_VOPS)
480 		printf("cachefs_open: EXIT vpp %p error %d\n",
481 		    (void *)vpp, error);
482 #endif
483 	return (error);
484 }
485 
486 /* ARGSUSED */
487 static int
488 cachefs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
489 	caller_context_t *ct)
490 {
491 	int error = 0;
492 	cnode_t *cp = VTOC(vp);
493 	fscache_t *fscp = C_TO_FSCACHE(cp);
494 	int held = 0;
495 	int connected = 0;
496 	int close_cnt = 1;
497 	cachefscache_t *cachep;
498 
499 #ifdef CFSDEBUG
500 	CFS_DEBUG(CFSDEBUG_VOPS)
501 		printf("cachefs_close: ENTER vp %p\n", (void *)vp);
502 #endif
503 	/*
504 	 * Cachefs only provides pass-through support for NFSv4,
505 	 * and all vnode operations are passed through to the
506 	 * back file system. For NFSv4 pass-through to work, only
507 	 * connected operation is supported, the cnode backvp must
508 	 * exist, and cachefs optional (eg., disconnectable) flags
509 	 * are turned off. Assert these conditions to ensure that
510 	 * the backfilesystem is called for the close operation.
511 	 */
512 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
513 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
514 
515 	/*
516 	 * File could have been passed in or inherited from the global zone, so
517 	 * we don't want to flat out reject the request; we'll just leave things
518 	 * the way they are and let the backfs (NFS) deal with it.
519 	 */
520 	/* get rid of any local locks */
521 	if (CFS_ISFS_LLOCK(fscp)) {
522 		(void) cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
523 	}
524 
525 	/* clean up if this is the daemon closing down */
526 	if ((fscp->fs_cddaemonid == ttoproc(curthread)->p_pid) &&
527 	    ((ttoproc(curthread)->p_pid) != 0) &&
528 	    (vp == fscp->fs_rootvp) &&
529 	    (count == 1)) {
530 		mutex_enter(&fscp->fs_cdlock);
531 		fscp->fs_cddaemonid = 0;
532 		if (fscp->fs_dlogfile)
533 			fscp->fs_cdconnected = CFS_CD_DISCONNECTED;
534 		else
535 			fscp->fs_cdconnected = CFS_CD_CONNECTED;
536 		cv_broadcast(&fscp->fs_cdwaitcv);
537 		mutex_exit(&fscp->fs_cdlock);
538 		if (fscp->fs_flags & CFS_FS_ROOTFS) {
539 			cachep = fscp->fs_cache;
540 			mutex_enter(&cachep->c_contentslock);
541 			ASSERT(cachep->c_rootdaemonid != 0);
542 			cachep->c_rootdaemonid = 0;
543 			mutex_exit(&cachep->c_contentslock);
544 		}
545 		return (0);
546 	}
547 
548 	for (;;) {
549 		/* get (or renew) access to the file system */
550 		if (held) {
551 			/* Won't loop with NFSv4 connected behavior */
552 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
553 			cachefs_cd_release(fscp);
554 			held = 0;
555 		}
556 		error = cachefs_cd_access(fscp, connected, 0);
557 		if (error)
558 			goto out;
559 		held = 1;
560 		connected = 0;
561 
562 		/* if not the last close */
563 		if (count > 1) {
564 			if (fscp->fs_cdconnected != CFS_CD_CONNECTED)
565 				goto out;
566 			mutex_enter(&cp->c_statelock);
567 			if (cp->c_backvp) {
568 				CFS_DPRINT_BACKFS_NFSV4(fscp,
569 					("cachefs_close (nfsv4): cnode %p, "
570 					"backvp %p\n", cp, cp->c_backvp));
571 				error = VOP_CLOSE(cp->c_backvp, flag, count,
572 				    offset, cr, ct);
573 				if (CFS_TIMEOUT(fscp, error)) {
574 					mutex_exit(&cp->c_statelock);
575 					cachefs_cd_release(fscp);
576 					held = 0;
577 					cachefs_cd_timedout(fscp);
578 					continue;
579 				}
580 			}
581 			mutex_exit(&cp->c_statelock);
582 			goto out;
583 		}
584 
585 		/*
586 		 * If the file is an unlinked file, then flush the lookup
587 		 * cache so that inactive will be called if this is
588 		 * the last reference.  It will invalidate all of the
589 		 * cached pages, without writing them out.  Writing them
590 		 * out is not required because they will be written to a
591 		 * file which will be immediately removed.
592 		 */
593 		if (cp->c_unldvp != NULL) {
594 			dnlc_purge_vp(vp);
595 			mutex_enter(&cp->c_statelock);
596 			error = cp->c_error;
597 			cp->c_error = 0;
598 			mutex_exit(&cp->c_statelock);
599 			/* always call VOP_CLOSE() for back fs vnode */
600 		}
601 
602 		/* force dirty data to stable storage */
603 		else if ((vp->v_type == VREG) && (flag & FWRITE) &&
604 				!CFS_ISFS_BACKFS_NFSV4(fscp)) {
605 			/* clean the cachefs pages synchronously */
606 			error = cachefs_putpage_common(vp, (offset_t)0,
607 			    0, 0, cr);
608 			if (CFS_TIMEOUT(fscp, error)) {
609 				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
610 					cachefs_cd_release(fscp);
611 					held = 0;
612 					cachefs_cd_timedout(fscp);
613 					continue;
614 				} else {
615 					connected = 1;
616 					continue;
617 				}
618 			}
619 
620 			/* if no space left in cache, wait until connected */
621 			if ((error == ENOSPC) &&
622 			    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
623 				connected = 1;
624 				continue;
625 			}
626 
627 			/* clear the cnode error if putpage worked */
628 			if ((error == 0) && cp->c_error) {
629 				mutex_enter(&cp->c_statelock);
630 				cp->c_error = 0;
631 				mutex_exit(&cp->c_statelock);
632 			}
633 
634 			/* if any other important error */
635 			if (cp->c_error) {
636 				/* get rid of the pages */
637 				(void) cachefs_putpage_common(vp,
638 				    (offset_t)0, 0, B_INVAL | B_FORCE, cr);
639 				dnlc_purge_vp(vp);
640 			}
641 		}
642 
643 		mutex_enter(&cp->c_statelock);
644 		if (cp->c_backvp &&
645 		    (fscp->fs_cdconnected == CFS_CD_CONNECTED)) {
646 			error = VOP_CLOSE(cp->c_backvp, flag, close_cnt,
647 			    offset, cr, ct);
648 			if (CFS_TIMEOUT(fscp, error)) {
649 				mutex_exit(&cp->c_statelock);
650 				cachefs_cd_release(fscp);
651 				held = 0;
652 				cachefs_cd_timedout(fscp);
653 				/* don't decrement the vnode counts again */
654 				close_cnt = 0;
655 				continue;
656 			}
657 		}
658 		mutex_exit(&cp->c_statelock);
659 		break;
660 	}
661 
662 	mutex_enter(&cp->c_statelock);
663 	if (!error)
664 		error = cp->c_error;
665 	cp->c_error = 0;
666 	mutex_exit(&cp->c_statelock);
667 
668 out:
669 	if (held)
670 		cachefs_cd_release(fscp);
671 #ifdef CFS_CD_DEBUG
672 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
673 #endif
674 
675 #ifdef CFSDEBUG
676 	CFS_DEBUG(CFSDEBUG_VOPS)
677 		printf("cachefs_close: EXIT vp %p\n", (void *)vp);
678 #endif
679 	return (error);
680 }
681 
682 /*ARGSUSED*/
683 static int
684 cachefs_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
685 	caller_context_t *ct)
686 {
687 	struct cnode *cp = VTOC(vp);
688 	fscache_t *fscp = C_TO_FSCACHE(cp);
689 	register u_offset_t off;
690 	register int mapoff;
691 	register caddr_t base;
692 	int n;
693 	offset_t diff;
694 	uint_t flags = 0;
695 	int error = 0;
696 
697 #if 0
698 	if (vp->v_flag & VNOCACHE)
699 		flags = SM_INVAL;
700 #endif
701 	if (getzoneid() != GLOBAL_ZONEID)
702 		return (EPERM);
703 	if (vp->v_type != VREG)
704 		return (EISDIR);
705 
706 	ASSERT(RW_READ_HELD(&cp->c_rwlock));
707 
708 	if (uiop->uio_resid == 0)
709 		return (0);
710 
711 
712 	if (uiop->uio_loffset < (offset_t)0)
713 		return (EINVAL);
714 
715 	/*
716 	 * Call backfilesystem to read if NFSv4, the cachefs code
717 	 * does the read from the back filesystem asynchronously
718 	 * which is not supported by pass-through functionality.
719 	 */
720 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
721 		error = cachefs_read_backfs_nfsv4(vp, uiop, ioflag, cr, ct);
722 		goto out;
723 	}
724 
725 	if (MANDLOCK(vp, cp->c_attr.va_mode)) {
726 		error = chklock(vp, FREAD, (offset_t)uiop->uio_loffset,
727 			    uiop->uio_resid, uiop->uio_fmode, ct);
728 		if (error)
729 			return (error);
730 	}
731 
732 	/*
733 	 * Sit in a loop and transfer (uiomove) the data in up to
734 	 * MAXBSIZE chunks. Each chunk is mapped into the kernel's
735 	 * address space as needed and then released.
736 	 */
737 	do {
738 		/*
739 		 *	off	Offset of current MAXBSIZE chunk
740 		 *	mapoff	Offset within the current chunk
741 		 *	n	Number of bytes to move from this chunk
742 		 *	base	kernel address of mapped in chunk
743 		 */
744 		off = uiop->uio_loffset & (offset_t)MAXBMASK;
745 		mapoff = uiop->uio_loffset & MAXBOFFSET;
746 		n = MAXBSIZE - mapoff;
747 		if (n > uiop->uio_resid)
748 			n = (uint_t)uiop->uio_resid;
749 
750 		/* perform consistency check */
751 		error = cachefs_cd_access(fscp, 0, 0);
752 		if (error)
753 			break;
754 		mutex_enter(&cp->c_statelock);
755 		error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
756 		diff = cp->c_size - uiop->uio_loffset;
757 		mutex_exit(&cp->c_statelock);
758 		if (CFS_TIMEOUT(fscp, error)) {
759 			cachefs_cd_release(fscp);
760 			cachefs_cd_timedout(fscp);
761 			error = 0;
762 			continue;
763 		}
764 		cachefs_cd_release(fscp);
765 
766 		if (error)
767 			break;
768 
769 		if (diff <= (offset_t)0)
770 			break;
771 		if (diff < (offset_t)n)
772 			n = diff;
773 
774 		base = segmap_getmapflt(segkmap, vp, off, (uint_t)n, 1, S_READ);
775 
776 		error = segmap_fault(kas.a_hat, segkmap, base, n,
777 			F_SOFTLOCK, S_READ);
778 		if (error) {
779 			(void) segmap_release(segkmap, base, 0);
780 			if (FC_CODE(error) == FC_OBJERR)
781 				error =  FC_ERRNO(error);
782 			else
783 				error = EIO;
784 			break;
785 		}
786 		error = uiomove(base+mapoff, n, UIO_READ, uiop);
787 		(void) segmap_fault(kas.a_hat, segkmap, base, n,
788 				F_SOFTUNLOCK, S_READ);
789 		if (error == 0) {
790 			/*
791 			 * if we read a whole page(s), or to eof,
792 			 *  we won't need this page(s) again soon.
793 			 */
794 			if (n + mapoff == MAXBSIZE ||
795 				uiop->uio_loffset == cp->c_size)
796 				flags |= SM_DONTNEED;
797 		}
798 		(void) segmap_release(segkmap, base, flags);
799 	} while (error == 0 && uiop->uio_resid > 0);
800 
801 out:
802 #ifdef CFSDEBUG
803 	CFS_DEBUG(CFSDEBUG_VOPS)
804 		printf("cachefs_read: EXIT error %d resid %ld\n", error,
805 			uiop->uio_resid);
806 #endif
807 	return (error);
808 }
809 
810 /*
811  * cachefs_read_backfs_nfsv4
812  *
813  * Call NFSv4 back filesystem to handle the read (cachefs
814  * pass-through support for NFSv4).
815  */
816 static int
817 cachefs_read_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
818 			caller_context_t *ct)
819 {
820 	cnode_t *cp = VTOC(vp);
821 	fscache_t *fscp = C_TO_FSCACHE(cp);
822 	vnode_t *backvp;
823 	int error;
824 
825 	/*
826 	 * For NFSv4 pass-through to work, only connected operation
827 	 * is supported, the cnode backvp must exist, and cachefs
828 	 * optional (eg., disconnectable) flags are turned off. Assert
829 	 * these conditions for the read operation.
830 	 */
831 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
832 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
833 
834 	/* Call backfs vnode op after extracting backvp */
835 	mutex_enter(&cp->c_statelock);
836 	backvp = cp->c_backvp;
837 	mutex_exit(&cp->c_statelock);
838 
839 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_read_backfs_nfsv4: cnode %p, "
840 					"backvp %p\n", cp, backvp));
841 
842 	(void) VOP_RWLOCK(backvp, V_WRITELOCK_FALSE, ct);
843 	error = VOP_READ(backvp, uiop, ioflag, cr, ct);
844 	VOP_RWUNLOCK(backvp, V_WRITELOCK_FALSE, ct);
845 
846 	/* Increment cache miss counter */
847 	fscp->fs_stats.st_misses++;
848 
849 	return (error);
850 }
851 
852 /*ARGSUSED*/
853 static int
854 cachefs_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
855 	caller_context_t *ct)
856 {
857 	struct cnode *cp = VTOC(vp);
858 	fscache_t *fscp = C_TO_FSCACHE(cp);
859 	int error = 0;
860 	u_offset_t off;
861 	caddr_t base;
862 	uint_t bsize;
863 	uint_t flags;
864 	int n, on;
865 	rlim64_t limit = uiop->uio_llimit;
866 	ssize_t resid;
867 	offset_t offset;
868 	offset_t remainder;
869 
870 #ifdef CFSDEBUG
871 	CFS_DEBUG(CFSDEBUG_VOPS)
872 		printf(
873 		"cachefs_write: ENTER vp %p offset %llu count %ld cflags %x\n",
874 			(void *)vp, uiop->uio_loffset, uiop->uio_resid,
875 			cp->c_flags);
876 #endif
877 	if (getzoneid() != GLOBAL_ZONEID) {
878 		error = EPERM;
879 		goto out;
880 	}
881 	if (vp->v_type != VREG) {
882 		error = EISDIR;
883 		goto out;
884 	}
885 
886 	ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
887 
888 	if (uiop->uio_resid == 0) {
889 		goto out;
890 	}
891 
892 	/* Call backfilesystem to write if NFSv4 */
893 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
894 		error = cachefs_write_backfs_nfsv4(vp, uiop, ioflag, cr, ct);
895 		goto out2;
896 	}
897 
898 	if (MANDLOCK(vp, cp->c_attr.va_mode)) {
899 		error = chklock(vp, FWRITE, (offset_t)uiop->uio_loffset,
900 			    uiop->uio_resid, uiop->uio_fmode, ct);
901 		if (error)
902 			goto out;
903 	}
904 
905 	if (ioflag & FAPPEND) {
906 		for (;;) {
907 			/* do consistency check to get correct file size */
908 			error = cachefs_cd_access(fscp, 0, 1);
909 			if (error)
910 				goto out;
911 			mutex_enter(&cp->c_statelock);
912 			error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
913 			uiop->uio_loffset = cp->c_size;
914 			mutex_exit(&cp->c_statelock);
915 			if (CFS_TIMEOUT(fscp, error)) {
916 				cachefs_cd_release(fscp);
917 				cachefs_cd_timedout(fscp);
918 				continue;
919 			}
920 			cachefs_cd_release(fscp);
921 			if (error)
922 				goto out;
923 			break;
924 		}
925 	}
926 
927 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
928 		limit = MAXOFFSET_T;
929 
930 	if (uiop->uio_loffset >= limit) {
931 		proc_t *p = ttoproc(curthread);
932 
933 		mutex_enter(&p->p_lock);
934 		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
935 		    p, RCA_UNSAFE_SIGINFO);
936 		mutex_exit(&p->p_lock);
937 		error = EFBIG;
938 		goto out;
939 	}
940 	if (uiop->uio_loffset > fscp->fs_offmax) {
941 		error = EFBIG;
942 		goto out;
943 	}
944 
945 	if (limit > fscp->fs_offmax)
946 		limit = fscp->fs_offmax;
947 
948 	if (uiop->uio_loffset < (offset_t)0) {
949 		error = EINVAL;
950 		goto out;
951 	}
952 
953 	offset = uiop->uio_loffset + uiop->uio_resid;
954 	/*
955 	 * Check to make sure that the process will not exceed
956 	 * its limit on file size.  It is okay to write up to
957 	 * the limit, but not beyond.  Thus, the write which
958 	 * reaches the limit will be short and the next write
959 	 * will return an error.
960 	 */
961 	remainder = 0;
962 	if (offset > limit) {
963 		remainder = (int)(offset - (u_offset_t)limit);
964 		uiop->uio_resid = limit - uiop->uio_loffset;
965 		if (uiop->uio_resid <= 0) {
966 			proc_t *p = ttoproc(curthread);
967 
968 			uiop->uio_resid += remainder;
969 			mutex_enter(&p->p_lock);
970 			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
971 			    p->p_rctls, p, RCA_UNSAFE_SIGINFO);
972 			mutex_exit(&p->p_lock);
973 			error = EFBIG;
974 			goto out;
975 		}
976 	}
977 
978 	resid = uiop->uio_resid;
979 	offset = uiop->uio_loffset;
980 	bsize = vp->v_vfsp->vfs_bsize;
981 
982 	/* loop around and do the write in MAXBSIZE chunks */
983 	do {
984 		/* mapping offset */
985 		off = uiop->uio_loffset & (offset_t)MAXBMASK;
986 		on = uiop->uio_loffset & MAXBOFFSET; /* Rel. offset */
987 		n = MAXBSIZE - on;
988 		if (n > uiop->uio_resid)
989 			n = (int)uiop->uio_resid;
990 		base = segmap_getmap(segkmap, vp, off);
991 		error = cachefs_writepage(vp, (base + on), n, uiop);
992 		if (error == 0) {
993 			flags = 0;
994 			/*
995 			 * Have written a whole block.Start an
996 			 * asynchronous write and mark the buffer to
997 			 * indicate that it won't be needed again
998 			 * soon.
999 			 */
1000 			if (n + on == bsize) {
1001 				flags = SM_WRITE |SM_ASYNC |SM_DONTNEED;
1002 			}
1003 #if 0
1004 			/* XXX need to understand this */
1005 			if ((ioflag & (FSYNC|FDSYNC)) ||
1006 			    (cp->c_backvp && vn_has_flocks(cp->c_backvp))) {
1007 				flags &= ~SM_ASYNC;
1008 				flags |= SM_WRITE;
1009 			}
1010 #else
1011 			if (ioflag & (FSYNC|FDSYNC)) {
1012 				flags &= ~SM_ASYNC;
1013 				flags |= SM_WRITE;
1014 			}
1015 #endif
1016 			error = segmap_release(segkmap, base, flags);
1017 		} else {
1018 			(void) segmap_release(segkmap, base, 0);
1019 		}
1020 	} while (error == 0 && uiop->uio_resid > 0);
1021 
1022 out:
1023 	if (error == EINTR && (ioflag & (FSYNC|FDSYNC))) {
1024 		uiop->uio_resid = resid;
1025 		uiop->uio_loffset = offset;
1026 	} else
1027 		uiop->uio_resid += remainder;
1028 
1029 out2:
1030 #ifdef CFSDEBUG
1031 	CFS_DEBUG(CFSDEBUG_VOPS)
1032 		printf("cachefs_write: EXIT error %d\n", error);
1033 #endif
1034 	return (error);
1035 }
1036 
1037 /*
1038  * cachefs_write_backfs_nfsv4
1039  *
1040  * Call NFSv4 back filesystem to handle the write (cachefs
1041  * pass-through support for NFSv4).
1042  */
1043 static int
1044 cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
1045 			caller_context_t *ct)
1046 {
1047 	cnode_t *cp = VTOC(vp);
1048 	fscache_t *fscp = C_TO_FSCACHE(cp);
1049 	vnode_t *backvp;
1050 	int error;
1051 
1052 	/*
1053 	 * For NFSv4 pass-through to work, only connected operation
1054 	 * is supported, the cnode backvp must exist, and cachefs
1055 	 * optional (eg., disconnectable) flags are turned off. Assert
1056 	 * these conditions for the read operation.
1057 	 */
1058 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
1059 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
1060 
1061 	/* Call backfs vnode op after extracting the backvp */
1062 	mutex_enter(&cp->c_statelock);
1063 	backvp = cp->c_backvp;
1064 	mutex_exit(&cp->c_statelock);
1065 
1066 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_write_backfs_nfsv4: cnode %p, "
1067 					"backvp %p\n", cp, backvp));
1068 	(void) VOP_RWLOCK(backvp, V_WRITELOCK_TRUE, ct);
1069 	error = VOP_WRITE(backvp, uiop, ioflag, cr, ct);
1070 	VOP_RWUNLOCK(backvp, V_WRITELOCK_TRUE, ct);
1071 
1072 	return (error);
1073 }
1074 
1075 /*
1076  * see if we've charged ourselves for frontfile data at
1077  * the given offset.  If not, allocate a block for it now.
1078  */
1079 static int
1080 cachefs_charge_page(struct cnode *cp, u_offset_t offset)
1081 {
1082 	u_offset_t blockoff;
1083 	int error;
1084 	int inc;
1085 
1086 	ASSERT(MUTEX_HELD(&cp->c_statelock));
1087 	ASSERT(PAGESIZE <= MAXBSIZE);
1088 
1089 	error = 0;
1090 	blockoff = offset & (offset_t)MAXBMASK;
1091 
1092 	/* get the front file if necessary so allocblocks works */
1093 	if ((cp->c_frontvp == NULL) &&
1094 	    ((cp->c_flags & CN_NOCACHE) == 0)) {
1095 		(void) cachefs_getfrontfile(cp);
1096 	}
1097 	if (cp->c_flags & CN_NOCACHE)
1098 		return (1);
1099 
1100 	if (cachefs_check_allocmap(cp, blockoff))
1101 		return (0);
1102 
1103 	for (inc = PAGESIZE; inc < MAXBSIZE; inc += PAGESIZE)
1104 		if (cachefs_check_allocmap(cp, blockoff+inc))
1105 			return (0);
1106 
1107 	error = cachefs_allocblocks(C_TO_FSCACHE(cp)->fs_cache, 1,
1108 	    cp->c_metadata.md_rltype);
1109 	if (error == 0) {
1110 		cp->c_metadata.md_frontblks++;
1111 		cp->c_flags |= CN_UPDATED;
1112 	}
1113 	return (error);
1114 }
1115 
1116 /*
1117  * Called only by cachefs_write to write 1 page or less of data.
1118  *	base   - base address kernel addr space
1119  *	tcount - Total bytes to move - < MAXBSIZE
1120  */
1121 static int
1122 cachefs_writepage(vnode_t *vp, caddr_t base, int tcount, uio_t *uiop)
1123 {
1124 	struct cnode *cp =  VTOC(vp);
1125 	fscache_t *fscp = C_TO_FSCACHE(cp);
1126 	register int n;
1127 	register u_offset_t offset;
1128 	int error = 0, terror;
1129 	extern struct as kas;
1130 	u_offset_t lastpage_off;
1131 	int pagecreate = 0;
1132 	int newpage;
1133 
1134 #ifdef CFSDEBUG
1135 	CFS_DEBUG(CFSDEBUG_VOPS)
1136 		printf(
1137 		    "cachefs_writepage: ENTER vp %p offset %llu len %ld\\\n",
1138 		    (void *)vp, uiop->uio_loffset, uiop->uio_resid);
1139 #endif
1140 
1141 	/*
1142 	 * Move bytes in PAGESIZE chunks. We must avoid spanning pages in
1143 	 * uiomove() because page faults may cause the cache to be invalidated
1144 	 * out from under us.
1145 	 */
1146 	do {
1147 		offset = uiop->uio_loffset;
1148 		lastpage_off = (cp->c_size - 1) & (offset_t)PAGEMASK;
1149 
1150 		/*
1151 		 * If not connected then need to make sure we have space
1152 		 * to perform the write.  We could make this check
1153 		 * a little tighter by only doing it if we are growing the file.
1154 		 */
1155 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
1156 			error = cachefs_allocblocks(fscp->fs_cache, 1,
1157 			    cp->c_metadata.md_rltype);
1158 			if (error)
1159 				break;
1160 			cachefs_freeblocks(fscp->fs_cache, 1,
1161 			    cp->c_metadata.md_rltype);
1162 		}
1163 
1164 		/*
1165 		 * n is the number of bytes required to satisfy the request
1166 		 * or the number of bytes to fill out the page.
1167 		 */
1168 		n = (int)(PAGESIZE - ((uintptr_t)base & PAGEOFFSET));
1169 		if (n > tcount)
1170 			n = tcount;
1171 
1172 		/*
1173 		 * The number of bytes of data in the last page can not
1174 		 * be accurately be determined while page is being
1175 		 * uiomove'd to and the size of the file being updated.
1176 		 * Thus, inform threads which need to know accurately
1177 		 * how much data is in the last page of the file.  They
1178 		 * will not do the i/o immediately, but will arrange for
1179 		 * the i/o to happen later when this modify operation
1180 		 * will have finished.
1181 		 *
1182 		 * in similar NFS code, this is done right before the
1183 		 * uiomove(), which is best.  but here in cachefs, we
1184 		 * have two uiomove()s, so we must do it here.
1185 		 */
1186 		ASSERT(!(cp->c_flags & CN_CMODINPROG));
1187 		mutex_enter(&cp->c_statelock);
1188 		cp->c_flags |= CN_CMODINPROG;
1189 		cp->c_modaddr = (offset & (offset_t)MAXBMASK);
1190 		mutex_exit(&cp->c_statelock);
1191 
1192 		/*
1193 		 * Check to see if we can skip reading in the page
1194 		 * and just allocate the memory.  We can do this
1195 		 * if we are going to rewrite the entire mapping
1196 		 * or if we are going to write to or beyond the current
1197 		 * end of file from the beginning of the mapping.
1198 		 */
1199 		if ((offset > (lastpage_off + PAGEOFFSET)) ||
1200 			((cp->c_size == 0) && (offset < PAGESIZE)) ||
1201 			((uintptr_t)base & PAGEOFFSET) == 0 && (n == PAGESIZE ||
1202 			((offset + n) >= cp->c_size))) {
1203 			pagecreate = 1;
1204 
1205 			/*
1206 			 * segmap_pagecreate() returns 1 if it calls
1207 			 * page_create_va() to allocate any pages.
1208 			 */
1209 			newpage = segmap_pagecreate(segkmap,
1210 			    (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK),
1211 			    PAGESIZE, 0);
1212 			/* do not zero page if we are overwriting all of it */
1213 			if (!((((uintptr_t)base & PAGEOFFSET) == 0) &&
1214 			    (n == PAGESIZE))) {
1215 				(void) kzero((void *)
1216 				    ((uintptr_t)base & (uintptr_t)PAGEMASK),
1217 				    PAGESIZE);
1218 			}
1219 			error = uiomove(base, n, UIO_WRITE, uiop);
1220 
1221 			/*
1222 			 * Unlock the page allocated by page_create_va()
1223 			 * in segmap_pagecreate()
1224 			 */
1225 			if (newpage)
1226 				segmap_pageunlock(segkmap,
1227 					(caddr_t)((uintptr_t)base &
1228 						(uintptr_t)PAGEMASK),
1229 					PAGESIZE, S_WRITE);
1230 		} else {
1231 			/*
1232 			 * KLUDGE ! Use segmap_fault instead of faulting and
1233 			 * using as_fault() to avoid a recursive readers lock
1234 			 * on kas.
1235 			 */
1236 			error = segmap_fault(kas.a_hat, segkmap, (caddr_t)
1237 			    ((uintptr_t)base & (uintptr_t)PAGEMASK),
1238 			    PAGESIZE, F_SOFTLOCK, S_WRITE);
1239 			if (error) {
1240 				if (FC_CODE(error) == FC_OBJERR)
1241 					error =  FC_ERRNO(error);
1242 				else
1243 					error = EIO;
1244 				break;
1245 			}
1246 			error = uiomove(base, n, UIO_WRITE, uiop);
1247 			(void) segmap_fault(kas.a_hat, segkmap, (caddr_t)
1248 			    ((uintptr_t)base & (uintptr_t)PAGEMASK),
1249 			    PAGESIZE, F_SOFTUNLOCK, S_WRITE);
1250 		}
1251 		n = (int)(uiop->uio_loffset - offset); /* n = # bytes written */
1252 		base += n;
1253 		tcount -= n;
1254 
1255 		/* get access to the file system */
1256 		if ((terror = cachefs_cd_access(fscp, 0, 1)) != 0) {
1257 			error = terror;
1258 			break;
1259 		}
1260 
1261 		/*
1262 		 * cp->c_attr.va_size is the maximum number of
1263 		 * bytes known to be in the file.
1264 		 * Make sure it is at least as high as the
1265 		 * last byte we just wrote into the buffer.
1266 		 */
1267 		mutex_enter(&cp->c_statelock);
1268 		if (cp->c_size < uiop->uio_loffset) {
1269 			cp->c_size = uiop->uio_loffset;
1270 		}
1271 		if (cp->c_size != cp->c_attr.va_size) {
1272 			cp->c_attr.va_size = cp->c_size;
1273 			cp->c_flags |= CN_UPDATED;
1274 		}
1275 		/* c_size is now correct, so we can clear modinprog */
1276 		cp->c_flags &= ~CN_CMODINPROG;
1277 		if (error == 0) {
1278 			cp->c_flags |= CDIRTY;
1279 			if (pagecreate && (cp->c_flags & CN_NOCACHE) == 0) {
1280 				/*
1281 				 * if we're not in NOCACHE mode
1282 				 * (i.e., single-writer), we update the
1283 				 * allocmap here rather than waiting until
1284 				 * cachefspush is called.  This prevents
1285 				 * getpage from clustering up pages from
1286 				 * the backfile and stomping over the changes
1287 				 * we make here.
1288 				 */
1289 				if (cachefs_charge_page(cp, offset) == 0) {
1290 					cachefs_update_allocmap(cp,
1291 					    offset & (offset_t)PAGEMASK,
1292 							(size_t)PAGESIZE);
1293 				}
1294 
1295 				/* else we ran out of space */
1296 				else {
1297 					/* nocache file if connected */
1298 					if (fscp->fs_cdconnected ==
1299 					    CFS_CD_CONNECTED)
1300 						cachefs_nocache(cp);
1301 					/*
1302 					 * If disconnected then cannot
1303 					 * nocache the file.  Let it have
1304 					 * the space.
1305 					 */
1306 					else {
1307 						cp->c_metadata.md_frontblks++;
1308 						cp->c_flags |= CN_UPDATED;
1309 						cachefs_update_allocmap(cp,
1310 						    offset & (offset_t)PAGEMASK,
1311 						    (size_t)PAGESIZE);
1312 					}
1313 				}
1314 			}
1315 		}
1316 		mutex_exit(&cp->c_statelock);
1317 		cachefs_cd_release(fscp);
1318 	} while (tcount > 0 && error == 0);
1319 
1320 	if (cp->c_flags & CN_CMODINPROG) {
1321 		/* XXX assert error != 0?  FC_ERRNO() makes this more risky. */
1322 		mutex_enter(&cp->c_statelock);
1323 		cp->c_flags &= ~CN_CMODINPROG;
1324 		mutex_exit(&cp->c_statelock);
1325 	}
1326 
1327 #ifdef CFS_CD_DEBUG
1328 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
1329 #endif
1330 
1331 #ifdef CFSDEBUG
1332 	CFS_DEBUG(CFSDEBUG_VOPS)
1333 		printf("cachefs_writepage: EXIT error %d\n", error);
1334 #endif
1335 
1336 	return (error);
1337 }
1338 
/*
 * Pushes out pages to the back and/or front file system.
 *
 *	vp    - vnode the pages belong to; must not be read-only
 *	pp    - a dirty page to start the kluster from; must not be NULL
 *	offp  - if non-NULL, receives the file offset of the i/o
 *	lenp  - if non-NULL, receives the length of the i/o
 *	flags - B_* flags; B_ASYNC is not allowed
 *	cr    - credentials used for the write; must not be NULL
 *
 * When connected the data goes to the back file first and then, if that
 * worked and the file system is non-shared and the cnode is not in
 * CN_NOCACHE mode, to the front file as well (front file errors are
 * ignored in that case).  When disconnected only the front file is
 * written.
 *
 * Returns 0 or an errno value.  Returns 0 without doing any i/o if a
 * cachefs_writepage() is in progress on the same region; the pages are
 * re-marked modified so the push happens later.
 */
static int
cachefs_push(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
    int flags, cred_t *cr)
{
	struct cnode *cp = VTOC(vp);
	struct buf *bp;
	int error;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	u_offset_t iooff;
	size_t iolen;
	u_offset_t lbn;
	u_offset_t lbn_off;
	uint_t bsize;

	ASSERT((flags & B_ASYNC) == 0);
	ASSERT(!vn_is_readonly(vp));
	ASSERT(pp != NULL);
	ASSERT(cr != NULL);

	/* lbn_off is pp's offset rounded down to a bsize boundary */
	bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
	lbn = pp->p_offset / bsize;
	lbn_off = lbn * bsize;

	/*
	 * Find a kluster that fits in one block, or in
	 * one page if pages are bigger than blocks.  If
	 * there is less file space allocated than a whole
	 * page, we'll shorten the i/o request below.
	 */

	pp = pvn_write_kluster(vp, pp, &iooff, &iolen, lbn_off,
			roundup(bsize, PAGESIZE), flags);

	/*
	 * The CN_CMODINPROG flag makes sure that we use a correct
	 * value of c_size, below.  CN_CMODINPROG is set in
	 * cachefs_writepage().  When CN_CMODINPROG is set it
	 * indicates that a uiomove() is in progress and the c_size
	 * has not been made consistent with the new size of the
	 * file. When the uiomove() completes the c_size is updated
	 * and the CN_CMODINPROG flag is cleared.
	 *
	 * The CN_CMODINPROG flag makes sure that cachefs_push_front
	 * and cachefs_push_connected see a consistent value of
	 * c_size.  Without this handshaking, it is possible that
	 * these routines will pick up the old value of c_size before
	 * the uiomove() in cachefs_writepage() completes.  This will
	 * result in the vn_rdwr() being too small, and data loss.
	 *
	 * More precisely, there is a window between the time the
	 * uiomove() completes and the time the c_size is updated. If
	 * a VOP_PUTPAGE() operation intervenes in this window, the
	 * page will be picked up, because it is dirty; it will be
	 * unlocked, unless it was pagecreate'd. When the page is
	 * picked up as dirty, the dirty bit is reset
	 * (pvn_getdirty()). In cachefs_push_connected(), c_size is
	 * checked.  This will still be the old size.  Therefore, the
	 * page will not be written out to the correct length, and the
	 * page will be clean, so the data may disappear.
	 */
	/* unlocked peek first; the flag is rechecked under c_statelock */
	if (cp->c_flags & CN_CMODINPROG) {
		mutex_enter(&cp->c_statelock);
		/* does the MAXBSIZE region being modified overlap our i/o? */
		if ((cp->c_flags & CN_CMODINPROG) &&
		    cp->c_modaddr + MAXBSIZE > iooff &&
		    cp->c_modaddr < iooff + iolen) {
			page_t *plist;

			/*
			 * A write is in progress for this region of
			 * the file.  If we did not detect
			 * CN_CMODINPROG here then this path through
			 * cachefs_push_connected() would eventually
			 * do the vn_rdwr() and may not write out all
			 * of the data in the pages.  We end up losing
			 * data. So we decide to set the modified bit
			 * on each page in the page list and mark the
			 * cnode with CDIRTY.  This push will be
			 * restarted at some later time.
			 */

			plist = pp;
			while (plist != NULL) {
				pp = plist;
				page_sub(&plist, pp);
				hat_setmod(pp);
				page_io_unlock(pp);
				page_unlock(pp);
			}
			cp->c_flags |= CDIRTY;
			mutex_exit(&cp->c_statelock);
			/* report the range we looked at, with no error */
			if (offp)
				*offp = iooff;
			if (lenp)
				*lenp = iolen;
			return (0);
		}
		mutex_exit(&cp->c_statelock);
	}

	/*
	 * Set the pages up for pageout.
	 */
	bp = pageio_setup(pp, iolen, CTOV(cp), B_WRITE | flags);
	if (bp == NULL) {

		/*
		 * currently, there is no way for pageio_setup() to
		 * return NULL, since it uses its own scheme for
		 * kmem_alloc()ing that shouldn't return NULL, and
		 * since pageio_setup() itself dereferences the thing
		 * it's about to return.  still, we need to be ready
		 * in case this ever does start happening.
		 */

		error = ENOMEM;
		goto writedone;
	}
	/*
	 * pageio_setup should have set b_addr to 0.  This
	 * is correct since we want to do I/O on a page
	 * boundary.  bp_mapin will use this addr to calculate
	 * an offset, and then set b_addr to the kernel virtual
	 * address it allocated for us.
	 */
	bp->b_edev = 0;
	bp->b_dev = 0;
	bp->b_lblkno = (diskaddr_t)lbtodb(iooff);
	bp_mapin(bp);

	/*
	 * Shorten the i/o so it does not run past c_size, and never
	 * let it exceed what the buf describes.
	 */
	iolen  = cp->c_size - ldbtob(bp->b_blkno);
	if (iolen > bp->b_bcount)
		iolen  = bp->b_bcount;

	/* if connected */
	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
		/* write to the back file first */
		error = cachefs_push_connected(vp, bp, iolen, iooff, cr);

		/* write to the front file if allowed */
		if ((error == 0) && CFS_ISFS_NONSHARED(fscp) &&
		    ((cp->c_flags & CN_NOCACHE) == 0)) {
			/* try to write to the front file */
			(void) cachefs_push_front(vp, bp, iolen, iooff, cr);
		}
	}

	/* else if disconnected */
	else {
		/* try to write to the front file */
		error = cachefs_push_front(vp, bp, iolen, iooff, cr);
	}

	/* undo bp_mapin() and pageio_setup() */
	bp_mapout(bp);
	pageio_done(bp);

writedone:

	/* hand the pages back, flagged B_ERROR if the write failed */
	pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
	if (offp)
		*offp = iooff;
	if (lenp)
		*lenp = iolen;

	/* XXX ask bob mastors how to fix this someday */
	mutex_enter(&cp->c_statelock);
	if (error) {
		if (error == ENOSPC) {
			/*
			 * Out of space: invalidate the object and record
			 * the error only when connected or on a soft
			 * file system.
			 */
			if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) ||
			    CFS_ISFS_SOFT(fscp)) {
				CFSOP_INVALIDATE_COBJECT(fscp, cp, cr);
				cp->c_error = error;
			}
		} else if ((CFS_TIMEOUT(fscp, error) == 0) &&
		    (error != EINTR)) {
			/* any failure other than a timeout or EINTR */
			CFSOP_INVALIDATE_COBJECT(fscp, cp, cr);
			cp->c_error = error;
		}
	} else if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
		/* successful connected write */
		CFSOP_MODIFY_COBJECT(fscp, cp, cr);
	}
	mutex_exit(&cp->c_statelock);

	return (error);
}
1526 
1527 /*
1528  * Pushes out pages to the back file system.
1529  */
1530 static int
1531 cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen,
1532     u_offset_t iooff, cred_t *cr)
1533 {
1534 	struct cnode *cp = VTOC(vp);
1535 	int error = 0;
1536 	int mode = 0;
1537 	fscache_t *fscp = C_TO_FSCACHE(cp);
1538 	ssize_t resid;
1539 	vnode_t *backvp;
1540 
1541 	/* get the back file if necessary */
1542 	mutex_enter(&cp->c_statelock);
1543 	if (cp->c_backvp == NULL) {
1544 		error = cachefs_getbackvp(fscp, cp);
1545 		if (error) {
1546 			mutex_exit(&cp->c_statelock);
1547 			goto out;
1548 		}
1549 	}
1550 	backvp = cp->c_backvp;
1551 	VN_HOLD(backvp);
1552 	mutex_exit(&cp->c_statelock);
1553 
1554 	if (CFS_ISFS_NONSHARED(fscp) && CFS_ISFS_SNR(fscp))
1555 		mode = FSYNC;
1556 
1557 	/* write to the back file */
1558 	error = bp->b_error = vn_rdwr(UIO_WRITE, backvp, bp->b_un.b_addr,
1559 	    iolen, iooff, UIO_SYSSPACE, mode,
1560 	    RLIM64_INFINITY, cr, &resid);
1561 	if (error) {
1562 #ifdef CFSDEBUG
1563 		CFS_DEBUG(CFSDEBUG_VOPS | CFSDEBUG_BACK)
1564 			printf("cachefspush: error %d cr %p\n",
1565 				error, (void *)cr);
1566 #endif
1567 		bp->b_flags |= B_ERROR;
1568 	}
1569 	VN_RELE(backvp);
1570 out:
1571 	return (error);
1572 }
1573 
/*
 * Pushes out pages to the front file system.
 * Called for both connected and disconnected states.
 *
 *	vp    - cachefs vnode being pushed
 *	bp    - buf whose b_un.b_addr holds the data; b_error is set from
 *		the result of the front file write
 *	iolen - number of bytes to write
 *	iooff - file offset to write at
 *	cr    - credentials used for the disconnected log record, unless
 *		the cnode has c_cred set; the write itself uses kcred
 *
 * Returns 0 on success.  Returns ETIMEDOUT if the file system is not
 * non-shared, if the cnode is in CN_NOCACHE mode, or if disconnected
 * and the file is not populated or needs attributes.  When connected,
 * running out of cache space or failing the front file write puts the
 * cnode into nocache mode and still returns 0.  When disconnected,
 * ENOSPC is returned for lack of cache or log space, and a front file
 * write error is returned as is.
 */
static int
cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen,
    u_offset_t iooff, cred_t *cr)
{
	struct cnode *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error = 0;
	ssize_t resid;
	u_offset_t popoff;
	off_t commit = 0;	/* non-zero once a log record needs commit */
	uint_t seq;
	enum cachefs_rl_type type;	/* only valid when commit != 0 */
	vnode_t *frontvp = NULL;

	mutex_enter(&cp->c_statelock);

	if (!CFS_ISFS_NONSHARED(fscp)) {
		error = ETIMEDOUT;
		goto out;
	}

	/* get the front file if necessary */
	if ((cp->c_frontvp == NULL) &&
	    ((cp->c_flags & CN_NOCACHE) == 0)) {
		(void) cachefs_getfrontfile(cp);
	}
	if (cp->c_flags & CN_NOCACHE) {
		error = ETIMEDOUT;
		goto out;
	}

	/* if disconnected, needs to be populated and have good attributes */
	if ((fscp->fs_cdconnected != CFS_CD_CONNECTED) &&
	    (((cp->c_metadata.md_flags & MD_POPULATED) == 0) ||
	    (cp->c_metadata.md_flags & MD_NEEDATTRS))) {
		error = ETIMEDOUT;
		goto out;
	}

	/* charge for every MAXBSIZE step of the range being written */
	for (popoff = iooff; popoff < (iooff + iolen); popoff += MAXBSIZE) {
		if (cachefs_charge_page(cp, popoff)) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				/* give up on caching; error is still 0 */
				cachefs_nocache(cp);
				goto out;
			} else {
				error = ENOSPC;
				goto out;
			}
		}
	}

	if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
		/* log the first putpage to a file */
		if ((cp->c_metadata.md_flags & MD_PUTPAGE) == 0) {
			/* uses open's creds if we have them */
			if (cp->c_cred)
				cr = cp->c_cred;

			if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
				/* any cidmap failure is reported as ENOSPC */
				error = cachefs_dlog_cidmap(fscp);
				if (error) {
					error = ENOSPC;
					goto out;
				}
				cp->c_metadata.md_flags |= MD_MAPPING;
			}

			commit = cachefs_dlog_modify(fscp, cp, cr, &seq);
			if (commit == 0) {
				/* out of space */
				error = ENOSPC;
				goto out;
			}

			cp->c_metadata.md_seq = seq;
			/*
			 * Remember the current rl type so it can be put
			 * back at "out" if the write fails (presumably
			 * cachefs_modified() changes it -- not visible
			 * here).
			 */
			type = cp->c_metadata.md_rltype;
			cachefs_modified(cp);
			cp->c_metadata.md_flags |= MD_PUTPAGE;
			cp->c_metadata.md_flags &= ~MD_PUSHDONE;
			cp->c_flags |= CN_UPDATED;
		}

		/* subsequent putpages just get a new sequence number */
		else {
			/* but only if it matters */
			if (cp->c_metadata.md_seq != fscp->fs_dlogseq) {
				seq = cachefs_dlog_seqnext(fscp);
				if (seq == 0) {
					error = ENOSPC;
					goto out;
				}
				cp->c_metadata.md_seq = seq;
				cp->c_flags |= CN_UPDATED;
				/* XXX maybe should do write_metadata here */
			}
		}
	}

	/*
	 * Hold the front vnode and drop c_statelock for the duration of
	 * the write; the lock is retaken before the hold is released.
	 */
	frontvp = cp->c_frontvp;
	VN_HOLD(frontvp);
	mutex_exit(&cp->c_statelock);
	error = bp->b_error = vn_rdwr(UIO_WRITE, frontvp,
	    bp->b_un.b_addr, iolen, iooff, UIO_SYSSPACE, 0,
	    RLIM64_INFINITY, kcred, &resid);
	mutex_enter(&cp->c_statelock);
	VN_RELE(frontvp);
	frontvp = NULL;
	if (error) {
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			/* connected: stop caching and hide the error */
			cachefs_nocache(cp);
			error = 0;
			goto out;
		} else {
			goto out;
		}
	}

	/* the front file now holds this range */
	(void) cachefs_update_allocmap(cp, iooff, iolen);
	cp->c_flags |= (CN_UPDATED | CN_NEED_FRONT_SYNC |
		CN_POPULATION_PENDING);
	if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
		/* record the time of this local modification */
		gethrestime(&cp->c_metadata.md_localmtime);
		cp->c_metadata.md_flags |= MD_LOCALMTIME;
	}

out:
	if (commit) {
		/* commit the log record */
		ASSERT(fscp->fs_cdconnected == CFS_CD_DISCONNECTED);
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX fix on panic */
		}
	}

	/* the logged putpage failed: undo the state set up for it above */
	if (error && commit) {
		cp->c_metadata.md_flags &= ~MD_PUTPAGE;
		cachefs_rlent_moveto(fscp->fs_cache, type,
		    cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks);
		cp->c_metadata.md_rltype = type;
		cp->c_flags |= CN_UPDATED;
	}
	mutex_exit(&cp->c_statelock);
	return (error);
}
1723 
/*
 * Dump operation for cachefs (presumably installed as the VOP_DUMP
 * handler; the vnode ops table is not visible here).  Not supported:
 * all arguments are ignored and ENOSYS is always returned.
 */
/*ARGSUSED*/
static int
cachefs_dump(struct vnode *vp, caddr_t foo1, int foo2, int foo3,
    caller_context_t *ct)
{
	return (ENOSYS); /* should we panic if we get here? */
}
1731 
1732 /*ARGSUSED*/
1733 static int
1734 cachefs_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, cred_t *cred,
1735 	int *rvalp, caller_context_t *ct)
1736 {
1737 	int error;
1738 	struct cnode *cp = VTOC(vp);
1739 	struct fscache *fscp = C_TO_FSCACHE(cp);
1740 	struct cachefscache *cachep;
1741 	extern kmutex_t cachefs_cachelock;
1742 	extern cachefscache_t *cachefs_cachelist;
1743 	cachefsio_pack_t *packp;
1744 	STRUCT_DECL(cachefsio_dcmd, dcmd);
1745 	int	inlen, outlen;	/* LP64: generic int for struct in/out len */
1746 	void *dinp, *doutp;
1747 	int (*dcmd_routine)(vnode_t *, void *, void *);
1748 
1749 	if (getzoneid() != GLOBAL_ZONEID)
1750 		return (EPERM);
1751 
1752 	/*
1753 	 * Cachefs only provides pass-through support for NFSv4,
1754 	 * and all vnode operations are passed through to the
1755 	 * back file system. For NFSv4 pass-through to work, only
1756 	 * connected operation is supported, the cnode backvp must
1757 	 * exist, and cachefs optional (eg., disconnectable) flags
1758 	 * are turned off. Assert these conditions which ensure
1759 	 * that only a subset of the ioctls are "truly supported"
1760 	 * for NFSv4 (these are CFSDCMD_DAEMONID and CFSDCMD_GETSTATS.
1761 	 * The packing operations are meaningless since there is
1762 	 * no caching for NFSv4, and the called functions silently
1763 	 * return if the backfilesystem is NFSv4. The daemon
1764 	 * commands except for those above are essentially used
1765 	 * for disconnectable operation support (including log
1766 	 * rolling), so in each called function, we assert that
1767 	 * NFSv4 is not in use. The _FIO* calls (except _FIOCOD)
1768 	 * are from "cfsfstype" which is not a documented
1769 	 * command. However, the command is visible in
1770 	 * /usr/lib/fs/cachefs so the commands are simply let
1771 	 * through (don't seem to impact pass-through functionality).
1772 	 */
1773 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
1774 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
1775 
1776 	switch (cmd) {
1777 	case CACHEFSIO_PACK:
1778 		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1779 		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1780 		if (!error)
1781 			error = cachefs_pack(vp, packp->p_name, cred);
1782 		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1783 		break;
1784 
1785 	case CACHEFSIO_UNPACK:
1786 		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1787 		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1788 		if (!error)
1789 			error = cachefs_unpack(vp, packp->p_name, cred);
1790 		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1791 		break;
1792 
1793 	case CACHEFSIO_PACKINFO:
1794 		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1795 		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1796 		if (!error)
1797 			error = cachefs_packinfo(vp, packp->p_name,
1798 			    &packp->p_status, cred);
1799 		if (!error)
1800 			error = xcopyout(packp, (void *)arg,
1801 			    sizeof (cachefsio_pack_t));
1802 		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1803 		break;
1804 
1805 	case CACHEFSIO_UNPACKALL:
1806 		error = cachefs_unpackall(vp);
1807 		break;
1808 
1809 	case CACHEFSIO_DCMD:
1810 		/*
1811 		 * This is a private interface between the cachefsd and
1812 		 * this file system.
1813 		 */
1814 
1815 		/* must be root to use these commands */
1816 		if (secpolicy_fs_config(cred, vp->v_vfsp) != 0)
1817 			return (EPERM);
1818 
1819 		/* get the command packet */
1820 		STRUCT_INIT(dcmd, flag & DATAMODEL_MASK);
1821 		error = xcopyin((void *)arg, STRUCT_BUF(dcmd),
1822 		    SIZEOF_STRUCT(cachefsio_dcmd, DATAMODEL_NATIVE));
1823 		if (error)
1824 			return (error);
1825 
1826 		/* copy in the data for the operation */
1827 		dinp = NULL;
1828 		if ((inlen = STRUCT_FGET(dcmd, d_slen)) > 0) {
1829 			dinp = cachefs_kmem_alloc(inlen, KM_SLEEP);
1830 			error = xcopyin(STRUCT_FGETP(dcmd, d_sdata), dinp,
1831 			    inlen);
1832 			if (error)
1833 				return (error);
1834 		}
1835 
1836 		/* allocate space for the result */
1837 		doutp = NULL;
1838 		if ((outlen = STRUCT_FGET(dcmd, d_rlen)) > 0)
1839 			doutp = cachefs_kmem_alloc(outlen, KM_SLEEP);
1840 
1841 		/*
1842 		 * Assert NFSv4 only allows the daemonid and getstats
1843 		 * daemon requests
1844 		 */
1845 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0 ||
1846 			STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_DAEMONID ||
1847 			STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_GETSTATS);
1848 
1849 		/* get the routine to execute */
1850 		dcmd_routine = NULL;
1851 		switch (STRUCT_FGET(dcmd, d_cmd)) {
1852 		case CFSDCMD_DAEMONID:
1853 			dcmd_routine = cachefs_io_daemonid;
1854 			break;
1855 		case CFSDCMD_STATEGET:
1856 			dcmd_routine = cachefs_io_stateget;
1857 			break;
1858 		case CFSDCMD_STATESET:
1859 			dcmd_routine = cachefs_io_stateset;
1860 			break;
1861 		case CFSDCMD_XWAIT:
1862 			dcmd_routine = cachefs_io_xwait;
1863 			break;
1864 		case CFSDCMD_EXISTS:
1865 			dcmd_routine = cachefs_io_exists;
1866 			break;
1867 		case CFSDCMD_LOSTFOUND:
1868 			dcmd_routine = cachefs_io_lostfound;
1869 			break;
1870 		case CFSDCMD_GETINFO:
1871 			dcmd_routine = cachefs_io_getinfo;
1872 			break;
1873 		case CFSDCMD_CIDTOFID:
1874 			dcmd_routine = cachefs_io_cidtofid;
1875 			break;
1876 		case CFSDCMD_GETATTRFID:
1877 			dcmd_routine = cachefs_io_getattrfid;
1878 			break;
1879 		case CFSDCMD_GETATTRNAME:
1880 			dcmd_routine = cachefs_io_getattrname;
1881 			break;
1882 		case CFSDCMD_GETSTATS:
1883 			dcmd_routine = cachefs_io_getstats;
1884 			break;
1885 		case CFSDCMD_ROOTFID:
1886 			dcmd_routine = cachefs_io_rootfid;
1887 			break;
1888 		case CFSDCMD_CREATE:
1889 			dcmd_routine = cachefs_io_create;
1890 			break;
1891 		case CFSDCMD_REMOVE:
1892 			dcmd_routine = cachefs_io_remove;
1893 			break;
1894 		case CFSDCMD_LINK:
1895 			dcmd_routine = cachefs_io_link;
1896 			break;
1897 		case CFSDCMD_RENAME:
1898 			dcmd_routine = cachefs_io_rename;
1899 			break;
1900 		case CFSDCMD_MKDIR:
1901 			dcmd_routine = cachefs_io_mkdir;
1902 			break;
1903 		case CFSDCMD_RMDIR:
1904 			dcmd_routine = cachefs_io_rmdir;
1905 			break;
1906 		case CFSDCMD_SYMLINK:
1907 			dcmd_routine = cachefs_io_symlink;
1908 			break;
1909 		case CFSDCMD_SETATTR:
1910 			dcmd_routine = cachefs_io_setattr;
1911 			break;
1912 		case CFSDCMD_SETSECATTR:
1913 			dcmd_routine = cachefs_io_setsecattr;
1914 			break;
1915 		case CFSDCMD_PUSHBACK:
1916 			dcmd_routine = cachefs_io_pushback;
1917 			break;
1918 		default:
1919 			error = ENOTTY;
1920 			break;
1921 		}
1922 
1923 		/* execute the routine */
1924 		if (dcmd_routine)
1925 			error = (*dcmd_routine)(vp, dinp, doutp);
1926 
1927 		/* copy out the result */
1928 		if ((error == 0) && doutp)
1929 			error = xcopyout(doutp, STRUCT_FGETP(dcmd, d_rdata),
1930 			    outlen);
1931 
1932 		/* free allocated memory */
1933 		if (dinp)
1934 			cachefs_kmem_free(dinp, inlen);
1935 		if (doutp)
1936 			cachefs_kmem_free(doutp, outlen);
1937 
1938 		break;
1939 
1940 	case _FIOCOD:
1941 		if (secpolicy_fs_config(cred, vp->v_vfsp) != 0) {
1942 			error = EPERM;
1943 			break;
1944 		}
1945 
1946 		error = EBUSY;
1947 		if (arg) {
1948 			/* non-zero arg means do all filesystems */
1949 			mutex_enter(&cachefs_cachelock);
1950 			for (cachep = cachefs_cachelist; cachep != NULL;
1951 			    cachep = cachep->c_next) {
1952 				mutex_enter(&cachep->c_fslistlock);
1953 				for (fscp = cachep->c_fslist;
1954 				    fscp != NULL;
1955 				    fscp = fscp->fs_next) {
1956 					if (CFS_ISFS_CODCONST(fscp)) {
1957 						gethrestime(&fscp->fs_cod_time);
1958 						error = 0;
1959 					}
1960 				}
1961 				mutex_exit(&cachep->c_fslistlock);
1962 			}
1963 			mutex_exit(&cachefs_cachelock);
1964 		} else {
1965 			if (CFS_ISFS_CODCONST(fscp)) {
1966 				gethrestime(&fscp->fs_cod_time);
1967 				error = 0;
1968 			}
1969 		}
1970 		break;
1971 
1972 	case _FIOSTOPCACHE:
1973 		error = cachefs_stop_cache(cp);
1974 		break;
1975 
1976 	default:
1977 		error = ENOTTY;
1978 		break;
1979 	}
1980 
1981 	/* return the result */
1982 	return (error);
1983 }
1984 
1985 ino64_t
1986 cachefs_fileno_conflict(fscache_t *fscp, ino64_t old)
1987 {
1988 	ino64_t new;
1989 
1990 	ASSERT(MUTEX_HELD(&fscp->fs_fslock));
1991 
1992 	for (;;) {
1993 		fscp->fs_info.fi_localfileno++;
1994 		if (fscp->fs_info.fi_localfileno == 0)
1995 			fscp->fs_info.fi_localfileno = 3;
1996 		fscp->fs_flags |= CFS_FS_DIRTYINFO;
1997 
1998 		new = fscp->fs_info.fi_localfileno;
1999 		if (! cachefs_fileno_inuse(fscp, new))
2000 			break;
2001 	}
2002 
2003 	cachefs_inum_register(fscp, old, new);
2004 	cachefs_inum_register(fscp, new, 0);
2005 	return (new);
2006 }
2007 
2008 /*ARGSUSED*/
2009 static int
2010 cachefs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
2011 	caller_context_t *ct)
2012 {
2013 	struct cnode *cp = VTOC(vp);
2014 	fscache_t *fscp = C_TO_FSCACHE(cp);
2015 	int error = 0;
2016 	int held = 0;
2017 	int connected = 0;
2018 
2019 #ifdef CFSDEBUG
2020 	CFS_DEBUG(CFSDEBUG_VOPS)
2021 		printf("cachefs_getattr: ENTER vp %p\n", (void *)vp);
2022 #endif
2023 
2024 	if (getzoneid() != GLOBAL_ZONEID)
2025 		return (EPERM);
2026 
2027 	/* Call backfilesystem getattr if NFSv4 */
2028 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
2029 		error = cachefs_getattr_backfs_nfsv4(vp, vap, flags, cr, ct);
2030 		goto out;
2031 	}
2032 
2033 	/*
2034 	 * If it has been specified that the return value will
2035 	 * just be used as a hint, and we are only being asked
2036 	 * for size, fsid or rdevid, then return the client's
2037 	 * notion of these values without checking to make sure
2038 	 * that the attribute cache is up to date.
2039 	 * The whole point is to avoid an over the wire GETATTR
2040 	 * call.
2041 	 */
2042 	if (flags & ATTR_HINT) {
2043 		if (vap->va_mask ==
2044 		    (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
2045 			if (vap->va_mask | AT_SIZE)
2046 				vap->va_size = cp->c_size;
2047 			/*
2048 			 * Return the FSID of the cachefs filesystem,
2049 			 * not the back filesystem
2050 			 */
2051 			if (vap->va_mask | AT_FSID)
2052 				vap->va_fsid = vp->v_vfsp->vfs_dev;
2053 			if (vap->va_mask | AT_RDEV)
2054 				vap->va_rdev = cp->c_attr.va_rdev;
2055 			return (0);
2056 		}
2057 	}
2058 
2059 	/*
2060 	 * Only need to flush pages if asking for the mtime
2061 	 * and if there any dirty pages.
2062 	 */
2063 	if (vap->va_mask & AT_MTIME) {
2064 		/*EMPTY*/
2065 #if 0
2066 		/*
2067 		 * XXX bob: stolen from nfs code, need to do something similar
2068 		 */
2069 		rp = VTOR(vp);
2070 		if ((rp->r_flags & RDIRTY) || rp->r_iocnt > 0)
2071 			(void) nfs3_putpage(vp, (offset_t)0, 0, 0, cr);
2072 #endif
2073 	}
2074 
2075 	for (;;) {
2076 		/* get (or renew) access to the file system */
2077 		if (held) {
2078 			cachefs_cd_release(fscp);
2079 			held = 0;
2080 		}
2081 		error = cachefs_cd_access(fscp, connected, 0);
2082 		if (error)
2083 			goto out;
2084 		held = 1;
2085 
2086 		/*
2087 		 * If it has been specified that the return value will
2088 		 * just be used as a hint, and we are only being asked
2089 		 * for size, fsid or rdevid, then return the client's
2090 		 * notion of these values without checking to make sure
2091 		 * that the attribute cache is up to date.
2092 		 * The whole point is to avoid an over the wire GETATTR
2093 		 * call.
2094 		 */
2095 		if (flags & ATTR_HINT) {
2096 			if (vap->va_mask ==
2097 			    (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
2098 				if (vap->va_mask | AT_SIZE)
2099 					vap->va_size = cp->c_size;
2100 				/*
2101 				 * Return the FSID of the cachefs filesystem,
2102 				 * not the back filesystem
2103 				 */
2104 				if (vap->va_mask | AT_FSID)
2105 					vap->va_fsid = vp->v_vfsp->vfs_dev;
2106 				if (vap->va_mask | AT_RDEV)
2107 					vap->va_rdev = cp->c_attr.va_rdev;
2108 				goto out;
2109 			}
2110 		}
2111 
2112 		mutex_enter(&cp->c_statelock);
2113 		if ((cp->c_metadata.md_flags & MD_NEEDATTRS) &&
2114 		    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
2115 			mutex_exit(&cp->c_statelock);
2116 			connected = 1;
2117 			continue;
2118 		}
2119 
2120 		error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2121 		if (CFS_TIMEOUT(fscp, error)) {
2122 			mutex_exit(&cp->c_statelock);
2123 			cachefs_cd_release(fscp);
2124 			held = 0;
2125 			cachefs_cd_timedout(fscp);
2126 			continue;
2127 		}
2128 		if (error) {
2129 			mutex_exit(&cp->c_statelock);
2130 			break;
2131 		}
2132 
2133 		/* check for fileno conflict */
2134 		if ((fscp->fs_inum_size > 0) &&
2135 		    ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) {
2136 			ino64_t fakenum;
2137 
2138 			mutex_exit(&cp->c_statelock);
2139 			mutex_enter(&fscp->fs_fslock);
2140 			fakenum = cachefs_inum_real2fake(fscp,
2141 			    cp->c_attr.va_nodeid);
2142 			if (fakenum == 0) {
2143 				fakenum = cachefs_fileno_conflict(fscp,
2144 				    cp->c_attr.va_nodeid);
2145 			}
2146 			mutex_exit(&fscp->fs_fslock);
2147 
2148 			mutex_enter(&cp->c_statelock);
2149 			cp->c_metadata.md_flags |= MD_LOCALFILENO;
2150 			cp->c_metadata.md_localfileno = fakenum;
2151 			cp->c_flags |= CN_UPDATED;
2152 		}
2153 
2154 		/* copy out the attributes */
2155 		*vap = cp->c_attr;
2156 
2157 		/*
2158 		 * return the FSID of the cachefs filesystem,
2159 		 * not the back filesystem
2160 		 */
2161 		vap->va_fsid = vp->v_vfsp->vfs_dev;
2162 
2163 		/* return our idea of the size */
2164 		if (cp->c_size > vap->va_size)
2165 			vap->va_size = cp->c_size;
2166 
2167 		/* overwrite with our version of fileno and timestamps */
2168 		vap->va_nodeid = cp->c_metadata.md_localfileno;
2169 		vap->va_mtime = cp->c_metadata.md_localmtime;
2170 		vap->va_ctime = cp->c_metadata.md_localctime;
2171 
2172 		mutex_exit(&cp->c_statelock);
2173 		break;
2174 	}
2175 out:
2176 	if (held)
2177 		cachefs_cd_release(fscp);
2178 #ifdef CFS_CD_DEBUG
2179 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2180 #endif
2181 
2182 #ifdef CFSDEBUG
2183 	CFS_DEBUG(CFSDEBUG_VOPS)
2184 		printf("cachefs_getattr: EXIT error = %d\n", error);
2185 #endif
2186 	return (error);
2187 }
2188 
2189 /*
2190  * cachefs_getattr_backfs_nfsv4
2191  *
2192  * Call NFSv4 back filesystem to handle the getattr (cachefs
2193  * pass-through support for NFSv4).
2194  */
2195 static int
2196 cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap,
2197     int flags, cred_t *cr, caller_context_t *ct)
2198 {
2199 	cnode_t *cp = VTOC(vp);
2200 	fscache_t *fscp = C_TO_FSCACHE(cp);
2201 	vnode_t *backvp;
2202 	int error;
2203 
2204 	/*
2205 	 * For NFSv4 pass-through to work, only connected operation
2206 	 * is supported, the cnode backvp must exist, and cachefs
2207 	 * optional (eg., disconnectable) flags are turned off. Assert
2208 	 * these conditions for the getattr operation.
2209 	 */
2210 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2211 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2212 
2213 	/* Call backfs vnode op after extracting backvp */
2214 	mutex_enter(&cp->c_statelock);
2215 	backvp = cp->c_backvp;
2216 	mutex_exit(&cp->c_statelock);
2217 
2218 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_getattr_backfs_nfsv4: cnode %p,"
2219 					" backvp %p\n", cp, backvp));
2220 	error = VOP_GETATTR(backvp, vap, flags, cr, ct);
2221 
2222 	/* Update attributes */
2223 	cp->c_attr = *vap;
2224 
2225 	/*
2226 	 * return the FSID of the cachefs filesystem,
2227 	 * not the back filesystem
2228 	 */
2229 	vap->va_fsid = vp->v_vfsp->vfs_dev;
2230 
2231 	return (error);
2232 }
2233 
2234 /*ARGSUSED4*/
2235 static int
2236 cachefs_setattr(
2237 	vnode_t *vp,
2238 	vattr_t *vap,
2239 	int flags,
2240 	cred_t *cr,
2241 	caller_context_t *ct)
2242 {
2243 	cnode_t *cp = VTOC(vp);
2244 	fscache_t *fscp = C_TO_FSCACHE(cp);
2245 	int error;
2246 	int connected;
2247 	int held = 0;
2248 
2249 	if (getzoneid() != GLOBAL_ZONEID)
2250 		return (EPERM);
2251 
2252 	/*
2253 	 * Cachefs only provides pass-through support for NFSv4,
2254 	 * and all vnode operations are passed through to the
2255 	 * back file system. For NFSv4 pass-through to work, only
2256 	 * connected operation is supported, the cnode backvp must
2257 	 * exist, and cachefs optional (eg., disconnectable) flags
2258 	 * are turned off. Assert these conditions to ensure that
2259 	 * the backfilesystem is called for the setattr operation.
2260 	 */
2261 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2262 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2263 
2264 	connected = 0;
2265 	for (;;) {
2266 		/* drop hold on file system */
2267 		if (held) {
2268 			/* Won't loop with NFSv4 connected behavior */
2269 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2270 			cachefs_cd_release(fscp);
2271 			held = 0;
2272 		}
2273 
2274 		/* acquire access to the file system */
2275 		error = cachefs_cd_access(fscp, connected, 1);
2276 		if (error)
2277 			break;
2278 		held = 1;
2279 
2280 		/* perform the setattr */
2281 		error = cachefs_setattr_common(vp, vap, flags, cr, ct);
2282 		if (error) {
2283 			/* if connected */
2284 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2285 				if (CFS_TIMEOUT(fscp, error)) {
2286 					cachefs_cd_release(fscp);
2287 					held = 0;
2288 					cachefs_cd_timedout(fscp);
2289 					connected = 0;
2290 					continue;
2291 				}
2292 			}
2293 
2294 			/* else must be disconnected */
2295 			else {
2296 				if (CFS_TIMEOUT(fscp, error)) {
2297 					connected = 1;
2298 					continue;
2299 				}
2300 			}
2301 		}
2302 		break;
2303 	}
2304 
2305 	if (held) {
2306 		cachefs_cd_release(fscp);
2307 	}
2308 #ifdef CFS_CD_DEBUG
2309 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2310 #endif
2311 	return (error);
2312 }
2313 
2314 static int
2315 cachefs_setattr_common(
2316 	vnode_t *vp,
2317 	vattr_t *vap,
2318 	int flags,
2319 	cred_t *cr,
2320 	caller_context_t *ct)
2321 {
2322 	cnode_t *cp = VTOC(vp);
2323 	fscache_t *fscp = C_TO_FSCACHE(cp);
2324 	cachefscache_t *cachep = fscp->fs_cache;
2325 	uint_t mask = vap->va_mask;
2326 	int error = 0;
2327 	uint_t bcnt;
2328 
2329 	/* Cannot set these attributes. */
2330 	if (mask & AT_NOSET)
2331 		return (EINVAL);
2332 
2333 	/*
2334 	 * Truncate file.  Must have write permission and not be a directory.
2335 	 */
2336 	if (mask & AT_SIZE) {
2337 		if (vp->v_type == VDIR) {
2338 			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE))
2339 				cachefs_log_truncate(cachep, EISDIR,
2340 				    fscp->fs_cfsvfsp,
2341 				    &cp->c_metadata.md_cookie,
2342 				    cp->c_id.cid_fileno,
2343 				    crgetuid(cr), vap->va_size);
2344 			return (EISDIR);
2345 		}
2346 	}
2347 
2348 	/*
2349 	 * Gotta deal with one special case here, where we're setting the
2350 	 * size of the file. First, we zero out part of the page after the
2351 	 * new size of the file. Then we toss (not write) all pages after
2352 	 * page in which the new offset occurs. Note that the NULL passed
2353 	 * in instead of a putapage() fn parameter is correct, since
2354 	 * no dirty pages will be found (B_TRUNC | B_INVAL).
2355 	 */
2356 
2357 	rw_enter(&cp->c_rwlock, RW_WRITER);
2358 
2359 	/* sync dirty pages */
2360 	if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
2361 		error = cachefs_putpage_common(vp, (offset_t)0, 0, 0, cr);
2362 		if (error == EINTR)
2363 			goto out;
2364 	}
2365 	error = 0;
2366 
2367 	/* if connected */
2368 	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2369 		error = cachefs_setattr_connected(vp, vap, flags, cr, ct);
2370 	}
2371 	/* else must be disconnected */
2372 	else {
2373 		error = cachefs_setattr_disconnected(vp, vap, flags, cr, ct);
2374 	}
2375 	if (error)
2376 		goto out;
2377 
2378 	/*
2379 	 * If the file size has been changed then
2380 	 * toss whole pages beyond the end of the file and zero
2381 	 * the portion of the last page that is beyond the end of the file.
2382 	 */
2383 	if (mask & AT_SIZE && !CFS_ISFS_BACKFS_NFSV4(fscp)) {
2384 		bcnt = (uint_t)(cp->c_size & PAGEOFFSET);
2385 		if (bcnt)
2386 			pvn_vpzero(vp, cp->c_size, PAGESIZE - bcnt);
2387 		(void) pvn_vplist_dirty(vp, cp->c_size, cachefs_push,
2388 			B_TRUNC | B_INVAL, cr);
2389 	}
2390 
2391 out:
2392 	rw_exit(&cp->c_rwlock);
2393 
2394 	if ((mask & AT_SIZE) &&
2395 	    (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE)))
2396 		cachefs_log_truncate(cachep, error, fscp->fs_cfsvfsp,
2397 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
2398 		    crgetuid(cr), vap->va_size);
2399 
2400 	return (error);
2401 }
2402 
2403 static int
2404 cachefs_setattr_connected(
2405 	vnode_t *vp,
2406 	vattr_t *vap,
2407 	int flags,
2408 	cred_t *cr,
2409 	caller_context_t *ct)
2410 {
2411 	cnode_t *cp = VTOC(vp);
2412 	fscache_t *fscp = C_TO_FSCACHE(cp);
2413 	uint_t mask = vap->va_mask;
2414 	int error = 0;
2415 	int setsize;
2416 
2417 	mutex_enter(&cp->c_statelock);
2418 
2419 	if (cp->c_backvp == NULL) {
2420 		error = cachefs_getbackvp(fscp, cp);
2421 		if (error)
2422 			goto out;
2423 	}
2424 
2425 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2426 	if (error)
2427 		goto out;
2428 
2429 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_setattr (nfsv4): cnode %p, "
2430 					"backvp %p\n", cp, cp->c_backvp));
2431 	error = VOP_SETATTR(cp->c_backvp, vap, flags, cr, ct);
2432 	if (error) {
2433 		goto out;
2434 	}
2435 
2436 	/* if the size of the file is being changed */
2437 	if (mask & AT_SIZE) {
2438 		cp->c_size = vap->va_size;
2439 		error = 0;
2440 		setsize = 0;
2441 
2442 		/* see if okay to try to set the file size */
2443 		if (((cp->c_flags & CN_NOCACHE) == 0) &&
2444 		    CFS_ISFS_NONSHARED(fscp)) {
2445 			/* okay to set size if file is populated */
2446 			if (cp->c_metadata.md_flags & MD_POPULATED)
2447 				setsize = 1;
2448 
2449 			/*
2450 			 * Okay to set size if front file exists and setting
2451 			 * file size to zero.
2452 			 */
2453 			if ((cp->c_metadata.md_flags & MD_FILE) &&
2454 			    (vap->va_size == 0))
2455 				setsize = 1;
2456 		}
2457 
2458 		/* if okay to try to set the file size */
2459 		if (setsize) {
2460 			error = 0;
2461 			if (cp->c_frontvp == NULL)
2462 				error = cachefs_getfrontfile(cp);
2463 			if (error == 0)
2464 				error = cachefs_frontfile_size(cp, cp->c_size);
2465 		} else if (cp->c_metadata.md_flags & MD_FILE) {
2466 			/* make sure file gets nocached */
2467 			error = EEXIST;
2468 		}
2469 
2470 		/* if we have to nocache the file */
2471 		if (error) {
2472 			if ((cp->c_flags & CN_NOCACHE) == 0 &&
2473 			    !CFS_ISFS_BACKFS_NFSV4(fscp))
2474 				cachefs_nocache(cp);
2475 			error = 0;
2476 		}
2477 	}
2478 
2479 	cp->c_flags |= CN_UPDATED;
2480 
2481 	/* XXX bob: given what modify_cobject does this seems unnecessary */
2482 	cp->c_attr.va_mask = AT_ALL;
2483 	error = VOP_GETATTR(cp->c_backvp, &cp->c_attr, 0, cr, ct);
2484 	if (error)
2485 		goto out;
2486 
2487 	cp->c_attr.va_size = MAX(cp->c_attr.va_size, cp->c_size);
2488 	cp->c_size = cp->c_attr.va_size;
2489 
2490 	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
2491 out:
2492 	mutex_exit(&cp->c_statelock);
2493 	return (error);
2494 }
2495 
2496 /*
2497  * perform the setattr on the local file system
2498  */
2499 /*ARGSUSED4*/
2500 static int
2501 cachefs_setattr_disconnected(
2502 	vnode_t *vp,
2503 	vattr_t *vap,
2504 	int flags,
2505 	cred_t *cr,
2506 	caller_context_t *ct)
2507 {
2508 	cnode_t *cp = VTOC(vp);
2509 	fscache_t *fscp = C_TO_FSCACHE(cp);
2510 	int mask;
2511 	int error;
2512 	int newfile;
2513 	off_t commit = 0;
2514 
2515 	if (CFS_ISFS_WRITE_AROUND(fscp))
2516 		return (ETIMEDOUT);
2517 
2518 	/* if we do not have good attributes */
2519 	if (cp->c_metadata.md_flags & MD_NEEDATTRS)
2520 		return (ETIMEDOUT);
2521 
2522 	/* primary concern is to keep this routine as much like ufs_setattr */
2523 
2524 	mutex_enter(&cp->c_statelock);
2525 
2526 	error = secpolicy_vnode_setattr(cr, vp, vap, &cp->c_attr, flags,
2527 			    cachefs_access_local, cp);
2528 
2529 	if (error)
2530 		goto out;
2531 
2532 	mask = vap->va_mask;
2533 
2534 	/* if changing the size of the file */
2535 	if (mask & AT_SIZE) {
2536 		if (vp->v_type == VDIR) {
2537 			error = EISDIR;
2538 			goto out;
2539 		}
2540 
2541 		if (vp->v_type == VFIFO) {
2542 			error = 0;
2543 			goto out;
2544 		}
2545 
2546 		if ((vp->v_type != VREG) &&
2547 		    !((vp->v_type == VLNK) && (vap->va_size == 0))) {
2548 			error = EINVAL;
2549 			goto out;
2550 		}
2551 
2552 		if (vap->va_size > fscp->fs_offmax) {
2553 			error = EFBIG;
2554 			goto out;
2555 		}
2556 
2557 		/* if the file is not populated and we are not truncating it */
2558 		if (((cp->c_metadata.md_flags & MD_POPULATED) == 0) &&
2559 		    (vap->va_size != 0)) {
2560 			error = ETIMEDOUT;
2561 			goto out;
2562 		}
2563 
2564 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2565 			error = cachefs_dlog_cidmap(fscp);
2566 			if (error) {
2567 				error = ENOSPC;
2568 				goto out;
2569 			}
2570 			cp->c_metadata.md_flags |= MD_MAPPING;
2571 		}
2572 
2573 		/* log the operation */
2574 		commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2575 		if (commit == 0) {
2576 			error = ENOSPC;
2577 			goto out;
2578 		}
2579 		cp->c_flags &= ~CN_NOCACHE;
2580 
2581 		/* special case truncating fast sym links */
2582 		if ((vp->v_type == VLNK) &&
2583 		    (cp->c_metadata.md_flags & MD_FASTSYMLNK)) {
2584 			/* XXX how can we get here */
2585 			/* XXX should update mtime */
2586 			cp->c_size = 0;
2587 			error = 0;
2588 			goto out;
2589 		}
2590 
2591 		/* get the front file, this may create one */
2592 		newfile = (cp->c_metadata.md_flags & MD_FILE) ? 0 : 1;
2593 		if (cp->c_frontvp == NULL) {
2594 			error = cachefs_getfrontfile(cp);
2595 			if (error)
2596 				goto out;
2597 		}
2598 		ASSERT(cp->c_frontvp);
2599 		if (newfile && (cp->c_flags & CN_UPDATED)) {
2600 			/* allocate space for the metadata */
2601 			ASSERT((cp->c_flags & CN_ALLOC_PENDING) == 0);
2602 			ASSERT((cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR)
2603 			    == 0);
2604 			error = filegrp_write_metadata(cp->c_filegrp,
2605 			    &cp->c_id, &cp->c_metadata);
2606 			if (error)
2607 				goto out;
2608 		}
2609 
2610 		/* change the size of the front file */
2611 		error = cachefs_frontfile_size(cp, vap->va_size);
2612 		if (error)
2613 			goto out;
2614 		cp->c_attr.va_size = cp->c_size = vap->va_size;
2615 		gethrestime(&cp->c_metadata.md_localmtime);
2616 		cp->c_metadata.md_flags |= MD_POPULATED | MD_LOCALMTIME;
2617 		cachefs_modified(cp);
2618 		cp->c_flags |= CN_UPDATED;
2619 	}
2620 
2621 	if (mask & AT_MODE) {
2622 		/* mark as modified */
2623 		if (cachefs_modified_alloc(cp)) {
2624 			error = ENOSPC;
2625 			goto out;
2626 		}
2627 
2628 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2629 			error = cachefs_dlog_cidmap(fscp);
2630 			if (error) {
2631 				error = ENOSPC;
2632 				goto out;
2633 			}
2634 			cp->c_metadata.md_flags |= MD_MAPPING;
2635 		}
2636 
2637 		/* log the operation if not already logged */
2638 		if (commit == 0) {
2639 			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2640 			if (commit == 0) {
2641 				error = ENOSPC;
2642 				goto out;
2643 			}
2644 		}
2645 
2646 		cp->c_attr.va_mode &= S_IFMT;
2647 		cp->c_attr.va_mode |= vap->va_mode & ~S_IFMT;
2648 		gethrestime(&cp->c_metadata.md_localctime);
2649 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
2650 		cp->c_flags |= CN_UPDATED;
2651 	}
2652 
2653 	if (mask & (AT_UID|AT_GID)) {
2654 
2655 		/* mark as modified */
2656 		if (cachefs_modified_alloc(cp)) {
2657 			error = ENOSPC;
2658 			goto out;
2659 		}
2660 
2661 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2662 			error = cachefs_dlog_cidmap(fscp);
2663 			if (error) {
2664 				error = ENOSPC;
2665 				goto out;
2666 			}
2667 			cp->c_metadata.md_flags |= MD_MAPPING;
2668 		}
2669 
2670 		/* log the operation if not already logged */
2671 		if (commit == 0) {
2672 			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2673 			if (commit == 0) {
2674 				error = ENOSPC;
2675 				goto out;
2676 			}
2677 		}
2678 
2679 		if (mask & AT_UID)
2680 			cp->c_attr.va_uid = vap->va_uid;
2681 
2682 		if (mask & AT_GID)
2683 			cp->c_attr.va_gid = vap->va_gid;
2684 		gethrestime(&cp->c_metadata.md_localctime);
2685 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
2686 		cp->c_flags |= CN_UPDATED;
2687 	}
2688 
2689 
2690 	if (mask & (AT_MTIME|AT_ATIME)) {
2691 		/* mark as modified */
2692 		if (cachefs_modified_alloc(cp)) {
2693 			error = ENOSPC;
2694 			goto out;
2695 		}
2696 
2697 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2698 			error = cachefs_dlog_cidmap(fscp);
2699 			if (error) {
2700 				error = ENOSPC;
2701 				goto out;
2702 			}
2703 			cp->c_metadata.md_flags |= MD_MAPPING;
2704 		}
2705 
2706 		/* log the operation if not already logged */
2707 		if (commit == 0) {
2708 			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2709 			if (commit == 0) {
2710 				error = ENOSPC;
2711 				goto out;
2712 			}
2713 		}
2714 
2715 		if (mask & AT_MTIME) {
2716 			cp->c_metadata.md_localmtime = vap->va_mtime;
2717 			cp->c_metadata.md_flags |= MD_LOCALMTIME;
2718 		}
2719 		if (mask & AT_ATIME)
2720 			cp->c_attr.va_atime = vap->va_atime;
2721 		gethrestime(&cp->c_metadata.md_localctime);
2722 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
2723 		cp->c_flags |= CN_UPDATED;
2724 	}
2725 
2726 out:
2727 	mutex_exit(&cp->c_statelock);
2728 
2729 	/* commit the log entry */
2730 	if (commit) {
2731 		if (cachefs_dlog_commit(fscp, commit, error)) {
2732 			/*EMPTY*/
2733 			/* XXX bob: fix on panic */
2734 		}
2735 	}
2736 	return (error);
2737 }
2738 
2739 /* ARGSUSED */
2740 static int
2741 cachefs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
2742 	caller_context_t *ct)
2743 {
2744 	cnode_t *cp = VTOC(vp);
2745 	fscache_t *fscp = C_TO_FSCACHE(cp);
2746 	int error;
2747 	int held = 0;
2748 	int connected = 0;
2749 
2750 #ifdef CFSDEBUG
2751 	CFS_DEBUG(CFSDEBUG_VOPS)
2752 		printf("cachefs_access: ENTER vp %p\n", (void *)vp);
2753 #endif
2754 	if (getzoneid() != GLOBAL_ZONEID) {
2755 		error = EPERM;
2756 		goto out;
2757 	}
2758 
2759 	/*
2760 	 * Cachefs only provides pass-through support for NFSv4,
2761 	 * and all vnode operations are passed through to the
2762 	 * back file system. For NFSv4 pass-through to work, only
2763 	 * connected operation is supported, the cnode backvp must
2764 	 * exist, and cachefs optional (eg., disconnectable) flags
2765 	 * are turned off. Assert these conditions to ensure that
2766 	 * the backfilesystem is called for the access operation.
2767 	 */
2768 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2769 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2770 
2771 	for (;;) {
2772 		/* get (or renew) access to the file system */
2773 		if (held) {
2774 			/* Won't loop with NFSv4 connected behavior */
2775 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2776 			cachefs_cd_release(fscp);
2777 			held = 0;
2778 		}
2779 		error = cachefs_cd_access(fscp, connected, 0);
2780 		if (error)
2781 			break;
2782 		held = 1;
2783 
2784 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2785 			error = cachefs_access_connected(vp, mode, flags,
2786 			    cr);
2787 			if (CFS_TIMEOUT(fscp, error)) {
2788 				cachefs_cd_release(fscp);
2789 				held = 0;
2790 				cachefs_cd_timedout(fscp);
2791 				connected = 0;
2792 				continue;
2793 			}
2794 		} else {
2795 			mutex_enter(&cp->c_statelock);
2796 			error = cachefs_access_local(cp, mode, cr);
2797 			mutex_exit(&cp->c_statelock);
2798 			if (CFS_TIMEOUT(fscp, error)) {
2799 				if (cachefs_cd_access_miss(fscp)) {
2800 					mutex_enter(&cp->c_statelock);
2801 					if (cp->c_backvp == NULL) {
2802 						(void) cachefs_getbackvp(fscp,
2803 						    cp);
2804 					}
2805 					mutex_exit(&cp->c_statelock);
2806 					error = cachefs_access_connected(vp,
2807 					    mode, flags, cr);
2808 					if (!CFS_TIMEOUT(fscp, error))
2809 						break;
2810 					delay(5*hz);
2811 					connected = 0;
2812 					continue;
2813 				}
2814 				connected = 1;
2815 				continue;
2816 			}
2817 		}
2818 		break;
2819 	}
2820 	if (held)
2821 		cachefs_cd_release(fscp);
2822 #ifdef CFS_CD_DEBUG
2823 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2824 #endif
2825 out:
2826 #ifdef CFSDEBUG
2827 	CFS_DEBUG(CFSDEBUG_VOPS)
2828 		printf("cachefs_access: EXIT error = %d\n", error);
2829 #endif
2830 	return (error);
2831 }
2832 
2833 static int
2834 cachefs_access_connected(struct vnode *vp, int mode, int flags, cred_t *cr)
2835 {
2836 	cnode_t *cp = VTOC(vp);
2837 	fscache_t *fscp = C_TO_FSCACHE(cp);
2838 	int error = 0;
2839 
2840 	mutex_enter(&cp->c_statelock);
2841 
2842 	/* Make sure the cnode attrs are valid first. */
2843 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2844 	if (error)
2845 		goto out;
2846 
2847 	/* see if can do a local file system check */
2848 	if ((fscp->fs_info.fi_mntflags & CFS_ACCESS_BACKFS) == 0 &&
2849 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
2850 		error = cachefs_access_local(cp, mode, cr);
2851 		goto out;
2852 	}
2853 
2854 	/* else do a remote file system check */
2855 	else {
2856 		if (cp->c_backvp == NULL) {
2857 			error = cachefs_getbackvp(fscp, cp);
2858 			if (error)
2859 				goto out;
2860 		}
2861 
2862 		CFS_DPRINT_BACKFS_NFSV4(fscp,
2863 			("cachefs_access (nfsv4): cnode %p, backvp %p\n",
2864 			cp, cp->c_backvp));
2865 		error = VOP_ACCESS(cp->c_backvp, mode, flags, cr, NULL);
2866 
2867 		/*
2868 		 * even though we don't `need' the ACL to do access
2869 		 * via the backvp, we should cache it here to make our
2870 		 * behavior more reasonable if we go disconnected.
2871 		 */
2872 
2873 		if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
2874 		    (cachefs_vtype_aclok(vp)) &&
2875 		    ((cp->c_flags & CN_NOCACHE) == 0) &&
2876 		    (!CFS_ISFS_BACKFS_NFSV4(fscp)) &&
2877 		    ((cp->c_metadata.md_flags & MD_ACL) == 0))
2878 			(void) cachefs_cacheacl(cp, NULL);
2879 	}
2880 out:
2881 	/*
2882 	 * If NFS returned ESTALE, mark this cnode as stale, so that
2883 	 * the vn_open retry will read the file anew from backfs
2884 	 */
2885 	if (error == ESTALE)
2886 		cachefs_cnode_stale(cp);
2887 
2888 	mutex_exit(&cp->c_statelock);
2889 	return (error);
2890 }
2891 
2892 /*
2893  * CFS has a fastsymlink scheme. If the size of the link is < C_FSL_SIZE, then
2894  * the link is placed in the metadata itself (no front file is allocated).
2895  */
2896 /*ARGSUSED*/
2897 static int
2898 cachefs_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
2899 {
2900 	int error = 0;
2901 	cnode_t *cp = VTOC(vp);
2902 	fscache_t *fscp = C_TO_FSCACHE(cp);
2903 	cachefscache_t *cachep = fscp->fs_cache;
2904 	int held = 0;
2905 	int connected = 0;
2906 
2907 	if (getzoneid() != GLOBAL_ZONEID)
2908 		return (EPERM);
2909 
2910 	if (vp->v_type != VLNK)
2911 		return (EINVAL);
2912 
2913 	/*
2914 	 * Cachefs only provides pass-through support for NFSv4,
2915 	 * and all vnode operations are passed through to the
2916 	 * back file system. For NFSv4 pass-through to work, only
2917 	 * connected operation is supported, the cnode backvp must
2918 	 * exist, and cachefs optional (eg., disconnectable) flags
2919 	 * are turned off. Assert these conditions to ensure that
2920 	 * the backfilesystem is called for the readlink operation.
2921 	 */
2922 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2923 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2924 
2925 	for (;;) {
2926 		/* get (or renew) access to the file system */
2927 		if (held) {
2928 			/* Won't loop with NFSv4 connected behavior */
2929 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2930 			cachefs_cd_release(fscp);
2931 			held = 0;
2932 		}
2933 		error = cachefs_cd_access(fscp, connected, 0);
2934 		if (error)
2935 			break;
2936 		held = 1;
2937 
2938 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2939 			/*
2940 			 * since readlink_connected will call stuffsymlink
2941 			 * on success, have to serialize access
2942 			 */
2943 			if (!rw_tryenter(&cp->c_rwlock, RW_WRITER)) {
2944 				cachefs_cd_release(fscp);
2945 				rw_enter(&cp->c_rwlock, RW_WRITER);
2946 				error = cachefs_cd_access(fscp, connected, 0);
2947 				if (error) {
2948 					held = 0;
2949 					rw_exit(&cp->c_rwlock);
2950 					break;
2951 				}
2952 			}
2953 			error = cachefs_readlink_connected(vp, uiop, cr);
2954 			rw_exit(&cp->c_rwlock);
2955 			if (CFS_TIMEOUT(fscp, error)) {
2956 				cachefs_cd_release(fscp);
2957 				held = 0;
2958 				cachefs_cd_timedout(fscp);
2959 				connected = 0;
2960 				continue;
2961 			}
2962 		} else {
2963 			error = cachefs_readlink_disconnected(vp, uiop);
2964 			if (CFS_TIMEOUT(fscp, error)) {
2965 				if (cachefs_cd_access_miss(fscp)) {
2966 					/* as above */
2967 					if (!rw_tryenter(&cp->c_rwlock,
2968 					    RW_WRITER)) {
2969 						cachefs_cd_release(fscp);
2970 						rw_enter(&cp->c_rwlock,
2971 						    RW_WRITER);
2972 						error = cachefs_cd_access(fscp,
2973 						    connected, 0);
2974 						if (error) {
2975 							held = 0;
2976 							rw_exit(&cp->c_rwlock);
2977 							break;
2978 						}
2979 					}
2980 					error = cachefs_readlink_connected(vp,
2981 					    uiop, cr);
2982 					rw_exit(&cp->c_rwlock);
2983 					if (!CFS_TIMEOUT(fscp, error))
2984 						break;
2985 					delay(5*hz);
2986 					connected = 0;
2987 					continue;
2988 				}
2989 				connected = 1;
2990 				continue;
2991 			}
2992 		}
2993 		break;
2994 	}
2995 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READLINK))
2996 		cachefs_log_readlink(cachep, error, fscp->fs_cfsvfsp,
2997 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
2998 		    crgetuid(cr), cp->c_size);
2999 
3000 	if (held)
3001 		cachefs_cd_release(fscp);
3002 #ifdef CFS_CD_DEBUG
3003 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3004 #endif
3005 
3006 	/*
3007 	 * The over the wire error for attempting to readlink something
3008 	 * other than a symbolic link is ENXIO.  However, we need to
3009 	 * return EINVAL instead of ENXIO, so we map it here.
3010 	 */
3011 	return (error == ENXIO ? EINVAL : error);
3012 }
3013 
3014 static int
3015 cachefs_readlink_connected(vnode_t *vp, uio_t *uiop, cred_t *cr)
3016 {
3017 	int error;
3018 	cnode_t *cp = VTOC(vp);
3019 	fscache_t *fscp = C_TO_FSCACHE(cp);
3020 	caddr_t buf;
3021 	int buflen;
3022 	int readcache = 0;
3023 
3024 	mutex_enter(&cp->c_statelock);
3025 
3026 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
3027 	if (error)
3028 		goto out;
3029 
3030 	/* if the sym link is cached as a fast sym link */
3031 	if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
3032 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3033 		error = uiomove(cp->c_metadata.md_allocinfo,
3034 		    MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
3035 #ifdef CFSDEBUG
3036 		readcache = 1;
3037 		goto out;
3038 #else /* CFSDEBUG */
3039 		/* XXX KLUDGE! correct for insidious 0-len symlink */
3040 		if (cp->c_size != 0) {
3041 			readcache = 1;
3042 			goto out;
3043 		}
3044 #endif /* CFSDEBUG */
3045 	}
3046 
3047 	/* if the sym link is cached in a front file */
3048 	if (cp->c_metadata.md_flags & MD_POPULATED) {
3049 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3050 		ASSERT(cp->c_metadata.md_flags & MD_FILE);
3051 		if (cp->c_frontvp == NULL) {
3052 			(void) cachefs_getfrontfile(cp);
3053 		}
3054 		if (cp->c_metadata.md_flags & MD_POPULATED) {
3055 			/* read symlink data from frontfile */
3056 			uiop->uio_offset = 0;
3057 			(void) VOP_RWLOCK(cp->c_frontvp,
3058 						V_WRITELOCK_FALSE, NULL);
3059 			error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
3060 			VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);
3061 
3062 			/* XXX KLUDGE! correct for insidious 0-len symlink */
3063 			if (cp->c_size != 0) {
3064 				readcache = 1;
3065 				goto out;
3066 			}
3067 		}
3068 	}
3069 
3070 	/* get the sym link contents from the back fs */
3071 	error = cachefs_readlink_back(cp, cr, &buf, &buflen);
3072 	if (error)
3073 		goto out;
3074 
3075 	/* copy the contents out to the user */
3076 	error = uiomove(buf, MIN(buflen, uiop->uio_resid), UIO_READ, uiop);
3077 
3078 	/*
3079 	 * try to cache the sym link, note that its a noop if NOCACHE is set
3080 	 * or if NFSv4 pass-through is enabled.
3081 	 */
3082 	if (cachefs_stuffsymlink(cp, buf, buflen)) {
3083 		cachefs_nocache(cp);
3084 	}
3085 
3086 	cachefs_kmem_free(buf, MAXPATHLEN);
3087 
3088 out:
3089 	mutex_exit(&cp->c_statelock);
3090 	if (error == 0) {
3091 		if (readcache)
3092 			fscp->fs_stats.st_hits++;
3093 		else
3094 			fscp->fs_stats.st_misses++;
3095 	}
3096 	return (error);
3097 }
3098 
3099 static int
3100 cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop)
3101 {
3102 	int error;
3103 	cnode_t *cp = VTOC(vp);
3104 	fscache_t *fscp = C_TO_FSCACHE(cp);
3105 	int readcache = 0;
3106 
3107 	mutex_enter(&cp->c_statelock);
3108 
3109 	/* if the sym link is cached as a fast sym link */
3110 	if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
3111 		error = uiomove(cp->c_metadata.md_allocinfo,
3112 		    MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
3113 		readcache = 1;
3114 		goto out;
3115 	}
3116 
3117 	/* if the sym link is cached in a front file */
3118 	if (cp->c_metadata.md_flags & MD_POPULATED) {
3119 		ASSERT(cp->c_metadata.md_flags & MD_FILE);
3120 		if (cp->c_frontvp == NULL) {
3121 			(void) cachefs_getfrontfile(cp);
3122 		}
3123 		if (cp->c_metadata.md_flags & MD_POPULATED) {
3124 			/* read symlink data from frontfile */
3125 			uiop->uio_offset = 0;
3126 			(void) VOP_RWLOCK(cp->c_frontvp,
3127 						V_WRITELOCK_FALSE, NULL);
3128 			error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
3129 			VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);
3130 			readcache = 1;
3131 			goto out;
3132 		}
3133 	}
3134 	error = ETIMEDOUT;
3135 
3136 out:
3137 	mutex_exit(&cp->c_statelock);
3138 	if (error == 0) {
3139 		if (readcache)
3140 			fscp->fs_stats.st_hits++;
3141 		else
3142 			fscp->fs_stats.st_misses++;
3143 	}
3144 	return (error);
3145 }
3146 
3147 /*ARGSUSED*/
3148 static int
3149 cachefs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
3150 {
3151 	cnode_t *cp = VTOC(vp);
3152 	int error = 0;
3153 	fscache_t *fscp = C_TO_FSCACHE(cp);
3154 	int held = 0;
3155 	int connected = 0;
3156 
3157 #ifdef CFSDEBUG
3158 	CFS_DEBUG(CFSDEBUG_VOPS)
3159 		printf("cachefs_fsync: ENTER vp %p\n", (void *)vp);
3160 #endif
3161 
3162 	if (getzoneid() != GLOBAL_ZONEID) {
3163 		error = EPERM;
3164 		goto out;
3165 	}
3166 
3167 	if (fscp->fs_backvfsp && fscp->fs_backvfsp->vfs_flag & VFS_RDONLY)
3168 		goto out;
3169 
3170 	/*
3171 	 * Cachefs only provides pass-through support for NFSv4,
3172 	 * and all vnode operations are passed through to the
3173 	 * back file system. For NFSv4 pass-through to work, only
3174 	 * connected operation is supported, the cnode backvp must
3175 	 * exist, and cachefs optional (eg., disconnectable) flags
3176 	 * are turned off. Assert these conditions to ensure that
3177 	 * the backfilesystem is called for the fsync operation.
3178 	 */
3179 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3180 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
3181 
3182 	for (;;) {
3183 		/* get (or renew) access to the file system */
3184 		if (held) {
3185 			/* Won't loop with NFSv4 connected behavior */
3186 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3187 			cachefs_cd_release(fscp);
3188 			held = 0;
3189 		}
3190 		error = cachefs_cd_access(fscp, connected, 1);
3191 		if (error)
3192 			break;
3193 		held = 1;
3194 		connected = 0;
3195 
3196 		/* if a regular file, write out the pages */
3197 		if ((vp->v_type == VREG) && vn_has_cached_data(vp) &&
3198 		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
3199 			error = cachefs_putpage_common(vp, (offset_t)0,
3200 			    0, 0, cr);
3201 			if (CFS_TIMEOUT(fscp, error)) {
3202 				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3203 					cachefs_cd_release(fscp);
3204 					held = 0;
3205 					cachefs_cd_timedout(fscp);
3206 					continue;
3207 				} else {
3208 					connected = 1;
3209 					continue;
3210 				}
3211 			}
3212 
3213 			/* if no space left in cache, wait until connected */
3214 			if ((error == ENOSPC) &&
3215 			    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
3216 				connected = 1;
3217 				continue;
3218 			}
3219 
3220 			/* clear the cnode error if putpage worked */
3221 			if ((error == 0) && cp->c_error) {
3222 				mutex_enter(&cp->c_statelock);
3223 				cp->c_error = 0;
3224 				mutex_exit(&cp->c_statelock);
3225 			}
3226 
3227 			if (error)
3228 				break;
3229 		}
3230 
3231 		/* if connected, sync the backvp */
3232 		if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) &&
3233 		    cp->c_backvp) {
3234 			mutex_enter(&cp->c_statelock);
3235 			if (cp->c_backvp) {
3236 				CFS_DPRINT_BACKFS_NFSV4(fscp,
3237 					("cachefs_fsync (nfsv4): cnode %p, "
3238 					"backvp %p\n", cp, cp->c_backvp));
3239 				error = VOP_FSYNC(cp->c_backvp, syncflag, cr,
3240 				    ct);
3241 				if (CFS_TIMEOUT(fscp, error)) {
3242 					mutex_exit(&cp->c_statelock);
3243 					cachefs_cd_release(fscp);
3244 					held = 0;
3245 					cachefs_cd_timedout(fscp);
3246 					continue;
3247 				} else if (error && (error != EINTR))
3248 					cp->c_error = error;
3249 			}
3250 			mutex_exit(&cp->c_statelock);
3251 		}
3252 
3253 		/* sync the metadata and the front file to the front fs */
3254 		if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
3255 			error = cachefs_sync_metadata(cp);
3256 			if (error &&
3257 			    (fscp->fs_cdconnected == CFS_CD_CONNECTED))
3258 				error = 0;
3259 		}
3260 		break;
3261 	}
3262 
3263 	if (error == 0)
3264 		error = cp->c_error;
3265 
3266 	if (held)
3267 		cachefs_cd_release(fscp);
3268 
3269 out:
3270 #ifdef CFS_CD_DEBUG
3271 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3272 #endif
3273 
3274 #ifdef CFSDEBUG
3275 	CFS_DEBUG(CFSDEBUG_VOPS)
3276 		printf("cachefs_fsync: EXIT vp %p\n", (void *)vp);
3277 #endif
3278 	return (error);
3279 }
3280 
3281 /*
3282  * Called from cachefs_inactive(), to make sure all the data goes out to disk.
3283  */
3284 int
3285 cachefs_sync_metadata(cnode_t *cp)
3286 {
3287 	int error = 0;
3288 	struct filegrp *fgp;
3289 	struct vattr va;
3290 	fscache_t *fscp = C_TO_FSCACHE(cp);
3291 
3292 #ifdef CFSDEBUG
3293 	CFS_DEBUG(CFSDEBUG_VOPS)
3294 		printf("c_sync_metadata: ENTER cp %p cflag %x\n",
3295 			(void *)cp, cp->c_flags);
3296 #endif
3297 
3298 	mutex_enter(&cp->c_statelock);
3299 	if ((cp->c_flags & CN_UPDATED) == 0)
3300 		goto out;
3301 	if (cp->c_flags & (CN_STALE | CN_DESTROY))
3302 		goto out;
3303 	fgp = cp->c_filegrp;
3304 	if ((fgp->fg_flags & CFS_FG_WRITE) == 0)
3305 		goto out;
3306 	if (CFS_ISFS_BACKFS_NFSV4(fscp))
3307 		goto out;
3308 
3309 	if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
3310 		mutex_exit(&cp->c_statelock);
3311 		error = filegrp_allocattr(fgp);
3312 		mutex_enter(&cp->c_statelock);
3313 		if (error) {
3314 			error = 0;
3315 			goto out;
3316 		}
3317 	}
3318 
3319 	if (cp->c_flags & CN_ALLOC_PENDING) {
3320 		error = filegrp_create_metadata(fgp, &cp->c_metadata,
3321 		    &cp->c_id);
3322 		if (error)
3323 			goto out;
3324 		cp->c_flags &= ~CN_ALLOC_PENDING;
3325 	}
3326 
3327 	if (cp->c_flags & CN_NEED_FRONT_SYNC) {
3328 		if (cp->c_frontvp != NULL) {
3329 			error = VOP_FSYNC(cp->c_frontvp, FSYNC, kcred, NULL);
3330 			if (error) {
3331 				cp->c_metadata.md_timestamp.tv_sec = 0;
3332 			} else {
3333 				va.va_mask = AT_MTIME;
3334 				error = VOP_GETATTR(cp->c_frontvp, &va, 0,
3335 					kcred, NULL);
3336 				if (error)
3337 					goto out;
3338 				cp->c_metadata.md_timestamp = va.va_mtime;
3339 				cp->c_flags &=
3340 				~(CN_NEED_FRONT_SYNC | CN_POPULATION_PENDING);
3341 			}
3342 		} else {
3343 			cp->c_flags &=
3344 				~(CN_NEED_FRONT_SYNC | CN_POPULATION_PENDING);
3345 		}
3346 	}
3347 
3348 	/*
3349 	 * XXX tony: How can CN_ALLOC_PENDING still be set??
3350 	 * XXX tony: How can CN_UPDATED not be set?????
3351 	 */
3352 	if ((cp->c_flags & CN_ALLOC_PENDING) == 0 &&
3353 			(cp->c_flags & CN_UPDATED)) {
3354 		error = filegrp_write_metadata(fgp, &cp->c_id,
3355 				&cp->c_metadata);
3356 		if (error)
3357 			goto out;
3358 	}
3359 out:
3360 	if (error) {
3361 		/* XXX modified files? */
3362 		if (cp->c_metadata.md_rlno) {
3363 			cachefs_removefrontfile(&cp->c_metadata,
3364 			    &cp->c_id, fgp);
3365 			cachefs_rlent_moveto(C_TO_FSCACHE(cp)->fs_cache,
3366 			    CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
3367 			cp->c_metadata.md_rlno = 0;
3368 			cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
3369 			if (cp->c_frontvp) {
3370 				VN_RELE(cp->c_frontvp);
3371 				cp->c_frontvp = NULL;
3372 			}
3373 		}
3374 		if ((cp->c_flags & CN_ALLOC_PENDING) == 0)
3375 			(void) filegrp_destroy_metadata(fgp, &cp->c_id);
3376 		cp->c_flags |= CN_ALLOC_PENDING;
3377 		cachefs_nocache(cp);
3378 	}
3379 	/*
3380 	 * we clear the updated bit even on errors because a retry
3381 	 * will probably fail also.
3382 	 */
3383 	cp->c_flags &= ~CN_UPDATED;
3384 	mutex_exit(&cp->c_statelock);
3385 
3386 #ifdef CFSDEBUG
3387 	CFS_DEBUG(CFSDEBUG_VOPS)
3388 		printf("c_sync_metadata: EXIT cp %p cflag %x\n",
3389 			(void *)cp, cp->c_flags);
3390 #endif
3391 
3392 	return (error);
3393 }
3394 
3395 /*
3396  * This is the vop entry point for inactivating a vnode.
3397  * It just queues the request for the async thread which
3398  * calls cachefs_inactive.
3399  * Because of the dnlc, it is not safe to grab most locks here.
3400  */
3401 /*ARGSUSED*/
3402 static void
3403 cachefs_inactive(struct vnode *vp, cred_t *cr, caller_context_t *ct)
3404 {
3405 	cnode_t *cp;
3406 	struct cachefs_req *rp;
3407 	fscache_t *fscp;
3408 
3409 #ifdef CFSDEBUG
3410 	CFS_DEBUG(CFSDEBUG_VOPS)
3411 		printf("cachefs_inactive: ENTER vp %p\n", (void *)vp);
3412 #endif
3413 
3414 	cp = VTOC(vp);
3415 	fscp = C_TO_FSCACHE(cp);
3416 
3417 	ASSERT((cp->c_flags & CN_IDLE) == 0);
3418 
3419 	/*
3420 	 * Cachefs only provides pass-through support for NFSv4,
3421 	 * and all vnode operations are passed through to the
3422 	 * back file system. For NFSv4 pass-through to work, only
3423 	 * connected operation is supported, the cnode backvp must
3424 	 * exist, and cachefs optional (eg., disconnectable) flags
3425 	 * are turned off. Assert these conditions to ensure that
3426 	 * the backfilesystem is called for the inactive operation.
3427 	 */
3428 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3429 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
3430 
3431 	/* vn_rele() set the v_count == 1 */
3432 
3433 	cp->c_ipending = 1;
3434 
3435 	rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
3436 	rp->cfs_cmd = CFS_IDLE;
3437 	rp->cfs_cr = cr;
3438 	crhold(rp->cfs_cr);
3439 	rp->cfs_req_u.cu_idle.ci_vp = vp;
3440 	cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));
3441 
3442 #ifdef CFSDEBUG
3443 	CFS_DEBUG(CFSDEBUG_VOPS)
3444 		printf("cachefs_inactive: EXIT vp %p\n", (void *)vp);
3445 #endif
3446 }
3447 
3448 /* ARGSUSED */
3449 static int
3450 cachefs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp,
3451     struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr,
3452     caller_context_t *ct, int *direntflags, pathname_t *realpnp)
3453 
3454 {
3455 	int error = 0;
3456 	cnode_t *dcp = VTOC(dvp);
3457 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3458 	int held = 0;
3459 	int connected = 0;
3460 
3461 #ifdef CFSDEBUG
3462 	CFS_DEBUG(CFSDEBUG_VOPS)
3463 		printf("cachefs_lookup: ENTER dvp %p nm %s\n", (void *)dvp, nm);
3464 #endif
3465 
3466 	if (getzoneid() != GLOBAL_ZONEID) {
3467 		error = EPERM;
3468 		goto out;
3469 	}
3470 
3471 	/*
3472 	 * Cachefs only provides pass-through support for NFSv4,
3473 	 * and all vnode operations are passed through to the
3474 	 * back file system. For NFSv4 pass-through to work, only
3475 	 * connected operation is supported, the cnode backvp must
3476 	 * exist, and cachefs optional (eg., disconnectable) flags
3477 	 * are turned off. Assert these conditions to ensure that
3478 	 * the backfilesystem is called for the lookup operation.
3479 	 */
3480 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3481 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
3482 
3483 	for (;;) {
3484 		/* get (or renew) access to the file system */
3485 		if (held) {
3486 			/* Won't loop with NFSv4 connected behavior */
3487 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3488 			cachefs_cd_release(fscp);
3489 			held = 0;
3490 		}
3491 		error = cachefs_cd_access(fscp, connected, 0);
3492 		if (error)
3493 			break;
3494 		held = 1;
3495 
3496 		error = cachefs_lookup_common(dvp, nm, vpp, pnp,
3497 			flags, rdir, cr);
3498 		if (CFS_TIMEOUT(fscp, error)) {
3499 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3500 				cachefs_cd_release(fscp);
3501 				held = 0;
3502 				cachefs_cd_timedout(fscp);
3503 				connected = 0;
3504 				continue;
3505 			} else {
3506 				if (cachefs_cd_access_miss(fscp)) {
3507 					rw_enter(&dcp->c_rwlock, RW_READER);
3508 					error = cachefs_lookup_back(dvp, nm,
3509 					    vpp, cr);
3510 					rw_exit(&dcp->c_rwlock);
3511 					if (!CFS_TIMEOUT(fscp, error))
3512 						break;
3513 					delay(5*hz);
3514 					connected = 0;
3515 					continue;
3516 				}
3517 				connected = 1;
3518 				continue;
3519 			}
3520 		}
3521 		break;
3522 	}
3523 	if (held)
3524 		cachefs_cd_release(fscp);
3525 
3526 	if (error == 0 && IS_DEVVP(*vpp)) {
3527 		struct vnode *newvp;
3528 		newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
3529 		VN_RELE(*vpp);
3530 		if (newvp == NULL) {
3531 			error = ENOSYS;
3532 		} else {
3533 			*vpp = newvp;
3534 		}
3535 	}
3536 
3537 #ifdef CFS_CD_DEBUG
3538 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3539 #endif
3540 out:
3541 #ifdef CFSDEBUG
3542 	CFS_DEBUG(CFSDEBUG_VOPS)
3543 		printf("cachefs_lookup: EXIT error = %d\n", error);
3544 #endif
3545 
3546 	return (error);
3547 }
3548 
3549 /* ARGSUSED */
3550 int
3551 cachefs_lookup_common(vnode_t *dvp, char *nm, vnode_t **vpp,
3552     struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr)
3553 {
3554 	int error = 0;
3555 	cnode_t *cp, *dcp = VTOC(dvp);
3556 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3557 	struct fid cookie;
3558 	u_offset_t d_offset;
3559 	struct cachefs_req *rp;
3560 	cfs_cid_t cid, dircid;
3561 	uint_t flag;
3562 	uint_t uncached = 0;
3563 
3564 	*vpp = NULL;
3565 
3566 	/*
3567 	 * If lookup is for "", just return dvp.  Don't need
3568 	 * to send it over the wire, look it up in the dnlc,
3569 	 * or perform any access checks.
3570 	 */
3571 	if (*nm == '\0') {
3572 		VN_HOLD(dvp);
3573 		*vpp = dvp;
3574 		return (0);
3575 	}
3576 
3577 	/* can't do lookups in non-directories */
3578 	if (dvp->v_type != VDIR)
3579 		return (ENOTDIR);
3580 
3581 	/* perform access check, also does consistency check if connected */
3582 	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3583 		error = cachefs_access_connected(dvp, VEXEC, 0, cr);
3584 	} else {
3585 		mutex_enter(&dcp->c_statelock);
3586 		error = cachefs_access_local(dcp, VEXEC, cr);
3587 		mutex_exit(&dcp->c_statelock);
3588 	}
3589 	if (error)
3590 		return (error);
3591 
3592 	/*
3593 	 * If lookup is for ".", just return dvp.  Don't need
3594 	 * to send it over the wire or look it up in the dnlc,
3595 	 * just need to check access.
3596 	 */
3597 	if (strcmp(nm, ".") == 0) {
3598 		VN_HOLD(dvp);
3599 		*vpp = dvp;
3600 		return (0);
3601 	}
3602 
3603 	/* check the dnlc */
3604 	*vpp = (vnode_t *)dnlc_lookup(dvp, nm);
3605 	if (*vpp)
3606 		return (0);
3607 
3608 	/* read lock the dir before starting the search */
3609 	rw_enter(&dcp->c_rwlock, RW_READER);
3610 
3611 	mutex_enter(&dcp->c_statelock);
3612 	dircid = dcp->c_id;
3613 
3614 	dcp->c_usage++;
3615 
3616 	/* if front file is not usable, lookup on the back fs */
3617 	if ((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
3618 	    CFS_ISFS_BACKFS_NFSV4(fscp) ||
3619 	    ((dcp->c_filegrp->fg_flags & CFS_FG_READ) == 0)) {
3620 		mutex_exit(&dcp->c_statelock);
3621 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3622 			error = cachefs_lookup_back(dvp, nm, vpp, cr);
3623 		else
3624 			error = ETIMEDOUT;
3625 		goto out;
3626 	}
3627 
3628 	/* if the front file is not populated, try to populate it */
3629 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
3630 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
3631 			error = ETIMEDOUT;
3632 			mutex_exit(&dcp->c_statelock);
3633 			goto out;
3634 		}
3635 
3636 		if (cachefs_async_okay()) {
3637 			/* cannot populate if cache is not writable */
3638 			ASSERT((dcp->c_flags &
3639 			    (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0);
3640 			dcp->c_flags |= CN_ASYNC_POPULATE;
3641 
3642 			rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
3643 			rp->cfs_cmd = CFS_POPULATE;
3644 			rp->cfs_req_u.cu_populate.cpop_vp = dvp;
3645 			rp->cfs_cr = cr;
3646 
3647 			crhold(cr);
3648 			VN_HOLD(dvp);
3649 
3650 			cachefs_addqueue(rp, &fscp->fs_workq);
3651 		} else if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
3652 			error = cachefs_dir_fill(dcp, cr);
3653 			if (error != 0) {
3654 				mutex_exit(&dcp->c_statelock);
3655 				goto out;
3656 			}
3657 		}
3658 		/* no populate if too many asyncs and we have to cache ACLs */
3659 
3660 		mutex_exit(&dcp->c_statelock);
3661 
3662 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3663 			error = cachefs_lookup_back(dvp, nm, vpp, cr);
3664 		else
3665 			error = ETIMEDOUT;
3666 		goto out;
3667 	}
3668 
3669 	/* by now we have a valid cached front file that we can search */
3670 
3671 	ASSERT((dcp->c_flags & CN_ASYNC_POPULATE) == 0);
3672 	error = cachefs_dir_look(dcp, nm, &cookie, &flag,
3673 			&d_offset, &cid);
3674 	mutex_exit(&dcp->c_statelock);
3675 
3676 	if (error) {
3677 		/* if the entry does not have the fid, go get it */
3678 		if (error == EINVAL) {
3679 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3680 				error = cachefs_lookup_back(dvp, nm, vpp, cr);
3681 			else
3682 				error = ETIMEDOUT;
3683 		}
3684 
3685 		/* errors other than does not exist */
3686 		else if (error != ENOENT) {
3687 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3688 				error = cachefs_lookup_back(dvp, nm, vpp, cr);
3689 			else
3690 				error = ETIMEDOUT;
3691 		}
3692 		goto out;
3693 	}
3694 
3695 	/*
3696 	 * Else we found the entry in the cached directory.
3697 	 * Make a cnode for it.
3698 	 */
3699 	error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
3700 	    cr, 0, &cp);
3701 	if (error == ESTALE) {
3702 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3703 		mutex_enter(&dcp->c_statelock);
3704 		cachefs_nocache(dcp);
3705 		mutex_exit(&dcp->c_statelock);
3706 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3707 			error = cachefs_lookup_back(dvp, nm, vpp, cr);
3708 			uncached = 1;
3709 		} else
3710 			error = ETIMEDOUT;
3711 	} else if (error == 0) {
3712 		*vpp = CTOV(cp);
3713 	}
3714 
3715 out:
3716 	if (error == 0) {
3717 		/* put the entry in the dnlc */
3718 		if (cachefs_dnlc)
3719 			dnlc_enter(dvp, nm, *vpp);
3720 
3721 		/* save the cid of the parent so can find the name */
3722 		cp = VTOC(*vpp);
3723 		if (bcmp(&cp->c_metadata.md_parent, &dircid,
3724 		    sizeof (cfs_cid_t)) != 0) {
3725 			mutex_enter(&cp->c_statelock);
3726 			cp->c_metadata.md_parent = dircid;
3727 			cp->c_flags |= CN_UPDATED;
3728 			mutex_exit(&cp->c_statelock);
3729 		}
3730 	}
3731 
3732 	rw_exit(&dcp->c_rwlock);
3733 	if (uncached && dcp->c_metadata.md_flags & MD_PACKED)
3734 		(void) cachefs_pack_common(dvp, cr);
3735 	return (error);
3736 }
3737 
3738 /*
3739  * Called from cachefs_lookup_common when the back file system needs to be
3740  * examined to perform the lookup.
3741  */
3742 static int
3743 cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp,
3744     cred_t *cr)
3745 {
3746 	int error = 0;
3747 	cnode_t *cp, *dcp = VTOC(dvp);
3748 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3749 	vnode_t *backvp = NULL;
3750 	struct vattr va;
3751 	struct fid cookie;
3752 	cfs_cid_t cid;
3753 	uint32_t valid_fid;
3754 
3755 	mutex_enter(&dcp->c_statelock);
3756 
3757 	/* do a lookup on the back FS to get the back vnode */
3758 	if (dcp->c_backvp == NULL) {
3759 		error = cachefs_getbackvp(fscp, dcp);
3760 		if (error)
3761 			goto out;
3762 	}
3763 
3764 	CFS_DPRINT_BACKFS_NFSV4(fscp,
3765 		("cachefs_lookup (nfsv4): dcp %p, dbackvp %p, name %s\n",
3766 		dcp, dcp->c_backvp, nm));
3767 	error = VOP_LOOKUP(dcp->c_backvp, nm, &backvp, (struct pathname *)NULL,
3768 	    0, (vnode_t *)NULL, cr, NULL, NULL, NULL);
3769 	if (error)
3770 		goto out;
3771 	if (IS_DEVVP(backvp)) {
3772 		struct vnode *devvp = backvp;
3773 
3774 		if (VOP_REALVP(devvp, &backvp, NULL) == 0) {
3775 			VN_HOLD(backvp);
3776 			VN_RELE(devvp);
3777 		}
3778 	}
3779 
3780 	/* get the fid and attrs from the back fs */
3781 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
3782 	error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
3783 	if (error)
3784 		goto out;
3785 
3786 	cid.cid_fileno = va.va_nodeid;
3787 	cid.cid_flags = 0;
3788 
3789 #if 0
3790 	/* XXX bob: this is probably no longer necessary */
3791 	/* if the directory entry was incomplete, we can complete it now */
3792 	if ((dcp->c_metadata.md_flags & MD_POPULATED) &&
3793 	    ((dcp->c_flags & CN_ASYNC_POPULATE) == 0) &&
3794 	    (dcp->c_filegrp->fg_flags & CFS_FG_WRITE)) {
3795 		cachefs_dir_modentry(dcp, d_offset, &cookie, &cid);
3796 	}
3797 #endif
3798 
3799 out:
3800 	mutex_exit(&dcp->c_statelock);
3801 
3802 	/* create the cnode */
3803 	if (error == 0) {
3804 		error = cachefs_cnode_make(&cid, fscp,
3805 					(valid_fid ? &cookie : NULL),
3806 					&va, backvp, cr, 0, &cp);
3807 		if (error == 0) {
3808 			*vpp = CTOV(cp);
3809 		}
3810 	}
3811 
3812 	if (backvp)
3813 		VN_RELE(backvp);
3814 
3815 	return (error);
3816 }
3817 
3818 /*ARGSUSED7*/
3819 static int
3820 cachefs_create(vnode_t *dvp, char *nm, vattr_t *vap,
3821     vcexcl_t exclusive, int mode, vnode_t **vpp, cred_t *cr, int flag,
3822     caller_context_t *ct, vsecattr_t *vsecp)
3823 
3824 {
3825 	cnode_t *dcp = VTOC(dvp);
3826 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3827 	cachefscache_t *cachep = fscp->fs_cache;
3828 	int error;
3829 	int connected = 0;
3830 	int held = 0;
3831 
3832 #ifdef CFSDEBUG
3833 	CFS_DEBUG(CFSDEBUG_VOPS)
3834 		printf("cachefs_create: ENTER dvp %p, nm %s\n",
3835 		    (void *)dvp, nm);
3836 #endif
3837 	if (getzoneid() != GLOBAL_ZONEID) {
3838 		error = EPERM;
3839 		goto out;
3840 	}
3841 
3842 	/*
3843 	 * Cachefs only provides pass-through support for NFSv4,
3844 	 * and all vnode operations are passed through to the
3845 	 * back file system. For NFSv4 pass-through to work, only
3846 	 * connected operation is supported, the cnode backvp must
3847 	 * exist, and cachefs optional (eg., disconnectable) flags
3848 	 * are turned off. Assert these conditions to ensure that
3849 	 * the backfilesystem is called for the create operation.
3850 	 */
3851 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3852 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
3853 
3854 	for (;;) {
3855 		/* get (or renew) access to the file system */
3856 		if (held) {
3857 			/* Won't loop with NFSv4 connected behavior */
3858 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3859 			cachefs_cd_release(fscp);
3860 			held = 0;
3861 		}
3862 		error = cachefs_cd_access(fscp, connected, 1);
3863 		if (error)
3864 			break;
3865 		held = 1;
3866 
3867 		/*
3868 		 * if we are connected, perform the remote portion of the
3869 		 * create.
3870 		 */
3871 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3872 			error = cachefs_create_connected(dvp, nm, vap,
3873 			    exclusive, mode, vpp, cr);
3874 			if (CFS_TIMEOUT(fscp, error)) {
3875 				cachefs_cd_release(fscp);
3876 				held = 0;
3877 				cachefs_cd_timedout(fscp);
3878 				connected = 0;
3879 				continue;
3880 			} else if (error) {
3881 				break;
3882 			}
3883 		}
3884 
3885 		/* else we must be disconnected */
3886 		else {
3887 			error = cachefs_create_disconnected(dvp, nm, vap,
3888 			    exclusive, mode, vpp, cr);
3889 			if (CFS_TIMEOUT(fscp, error)) {
3890 				connected = 1;
3891 				continue;
3892 			} else if (error) {
3893 				break;
3894 			}
3895 		}
3896 		break;
3897 	}
3898 
3899 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_CREATE)) {
3900 		fid_t *fidp = NULL;
3901 		ino64_t fileno = 0;
3902 		cnode_t *cp = NULL;
3903 		if (error == 0)
3904 			cp = VTOC(*vpp);
3905 
3906 		if (cp != NULL) {
3907 			fidp = &cp->c_metadata.md_cookie;
3908 			fileno = cp->c_id.cid_fileno;
3909 		}
3910 		cachefs_log_create(cachep, error, fscp->fs_cfsvfsp,
3911 		    fidp, fileno, crgetuid(cr));
3912 	}
3913 
3914 	if (held)
3915 		cachefs_cd_release(fscp);
3916 
3917 	if (error == 0 && CFS_ISFS_NONSHARED(fscp))
3918 		(void) cachefs_pack(dvp, nm, cr);
3919 	if (error == 0 && IS_DEVVP(*vpp)) {
3920 		struct vnode *spcvp;
3921 
3922 		spcvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
3923 		VN_RELE(*vpp);
3924 		if (spcvp == NULL) {
3925 			error = ENOSYS;
3926 		} else {
3927 			*vpp = spcvp;
3928 		}
3929 	}
3930 
3931 #ifdef CFS_CD_DEBUG
3932 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3933 #endif
3934 out:
3935 #ifdef CFSDEBUG
3936 	CFS_DEBUG(CFSDEBUG_VOPS)
3937 		printf("cachefs_create: EXIT error %d\n", error);
3938 #endif
3939 	return (error);
3940 }
3941 
3942 
3943 static int
3944 cachefs_create_connected(vnode_t *dvp, char *nm, vattr_t *vap,
3945     enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr)
3946 {
3947 	cnode_t *dcp = VTOC(dvp);
3948 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3949 	int error;
3950 	vnode_t *tvp = NULL;
3951 	vnode_t *devvp;
3952 	fid_t cookie;
3953 	vattr_t va;
3954 	cnode_t *ncp;
3955 	cfs_cid_t cid;
3956 	vnode_t *vp;
3957 	uint32_t valid_fid;
3958 
3959 	/* special case if file already exists */
3960 	error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
3961 	if (CFS_TIMEOUT(fscp, error))
3962 		return (error);
3963 	if (error == 0) {
3964 		if (exclusive == EXCL)
3965 			error = EEXIST;
3966 		else if (vp->v_type == VDIR && (mode & VWRITE))
3967 			error = EISDIR;
3968 		else if ((error =
3969 		    cachefs_access_connected(vp, mode, 0, cr)) == 0) {
3970 			if ((vap->va_mask & AT_SIZE) && (vp->v_type == VREG)) {
3971 				vap->va_mask = AT_SIZE;
3972 				error = cachefs_setattr_common(vp, vap, 0,
3973 				    cr, NULL);
3974 			}
3975 		}
3976 		if (error) {
3977 			VN_RELE(vp);
3978 		} else
3979 			*vpp = vp;
3980 		return (error);
3981 	}
3982 
3983 	rw_enter(&dcp->c_rwlock, RW_WRITER);
3984 	mutex_enter(&dcp->c_statelock);
3985 
3986 	/* consistency check the directory */
3987 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
3988 	if (error) {
3989 		mutex_exit(&dcp->c_statelock);
3990 		goto out;
3991 	}
3992 
3993 	/* get the backvp if necessary */
3994 	if (dcp->c_backvp == NULL) {
3995 		error = cachefs_getbackvp(fscp, dcp);
3996 		if (error) {
3997 			mutex_exit(&dcp->c_statelock);
3998 			goto out;
3999 		}
4000 	}
4001 
4002 	/* create the file on the back fs */
4003 	CFS_DPRINT_BACKFS_NFSV4(fscp,
4004 		("cachefs_create (nfsv4): dcp %p, dbackvp %p,"
4005 		"name %s\n", dcp, dcp->c_backvp, nm));
4006 	error = VOP_CREATE(dcp->c_backvp, nm, vap, exclusive, mode,
4007 	    &devvp, cr, 0, NULL, NULL);
4008 	mutex_exit(&dcp->c_statelock);
4009 	if (error)
4010 		goto out;
4011 	if (VOP_REALVP(devvp, &tvp, NULL) == 0) {
4012 		VN_HOLD(tvp);
4013 		VN_RELE(devvp);
4014 	} else {
4015 		tvp = devvp;
4016 	}
4017 
4018 	/* get the fid and attrs from the back fs */
4019 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
4020 	error = cachefs_getcookie(tvp, &cookie, &va, cr, valid_fid);
4021 	if (error)
4022 		goto out;
4023 
4024 	/* make the cnode */
4025 	cid.cid_fileno = va.va_nodeid;
4026 	cid.cid_flags = 0;
4027 	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
4028 					&va, tvp, cr, 0, &ncp);
4029 	if (error)
4030 		goto out;
4031 
4032 	*vpp = CTOV(ncp);
4033 
4034 	/* enter it in the parent directory */
4035 	mutex_enter(&dcp->c_statelock);
4036 	if (CFS_ISFS_NONSHARED(fscp) &&
4037 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
4038 		/* see if entry already exists */
4039 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
4040 		error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL);
4041 		if (error == ENOENT) {
4042 			/* entry, does not exist, add the new file */
4043 			error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie,
4044 			    &ncp->c_id, SM_ASYNC);
4045 			if (error) {
4046 				cachefs_nocache(dcp);
4047 				error = 0;
4048 			}
4049 			/* XXX should this be done elsewhere, too? */
4050 			dnlc_enter(dvp, nm, *vpp);
4051 		} else {
4052 			/* entry exists or some other problem */
4053 			cachefs_nocache(dcp);
4054 			error = 0;
4055 		}
4056 	}
4057 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
4058 	mutex_exit(&dcp->c_statelock);
4059 
4060 out:
4061 	rw_exit(&dcp->c_rwlock);
4062 	if (tvp)
4063 		VN_RELE(tvp);
4064 
4065 	return (error);
4066 }
4067 
4068 static int
4069 cachefs_create_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
4070 	enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr)
4071 {
4072 	cnode_t *dcp = VTOC(dvp);
4073 	cnode_t *cp;
4074 	cnode_t *ncp = NULL;
4075 	vnode_t *vp;
4076 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4077 	int error = 0;
4078 	struct vattr va;
4079 	timestruc_t current_time;
4080 	off_t commit = 0;
4081 	fid_t cookie;
4082 	cfs_cid_t cid;
4083 
4084 	rw_enter(&dcp->c_rwlock, RW_WRITER);
4085 	mutex_enter(&dcp->c_statelock);
4086 
4087 	/* give up if the directory is not populated */
4088 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
4089 		mutex_exit(&dcp->c_statelock);
4090 		rw_exit(&dcp->c_rwlock);
4091 		return (ETIMEDOUT);
4092 	}
4093 
4094 	/* special case if file already exists */
4095 	error = cachefs_dir_look(dcp, nm, &cookie, NULL, NULL, &cid);
4096 	if (error == EINVAL) {
4097 		mutex_exit(&dcp->c_statelock);
4098 		rw_exit(&dcp->c_rwlock);
4099 		return (ETIMEDOUT);
4100 	}
4101 	if (error == 0) {
4102 		mutex_exit(&dcp->c_statelock);
4103 		rw_exit(&dcp->c_rwlock);
4104 		error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
4105 		    cr, 0, &cp);
4106 		if (error) {
4107 			return (error);
4108 		}
4109 		vp = CTOV(cp);
4110 
4111 		if (cp->c_metadata.md_flags & MD_NEEDATTRS)
4112 			error = ETIMEDOUT;
4113 		else if (exclusive == EXCL)
4114 			error = EEXIST;
4115 		else if (vp->v_type == VDIR && (mode & VWRITE))
4116 			error = EISDIR;
4117 		else {
4118 			mutex_enter(&cp->c_statelock);
4119 			error = cachefs_access_local(cp, mode, cr);
4120 			mutex_exit(&cp->c_statelock);
4121 			if (!error) {
4122 				if ((vap->va_mask & AT_SIZE) &&
4123 				    (vp->v_type == VREG)) {
4124 					vap->va_mask = AT_SIZE;
4125 					error = cachefs_setattr_common(vp,
4126 					    vap, 0, cr, NULL);
4127 				}
4128 			}
4129 		}
4130 		if (error) {
4131 			VN_RELE(vp);
4132 		} else
4133 			*vpp = vp;
4134 		return (error);
4135 	}
4136 
4137 	/* give up if cannot modify the cache */
4138 	if (CFS_ISFS_WRITE_AROUND(fscp)) {
4139 		mutex_exit(&dcp->c_statelock);
4140 		error = ETIMEDOUT;
4141 		goto out;
4142 	}
4143 
4144 	/* check access */
4145 	if (error = cachefs_access_local(dcp, VWRITE, cr)) {
4146 		mutex_exit(&dcp->c_statelock);
4147 		goto out;
4148 	}
4149 
4150 	/* mark dir as modified */
4151 	cachefs_modified(dcp);
4152 	mutex_exit(&dcp->c_statelock);
4153 
4154 	/* must be privileged to set sticky bit */
4155 	if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
4156 		vap->va_mode &= ~VSVTX;
4157 
4158 	/* make up a reasonable set of attributes */
4159 	cachefs_attr_setup(vap, &va, dcp, cr);
4160 
4161 	/* create the cnode */
4162 	error = cachefs_cnode_create(fscp, &va, 0, &ncp);
4163 	if (error)
4164 		goto out;
4165 
4166 	mutex_enter(&ncp->c_statelock);
4167 
4168 	/* get the front file now instead of later */
4169 	if (vap->va_type == VREG) {
4170 		error = cachefs_getfrontfile(ncp);
4171 		if (error) {
4172 			mutex_exit(&ncp->c_statelock);
4173 			goto out;
4174 		}
4175 		ASSERT(ncp->c_frontvp != NULL);
4176 		ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
4177 		ncp->c_metadata.md_flags |= MD_POPULATED;
4178 	} else {
4179 		ASSERT(ncp->c_flags & CN_ALLOC_PENDING);
4180 		if (ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
4181 			(void) filegrp_allocattr(ncp->c_filegrp);
4182 		}
4183 		error = filegrp_create_metadata(ncp->c_filegrp,
4184 		    &ncp->c_metadata, &ncp->c_id);
4185 		if (error) {
4186 			mutex_exit(&ncp->c_statelock);
4187 			goto out;
4188 		}
4189 		ncp->c_flags &= ~CN_ALLOC_PENDING;
4190 	}
4191 	mutex_enter(&dcp->c_statelock);
4192 	cachefs_creategid(dcp, ncp, vap, cr);
4193 	cachefs_createacl(dcp, ncp);
4194 	mutex_exit(&dcp->c_statelock);
4195 
4196 	/* set times on the file */
4197 	gethrestime(&current_time);
4198 	ncp->c_metadata.md_vattr.va_atime = current_time;
4199 	ncp->c_metadata.md_localctime = current_time;
4200 	ncp->c_metadata.md_localmtime = current_time;
4201 	ncp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME;
4202 
4203 	/* reserve space for the daemon cid mapping */
4204 	error = cachefs_dlog_cidmap(fscp);
4205 	if (error) {
4206 		mutex_exit(&ncp->c_statelock);
4207 		goto out;
4208 	}
4209 	ncp->c_metadata.md_flags |= MD_MAPPING;
4210 
4211 	/* mark the new file as modified */
4212 	if (cachefs_modified_alloc(ncp)) {
4213 		mutex_exit(&ncp->c_statelock);
4214 		error = ENOSPC;
4215 		goto out;
4216 	}
4217 	ncp->c_flags |= CN_UPDATED;
4218 
4219 	/*
4220 	 * write the metadata now rather than waiting until
4221 	 * inactive so that if there's no space we can let
4222 	 * the caller know.
4223 	 */
4224 	ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
4225 	ASSERT((ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
4226 	error = filegrp_write_metadata(ncp->c_filegrp,
4227 	    &ncp->c_id, &ncp->c_metadata);
4228 	if (error) {
4229 		mutex_exit(&ncp->c_statelock);
4230 		goto out;
4231 	}
4232 
4233 	/* log the operation */
4234 	commit = cachefs_dlog_create(fscp, dcp, nm, vap, exclusive,
4235 	    mode, ncp, 0, cr);
4236 	if (commit == 0) {
4237 		mutex_exit(&ncp->c_statelock);
4238 		error = ENOSPC;
4239 		goto out;
4240 	}
4241 
4242 	mutex_exit(&ncp->c_statelock);
4243 
4244 	mutex_enter(&dcp->c_statelock);
4245 
4246 	/* update parent dir times */
4247 	dcp->c_metadata.md_localmtime = current_time;
4248 	dcp->c_metadata.md_flags |= MD_LOCALMTIME;
4249 	dcp->c_flags |= CN_UPDATED;
4250 
4251 	/* enter new file name in the parent directory */
4252 	if (dcp->c_metadata.md_flags & MD_POPULATED) {
4253 		error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie,
4254 		    &ncp->c_id, 0);
4255 		if (error) {
4256 			cachefs_nocache(dcp);
4257 			mutex_exit(&dcp->c_statelock);
4258 			error = ETIMEDOUT;
4259 			goto out;
4260 		}
4261 		dnlc_enter(dvp, nm, CTOV(ncp));
4262 	} else {
4263 		mutex_exit(&dcp->c_statelock);
4264 		error = ETIMEDOUT;
4265 		goto out;
4266 	}
4267 	mutex_exit(&dcp->c_statelock);
4268 
4269 out:
4270 	rw_exit(&dcp->c_rwlock);
4271 
4272 	if (commit) {
4273 		if (cachefs_dlog_commit(fscp, commit, error)) {
4274 			/*EMPTY*/
4275 			/* XXX bob: fix on panic */
4276 		}
4277 	}
4278 	if (error) {
4279 		/* destroy the cnode we created */
4280 		if (ncp) {
4281 			mutex_enter(&ncp->c_statelock);
4282 			ncp->c_flags |= CN_DESTROY;
4283 			mutex_exit(&ncp->c_statelock);
4284 			VN_RELE(CTOV(ncp));
4285 		}
4286 	} else {
4287 		*vpp = CTOV(ncp);
4288 	}
4289 	return (error);
4290 }
4291 
4292 /*ARGSUSED*/
4293 static int
4294 cachefs_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
4295     int flags)
4296 {
4297 	cnode_t *dcp = VTOC(dvp);
4298 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4299 	cachefscache_t *cachep = fscp->fs_cache;
4300 	int error = 0;
4301 	int held = 0;
4302 	int connected = 0;
4303 	size_t namlen;
4304 	vnode_t *vp = NULL;
4305 	int vfslock = 0;
4306 
4307 #ifdef CFSDEBUG
4308 	CFS_DEBUG(CFSDEBUG_VOPS)
4309 		printf("cachefs_remove: ENTER dvp %p name %s\n",
4310 		    (void *)dvp, nm);
4311 #endif
4312 	if (getzoneid() != GLOBAL_ZONEID) {
4313 		error = EPERM;
4314 		goto out;
4315 	}
4316 
4317 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
4318 		ASSERT(dcp->c_flags & CN_NOCACHE);
4319 
4320 	/*
4321 	 * Cachefs only provides pass-through support for NFSv4,
4322 	 * and all vnode operations are passed through to the
4323 	 * back file system. For NFSv4 pass-through to work, only
4324 	 * connected operation is supported, the cnode backvp must
4325 	 * exist, and cachefs optional (eg., disconnectable) flags
4326 	 * are turned off. Assert these conditions to ensure that
4327 	 * the backfilesystem is called for the remove operation.
4328 	 */
4329 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4330 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
4331 
4332 	for (;;) {
4333 		if (vfslock) {
4334 			vn_vfsunlock(vp);
4335 			vfslock = 0;
4336 		}
4337 		if (vp) {
4338 			VN_RELE(vp);
4339 			vp = NULL;
4340 		}
4341 
4342 		/* get (or renew) access to the file system */
4343 		if (held) {
4344 			/* Won't loop with NFSv4 connected behavior */
4345 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
4346 			cachefs_cd_release(fscp);
4347 			held = 0;
4348 		}
4349 		error = cachefs_cd_access(fscp, connected, 1);
4350 		if (error)
4351 			break;
4352 		held = 1;
4353 
4354 		/* if disconnected, do some extra error checking */
4355 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
4356 			/* check permissions */
4357 			mutex_enter(&dcp->c_statelock);
4358 			error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
4359 			mutex_exit(&dcp->c_statelock);
4360 			if (CFS_TIMEOUT(fscp, error)) {
4361 				connected = 1;
4362 				continue;
4363 			}
4364 			if (error)
4365 				break;
4366 
4367 			namlen = strlen(nm);
4368 			if (namlen == 0) {
4369 				error = EINVAL;
4370 				break;
4371 			}
4372 
4373 			/* cannot remove . and .. */
4374 			if (nm[0] == '.') {
4375 				if (namlen == 1) {
4376 					error = EINVAL;
4377 					break;
4378 				} else if (namlen == 2 && nm[1] == '.') {
4379 					error = EEXIST;
4380 					break;
4381 				}
4382 			}
4383 
4384 		}
4385 
4386 		/* get the cnode of the file to delete */
4387 		error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
4388 		if (error) {
4389 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
4390 				if (CFS_TIMEOUT(fscp, error)) {
4391 					cachefs_cd_release(fscp);
4392 					held = 0;
4393 					cachefs_cd_timedout(fscp);
4394 					connected = 0;
4395 					continue;
4396 				}
4397 			} else {
4398 				if (CFS_TIMEOUT(fscp, error)) {
4399 					connected = 1;
4400 					continue;
4401 				}
4402 			}
4403 			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE)) {
4404 				struct fid foo;
4405 
4406 				bzero(&foo, sizeof (foo));
4407 				cachefs_log_remove(cachep, error,
4408 				    fscp->fs_cfsvfsp, &foo, 0, crgetuid(cr));
4409 			}
4410 			break;
4411 		}
4412 
4413 		if (vp->v_type == VDIR) {
4414 			/* must be privileged to remove dirs with unlink() */
4415 			if ((error = secpolicy_fs_linkdir(cr, vp->v_vfsp)) != 0)
4416 				break;
4417 
4418 			/* see ufs_dirremove for why this is done, mount race */
4419 			if (vn_vfswlock(vp)) {
4420 				error = EBUSY;
4421 				break;
4422 			}
4423 			vfslock = 1;
4424 			if (vn_mountedvfs(vp) != NULL) {
4425 				error = EBUSY;
4426 				break;
4427 			}
4428 		}
4429 
4430 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
4431 			error = cachefs_remove_connected(dvp, nm, cr, vp);
4432 			if (CFS_TIMEOUT(fscp, error)) {
4433 				cachefs_cd_release(fscp);
4434 				held = 0;
4435 				cachefs_cd_timedout(fscp);
4436 				connected = 0;
4437 				continue;
4438 			}
4439 		} else {
4440 			error = cachefs_remove_disconnected(dvp, nm, cr,
4441 				vp);
4442 			if (CFS_TIMEOUT(fscp, error)) {
4443 				connected = 1;
4444 				continue;
4445 			}
4446 		}
4447 		break;
4448 	}
4449 
4450 #if 0
4451 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE))
4452 		cachefs_log_remove(cachep, error, fscp->fs_cfsvfsp,
4453 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
4454 		    crgetuid(cr));
4455 #endif
4456 
4457 	if (held)
4458 		cachefs_cd_release(fscp);
4459 
4460 	if (vfslock)
4461 		vn_vfsunlock(vp);
4462 
4463 	if (vp)
4464 		VN_RELE(vp);
4465 
4466 #ifdef CFS_CD_DEBUG
4467 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
4468 #endif
4469 out:
4470 #ifdef CFSDEBUG
4471 	CFS_DEBUG(CFSDEBUG_VOPS)
4472 		printf("cachefs_remove: EXIT dvp %p\n", (void *)dvp);
4473 #endif
4474 
4475 	return (error);
4476 }
4477 
4478 int
4479 cachefs_remove_connected(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
4480 {
4481 	cnode_t *dcp = VTOC(dvp);
4482 	cnode_t *cp = VTOC(vp);
4483 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4484 	int error = 0;
4485 
4486 	/*
4487 	 * Acquire the rwlock (WRITER) on the directory to prevent other
4488 	 * activity on the directory.
4489 	 */
4490 	rw_enter(&dcp->c_rwlock, RW_WRITER);
4491 
4492 	/* purge dnlc of this entry so can get accurate vnode count */
4493 	dnlc_purge_vp(vp);
4494 
4495 	/*
4496 	 * If the cnode is active, make a link to the file
4497 	 * so operations on the file will continue.
4498 	 */
4499 	if ((vp->v_type != VDIR) &&
4500 	    !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) {
4501 		error = cachefs_remove_dolink(dvp, vp, nm, cr);
4502 		if (error)
4503 			goto out;
4504 	}
4505 
4506 	/* else call backfs NFSv4 handler if NFSv4 */
4507 	else if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
4508 		error = cachefs_remove_backfs_nfsv4(dvp, nm, cr, vp);
4509 		goto out;
4510 	}
4511 
4512 	/* else drop the backvp so nfs does not do rename */
4513 	else if (cp->c_backvp) {
4514 		mutex_enter(&cp->c_statelock);
4515 		if (cp->c_backvp) {
4516 			VN_RELE(cp->c_backvp);
4517 			cp->c_backvp = NULL;
4518 		}
4519 		mutex_exit(&cp->c_statelock);
4520 	}
4521 
4522 	mutex_enter(&dcp->c_statelock);
4523 
4524 	/* get the backvp */
4525 	if (dcp->c_backvp == NULL) {
4526 		error = cachefs_getbackvp(fscp, dcp);
4527 		if (error) {
4528 			mutex_exit(&dcp->c_statelock);
4529 			goto out;
4530 		}
4531 	}
4532 
4533 	/* check directory consistency */
4534 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
4535 	if (error) {
4536 		mutex_exit(&dcp->c_statelock);
4537 		goto out;
4538 	}
4539 
4540 	/* perform the remove on the back fs */
4541 	error = VOP_REMOVE(dcp->c_backvp, nm, cr, NULL, 0);
4542 	if (error) {
4543 		mutex_exit(&dcp->c_statelock);
4544 		goto out;
4545 	}
4546 
4547 	/* the dir has been modified */
4548 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
4549 
4550 	/* remove the entry from the populated directory */
4551 	if (CFS_ISFS_NONSHARED(fscp) &&
4552 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
4553 		error = cachefs_dir_rmentry(dcp, nm);
4554 		if (error) {
4555 			cachefs_nocache(dcp);
4556 			error = 0;
4557 		}
4558 	}
4559 	mutex_exit(&dcp->c_statelock);
4560 
4561 	/* fix up the file we deleted */
4562 	mutex_enter(&cp->c_statelock);
4563 	if (cp->c_attr.va_nlink == 1)
4564 		cp->c_flags |= CN_DESTROY;
4565 	else
4566 		cp->c_flags |= CN_UPDATED;
4567 
4568 	cp->c_attr.va_nlink--;
4569 	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
4570 	mutex_exit(&cp->c_statelock);
4571 
4572 out:
4573 	rw_exit(&dcp->c_rwlock);
4574 	return (error);
4575 }
4576 
4577 /*
4578  * cachefs_remove_backfs_nfsv4
4579  *
4580  * Call NFSv4 back filesystem to handle the remove (cachefs
4581  * pass-through support for NFSv4).
4582  */
4583 int
4584 cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
4585 {
4586 	cnode_t *dcp = VTOC(dvp);
4587 	cnode_t *cp = VTOC(vp);
4588 	vnode_t *dbackvp;
4589 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4590 	int error = 0;
4591 
4592 	/*
4593 	 * For NFSv4 pass-through to work, only connected operation
4594 	 * is supported, the cnode backvp must exist, and cachefs
4595 	 * optional (eg., disconnectable) flags are turned off. Assert
4596 	 * these conditions for the getattr operation.
4597 	 */
4598 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4599 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
4600 
4601 	/* Should hold the directory readwrite lock to update directory */
4602 	ASSERT(RW_WRITE_HELD(&dcp->c_rwlock));
4603 
4604 	/*
4605 	 * Update attributes for directory. Note that
4606 	 * CFSOP_CHECK_COBJECT asserts for c_statelock being
4607 	 * held, so grab it before calling the routine.
4608 	 */
4609 	mutex_enter(&dcp->c_statelock);
4610 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
4611 	mutex_exit(&dcp->c_statelock);
4612 	if (error)
4613 		goto out;
4614 
4615 	/*
4616 	 * Update attributes for cp. Note that CFSOP_CHECK_COBJECT
4617 	 * asserts for c_statelock being held, so grab it before
4618 	 * calling the routine.
4619 	 */
4620 	mutex_enter(&cp->c_statelock);
4621 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
4622 	if (error) {
4623 		mutex_exit(&cp->c_statelock);
4624 		goto out;
4625 	}
4626 
4627 	/*
4628 	 * Drop the backvp so nfs if the link count is 1 so that
4629 	 * nfs does not do rename. Ensure that we will destroy the cnode
4630 	 * since this cnode no longer contains the backvp. Note that we
4631 	 * maintain lock on this cnode to prevent change till the remove
4632 	 * completes, otherwise other operations will encounter an ESTALE
4633 	 * if they try to use the cnode with CN_DESTROY set (see
4634 	 * cachefs_get_backvp()), or change the state of the cnode
4635 	 * while we're removing it.
4636 	 */
4637 	if (cp->c_attr.va_nlink == 1) {
4638 		/*
4639 		 * The unldvp information is created for the case
4640 		 * when there is more than one reference on the
4641 		 * vnode when a remove operation is called. If the
4642 		 * remove itself was holding a reference to the
4643 		 * vnode, then a subsequent remove will remove the
4644 		 * backvp, so we need to get rid of the unldvp
4645 		 * before removing the backvp. An alternate would
4646 		 * be to simply ignore the remove and let the
4647 		 * inactivation routine do the deletion of the
4648 		 * unldvp.
4649 		 */
4650 		if (cp->c_unldvp) {
4651 			VN_RELE(cp->c_unldvp);
4652 			cachefs_kmem_free(cp->c_unlname, MAXNAMELEN);
4653 			crfree(cp->c_unlcred);
4654 			cp->c_unldvp = NULL;
4655 			cp->c_unlcred = NULL;
4656 		}
4657 		cp->c_flags |= CN_DESTROY;
4658 		cp->c_attr.va_nlink = 0;
4659 		VN_RELE(cp->c_backvp);
4660 		cp->c_backvp = NULL;
4661 	}
4662 
4663 	/* perform the remove on back fs after extracting directory backvp */
4664 	mutex_enter(&dcp->c_statelock);
4665 	dbackvp = dcp->c_backvp;
4666 	mutex_exit(&dcp->c_statelock);
4667 
4668 	CFS_DPRINT_BACKFS_NFSV4(fscp,
4669 		("cachefs_remove (nfsv4): dcp %p, dbackvp %p, name %s\n",
4670 		dcp, dbackvp, nm));
4671 	error = VOP_REMOVE(dbackvp, nm, cr, NULL, 0);
4672 	if (error) {
4673 		mutex_exit(&cp->c_statelock);
4674 		goto out;
4675 	}
4676 
4677 	/* fix up the file we deleted, if not destroying the cnode */
4678 	if ((cp->c_flags & CN_DESTROY) == 0) {
4679 		cp->c_attr.va_nlink--;
4680 		cp->c_flags |= CN_UPDATED;
4681 	}
4682 
4683 	mutex_exit(&cp->c_statelock);
4684 
4685 out:
4686 	return (error);
4687 }
4688 
4689 int
4690 cachefs_remove_disconnected(vnode_t *dvp, char *nm, cred_t *cr,
4691     vnode_t *vp)
4692 {
4693 	cnode_t *dcp = VTOC(dvp);
4694 	cnode_t *cp = VTOC(vp);
4695 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4696 	int error = 0;
4697 	off_t commit = 0;
4698 	timestruc_t current_time;
4699 
4700 	if (CFS_ISFS_WRITE_AROUND(fscp))
4701 		return (ETIMEDOUT);
4702 
4703 	if (cp->c_metadata.md_flags & MD_NEEDATTRS)
4704 		return (ETIMEDOUT);
4705 
4706 	/*
4707 	 * Acquire the rwlock (WRITER) on the directory to prevent other
4708 	 * activity on the directory.
4709 	 */
4710 	rw_enter(&dcp->c_rwlock, RW_WRITER);
4711 
4712 	/* dir must be populated */
4713 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
4714 		error = ETIMEDOUT;
4715 		goto out;
4716 	}
4717 
4718 	mutex_enter(&dcp->c_statelock);
4719 	mutex_enter(&cp->c_statelock);
4720 
4721 	error = cachefs_stickyrmchk(dcp, cp, cr);
4722 
4723 	mutex_exit(&cp->c_statelock);
4724 	mutex_exit(&dcp->c_statelock);
4725 	if (error)
4726 		goto out;
4727 
4728 	/* purge dnlc of this entry so can get accurate vnode count */
4729 	dnlc_purge_vp(vp);
4730 
4731 	/*
4732 	 * If the cnode is active, make a link to the file
4733 	 * so operations on the file will continue.
4734 	 */
4735 	if ((vp->v_type != VDIR) &&
4736 	    !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) {
4737 		error = cachefs_remove_dolink(dvp, vp, nm, cr);
4738 		if (error)
4739 			goto out;
4740 	}
4741 
4742 	if (cp->c_attr.va_nlink > 1) {
4743 		mutex_enter(&cp->c_statelock);
4744 		if (cachefs_modified_alloc(cp)) {
4745 			mutex_exit(&cp->c_statelock);
4746 			error = ENOSPC;
4747 			goto out;
4748 		}
4749 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
4750 			error = cachefs_dlog_cidmap(fscp);
4751 			if (error) {
4752 				mutex_exit(&cp->c_statelock);
4753 				error = ENOSPC;
4754 				goto out;
4755 			}
4756 			cp->c_metadata.md_flags |= MD_MAPPING;
4757 			cp->c_flags |= CN_UPDATED;
4758 		}
4759 		mutex_exit(&cp->c_statelock);
4760 	}
4761 
4762 	/* log the remove */
4763 	commit = cachefs_dlog_remove(fscp, dcp, nm, cp, cr);
4764 	if (commit == 0) {
4765 		error = ENOSPC;
4766 		goto out;
4767 	}
4768 
4769 	/* remove the file from the dir */
4770 	mutex_enter(&dcp->c_statelock);
4771 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
4772 		mutex_exit(&dcp->c_statelock);
4773 		error = ETIMEDOUT;
4774 		goto out;
4775 
4776 	}
4777 	cachefs_modified(dcp);
4778 	error = cachefs_dir_rmentry(dcp, nm);
4779 	if (error) {
4780 		mutex_exit(&dcp->c_statelock);
4781 		if (error == ENOTDIR)
4782 			error = ETIMEDOUT;
4783 		goto out;
4784 	}
4785 
4786 	/* update parent dir times */
4787 	gethrestime(&current_time);
4788 	dcp->c_metadata.md_localctime = current_time;
4789 	dcp->c_metadata.md_localmtime = current_time;
4790 	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
4791 	dcp->c_flags |= CN_UPDATED;
4792 	mutex_exit(&dcp->c_statelock);
4793 
4794 	/* adjust file we are deleting */
4795 	mutex_enter(&cp->c_statelock);
4796 	cp->c_attr.va_nlink--;
4797 	cp->c_metadata.md_localctime = current_time;
4798 	cp->c_metadata.md_flags |= MD_LOCALCTIME;
4799 	if (cp->c_attr.va_nlink == 0) {
4800 		cp->c_flags |= CN_DESTROY;
4801 	} else {
4802 		cp->c_flags |= CN_UPDATED;
4803 	}
4804 	mutex_exit(&cp->c_statelock);
4805 
4806 out:
4807 	if (commit) {
4808 		/* commit the log entry */
4809 		if (cachefs_dlog_commit(fscp, commit, error)) {
4810 			/*EMPTY*/
4811 			/* XXX bob: fix on panic */
4812 		}
4813 	}
4814 
4815 	rw_exit(&dcp->c_rwlock);
4816 	return (error);
4817 }
4818 
4819 /*ARGSUSED*/
4820 static int
4821 cachefs_link(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr,
4822     caller_context_t *ct, int flags)
4823 {
4824 	fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
4825 	cnode_t *tdcp = VTOC(tdvp);
4826 	struct vnode *realvp;
4827 	int error = 0;
4828 	int held = 0;
4829 	int connected = 0;
4830 
4831 #ifdef CFSDEBUG
4832 	CFS_DEBUG(CFSDEBUG_VOPS)
4833 		printf("cachefs_link: ENTER fvp %p tdvp %p tnm %s\n",
4834 			(void *)fvp, (void *)tdvp, tnm);
4835 #endif
4836 
4837 	if (getzoneid() != GLOBAL_ZONEID) {
4838 		error = EPERM;
4839 		goto out;
4840 	}
4841 
4842 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
4843 		ASSERT(tdcp->c_flags & CN_NOCACHE);
4844 
4845 	if (VOP_REALVP(fvp, &realvp, ct) == 0) {
4846 		fvp = realvp;
4847 	}
4848 
4849 	/*
4850 	 * Cachefs only provides pass-through support for NFSv4,
4851 	 * and all vnode operations are passed through to the
4852 	 * back file system. For NFSv4 pass-through to work, only
4853 	 * connected operation is supported, the cnode backvp must
4854 	 * exist, and cachefs optional (eg., disconnectable) flags
4855 	 * are turned off. Assert these conditions to ensure that
4856 	 * the backfilesystem is called for the link operation.
4857 	 */
4858 
4859 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4860 	CFS_BACKFS_NFSV4_ASSERT_CNODE(tdcp);
4861 
4862 	for (;;) {
4863 		/* get (or renew) access to the file system */
4864 		if (held) {
4865 			/* Won't loop with NFSv4 connected behavior */
4866 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
4867 			rw_exit(&tdcp->c_rwlock);
4868 			cachefs_cd_release(fscp);
4869 			held = 0;
4870 		}
4871 		error = cachefs_cd_access(fscp, connected, 1);
4872 		if (error)
4873 			break;
4874 		rw_enter(&tdcp->c_rwlock, RW_WRITER);
4875 		held = 1;
4876 
4877 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
4878 			error = cachefs_link_connected(tdvp, fvp, tnm, cr);
4879 			if (CFS_TIMEOUT(fscp, error)) {
4880 				rw_exit(&tdcp->c_rwlock);
4881 				cachefs_cd_release(fscp);
4882 				held = 0;
4883 				cachefs_cd_timedout(fscp);
4884 				connected = 0;
4885 				continue;
4886 			}
4887 		} else {
4888 			error = cachefs_link_disconnected(tdvp, fvp, tnm,
4889 				cr);
4890 			if (CFS_TIMEOUT(fscp, error)) {
4891 				connected = 1;
4892 				continue;
4893 			}
4894 		}
4895 		break;
4896 	}
4897 
4898 	if (held) {
4899 		rw_exit(&tdcp->c_rwlock);
4900 		cachefs_cd_release(fscp);
4901 	}
4902 
4903 #ifdef CFS_CD_DEBUG
4904 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
4905 #endif
4906 out:
4907 #ifdef CFSDEBUG
4908 	CFS_DEBUG(CFSDEBUG_VOPS)
4909 		printf("cachefs_link: EXIT fvp %p tdvp %p tnm %s\n",
4910 			(void *)fvp, (void *)tdvp, tnm);
4911 #endif
4912 	return (error);
4913 }
4914 
4915 static int
4916 cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr)
4917 {
4918 	cnode_t *tdcp = VTOC(tdvp);
4919 	cnode_t *fcp = VTOC(fvp);
4920 	fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
4921 	int error = 0;
4922 	vnode_t *backvp = NULL;
4923 
4924 	if (tdcp != fcp) {
4925 		mutex_enter(&fcp->c_statelock);
4926 
4927 		if (fcp->c_backvp == NULL) {
4928 			error = cachefs_getbackvp(fscp, fcp);
4929 			if (error) {
4930 				mutex_exit(&fcp->c_statelock);
4931 				goto out;
4932 			}
4933 		}
4934 
4935 		error = CFSOP_CHECK_COBJECT(fscp, fcp, 0, cr);
4936 		if (error) {
4937 			mutex_exit(&fcp->c_statelock);
4938 			goto out;
4939 		}
4940 		backvp = fcp->c_backvp;
4941 		VN_HOLD(backvp);
4942 		mutex_exit(&fcp->c_statelock);
4943 	}
4944 
4945 	mutex_enter(&tdcp->c_statelock);
4946 
4947 	/* get backvp of target directory */
4948 	if (tdcp->c_backvp == NULL) {
4949 		error = cachefs_getbackvp(fscp, tdcp);
4950 		if (error) {
4951 			mutex_exit(&tdcp->c_statelock);
4952 			goto out;
4953 		}
4954 	}
4955 
4956 	/* consistency check target directory */
4957 	error = CFSOP_CHECK_COBJECT(fscp, tdcp, 0, cr);
4958 	if (error) {
4959 		mutex_exit(&tdcp->c_statelock);
4960 		goto out;
4961 	}
4962 	if (backvp == NULL) {
4963 		backvp = tdcp->c_backvp;
4964 		VN_HOLD(backvp);
4965 	}
4966 
4967 	/* perform the link on the back fs */
4968 	CFS_DPRINT_BACKFS_NFSV4(fscp,
4969 		("cachefs_link (nfsv4): tdcp %p, tdbackvp %p, "
4970 		"name %s\n", tdcp, tdcp->c_backvp, tnm));
4971 	error = VOP_LINK(tdcp->c_backvp, backvp, tnm, cr, NULL, 0);
4972 	if (error) {
4973 		mutex_exit(&tdcp->c_statelock);
4974 		goto out;
4975 	}
4976 
4977 	CFSOP_MODIFY_COBJECT(fscp, tdcp, cr);
4978 
4979 	/* if the dir is populated, add the new link */
4980 	if (CFS_ISFS_NONSHARED(fscp) &&
4981 	    (tdcp->c_metadata.md_flags & MD_POPULATED)) {
4982 		error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie,
4983 		    &fcp->c_id, SM_ASYNC);
4984 		if (error) {
4985 			cachefs_nocache(tdcp);
4986 			error = 0;
4987 		}
4988 	}
4989 	mutex_exit(&tdcp->c_statelock);
4990 
4991 	/* get the new link count on the file */
4992 	mutex_enter(&fcp->c_statelock);
4993 	fcp->c_flags |= CN_UPDATED;
4994 	CFSOP_MODIFY_COBJECT(fscp, fcp, cr);
4995 	if (fcp->c_backvp == NULL) {
4996 		error = cachefs_getbackvp(fscp, fcp);
4997 		if (error) {
4998 			mutex_exit(&fcp->c_statelock);
4999 			goto out;
5000 		}
5001 	}
5002 
5003 	/* XXX bob: given what modify_cobject does this seems unnecessary */
5004 	fcp->c_attr.va_mask = AT_ALL;
5005 	error = VOP_GETATTR(fcp->c_backvp, &fcp->c_attr, 0, cr, NULL);
5006 	mutex_exit(&fcp->c_statelock);
5007 out:
5008 	if (backvp)
5009 		VN_RELE(backvp);
5010 
5011 	return (error);
5012 }
5013 
5014 static int
5015 cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp, char *tnm,
5016     cred_t *cr)
5017 {
5018 	cnode_t *tdcp = VTOC(tdvp);
5019 	cnode_t *fcp = VTOC(fvp);
5020 	fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
5021 	int error = 0;
5022 	timestruc_t current_time;
5023 	off_t commit = 0;
5024 
5025 	if (fvp->v_type == VDIR && secpolicy_fs_linkdir(cr, fvp->v_vfsp) != 0 ||
5026 	    fcp->c_attr.va_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)
5027 		return (EPERM);
5028 
5029 	if (CFS_ISFS_WRITE_AROUND(fscp))
5030 		return (ETIMEDOUT);
5031 
5032 	if (fcp->c_metadata.md_flags & MD_NEEDATTRS)
5033 		return (ETIMEDOUT);
5034 
5035 	mutex_enter(&tdcp->c_statelock);
5036 
5037 	/* check permissions */
5038 	if (error = cachefs_access_local(tdcp, (VEXEC|VWRITE), cr)) {
5039 		mutex_exit(&tdcp->c_statelock);
5040 		goto out;
5041 	}
5042 
5043 	/* the directory front file must be populated */
5044 	if ((tdcp->c_metadata.md_flags & MD_POPULATED) == 0) {
5045 		error = ETIMEDOUT;
5046 		mutex_exit(&tdcp->c_statelock);
5047 		goto out;
5048 	}
5049 
5050 	/* make sure tnm does not already exist in the directory */
5051 	error = cachefs_dir_look(tdcp, tnm, NULL, NULL, NULL, NULL);
5052 	if (error == ENOTDIR) {
5053 		error = ETIMEDOUT;
5054 		mutex_exit(&tdcp->c_statelock);
5055 		goto out;
5056 	}
5057 	if (error != ENOENT) {
5058 		error = EEXIST;
5059 		mutex_exit(&tdcp->c_statelock);
5060 		goto out;
5061 	}
5062 
5063 	mutex_enter(&fcp->c_statelock);
5064 
5065 	/* create a mapping for the file if necessary */
5066 	if ((fcp->c_metadata.md_flags & MD_MAPPING) == 0) {
5067 		error = cachefs_dlog_cidmap(fscp);
5068 		if (error) {
5069 			mutex_exit(&fcp->c_statelock);
5070 			mutex_exit(&tdcp->c_statelock);
5071 			error = ENOSPC;
5072 			goto out;
5073 		}
5074 		fcp->c_metadata.md_flags |= MD_MAPPING;
5075 		fcp->c_flags |= CN_UPDATED;
5076 	}
5077 
5078 	/* mark file as modified */
5079 	if (cachefs_modified_alloc(fcp)) {
5080 		mutex_exit(&fcp->c_statelock);
5081 		mutex_exit(&tdcp->c_statelock);
5082 		error = ENOSPC;
5083 		goto out;
5084 	}
5085 	mutex_exit(&fcp->c_statelock);
5086 
5087 	/* log the operation */
5088 	commit = cachefs_dlog_link(fscp, tdcp, tnm, fcp, cr);
5089 	if (commit == 0) {
5090 		mutex_exit(&tdcp->c_statelock);
5091 		error = ENOSPC;
5092 		goto out;
5093 	}
5094 
5095 	gethrestime(&current_time);
5096 
5097 	/* make the new link */
5098 	cachefs_modified(tdcp);
5099 	error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie,
5100 	    &fcp->c_id, SM_ASYNC);
5101 	if (error) {
5102 		error = 0;
5103 		mutex_exit(&tdcp->c_statelock);
5104 		goto out;
5105 	}
5106 
5107 	/* Update mtime/ctime of parent dir */
5108 	tdcp->c_metadata.md_localmtime = current_time;
5109 	tdcp->c_metadata.md_localctime = current_time;
5110 	tdcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5111 	tdcp->c_flags |= CN_UPDATED;
5112 	mutex_exit(&tdcp->c_statelock);
5113 
5114 	/* update the file we linked to */
5115 	mutex_enter(&fcp->c_statelock);
5116 	fcp->c_attr.va_nlink++;
5117 	fcp->c_metadata.md_localctime = current_time;
5118 	fcp->c_metadata.md_flags |= MD_LOCALCTIME;
5119 	fcp->c_flags |= CN_UPDATED;
5120 	mutex_exit(&fcp->c_statelock);
5121 
5122 out:
5123 	if (commit) {
5124 		/* commit the log entry */
5125 		if (cachefs_dlog_commit(fscp, commit, error)) {
5126 			/*EMPTY*/
5127 			/* XXX bob: fix on panic */
5128 		}
5129 	}
5130 
5131 	return (error);
5132 }
5133 
/*
 * Serialize all renames in CFS to avoid deadlocks, since a rename has to
 * hold two cnodes at the same time.
 */
5138 kmutex_t cachefs_rename_lock;
5139 
5140 /*ARGSUSED*/
5141 static int
5142 cachefs_rename(vnode_t *odvp, char *onm, vnode_t *ndvp,
5143     char *nnm, cred_t *cr, caller_context_t *ct, int flags)
5144 {
5145 	fscache_t *fscp = C_TO_FSCACHE(VTOC(odvp));
5146 	cachefscache_t *cachep = fscp->fs_cache;
5147 	int error = 0;
5148 	int held = 0;
5149 	int connected = 0;
5150 	vnode_t *delvp = NULL;
5151 	vnode_t *tvp = NULL;
5152 	int vfslock = 0;
5153 	struct vnode *realvp;
5154 
5155 	if (getzoneid() != GLOBAL_ZONEID)
5156 		return (EPERM);
5157 
5158 	if (VOP_REALVP(ndvp, &realvp, ct) == 0)
5159 		ndvp = realvp;
5160 
5161 	/*
5162 	 * if the fs NOFILL or NOCACHE flags are on, then the old and new
5163 	 * directory cnodes better indicate NOCACHE mode as well.
5164 	 */
5165 	ASSERT
5166 	    ((fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE)) == 0 ||
5167 	    ((VTOC(odvp)->c_flags & CN_NOCACHE) &&
5168 	    (VTOC(ndvp)->c_flags & CN_NOCACHE)));
5169 
5170 	/*
5171 	 * Cachefs only provides pass-through support for NFSv4,
5172 	 * and all vnode operations are passed through to the
5173 	 * back file system. For NFSv4 pass-through to work, only
5174 	 * connected operation is supported, the cnode backvp must
5175 	 * exist, and cachefs optional (eg., disconnectable) flags
5176 	 * are turned off. Assert these conditions to ensure that
5177 	 * the backfilesystem is called for the rename operation.
5178 	 */
5179 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
5180 	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(odvp));
5181 	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(ndvp));
5182 
5183 	for (;;) {
5184 		if (vfslock) {
5185 			vn_vfsunlock(delvp);
5186 			vfslock = 0;
5187 		}
5188 		if (delvp) {
5189 			VN_RELE(delvp);
5190 			delvp = NULL;
5191 		}
5192 
5193 		/* get (or renew) access to the file system */
5194 		if (held) {
5195 			/* Won't loop for NFSv4 connected support */
5196 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
5197 			cachefs_cd_release(fscp);
5198 			held = 0;
5199 		}
5200 		error = cachefs_cd_access(fscp, connected, 1);
5201 		if (error)
5202 			break;
5203 		held = 1;
5204 
5205 		/* sanity check */
5206 		if ((odvp->v_type != VDIR) || (ndvp->v_type != VDIR)) {
5207 			error = EINVAL;
5208 			break;
5209 		}
5210 
5211 		/* cannot rename from or to . or .. */
5212 		if (strcmp(onm, ".") == 0 || strcmp(onm, "..") == 0 ||
5213 		    strcmp(nnm, ".") == 0 || strcmp(nnm, "..") == 0) {
5214 			error = EINVAL;
5215 			break;
5216 		}
5217 
5218 		if (odvp != ndvp) {
5219 			/*
5220 			 * if moving a directory, its notion
5221 			 * of ".." will change
5222 			 */
5223 			error = cachefs_lookup_common(odvp, onm, &tvp,
5224 			    NULL, 0, NULL, cr);
5225 			if (error == 0) {
5226 				ASSERT(tvp != NULL);
5227 				if (tvp->v_type == VDIR) {
5228 					cnode_t *cp = VTOC(tvp);
5229 
5230 					dnlc_remove(tvp, "..");
5231 
5232 					mutex_enter(&cp->c_statelock);
5233 					CFSOP_MODIFY_COBJECT(fscp, cp, cr);
5234 					mutex_exit(&cp->c_statelock);
5235 				}
5236 			} else {
5237 				tvp = NULL;
5238 				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5239 					if (CFS_TIMEOUT(fscp, error)) {
5240 						cachefs_cd_release(fscp);
5241 						held = 0;
5242 						cachefs_cd_timedout(fscp);
5243 						connected = 0;
5244 						continue;
5245 					}
5246 				} else {
5247 					if (CFS_TIMEOUT(fscp, error)) {
5248 						connected = 1;
5249 						continue;
5250 					}
5251 				}
5252 				break;
5253 			}
5254 		}
5255 
5256 		/* get the cnode if file being deleted */
5257 		error = cachefs_lookup_common(ndvp, nnm, &delvp, NULL, 0,
5258 			NULL, cr);
5259 		if (error) {
5260 			delvp = NULL;
5261 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5262 				if (CFS_TIMEOUT(fscp, error)) {
5263 					cachefs_cd_release(fscp);
5264 					held = 0;
5265 					cachefs_cd_timedout(fscp);
5266 					connected = 0;
5267 					continue;
5268 				}
5269 			} else {
5270 				if (CFS_TIMEOUT(fscp, error)) {
5271 					connected = 1;
5272 					continue;
5273 				}
5274 			}
5275 			if (error != ENOENT)
5276 				break;
5277 		}
5278 
5279 		if (delvp && delvp->v_type == VDIR) {
5280 			/* see ufs_dirremove for why this is done, mount race */
5281 			if (vn_vfswlock(delvp)) {
5282 				error = EBUSY;
5283 				break;
5284 			}
5285 			vfslock = 1;
5286 			if (vn_mountedvfs(delvp) != NULL) {
5287 				error = EBUSY;
5288 				break;
5289 			}
5290 		}
5291 
5292 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5293 			error = cachefs_rename_connected(odvp, onm,
5294 				ndvp, nnm, cr, delvp);
5295 			if (CFS_TIMEOUT(fscp, error)) {
5296 				cachefs_cd_release(fscp);
5297 				held = 0;
5298 				cachefs_cd_timedout(fscp);
5299 				connected = 0;
5300 				continue;
5301 			}
5302 		} else {
5303 			error = cachefs_rename_disconnected(odvp, onm,
5304 				ndvp, nnm, cr, delvp);
5305 			if (CFS_TIMEOUT(fscp, error)) {
5306 				connected = 1;
5307 				continue;
5308 			}
5309 		}
5310 		break;
5311 	}
5312 
5313 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RENAME)) {
5314 		struct fid gone;
5315 
5316 		bzero(&gone, sizeof (gone));
5317 		gone.fid_len = MAXFIDSZ;
5318 		if (delvp != NULL)
5319 			(void) VOP_FID(delvp, &gone, ct);
5320 
5321 		cachefs_log_rename(cachep, error, fscp->fs_cfsvfsp,
5322 		    &gone, 0, (delvp != NULL), crgetuid(cr));
5323 	}
5324 
5325 	if (held)
5326 		cachefs_cd_release(fscp);
5327 
5328 	if (vfslock)
5329 		vn_vfsunlock(delvp);
5330 
5331 	if (delvp)
5332 		VN_RELE(delvp);
5333 	if (tvp)
5334 		VN_RELE(tvp);
5335 
5336 #ifdef CFS_CD_DEBUG
5337 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
5338 #endif
5339 	return (error);
5340 }
5341 
/*
 * cachefs_rename_connected
 *
 * Connected-mode worker for rename.  Renames onm in directory odvp to
 * nnm in directory ndvp by issuing VOP_RENAME() on the back file system
 * and then bringing the cached directories and cnodes up to date.
 *
 * delvp, if not NULL, is the vnode currently named nnm in ndvp, i.e.
 * the object this rename replaces.  It is supplied by the caller and
 * is not released here.
 *
 * Returns 0 on success, including the case where the source and the
 * target are the same vnode (nothing is done then).  Otherwise returns
 * the error from the lookup of onm, cachefs_getbackvp(),
 * CFSOP_CHECK_COBJECT(), cachefs_remove_dolink() or VOP_RENAME().
 * Once VOP_RENAME() has succeeded, failures while updating the cached
 * directories are not returned; cachefs_inval_object() or
 * cachefs_nocache() is applied to the affected directory instead.
 */
5342 static int
5343 cachefs_rename_connected(vnode_t *odvp, char *onm, vnode_t *ndvp,
5344     char *nnm, cred_t *cr, vnode_t *delvp)
5345 {
5346 	cnode_t *odcp = VTOC(odvp);
5347 	cnode_t *ndcp = VTOC(ndvp);
5348 	vnode_t *revp = NULL;
5349 	cnode_t *recp;
5350 	cnode_t *delcp;
5351 	fscache_t *fscp = C_TO_FSCACHE(odcp);
5352 	int error = 0;
5353 	struct fid cookie;
5354 	struct fid *cookiep;
5355 	cfs_cid_t cid;
5356 	int gotdirent;
5357 
5358 	/* find the file we are renaming */
5359 	error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr);
5360 	if (error)
5361 		return (error);
5362 	recp = VTOC(revp);
5363 
5364 	/*
5365 	 * To avoid deadlock, we acquire this global rename lock before
5366 	 * we try to get the locks for the source and target directories.
5367 	 */
5368 	mutex_enter(&cachefs_rename_lock);
5369 	rw_enter(&odcp->c_rwlock, RW_WRITER);
5370 	if (odcp != ndcp) {
5371 		rw_enter(&ndcp->c_rwlock, RW_WRITER);
5372 	}
5373 	mutex_exit(&cachefs_rename_lock);
5374 
5375 	ASSERT((odcp->c_flags & CN_ASYNC_POP_WORKING) == 0);
5376 	ASSERT((ndcp->c_flags & CN_ASYNC_POP_WORKING) == 0);
5377 
	/* the source directory needs a back vnode and must pass the check */
5378 	mutex_enter(&odcp->c_statelock);
5379 	if (odcp->c_backvp == NULL) {
5380 		error = cachefs_getbackvp(fscp, odcp);
5381 		if (error) {
5382 			mutex_exit(&odcp->c_statelock);
5383 			goto out;
5384 		}
5385 	}
5386 
5387 	error = CFSOP_CHECK_COBJECT(fscp, odcp, 0, cr);
5388 	if (error) {
5389 		mutex_exit(&odcp->c_statelock);
5390 		goto out;
5391 	}
5392 	mutex_exit(&odcp->c_statelock);
5393 
	/* same for the target directory, unless it is the source directory */
5394 	if (odcp != ndcp) {
5395 		mutex_enter(&ndcp->c_statelock);
5396 		if (ndcp->c_backvp == NULL) {
5397 			error = cachefs_getbackvp(fscp, ndcp);
5398 			if (error) {
5399 				mutex_exit(&ndcp->c_statelock);
5400 				goto out;
5401 			}
5402 		}
5403 
5404 		error = CFSOP_CHECK_COBJECT(fscp, ndcp, 0, cr);
5405 		if (error) {
5406 			mutex_exit(&ndcp->c_statelock);
5407 			goto out;
5408 		}
5409 		mutex_exit(&ndcp->c_statelock);
5410 	}
5411 
5412 	/* if a file is being deleted because of this rename */
5413 	if (delvp) {
5414 		/* if src and dest file are same */
5415 		if (delvp == revp) {
			/* nothing to rename; this is reported as success */
5416 			error = 0;
5417 			goto out;
5418 		}
5419 
5420 		/*
5421 		 * If the cnode is active, make a link to the file
5422 		 * so operations on the file will continue.
5423 		 */
5424 		dnlc_purge_vp(delvp);
5425 		delcp = VTOC(delvp);
		/*
		 * The link is skipped for directories, and for files whose
		 * v_count is 1, or 2 while c_ipending is set.
		 */
5426 		if ((delvp->v_type != VDIR) &&
5427 		    !((delvp->v_count == 1) ||
5428 		    ((delvp->v_count == 2) && delcp->c_ipending))) {
5429 			error = cachefs_remove_dolink(ndvp, delvp, nnm, cr);
5430 			if (error)
5431 				goto out;
5432 		}
5433 	}
5434 
5435 	/* do the rename on the back fs */
5436 	CFS_DPRINT_BACKFS_NFSV4(fscp,
5437 		("cachefs_rename (nfsv4): odcp %p, odbackvp %p, "
5438 		" ndcp %p, ndbackvp %p, onm %s, nnm %s\n",
5439 		odcp, odcp->c_backvp, ndcp, ndcp->c_backvp, onm, nnm));
5440 	error = VOP_RENAME(odcp->c_backvp, onm, ndcp->c_backvp, nnm, cr, NULL,
5441 	    0);
5442 	if (error)
5443 		goto out;
5444 
	/*
	 * The back fs rename is done.  Everything below only updates
	 * cached state; every error from here on is reset to 0.
	 */

5445 	/* purge mappings to file in the old directory */
5446 	dnlc_purge_vp(odvp);
5447 
5448 	/* purge mappings in the new dir if we deleted a file */
5449 	if (delvp && (odvp != ndvp))
5450 		dnlc_purge_vp(ndvp);
5451 
5452 	/* update the file we just deleted */
5453 	if (delvp) {
5454 		mutex_enter(&delcp->c_statelock);
		/* tested before the decrement: 1 means the last link is gone */
5455 		if (delcp->c_attr.va_nlink == 1) {
5456 			delcp->c_flags |= CN_DESTROY;
5457 		} else {
5458 			delcp->c_flags |= CN_UPDATED;
5459 		}
5460 		delcp->c_attr.va_nlink--;
5461 		CFSOP_MODIFY_COBJECT(fscp, delcp, cr);
5462 		mutex_exit(&delcp->c_statelock);
5463 	}
5464 
5465 	/* find the entry in the old directory */
5466 	mutex_enter(&odcp->c_statelock);
5467 	gotdirent = 0;
5468 	cookiep = NULL;
5469 	if (CFS_ISFS_NONSHARED(fscp) &&
5470 	    (odcp->c_metadata.md_flags & MD_POPULATED)) {
5471 		error = cachefs_dir_look(odcp, onm, &cookie,
5472 			NULL, NULL, &cid);
		/*
		 * EINVAL is also treated as "entry found", but then no
		 * cookie is passed on (cookiep stays NULL).
		 */
5473 		if (error == 0 || error == EINVAL) {
5474 			gotdirent = 1;
5475 			if (error == 0)
5476 				cookiep = &cookie;
5477 		} else {
5478 			cachefs_inval_object(odcp);
5479 		}
5480 	}
	/* a failed lookup in the cached directory is not reported */
5481 	error = 0;
5482 
5483 	/* remove the directory entry from the old directory */
5484 	if (gotdirent) {
5485 		error = cachefs_dir_rmentry(odcp, onm);
5486 		if (error) {
5487 			cachefs_nocache(odcp);
5488 			error = 0;
5489 		}
5490 	}
5491 	CFSOP_MODIFY_COBJECT(fscp, odcp, cr);
5492 	mutex_exit(&odcp->c_statelock);
5493 
5494 	/* install the directory entry in the new directory */
5495 	mutex_enter(&ndcp->c_statelock);
5496 	if (CFS_ISFS_NONSHARED(fscp) &&
5497 	    (ndcp->c_metadata.md_flags & MD_POPULATED)) {
		/*
		 * Any non-zero value: unless the old entry was found and
		 * the new one is entered successfully, cachefs_nocache()
		 * is applied to the new directory below.
		 */
5498 		error = 1;
5499 		if (gotdirent) {
5500 			ASSERT(cid.cid_fileno != 0);
5501 			error = 0;
5502 			if (delvp) {
5503 				error = cachefs_dir_rmentry(ndcp, nnm);
5504 			}
5505 			if (error == 0) {
5506 				error = cachefs_dir_enter(ndcp, nnm, cookiep,
5507 				    &cid, SM_ASYNC);
5508 			}
5509 		}
5510 		if (error) {
5511 			cachefs_nocache(ndcp);
5512 			error = 0;
5513 		}
5514 	}
	/* when both directories are one cnode this was already done above */
5515 	if (odcp != ndcp)
5516 		CFSOP_MODIFY_COBJECT(fscp, ndcp, cr);
5517 	mutex_exit(&ndcp->c_statelock);
5518 
5519 	/* ctime of renamed file has changed */
5520 	mutex_enter(&recp->c_statelock);
5521 	CFSOP_MODIFY_COBJECT(fscp, recp, cr);
5522 	mutex_exit(&recp->c_statelock);
5523 
5524 out:
	/* drop the directory locks, then the hold taken by the lookup of onm */
5525 	if (odcp != ndcp)
5526 		rw_exit(&ndcp->c_rwlock);
5527 	rw_exit(&odcp->c_rwlock);
5528 
5529 	VN_RELE(revp);
5530 
5531 	return (error);
5532 }
5533 
/*
 * cachefs_rename_disconnected
 *
 * Disconnected-mode worker for rename.  Renames onm in directory odvp
 * to nnm in directory ndvp using only cached state: the operation is
 * written to the log with cachefs_dlog_rename(), the cached directories
 * are edited, local ctime/mtime are stamped on the cnodes involved, and
 * the log entry is finally committed with cachefs_dlog_commit().
 *
 * delvp, if not NULL, is the vnode currently named nnm in ndvp.  It is
 * supplied by the caller and is not released here.
 *
 * Returns 0 on success (also when source and target are the same
 * vnode), or:
 *	ETIMEDOUT  the rename is not done from the cache: write-around
 *		   file system, MD_NEEDATTRS set on the renamed or the
 *		   replaced file, a directory that is not populated, a
 *		   directory being moved to another directory, or
 *		   ENOTDIR from the cached directory routines.  The
 *		   caller tests the result with CFS_TIMEOUT() and
 *		   retries (presumably this is the value it looks for).
 *	ENOSPC	   cachefs_dlog_cidmap() or cachefs_modified_alloc()
 *		   failed, or cachefs_dlog_rename() returned 0.
 *	other	   from the lookup of onm, cachefs_access_local(),
 *		   cachefs_stickyrmchk(), cachefs_remove_dolink() or the
 *		   cached directory routines.
 */
5534 static int
5535 cachefs_rename_disconnected(vnode_t *odvp, char *onm, vnode_t *ndvp,
5536     char *nnm, cred_t *cr, vnode_t *delvp)
5537 {
5538 	cnode_t *odcp = VTOC(odvp);
5539 	cnode_t *ndcp = VTOC(ndvp);
5540 	cnode_t *delcp = NULL;
5541 	vnode_t *revp = NULL;
5542 	cnode_t *recp;
5543 	fscache_t *fscp = C_TO_FSCACHE(odcp);
5544 	int error = 0;
5545 	struct fid cookie;
5546 	struct fid *cookiep;
5547 	cfs_cid_t cid;
5548 	off_t commit = 0;
5549 	timestruc_t current_time;
5550 
	/* write-around file systems are not handled in this routine */
5551 	if (CFS_ISFS_WRITE_AROUND(fscp))
5552 		return (ETIMEDOUT);
5553 
5554 	/* find the file we are renaming */
5555 	error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr);
5556 	if (error)
5557 		return (error);
5558 	recp = VTOC(revp);
5559 
5560 	/*
5561 	 * To avoid deadlock, we acquire this global rename lock before
5562 	 * we try to get the locks for the source and target directories.
5563 	 */
5564 	mutex_enter(&cachefs_rename_lock);
5565 	rw_enter(&odcp->c_rwlock, RW_WRITER);
5566 	if (odcp != ndcp) {
5567 		rw_enter(&ndcp->c_rwlock, RW_WRITER);
5568 	}
5569 	mutex_exit(&cachefs_rename_lock);
5570 
5571 	if (recp->c_metadata.md_flags & MD_NEEDATTRS) {
5572 		error = ETIMEDOUT;
5573 		goto out;
5574 	}
5575 
	/*
	 * Give the renamed file a cid mapping if it has none.  The flag is
	 * tested once without the lock and again under c_statelock before
	 * it is set.
	 */
5576 	if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) {
5577 		mutex_enter(&recp->c_statelock);
5578 		if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) {
5579 			error = cachefs_dlog_cidmap(fscp);
5580 			if (error) {
5581 				mutex_exit(&recp->c_statelock);
				/* whatever the failure, report ENOSPC */
5582 				error = ENOSPC;
5583 				goto out;
5584 			}
5585 			recp->c_metadata.md_flags |= MD_MAPPING;
5586 			recp->c_flags |= CN_UPDATED;
5587 		}
5588 		mutex_exit(&recp->c_statelock);
5589 	}
5590 
5591 	/* check permissions */
5592 	/* XXX clean up this mutex junk sometime */
5593 	mutex_enter(&odcp->c_statelock);
5594 	error = cachefs_access_local(odcp, (VEXEC|VWRITE), cr);
5595 	mutex_exit(&odcp->c_statelock);
5596 	if (error != 0)
5597 		goto out;
5598 	mutex_enter(&ndcp->c_statelock);
5599 	error = cachefs_access_local(ndcp, (VEXEC|VWRITE), cr);
5600 	mutex_exit(&ndcp->c_statelock);
5601 	if (error != 0)
5602 		goto out;
5603 	mutex_enter(&odcp->c_statelock);
5604 	error = cachefs_stickyrmchk(odcp, recp, cr);
5605 	mutex_exit(&odcp->c_statelock);
5606 	if (error != 0)
5607 		goto out;
5608 
5609 	/* dirs must be populated */
5610 	if (((odcp->c_metadata.md_flags & MD_POPULATED) == 0) ||
5611 	    ((ndcp->c_metadata.md_flags & MD_POPULATED) == 0)) {
5612 		error = ETIMEDOUT;
5613 		goto out;
5614 	}
5615 
5616 	/* for now do not allow moving dirs because could cause cycles */
5617 	if ((((revp->v_type == VDIR) && (odvp != ndvp))) ||
5618 	    (revp == odvp)) {
5619 		error = ETIMEDOUT;
5620 		goto out;
5621 	}
5622 
5623 	/* if a file is being deleted because of this rename */
5624 	if (delvp) {
5625 		delcp = VTOC(delvp);
5626 
5627 		/* if src and dest file are the same */
5628 		if (delvp == revp) {
5629 			error = 0;
5630 			goto out;
5631 		}
5632 
5633 		if (delcp->c_metadata.md_flags & MD_NEEDATTRS) {
5634 			error = ETIMEDOUT;
5635 			goto out;
5636 		}
5637 
5638 		/* if there are hard links to this file */
5639 		if (delcp->c_attr.va_nlink > 1) {
5640 			mutex_enter(&delcp->c_statelock);
5641 			if (cachefs_modified_alloc(delcp)) {
5642 				mutex_exit(&delcp->c_statelock);
5643 				error = ENOSPC;
5644 				goto out;
5645 			}
5646 
5647 			if ((delcp->c_metadata.md_flags & MD_MAPPING) == 0) {
5648 				error = cachefs_dlog_cidmap(fscp);
5649 				if (error) {
5650 					mutex_exit(&delcp->c_statelock);
5651 					error = ENOSPC;
5652 					goto out;
5653 				}
5654 				delcp->c_metadata.md_flags |= MD_MAPPING;
5655 				delcp->c_flags |= CN_UPDATED;
5656 			}
5657 			mutex_exit(&delcp->c_statelock);
5658 		}
5659 
5660 		/* make sure we can delete file */
5661 		mutex_enter(&ndcp->c_statelock);
5662 		error = cachefs_stickyrmchk(ndcp, delcp, cr);
5663 		mutex_exit(&ndcp->c_statelock);
5664 		if (error != 0)
5665 			goto out;
5666 
5667 		/*
5668 		 * If the cnode is active, make a link to the file
5669 		 * so operations on the file will continue.
5670 		 */
5671 		dnlc_purge_vp(delvp);
5672 		if ((delvp->v_type != VDIR) &&
5673 		    !((delvp->v_count == 1) ||
5674 		    ((delvp->v_count == 2) && delcp->c_ipending))) {
5675 			error = cachefs_remove_dolink(ndvp, delvp, nnm, cr);
5676 			if (error)
5677 				goto out;
5678 		}
5679 	}
5680 
5681 	/* purge mappings to file in the old directory */
5682 	dnlc_purge_vp(odvp);
5683 
5684 	/* purge mappings in the new dir if we deleted a file */
5685 	if (delvp && (odvp != ndvp))
5686 		dnlc_purge_vp(ndvp);
5687 
5688 	/* find the entry in the old directory */
5689 	mutex_enter(&odcp->c_statelock);
	/* checked again, this time with c_statelock held */
5690 	if ((odcp->c_metadata.md_flags & MD_POPULATED) == 0) {
5691 		mutex_exit(&odcp->c_statelock);
5692 		error = ETIMEDOUT;
5693 		goto out;
5694 	}
5695 	cookiep = NULL;
5696 	error = cachefs_dir_look(odcp, onm, &cookie, NULL, NULL, &cid);
	/* EINVAL is accepted too, but then no cookie is passed on */
5697 	if (error == 0 || error == EINVAL) {
5698 		if (error == 0)
5699 			cookiep = &cookie;
5700 	} else {
5701 		mutex_exit(&odcp->c_statelock);
5702 		if (error == ENOTDIR)
5703 			error = ETIMEDOUT;
5704 		goto out;
5705 	}
	/* clear a possible EINVAL left over from the lookup */
5706 	error = 0;
5707 
5708 	/* write the log entry */
5709 	commit = cachefs_dlog_rename(fscp, odcp, onm, ndcp, nnm, cr,
5710 	    recp, delcp);
	/* a zero return is treated as failure to write the log entry */
5711 	if (commit == 0) {
5712 		mutex_exit(&odcp->c_statelock);
5713 		error = ENOSPC;
5714 		goto out;
5715 	}
5716 
5717 	/* remove the directory entry from the old directory */
5718 	cachefs_modified(odcp);
5719 	error = cachefs_dir_rmentry(odcp, onm);
5720 	if (error) {
5721 		mutex_exit(&odcp->c_statelock);
5722 		if (error == ENOTDIR)
5723 			error = ETIMEDOUT;
5724 		goto out;
5725 	}
5726 	mutex_exit(&odcp->c_statelock);
5727 
5728 	/* install the directory entry in the new directory */
5729 	mutex_enter(&ndcp->c_statelock);
	/* stays ENOTDIR if the new directory is not populated any more */
5730 	error = ENOTDIR;
5731 	if (ndcp->c_metadata.md_flags & MD_POPULATED) {
5732 		ASSERT(cid.cid_fileno != 0);
5733 		cachefs_modified(ndcp);
5734 		error = 0;
5735 		if (delvp) {
5736 			error = cachefs_dir_rmentry(ndcp, nnm);
5737 		}
5738 		if (error == 0) {
5739 			error = cachefs_dir_enter(ndcp, nnm, cookiep,
5740 			    &cid, SM_ASYNC);
5741 		}
5742 	}
	/*
	 * The entry has already been removed from the old directory, so on
	 * failure cachefs_nocache() is applied to both directories.
	 */
5743 	if (error) {
5744 		cachefs_nocache(ndcp);
5745 		mutex_exit(&ndcp->c_statelock);
5746 		mutex_enter(&odcp->c_statelock);
5747 		cachefs_nocache(odcp);
5748 		mutex_exit(&odcp->c_statelock);
5749 		if (error == ENOTDIR)
5750 			error = ETIMEDOUT;
5751 		goto out;
5752 	}
5753 	mutex_exit(&ndcp->c_statelock);
5754 
	/* one timestamp is used for every cnode touched below */
5755 	gethrestime(&current_time);
5756 
5757 	/* update the file we just deleted */
5758 	if (delvp) {
5759 		mutex_enter(&delcp->c_statelock);
5760 		delcp->c_attr.va_nlink--;
5761 		delcp->c_metadata.md_localctime = current_time;
5762 		delcp->c_metadata.md_flags |= MD_LOCALCTIME;
		/* here the link count is tested after the decrement */
5763 		if (delcp->c_attr.va_nlink == 0) {
5764 			delcp->c_flags |= CN_DESTROY;
5765 		} else {
5766 			delcp->c_flags |= CN_UPDATED;
5767 		}
5768 		mutex_exit(&delcp->c_statelock);
5769 	}
5770 
5771 	/* update the file we renamed */
5772 	mutex_enter(&recp->c_statelock);
5773 	recp->c_metadata.md_localctime = current_time;
5774 	recp->c_metadata.md_flags |= MD_LOCALCTIME;
5775 	recp->c_flags |= CN_UPDATED;
5776 	mutex_exit(&recp->c_statelock);
5777 
5778 	/* update the source directory */
5779 	mutex_enter(&odcp->c_statelock);
5780 	odcp->c_metadata.md_localctime = current_time;
5781 	odcp->c_metadata.md_localmtime = current_time;
5782 	odcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5783 	odcp->c_flags |= CN_UPDATED;
5784 	mutex_exit(&odcp->c_statelock);
5785 
5786 	/* update the destination directory */
5787 	if (odcp != ndcp) {
5788 		mutex_enter(&ndcp->c_statelock);
5789 		ndcp->c_metadata.md_localctime = current_time;
5790 		ndcp->c_metadata.md_localmtime = current_time;
5791 		ndcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5792 		ndcp->c_flags |= CN_UPDATED;
5793 		mutex_exit(&ndcp->c_statelock);
5794 	}
5795 
5796 out:
	/*
	 * commit is non-zero only after cachefs_dlog_rename() returned a
	 * non-zero value; the entry is committed with the final error.
	 */
5797 	if (commit) {
5798 		/* commit the log entry */
5799 		if (cachefs_dlog_commit(fscp, commit, error)) {
5800 			/*EMPTY*/
5801 			/* XXX bob: fix on panic */
5802 		}
5803 	}
5804 
5805 	if (odcp != ndcp)
5806 		rw_exit(&ndcp->c_rwlock);
5807 	rw_exit(&odcp->c_rwlock);
5808 
	/* drop the hold taken by the lookup of onm */
5809 	VN_RELE(revp);
5810 
5811 	return (error);
5812 }
5813 
5814 /*ARGSUSED*/
5815 static int
5816 cachefs_mkdir(vnode_t *dvp, char *nm, vattr_t *vap, vnode_t **vpp,
5817     cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
5818 {
5819 	cnode_t *dcp = VTOC(dvp);
5820 	fscache_t *fscp = C_TO_FSCACHE(dcp);
5821 	cachefscache_t *cachep = fscp->fs_cache;
5822 	int error = 0;
5823 	int held = 0;
5824 	int connected = 0;
5825 
5826 #ifdef CFSDEBUG
5827 	CFS_DEBUG(CFSDEBUG_VOPS)
5828 		printf("cachefs_mkdir: ENTER dvp %p\n", (void *)dvp);
5829 #endif
5830 
5831 	if (getzoneid() != GLOBAL_ZONEID) {
5832 		error = EPERM;
5833 		goto out;
5834 	}
5835 
5836 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
5837 		ASSERT(dcp->c_flags & CN_NOCACHE);
5838 
5839 	/*
5840 	 * Cachefs only provides pass-through support for NFSv4,
5841 	 * and all vnode operations are passed through to the
5842 	 * back file system. For NFSv4 pass-through to work, only
5843 	 * connected operation is supported, the cnode backvp must
5844 	 * exist, and cachefs optional (eg., disconnectable) flags
5845 	 * are turned off. Assert these conditions to ensure that
5846 	 * the backfilesystem is called for the mkdir operation.
5847 	 */
5848 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
5849 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
5850 
5851 	for (;;) {
5852 		/* get (or renew) access to the file system */
5853 		if (held) {
5854 			/* Won't loop with NFSv4 connected behavior */
5855 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
5856 			rw_exit(&dcp->c_rwlock);
5857 			cachefs_cd_release(fscp);
5858 			held = 0;
5859 		}
5860 		error = cachefs_cd_access(fscp, connected, 1);
5861 		if (error)
5862 			break;
5863 		rw_enter(&dcp->c_rwlock, RW_WRITER);
5864 		held = 1;
5865 
5866 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5867 			error = cachefs_mkdir_connected(dvp, nm, vap,
5868 				vpp, cr);
5869 			if (CFS_TIMEOUT(fscp, error)) {
5870 				rw_exit(&dcp->c_rwlock);
5871 				cachefs_cd_release(fscp);
5872 				held = 0;
5873 				cachefs_cd_timedout(fscp);
5874 				connected = 0;
5875 				continue;
5876 			}
5877 		} else {
5878 			error = cachefs_mkdir_disconnected(dvp, nm, vap,
5879 				vpp, cr);
5880 			if (CFS_TIMEOUT(fscp, error)) {
5881 				connected = 1;
5882 				continue;
5883 			}
5884 		}
5885 		break;
5886 	}
5887 
5888 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_MKDIR)) {
5889 		fid_t *fidp = NULL;
5890 		ino64_t fileno = 0;
5891 		cnode_t *cp = NULL;
5892 		if (error == 0)
5893 			cp = VTOC(*vpp);
5894 
5895 		if (cp != NULL) {
5896 			fidp = &cp->c_metadata.md_cookie;
5897 			fileno = cp->c_id.cid_fileno;
5898 		}
5899 
5900 		cachefs_log_mkdir(cachep, error, fscp->fs_cfsvfsp,
5901 		    fidp, fileno, crgetuid(cr));
5902 	}
5903 
5904 	if (held) {
5905 		rw_exit(&dcp->c_rwlock);
5906 		cachefs_cd_release(fscp);
5907 	}
5908 	if (error == 0 && CFS_ISFS_NONSHARED(fscp))
5909 		(void) cachefs_pack(dvp, nm, cr);
5910 
5911 #ifdef CFS_CD_DEBUG
5912 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
5913 #endif
5914 out:
5915 #ifdef CFSDEBUG
5916 	CFS_DEBUG(CFSDEBUG_VOPS)
5917 		printf("cachefs_mkdir: EXIT error = %d\n", error);
5918 #endif
5919 	return (error);
5920 }
5921 
5922 static int
5923 cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap,
5924     vnode_t **vpp, cred_t *cr)
5925 {
5926 	cnode_t *newcp = NULL, *dcp = VTOC(dvp);
5927 	struct vnode *vp = NULL;
5928 	int error = 0;
5929 	fscache_t *fscp = C_TO_FSCACHE(dcp);
5930 	struct fid cookie;
5931 	struct vattr attr;
5932 	cfs_cid_t cid, dircid;
5933 	uint32_t valid_fid;
5934 
5935 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
5936 		ASSERT(dcp->c_flags & CN_NOCACHE);
5937 
5938 	mutex_enter(&dcp->c_statelock);
5939 
5940 	/* get backvp of dir */
5941 	if (dcp->c_backvp == NULL) {
5942 		error = cachefs_getbackvp(fscp, dcp);
5943 		if (error) {
5944 			mutex_exit(&dcp->c_statelock);
5945 			goto out;
5946 		}
5947 	}
5948 
5949 	/* consistency check the directory */
5950 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
5951 	if (error) {
5952 		mutex_exit(&dcp->c_statelock);
5953 		goto out;
5954 	}
5955 	dircid = dcp->c_id;
5956 
5957 	/* make the dir on the back fs */
5958 	CFS_DPRINT_BACKFS_NFSV4(fscp,
5959 		("cachefs_mkdir (nfsv4): dcp %p, dbackvp %p, "
5960 		"name %s\n", dcp, dcp->c_backvp, nm));
5961 	error = VOP_MKDIR(dcp->c_backvp, nm, vap, &vp, cr, NULL, 0, NULL);
5962 	mutex_exit(&dcp->c_statelock);
5963 	if (error) {
5964 		goto out;
5965 	}
5966 
5967 	/* get the cookie and make the cnode */
5968 	attr.va_mask = AT_ALL;
5969 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
5970 	error = cachefs_getcookie(vp, &cookie, &attr, cr, valid_fid);
5971 	if (error) {
5972 		goto out;
5973 	}
5974 	cid.cid_flags = 0;
5975 	cid.cid_fileno = attr.va_nodeid;
5976 	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
5977 					&attr, vp, cr, 0, &newcp);
5978 	if (error) {
5979 		goto out;
5980 	}
5981 	ASSERT(CTOV(newcp)->v_type == VDIR);
5982 	*vpp = CTOV(newcp);
5983 
5984 	/* if the dir is populated, add the new entry */
5985 	mutex_enter(&dcp->c_statelock);
5986 	if (CFS_ISFS_NONSHARED(fscp) &&
5987 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
5988 		error = cachefs_dir_enter(dcp, nm, &cookie, &newcp->c_id,
5989 		    SM_ASYNC);
5990 		if (error) {
5991 			cachefs_nocache(dcp);
5992 			error = 0;
5993 		}
5994 	}
5995 	dcp->c_attr.va_nlink++;
5996 	dcp->c_flags |= CN_UPDATED;
5997 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
5998 	mutex_exit(&dcp->c_statelock);
5999 
6000 	/* XXX bob: should we do a filldir here? or just add . and .. */
6001 	/* maybe should kick off an async filldir so caller does not wait */
6002 
6003 	/* put the entry in the dnlc */
6004 	if (cachefs_dnlc)
6005 		dnlc_enter(dvp, nm, *vpp);
6006 
6007 	/* save the fileno of the parent so can find the name */
6008 	if (bcmp(&newcp->c_metadata.md_parent, &dircid,
6009 	    sizeof (cfs_cid_t)) != 0) {
6010 		mutex_enter(&newcp->c_statelock);
6011 		newcp->c_metadata.md_parent = dircid;
6012 		newcp->c_flags |= CN_UPDATED;
6013 		mutex_exit(&newcp->c_statelock);
6014 	}
6015 out:
6016 	if (vp)
6017 		VN_RELE(vp);
6018 
6019 	return (error);
6020 }
6021 
6022 static int
6023 cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
6024     vnode_t **vpp, cred_t *cr)
6025 {
6026 	cnode_t *dcp = VTOC(dvp);
6027 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6028 	int error;
6029 	cnode_t *newcp = NULL;
6030 	struct vattr va;
6031 	timestruc_t current_time;
6032 	off_t commit = 0;
6033 	char *s;
6034 	int namlen;
6035 
6036 	/* don't allow '/' characters in pathname component */
6037 	for (s = nm, namlen = 0; *s; s++, namlen++)
6038 		if (*s == '/')
6039 			return (EACCES);
6040 	if (namlen == 0)
6041 		return (EINVAL);
6042 
6043 	if (CFS_ISFS_WRITE_AROUND(fscp))
6044 		return (ETIMEDOUT);
6045 
6046 	mutex_enter(&dcp->c_statelock);
6047 
6048 	/* check permissions */
6049 	if (error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) {
6050 		mutex_exit(&dcp->c_statelock);
6051 		goto out;
6052 	}
6053 
6054 	/* the directory front file must be populated */
6055 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6056 		error = ETIMEDOUT;
6057 		mutex_exit(&dcp->c_statelock);
6058 		goto out;
6059 	}
6060 
6061 	/* make sure nm does not already exist in the directory */
6062 	error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL);
6063 	if (error == ENOTDIR) {
6064 		error = ETIMEDOUT;
6065 		mutex_exit(&dcp->c_statelock);
6066 		goto out;
6067 	}
6068 	if (error != ENOENT) {
6069 		error = EEXIST;
6070 		mutex_exit(&dcp->c_statelock);
6071 		goto out;
6072 	}
6073 
6074 	/* make up a reasonable set of attributes */
6075 	cachefs_attr_setup(vap, &va, dcp, cr);
6076 	va.va_type = VDIR;
6077 	va.va_mode |= S_IFDIR;
6078 	va.va_nlink = 2;
6079 
6080 	mutex_exit(&dcp->c_statelock);
6081 
6082 	/* create the cnode */
6083 	error = cachefs_cnode_create(fscp, &va, 0, &newcp);
6084 	if (error)
6085 		goto out;
6086 
6087 	mutex_enter(&newcp->c_statelock);
6088 
6089 	error = cachefs_dlog_cidmap(fscp);
6090 	if (error) {
6091 		mutex_exit(&newcp->c_statelock);
6092 		goto out;
6093 	}
6094 
6095 	cachefs_creategid(dcp, newcp, vap, cr);
6096 	mutex_enter(&dcp->c_statelock);
6097 	cachefs_createacl(dcp, newcp);
6098 	mutex_exit(&dcp->c_statelock);
6099 	gethrestime(&current_time);
6100 	newcp->c_metadata.md_vattr.va_atime = current_time;
6101 	newcp->c_metadata.md_localctime = current_time;
6102 	newcp->c_metadata.md_localmtime = current_time;
6103 	newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME |
6104 	    MD_LOCALCTIME;
6105 	newcp->c_flags |= CN_UPDATED;
6106 
6107 	/* make a front file for the new directory, add . and .. */
6108 	error = cachefs_dir_new(dcp, newcp);
6109 	if (error) {
6110 		mutex_exit(&newcp->c_statelock);
6111 		goto out;
6112 	}
6113 	cachefs_modified(newcp);
6114 
6115 	/*
6116 	 * write the metadata now rather than waiting until
6117 	 * inactive so that if there's no space we can let
6118 	 * the caller know.
6119 	 */
6120 	ASSERT(newcp->c_frontvp);
6121 	ASSERT((newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
6122 	ASSERT((newcp->c_flags & CN_ALLOC_PENDING) == 0);
6123 	error = filegrp_write_metadata(newcp->c_filegrp,
6124 	    &newcp->c_id, &newcp->c_metadata);
6125 	if (error) {
6126 		mutex_exit(&newcp->c_statelock);
6127 		goto out;
6128 	}
6129 	mutex_exit(&newcp->c_statelock);
6130 
6131 	/* log the operation */
6132 	commit = cachefs_dlog_mkdir(fscp, dcp, newcp, nm, &va, cr);
6133 	if (commit == 0) {
6134 		error = ENOSPC;
6135 		goto out;
6136 	}
6137 
6138 	mutex_enter(&dcp->c_statelock);
6139 
6140 	/* make sure directory is still populated */
6141 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6142 		mutex_exit(&dcp->c_statelock);
6143 		error = ETIMEDOUT;
6144 		goto out;
6145 	}
6146 	cachefs_modified(dcp);
6147 
6148 	/* enter the new file in the directory */
6149 	error = cachefs_dir_enter(dcp, nm, &newcp->c_metadata.md_cookie,
6150 		&newcp->c_id, SM_ASYNC);
6151 	if (error) {
6152 		mutex_exit(&dcp->c_statelock);
6153 		goto out;
6154 	}
6155 
6156 	/* update parent dir times */
6157 	dcp->c_metadata.md_localctime = current_time;
6158 	dcp->c_metadata.md_localmtime = current_time;
6159 	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
6160 	dcp->c_attr.va_nlink++;
6161 	dcp->c_flags |= CN_UPDATED;
6162 	mutex_exit(&dcp->c_statelock);
6163 
6164 out:
6165 	if (commit) {
6166 		/* commit the log entry */
6167 		if (cachefs_dlog_commit(fscp, commit, error)) {
6168 			/*EMPTY*/
6169 			/* XXX bob: fix on panic */
6170 		}
6171 	}
6172 	if (error) {
6173 		if (newcp) {
6174 			mutex_enter(&newcp->c_statelock);
6175 			newcp->c_flags |= CN_DESTROY;
6176 			mutex_exit(&newcp->c_statelock);
6177 			VN_RELE(CTOV(newcp));
6178 		}
6179 	} else {
6180 		*vpp = CTOV(newcp);
6181 	}
6182 	return (error);
6183 }
6184 
/*
 * cachefs_rmdir
 *
 * rmdir vnode operation: removes directory nm from directory dvp.
 * cdir is compared with the directory being removed using VOP_CMP();
 * a match is refused with EINVAL.
 *
 * Callers outside the global zone get EPERM.  Otherwise access to the
 * file system is obtained with cachefs_cd_access(), the victim is
 * looked up, and the work is passed to cachefs_rmdir_connected() or
 * cachefs_rmdir_disconnected(), chosen by fs_cdconnected.  Whenever
 * CFS_TIMEOUT() flags an error, everything acquired so far is given
 * back and the operation is retried from the top of the loop.
 *
 * Returns 0 or an errno value; among those set directly here are
 * ENOTDIR (nm is not a directory), EBUSY (vn_vfswlock() failed or a
 * file system is mounted on the directory) and, in disconnected mode,
 * EINVAL for an empty name or "." and EEXIST for "..".
 */
6185 /*ARGSUSED*/
6186 static int
6187 cachefs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
6188     caller_context_t *ct, int flags)
6189 {
6190 	cnode_t *dcp = VTOC(dvp);
6191 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6192 	cachefscache_t *cachep = fscp->fs_cache;
6193 	int error = 0;
6194 	int held = 0;
6195 	int connected = 0;
6196 	size_t namlen;
6197 	vnode_t *vp = NULL;
6198 	int vfslock = 0;
6199 
6200 #ifdef CFSDEBUG
6201 	CFS_DEBUG(CFSDEBUG_VOPS)
6202 		printf("cachefs_rmdir: ENTER vp %p\n", (void *)dvp);
6203 #endif
6204 
6205 	if (getzoneid() != GLOBAL_ZONEID) {
6206 		error = EPERM;
6207 		goto out;
6208 	}
6209 
6210 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
6211 		ASSERT(dcp->c_flags & CN_NOCACHE);
6212 
6213 	/*
6214 	 * Cachefs only provides pass-through support for NFSv4,
6215 	 * and all vnode operations are passed through to the
6216 	 * back file system. For NFSv4 pass-through to work, only
6217 	 * connected operation is supported, the cnode backvp must
6218 	 * exist, and cachefs optional (eg., disconnectable) flags
6219 	 * are turned off. Assert these conditions to ensure that
6220 	 * the backfilesystem is called for the rmdir operation.
6221 	 */
6222 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6223 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6224 
6225 	for (;;) {
		/* give back whatever a previous pass left behind */
6226 		if (vfslock) {
6227 			vn_vfsunlock(vp);
6228 			vfslock = 0;
6229 		}
6230 		if (vp) {
6231 			VN_RELE(vp);
6232 			vp = NULL;
6233 		}
6234 
6235 		/* get (or renew) access to the file system */
6236 		if (held) {
6237 			/* Won't loop with NFSv4 connected behavior */
6238 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6239 			cachefs_cd_release(fscp);
6240 			held = 0;
6241 		}
6242 		error = cachefs_cd_access(fscp, connected, 1);
6243 		if (error)
6244 			break;
6245 		held = 1;
6246 
6247 		/* if disconnected, do some extra error checking */
6248 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
6249 			/* check permissions */
6250 			mutex_enter(&dcp->c_statelock);
6251 			error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
6252 			mutex_exit(&dcp->c_statelock);
6253 			if (CFS_TIMEOUT(fscp, error)) {
				/* retry, passing connected = 1 to cd_access */
6254 				connected = 1;
6255 				continue;
6256 			}
6257 			if (error)
6258 				break;
6259 
6260 			namlen = strlen(nm);
6261 			if (namlen == 0) {
6262 				error = EINVAL;
6263 				break;
6264 			}
6265 
6266 			/* cannot remove . and .. */
6267 			if (nm[0] == '.') {
6268 				if (namlen == 1) {
6269 					error = EINVAL;
6270 					break;
6271 				} else if (namlen == 2 && nm[1] == '.') {
					/* ".." yields EEXIST, not EINVAL */
6272 					error = EEXIST;
6273 					break;
6274 				}
6275 			}
6276 
6277 		}
6278 
6279 		/* get the cnode of the dir to remove */
6280 		error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
6281 		if (error) {
6282 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6283 				if (CFS_TIMEOUT(fscp, error)) {
					/*
					 * Release access, record the
					 * timeout and retry with
					 * connected = 0.
					 */
6284 					cachefs_cd_release(fscp);
6285 					held = 0;
6286 					cachefs_cd_timedout(fscp);
6287 					connected = 0;
6288 					continue;
6289 				}
6290 			} else {
6291 				if (CFS_TIMEOUT(fscp, error)) {
6292 					connected = 1;
6293 					continue;
6294 				}
6295 			}
6296 			break;
6297 		}
6298 
6299 		/* must be a dir */
6300 		if (vp->v_type != VDIR) {
6301 			error = ENOTDIR;
6302 			break;
6303 		}
6304 
6305 		/* must not be current dir */
6306 		if (VOP_CMP(vp, cdir, ct)) {
6307 			error = EINVAL;
6308 			break;
6309 		}
6310 
6311 		/* see ufs_dirremove for why this is done, mount race */
6312 		if (vn_vfswlock(vp)) {
6313 			error = EBUSY;
6314 			break;
6315 		}
		/* remembered so that every exit path unlocks vp again */
6316 		vfslock = 1;
6317 		if (vn_mountedvfs(vp) != NULL) {
6318 			error = EBUSY;
6319 			break;
6320 		}
6321 
6322 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6323 			error = cachefs_rmdir_connected(dvp, nm, cdir,
6324 				cr, vp);
6325 			if (CFS_TIMEOUT(fscp, error)) {
6326 				cachefs_cd_release(fscp);
6327 				held = 0;
6328 				cachefs_cd_timedout(fscp);
6329 				connected = 0;
6330 				continue;
6331 			}
6332 		} else {
6333 			error = cachefs_rmdir_disconnected(dvp, nm, cdir,
6334 				cr, vp);
6335 			if (CFS_TIMEOUT(fscp, error)) {
6336 				connected = 1;
6337 				continue;
6338 			}
6339 		}
6340 		break;
6341 	}
6342 
	/* log the attempt; fid and fileno are filled in only if vp is set */
6343 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RMDIR)) {
6344 		ino64_t fileno = 0;
6345 		fid_t *fidp = NULL;
6346 		cnode_t *cp = NULL;
6347 		if (vp)
6348 			cp = VTOC(vp);
6349 
6350 		if (cp != NULL) {
6351 			fidp = &cp->c_metadata.md_cookie;
6352 			fileno = cp->c_id.cid_fileno;
6353 		}
6354 
6355 		cachefs_log_rmdir(cachep, error, fscp->fs_cfsvfsp,
6356 		    fidp, fileno, crgetuid(cr));
6357 	}
6358 
6359 	if (held) {
6360 		cachefs_cd_release(fscp);
6361 	}
6362 
6363 	if (vfslock)
6364 		vn_vfsunlock(vp);
6365 
6366 	if (vp)
6367 		VN_RELE(vp);
6368 
6369 #ifdef CFS_CD_DEBUG
6370 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
6371 #endif
	/*
	 * The zone check jumps straight here, skipping the logging and
	 * the cleanup above; nothing has been acquired at that point.
	 */
6372 out:
6373 #ifdef CFSDEBUG
6374 	CFS_DEBUG(CFSDEBUG_VOPS)
6375 		printf("cachefs_rmdir: EXIT error = %d\n", error);
6376 #endif
6377 
6378 	return (error);
6379 }
6380 
6381 static int
6382 cachefs_rmdir_connected(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
6383     vnode_t *vp)
6384 {
6385 	cnode_t *dcp = VTOC(dvp);
6386 	cnode_t *cp = VTOC(vp);
6387 	int error = 0;
6388 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6389 
6390 	rw_enter(&dcp->c_rwlock, RW_WRITER);
6391 	mutex_enter(&dcp->c_statelock);
6392 	mutex_enter(&cp->c_statelock);
6393 
6394 	if (dcp->c_backvp == NULL) {
6395 		error = cachefs_getbackvp(fscp, dcp);
6396 		if (error) {
6397 			goto out;
6398 		}
6399 	}
6400 
6401 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
6402 	if (error)
6403 		goto out;
6404 
6405 	/* rmdir on the back fs */
6406 	CFS_DPRINT_BACKFS_NFSV4(fscp,
6407 		("cachefs_rmdir (nfsv4): dcp %p, dbackvp %p, "
6408 		"name %s\n", dcp, dcp->c_backvp, nm));
6409 	error = VOP_RMDIR(dcp->c_backvp, nm, cdir, cr, NULL, 0);
6410 	if (error)
6411 		goto out;
6412 
6413 	/* if the dir is populated, remove the entry from it */
6414 	if (CFS_ISFS_NONSHARED(fscp) &&
6415 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
6416 		error = cachefs_dir_rmentry(dcp, nm);
6417 		if (error) {
6418 			cachefs_nocache(dcp);
6419 			error = 0;
6420 		}
6421 	}
6422 
6423 	/*
6424 	 * *if* the (hard) link count goes to 0, then we set the CDESTROY
6425 	 * flag on the cnode. The cached object will then be destroyed
6426 	 * at inactive time where the chickens come home to roost :-)
6427 	 * The link cnt for directories is bumped down by 2 'cause the "."
6428 	 * entry has to be elided too ! The link cnt for the parent goes down
6429 	 * by 1 (because of "..").
6430 	 */
6431 	cp->c_attr.va_nlink -= 2;
6432 	dcp->c_attr.va_nlink--;
6433 	if (cp->c_attr.va_nlink == 0) {
6434 		cp->c_flags |= CN_DESTROY;
6435 	} else {
6436 		cp->c_flags |= CN_UPDATED;
6437 	}
6438 	dcp->c_flags |= CN_UPDATED;
6439 
6440 	dnlc_purge_vp(vp);
6441 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
6442 
6443 out:
6444 	mutex_exit(&cp->c_statelock);
6445 	mutex_exit(&dcp->c_statelock);
6446 	rw_exit(&dcp->c_rwlock);
6447 
6448 	return (error);
6449 }
6450 
6451 static int
6452 /*ARGSUSED*/
6453 cachefs_rmdir_disconnected(vnode_t *dvp, char *nm, vnode_t *cdir,
6454     cred_t *cr, vnode_t *vp)
6455 {
6456 	cnode_t *dcp = VTOC(dvp);
6457 	cnode_t *cp = VTOC(vp);
6458 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6459 	int error = 0;
6460 	off_t commit = 0;
6461 	timestruc_t current_time;
6462 
6463 	if (CFS_ISFS_WRITE_AROUND(fscp))
6464 		return (ETIMEDOUT);
6465 
6466 	rw_enter(&dcp->c_rwlock, RW_WRITER);
6467 	mutex_enter(&dcp->c_statelock);
6468 	mutex_enter(&cp->c_statelock);
6469 
6470 	/* both directories must be populated */
6471 	if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) ||
6472 	    ((cp->c_metadata.md_flags & MD_POPULATED) == 0)) {
6473 		error = ETIMEDOUT;
6474 		goto out;
6475 	}
6476 
6477 	/* if sticky bit set on the dir, more access checks to perform */
6478 	if (error = cachefs_stickyrmchk(dcp, cp, cr)) {
6479 		goto out;
6480 	}
6481 
6482 	/* make sure dir is empty */
6483 	if (cp->c_attr.va_nlink > 2) {
6484 		error = cachefs_dir_empty(cp);
6485 		if (error) {
6486 			if (error == ENOTDIR)
6487 				error = ETIMEDOUT;
6488 			goto out;
6489 		}
6490 		cachefs_modified(cp);
6491 	}
6492 	cachefs_modified(dcp);
6493 
6494 	/* log the operation */
6495 	commit = cachefs_dlog_rmdir(fscp, dcp, nm, cp, cr);
6496 	if (commit == 0) {
6497 		error = ENOSPC;
6498 		goto out;
6499 	}
6500 
6501 	/* remove name from parent dir */
6502 	error = cachefs_dir_rmentry(dcp, nm);
6503 	if (error == ENOTDIR) {
6504 		error = ETIMEDOUT;
6505 		goto out;
6506 	}
6507 	if (error)
6508 		goto out;
6509 
6510 	gethrestime(&current_time);
6511 
6512 	/* update deleted dir values */
6513 	cp->c_attr.va_nlink -= 2;
6514 	if (cp->c_attr.va_nlink == 0)
6515 		cp->c_flags |= CN_DESTROY;
6516 	else {
6517 		cp->c_metadata.md_localctime = current_time;
6518 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
6519 		cp->c_flags |= CN_UPDATED;
6520 	}
6521 
6522 	/* update parent values */
6523 	dcp->c_metadata.md_localctime = current_time;
6524 	dcp->c_metadata.md_localmtime = current_time;
6525 	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
6526 	dcp->c_attr.va_nlink--;
6527 	dcp->c_flags |= CN_UPDATED;
6528 
6529 out:
6530 	mutex_exit(&cp->c_statelock);
6531 	mutex_exit(&dcp->c_statelock);
6532 	rw_exit(&dcp->c_rwlock);
6533 	if (commit) {
6534 		/* commit the log entry */
6535 		if (cachefs_dlog_commit(fscp, commit, error)) {
6536 			/*EMPTY*/
6537 			/* XXX bob: fix on panic */
6538 		}
6539 		dnlc_purge_vp(vp);
6540 	}
6541 	return (error);
6542 }
6543 
6544 /*ARGSUSED*/
6545 static int
6546 cachefs_symlink(vnode_t *dvp, char *lnm, vattr_t *tva,
6547     char *tnm, cred_t *cr, caller_context_t *ct, int flags)
6548 {
6549 	cnode_t *dcp = VTOC(dvp);
6550 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6551 	cachefscache_t *cachep = fscp->fs_cache;
6552 	int error = 0;
6553 	int held = 0;
6554 	int connected = 0;
6555 
6556 #ifdef CFSDEBUG
6557 	CFS_DEBUG(CFSDEBUG_VOPS)
6558 		printf("cachefs_symlink: ENTER dvp %p lnm %s tnm %s\n",
6559 		    (void *)dvp, lnm, tnm);
6560 #endif
6561 
6562 	if (getzoneid() != GLOBAL_ZONEID) {
6563 		error = EPERM;
6564 		goto out;
6565 	}
6566 
6567 	if (fscp->fs_cache->c_flags & CACHE_NOCACHE)
6568 		ASSERT(dcp->c_flags & CN_NOCACHE);
6569 
6570 	/*
6571 	 * Cachefs only provides pass-through support for NFSv4,
6572 	 * and all vnode operations are passed through to the
6573 	 * back file system. For NFSv4 pass-through to work, only
6574 	 * connected operation is supported, the cnode backvp must
6575 	 * exist, and cachefs optional (eg., disconnectable) flags
6576 	 * are turned off. Assert these conditions to ensure that
6577 	 * the backfilesystem is called for the symlink operation.
6578 	 */
6579 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6580 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6581 
6582 	for (;;) {
6583 		/* get (or renew) access to the file system */
6584 		if (held) {
6585 			/* Won't loop with NFSv4 connected behavior */
6586 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6587 			rw_exit(&dcp->c_rwlock);
6588 			cachefs_cd_release(fscp);
6589 			held = 0;
6590 		}
6591 		error = cachefs_cd_access(fscp, connected, 1);
6592 		if (error)
6593 			break;
6594 		rw_enter(&dcp->c_rwlock, RW_WRITER);
6595 		held = 1;
6596 
6597 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6598 			error = cachefs_symlink_connected(dvp, lnm, tva,
6599 				tnm, cr);
6600 			if (CFS_TIMEOUT(fscp, error)) {
6601 				rw_exit(&dcp->c_rwlock);
6602 				cachefs_cd_release(fscp);
6603 				held = 0;
6604 				cachefs_cd_timedout(fscp);
6605 				connected = 0;
6606 				continue;
6607 			}
6608 		} else {
6609 			error = cachefs_symlink_disconnected(dvp, lnm, tva,
6610 				tnm, cr);
6611 			if (CFS_TIMEOUT(fscp, error)) {
6612 				connected = 1;
6613 				continue;
6614 			}
6615 		}
6616 		break;
6617 	}
6618 
6619 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_SYMLINK))
6620 		cachefs_log_symlink(cachep, error, fscp->fs_cfsvfsp,
6621 		    &dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno,
6622 		    crgetuid(cr), (uint_t)strlen(tnm));
6623 
6624 	if (held) {
6625 		rw_exit(&dcp->c_rwlock);
6626 		cachefs_cd_release(fscp);
6627 	}
6628 
6629 #ifdef CFS_CD_DEBUG
6630 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
6631 #endif
6632 out:
6633 #ifdef CFSDEBUG
6634 	CFS_DEBUG(CFSDEBUG_VOPS)
6635 		printf("cachefs_symlink: EXIT error = %d\n", error);
6636 #endif
6637 	return (error);
6638 }
6639 
6640 static int
6641 cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva,
6642     char *tnm, cred_t *cr)
6643 {
6644 	cnode_t *dcp = VTOC(dvp);
6645 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6646 	int error = 0;
6647 	vnode_t *backvp = NULL;
6648 	cnode_t *newcp = NULL;
6649 	struct vattr va;
6650 	struct fid cookie;
6651 	cfs_cid_t cid;
6652 	uint32_t valid_fid;
6653 
6654 	mutex_enter(&dcp->c_statelock);
6655 
6656 	if (dcp->c_backvp == NULL) {
6657 		error = cachefs_getbackvp(fscp, dcp);
6658 		if (error) {
6659 			cachefs_nocache(dcp);
6660 			mutex_exit(&dcp->c_statelock);
6661 			goto out;
6662 		}
6663 	}
6664 
6665 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
6666 	if (error) {
6667 		mutex_exit(&dcp->c_statelock);
6668 		goto out;
6669 	}
6670 	CFS_DPRINT_BACKFS_NFSV4(fscp,
6671 		("cachefs_symlink (nfsv4): dcp %p, dbackvp %p, "
6672 		"lnm %s, tnm %s\n", dcp, dcp->c_backvp, lnm, tnm));
6673 	error = VOP_SYMLINK(dcp->c_backvp, lnm, tva, tnm, cr, NULL, 0);
6674 	if (error) {
6675 		mutex_exit(&dcp->c_statelock);
6676 		goto out;
6677 	}
6678 	if ((dcp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
6679 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
6680 		cachefs_nocache(dcp);
6681 		mutex_exit(&dcp->c_statelock);
6682 		goto out;
6683 	}
6684 
6685 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
6686 
6687 	/* lookup the symlink we just created and get its fid and attrs */
6688 	(void) VOP_LOOKUP(dcp->c_backvp, lnm, &backvp, NULL, 0, NULL, cr,
6689 	    NULL, NULL, NULL);
6690 	if (backvp == NULL) {
6691 		if (CFS_ISFS_BACKFS_NFSV4(fscp) == 0)
6692 			cachefs_nocache(dcp);
6693 		mutex_exit(&dcp->c_statelock);
6694 		goto out;
6695 	}
6696 
6697 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
6698 	error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
6699 	if (error) {
6700 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6701 		error = 0;
6702 		cachefs_nocache(dcp);
6703 		mutex_exit(&dcp->c_statelock);
6704 		goto out;
6705 	}
6706 	cid.cid_fileno = va.va_nodeid;
6707 	cid.cid_flags = 0;
6708 
6709 	/* if the dir is cached, add the symlink to it */
6710 	if (CFS_ISFS_NONSHARED(fscp) &&
6711 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
6712 		error = cachefs_dir_enter(dcp, lnm, &cookie, &cid, SM_ASYNC);
6713 		if (error) {
6714 			cachefs_nocache(dcp);
6715 			error = 0;
6716 		}
6717 	}
6718 	mutex_exit(&dcp->c_statelock);
6719 
6720 	/* make the cnode for the sym link */
6721 	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
6722 						&va, backvp, cr, 0, &newcp);
6723 	if (error) {
6724 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6725 		cachefs_nocache(dcp);
6726 		error = 0;
6727 		goto out;
6728 	}
6729 
6730 	/* try to cache the symlink contents */
6731 	rw_enter(&newcp->c_rwlock, RW_WRITER);
6732 	mutex_enter(&newcp->c_statelock);
6733 
6734 	/*
6735 	 * try to cache the sym link, note that its a noop if NOCACHE
6736 	 * or NFSv4 is set
6737 	 */
6738 	error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size);
6739 	if (error) {
6740 		cachefs_nocache(newcp);
6741 		error = 0;
6742 	}
6743 	mutex_exit(&newcp->c_statelock);
6744 	rw_exit(&newcp->c_rwlock);
6745 
6746 out:
6747 	if (backvp)
6748 		VN_RELE(backvp);
6749 	if (newcp)
6750 		VN_RELE(CTOV(newcp));
6751 	return (error);
6752 }
6753 
6754 static int
6755 cachefs_symlink_disconnected(vnode_t *dvp, char *lnm, vattr_t *tva,
6756     char *tnm, cred_t *cr)
6757 {
6758 	cnode_t *dcp = VTOC(dvp);
6759 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6760 	int error;
6761 	cnode_t *newcp = NULL;
6762 	struct vattr va;
6763 	timestruc_t current_time;
6764 	off_t commit = 0;
6765 
6766 	if (CFS_ISFS_WRITE_AROUND(fscp))
6767 		return (ETIMEDOUT);
6768 
6769 	mutex_enter(&dcp->c_statelock);
6770 
6771 	/* check permissions */
6772 	if (error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) {
6773 		mutex_exit(&dcp->c_statelock);
6774 		goto out;
6775 	}
6776 
6777 	/* the directory front file must be populated */
6778 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6779 		error = ETIMEDOUT;
6780 		mutex_exit(&dcp->c_statelock);
6781 		goto out;
6782 	}
6783 
6784 	/* make sure lnm does not already exist in the directory */
6785 	error = cachefs_dir_look(dcp, lnm, NULL, NULL, NULL, NULL);
6786 	if (error == ENOTDIR) {
6787 		error = ETIMEDOUT;
6788 		mutex_exit(&dcp->c_statelock);
6789 		goto out;
6790 	}
6791 	if (error != ENOENT) {
6792 		error = EEXIST;
6793 		mutex_exit(&dcp->c_statelock);
6794 		goto out;
6795 	}
6796 
6797 	/* make up a reasonable set of attributes */
6798 	cachefs_attr_setup(tva, &va, dcp, cr);
6799 	va.va_type = VLNK;
6800 	va.va_mode |= S_IFLNK;
6801 	va.va_size = strlen(tnm);
6802 
6803 	mutex_exit(&dcp->c_statelock);
6804 
6805 	/* create the cnode */
6806 	error = cachefs_cnode_create(fscp, &va, 0, &newcp);
6807 	if (error)
6808 		goto out;
6809 
6810 	rw_enter(&newcp->c_rwlock, RW_WRITER);
6811 	mutex_enter(&newcp->c_statelock);
6812 
6813 	error = cachefs_dlog_cidmap(fscp);
6814 	if (error) {
6815 		mutex_exit(&newcp->c_statelock);
6816 		rw_exit(&newcp->c_rwlock);
6817 		error = ENOSPC;
6818 		goto out;
6819 	}
6820 
6821 	cachefs_creategid(dcp, newcp, tva, cr);
6822 	mutex_enter(&dcp->c_statelock);
6823 	cachefs_createacl(dcp, newcp);
6824 	mutex_exit(&dcp->c_statelock);
6825 	gethrestime(&current_time);
6826 	newcp->c_metadata.md_vattr.va_atime = current_time;
6827 	newcp->c_metadata.md_localctime = current_time;
6828 	newcp->c_metadata.md_localmtime = current_time;
6829 	newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME |
6830 	    MD_LOCALCTIME;
6831 	newcp->c_flags |= CN_UPDATED;
6832 
6833 	/* log the operation */
6834 	commit = cachefs_dlog_symlink(fscp, dcp, newcp, lnm, tva, tnm, cr);
6835 	if (commit == 0) {
6836 		mutex_exit(&newcp->c_statelock);
6837 		rw_exit(&newcp->c_rwlock);
6838 		error = ENOSPC;
6839 		goto out;
6840 	}
6841 
6842 	/* store the symlink contents */
6843 	error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size);
6844 	if (error) {
6845 		mutex_exit(&newcp->c_statelock);
6846 		rw_exit(&newcp->c_rwlock);
6847 		goto out;
6848 	}
6849 	if (cachefs_modified_alloc(newcp)) {
6850 		mutex_exit(&newcp->c_statelock);
6851 		rw_exit(&newcp->c_rwlock);
6852 		error = ENOSPC;
6853 		goto out;
6854 	}
6855 
6856 	/*
6857 	 * write the metadata now rather than waiting until
6858 	 * inactive so that if there's no space we can let
6859 	 * the caller know.
6860 	 */
6861 	if (newcp->c_flags & CN_ALLOC_PENDING) {
6862 		if (newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
6863 			(void) filegrp_allocattr(newcp->c_filegrp);
6864 		}
6865 		error = filegrp_create_metadata(newcp->c_filegrp,
6866 		    &newcp->c_metadata, &newcp->c_id);
6867 		if (error) {
6868 			mutex_exit(&newcp->c_statelock);
6869 			rw_exit(&newcp->c_rwlock);
6870 			goto out;
6871 		}
6872 		newcp->c_flags &= ~CN_ALLOC_PENDING;
6873 	}
6874 	error = filegrp_write_metadata(newcp->c_filegrp,
6875 	    &newcp->c_id, &newcp->c_metadata);
6876 	if (error) {
6877 		mutex_exit(&newcp->c_statelock);
6878 		rw_exit(&newcp->c_rwlock);
6879 		goto out;
6880 	}
6881 	mutex_exit(&newcp->c_statelock);
6882 	rw_exit(&newcp->c_rwlock);
6883 
6884 	mutex_enter(&dcp->c_statelock);
6885 
6886 	/* enter the new file in the directory */
6887 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6888 		error = ETIMEDOUT;
6889 		mutex_exit(&dcp->c_statelock);
6890 		goto out;
6891 	}
6892 	cachefs_modified(dcp);
6893 	error = cachefs_dir_enter(dcp, lnm, &newcp->c_metadata.md_cookie,
6894 		&newcp->c_id, SM_ASYNC);
6895 	if (error) {
6896 		mutex_exit(&dcp->c_statelock);
6897 		goto out;
6898 	}
6899 
6900 	/* update parent dir times */
6901 	dcp->c_metadata.md_localctime = current_time;
6902 	dcp->c_metadata.md_localmtime = current_time;
6903 	dcp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME;
6904 	dcp->c_flags |= CN_UPDATED;
6905 	mutex_exit(&dcp->c_statelock);
6906 
6907 out:
6908 	if (commit) {
6909 		/* commit the log entry */
6910 		if (cachefs_dlog_commit(fscp, commit, error)) {
6911 			/*EMPTY*/
6912 			/* XXX bob: fix on panic */
6913 		}
6914 	}
6915 
6916 	if (error) {
6917 		if (newcp) {
6918 			mutex_enter(&newcp->c_statelock);
6919 			newcp->c_flags |= CN_DESTROY;
6920 			mutex_exit(&newcp->c_statelock);
6921 		}
6922 	}
6923 	if (newcp) {
6924 		VN_RELE(CTOV(newcp));
6925 	}
6926 
6927 	return (error);
6928 }
6929 
6930 /*ARGSUSED*/
6931 static int
6932 cachefs_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
6933     caller_context_t *ct, int flags)
6934 {
6935 	cnode_t *dcp = VTOC(vp);
6936 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6937 	cachefscache_t *cachep = fscp->fs_cache;
6938 	int error = 0;
6939 	int held = 0;
6940 	int connected = 0;
6941 
6942 #ifdef CFSDEBUG
6943 	CFS_DEBUG(CFSDEBUG_VOPS)
6944 		printf("cachefs_readdir: ENTER vp %p\n", (void *)vp);
6945 #endif
6946 	if (getzoneid() != GLOBAL_ZONEID) {
6947 		error = EPERM;
6948 		goto out;
6949 	}
6950 
6951 	/*
6952 	 * Cachefs only provides pass-through support for NFSv4,
6953 	 * and all vnode operations are passed through to the
6954 	 * back file system. For NFSv4 pass-through to work, only
6955 	 * connected operation is supported, the cnode backvp must
6956 	 * exist, and cachefs optional (eg., disconnectable) flags
6957 	 * are turned off. Assert these conditions to ensure that
6958 	 * the backfilesystem is called for the readdir operation.
6959 	 */
6960 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6961 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6962 
6963 	for (;;) {
6964 		/* get (or renew) access to the file system */
6965 		if (held) {
6966 			/* Won't loop with NFSv4 connected behavior */
6967 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6968 			rw_exit(&dcp->c_rwlock);
6969 			cachefs_cd_release(fscp);
6970 			held = 0;
6971 		}
6972 		error = cachefs_cd_access(fscp, connected, 0);
6973 		if (error)
6974 			break;
6975 		rw_enter(&dcp->c_rwlock, RW_READER);
6976 		held = 1;
6977 
6978 		/* quit if link count of zero (posix) */
6979 		if (dcp->c_attr.va_nlink == 0) {
6980 			if (eofp)
6981 				*eofp = 1;
6982 			error = 0;
6983 			break;
6984 		}
6985 
6986 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6987 			error = cachefs_readdir_connected(vp, uiop, cr,
6988 			    eofp);
6989 			if (CFS_TIMEOUT(fscp, error)) {
6990 				rw_exit(&dcp->c_rwlock);
6991 				cachefs_cd_release(fscp);
6992 				held = 0;
6993 				cachefs_cd_timedout(fscp);
6994 				connected = 0;
6995 				continue;
6996 			}
6997 		} else {
6998 			error = cachefs_readdir_disconnected(vp, uiop, cr,
6999 			    eofp);
7000 			if (CFS_TIMEOUT(fscp, error)) {
7001 				if (cachefs_cd_access_miss(fscp)) {
7002 					error = cachefs_readdir_connected(vp,
7003 					    uiop, cr, eofp);
7004 					if (!CFS_TIMEOUT(fscp, error))
7005 						break;
7006 					delay(5*hz);
7007 					connected = 0;
7008 					continue;
7009 				}
7010 				connected = 1;
7011 				continue;
7012 			}
7013 		}
7014 		break;
7015 	}
7016 
7017 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READDIR))
7018 		cachefs_log_readdir(cachep, error, fscp->fs_cfsvfsp,
7019 		&dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno,
7020 		crgetuid(cr), uiop->uio_loffset, *eofp);
7021 
7022 	if (held) {
7023 		rw_exit(&dcp->c_rwlock);
7024 		cachefs_cd_release(fscp);
7025 	}
7026 
7027 #ifdef CFS_CD_DEBUG
7028 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7029 #endif
7030 out:
7031 #ifdef CFSDEBUG
7032 	CFS_DEBUG(CFSDEBUG_VOPS)
7033 		printf("cachefs_readdir: EXIT error = %d\n", error);
7034 #endif
7035 
7036 	return (error);
7037 }
7038 
7039 static int
7040 cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp)
7041 {
7042 	cnode_t *dcp = VTOC(vp);
7043 	int error;
7044 	fscache_t *fscp = C_TO_FSCACHE(dcp);
7045 	struct cachefs_req *rp;
7046 
7047 	mutex_enter(&dcp->c_statelock);
7048 
7049 	/* check directory consistency */
7050 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
7051 	if (error)
7052 		goto out;
7053 	dcp->c_usage++;
7054 
7055 	/* if dir was modified, toss old contents */
7056 	if (dcp->c_metadata.md_flags & MD_INVALREADDIR) {
7057 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
7058 		cachefs_inval_object(dcp);
7059 	}
7060 
7061 	error = 0;
7062 	if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) &&
7063 	    ((dcp->c_flags & (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0) &&
7064 	    !CFS_ISFS_BACKFS_NFSV4(fscp) &&
7065 	    (fscp->fs_cdconnected == CFS_CD_CONNECTED)) {
7066 
7067 		if (cachefs_async_okay()) {
7068 
7069 			/*
7070 			 * Set up asynchronous request to fill this
7071 			 * directory.
7072 			 */
7073 
7074 			dcp->c_flags |= CN_ASYNC_POPULATE;
7075 
7076 			rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
7077 			rp->cfs_cmd = CFS_POPULATE;
7078 			rp->cfs_req_u.cu_populate.cpop_vp = vp;
7079 			rp->cfs_cr = cr;
7080 
7081 			crhold(cr);
7082 			VN_HOLD(vp);
7083 
7084 			cachefs_addqueue(rp, &fscp->fs_workq);
7085 		} else {
7086 			error = cachefs_dir_fill(dcp, cr);
7087 			if (error != 0)
7088 				cachefs_nocache(dcp);
7089 		}
7090 	}
7091 
7092 	/* if front file is populated */
7093 	if (((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) == 0) &&
7094 	    !CFS_ISFS_BACKFS_NFSV4(fscp) &&
7095 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
7096 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
7097 		error = cachefs_dir_read(dcp, uiop, eofp);
7098 		if (error == 0)
7099 			fscp->fs_stats.st_hits++;
7100 	}
7101 
7102 	/* if front file could not be used */
7103 	if ((error != 0) ||
7104 	    CFS_ISFS_BACKFS_NFSV4(fscp) ||
7105 	    (dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
7106 	    ((dcp->c_metadata.md_flags & MD_POPULATED) == 0)) {
7107 
7108 		if (error && !(dcp->c_flags & CN_NOCACHE) &&
7109 			!CFS_ISFS_BACKFS_NFSV4(fscp))
7110 			cachefs_nocache(dcp);
7111 
7112 		/* get the back vp */
7113 		if (dcp->c_backvp == NULL) {
7114 			error = cachefs_getbackvp(fscp, dcp);
7115 			if (error)
7116 				goto out;
7117 		}
7118 
7119 		if (fscp->fs_inum_size > 0) {
7120 			error = cachefs_readback_translate(dcp, uiop, cr, eofp);
7121 		} else {
7122 			/* do the dir read from the back fs */
7123 			(void) VOP_RWLOCK(dcp->c_backvp,
7124 						V_WRITELOCK_FALSE, NULL);
7125 			CFS_DPRINT_BACKFS_NFSV4(fscp,
7126 				("cachefs_readdir (nfsv4): "
7127 				"dcp %p, dbackvp %p\n", dcp, dcp->c_backvp));
7128 			error = VOP_READDIR(dcp->c_backvp, uiop, cr, eofp,
7129 			    NULL, 0);
7130 			VOP_RWUNLOCK(dcp->c_backvp, V_WRITELOCK_FALSE, NULL);
7131 		}
7132 
7133 		if (error == 0)
7134 			fscp->fs_stats.st_misses++;
7135 	}
7136 
7137 out:
7138 	mutex_exit(&dcp->c_statelock);
7139 
7140 	return (error);
7141 }
7142 
7143 static int
7144 cachefs_readback_translate(cnode_t *cp, uio_t *uiop, cred_t *cr, int *eofp)
7145 {
7146 	int error = 0;
7147 	fscache_t *fscp = C_TO_FSCACHE(cp);
7148 	caddr_t buffy = NULL;
7149 	int buffysize = MAXBSIZE;
7150 	caddr_t chrp, end;
7151 	ino64_t newinum;
7152 	struct dirent64 *de;
7153 	uio_t uioin;
7154 	iovec_t iov;
7155 
7156 	ASSERT(cp->c_backvp != NULL);
7157 	ASSERT(fscp->fs_inum_size > 0);
7158 
7159 	if (uiop->uio_resid < buffysize)
7160 		buffysize = (int)uiop->uio_resid;
7161 	buffy = cachefs_kmem_alloc(buffysize, KM_SLEEP);
7162 
7163 	iov.iov_base = buffy;
7164 	iov.iov_len = buffysize;
7165 	uioin.uio_iov = &iov;
7166 	uioin.uio_iovcnt = 1;
7167 	uioin.uio_segflg = UIO_SYSSPACE;
7168 	uioin.uio_fmode = 0;
7169 	uioin.uio_extflg = UIO_COPY_CACHED;
7170 	uioin.uio_loffset = uiop->uio_loffset;
7171 	uioin.uio_resid = buffysize;
7172 
7173 	(void) VOP_RWLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);
7174 	error = VOP_READDIR(cp->c_backvp, &uioin, cr, eofp, NULL, 0);
7175 	VOP_RWUNLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);
7176 
7177 	if (error != 0)
7178 		goto out;
7179 
7180 	end = buffy + buffysize - uioin.uio_resid;
7181 
7182 	mutex_exit(&cp->c_statelock);
7183 	mutex_enter(&fscp->fs_fslock);
7184 
7185 
7186 	for (chrp = buffy; chrp < end; chrp += de->d_reclen) {
7187 		de = (dirent64_t *)chrp;
7188 		newinum = cachefs_inum_real2fake(fscp, de->d_ino);
7189 		if (newinum == 0)
7190 			newinum = cachefs_fileno_conflict(fscp, de->d_ino);
7191 		de->d_ino = newinum;
7192 	}
7193 	mutex_exit(&fscp->fs_fslock);
7194 	mutex_enter(&cp->c_statelock);
7195 
7196 	error = uiomove(buffy, end - buffy, UIO_READ, uiop);
7197 	uiop->uio_loffset = uioin.uio_loffset;
7198 
7199 out:
7200 
7201 	if (buffy != NULL)
7202 		cachefs_kmem_free(buffy, buffysize);
7203 
7204 	return (error);
7205 }
7206 
7207 static int
7208 /*ARGSUSED*/
7209 cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop, cred_t *cr,
7210     int *eofp)
7211 {
7212 	cnode_t *dcp = VTOC(vp);
7213 	int error;
7214 
7215 	mutex_enter(&dcp->c_statelock);
7216 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
7217 		error = ETIMEDOUT;
7218 	} else {
7219 		error = cachefs_dir_read(dcp, uiop, eofp);
7220 		if (error == ENOTDIR)
7221 			error = ETIMEDOUT;
7222 	}
7223 	mutex_exit(&dcp->c_statelock);
7224 
7225 	return (error);
7226 }
7227 
7228 /*ARGSUSED*/
7229 static int
7230 cachefs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
7231 {
7232 	int error = 0;
7233 	struct cnode *cp = VTOC(vp);
7234 	fscache_t *fscp = C_TO_FSCACHE(cp);
7235 
7236 	/*
7237 	 * Cachefs only provides pass-through support for NFSv4,
7238 	 * and all vnode operations are passed through to the
7239 	 * back file system. For NFSv4 pass-through to work, only
7240 	 * connected operation is supported, the cnode backvp must
7241 	 * exist, and cachefs optional (eg., disconnectable) flags
7242 	 * are turned off. Assert these conditions, then bail
7243 	 * as  NFSv4 doesn't support VOP_FID.
7244 	 */
7245 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7246 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7247 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7248 		return (ENOTSUP);
7249 	}
7250 
7251 	mutex_enter(&cp->c_statelock);
7252 	if (fidp->fid_len < cp->c_metadata.md_cookie.fid_len) {
7253 		fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
7254 		error = ENOSPC;
7255 	} else {
7256 		bcopy(cp->c_metadata.md_cookie.fid_data, fidp->fid_data,
7257 		    cp->c_metadata.md_cookie.fid_len);
7258 		fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
7259 	}
7260 	mutex_exit(&cp->c_statelock);
7261 	return (error);
7262 }
7263 
7264 /* ARGSUSED2 */
7265 static int
7266 cachefs_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
7267 {
7268 	cnode_t *cp = VTOC(vp);
7269 
7270 	/*
7271 	 * XXX - This is ifdef'ed out for now. The problem -
7272 	 * getdents() acquires the read version of rwlock, then we come
7273 	 * into cachefs_readdir() and that wants to acquire the write version
7274 	 * of this lock (if its going to populate the directory). This is
7275 	 * a problem, this can be solved by introducing another lock in the
7276 	 * cnode.
7277 	 */
7278 /* XXX */
7279 	if (vp->v_type != VREG)
7280 		return (-1);
7281 	if (write_lock)
7282 		rw_enter(&cp->c_rwlock, RW_WRITER);
7283 	else
7284 		rw_enter(&cp->c_rwlock, RW_READER);
7285 	return (write_lock);
7286 }
7287 
7288 /* ARGSUSED */
7289 static void
7290 cachefs_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
7291 {
7292 	cnode_t *cp = VTOC(vp);
7293 	if (vp->v_type != VREG)
7294 		return;
7295 	rw_exit(&cp->c_rwlock);
7296 }
7297 
7298 /* ARGSUSED */
7299 static int
7300 cachefs_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
7301     caller_context_t *ct)
7302 {
7303 	return (0);
7304 }
7305 
7306 static int cachefs_lostpage = 0;
7307 /*
7308  * Return all the pages from [off..off+len] in file
7309  */
7310 /*ARGSUSED*/
7311 static int
7312 cachefs_getpage(struct vnode *vp, offset_t off, size_t len,
7313 	uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
7314 	caddr_t addr, enum seg_rw rw, cred_t *cr, caller_context_t *ct)
7315 {
7316 	cnode_t *cp = VTOC(vp);
7317 	int error;
7318 	fscache_t *fscp = C_TO_FSCACHE(cp);
7319 	cachefscache_t *cachep = fscp->fs_cache;
7320 	int held = 0;
7321 	int connected = 0;
7322 
7323 #ifdef CFSDEBUG
7324 	u_offset_t offx = (u_offset_t)off;
7325 
7326 	CFS_DEBUG(CFSDEBUG_VOPS)
7327 		printf("cachefs_getpage: ENTER vp %p off %lld len %lu rw %d\n",
7328 		    (void *)vp, offx, len, rw);
7329 #endif
7330 	if (getzoneid() != GLOBAL_ZONEID) {
7331 		error = EPERM;
7332 		goto out;
7333 	}
7334 
7335 	if (vp->v_flag & VNOMAP) {
7336 		error = ENOSYS;
7337 		goto out;
7338 	}
7339 
7340 	/* Call backfilesystem if NFSv4 */
7341 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7342 		error = cachefs_getpage_backfs_nfsv4(vp, off, len, protp, pl,
7343 						plsz, seg, addr, rw, cr);
7344 		goto out;
7345 	}
7346 
7347 	/* XXX sam: make this do an async populate? */
7348 	if (pl == NULL) {
7349 		error = 0;
7350 		goto out;
7351 	}
7352 	if (protp != NULL)
7353 		*protp = PROT_ALL;
7354 
7355 	for (;;) {
7356 		/* get (or renew) access to the file system */
7357 		if (held) {
7358 			cachefs_cd_release(fscp);
7359 			held = 0;
7360 		}
7361 		error = cachefs_cd_access(fscp, connected, 0);
7362 		if (error)
7363 			break;
7364 		held = 1;
7365 
7366 		/*
7367 		 * If we are getting called as a side effect of a
7368 		 * cachefs_write()
7369 		 * operation the local file size might not be extended yet.
7370 		 * In this case we want to be able to return pages of zeroes.
7371 		 */
7372 		if ((u_offset_t)off + len >
7373 			((cp->c_size + PAGEOFFSET) & (offset_t)PAGEMASK)) {
7374 			if (seg != segkmap) {
7375 				error = EFAULT;
7376 				break;
7377 			}
7378 		}
7379 		if (len <= PAGESIZE)
7380 			error = cachefs_getapage(vp, (u_offset_t)off, len,
7381 			    protp, pl, plsz, seg, addr, rw, cr);
7382 		else
7383 			error = pvn_getpages(cachefs_getapage, vp,
7384 			    (u_offset_t)off, len, protp, pl, plsz, seg, addr,
7385 			    rw, cr);
7386 		if (error == 0)
7387 			break;
7388 
7389 		if (((cp->c_flags & CN_NOCACHE) && (error == ENOSPC)) ||
7390 		    error == EAGAIN) {
7391 			connected = 0;
7392 			continue;
7393 		}
7394 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
7395 			if (CFS_TIMEOUT(fscp, error)) {
7396 				cachefs_cd_release(fscp);
7397 				held = 0;
7398 				cachefs_cd_timedout(fscp);
7399 				connected = 0;
7400 				continue;
7401 			}
7402 		} else {
7403 			if (CFS_TIMEOUT(fscp, error)) {
7404 				if (cachefs_cd_access_miss(fscp)) {
7405 					if (len <= PAGESIZE)
7406 						error = cachefs_getapage_back(
7407 						    vp, (u_offset_t)off,
7408 						    len, protp, pl,
7409 						    plsz, seg, addr, rw, cr);
7410 					else
7411 						error = pvn_getpages(
7412 						    cachefs_getapage_back, vp,
7413 						    (u_offset_t)off, len,
7414 						    protp, pl,
7415 						    plsz, seg, addr, rw, cr);
7416 					if (!CFS_TIMEOUT(fscp, error) &&
7417 					    (error != EAGAIN))
7418 						break;
7419 					delay(5*hz);
7420 					connected = 0;
7421 					continue;
7422 				}
7423 				connected = 1;
7424 				continue;
7425 			}
7426 		}
7427 		break;
7428 	}
7429 
7430 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GETPAGE))
7431 		cachefs_log_getpage(cachep, error, vp->v_vfsp,
7432 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
7433 		    crgetuid(cr), off, len);
7434 
7435 	if (held) {
7436 		cachefs_cd_release(fscp);
7437 	}
7438 
7439 out:
7440 #ifdef CFS_CD_DEBUG
7441 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7442 #endif
7443 #ifdef CFSDEBUG
7444 	CFS_DEBUG(CFSDEBUG_VOPS)
7445 		printf("cachefs_getpage: EXIT vp %p error %d\n",
7446 		    (void *)vp, error);
7447 #endif
7448 	return (error);
7449 }
7450 
7451 /*
7452  * cachefs_getpage_backfs_nfsv4
7453  *
7454  * Call NFSv4 back filesystem to handle the getpage (cachefs
7455  * pass-through support for NFSv4).
7456  */
7457 static int
7458 cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off, size_t len,
7459 			uint_t *protp, struct page *pl[], size_t plsz,
7460 			struct seg *seg, caddr_t addr, enum seg_rw rw,
7461 			cred_t *cr)
7462 {
7463 	cnode_t *cp = VTOC(vp);
7464 	fscache_t *fscp = C_TO_FSCACHE(cp);
7465 	vnode_t *backvp;
7466 	int error;
7467 
7468 	/*
7469 	 * For NFSv4 pass-through to work, only connected operation is
7470 	 * supported, the cnode backvp must exist, and cachefs optional
7471 	 * (eg., disconnectable) flags are turned off. Assert these
7472 	 * conditions for the getpage operation.
7473 	 */
7474 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7475 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7476 
7477 	/* Call backfs vnode op after extracting backvp */
7478 	mutex_enter(&cp->c_statelock);
7479 	backvp = cp->c_backvp;
7480 	mutex_exit(&cp->c_statelock);
7481 
7482 	CFS_DPRINT_BACKFS_NFSV4(fscp,
7483 		("cachefs_getpage_backfs_nfsv4: cnode %p, backvp %p\n",
7484 		cp, backvp));
7485 	error = VOP_GETPAGE(backvp, off, len, protp, pl, plsz, seg,
7486 				    addr, rw, cr, NULL);
7487 
7488 	return (error);
7489 }
7490 
7491 /*
7492  * Called from pvn_getpages or cachefs_getpage to get a particular page.
 *
 * The offset is page-aligned with PAGEMASK and the page at that offset of
 * vp is normally returned in pl[0], with pl[1] set to NULL.
 *
 * If the page already exists on vp it is looked up with SE_SHARED.
 * Otherwise, with c_statelock held, the page is read from one of:
 *   - the back vnode, when the cnode has CN_NOCACHE set (an EFAULT from
 *     the back file system is turned into a freshly created, zeroed page);
 *   - the back vnode, when cachefs_check_allocmap() returns 0 for the
 *     offset; the front file is populated first if the file group has
 *     CFS_FG_WRITE set;
 *   - the front vnode otherwise.
 * Pages handed back by the underlying VOP_GETPAGE() are renamed onto vp
 * with page_rename().
 *
 * Returns 0 or an errno value.  EAGAIN is returned after cachefs_nocache()
 * has been called on the cnode or when page_tryupgrade() fails; ETIMEDOUT
 * is returned when the back file system is required but fs_cdconnected is
 * not CFS_CD_CONNECTED.
7493  */
7494 /*ARGSUSED*/
7495 static int
7496 cachefs_getapage(struct vnode *vp, u_offset_t off, size_t len, uint_t *protp,
7497 	struct page *pl[], size_t plsz, struct seg *seg, caddr_t addr,
7498 	enum seg_rw rw, cred_t *cr)
7499 {
7500 	cnode_t *cp = VTOC(vp);
7501 	page_t **ppp, *pp = NULL;
7502 	fscache_t *fscp = C_TO_FSCACHE(cp);
7503 	cachefscache_t *cachep = fscp->fs_cache;
7504 	int error = 0;
7505 	struct page **ourpl;
7506 	struct page *ourstackpl[17]; /* see ASSERT() below for 17 */
7507 	int index = 0;
7508 	int downgrade;
7509 	int have_statelock = 0;
7510 	u_offset_t popoff;
7511 	size_t popsize = 0;
7512 
7513 	ASSERT(((DEF_POP_SIZE / PAGESIZE) + 1) <= 17);
7514 
	/*
	 * Use the on-stack page list unless the configured population size
	 * exceeds DEF_POP_SIZE; in that case a list sized from fi_popsize
	 * is allocated here and freed again at "out".
	 */
7515 	if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
7516 		ourpl = cachefs_kmem_alloc(sizeof (struct page *) *
7517 		    ((fscp->fs_info.fi_popsize / PAGESIZE) + 1), KM_SLEEP);
7518 	else
7519 		ourpl = ourstackpl;
7520 
	/* Start with an empty list and a page-aligned offset. */
7521 	ourpl[0] = NULL;
7522 	off = off & (offset_t)PAGEMASK;
7523 again:
7524 	/*
7525 	 * Look for the page
7526 	 */
7527 	if (page_exists(vp, off) == 0) {
7528 		/*
7529 		 * Need to do work to get the page.
7530 		 * Grab our lock because we are going to
7531 		 * modify the state of the cnode.
7532 		 */
7533 		if (! have_statelock) {
7534 			mutex_enter(&cp->c_statelock);
7535 			have_statelock = 1;
7536 		}
7537 		/*
7538 		 * If we're in NOCACHE mode, we will need a backvp
7539 		 */
7540 		if (cp->c_flags & CN_NOCACHE) {
7541 			if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
7542 				error = ETIMEDOUT;
7543 				goto out;
7544 			}
7545 			if (cp->c_backvp == NULL) {
7546 				error = cachefs_getbackvp(fscp, cp);
7547 				if (error)
7548 					goto out;
7549 			}
7550 			error = VOP_GETPAGE(cp->c_backvp, off,
7551 					PAGESIZE, protp, ourpl, PAGESIZE, seg,
7552 					addr, S_READ, cr, NULL);
7553 			/*
7554 			 * backfs returns EFAULT when we are trying for a
7555 			 * page beyond EOF but cachefs has the knowledge that
7556 			 * it is not beyond EOF be cause cp->c_size is
7557 			 * greater then the offset requested.
7558 			 */
7559 			if (error == EFAULT) {
7560 				error = 0;
7561 				pp = page_create_va(vp, off, PAGESIZE,
7562 				    PG_EXCL | PG_WAIT, seg, addr);
				/*
				 * NOTE(review): c_statelock is still held
				 * when retrying from here; if the page now
				 * exists, the ASSERT(! have_statelock) in
				 * the lookup branch below would be reached
				 * with the lock held -- confirm.
				 */
7563 				if (pp == NULL)
7564 					goto again;
7565 				pagezero(pp, 0, PAGESIZE);
7566 				pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
7567 				goto out;
7568 			}
7569 			if (error)
7570 				goto out;
7571 			goto getpages;
7572 		}
7573 		/*
7574 		 * We need a front file. If we can't get it,
7575 		 * put the cnode in NOCACHE mode and try again.
7576 		 */
7577 		if (cp->c_frontvp == NULL) {
7578 			error = cachefs_getfrontfile(cp);
7579 			if (error) {
7580 				cachefs_nocache(cp);
7581 				error = EAGAIN;
7582 				goto out;
7583 			}
7584 		}
7585 		/*
7586 		 * Check if the front file needs population.
7587 		 * If population is necessary, make sure we have a
7588 		 * backvp as well. We will get the page from the backvp.
7589 		 * bug 4152459-
7590 		 * But if the file system is in disconnected mode
7591 		 * and the file is a local file then do not check the
7592 		 * allocmap.
7593 		 */
7594 		if (((fscp->fs_cdconnected == CFS_CD_CONNECTED) ||
7595 		    ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) &&
7596 		    (cachefs_check_allocmap(cp, off) == 0)) {
7597 			if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
7598 				error = ETIMEDOUT;
7599 				goto out;
7600 			}
7601 			if (cp->c_backvp == NULL) {
7602 				error = cachefs_getbackvp(fscp, cp);
7603 				if (error)
7604 					goto out;
7605 			}
7606 			if (cp->c_filegrp->fg_flags & CFS_FG_WRITE) {
7607 				cachefs_cluster_allocmap(off, &popoff,
7608 				    &popsize,
7609 				    fscp->fs_info.fi_popsize, cp);
7610 				if (popsize != 0) {
7611 					error = cachefs_populate(cp,
7612 					    popoff, popsize,
7613 					    cp->c_frontvp, cp->c_backvp,
7614 					    cp->c_size, cr);
7615 					if (error) {
7616 						cachefs_nocache(cp);
7617 						error = EAGAIN;
7618 						goto out;
7619 					} else {
7620 						cp->c_flags |=
7621 						    CN_UPDATED |
7622 						    CN_NEED_FRONT_SYNC |
7623 						    CN_POPULATION_PENDING;
7624 					}
					/*
					 * Reduce popsize by the distance
					 * from the start of the populated
					 * range to the requested offset.
					 */
7625 					popsize = popsize - (off - popoff);
7626 				} else {
7627 					popsize = PAGESIZE;
7628 				}
7629 			}
7630 			/* else XXX assert CN_NOCACHE? */
			/*
			 * NOTE(review): popsize is still 0 here when
			 * CFS_FG_WRITE is not set on the file group, so
			 * the back file system is then called with a
			 * page-list size of 0 -- confirm this is intended.
			 */
7631 			error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
7632 					PAGESIZE, protp, ourpl, popsize,
7633 					seg, addr, S_READ, cr, NULL);
7634 			if (error)
7635 				goto out;
7636 			fscp->fs_stats.st_misses++;
7637 		} else {
			/*
			 * A population is still pending: fsync the front
			 * file before reading from it.  The pending flag is
			 * cleared whether or not the fsync succeeds.
			 */
7638 			if (cp->c_flags & CN_POPULATION_PENDING) {
7639 				error = VOP_FSYNC(cp->c_frontvp, FSYNC, cr,
7640 				    NULL);
7641 				cp->c_flags &= ~CN_POPULATION_PENDING;
7642 				if (error) {
7643 					cachefs_nocache(cp);
7644 					error = EAGAIN;
7645 					goto out;
7646 				}
7647 			}
7648 			/*
7649 			 * File was populated so we get the page from the
7650 			 * frontvp
7651 			 */
7652 			error = VOP_GETPAGE(cp->c_frontvp, (offset_t)off,
7653 			    PAGESIZE, protp, ourpl, PAGESIZE, seg, addr,
7654 			    rw, cr, NULL);
7655 			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GPFRONT))
7656 				cachefs_log_gpfront(cachep, error,
7657 				    fscp->fs_cfsvfsp,
7658 				    &cp->c_metadata.md_cookie, cp->c_fileno,
7659 				    crgetuid(cr), off, PAGESIZE);
7660 			if (error) {
7661 				cachefs_nocache(cp);
7662 				error = EAGAIN;
7663 				goto out;
7664 			}
7665 			fscp->fs_stats.st_hits++;
7666 		}
7667 getpages:
7668 		ASSERT(have_statelock);
7669 		if (have_statelock) {
7670 			mutex_exit(&cp->c_statelock);
7671 			have_statelock = 0;
7672 		}
		/*
		 * Walk the NULL-terminated list in ourpl.  Pages located
		 * before "off" are unlocked and skipped (index counts them).
		 * Every other page is held exclusively, passed to
		 * hat_pageunload() and renamed onto vp at its own offset.
		 */
7673 		downgrade = 0;
7674 		for (ppp = ourpl; *ppp; ppp++) {
7675 			if ((*ppp)->p_offset < off) {
7676 				index++;
7677 				page_unlock(*ppp);
7678 				continue;
7679 			}
7680 			if (PAGE_SHARED(*ppp)) {
7681 				if (page_tryupgrade(*ppp) == 0) {
					/*
					 * Upgrade failed: unlock every page
					 * from ourpl[index] onward and fail
					 * with EAGAIN.
					 */
7682 					for (ppp = &ourpl[index]; *ppp; ppp++)
7683 						page_unlock(*ppp);
7684 					error = EAGAIN;
7685 					goto out;
7686 				}
7687 				downgrade = 1;
7688 			}
7689 			ASSERT(PAGE_EXCL(*ppp));
7690 			(void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
7691 			page_rename(*ppp, vp, (*ppp)->p_offset);
7692 		}
		/* Hand back only the first page at or after "off". */
7693 		pl[0] = ourpl[index];
7694 		pl[1] = NULL;
7695 		if (downgrade) {
7696 			page_downgrade(ourpl[index]);
7697 		}
7698 		/* Unlock the rest of the pages from the cluster */
7699 		for (ppp = &ourpl[index+1]; *ppp; ppp++)
7700 			page_unlock(*ppp);
7701 	} else {
7702 		ASSERT(! have_statelock);
7703 		if (have_statelock) {
7704 			mutex_exit(&cp->c_statelock);
7705 			have_statelock = 0;
7706 		}
7707 		/* XXX SE_SHARED probably isn't what we *always* want */
		/*
		 * The page existed a moment ago; if the lookup now fails,
		 * count it in cachefs_lostpage and start over.
		 */
7708 		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
7709 			cachefs_lostpage++;
7710 			goto again;
7711 		}
7712 		pl[0] = pp;
7713 		pl[1] = NULL;
7714 		/* XXX increment st_hits?  i don't think so, but... */
7715 	}
7716 
7717 out:
	/* Common exit: drop the state lock and free a heap page list. */
7718 	if (have_statelock) {
7719 		mutex_exit(&cp->c_statelock);
7720 		have_statelock = 0;
7721 	}
7722 	if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
7723 		cachefs_kmem_free(ourpl, sizeof (struct page *) *
7724 		    ((fscp->fs_info.fi_popsize / PAGESIZE) + 1));
7725 	return (error);
7726 }
7727 
7728 /* gets a page but only from the back fs */
/*
 * Variant of the single-page getpage routine that never consults the front
 * file: when the page is not already present on vp it is always read with
 * VOP_GETPAGE() from the cnode's back vnode (obtained through
 * cachefs_getbackvp() if c_backvp is NULL).
 *
 * The offset is page-aligned with PAGEMASK.  On success the page is
 * returned in pl[0] with pl[1] set to NULL.  Returns 0 or an errno value;
 * EAGAIN is returned when page_tryupgrade() fails.
 */
7729 /*ARGSUSED*/
7730 static int
7731 cachefs_getapage_back(struct vnode *vp, u_offset_t off, size_t len,
7732     uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
7733     caddr_t addr, enum seg_rw rw, cred_t *cr)
7734 {
7735 	cnode_t *cp = VTOC(vp);
7736 	page_t **ppp, *pp = NULL;
7737 	fscache_t *fscp = C_TO_FSCACHE(cp);
7738 	int error = 0;
7739 	struct page *ourpl[17];
7740 	int index = 0;
7741 	int have_statelock = 0;
7742 	int downgrade;
7743 
7744 	/*
7745 	 * Grab the cnode statelock so the cnode state won't change
7746 	 * while we're in here.
7747 	 */
	/*
	 * (The lock itself is only taken further down, once the page is
	 * found to be missing; here the list is emptied and the offset
	 * page-aligned.)
	 */
7748 	ourpl[0] = NULL;
7749 	off = off & (offset_t)PAGEMASK;
7750 again:
7751 	if (page_exists(vp, off) == 0) {
7752 		if (! have_statelock) {
7753 			mutex_enter(&cp->c_statelock);
7754 			have_statelock = 1;
7755 		}
7756 
7757 		if (cp->c_backvp == NULL) {
7758 			error = cachefs_getbackvp(fscp, cp);
7759 			if (error)
7760 				goto out;
7761 		}
7762 		error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
7763 			PAGESIZE, protp, ourpl, PAGESIZE, seg,
7764 			addr, S_READ, cr, NULL);
7765 		if (error)
7766 			goto out;
7767 
7768 		if (have_statelock) {
7769 			mutex_exit(&cp->c_statelock);
7770 			have_statelock = 0;
7771 		}
		/*
		 * Walk the NULL-terminated list in ourpl.  Pages located
		 * before "off" are unlocked and skipped (index counts them).
		 * Every other page is held exclusively, passed to
		 * hat_pageunload() and renamed onto vp at its own offset.
		 */
7772 		downgrade = 0;
7773 		for (ppp = ourpl; *ppp; ppp++) {
7774 			if ((*ppp)->p_offset < off) {
7775 				index++;
7776 				page_unlock(*ppp);
7777 				continue;
7778 			}
7779 			if (PAGE_SHARED(*ppp)) {
7780 				if (page_tryupgrade(*ppp) == 0) {
					/*
					 * Upgrade failed: unlock every page
					 * from ourpl[index] onward and fail
					 * with EAGAIN.
					 */
7781 					for (ppp = &ourpl[index]; *ppp; ppp++)
7782 						page_unlock(*ppp);
7783 					error = EAGAIN;
7784 					goto out;
7785 				}
7786 				downgrade = 1;
7787 			}
7788 			ASSERT(PAGE_EXCL(*ppp));
7789 			(void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
7790 			page_rename(*ppp, vp, (*ppp)->p_offset);
7791 		}
		/* Hand back only the first page at or after "off". */
7792 		pl[0] = ourpl[index];
7793 		pl[1] = NULL;
7794 		if (downgrade) {
7795 			page_downgrade(ourpl[index]);
7796 		}
7797 		/* Unlock the rest of the pages from the cluster */
7798 		for (ppp = &ourpl[index+1]; *ppp; ppp++)
7799 			page_unlock(*ppp);
7800 	} else {
7801 		ASSERT(! have_statelock);
7802 		if (have_statelock) {
7803 			mutex_exit(&cp->c_statelock);
7804 			have_statelock = 0;
7805 		}
		/*
		 * The page existed a moment ago; if the lookup now fails,
		 * count it in cachefs_lostpage and start over.
		 */
7806 		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
7807 			cachefs_lostpage++;
7808 			goto again;
7809 		}
7810 		pl[0] = pp;
7811 		pl[1] = NULL;
7812 	}
7813 
7814 out:
	/* Common exit: make sure the state lock is not left held. */
7815 	if (have_statelock) {
7816 		mutex_exit(&cp->c_statelock);
7817 		have_statelock = 0;
7818 	}
7819 	return (error);
7820 }
7821 
7822 /*ARGSUSED*/
7823 static int
7824 cachefs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
7825     caller_context_t *ct)
7826 {
7827 	cnode_t *cp = VTOC(vp);
7828 	int error = 0;
7829 	fscache_t *fscp = C_TO_FSCACHE(cp);
7830 	int held = 0;
7831 	int connected = 0;
7832 
7833 	if (getzoneid() != GLOBAL_ZONEID)
7834 		return (EPERM);
7835 
7836 	/* Call backfilesytem if NFSv4 */
7837 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7838 		error = cachefs_putpage_backfs_nfsv4(vp, off, len, flags, cr);
7839 		goto out;
7840 	}
7841 
7842 	for (;;) {
7843 		/* get (or renew) access to the file system */
7844 		if (held) {
7845 			cachefs_cd_release(fscp);
7846 			held = 0;
7847 		}
7848 		error = cachefs_cd_access(fscp, connected, 1);
7849 		if (error)
7850 			break;
7851 		held = 1;
7852 
7853 		error = cachefs_putpage_common(vp, off, len, flags, cr);
7854 		if (error == 0)
7855 			break;
7856 
7857 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
7858 			if (CFS_TIMEOUT(fscp, error)) {
7859 				cachefs_cd_release(fscp);
7860 				held = 0;
7861 				cachefs_cd_timedout(fscp);
7862 				connected = 0;
7863 				continue;
7864 			}
7865 		} else {
7866 			if (NOMEMWAIT()) {
7867 				error = 0;
7868 				goto out;
7869 			}
7870 			if (CFS_TIMEOUT(fscp, error)) {
7871 				connected = 1;
7872 				continue;
7873 			}
7874 		}
7875 		break;
7876 	}
7877 
7878 out:
7879 
7880 	if (held) {
7881 		cachefs_cd_release(fscp);
7882 	}
7883 
7884 #ifdef CFS_CD_DEBUG
7885 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7886 #endif
7887 	return (error);
7888 }
7889 
7890 /*
7891  * cachefs_putpage_backfs_nfsv4
7892  *
7893  * Call NFSv4 back filesystem to handle the putpage (cachefs
7894  * pass-through support for NFSv4).
7895  */
7896 static int
7897 cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off, size_t len, int flags,
7898 			cred_t *cr)
7899 {
7900 	cnode_t *cp = VTOC(vp);
7901 	fscache_t *fscp = C_TO_FSCACHE(cp);
7902 	vnode_t *backvp;
7903 	int error;
7904 
7905 	/*
7906 	 * For NFSv4 pass-through to work, only connected operation is
7907 	 * supported, the cnode backvp must exist, and cachefs optional
7908 	 * (eg., disconnectable) flags are turned off. Assert these
7909 	 * conditions for the putpage operation.
7910 	 */
7911 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7912 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7913 
7914 	/* Call backfs vnode op after extracting backvp */
7915 	mutex_enter(&cp->c_statelock);
7916 	backvp = cp->c_backvp;
7917 	mutex_exit(&cp->c_statelock);
7918 
7919 	CFS_DPRINT_BACKFS_NFSV4(fscp,
7920 		("cachefs_putpage_backfs_nfsv4: cnode %p, backvp %p\n",
7921 		cp, backvp));
7922 	error = VOP_PUTPAGE(backvp, off, len, flags, cr, NULL);
7923 
7924 	return (error);
7925 }
7926 
7927 /*
7928  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
7929  * If len == 0, do from off to EOF.
7930  *
7931  * The normal cases should be len == 0 & off == 0 (entire vp list),
7932  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
7933  * (from pageout).
 *
 * Returns 0 right away when there is nothing to do: a whole-file,
 * non-invalidating request on a read-only vnode, a vnode without cached
 * data, or a non-invalidating request starting at or beyond c_size.
 *
 * A B_ASYNC request is normally turned into a CFS_PUTPAGE work item on the
 * fscache work queue and 0 is returned; it is performed synchronously
 * instead when the caller is the pageout process or cachefs_async_okay()
 * is false.
 *
 * The synchronous path pushes pages with cachefs_push(), either through
 * pvn_vplist_dirty() (len == 0) or page by page over the requested range,
 * and returns the first error encountered.  CDIRTY is cleared on the cnode
 * when a request covering the whole file completes without error.
7934  */
7935 
7936 /*ARGSUSED*/
7937 int
7938 cachefs_putpage_common(struct vnode *vp, offset_t off, size_t len,
7939     int flags, cred_t *cr)
7940 {
7941 	struct cnode *cp  = VTOC(vp);
7942 	struct page *pp;
7943 	size_t io_len;
7944 	u_offset_t eoff, io_off;
7945 	int error = 0;
7946 	fscache_t *fscp = C_TO_FSCACHE(cp);
7947 	cachefscache_t *cachep = fscp->fs_cache;
7948 
7949 	if (len == 0 && (flags & B_INVAL) == 0 && vn_is_readonly(vp)) {
7950 		return (0);
7951 	}
7952 	if (!vn_has_cached_data(vp) || (off >= cp->c_size &&
7953 	    (flags & B_INVAL) == 0))
7954 		return (0);
7955 
7956 	/*
7957 	 * Should never have cached data for the cachefs vnode
7958 	 * if NFSv4 is in use.
7959 	 */
7960 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
7961 
7962 	/*
7963 	 * If this is an async putpage let a thread handle it.
7964 	 */
7965 	if (flags & B_ASYNC) {
7966 		struct cachefs_req *rp;
		/* The request flags other than B_ASYNC and B_DONTNEED. */
7967 		int tflags = (flags & ~(B_ASYNC|B_DONTNEED));
7968 
7969 		if (ttoproc(curthread) == proc_pageout) {
7970 			/*
7971 			 * If this is the page daemon we
7972 			 * do the push synchronously (Dangerous!) and hope
7973 			 * we can free enough to keep running...
7974 			 */
7975 			flags &= ~B_ASYNC;
7976 			goto again;
7977 		}
7978 
7979 		if (! cachefs_async_okay()) {
7980 
7981 			/*
7982 			 * this is somewhat like NFS's behavior.  keep
7983 			 * the system from thrashing.  we've seen
7984 			 * cases where async queues get out of
7985 			 * control, especially if
7986 			 * madvise(MADV_SEQUENTIAL) is done on a large
7987 			 * mmap()ed file that is read sequentially.
7988 			 */
7989 
7990 			flags &= ~B_ASYNC;
7991 			goto again;
7992 		}
7993 
7994 		/*
7995 		 * if no flags other than B_ASYNC were set,
7996 		 * we coalesce putpage requests into a single one for the
7997 		 * whole file (len = off = 0).  If such a request is
7998 		 * already queued, we're done.
7999 		 *
8000 		 * If there are other flags set (e.g., B_INVAL), we don't
8001 		 * attempt to coalesce and we use the specified length and
8002 		 * offset.
8003 		 */
		/*
		 * The request is allocated before c_iomutex is taken and is
		 * freed again below if it turns out not to be needed.
		 */
8004 		rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
8005 		mutex_enter(&cp->c_iomutex);
8006 		if ((cp->c_ioflags & CIO_PUTPAGES) == 0 || tflags != 0) {
8007 			rp->cfs_cmd = CFS_PUTPAGE;
8008 			rp->cfs_req_u.cu_putpage.cp_vp = vp;
8009 			if (tflags == 0) {
8010 				off = len = 0;
8011 				cp->c_ioflags |= CIO_PUTPAGES;
8012 			}
8013 			rp->cfs_req_u.cu_putpage.cp_off = off;
			/*
			 * NOTE(review): len is narrowed to uint_t here;
			 * confirm callers never queue a length that does
			 * not fit.
			 */
8014 			rp->cfs_req_u.cu_putpage.cp_len = (uint_t)len;
8015 			rp->cfs_req_u.cu_putpage.cp_flags = flags & ~B_ASYNC;
			/*
			 * Take holds on the credential and the vnode and
			 * count the outstanding request in c_nio before
			 * queueing it.
			 */
8016 			rp->cfs_cr = cr;
8017 			crhold(rp->cfs_cr);
8018 			VN_HOLD(vp);
8019 			cp->c_nio++;
8020 			cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));
8021 		} else {
8022 			kmem_cache_free(cachefs_req_cache, rp);
8023 		}
8024 
8025 		mutex_exit(&cp->c_iomutex);
8026 		return (0);
8027 	}
8028 
8029 
8030 again:
	/* Synchronous path; B_ASYNC is clear whenever this is reached. */
8031 	if (len == 0) {
8032 		/*
8033 		 * Search the entire vp list for pages >= off
8034 		 */
8035 		error = pvn_vplist_dirty(vp, off, cachefs_push, flags, cr);
8036 	} else {
8037 		/*
8038 		 * Do a range from [off...off + len] looking for pages
8039 		 * to deal with.
8040 		 */
8041 		eoff = (u_offset_t)off + len;
		/*
		 * io_len is assigned on every pass, either to PAGESIZE or
		 * by cachefs_push(), before it is used to advance io_off.
		 */
8042 		for (io_off = off; io_off < eoff && io_off < cp->c_size;
8043 			io_off += io_len) {
8044 			/*
8045 			 * If we are not invalidating, synchronously
8046 			 * freeing or writing pages use the routine
8047 			 * page_lookup_nowait() to prevent reclaiming
8048 			 * them from the free list.
8049 			 */
8050 			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
8051 				pp = page_lookup(vp, io_off,
8052 					(flags & (B_INVAL | B_FREE)) ?
8053 					    SE_EXCL : SE_SHARED);
8054 			} else {
8055 				/* XXX this looks like dead code */
8056 				pp = page_lookup_nowait(vp, io_off,
8057 					(flags & B_FREE) ? SE_EXCL : SE_SHARED);
8058 			}
8059 
			/*
			 * No page, or pvn_getdirty() returned 0: nothing to
			 * push here, step over one page.
			 */
8060 			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
8061 				io_len = PAGESIZE;
8062 			else {
8063 				error = cachefs_push(vp, pp, &io_off,
8064 					&io_len, flags, cr);
8065 				if (error != 0)
8066 					break;
8067 				/*
8068 				 * "io_off" and "io_len" are returned as
8069 				 * the range of pages we actually wrote.
8070 				 * This allows us to skip ahead more quickly
8071 				 * since several pages may've been dealt
8072 				 * with by this iteration of the loop.
8073 				 */
8074 			}
8075 		}
8076 	}
8077 
	/*
	 * The whole file was covered without error, so the cnode is no
	 * longer marked dirty.
	 */
8078 	if (error == 0 && off == 0 && (len == 0 || len >= cp->c_size)) {
8079 		cp->c_flags &= ~CDIRTY;
8080 	}
8081 
8082 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_PUTPAGE))
8083 		cachefs_log_putpage(cachep, error, fscp->fs_cfsvfsp,
8084 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
8085 		    crgetuid(cr), off, len);
8086 
8087 	return (error);
8088 
8089 }
8090 
8091 /*ARGSUSED*/
8092 static int
8093 cachefs_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
8094     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
8095     caller_context_t *ct)
8096 {
8097 	cnode_t *cp = VTOC(vp);
8098 	fscache_t *fscp = C_TO_FSCACHE(cp);
8099 	struct segvn_crargs vn_a;
8100 	int error;
8101 	int held = 0;
8102 	int writing;
8103 	int connected = 0;
8104 
8105 #ifdef CFSDEBUG
8106 	u_offset_t offx = (u_offset_t)off;
8107 
8108 	CFS_DEBUG(CFSDEBUG_VOPS)
8109 		printf("cachefs_map: ENTER vp %p off %lld len %lu flags %d\n",
8110 			(void *)vp, offx, len, flags);
8111 #endif
8112 	if (getzoneid() != GLOBAL_ZONEID) {
8113 		error = EPERM;
8114 		goto out;
8115 	}
8116 
8117 	if (vp->v_flag & VNOMAP) {
8118 		error = ENOSYS;
8119 		goto out;
8120 	}
8121 	if (off < 0 || (offset_t)(off + len) < 0) {
8122 		error = ENXIO;
8123 		goto out;
8124 	}
8125 	if (vp->v_type != VREG) {
8126 		error = ENODEV;
8127 		goto out;
8128 	}
8129 
8130 	/*
8131 	 * Check to see if the vnode is currently marked as not cachable.
8132 	 * If so, we have to refuse the map request as this violates the
8133 	 * don't cache attribute.
8134 	 */
8135 	if (vp->v_flag & VNOCACHE)
8136 		return (EAGAIN);
8137 
8138 #ifdef OBSOLETE
8139 	/*
8140 	 * If file is being locked, disallow mapping.
8141 	 */
8142 	if (vn_has_flocks(vp)) {
8143 		error = EAGAIN;
8144 		goto out;
8145 	}
8146 #endif
8147 
8148 	/* call backfilesystem if NFSv4 */
8149 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
8150 		error = cachefs_map_backfs_nfsv4(vp, off, as, addrp, len, prot,
8151 						maxprot, flags, cr);
8152 		goto out;
8153 	}
8154 
8155 	writing = (prot & PROT_WRITE && ((flags & MAP_PRIVATE) == 0));
8156 
8157 	for (;;) {
8158 		/* get (or renew) access to the file system */
8159 		if (held) {
8160 			cachefs_cd_release(fscp);
8161 			held = 0;
8162 		}
8163 		error = cachefs_cd_access(fscp, connected, writing);
8164 		if (error)
8165 			break;
8166 		held = 1;
8167 
8168 		if (writing) {
8169 			mutex_enter(&cp->c_statelock);
8170 			if (CFS_ISFS_WRITE_AROUND(fscp)) {
8171 				if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
8172 					connected = 1;
8173 					continue;
8174 				} else {
8175 					cachefs_nocache(cp);
8176 				}
8177 			}
8178 
8179 			/*
8180 			 * CN_MAPWRITE is for an optimization in cachefs_delmap.
8181 			 * If CN_MAPWRITE is not set then cachefs_delmap does
8182 			 * not need to try to push out any pages.
8183 			 * This bit gets cleared when the cnode goes inactive.
8184 			 */
8185 			cp->c_flags |= CN_MAPWRITE;
8186 
8187 			mutex_exit(&cp->c_statelock);
8188 		}
8189 		break;
8190 	}
8191 
8192 	if (held) {
8193 		cachefs_cd_release(fscp);
8194 	}
8195 
8196 	as_rangelock(as);
8197 	error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
8198 	if (error != 0) {
8199 		as_rangeunlock(as);
8200 		goto out;
8201 	}
8202 
8203 	/*
8204 	 * package up all the data passed in into a segvn_args struct and
8205 	 * call as_map with segvn_create function to create a new segment
8206 	 * in the address space.
8207 	 */
8208 	vn_a.vp = vp;
8209 	vn_a.offset = off;
8210 	vn_a.type = flags & MAP_TYPE;
8211 	vn_a.prot = (uchar_t)prot;
8212 	vn_a.maxprot = (uchar_t)maxprot;
8213 	vn_a.cred = cr;
8214 	vn_a.amp = NULL;
8215 	vn_a.flags = flags & ~MAP_TYPE;
8216 	vn_a.szc = 0;
8217 	vn_a.lgrp_mem_policy_flags = 0;
8218 	error = as_map(as, *addrp, len, segvn_create, &vn_a);
8219 	as_rangeunlock(as);
8220 out:
8221 
8222 #ifdef CFS_CD_DEBUG
8223 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8224 #endif
8225 #ifdef CFSDEBUG
8226 	CFS_DEBUG(CFSDEBUG_VOPS)
8227 		printf("cachefs_map: EXIT vp %p error %d\n", (void *)vp, error);
8228 #endif
8229 	return (error);
8230 }
8231 
8232 /*
8233  * cachefs_map_backfs_nfsv4
8234  *
8235  * Call NFSv4 back filesystem to handle the map (cachefs
8236  * pass-through support for NFSv4).
8237  */
8238 static int
8239 cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off, struct as *as,
8240 			caddr_t *addrp, size_t len, uchar_t prot,
8241 			uchar_t maxprot, uint_t flags, cred_t *cr)
8242 {
8243 	cnode_t *cp = VTOC(vp);
8244 	fscache_t *fscp = C_TO_FSCACHE(cp);
8245 	vnode_t *backvp;
8246 	int error;
8247 
8248 	/*
8249 	 * For NFSv4 pass-through to work, only connected operation is
8250 	 * supported, the cnode backvp must exist, and cachefs optional
8251 	 * (eg., disconnectable) flags are turned off. Assert these
8252 	 * conditions for the map operation.
8253 	 */
8254 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8255 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8256 
8257 	/* Call backfs vnode op after extracting backvp */
8258 	mutex_enter(&cp->c_statelock);
8259 	backvp = cp->c_backvp;
8260 	mutex_exit(&cp->c_statelock);
8261 
8262 	CFS_DPRINT_BACKFS_NFSV4(fscp,
8263 		("cachefs_map_backfs_nfsv4: cnode %p, backvp %p\n",
8264 		cp, backvp));
8265 	error = VOP_MAP(backvp, off, as, addrp, len, prot, maxprot, flags, cr,
8266 	    NULL);
8267 
8268 	return (error);
8269 }
8270 
8271 /*ARGSUSED*/
8272 static int
8273 cachefs_addmap(struct vnode *vp, offset_t off, struct as *as,
8274     caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
8275     cred_t *cr, caller_context_t *ct)
8276 {
8277 	cnode_t *cp = VTOC(vp);
8278 	fscache_t *fscp = C_TO_FSCACHE(cp);
8279 
8280 	if (getzoneid() != GLOBAL_ZONEID)
8281 		return (EPERM);
8282 
8283 	if (vp->v_flag & VNOMAP)
8284 		return (ENOSYS);
8285 
8286 	/*
8287 	 * Check this is not an NFSv4 filesystem, as the mapping
8288 	 * is not done on the cachefs filesystem if NFSv4 is in
8289 	 * use.
8290 	 */
8291 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8292 
8293 	mutex_enter(&cp->c_statelock);
8294 	cp->c_mapcnt += btopr(len);
8295 	mutex_exit(&cp->c_statelock);
8296 	return (0);
8297 }
8298 
8299 /*ARGSUSED*/
8300 static int
8301 cachefs_delmap(struct vnode *vp, offset_t off, struct as *as,
8302 	caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
8303 	cred_t *cr, caller_context_t *ct)
8304 {
8305 	cnode_t *cp = VTOC(vp);
8306 	fscache_t *fscp = C_TO_FSCACHE(cp);
8307 	int error;
8308 	int connected = 0;
8309 	int held = 0;
8310 
8311 	/*
8312 	 * The file may be passed in to (or inherited into) the zone, so we
8313 	 * need to let this operation go through since it happens as part of
8314 	 * exiting.
8315 	 */
8316 	if (vp->v_flag & VNOMAP)
8317 		return (ENOSYS);
8318 
8319 	/*
8320 	 * Check this is not an NFSv4 filesystem, as the mapping
8321 	 * is not done on the cachefs filesystem if NFSv4 is in
8322 	 * use.
8323 	 */
8324 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8325 
8326 	mutex_enter(&cp->c_statelock);
8327 	cp->c_mapcnt -= btopr(len);
8328 	ASSERT(cp->c_mapcnt >= 0);
8329 	mutex_exit(&cp->c_statelock);
8330 
8331 	if (cp->c_mapcnt || !vn_has_cached_data(vp) ||
8332 	    ((cp->c_flags & CN_MAPWRITE) == 0))
8333 		return (0);
8334 
8335 	for (;;) {
8336 		/* get (or renew) access to the file system */
8337 		if (held) {
8338 			cachefs_cd_release(fscp);
8339 			held = 0;
8340 		}
8341 		error = cachefs_cd_access(fscp, connected, 1);
8342 		if (error)
8343 			break;
8344 		held = 1;
8345 		connected = 0;
8346 
8347 		error = cachefs_putpage_common(vp, (offset_t)0,
8348 		    (uint_t)0, 0, cr);
8349 		if (CFS_TIMEOUT(fscp, error)) {
8350 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8351 				cachefs_cd_release(fscp);
8352 				held = 0;
8353 				cachefs_cd_timedout(fscp);
8354 				continue;
8355 			} else {
8356 				connected = 1;
8357 				continue;
8358 			}
8359 		}
8360 
8361 		/* if no space left in cache, wait until connected */
8362 		if ((error == ENOSPC) &&
8363 		    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
8364 			connected = 1;
8365 			continue;
8366 		}
8367 
8368 		mutex_enter(&cp->c_statelock);
8369 		if (!error)
8370 			error = cp->c_error;
8371 		cp->c_error = 0;
8372 		mutex_exit(&cp->c_statelock);
8373 		break;
8374 	}
8375 
8376 	if (held)
8377 		cachefs_cd_release(fscp);
8378 
8379 #ifdef CFS_CD_DEBUG
8380 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8381 #endif
8382 	return (error);
8383 }
8384 
/*
 * cachefs_frlock
 *
 * VOP_FRLOCK entry point (file and record locking).
 *
 * Returns EPERM outside the global zone, EINVAL for any cmd other than
 * F_GETLK, F_SETLK or F_SETLKW, and EAGAIN when a lock is being set on a
 * file whose c_mapcnt is greater than 0.
 *
 * When CFS_ISFS_LLOCK() is true the request is handled by fs_frlock() on
 * the cachefs vnode itself.  Otherwise the routine waits until the file
 * system is connected, puts the cnode in NOCACHE mode, flushes and
 * invalidates the vnode's pages before a lock is set (ENOLCK if that
 * fails) and forwards the request to VOP_FRLOCK() on the back vnode.
 * After a lock has been set successfully the vnode is marked VNOCACHE.
 */
8385 /* ARGSUSED */
8386 static int
8387 cachefs_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
8388 	offset_t offset, struct flk_callback *flk_cbp, cred_t *cr,
8389 	caller_context_t *ct)
8390 {
8391 	struct cnode *cp = VTOC(vp);
8392 	int error;
8393 	struct fscache *fscp = C_TO_FSCACHE(cp);
8394 	vnode_t *backvp;
8395 	int held = 0;
8396 	int connected = 0;
8397 
8398 	if (getzoneid() != GLOBAL_ZONEID)
8399 		return (EPERM);
8400 
8401 	if ((cmd != F_GETLK) && (cmd != F_SETLK) && (cmd != F_SETLKW))
8402 		return (EINVAL);
8403 
8404 	/* Disallow locking of files that are currently mapped */
8405 	if (((cmd == F_SETLK) || (cmd == F_SETLKW)) && (cp->c_mapcnt > 0)) {
8406 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8407 		return (EAGAIN);
8408 	}
8409 
8410 	/*
8411 	 * Cachefs only provides pass-through support for NFSv4,
8412 	 * and all vnode operations are passed through to the
8413 	 * back file system. For NFSv4 pass-through to work, only
8414 	 * connected operation is supported, the cnode backvp must
8415 	 * exist, and cachefs optional (eg., disconnectable) flags
8416 	 * are turned off. Assert these conditions to ensure that
8417 	 * the backfilesystem is called for the frlock operation.
8418 	 */
8419 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8420 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8421 
8422 	/* XXX bob: nfs does a bunch more checks than we do */
8423 	if (CFS_ISFS_LLOCK(fscp)) {
8424 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8425 		return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
8426 	}
8427 
8428 	for (;;) {
8429 		/* get (or renew) access to the file system */
8430 		if (held) {
8431 			/* Won't loop with NFSv4 connected behavior */
8432 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8433 			cachefs_cd_release(fscp);
8434 			held = 0;
8435 		}
8436 		error = cachefs_cd_access(fscp, connected, 0);
8437 		if (error)
8438 			break;
8439 		held = 1;
8440 
8441 		/* if not connected, quit or wait */
8442 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
8443 			connected = 1;
8444 			continue;
8445 		}
8446 
8447 		/* nocache the file */
8448 		if ((cp->c_flags & CN_NOCACHE) == 0 &&
8449 		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8450 			mutex_enter(&cp->c_statelock);
8451 			cachefs_nocache(cp);
8452 			mutex_exit(&cp->c_statelock);
8453 		}
8454 
8455 		/*
8456 		 * XXX bob: probably should do a consistency check
8457 		 * Pass arguments unchanged if NFSv4 is the backfs.
8458 		 */
		/*
		 * An l_whence of 2 is rewritten as an l_whence of 0 by
		 * adding c_size to l_start.  l_whence is 0 afterwards, so
		 * a later pass through the loop does not add it again.
		 */
8459 		if (bfp->l_whence == 2 && CFS_ISFS_BACKFS_NFSV4(fscp) == 0) {
8460 			bfp->l_start += cp->c_size;
8461 			bfp->l_whence = 0;
8462 		}
8463 
8464 		/* get the back vp */
8465 		mutex_enter(&cp->c_statelock);
8466 		if (cp->c_backvp == NULL) {
8467 			error = cachefs_getbackvp(fscp, cp);
8468 			if (error) {
8469 				mutex_exit(&cp->c_statelock);
8470 				break;
8471 			}
8472 		}
		/*
		 * Hold the back vnode so it can be used after c_statelock
		 * is dropped; every path below releases it with VN_RELE().
		 */
8473 		backvp = cp->c_backvp;
8474 		VN_HOLD(backvp);
8475 		mutex_exit(&cp->c_statelock);
8476 
8477 		/*
8478 		 * make sure we can flush currently dirty pages before
8479 		 * allowing the lock
8480 		 */
8481 		if (bfp->l_type != F_UNLCK && cmd != F_GETLK &&
8482 		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8483 			error = cachefs_putpage(
8484 			    vp, (offset_t)0, 0, B_INVAL, cr, ct);
8485 			if (error) {
				/* Any flush failure is reported as ENOLCK. */
8486 				error = ENOLCK;
8487 				VN_RELE(backvp);
8488 				break;
8489 			}
8490 		}
8491 
8492 		/* do lock on the back file */
8493 		CFS_DPRINT_BACKFS_NFSV4(fscp,
8494 			("cachefs_frlock (nfsv4): cp %p, backvp %p\n",
8495 			cp, backvp));
8496 		error = VOP_FRLOCK(backvp, cmd, bfp, flag, offset, NULL, cr,
8497 		    ct);
8498 		VN_RELE(backvp);
		/* On a timeout, wait for the connection and try again. */
8499 		if (CFS_TIMEOUT(fscp, error)) {
8500 			connected = 1;
8501 			continue;
8502 		}
8503 		break;
8504 	}
8505 
8506 	if (held) {
8507 		cachefs_cd_release(fscp);
8508 	}
8509 
8510 	/*
8511 	 * If we are setting a lock mark the vnode VNOCACHE so the page
8512 	 * cache does not give inconsistent results on locked files shared
8513 	 * between clients.  The VNOCACHE flag is never turned off as long
8514 	 * as the vnode is active because it is hard to figure out when the
8515 	 * last lock is gone.
8516 	 * XXX - what if some already has the vnode mapped in?
8517 	 * XXX bob: see nfs3_frlock, do not allow locking if vnode mapped in.
8518 	 */
8519 	if ((error == 0) && (bfp->l_type != F_UNLCK) && (cmd != F_GETLK) &&
8520 	    !CFS_ISFS_BACKFS_NFSV4(fscp))
8521 		vp->v_flag |= VNOCACHE;
8522 
8523 #ifdef CFS_CD_DEBUG
8524 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8525 #endif
8526 	return (error);
8527 }
8528 
8529 /*
8530  * Free storage space associated with the specified vnode.  The portion
8531  * to be freed is specified by bfp->l_start and bfp->l_len (already
8532  * normalized to a "whence" of 0).
8533  *
8534  * This is an experimental facility whose continued existence is not
8535  * guaranteed.  Currently, we only support the special case
8536  * of l_len == 0, meaning free to end of file.
8537  */
8538 /* ARGSUSED */
8539 static int
8540 cachefs_space(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
8541 	offset_t offset, cred_t *cr, caller_context_t *ct)
8542 {
8543 	cnode_t *cp = VTOC(vp);
8544 	fscache_t *fscp = C_TO_FSCACHE(cp);
8545 	int error;
8546 
8547 	ASSERT(vp->v_type == VREG);
8548 	if (getzoneid() != GLOBAL_ZONEID)
8549 		return (EPERM);
8550 	if (cmd != F_FREESP)
8551 		return (EINVAL);
8552 
8553 	/* call backfilesystem if NFSv4 */
8554 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
8555 		error = cachefs_space_backfs_nfsv4(vp, cmd, bfp, flag,
8556 						offset, cr, ct);
8557 		goto out;
8558 	}
8559 
8560 	if ((error = convoff(vp, bfp, 0, offset)) == 0) {
8561 		ASSERT(bfp->l_start >= 0);
8562 		if (bfp->l_len == 0) {
8563 			struct vattr va;
8564 
8565 			va.va_size = bfp->l_start;
8566 			va.va_mask = AT_SIZE;
8567 			error = cachefs_setattr(vp, &va, 0, cr, ct);
8568 		} else
8569 			error = EINVAL;
8570 	}
8571 
8572 out:
8573 	return (error);
8574 }
8575 
8576 /*
8577  * cachefs_space_backfs_nfsv4
8578  *
8579  * Call NFSv4 back filesystem to handle the space (cachefs
8580  * pass-through support for NFSv4).
8581  */
8582 static int
8583 cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd, struct flock64 *bfp,
8584 		int flag, offset_t offset, cred_t *cr, caller_context_t *ct)
8585 {
8586 	cnode_t *cp = VTOC(vp);
8587 	fscache_t *fscp = C_TO_FSCACHE(cp);
8588 	vnode_t *backvp;
8589 	int error;
8590 
8591 	/*
8592 	 * For NFSv4 pass-through to work, only connected operation is
8593 	 * supported, the cnode backvp must exist, and cachefs optional
8594 	 * (eg., disconnectable) flags are turned off. Assert these
8595 	 * conditions for the space operation.
8596 	 */
8597 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8598 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8599 
8600 	/* Call backfs vnode op after extracting backvp */
8601 	mutex_enter(&cp->c_statelock);
8602 	backvp = cp->c_backvp;
8603 	mutex_exit(&cp->c_statelock);
8604 
8605 	CFS_DPRINT_BACKFS_NFSV4(fscp,
8606 		("cachefs_space_backfs_nfsv4: cnode %p, backvp %p\n",
8607 		cp, backvp));
8608 	error = VOP_SPACE(backvp, cmd, bfp, flag, offset, cr, ct);
8609 
8610 	return (error);
8611 }
8612 
8613 /*ARGSUSED*/
8614 static int
8615 cachefs_realvp(struct vnode *vp, struct vnode **vpp, caller_context_t *ct)
8616 {
8617 	return (EINVAL);
8618 }
8619 
8620 /*ARGSUSED*/
8621 static int
8622 cachefs_pageio(struct vnode *vp, page_t *pp, u_offset_t io_off, size_t io_len,
8623 	int flags, cred_t *cr, caller_context_t *ct)
8624 {
8625 	return (ENOSYS);
8626 }
8627 
8628 static int
8629 cachefs_setsecattr_connected(cnode_t *cp,
8630     vsecattr_t *vsec, int flag, cred_t *cr)
8631 {
8632 	fscache_t *fscp = C_TO_FSCACHE(cp);
8633 	int error = 0;
8634 
8635 	ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
8636 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
8637 
8638 	mutex_enter(&cp->c_statelock);
8639 
8640 	if (cp->c_backvp == NULL) {
8641 		error = cachefs_getbackvp(fscp, cp);
8642 		if (error) {
8643 			cachefs_nocache(cp);
8644 			goto out;
8645 		}
8646 	}
8647 
8648 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
8649 	if (error)
8650 		goto out;
8651 
8652 	/* only owner can set acl */
8653 	if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
8654 		error = EINVAL;
8655 		goto out;
8656 	}
8657 
8658 
8659 	CFS_DPRINT_BACKFS_NFSV4(fscp,
8660 		("cachefs_setsecattr (nfsv4): cp %p, backvp %p",
8661 		cp, cp->c_backvp));
8662 	error = VOP_SETSECATTR(cp->c_backvp, vsec, flag, cr, NULL);
8663 	if (error) {
8664 		goto out;
8665 	}
8666 
8667 	if ((cp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
8668 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8669 		cachefs_nocache(cp);
8670 		goto out;
8671 	}
8672 
8673 	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
8674 
8675 	/* acl may have changed permissions -- handle this. */
8676 	if (!CFS_ISFS_BACKFS_NFSV4(fscp))
8677 		cachefs_acl2perm(cp, vsec);
8678 
8679 	if ((cp->c_flags & CN_NOCACHE) == 0 &&
8680 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8681 		error = cachefs_cacheacl(cp, vsec);
8682 		if (error != 0) {
8683 #ifdef CFSDEBUG
8684 			CFS_DEBUG(CFSDEBUG_VOPS)
8685 				printf("cachefs_setacl: cacheacl: error %d\n",
8686 				    error);
8687 #endif /* CFSDEBUG */
8688 			error = 0;
8689 			cachefs_nocache(cp);
8690 		}
8691 	}
8692 
8693 out:
8694 	mutex_exit(&cp->c_statelock);
8695 
8696 	return (error);
8697 }
8698 
8699 static int
8700 cachefs_setsecattr_disconnected(cnode_t *cp,
8701     vsecattr_t *vsec, int flag, cred_t *cr)
8702 {
8703 	fscache_t *fscp = C_TO_FSCACHE(cp);
8704 	mode_t failmode = cp->c_metadata.md_vattr.va_mode;
8705 	off_t commit = 0;
8706 	int error = 0;
8707 
8708 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
8709 
8710 	if (CFS_ISFS_WRITE_AROUND(fscp))
8711 		return (ETIMEDOUT);
8712 
8713 	mutex_enter(&cp->c_statelock);
8714 
8715 	/* only owner can set acl */
8716 	if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
8717 		error = EINVAL;
8718 		goto out;
8719 	}
8720 
8721 	if (cp->c_metadata.md_flags & MD_NEEDATTRS) {
8722 		error = ETIMEDOUT;
8723 		goto out;
8724 	}
8725 
8726 	/* XXX do i need this?  is this right? */
8727 	if (cp->c_flags & CN_ALLOC_PENDING) {
8728 		if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
8729 			(void) filegrp_allocattr(cp->c_filegrp);
8730 		}
8731 		error = filegrp_create_metadata(cp->c_filegrp,
8732 		    &cp->c_metadata, &cp->c_id);
8733 		if (error) {
8734 			goto out;
8735 		}
8736 		cp->c_flags &= ~CN_ALLOC_PENDING;
8737 	}
8738 
8739 	/* XXX is this right? */
8740 	if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
8741 		error = cachefs_dlog_cidmap(fscp);
8742 		if (error) {
8743 			error = ENOSPC;
8744 			goto out;
8745 		}
8746 		cp->c_metadata.md_flags |= MD_MAPPING;
8747 		cp->c_flags |= CN_UPDATED;
8748 	}
8749 
8750 	commit = cachefs_dlog_setsecattr(fscp, vsec, flag, cp, cr);
8751 	if (commit == 0)
8752 		goto out;
8753 
8754 	/* fix modes in metadata */
8755 	cachefs_acl2perm(cp, vsec);
8756 
8757 	if ((cp->c_flags & CN_NOCACHE) == 0) {
8758 		error = cachefs_cacheacl(cp, vsec);
8759 		if (error != 0) {
8760 			goto out;
8761 		}
8762 	}
8763 
8764 	/* XXX is this right? */
8765 	if (cachefs_modified_alloc(cp)) {
8766 		error = ENOSPC;
8767 		goto out;
8768 	}
8769 
8770 out:
8771 	if (error != 0)
8772 		cp->c_metadata.md_vattr.va_mode = failmode;
8773 
8774 	mutex_exit(&cp->c_statelock);
8775 
8776 	if (commit) {
8777 		if (cachefs_dlog_commit(fscp, commit, error)) {
8778 			/*EMPTY*/
8779 			/* XXX fix on panic? */
8780 		}
8781 	}
8782 
8783 	return (error);
8784 }
8785 
8786 /*ARGSUSED*/
8787 static int
8788 cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr,
8789     caller_context_t *ct)
8790 {
8791 	cnode_t *cp = VTOC(vp);
8792 	fscache_t *fscp = C_TO_FSCACHE(cp);
8793 	int connected = 0;
8794 	int held = 0;
8795 	int error = 0;
8796 
8797 #ifdef CFSDEBUG
8798 	CFS_DEBUG(CFSDEBUG_VOPS)
8799 	    printf("cachefs_setsecattr: ENTER vp %p\n", (void *)vp);
8800 #endif
8801 	if (getzoneid() != GLOBAL_ZONEID) {
8802 		error = EPERM;
8803 		goto out;
8804 	}
8805 
8806 	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
8807 		error = ENOSYS;
8808 		goto out;
8809 	}
8810 
8811 	if (! cachefs_vtype_aclok(vp)) {
8812 		error = EINVAL;
8813 		goto out;
8814 	}
8815 
8816 	/*
8817 	 * Cachefs only provides pass-through support for NFSv4,
8818 	 * and all vnode operations are passed through to the
8819 	 * back file system. For NFSv4 pass-through to work, only
8820 	 * connected operation is supported, the cnode backvp must
8821 	 * exist, and cachefs optional (eg., disconnectable) flags
8822 	 * are turned off. Assert these conditions to ensure that
8823 	 * the backfilesystem is called for the setsecattr operation.
8824 	 */
8825 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8826 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8827 
8828 	for (;;) {
8829 		/* drop hold on file system */
8830 		if (held) {
8831 			/* Won't loop with NFSv4 connected operation */
8832 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8833 			cachefs_cd_release(fscp);
8834 			held = 0;
8835 		}
8836 
8837 		/* acquire access to the file system */
8838 		error = cachefs_cd_access(fscp, connected, 1);
8839 		if (error)
8840 			break;
8841 		held = 1;
8842 
8843 		/* perform the setattr */
8844 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
8845 			error = cachefs_setsecattr_connected(cp,
8846 			    vsec, flag, cr);
8847 		else
8848 			error = cachefs_setsecattr_disconnected(cp,
8849 			    vsec, flag, cr);
8850 		if (error) {
8851 			/* if connected */
8852 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8853 				if (CFS_TIMEOUT(fscp, error)) {
8854 					cachefs_cd_release(fscp);
8855 					held = 0;
8856 					cachefs_cd_timedout(fscp);
8857 					connected = 0;
8858 					continue;
8859 				}
8860 			}
8861 
8862 			/* else must be disconnected */
8863 			else {
8864 				if (CFS_TIMEOUT(fscp, error)) {
8865 					connected = 1;
8866 					continue;
8867 				}
8868 			}
8869 		}
8870 		break;
8871 	}
8872 
8873 	if (held) {
8874 		cachefs_cd_release(fscp);
8875 	}
8876 	return (error);
8877 
8878 out:
8879 #ifdef CFS_CD_DEBUG
8880 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8881 #endif
8882 
8883 #ifdef CFSDEBUG
8884 	CFS_DEBUG(CFSDEBUG_VOPS)
8885 		printf("cachefs_setsecattr: EXIT error = %d\n", error);
8886 #endif
8887 	return (error);
8888 }
8889 
8890 /*
8891  * call this BEFORE calling cachefs_cacheacl(), as the latter will
8892  * sanitize the acl.
8893  */
8894 
8895 static void
8896 cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec)
8897 {
8898 	aclent_t *aclp;
8899 	int i;
8900 
8901 	for (i = 0; i < vsec->vsa_aclcnt; i++) {
8902 		aclp = ((aclent_t *)vsec->vsa_aclentp) + i;
8903 		switch (aclp->a_type) {
8904 		case USER_OBJ:
8905 			cp->c_metadata.md_vattr.va_mode &= (~0700);
8906 			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 6);
8907 			break;
8908 
8909 		case GROUP_OBJ:
8910 			cp->c_metadata.md_vattr.va_mode &= (~070);
8911 			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 3);
8912 			break;
8913 
8914 		case OTHER_OBJ:
8915 			cp->c_metadata.md_vattr.va_mode &= (~07);
8916 			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm);
8917 			break;
8918 
8919 		case CLASS_OBJ:
8920 			cp->c_metadata.md_aclclass = aclp->a_perm;
8921 			break;
8922 		}
8923 	}
8924 
8925 	cp->c_flags |= CN_UPDATED;
8926 }
8927 
8928 static int
8929 cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr,
8930     caller_context_t *ct)
8931 {
8932 	cnode_t *cp = VTOC(vp);
8933 	fscache_t *fscp = C_TO_FSCACHE(cp);
8934 	int held = 0, connected = 0;
8935 	int error = 0;
8936 
8937 #ifdef CFSDEBUG
8938 	CFS_DEBUG(CFSDEBUG_VOPS)
8939 		printf("cachefs_getsecattr: ENTER vp %p\n", (void *)vp);
8940 #endif
8941 
8942 	if (getzoneid() != GLOBAL_ZONEID) {
8943 		error = EPERM;
8944 		goto out;
8945 	}
8946 
8947 	/*
8948 	 * Cachefs only provides pass-through support for NFSv4,
8949 	 * and all vnode operations are passed through to the
8950 	 * back file system. For NFSv4 pass-through to work, only
8951 	 * connected operation is supported, the cnode backvp must
8952 	 * exist, and cachefs optional (eg., disconnectable) flags
8953 	 * are turned off. Assert these conditions to ensure that
8954 	 * the backfilesystem is called for the getsecattr operation.
8955 	 */
8956 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8957 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8958 
8959 	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
8960 		error = fs_fab_acl(vp, vsec, flag, cr, ct);
8961 		goto out;
8962 	}
8963 
8964 	for (;;) {
8965 		if (held) {
8966 			/* Won't loop with NFSv4 connected behavior */
8967 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8968 			cachefs_cd_release(fscp);
8969 			held = 0;
8970 		}
8971 		error = cachefs_cd_access(fscp, connected, 0);
8972 		if (error)
8973 			break;
8974 		held = 1;
8975 
8976 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8977 			error = cachefs_getsecattr_connected(vp, vsec, flag,
8978 			    cr);
8979 			if (CFS_TIMEOUT(fscp, error)) {
8980 				cachefs_cd_release(fscp);
8981 				held = 0;
8982 				cachefs_cd_timedout(fscp);
8983 				connected = 0;
8984 				continue;
8985 			}
8986 		} else {
8987 			error = cachefs_getsecattr_disconnected(vp, vsec, flag,
8988 			    cr);
8989 			if (CFS_TIMEOUT(fscp, error)) {
8990 				if (cachefs_cd_access_miss(fscp)) {
8991 					error = cachefs_getsecattr_connected(vp,
8992 					    vsec, flag, cr);
8993 					if (!CFS_TIMEOUT(fscp, error))
8994 						break;
8995 					delay(5*hz);
8996 					connected = 0;
8997 					continue;
8998 				}
8999 				connected = 1;
9000 				continue;
9001 			}
9002 		}
9003 		break;
9004 	}
9005 
9006 out:
9007 	if (held)
9008 		cachefs_cd_release(fscp);
9009 
9010 #ifdef CFS_CD_DEBUG
9011 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
9012 #endif
9013 #ifdef CFSDEBUG
9014 	CFS_DEBUG(CFSDEBUG_VOPS)
9015 		printf("cachefs_getsecattr: EXIT error = %d\n", error);
9016 #endif
9017 	return (error);
9018 }
9019 
9020 static int
9021 cachefs_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
9022     caller_context_t *ct)
9023 {
9024 	cnode_t *cp = VTOC(vp);
9025 	fscache_t *fscp = C_TO_FSCACHE(cp);
9026 	int error = 0;
9027 	vnode_t *backvp;
9028 
9029 #ifdef CFSDEBUG
9030 	CFS_DEBUG(CFSDEBUG_VOPS)
9031 		printf("cachefs_shrlock: ENTER vp %p\n", (void *)vp);
9032 #endif
9033 
9034 	if (getzoneid() != GLOBAL_ZONEID) {
9035 		error = EPERM;
9036 		goto out;
9037 	}
9038 
9039 	/*
9040 	 * Cachefs only provides pass-through support for NFSv4,
9041 	 * and all vnode operations are passed through to the
9042 	 * back file system. For NFSv4 pass-through to work, only
9043 	 * connected operation is supported, the cnode backvp must
9044 	 * exist, and cachefs optional (eg., disconnectable) flags
9045 	 * are turned off. Assert these conditions to ensure that
9046 	 * the backfilesystem is called for the shrlock operation.
9047 	 */
9048 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
9049 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
9050 
9051 	mutex_enter(&cp->c_statelock);
9052 	if (cp->c_backvp == NULL)
9053 		error = cachefs_getbackvp(fscp, cp);
9054 	backvp = cp->c_backvp;
9055 	mutex_exit(&cp->c_statelock);
9056 	ASSERT((error != 0) || (backvp != NULL));
9057 
9058 	if (error == 0) {
9059 		CFS_DPRINT_BACKFS_NFSV4(fscp,
9060 			("cachefs_shrlock (nfsv4): cp %p, backvp %p",
9061 			cp, backvp));
9062 		error = VOP_SHRLOCK(backvp, cmd, shr, flag, cr, ct);
9063 	}
9064 
9065 out:
9066 #ifdef CFSDEBUG
9067 	CFS_DEBUG(CFSDEBUG_VOPS)
9068 		printf("cachefs_shrlock: EXIT error = %d\n", error);
9069 #endif
9070 	return (error);
9071 }
9072 
9073 static int
9074 cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
9075     cred_t *cr)
9076 {
9077 	cnode_t *cp = VTOC(vp);
9078 	fscache_t *fscp = C_TO_FSCACHE(cp);
9079 	int hit = 0;
9080 	int error = 0;
9081 
9082 
9083 	mutex_enter(&cp->c_statelock);
9084 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
9085 	if (error)
9086 		goto out;
9087 
9088 	/* read from the cache if we can */
9089 	if ((cp->c_metadata.md_flags & MD_ACL) &&
9090 	    ((cp->c_flags & CN_NOCACHE) == 0) &&
9091 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
9092 		ASSERT((cp->c_flags & CN_NOCACHE) == 0);
9093 		error = cachefs_getaclfromcache(cp, vsec);
9094 		if (error) {
9095 			cachefs_nocache(cp);
9096 			ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
9097 			error = 0;
9098 		} else {
9099 			hit = 1;
9100 			goto out;
9101 		}
9102 	}
9103 
9104 	ASSERT(error == 0);
9105 	if (cp->c_backvp == NULL)
9106 		error = cachefs_getbackvp(fscp, cp);
9107 	if (error)
9108 		goto out;
9109 
9110 	CFS_DPRINT_BACKFS_NFSV4(fscp,
9111 		("cachefs_getsecattr (nfsv4): cp %p, backvp %p",
9112 		cp, cp->c_backvp));
9113 	error = VOP_GETSECATTR(cp->c_backvp, vsec, flag, cr, NULL);
9114 	if (error)
9115 		goto out;
9116 
9117 	if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
9118 	    (cachefs_vtype_aclok(vp)) &&
9119 	    ((cp->c_flags & CN_NOCACHE) == 0) &&
9120 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
9121 		error = cachefs_cacheacl(cp, vsec);
9122 		if (error) {
9123 			error = 0;
9124 			cachefs_nocache(cp);
9125 		}
9126 	}
9127 
9128 out:
9129 	if (error == 0) {
9130 		if (hit)
9131 			fscp->fs_stats.st_hits++;
9132 		else
9133 			fscp->fs_stats.st_misses++;
9134 	}
9135 	mutex_exit(&cp->c_statelock);
9136 
9137 	return (error);
9138 }
9139 
9140 static int
9141 /*ARGSUSED*/
9142 cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec, int flag,
9143     cred_t *cr)
9144 {
9145 	cnode_t *cp = VTOC(vp);
9146 	fscache_t *fscp = C_TO_FSCACHE(cp);
9147 	int hit = 0;
9148 	int error = 0;
9149 
9150 
9151 	mutex_enter(&cp->c_statelock);
9152 
9153 	/* read from the cache if we can */
9154 	if (((cp->c_flags & CN_NOCACHE) == 0) &&
9155 	    (cp->c_metadata.md_flags & MD_ACL)) {
9156 		error = cachefs_getaclfromcache(cp, vsec);
9157 		if (error) {
9158 			cachefs_nocache(cp);
9159 			ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
9160 			error = 0;
9161 		} else {
9162 			hit = 1;
9163 			goto out;
9164 		}
9165 	}
9166 	error = ETIMEDOUT;
9167 
9168 out:
9169 	if (error == 0) {
9170 		if (hit)
9171 			fscp->fs_stats.st_hits++;
9172 		else
9173 			fscp->fs_stats.st_misses++;
9174 	}
9175 	mutex_exit(&cp->c_statelock);
9176 
9177 	return (error);
9178 }
9179 
9180 /*
9181  * cachefs_cacheacl() -- cache an ACL, which we do by applying it to
9182  * the frontfile if possible; otherwise, the adjunct directory.
9183  *
9184  * inputs:
9185  * cp - the cnode, with its statelock already held
9186  * vsecp - a pointer to a vsecattr_t you'd like us to cache as-is,
9187  *  or NULL if you want us to do the VOP_GETSECATTR(backvp).
9188  *
9189  * returns:
9190  * 0 - all is well
9191  * nonzero - errno
9192  */
9193 
9194 int
9195 cachefs_cacheacl(cnode_t *cp, vsecattr_t *vsecp)
9196 {
9197 	fscache_t *fscp = C_TO_FSCACHE(cp);
9198 	vsecattr_t vsec;
9199 	aclent_t *aclp;
9200 	int gotvsec = 0;
9201 	int error = 0;
9202 	vnode_t *vp = NULL;
9203 	void *aclkeep = NULL;
9204 	int i;
9205 
9206 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9207 	ASSERT((cp->c_flags & CN_NOCACHE) == 0);
9208 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
9209 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
9210 	ASSERT(cachefs_vtype_aclok(CTOV(cp)));
9211 
9212 	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
9213 		error = ENOSYS;
9214 		goto out;
9215 	}
9216 
9217 	if (vsecp == NULL) {
9218 		if (cp->c_backvp == NULL)
9219 			error = cachefs_getbackvp(fscp, cp);
9220 		if (error != 0)
9221 			goto out;
9222 		vsecp = &vsec;
9223 		bzero(&vsec, sizeof (vsec));
9224 		vsecp->vsa_mask =
9225 		    VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9226 		error = VOP_GETSECATTR(cp->c_backvp, vsecp, 0, kcred, NULL);
9227 		if (error != 0) {
9228 			goto out;
9229 		}
9230 		gotvsec = 1;
9231 	} else if (vsecp->vsa_mask & VSA_ACL) {
9232 		aclkeep = vsecp->vsa_aclentp;
9233 		vsecp->vsa_aclentp = cachefs_kmem_alloc(vsecp->vsa_aclcnt *
9234 		    sizeof (aclent_t), KM_SLEEP);
9235 		bcopy(aclkeep, vsecp->vsa_aclentp, vsecp->vsa_aclcnt *
9236 		    sizeof (aclent_t));
9237 	} else if ((vsecp->vsa_mask & (VSA_ACL | VSA_DFACL)) == 0) {
9238 		/* unless there's real data, we can cache nothing. */
9239 		return (0);
9240 	}
9241 
9242 	/*
9243 	 * prevent the ACL from chmoding our frontfile, and
9244 	 * snarf the class info
9245 	 */
9246 
9247 	if ((vsecp->vsa_mask & (VSA_ACL | VSA_ACLCNT)) ==
9248 	    (VSA_ACL | VSA_ACLCNT)) {
9249 		for (i = 0; i < vsecp->vsa_aclcnt; i++) {
9250 			aclp = ((aclent_t *)vsecp->vsa_aclentp) + i;
9251 			switch (aclp->a_type) {
9252 			case CLASS_OBJ:
9253 				cp->c_metadata.md_aclclass =
9254 			    aclp->a_perm;
9255 				/*FALLTHROUGH*/
9256 			case USER_OBJ:
9257 			case GROUP_OBJ:
9258 			case OTHER_OBJ:
9259 				aclp->a_perm = 06;
9260 			}
9261 		}
9262 	}
9263 
9264 	/*
9265 	 * if the frontfile exists, then we always do the work.  but,
9266 	 * if there's no frontfile, and the ACL isn't a `real' ACL,
9267 	 * then we don't want to do the work.  otherwise, an `ls -l'
9268 	 * will create tons of emtpy frontfiles.
9269 	 */
9270 
9271 	if (((cp->c_metadata.md_flags & MD_FILE) == 0) &&
9272 	    ((vsecp->vsa_aclcnt + vsecp->vsa_dfaclcnt)
9273 	    <= MIN_ACL_ENTRIES)) {
9274 		cp->c_metadata.md_flags |= MD_ACL;
9275 		cp->c_flags |= CN_UPDATED;
9276 		goto out;
9277 	}
9278 
9279 	/*
9280 	 * if we have a default ACL, then we need a
9281 	 * real live directory in the frontfs that we
9282 	 * can apply the ACL to.  if not, then we just
9283 	 * use the frontfile.  we get the frontfile
9284 	 * regardless -- that way, we know the
9285 	 * directory for the frontfile exists.
9286 	 */
9287 
9288 	if (vsecp->vsa_dfaclcnt > 0) {
9289 		if (cp->c_acldirvp == NULL)
9290 			error = cachefs_getacldirvp(cp);
9291 		if (error != 0)
9292 			goto out;
9293 		vp = cp->c_acldirvp;
9294 	} else {
9295 		if (cp->c_frontvp == NULL)
9296 			error = cachefs_getfrontfile(cp);
9297 		if (error != 0)
9298 			goto out;
9299 		vp = cp->c_frontvp;
9300 	}
9301 	ASSERT(vp != NULL);
9302 
9303 	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
9304 	error = VOP_SETSECATTR(vp, vsecp, 0, kcred, NULL);
9305 	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
9306 	if (error != 0) {
9307 #ifdef CFSDEBUG
9308 		CFS_DEBUG(CFSDEBUG_VOPS)
9309 			printf("cachefs_cacheacl: setsecattr: error %d\n",
9310 			    error);
9311 #endif /* CFSDEBUG */
9312 		/*
9313 		 * If there was an error, we don't want to call
9314 		 * cachefs_nocache(); so, set error to 0.
9315 		 * We will call cachefs_purgeacl(), in order to
9316 		 * clean such things as adjunct ACL directories.
9317 		 */
9318 		cachefs_purgeacl(cp);
9319 		error = 0;
9320 		goto out;
9321 	}
9322 	if (vp == cp->c_frontvp)
9323 		cp->c_flags |= CN_NEED_FRONT_SYNC;
9324 
9325 	cp->c_metadata.md_flags |= MD_ACL;
9326 	cp->c_flags |= CN_UPDATED;
9327 
9328 out:
9329 	if ((error) && (fscp->fs_cdconnected == CFS_CD_CONNECTED))
9330 		cachefs_nocache(cp);
9331 
9332 	if (gotvsec) {
9333 		if (vsec.vsa_aclcnt)
9334 			kmem_free(vsec.vsa_aclentp,
9335 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9336 		if (vsec.vsa_dfaclcnt)
9337 			kmem_free(vsec.vsa_dfaclentp,
9338 			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
9339 	} else if (aclkeep != NULL) {
9340 		cachefs_kmem_free(vsecp->vsa_aclentp,
9341 		    vsecp->vsa_aclcnt * sizeof (aclent_t));
9342 		vsecp->vsa_aclentp = aclkeep;
9343 	}
9344 
9345 	return (error);
9346 }
9347 
9348 void
9349 cachefs_purgeacl(cnode_t *cp)
9350 {
9351 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9352 
9353 	ASSERT(!CFS_ISFS_BACKFS_NFSV4(C_TO_FSCACHE(cp)));
9354 
9355 	if (cp->c_acldirvp != NULL) {
9356 		VN_RELE(cp->c_acldirvp);
9357 		cp->c_acldirvp = NULL;
9358 	}
9359 
9360 	if (cp->c_metadata.md_flags & MD_ACLDIR) {
9361 		char name[CFS_FRONTFILE_NAME_SIZE + 2];
9362 
9363 		ASSERT(cp->c_filegrp->fg_dirvp != NULL);
9364 		make_ascii_name(&cp->c_id, name);
9365 		(void) strcat(name, ".d");
9366 
9367 		(void) VOP_RMDIR(cp->c_filegrp->fg_dirvp, name,
9368 		    cp->c_filegrp->fg_dirvp, kcred, NULL, 0);
9369 	}
9370 
9371 	cp->c_metadata.md_flags &= ~(MD_ACL | MD_ACLDIR);
9372 	cp->c_flags |= CN_UPDATED;
9373 }
9374 
9375 static int
9376 cachefs_getacldirvp(cnode_t *cp)
9377 {
9378 	char name[CFS_FRONTFILE_NAME_SIZE + 2];
9379 	int error = 0;
9380 
9381 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9382 	ASSERT(cp->c_acldirvp == NULL);
9383 
9384 	if (cp->c_frontvp == NULL)
9385 		error = cachefs_getfrontfile(cp);
9386 	if (error != 0)
9387 		goto out;
9388 
9389 	ASSERT(cp->c_filegrp->fg_dirvp != NULL);
9390 	make_ascii_name(&cp->c_id, name);
9391 	(void) strcat(name, ".d");
9392 	error = VOP_LOOKUP(cp->c_filegrp->fg_dirvp,
9393 	    name, &cp->c_acldirvp, NULL, 0, NULL, kcred, NULL, NULL, NULL);
9394 	if ((error != 0) && (error != ENOENT))
9395 		goto out;
9396 
9397 	if (error != 0) {
9398 		vattr_t va;
9399 
9400 		va.va_mode = S_IFDIR | 0777;
9401 		va.va_uid = 0;
9402 		va.va_gid = 0;
9403 		va.va_type = VDIR;
9404 		va.va_mask = AT_TYPE | AT_MODE |
9405 		    AT_UID | AT_GID;
9406 		error =
9407 		    VOP_MKDIR(cp->c_filegrp->fg_dirvp,
9408 			name, &va, &cp->c_acldirvp, kcred, NULL, 0, NULL);
9409 		if (error != 0)
9410 			goto out;
9411 	}
9412 
9413 	ASSERT(cp->c_acldirvp != NULL);
9414 	cp->c_metadata.md_flags |= MD_ACLDIR;
9415 	cp->c_flags |= CN_UPDATED;
9416 
9417 out:
9418 	if (error != 0)
9419 		cp->c_acldirvp = NULL;
9420 	return (error);
9421 }
9422 
9423 static int
9424 cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec)
9425 {
9426 	aclent_t *aclp;
9427 	int error = 0;
9428 	vnode_t *vp = NULL;
9429 	int i;
9430 
9431 	ASSERT(cp->c_metadata.md_flags & MD_ACL);
9432 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9433 	ASSERT(vsec->vsa_aclentp == NULL);
9434 
9435 	if (cp->c_metadata.md_flags & MD_ACLDIR) {
9436 		if (cp->c_acldirvp == NULL)
9437 			error = cachefs_getacldirvp(cp);
9438 		if (error != 0)
9439 			goto out;
9440 		vp = cp->c_acldirvp;
9441 	} else if (cp->c_metadata.md_flags & MD_FILE) {
9442 		if (cp->c_frontvp == NULL)
9443 			error = cachefs_getfrontfile(cp);
9444 		if (error != 0)
9445 			goto out;
9446 		vp = cp->c_frontvp;
9447 	} else {
9448 
9449 		/*
9450 		 * if we get here, then we know that MD_ACL is on,
9451 		 * meaning an ACL was successfully cached.  we also
9452 		 * know that neither MD_ACLDIR nor MD_FILE are on, so
9453 		 * this has to be an entry without a `real' ACL.
9454 		 * thus, we forge whatever is necessary.
9455 		 */
9456 
9457 		if (vsec->vsa_mask & VSA_ACLCNT)
9458 			vsec->vsa_aclcnt = MIN_ACL_ENTRIES;
9459 
9460 		if (vsec->vsa_mask & VSA_ACL) {
9461 			vsec->vsa_aclentp =
9462 			    kmem_zalloc(MIN_ACL_ENTRIES *
9463 			    sizeof (aclent_t), KM_SLEEP);
9464 			aclp = (aclent_t *)vsec->vsa_aclentp;
9465 			aclp->a_type = USER_OBJ;
9466 			++aclp;
9467 			aclp->a_type = GROUP_OBJ;
9468 			++aclp;
9469 			aclp->a_type = OTHER_OBJ;
9470 			++aclp;
9471 			aclp->a_type = CLASS_OBJ;
9472 			ksort((caddr_t)vsec->vsa_aclentp, MIN_ACL_ENTRIES,
9473 			    sizeof (aclent_t), cmp2acls);
9474 		}
9475 
9476 		ASSERT(vp == NULL);
9477 	}
9478 
9479 	if (vp != NULL) {
9480 		if ((error = VOP_GETSECATTR(vp, vsec, 0, kcred, NULL)) != 0) {
9481 #ifdef CFSDEBUG
9482 			CFS_DEBUG(CFSDEBUG_VOPS)
9483 				printf("cachefs_getaclfromcache: error %d\n",
9484 				    error);
9485 #endif /* CFSDEBUG */
9486 			goto out;
9487 		}
9488 	}
9489 
9490 	if (vsec->vsa_aclentp != NULL) {
9491 		for (i = 0; i < vsec->vsa_aclcnt; i++) {
9492 			aclp = ((aclent_t *)vsec->vsa_aclentp) + i;
9493 			switch (aclp->a_type) {
9494 			case USER_OBJ:
9495 				aclp->a_id = cp->c_metadata.md_vattr.va_uid;
9496 				aclp->a_perm =
9497 				    cp->c_metadata.md_vattr.va_mode & 0700;
9498 				aclp->a_perm >>= 6;
9499 				break;
9500 
9501 			case GROUP_OBJ:
9502 				aclp->a_id = cp->c_metadata.md_vattr.va_gid;
9503 				aclp->a_perm =
9504 				    cp->c_metadata.md_vattr.va_mode & 070;
9505 				aclp->a_perm >>= 3;
9506 				break;
9507 
9508 			case OTHER_OBJ:
9509 				aclp->a_perm =
9510 				    cp->c_metadata.md_vattr.va_mode & 07;
9511 				break;
9512 
9513 			case CLASS_OBJ:
9514 				aclp->a_perm =
9515 				    cp->c_metadata.md_aclclass;
9516 				break;
9517 			}
9518 		}
9519 	}
9520 
9521 out:
9522 
9523 	if (error != 0)
9524 		cachefs_nocache(cp);
9525 
9526 	return (error);
9527 }
9528 
9529 /*
9530  * Fills in targp with attribute information from srcp, cp
9531  * and if necessary the system.
9532  */
9533 static void
9534 cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp, cred_t *cr)
9535 {
9536 	time_t	now;
9537 
9538 	ASSERT((srcp->va_mask & (AT_TYPE | AT_MODE)) == (AT_TYPE | AT_MODE));
9539 
9540 	/*
9541 	 * Add code to fill in the va struct.  We use the fields from
9542 	 * the srcp struct if they are populated, otherwise we guess
9543 	 */
9544 
9545 	targp->va_mask = 0;	/* initialize all fields */
9546 	targp->va_mode = srcp->va_mode;
9547 	targp->va_type = srcp->va_type;
9548 	targp->va_nlink = 1;
9549 	targp->va_nodeid = 0;
9550 
9551 	if (srcp->va_mask & AT_UID)
9552 		targp->va_uid = srcp->va_uid;
9553 	else
9554 		targp->va_uid = crgetuid(cr);
9555 
9556 	if (srcp->va_mask & AT_GID)
9557 		targp->va_gid = srcp->va_gid;
9558 	else
9559 		targp->va_gid = crgetgid(cr);
9560 
9561 	if (srcp->va_mask & AT_FSID)
9562 		targp->va_fsid = srcp->va_fsid;
9563 	else
9564 		targp->va_fsid = 0;	/* initialize all fields */
9565 
9566 	now = gethrestime_sec();
9567 	if (srcp->va_mask & AT_ATIME)
9568 		targp->va_atime = srcp->va_atime;
9569 	else
9570 		targp->va_atime.tv_sec = now;
9571 
9572 	if (srcp->va_mask & AT_MTIME)
9573 		targp->va_mtime = srcp->va_mtime;
9574 	else
9575 		targp->va_mtime.tv_sec = now;
9576 
9577 	if (srcp->va_mask & AT_CTIME)
9578 		targp->va_ctime = srcp->va_ctime;
9579 	else
9580 		targp->va_ctime.tv_sec = now;
9581 
9582 
9583 	if (srcp->va_mask & AT_SIZE)
9584 		targp->va_size = srcp->va_size;
9585 	else
9586 		targp->va_size = 0;
9587 
9588 	/*
9589 	 * the remaing fields are set by the fs and not changable.
9590 	 * we populate these entries useing the parent directory
9591 	 * values.  It's a small hack, but should work.
9592 	 */
9593 	targp->va_blksize = cp->c_metadata.md_vattr.va_blksize;
9594 	targp->va_rdev = cp->c_metadata.md_vattr.va_rdev;
9595 	targp->va_nblocks = cp->c_metadata.md_vattr.va_nblocks;
9596 	targp->va_seq = 0; /* Never keep the sequence number */
9597 }
9598 
9599 /*
9600  * set the gid for a newly created file.  The algorithm is as follows:
9601  *
9602  *	1) If the gid is set in the attribute list, then use it if
9603  *	   the caller is privileged, belongs to the target group, or
9604  *	   the group is the same as the parent directory.
9605  *
9606  *	2) If the parent directory's set-gid bit is clear, then use
9607  *	   the process gid
9608  *
9609  *	3) Otherwise, use the gid of the parent directory.
9610  *
9611  * Note: newcp->c_attr.va_{mode,type} must already be set before calling
9612  * this routine.
9613  */
9614 static void
9615 cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap, cred_t *cr)
9616 {
9617 	if ((vap->va_mask & AT_GID) &&
9618 	    ((vap->va_gid == dcp->c_attr.va_gid) ||
9619 	    groupmember(vap->va_gid, cr) ||
9620 	    secpolicy_vnode_create_gid(cr) != 0)) {
9621 		newcp->c_attr.va_gid = vap->va_gid;
9622 	} else {
9623 		if (dcp->c_attr.va_mode & S_ISGID)
9624 			newcp->c_attr.va_gid = dcp->c_attr.va_gid;
9625 		else
9626 			newcp->c_attr.va_gid = crgetgid(cr);
9627 	}
9628 
9629 	/*
9630 	 * if we're creating a directory, and the parent directory has the
9631 	 * set-GID bit set, set it on the new directory.
9632 	 * Otherwise, if the user is neither privileged nor a member of the
9633 	 * file's new group, clear the file's set-GID bit.
9634 	 */
9635 	if (dcp->c_attr.va_mode & S_ISGID && newcp->c_attr.va_type == VDIR) {
9636 		newcp->c_attr.va_mode |= S_ISGID;
9637 	} else if ((newcp->c_attr.va_mode & S_ISGID) &&
9638 	    secpolicy_vnode_setids_setgids(cr, newcp->c_attr.va_gid) != 0)
9639 		newcp->c_attr.va_mode &= ~S_ISGID;
9640 }
9641 
9642 /*
9643  * create an acl for the newly created file.  should be called right
9644  * after cachefs_creategid.
9645  */
9646 
9647 static void
9648 cachefs_createacl(cnode_t *dcp, cnode_t *newcp)
9649 {
9650 	fscache_t *fscp = C_TO_FSCACHE(dcp);
9651 	vsecattr_t vsec;
9652 	int gotvsec = 0;
9653 	int error = 0; /* placeholder */
9654 	aclent_t *aclp;
9655 	o_mode_t *classp = NULL;
9656 	o_mode_t gunion = 0;
9657 	int i;
9658 
9659 	if ((fscp->fs_info.fi_mntflags & CFS_NOACL) ||
9660 	    (! cachefs_vtype_aclok(CTOV(newcp))))
9661 		return;
9662 
9663 	ASSERT(dcp->c_metadata.md_flags & MD_ACL);
9664 	ASSERT(MUTEX_HELD(&dcp->c_statelock));
9665 	ASSERT(MUTEX_HELD(&newcp->c_statelock));
9666 
9667 	/*
9668 	 * XXX should probably not do VSA_ACL and VSA_ACLCNT, but that
9669 	 * would hit code paths that isn't hit anywhere else.
9670 	 */
9671 
9672 	bzero(&vsec, sizeof (vsec));
9673 	vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9674 	error = cachefs_getaclfromcache(dcp, &vsec);
9675 	if (error != 0)
9676 		goto out;
9677 	gotvsec = 1;
9678 
9679 	if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL)) {
9680 		if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL))
9681 			kmem_free(vsec.vsa_aclentp,
9682 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9683 
9684 		vsec.vsa_aclcnt = vsec.vsa_dfaclcnt;
9685 		vsec.vsa_aclentp = vsec.vsa_dfaclentp;
9686 		vsec.vsa_dfaclcnt = 0;
9687 		vsec.vsa_dfaclentp = NULL;
9688 
9689 		if (newcp->c_attr.va_type == VDIR) {
9690 			vsec.vsa_dfaclentp = kmem_alloc(vsec.vsa_aclcnt *
9691 			    sizeof (aclent_t), KM_SLEEP);
9692 			vsec.vsa_dfaclcnt = vsec.vsa_aclcnt;
9693 			bcopy(vsec.vsa_aclentp, vsec.vsa_dfaclentp,
9694 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9695 		}
9696 
9697 		/*
9698 		 * this function should be called pretty much after
9699 		 * the rest of the file creation stuff is done.  so,
9700 		 * uid, gid, etc. should be `right'.  we'll go with
9701 		 * that, rather than trying to determine whether to
9702 		 * get stuff from cr or va.
9703 		 */
9704 
9705 		for (i = 0; i < vsec.vsa_aclcnt; i++) {
9706 			aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9707 			switch (aclp->a_type) {
9708 			case DEF_USER_OBJ:
9709 				aclp->a_type = USER_OBJ;
9710 				aclp->a_id = newcp->c_metadata.md_vattr.va_uid;
9711 				aclp->a_perm =
9712 				    newcp->c_metadata.md_vattr.va_mode;
9713 				aclp->a_perm &= 0700;
9714 				aclp->a_perm >>= 6;
9715 				break;
9716 
9717 			case DEF_GROUP_OBJ:
9718 				aclp->a_type = GROUP_OBJ;
9719 				aclp->a_id = newcp->c_metadata.md_vattr.va_gid;
9720 				aclp->a_perm =
9721 				    newcp->c_metadata.md_vattr.va_mode;
9722 				aclp->a_perm &= 070;
9723 				aclp->a_perm >>= 3;
9724 				gunion |= aclp->a_perm;
9725 				break;
9726 
9727 			case DEF_OTHER_OBJ:
9728 				aclp->a_type = OTHER_OBJ;
9729 				aclp->a_perm =
9730 				    newcp->c_metadata.md_vattr.va_mode & 07;
9731 				break;
9732 
9733 			case DEF_CLASS_OBJ:
9734 				aclp->a_type = CLASS_OBJ;
9735 				classp = &(aclp->a_perm);
9736 				break;
9737 
9738 			case DEF_USER:
9739 				aclp->a_type = USER;
9740 				gunion |= aclp->a_perm;
9741 				break;
9742 
9743 			case DEF_GROUP:
9744 				aclp->a_type = GROUP;
9745 				gunion |= aclp->a_perm;
9746 				break;
9747 			}
9748 		}
9749 
9750 		/* XXX is this the POSIX thing to do? */
9751 		if (classp != NULL)
9752 			*classp &= gunion;
9753 
9754 		/*
9755 		 * we don't need to log this; rather, we clear the
9756 		 * MD_ACL bit when we reconnect.
9757 		 */
9758 
9759 		error = cachefs_cacheacl(newcp, &vsec);
9760 		if (error != 0)
9761 			goto out;
9762 	}
9763 
9764 	newcp->c_metadata.md_aclclass = 07; /* XXX check posix */
9765 	newcp->c_metadata.md_flags |= MD_ACL;
9766 	newcp->c_flags |= CN_UPDATED;
9767 
9768 out:
9769 
9770 	if (gotvsec) {
9771 		if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL))
9772 			kmem_free(vsec.vsa_aclentp,
9773 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9774 		if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL))
9775 			kmem_free(vsec.vsa_dfaclentp,
9776 			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
9777 	}
9778 }
9779 
9780 /*
9781  * this is translated from the UFS code for access checking.
9782  */
9783 
9784 static int
9785 cachefs_access_local(void *vcp, int mode, cred_t *cr)
9786 {
9787 	cnode_t *cp = vcp;
9788 	fscache_t *fscp = C_TO_FSCACHE(cp);
9789 	int shift = 0;
9790 
9791 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9792 
9793 	if (mode & VWRITE) {
9794 		/*
9795 		 * Disallow write attempts on read-only
9796 		 * file systems, unless the file is special.
9797 		 */
9798 		struct vnode *vp = CTOV(cp);
9799 		if (vn_is_readonly(vp)) {
9800 			if (!IS_DEVVP(vp)) {
9801 				return (EROFS);
9802 			}
9803 		}
9804 	}
9805 
9806 	/*
9807 	 * if we need to do ACLs, do it.  this works whether anyone
9808 	 * has explicitly made an ACL or not.
9809 	 */
9810 
9811 	if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
9812 	    (cachefs_vtype_aclok(CTOV(cp))))
9813 		return (cachefs_acl_access(cp, mode, cr));
9814 
9815 	if (crgetuid(cr) != cp->c_attr.va_uid) {
9816 		shift += 3;
9817 		if (!groupmember(cp->c_attr.va_gid, cr))
9818 			shift += 3;
9819 	}
9820 
9821 	/* compute missing mode bits */
9822 	mode &= ~(cp->c_attr.va_mode << shift);
9823 
9824 	if (mode == 0)
9825 		return (0);
9826 
9827 	return (secpolicy_vnode_access(cr, CTOV(cp), cp->c_attr.va_uid, mode));
9828 }
9829 
9830 /*
9831  * This is transcribed from ufs_acl_access().  If that changes, then
9832  * this should, too.
9833  *
9834  * Check the cnode's ACL's to see if this mode of access is
9835  * allowed; return 0 if allowed, EACCES if not.
9836  *
9837  * We follow the procedure defined in Sec. 3.3.5, ACL Access
9838  * Check Algorithm, of the POSIX 1003.6 Draft Standard.
9839  */
9840 
9841 #define	ACL_MODE_CHECK(M, PERM, C, I) ((((M) & (PERM)) == (M)) ? 0 : \
9842 		    secpolicy_vnode_access(C, CTOV(I), owner, (M) & ~(PERM)))
9843 
9844 static int
9845 cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr)
9846 {
9847 	int error = 0;
9848 
9849 	fscache_t *fscp = C_TO_FSCACHE(cp);
9850 
9851 	int mask = ~0;
9852 	int ismask = 0;
9853 
9854 	int gperm = 0;
9855 	int ngroup = 0;
9856 
9857 	vsecattr_t vsec;
9858 	int gotvsec = 0;
9859 	aclent_t *aclp;
9860 
9861 	uid_t owner = cp->c_attr.va_uid;
9862 
9863 	int i;
9864 
9865 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9866 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
9867 
9868 	/*
9869 	 * strictly speaking, we shouldn't set VSA_DFACL and DFACLCNT,
9870 	 * but then i believe we'd be the only thing exercising those
9871 	 * code paths -- probably a bad thing.
9872 	 */
9873 
9874 	bzero(&vsec, sizeof (vsec));
9875 	vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9876 
9877 	/* XXX KLUDGE! correct insidious 0-class problem */
9878 	if (cp->c_metadata.md_aclclass == 0 &&
9879 	    fscp->fs_cdconnected == CFS_CD_CONNECTED)
9880 		cachefs_purgeacl(cp);
9881 again:
9882 	if (cp->c_metadata.md_flags & MD_ACL) {
9883 		error = cachefs_getaclfromcache(cp, &vsec);
9884 		if (error != 0) {
9885 #ifdef CFSDEBUG
9886 			if (error != ETIMEDOUT)
9887 				CFS_DEBUG(CFSDEBUG_VOPS)
9888 					printf("cachefs_acl_access():"
9889 					    "error %d from getaclfromcache()\n",
9890 					    error);
9891 #endif /* CFSDEBUG */
9892 			if ((cp->c_metadata.md_flags & MD_ACL) == 0) {
9893 				goto again;
9894 			} else {
9895 				goto out;
9896 			}
9897 		}
9898 	} else {
9899 		if (cp->c_backvp == NULL) {
9900 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
9901 				error = cachefs_getbackvp(fscp, cp);
9902 			else
9903 				error = ETIMEDOUT;
9904 		}
9905 		if (error == 0)
9906 			error = VOP_GETSECATTR(cp->c_backvp, &vsec, 0, cr,
9907 			    NULL);
9908 		if (error != 0) {
9909 #ifdef CFSDEBUG
9910 			CFS_DEBUG(CFSDEBUG_VOPS)
9911 				printf("cachefs_acl_access():"
9912 				    "error %d from getsecattr(backvp)\n",
9913 				    error);
9914 #endif /* CFSDEBUG */
9915 			goto out;
9916 		}
9917 		if ((cp->c_flags & CN_NOCACHE) == 0 &&
9918 		    !CFS_ISFS_BACKFS_NFSV4(fscp))
9919 			(void) cachefs_cacheacl(cp, &vsec);
9920 	}
9921 	gotvsec = 1;
9922 
9923 	ASSERT(error == 0);
9924 	for (i = 0; i < vsec.vsa_aclcnt; i++) {
9925 		aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9926 		switch (aclp->a_type) {
9927 		case USER_OBJ:
9928 			/*
9929 			 * this might look cleaner in the 2nd loop
9930 			 * below, but we do it here as an
9931 			 * optimization.
9932 			 */
9933 
9934 			owner = aclp->a_id;
9935 			if (crgetuid(cr) == owner) {
9936 				error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
9937 							cr, cp);
9938 				goto out;
9939 			}
9940 			break;
9941 
9942 		case CLASS_OBJ:
9943 			mask = aclp->a_perm;
9944 			ismask = 1;
9945 			break;
9946 		}
9947 	}
9948 
9949 	ASSERT(error == 0);
9950 	for (i = 0; i < vsec.vsa_aclcnt; i++) {
9951 		aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9952 		switch (aclp->a_type) {
9953 		case USER:
9954 			if (crgetuid(cr) == aclp->a_id) {
9955 				error = ACL_MODE_CHECK(mode,
9956 					(aclp->a_perm & mask) << 6, cr, cp);
9957 				goto out;
9958 			}
9959 			break;
9960 
9961 		case GROUP_OBJ:
9962 			if (groupmember(aclp->a_id, cr)) {
9963 				++ngroup;
9964 				gperm |= aclp->a_perm;
9965 				if (! ismask) {
9966 					error = ACL_MODE_CHECK(mode,
9967 							aclp->a_perm << 6,
9968 							cr, cp);
9969 					goto out;
9970 				}
9971 			}
9972 			break;
9973 
9974 		case GROUP:
9975 			if (groupmember(aclp->a_id, cr)) {
9976 				++ngroup;
9977 				gperm |= aclp->a_perm;
9978 			}
9979 			break;
9980 
9981 		case OTHER_OBJ:
9982 			if (ngroup == 0) {
9983 				error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
9984 						cr, cp);
9985 				goto out;
9986 			}
9987 			break;
9988 
9989 		default:
9990 			break;
9991 		}
9992 	}
9993 
9994 	ASSERT(ngroup > 0);
9995 	error = ACL_MODE_CHECK(mode, (gperm & mask) << 6, cr, cp);
9996 
9997 out:
9998 	if (gotvsec) {
9999 		if (vsec.vsa_aclcnt && vsec.vsa_aclentp)
10000 			kmem_free(vsec.vsa_aclentp,
10001 			    vsec.vsa_aclcnt * sizeof (aclent_t));
10002 		if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp)
10003 			kmem_free(vsec.vsa_dfaclentp,
10004 			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
10005 	}
10006 
10007 	return (error);
10008 }
10009 
10010 /*
10011  * see if permissions allow for removal of the given file from
10012  * the given directory.
10013  */
10014 static int
10015 cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr)
10016 {
10017 	uid_t uid;
10018 	/*
10019 	 * If the containing directory is sticky, the user must:
10020 	 *  - own the directory, or
10021 	 *  - own the file, or
10022 	 *  - be able to write the file (if it's a plain file), or
10023 	 *  - be sufficiently privileged.
10024 	 */
10025 	if ((dcp->c_attr.va_mode & S_ISVTX) &&
10026 	    ((uid = crgetuid(cr)) != dcp->c_attr.va_uid) &&
10027 	    (uid != cp->c_attr.va_uid) &&
10028 	    (cp->c_attr.va_type != VREG ||
10029 	    cachefs_access_local(cp, VWRITE, cr) != 0))
10030 		return (secpolicy_vnode_remove(cr));
10031 
10032 	return (0);
10033 }
10034 
/*
 * Returns a new name, which may even be unique.
 * Stolen from the NFS code.
 * CacheFS now renames to .cfs* instead of .nfs*.
 * Both NFS and CacheFS will rename opened files.
 */
10040  */
10041 static char cachefs_prefix[] = ".cfs";
10042 kmutex_t cachefs_newnum_lock;
10043 
10044 static char *
10045 cachefs_newname(void)
10046 {
10047 	static uint_t newnum = 0;
10048 	char *news;
10049 	char *s, *p;
10050 	uint_t id;
10051 
10052 	mutex_enter(&cachefs_newnum_lock);
10053 	if (newnum == 0) {
10054 		newnum = gethrestime_sec() & 0xfffff;
10055 		newnum |= 0x10000;
10056 	}
10057 	id = newnum++;
10058 	mutex_exit(&cachefs_newnum_lock);
10059 
10060 	news = cachefs_kmem_alloc(MAXNAMELEN, KM_SLEEP);
10061 	s = news;
10062 	p = cachefs_prefix;
10063 	while (*p != '\0')
10064 		*s++ = *p++;
10065 	while (id != 0) {
10066 		*s++ = "0123456789ABCDEF"[id & 0x0f];
10067 		id >>= 4;
10068 	}
10069 	*s = '\0';
10070 	return (news);
10071 }
10072 
10073 /*
10074  * Called to rename the specified file to a temporary file so
10075  * operations to the file after remove work.
10076  * Must call this routine with the dir c_rwlock held as a writer.
10077  */
10078 static int
10079 /*ARGSUSED*/
10080 cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm, cred_t *cr)
10081 {
10082 	cnode_t *cp = VTOC(vp);
10083 	char *tmpname;
10084 	fscache_t *fscp = C_TO_FSCACHE(cp);
10085 	int error;
10086 
10087 	ASSERT(RW_WRITE_HELD(&(VTOC(dvp)->c_rwlock)));
10088 
10089 	/* get the new name for the file */
10090 	tmpname = cachefs_newname();
10091 
10092 	/* do the link */
10093 	if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
10094 		error = cachefs_link_connected(dvp, vp, tmpname, cr);
10095 	else
10096 		error = cachefs_link_disconnected(dvp, vp, tmpname, cr);
10097 	if (error) {
10098 		cachefs_kmem_free(tmpname, MAXNAMELEN);
10099 		return (error);
10100 	}
10101 
10102 	mutex_enter(&cp->c_statelock);
10103 	if (cp->c_unldvp) {
10104 		VN_RELE(cp->c_unldvp);
10105 		cachefs_kmem_free(cp->c_unlname, MAXNAMELEN);
10106 		crfree(cp->c_unlcred);
10107 	}
10108 
10109 	VN_HOLD(dvp);
10110 	cp->c_unldvp = dvp;
10111 	crhold(cr);
10112 	cp->c_unlcred = cr;
10113 	cp->c_unlname = tmpname;
10114 
10115 	/* drop the backvp so NFS does not also do a rename */
10116 	mutex_exit(&cp->c_statelock);
10117 
10118 	return (0);
10119 }
10120 
10121 /*
10122  * Marks the cnode as modified.
10123  */
10124 static void
10125 cachefs_modified(cnode_t *cp)
10126 {
10127 	fscache_t *fscp = C_TO_FSCACHE(cp);
10128 	struct vattr va;
10129 	int error;
10130 
10131 	ASSERT(MUTEX_HELD(&cp->c_statelock));
10132 	ASSERT(cp->c_metadata.md_rlno);
10133 
10134 	/* if not on the modify list */
10135 	if (cp->c_metadata.md_rltype != CACHEFS_RL_MODIFIED) {
10136 		/* put on modified list, also marks the file as modified */
10137 		cachefs_rlent_moveto(fscp->fs_cache, CACHEFS_RL_MODIFIED,
10138 		    cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks);
10139 		cp->c_metadata.md_rltype = CACHEFS_RL_MODIFIED;
10140 		cp->c_flags |= CN_UPDATED;
10141 
10142 		/* if a modified regular file that is not local */
10143 		if (((cp->c_id.cid_flags & CFS_CID_LOCAL) == 0) &&
10144 		    (cp->c_metadata.md_flags & MD_FILE) &&
10145 		    (cp->c_attr.va_type == VREG)) {
10146 
10147 			if (cp->c_frontvp == NULL)
10148 				(void) cachefs_getfrontfile(cp);
10149 			if (cp->c_frontvp) {
10150 				/* identify file so fsck knows it is modified */
10151 				va.va_mode = 0766;
10152 				va.va_mask = AT_MODE;
10153 				error = VOP_SETATTR(cp->c_frontvp,
10154 				    &va, 0, kcred, NULL);
10155 				if (error) {
10156 					cmn_err(CE_WARN,
10157 					    "Cannot change ff mode.\n");
10158 				}
10159 			}
10160 		}
10161 	}
10162 }
10163 
10164 /*
10165  * Marks the cnode as modified.
10166  * Allocates a rl slot for the cnode if necessary.
10167  * Returns 0 for success, !0 if cannot get an rl slot.
10168  */
10169 static int
10170 cachefs_modified_alloc(cnode_t *cp)
10171 {
10172 	fscache_t *fscp = C_TO_FSCACHE(cp);
10173 	filegrp_t *fgp = cp->c_filegrp;
10174 	int error;
10175 	rl_entry_t rl_ent;
10176 
10177 	ASSERT(MUTEX_HELD(&cp->c_statelock));
10178 
10179 	/* get the rl slot if needed */
10180 	if (cp->c_metadata.md_rlno == 0) {
10181 		/* get a metadata slot if we do not have one yet */
10182 		if (cp->c_flags & CN_ALLOC_PENDING) {
10183 			if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
10184 				(void) filegrp_allocattr(cp->c_filegrp);
10185 			}
10186 			error = filegrp_create_metadata(cp->c_filegrp,
10187 			    &cp->c_metadata, &cp->c_id);
10188 			if (error)
10189 				return (error);
10190 			cp->c_flags &= ~CN_ALLOC_PENDING;
10191 		}
10192 
10193 		/* get a free rl entry */
10194 		rl_ent.rl_fileno = cp->c_id.cid_fileno;
10195 		rl_ent.rl_local = (cp->c_id.cid_flags & CFS_CID_LOCAL) ? 1 : 0;
10196 		rl_ent.rl_fsid = fscp->fs_cfsid;
10197 		rl_ent.rl_attrc = 0;
10198 		error = cachefs_rl_alloc(fscp->fs_cache, &rl_ent,
10199 		    &cp->c_metadata.md_rlno);
10200 		if (error)
10201 			return (error);
10202 		cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
10203 
10204 		/* hold the filegrp so the attrcache file is not gc */
10205 		error = filegrp_ffhold(fgp);
10206 		if (error) {
10207 			cachefs_rlent_moveto(fscp->fs_cache,
10208 			    CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
10209 			cp->c_metadata.md_rlno = 0;
10210 			return (error);
10211 		}
10212 	}
10213 	cachefs_modified(cp);
10214 	return (0);
10215 }
10216 
10217 int
10218 cachefs_vtype_aclok(vnode_t *vp)
10219 {
10220 	vtype_t *vtp, oktypes[] = {VREG, VDIR, VFIFO, VNON};
10221 
10222 	if (vp->v_type == VNON)
10223 		return (0);
10224 
10225 	for (vtp = oktypes; *vtp != VNON; vtp++)
10226 		if (vp->v_type == *vtp)
10227 			break;
10228 
10229 	return (*vtp != VNON);
10230 }
10231 
10232 static int
10233 cachefs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
10234     caller_context_t *ct)
10235 {
10236 	int error = 0;
10237 	fscache_t *fscp = C_TO_FSCACHE(VTOC(vp));
10238 
10239 	/* Assert cachefs compatibility if NFSv4 is in use */
10240 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
10241 	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(vp));
10242 
10243 	if (cmd == _PC_FILESIZEBITS) {
10244 		u_offset_t maxsize = fscp->fs_offmax;
10245 		(*valp) = 0;
10246 		while (maxsize != 0) {
10247 			maxsize >>= 1;
10248 			(*valp)++;
10249 		}
10250 		(*valp)++;
10251 	} else
10252 		error = fs_pathconf(vp, cmd, valp, cr, ct);
10253 
10254 	return (error);
10255 }
10256