xref: /titanic_44/usr/src/uts/common/fs/cachefs/cachefs_vnops.c (revision cde58dbc6a23d4d38db7c8866312be83221c765f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/param.h>
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/cred.h>
29 #include <sys/proc.h>
30 #include <sys/user.h>
31 #include <sys/time.h>
32 #include <sys/vnode.h>
33 #include <sys/vfs.h>
34 #include <sys/vfs_opreg.h>
35 #include <sys/file.h>
36 #include <sys/filio.h>
37 #include <sys/uio.h>
38 #include <sys/buf.h>
39 #include <sys/mman.h>
40 #include <sys/tiuser.h>
41 #include <sys/pathname.h>
42 #include <sys/dirent.h>
43 #include <sys/conf.h>
44 #include <sys/debug.h>
45 #include <sys/vmsystm.h>
46 #include <sys/fcntl.h>
47 #include <sys/flock.h>
48 #include <sys/swap.h>
49 #include <sys/errno.h>
50 #include <sys/sysmacros.h>
51 #include <sys/disp.h>
52 #include <sys/kmem.h>
53 #include <sys/cmn_err.h>
54 #include <sys/vtrace.h>
55 #include <sys/mount.h>
56 #include <sys/bootconf.h>
57 #include <sys/dnlc.h>
58 #include <sys/stat.h>
59 #include <sys/acl.h>
60 #include <sys/policy.h>
61 #include <rpc/types.h>
62 
63 #include <vm/hat.h>
64 #include <vm/as.h>
65 #include <vm/page.h>
66 #include <vm/pvn.h>
67 #include <vm/seg.h>
68 #include <vm/seg_map.h>
69 #include <vm/seg_vn.h>
70 #include <vm/rm.h>
71 #include <sys/fs/cachefs_fs.h>
72 #include <sys/fs/cachefs_dir.h>
73 #include <sys/fs/cachefs_dlog.h>
74 #include <sys/fs/cachefs_ioctl.h>
75 #include <sys/fs/cachefs_log.h>
76 #include <fs/fs_subr.h>
77 
/*
 * Global debugging knob; per the original note it relates to use of the
 * dnlc (see <sys/dnlc.h>).  It is only defined here and is not read or
 * written anywhere in the visible part of this file.
 */
int cachefs_dnlc;	/* use dnlc, debugging */
79 
/*
 * Prototypes for file-local helpers.
 *
 * Many of these come in *_connected / *_disconnected pairs.  Judging by
 * the names they are the two variants of one operation, selected by the
 * connection state of the file system -- the definitions are outside the
 * visible part of this file, so confirm against them.
 */

/* attribute, group id and ACL helpers */
static void cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp,
    cred_t *cr);
static void cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap,
    cred_t *cr);
static void cachefs_createacl(cnode_t *dcp, cnode_t *newcp);
static int cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec);
static int cachefs_getacldirvp(cnode_t *cp);
static void cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec);
static int cachefs_access_local(void *cp, int mode, cred_t *cr);
static int cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr);

/* page push helpers */
static int cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen,
    u_offset_t iooff, cred_t *cr);
static int cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen,
    u_offset_t iooff, cred_t *cr);

/* per-operation workers */
static int cachefs_setattr_connected(vnode_t *vp, vattr_t *vap, int flags,
    cred_t *cr, caller_context_t *ct);
static int cachefs_setattr_disconnected(vnode_t *vp, vattr_t *vap,
    int flags, cred_t *cr, caller_context_t *ct);
static int cachefs_access_connected(struct vnode *vp, int mode,
    int flags, cred_t *cr);
static int cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp,
    cred_t *cr);
static int cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva,
    char *tnm, cred_t *cr);
static int cachefs_symlink_disconnected(vnode_t *dvp, char *lnm,
    vattr_t *tva, char *tnm, cred_t *cr);
static int cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm,
    cred_t *cr);
static int cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp,
    char *tnm, cred_t *cr);
static int cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap,
    vnode_t **vpp, cred_t *cr);
static int cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
    vnode_t **vpp, cred_t *cr);
static int cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr);
static int cachefs_rmdir_connected(vnode_t *dvp, char *nm,
    vnode_t *cdir, cred_t *cr, vnode_t *vp);
static int cachefs_rmdir_disconnected(vnode_t *dvp, char *nm,
    vnode_t *cdir, cred_t *cr, vnode_t *vp);
static char *cachefs_newname(void);
static int cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm,
    cred_t *cr);
static int cachefs_rename_connected(vnode_t *odvp, char *onm,
    vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp);
static int cachefs_rename_disconnected(vnode_t *odvp, char *onm,
    vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp);
static int cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr,
    int *eofp);
static int cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop,
    cred_t *cr, int *eofp);
static int cachefs_readback_translate(cnode_t *cp, uio_t *uiop,
	cred_t *cr, int *eofp);

static int cachefs_setattr_common(vnode_t *vp, vattr_t *vap, int flags,
    cred_t *cr, caller_context_t *ct);
135 
/*
 * Prototypes for the vnode operation entry points that are installed
 * through cachefs_vnodeops_template, interleaved with a few workers
 * (cachefs_create_connected/_disconnected, cachefs_readlink_connected/
 * _disconnected, cachefs_getapage, cachefs_getapage_back,
 * cachefs_getsecattr_connected/_disconnected, cachefs_writepage) that do
 * not appear in the template themselves.
 */
static	int	cachefs_open(struct vnode **, int, cred_t *,
			caller_context_t *);
static	int	cachefs_close(struct vnode *, int, int, offset_t,
			cred_t *, caller_context_t *);
static	int	cachefs_read(struct vnode *, struct uio *, int, cred_t *,
			caller_context_t *);
static	int	cachefs_write(struct vnode *, struct uio *, int, cred_t *,
			caller_context_t *);
static	int	cachefs_ioctl(struct vnode *, int, intptr_t, int, cred_t *,
			int *, caller_context_t *);
static	int	cachefs_getattr(struct vnode *, struct vattr *, int,
			cred_t *, caller_context_t *);
static	int	cachefs_setattr(struct vnode *, struct vattr *,
			int, cred_t *, caller_context_t *);
static	int	cachefs_access(struct vnode *, int, int, cred_t *,
			caller_context_t *);
static	int	cachefs_lookup(struct vnode *, char *, struct vnode **,
			struct pathname *, int, struct vnode *, cred_t *,
			caller_context_t *, int *, pathname_t *);
static	int	cachefs_create(struct vnode *, char *, struct vattr *,
			enum vcexcl, int, struct vnode **, cred_t *, int,
			caller_context_t *, vsecattr_t *);
static	int	cachefs_create_connected(vnode_t *dvp, char *nm,
			vattr_t *vap, enum vcexcl exclusive, int mode,
			vnode_t **vpp, cred_t *cr);
static	int	cachefs_create_disconnected(vnode_t *dvp, char *nm,
			vattr_t *vap, enum vcexcl exclusive, int mode,
			vnode_t **vpp, cred_t *cr);
static	int	cachefs_remove(struct vnode *, char *, cred_t *,
			caller_context_t *, int);
static	int	cachefs_link(struct vnode *, struct vnode *, char *,
			cred_t *, caller_context_t *, int);
static	int	cachefs_rename(struct vnode *, char *, struct vnode *,
			char *, cred_t *, caller_context_t *, int);
static	int	cachefs_mkdir(struct vnode *, char *, struct
			vattr *, struct vnode **, cred_t *, caller_context_t *,
			int, vsecattr_t *);
static	int	cachefs_rmdir(struct vnode *, char *, struct vnode *,
			cred_t *, caller_context_t *, int);
static	int	cachefs_readdir(struct vnode *, struct uio *,
			cred_t *, int *, caller_context_t *, int);
static	int	cachefs_symlink(struct vnode *, char *, struct vattr *,
			char *, cred_t *, caller_context_t *, int);
static	int	cachefs_readlink(struct vnode *, struct uio *, cred_t *,
			caller_context_t *);
static int cachefs_readlink_connected(vnode_t *vp, uio_t *uiop, cred_t *cr);
static int cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop);
static	int	cachefs_fsync(struct vnode *, int, cred_t *,
			caller_context_t *);
static	void	cachefs_inactive(struct vnode *, cred_t *, caller_context_t *);
static	int	cachefs_fid(struct vnode *, struct fid *, caller_context_t *);
static	int	cachefs_rwlock(struct vnode *, int, caller_context_t *);
static	void	cachefs_rwunlock(struct vnode *, int, caller_context_t *);
static	int	cachefs_seek(struct vnode *, offset_t, offset_t *,
			caller_context_t *);
static	int	cachefs_frlock(struct vnode *, int, struct flock64 *,
			int, offset_t, struct flk_callback *, cred_t *,
			caller_context_t *);
static	int	cachefs_space(struct vnode *, int, struct flock64 *, int,
			offset_t, cred_t *, caller_context_t *);
static	int	cachefs_realvp(struct vnode *, struct vnode **,
			caller_context_t *);
/* paging and mapping operations */
static	int	cachefs_getpage(struct vnode *, offset_t, size_t, uint_t *,
			struct page *[], size_t, struct seg *, caddr_t,
			enum seg_rw, cred_t *, caller_context_t *);
static	int	cachefs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
			struct page *[], size_t, struct seg *, caddr_t,
			enum seg_rw, cred_t *);
static	int	cachefs_getapage_back(struct vnode *, u_offset_t, size_t,
		uint_t *, struct page *[], size_t, struct seg *, caddr_t,
			enum seg_rw, cred_t *);
static	int	cachefs_putpage(struct vnode *, offset_t, size_t, int,
			cred_t *, caller_context_t *);
static	int	cachefs_map(struct vnode *, offset_t, struct as *,
			caddr_t *, size_t, uchar_t, uchar_t, uint_t, cred_t *,
			caller_context_t *);
static	int	cachefs_addmap(struct vnode *, offset_t, struct as *,
			caddr_t, size_t, uchar_t, uchar_t, uint_t, cred_t *,
			caller_context_t *);
static	int	cachefs_delmap(struct vnode *, offset_t, struct as *,
			caddr_t, size_t, uint_t, uint_t, uint_t, cred_t *,
			caller_context_t *);
/* security attributes and share locks */
static int	cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec,
			int flag, cred_t *cr, caller_context_t *);
static int	cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec,
			int flag, cred_t *cr, caller_context_t *);
static	int	cachefs_shrlock(vnode_t *, int, struct shrlock *, int,
			cred_t *, caller_context_t *);
static int cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
    cred_t *cr);
static int cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec,
    int flag, cred_t *cr);

static int	cachefs_dump(struct vnode *, caddr_t, offset_t, offset_t,
			caller_context_t *);
static int	cachefs_pageio(struct vnode *, page_t *,
		    u_offset_t, size_t, int, cred_t *, caller_context_t *);
static int	cachefs_writepage(struct vnode *vp, caddr_t base,
		    int tcount, struct uio *uiop);
static int	cachefs_pathconf(vnode_t *, int, ulong_t *, cred_t *,
			caller_context_t *);
237 
/*
 * Prototypes for the *_backfs_nfsv4 helpers.  cachefs_read() hands its
 * request to cachefs_read_backfs_nfsv4() when CFS_ISFS_BACKFS_NFSV4() is
 * true for the file system; the other helpers presumably play the same
 * pass-through role for their operations (their definitions are outside
 * the visible part of this file -- confirm there).
 */
static int	cachefs_read_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
			cred_t *cr, caller_context_t *ct);
static int	cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
			cred_t *cr, caller_context_t *ct);
static int	cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap,
			int flags, cred_t *cr, caller_context_t *ct);
static int	cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr,
			vnode_t *vp);
static int	cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off,
			size_t len, uint_t *protp, struct page *pl[],
			size_t plsz, struct seg *seg, caddr_t addr,
			enum seg_rw rw, cred_t *cr);
static int	cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off,
			size_t len, int flags, cred_t *cr);
static int	cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off,
			struct as *as, caddr_t *addrp, size_t len, uchar_t prot,
			uchar_t maxprot, uint_t flags, cred_t *cr);
static int	cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd,
			struct flock64 *bfp, int flag, offset_t offset,
			cred_t *cr, caller_context_t *ct);
258 
/*
 * The cachefs vnode operations vector.  It is filled in by
 * cachefs_init_vnops() (via vn_make_ops()) and handed out by
 * cachefs_getvnodeops().
 */
struct vnodeops *cachefs_vnodeops;
260 
261 static const fs_operation_def_t cachefs_vnodeops_template[] = {
262 	VOPNAME_OPEN,		{ .vop_open = cachefs_open },
263 	VOPNAME_CLOSE,		{ .vop_close = cachefs_close },
264 	VOPNAME_READ,		{ .vop_read = cachefs_read },
265 	VOPNAME_WRITE,		{ .vop_write = cachefs_write },
266 	VOPNAME_IOCTL,		{ .vop_ioctl = cachefs_ioctl },
267 	VOPNAME_GETATTR,	{ .vop_getattr = cachefs_getattr },
268 	VOPNAME_SETATTR,	{ .vop_setattr = cachefs_setattr },
269 	VOPNAME_ACCESS,		{ .vop_access = cachefs_access },
270 	VOPNAME_LOOKUP,		{ .vop_lookup = cachefs_lookup },
271 	VOPNAME_CREATE,		{ .vop_create = cachefs_create },
272 	VOPNAME_REMOVE,		{ .vop_remove = cachefs_remove },
273 	VOPNAME_LINK,		{ .vop_link = cachefs_link },
274 	VOPNAME_RENAME,		{ .vop_rename = cachefs_rename },
275 	VOPNAME_MKDIR,		{ .vop_mkdir = cachefs_mkdir },
276 	VOPNAME_RMDIR,		{ .vop_rmdir = cachefs_rmdir },
277 	VOPNAME_READDIR,	{ .vop_readdir = cachefs_readdir },
278 	VOPNAME_SYMLINK,	{ .vop_symlink = cachefs_symlink },
279 	VOPNAME_READLINK,	{ .vop_readlink = cachefs_readlink },
280 	VOPNAME_FSYNC,		{ .vop_fsync = cachefs_fsync },
281 	VOPNAME_INACTIVE,	{ .vop_inactive = cachefs_inactive },
282 	VOPNAME_FID,		{ .vop_fid = cachefs_fid },
283 	VOPNAME_RWLOCK,		{ .vop_rwlock = cachefs_rwlock },
284 	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = cachefs_rwunlock },
285 	VOPNAME_SEEK,		{ .vop_seek = cachefs_seek },
286 	VOPNAME_FRLOCK,		{ .vop_frlock = cachefs_frlock },
287 	VOPNAME_SPACE,		{ .vop_space = cachefs_space },
288 	VOPNAME_REALVP,		{ .vop_realvp = cachefs_realvp },
289 	VOPNAME_GETPAGE,	{ .vop_getpage = cachefs_getpage },
290 	VOPNAME_PUTPAGE,	{ .vop_putpage = cachefs_putpage },
291 	VOPNAME_MAP,		{ .vop_map = cachefs_map },
292 	VOPNAME_ADDMAP,		{ .vop_addmap = cachefs_addmap },
293 	VOPNAME_DELMAP,		{ .vop_delmap = cachefs_delmap },
294 	VOPNAME_DUMP,		{ .vop_dump = cachefs_dump },
295 	VOPNAME_PATHCONF,	{ .vop_pathconf = cachefs_pathconf },
296 	VOPNAME_PAGEIO,		{ .vop_pageio = cachefs_pageio },
297 	VOPNAME_SETSECATTR,	{ .vop_setsecattr = cachefs_setsecattr },
298 	VOPNAME_GETSECATTR,	{ .vop_getsecattr = cachefs_getsecattr },
299 	VOPNAME_SHRLOCK,	{ .vop_shrlock = cachefs_shrlock },
300 	NULL,			NULL
301 };
302 
/*
 * Forward declarations of two more file-local (static) helpers.  Their
 * definitions are not in the visible part of this file.
 */
static void cachefs_modified(cnode_t *cp);
static int cachefs_modified_alloc(cnode_t *cp);
306 
307 int
308 cachefs_init_vnops(char *name)
309 {
310 	return (vn_make_ops(name,
311 	    cachefs_vnodeops_template, &cachefs_vnodeops));
312 }
313 
314 struct vnodeops *
315 cachefs_getvnodeops(void)
316 {
317 	return (cachefs_vnodeops);
318 }
319 
320 static int
321 cachefs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
322 {
323 	int error = 0;
324 	cnode_t *cp = VTOC(*vpp);
325 	fscache_t *fscp = C_TO_FSCACHE(cp);
326 	int held = 0;
327 	int type;
328 	int connected = 0;
329 
330 #ifdef CFSDEBUG
331 	CFS_DEBUG(CFSDEBUG_VOPS)
332 		printf("cachefs_open: ENTER vpp %p flag %x\n",
333 		    (void *)vpp, flag);
334 #endif
335 	if (getzoneid() != GLOBAL_ZONEID) {
336 		error = EPERM;
337 		goto out;
338 	}
339 	if ((flag & FWRITE) &&
340 	    ((*vpp)->v_type == VDIR || (*vpp)->v_type == VLNK)) {
341 		error = EISDIR;
342 		goto out;
343 	}
344 
345 	/*
346 	 * Cachefs only provides pass-through support for NFSv4,
347 	 * and all vnode operations are passed through to the
348 	 * back file system. For NFSv4 pass-through to work, only
349 	 * connected operation is supported, the cnode backvp must
350 	 * exist, and cachefs optional (eg., disconnectable) flags
351 	 * are turned off. Assert these conditions to ensure that
352 	 * the backfilesystem is called for the open operation.
353 	 */
354 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
355 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
356 
357 	for (;;) {
358 		/* get (or renew) access to the file system */
359 		if (held) {
360 			/* Won't loop with NFSv4 connected behavior */
361 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
362 			cachefs_cd_release(fscp);
363 			held = 0;
364 		}
365 		error = cachefs_cd_access(fscp, connected, 0);
366 		if (error)
367 			goto out;
368 		held = 1;
369 
370 		mutex_enter(&cp->c_statelock);
371 
372 		/* grab creds if we do not have any yet */
373 		if (cp->c_cred == NULL) {
374 			crhold(cr);
375 			cp->c_cred = cr;
376 		}
377 		cp->c_flags |= CN_NEEDOPEN;
378 
379 		/* if we are disconnected */
380 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
381 			/* if we cannot write to the file system */
382 			if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp)) {
383 				mutex_exit(&cp->c_statelock);
384 				connected = 1;
385 				continue;
386 			}
387 			/*
388 			 * Allow read only requests to continue
389 			 */
390 			if ((flag & (FWRITE|FREAD)) == FREAD) {
391 				/* track the flag for opening the backvp */
392 				cp->c_rdcnt++;
393 				mutex_exit(&cp->c_statelock);
394 				error = 0;
395 				break;
396 			}
397 
398 			/*
399 			 * check credentials  - if this procs
400 			 * credentials don't match the creds in the
401 			 * cnode disallow writing while disconnected.
402 			 */
403 			if (crcmp(cp->c_cred, CRED()) != 0 &&
404 			    secpolicy_vnode_access2(CRED(), *vpp,
405 			    cp->c_attr.va_uid, 0, VWRITE) != 0) {
406 				mutex_exit(&cp->c_statelock);
407 				connected = 1;
408 				continue;
409 			}
410 			/* to get here, we know that the WRITE flag is on */
411 			cp->c_wrcnt++;
412 			if (flag & FREAD)
413 				cp->c_rdcnt++;
414 		}
415 
416 		/* else if we are connected */
417 		else {
418 			/* if cannot use the cached copy of the file */
419 			if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp) &&
420 			    ((cp->c_flags & CN_NOCACHE) == 0))
421 				cachefs_nocache(cp);
422 
423 			/* pass open to the back file */
424 			if (cp->c_backvp) {
425 				cp->c_flags &= ~CN_NEEDOPEN;
426 				CFS_DPRINT_BACKFS_NFSV4(fscp,
427 				    ("cachefs_open (nfsv4): cnode %p, "
428 				    "backvp %p\n", cp, cp->c_backvp));
429 				error = VOP_OPEN(&cp->c_backvp, flag, cr, ct);
430 				if (CFS_TIMEOUT(fscp, error)) {
431 					mutex_exit(&cp->c_statelock);
432 					cachefs_cd_release(fscp);
433 					held = 0;
434 					cachefs_cd_timedout(fscp);
435 					continue;
436 				} else if (error) {
437 					mutex_exit(&cp->c_statelock);
438 					break;
439 				}
440 			} else {
441 				/* backvp will be VOP_OPEN'd later */
442 				if (flag & FREAD)
443 					cp->c_rdcnt++;
444 				if (flag & FWRITE)
445 					cp->c_wrcnt++;
446 			}
447 
448 			/*
449 			 * Now perform a consistency check on the file.
450 			 * If strict consistency then force a check to
451 			 * the backfs even if the timeout has not expired
452 			 * for close-to-open consistency.
453 			 */
454 			type = 0;
455 			if (fscp->fs_consttype == CFS_FS_CONST_STRICT)
456 				type = C_BACK_CHECK;
457 			error = CFSOP_CHECK_COBJECT(fscp, cp, type, cr);
458 			if (CFS_TIMEOUT(fscp, error)) {
459 				mutex_exit(&cp->c_statelock);
460 				cachefs_cd_release(fscp);
461 				held = 0;
462 				cachefs_cd_timedout(fscp);
463 				continue;
464 			}
465 		}
466 		mutex_exit(&cp->c_statelock);
467 		break;
468 	}
469 	if (held)
470 		cachefs_cd_release(fscp);
471 out:
472 #ifdef CFS_CD_DEBUG
473 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
474 #endif
475 #ifdef CFSDEBUG
476 	CFS_DEBUG(CFSDEBUG_VOPS)
477 		printf("cachefs_open: EXIT vpp %p error %d\n",
478 		    (void *)vpp, error);
479 #endif
480 	return (error);
481 }
482 
483 /* ARGSUSED */
484 static int
485 cachefs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
486 	caller_context_t *ct)
487 {
488 	int error = 0;
489 	cnode_t *cp = VTOC(vp);
490 	fscache_t *fscp = C_TO_FSCACHE(cp);
491 	int held = 0;
492 	int connected = 0;
493 	int close_cnt = 1;
494 	cachefscache_t *cachep;
495 
496 #ifdef CFSDEBUG
497 	CFS_DEBUG(CFSDEBUG_VOPS)
498 		printf("cachefs_close: ENTER vp %p\n", (void *)vp);
499 #endif
500 	/*
501 	 * Cachefs only provides pass-through support for NFSv4,
502 	 * and all vnode operations are passed through to the
503 	 * back file system. For NFSv4 pass-through to work, only
504 	 * connected operation is supported, the cnode backvp must
505 	 * exist, and cachefs optional (eg., disconnectable) flags
506 	 * are turned off. Assert these conditions to ensure that
507 	 * the backfilesystem is called for the close operation.
508 	 */
509 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
510 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
511 
512 	/*
513 	 * File could have been passed in or inherited from the global zone, so
514 	 * we don't want to flat out reject the request; we'll just leave things
515 	 * the way they are and let the backfs (NFS) deal with it.
516 	 */
517 	/* get rid of any local locks */
518 	if (CFS_ISFS_LLOCK(fscp)) {
519 		(void) cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
520 	}
521 
522 	/* clean up if this is the daemon closing down */
523 	if ((fscp->fs_cddaemonid == ttoproc(curthread)->p_pid) &&
524 	    ((ttoproc(curthread)->p_pid) != 0) &&
525 	    (vp == fscp->fs_rootvp) &&
526 	    (count == 1)) {
527 		mutex_enter(&fscp->fs_cdlock);
528 		fscp->fs_cddaemonid = 0;
529 		if (fscp->fs_dlogfile)
530 			fscp->fs_cdconnected = CFS_CD_DISCONNECTED;
531 		else
532 			fscp->fs_cdconnected = CFS_CD_CONNECTED;
533 		cv_broadcast(&fscp->fs_cdwaitcv);
534 		mutex_exit(&fscp->fs_cdlock);
535 		if (fscp->fs_flags & CFS_FS_ROOTFS) {
536 			cachep = fscp->fs_cache;
537 			mutex_enter(&cachep->c_contentslock);
538 			ASSERT(cachep->c_rootdaemonid != 0);
539 			cachep->c_rootdaemonid = 0;
540 			mutex_exit(&cachep->c_contentslock);
541 		}
542 		return (0);
543 	}
544 
545 	for (;;) {
546 		/* get (or renew) access to the file system */
547 		if (held) {
548 			/* Won't loop with NFSv4 connected behavior */
549 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
550 			cachefs_cd_release(fscp);
551 			held = 0;
552 		}
553 		error = cachefs_cd_access(fscp, connected, 0);
554 		if (error)
555 			goto out;
556 		held = 1;
557 		connected = 0;
558 
559 		/* if not the last close */
560 		if (count > 1) {
561 			if (fscp->fs_cdconnected != CFS_CD_CONNECTED)
562 				goto out;
563 			mutex_enter(&cp->c_statelock);
564 			if (cp->c_backvp) {
565 				CFS_DPRINT_BACKFS_NFSV4(fscp,
566 				    ("cachefs_close (nfsv4): cnode %p, "
567 				    "backvp %p\n", cp, cp->c_backvp));
568 				error = VOP_CLOSE(cp->c_backvp, flag, count,
569 				    offset, cr, ct);
570 				if (CFS_TIMEOUT(fscp, error)) {
571 					mutex_exit(&cp->c_statelock);
572 					cachefs_cd_release(fscp);
573 					held = 0;
574 					cachefs_cd_timedout(fscp);
575 					continue;
576 				}
577 			}
578 			mutex_exit(&cp->c_statelock);
579 			goto out;
580 		}
581 
582 		/*
583 		 * If the file is an unlinked file, then flush the lookup
584 		 * cache so that inactive will be called if this is
585 		 * the last reference.  It will invalidate all of the
586 		 * cached pages, without writing them out.  Writing them
587 		 * out is not required because they will be written to a
588 		 * file which will be immediately removed.
589 		 */
590 		if (cp->c_unldvp != NULL) {
591 			dnlc_purge_vp(vp);
592 			mutex_enter(&cp->c_statelock);
593 			error = cp->c_error;
594 			cp->c_error = 0;
595 			mutex_exit(&cp->c_statelock);
596 			/* always call VOP_CLOSE() for back fs vnode */
597 		}
598 
599 		/* force dirty data to stable storage */
600 		else if ((vp->v_type == VREG) && (flag & FWRITE) &&
601 		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
602 			/* clean the cachefs pages synchronously */
603 			error = cachefs_putpage_common(vp, (offset_t)0,
604 			    0, 0, cr);
605 			if (CFS_TIMEOUT(fscp, error)) {
606 				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
607 					cachefs_cd_release(fscp);
608 					held = 0;
609 					cachefs_cd_timedout(fscp);
610 					continue;
611 				} else {
612 					connected = 1;
613 					continue;
614 				}
615 			}
616 
617 			/* if no space left in cache, wait until connected */
618 			if ((error == ENOSPC) &&
619 			    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
620 				connected = 1;
621 				continue;
622 			}
623 
624 			/* clear the cnode error if putpage worked */
625 			if ((error == 0) && cp->c_error) {
626 				mutex_enter(&cp->c_statelock);
627 				cp->c_error = 0;
628 				mutex_exit(&cp->c_statelock);
629 			}
630 
631 			/* if any other important error */
632 			if (cp->c_error) {
633 				/* get rid of the pages */
634 				(void) cachefs_putpage_common(vp,
635 				    (offset_t)0, 0, B_INVAL | B_FORCE, cr);
636 				dnlc_purge_vp(vp);
637 			}
638 		}
639 
640 		mutex_enter(&cp->c_statelock);
641 		if (cp->c_backvp &&
642 		    (fscp->fs_cdconnected == CFS_CD_CONNECTED)) {
643 			error = VOP_CLOSE(cp->c_backvp, flag, close_cnt,
644 			    offset, cr, ct);
645 			if (CFS_TIMEOUT(fscp, error)) {
646 				mutex_exit(&cp->c_statelock);
647 				cachefs_cd_release(fscp);
648 				held = 0;
649 				cachefs_cd_timedout(fscp);
650 				/* don't decrement the vnode counts again */
651 				close_cnt = 0;
652 				continue;
653 			}
654 		}
655 		mutex_exit(&cp->c_statelock);
656 		break;
657 	}
658 
659 	mutex_enter(&cp->c_statelock);
660 	if (!error)
661 		error = cp->c_error;
662 	cp->c_error = 0;
663 	mutex_exit(&cp->c_statelock);
664 
665 out:
666 	if (held)
667 		cachefs_cd_release(fscp);
668 #ifdef CFS_CD_DEBUG
669 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
670 #endif
671 
672 #ifdef CFSDEBUG
673 	CFS_DEBUG(CFSDEBUG_VOPS)
674 		printf("cachefs_close: EXIT vp %p\n", (void *)vp);
675 #endif
676 	return (error);
677 }
678 
679 /*ARGSUSED*/
680 static int
681 cachefs_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
682 	caller_context_t *ct)
683 {
684 	struct cnode *cp = VTOC(vp);
685 	fscache_t *fscp = C_TO_FSCACHE(cp);
686 	register u_offset_t off;
687 	register int mapoff;
688 	register caddr_t base;
689 	int n;
690 	offset_t diff;
691 	uint_t flags = 0;
692 	int error = 0;
693 
694 #if 0
695 	if (vp->v_flag & VNOCACHE)
696 		flags = SM_INVAL;
697 #endif
698 	if (getzoneid() != GLOBAL_ZONEID)
699 		return (EPERM);
700 	if (vp->v_type != VREG)
701 		return (EISDIR);
702 
703 	ASSERT(RW_READ_HELD(&cp->c_rwlock));
704 
705 	if (uiop->uio_resid == 0)
706 		return (0);
707 
708 
709 	if (uiop->uio_loffset < (offset_t)0)
710 		return (EINVAL);
711 
712 	/*
713 	 * Call backfilesystem to read if NFSv4, the cachefs code
714 	 * does the read from the back filesystem asynchronously
715 	 * which is not supported by pass-through functionality.
716 	 */
717 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
718 		error = cachefs_read_backfs_nfsv4(vp, uiop, ioflag, cr, ct);
719 		goto out;
720 	}
721 
722 	if (MANDLOCK(vp, cp->c_attr.va_mode)) {
723 		error = chklock(vp, FREAD, (offset_t)uiop->uio_loffset,
724 		    uiop->uio_resid, uiop->uio_fmode, ct);
725 		if (error)
726 			return (error);
727 	}
728 
729 	/*
730 	 * Sit in a loop and transfer (uiomove) the data in up to
731 	 * MAXBSIZE chunks. Each chunk is mapped into the kernel's
732 	 * address space as needed and then released.
733 	 */
734 	do {
735 		/*
736 		 *	off	Offset of current MAXBSIZE chunk
737 		 *	mapoff	Offset within the current chunk
738 		 *	n	Number of bytes to move from this chunk
739 		 *	base	kernel address of mapped in chunk
740 		 */
741 		off = uiop->uio_loffset & (offset_t)MAXBMASK;
742 		mapoff = uiop->uio_loffset & MAXBOFFSET;
743 		n = MAXBSIZE - mapoff;
744 		if (n > uiop->uio_resid)
745 			n = (uint_t)uiop->uio_resid;
746 
747 		/* perform consistency check */
748 		error = cachefs_cd_access(fscp, 0, 0);
749 		if (error)
750 			break;
751 		mutex_enter(&cp->c_statelock);
752 		error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
753 		diff = cp->c_size - uiop->uio_loffset;
754 		mutex_exit(&cp->c_statelock);
755 		if (CFS_TIMEOUT(fscp, error)) {
756 			cachefs_cd_release(fscp);
757 			cachefs_cd_timedout(fscp);
758 			error = 0;
759 			continue;
760 		}
761 		cachefs_cd_release(fscp);
762 
763 		if (error)
764 			break;
765 
766 		if (diff <= (offset_t)0)
767 			break;
768 		if (diff < (offset_t)n)
769 			n = diff;
770 
771 		base = segmap_getmapflt(segkmap, vp, off, (uint_t)n, 1, S_READ);
772 
773 		error = segmap_fault(kas.a_hat, segkmap, base, n,
774 		    F_SOFTLOCK, S_READ);
775 		if (error) {
776 			(void) segmap_release(segkmap, base, 0);
777 			if (FC_CODE(error) == FC_OBJERR)
778 				error =  FC_ERRNO(error);
779 			else
780 				error = EIO;
781 			break;
782 		}
783 		error = uiomove(base+mapoff, n, UIO_READ, uiop);
784 		(void) segmap_fault(kas.a_hat, segkmap, base, n,
785 		    F_SOFTUNLOCK, S_READ);
786 		if (error == 0) {
787 			/*
788 			 * if we read a whole page(s), or to eof,
789 			 *  we won't need this page(s) again soon.
790 			 */
791 			if (n + mapoff == MAXBSIZE ||
792 			    uiop->uio_loffset == cp->c_size)
793 				flags |= SM_DONTNEED;
794 		}
795 		(void) segmap_release(segkmap, base, flags);
796 	} while (error == 0 && uiop->uio_resid > 0);
797 
798 out:
799 #ifdef CFSDEBUG
800 	CFS_DEBUG(CFSDEBUG_VOPS)
801 		printf("cachefs_read: EXIT error %d resid %ld\n", error,
802 		    uiop->uio_resid);
803 #endif
804 	return (error);
805 }
806 
807 /*
808  * cachefs_read_backfs_nfsv4
809  *
810  * Call NFSv4 back filesystem to handle the read (cachefs
811  * pass-through support for NFSv4).
812  */
813 static int
814 cachefs_read_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
815 			caller_context_t *ct)
816 {
817 	cnode_t *cp = VTOC(vp);
818 	fscache_t *fscp = C_TO_FSCACHE(cp);
819 	vnode_t *backvp;
820 	int error;
821 
822 	/*
823 	 * For NFSv4 pass-through to work, only connected operation
824 	 * is supported, the cnode backvp must exist, and cachefs
825 	 * optional (eg., disconnectable) flags are turned off. Assert
826 	 * these conditions for the read operation.
827 	 */
828 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
829 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
830 
831 	/* Call backfs vnode op after extracting backvp */
832 	mutex_enter(&cp->c_statelock);
833 	backvp = cp->c_backvp;
834 	mutex_exit(&cp->c_statelock);
835 
836 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_read_backfs_nfsv4: cnode %p, "
837 	    "backvp %p\n", cp, backvp));
838 
839 	(void) VOP_RWLOCK(backvp, V_WRITELOCK_FALSE, ct);
840 	error = VOP_READ(backvp, uiop, ioflag, cr, ct);
841 	VOP_RWUNLOCK(backvp, V_WRITELOCK_FALSE, ct);
842 
843 	/* Increment cache miss counter */
844 	fscp->fs_stats.st_misses++;
845 
846 	return (error);
847 }
848 
849 /*ARGSUSED*/
850 static int
851 cachefs_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
852 	caller_context_t *ct)
853 {
854 	struct cnode *cp = VTOC(vp);
855 	fscache_t *fscp = C_TO_FSCACHE(cp);
856 	int error = 0;
857 	u_offset_t off;
858 	caddr_t base;
859 	uint_t bsize;
860 	uint_t flags;
861 	int n, on;
862 	rlim64_t limit = uiop->uio_llimit;
863 	ssize_t resid;
864 	offset_t offset;
865 	offset_t remainder;
866 
867 #ifdef CFSDEBUG
868 	CFS_DEBUG(CFSDEBUG_VOPS)
869 		printf(
870 		"cachefs_write: ENTER vp %p offset %llu count %ld cflags %x\n",
871 		    (void *)vp, uiop->uio_loffset, uiop->uio_resid,
872 		    cp->c_flags);
873 #endif
874 	if (getzoneid() != GLOBAL_ZONEID) {
875 		error = EPERM;
876 		goto out;
877 	}
878 	if (vp->v_type != VREG) {
879 		error = EISDIR;
880 		goto out;
881 	}
882 
883 	ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
884 
885 	if (uiop->uio_resid == 0) {
886 		goto out;
887 	}
888 
889 	/* Call backfilesystem to write if NFSv4 */
890 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
891 		error = cachefs_write_backfs_nfsv4(vp, uiop, ioflag, cr, ct);
892 		goto out2;
893 	}
894 
895 	if (MANDLOCK(vp, cp->c_attr.va_mode)) {
896 		error = chklock(vp, FWRITE, (offset_t)uiop->uio_loffset,
897 		    uiop->uio_resid, uiop->uio_fmode, ct);
898 		if (error)
899 			goto out;
900 	}
901 
902 	if (ioflag & FAPPEND) {
903 		for (;;) {
904 			/* do consistency check to get correct file size */
905 			error = cachefs_cd_access(fscp, 0, 1);
906 			if (error)
907 				goto out;
908 			mutex_enter(&cp->c_statelock);
909 			error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
910 			uiop->uio_loffset = cp->c_size;
911 			mutex_exit(&cp->c_statelock);
912 			if (CFS_TIMEOUT(fscp, error)) {
913 				cachefs_cd_release(fscp);
914 				cachefs_cd_timedout(fscp);
915 				continue;
916 			}
917 			cachefs_cd_release(fscp);
918 			if (error)
919 				goto out;
920 			break;
921 		}
922 	}
923 
924 	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
925 		limit = MAXOFFSET_T;
926 
927 	if (uiop->uio_loffset >= limit) {
928 		proc_t *p = ttoproc(curthread);
929 
930 		mutex_enter(&p->p_lock);
931 		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
932 		    p, RCA_UNSAFE_SIGINFO);
933 		mutex_exit(&p->p_lock);
934 		error = EFBIG;
935 		goto out;
936 	}
937 	if (uiop->uio_loffset > fscp->fs_offmax) {
938 		error = EFBIG;
939 		goto out;
940 	}
941 
942 	if (limit > fscp->fs_offmax)
943 		limit = fscp->fs_offmax;
944 
945 	if (uiop->uio_loffset < (offset_t)0) {
946 		error = EINVAL;
947 		goto out;
948 	}
949 
950 	offset = uiop->uio_loffset + uiop->uio_resid;
951 	/*
952 	 * Check to make sure that the process will not exceed
953 	 * its limit on file size.  It is okay to write up to
954 	 * the limit, but not beyond.  Thus, the write which
955 	 * reaches the limit will be short and the next write
956 	 * will return an error.
957 	 */
958 	remainder = 0;
959 	if (offset > limit) {
960 		remainder = (int)(offset - (u_offset_t)limit);
961 		uiop->uio_resid = limit - uiop->uio_loffset;
962 		if (uiop->uio_resid <= 0) {
963 			proc_t *p = ttoproc(curthread);
964 
965 			uiop->uio_resid += remainder;
966 			mutex_enter(&p->p_lock);
967 			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
968 			    p->p_rctls, p, RCA_UNSAFE_SIGINFO);
969 			mutex_exit(&p->p_lock);
970 			error = EFBIG;
971 			goto out;
972 		}
973 	}
974 
975 	resid = uiop->uio_resid;
976 	offset = uiop->uio_loffset;
977 	bsize = vp->v_vfsp->vfs_bsize;
978 
979 	/* loop around and do the write in MAXBSIZE chunks */
980 	do {
981 		/* mapping offset */
982 		off = uiop->uio_loffset & (offset_t)MAXBMASK;
983 		on = uiop->uio_loffset & MAXBOFFSET; /* Rel. offset */
984 		n = MAXBSIZE - on;
985 		if (n > uiop->uio_resid)
986 			n = (int)uiop->uio_resid;
987 
988 		/*
989 		 * Touch the page and fault it in if it is not in
990 		 * core before segmap_getmapflt can lock it. This
991 		 * is to avoid the deadlock if the buffer is mapped
992 		 * to the same file through mmap which we want to
993 		 * write to.
994 		 */
995 		uio_prefaultpages((long)n, uiop);
996 
997 		base = segmap_getmap(segkmap, vp, off);
998 		error = cachefs_writepage(vp, (base + on), n, uiop);
999 		if (error == 0) {
1000 			flags = 0;
1001 			/*
1002 			 * Have written a whole block.Start an
1003 			 * asynchronous write and mark the buffer to
1004 			 * indicate that it won't be needed again
1005 			 * soon.
1006 			 */
1007 			if (n + on == bsize) {
1008 				flags = SM_WRITE |SM_ASYNC |SM_DONTNEED;
1009 			}
1010 #if 0
1011 			/* XXX need to understand this */
1012 			if ((ioflag & (FSYNC|FDSYNC)) ||
1013 			    (cp->c_backvp && vn_has_flocks(cp->c_backvp))) {
1014 				flags &= ~SM_ASYNC;
1015 				flags |= SM_WRITE;
1016 			}
1017 #else
1018 			if (ioflag & (FSYNC|FDSYNC)) {
1019 				flags &= ~SM_ASYNC;
1020 				flags |= SM_WRITE;
1021 			}
1022 #endif
1023 			error = segmap_release(segkmap, base, flags);
1024 		} else {
1025 			(void) segmap_release(segkmap, base, 0);
1026 		}
1027 	} while (error == 0 && uiop->uio_resid > 0);
1028 
1029 out:
1030 	if (error == EINTR && (ioflag & (FSYNC|FDSYNC))) {
1031 		uiop->uio_resid = resid;
1032 		uiop->uio_loffset = offset;
1033 	} else
1034 		uiop->uio_resid += remainder;
1035 
1036 out2:
1037 #ifdef CFSDEBUG
1038 	CFS_DEBUG(CFSDEBUG_VOPS)
1039 		printf("cachefs_write: EXIT error %d\n", error);
1040 #endif
1041 	return (error);
1042 }
1043 
1044 /*
1045  * cachefs_write_backfs_nfsv4
1046  *
1047  * Call NFSv4 back filesystem to handle the write (cachefs
1048  * pass-through support for NFSv4).
1049  */
1050 static int
1051 cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
1052 			caller_context_t *ct)
1053 {
1054 	cnode_t *cp = VTOC(vp);
1055 	fscache_t *fscp = C_TO_FSCACHE(cp);
1056 	vnode_t *backvp;
1057 	int error;
1058 
1059 	/*
1060 	 * For NFSv4 pass-through to work, only connected operation
1061 	 * is supported, the cnode backvp must exist, and cachefs
1062 	 * optional (eg., disconnectable) flags are turned off. Assert
1063 	 * these conditions for the read operation.
1064 	 */
1065 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
1066 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
1067 
1068 	/* Call backfs vnode op after extracting the backvp */
1069 	mutex_enter(&cp->c_statelock);
1070 	backvp = cp->c_backvp;
1071 	mutex_exit(&cp->c_statelock);
1072 
1073 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_write_backfs_nfsv4: cnode %p, "
1074 	    "backvp %p\n", cp, backvp));
1075 	(void) VOP_RWLOCK(backvp, V_WRITELOCK_TRUE, ct);
1076 	error = VOP_WRITE(backvp, uiop, ioflag, cr, ct);
1077 	VOP_RWUNLOCK(backvp, V_WRITELOCK_TRUE, ct);
1078 
1079 	return (error);
1080 }
1081 
1082 /*
1083  * see if we've charged ourselves for frontfile data at
1084  * the given offset.  If not, allocate a block for it now.
1085  */
1086 static int
1087 cachefs_charge_page(struct cnode *cp, u_offset_t offset)
1088 {
1089 	u_offset_t blockoff;
1090 	int error;
1091 	int inc;
1092 
1093 	ASSERT(MUTEX_HELD(&cp->c_statelock));
1094 	/*LINTED*/
1095 	ASSERT(PAGESIZE <= MAXBSIZE);
1096 
1097 	error = 0;
1098 	blockoff = offset & (offset_t)MAXBMASK;
1099 
1100 	/* get the front file if necessary so allocblocks works */
1101 	if ((cp->c_frontvp == NULL) &&
1102 	    ((cp->c_flags & CN_NOCACHE) == 0)) {
1103 		(void) cachefs_getfrontfile(cp);
1104 	}
1105 	if (cp->c_flags & CN_NOCACHE)
1106 		return (1);
1107 
1108 	if (cachefs_check_allocmap(cp, blockoff))
1109 		return (0);
1110 
1111 	for (inc = PAGESIZE; inc < MAXBSIZE; inc += PAGESIZE)
1112 		if (cachefs_check_allocmap(cp, blockoff+inc))
1113 			return (0);
1114 
1115 	error = cachefs_allocblocks(C_TO_FSCACHE(cp)->fs_cache, 1,
1116 	    cp->c_metadata.md_rltype);
1117 	if (error == 0) {
1118 		cp->c_metadata.md_frontblks++;
1119 		cp->c_flags |= CN_UPDATED;
1120 	}
1121 	return (error);
1122 }
1123 
1124 /*
1125  * Called only by cachefs_write to write 1 page or less of data.
1126  *	base   - base address kernel addr space
1127  *	tcount - Total bytes to move - < MAXBSIZE
1128  */
1129 static int
1130 cachefs_writepage(vnode_t *vp, caddr_t base, int tcount, uio_t *uiop)
1131 {
1132 	struct cnode *cp =  VTOC(vp);
1133 	fscache_t *fscp = C_TO_FSCACHE(cp);
1134 	register int n;
1135 	register u_offset_t offset;
1136 	int error = 0, terror;
1137 	extern struct as kas;
1138 	u_offset_t lastpage_off;
1139 	int pagecreate = 0;
1140 	int newpage;
1141 
1142 #ifdef CFSDEBUG
1143 	CFS_DEBUG(CFSDEBUG_VOPS)
1144 		printf(
1145 		    "cachefs_writepage: ENTER vp %p offset %llu len %ld\\\n",
1146 		    (void *)vp, uiop->uio_loffset, uiop->uio_resid);
1147 #endif
1148 
1149 	/*
1150 	 * Move bytes in PAGESIZE chunks. We must avoid spanning pages in
1151 	 * uiomove() because page faults may cause the cache to be invalidated
1152 	 * out from under us.
1153 	 */
1154 	do {
1155 		offset = uiop->uio_loffset;
1156 		lastpage_off = (cp->c_size - 1) & (offset_t)PAGEMASK;
1157 
1158 		/*
1159 		 * If not connected then need to make sure we have space
1160 		 * to perform the write.  We could make this check
1161 		 * a little tighter by only doing it if we are growing the file.
1162 		 */
1163 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
1164 			error = cachefs_allocblocks(fscp->fs_cache, 1,
1165 			    cp->c_metadata.md_rltype);
1166 			if (error)
1167 				break;
1168 			cachefs_freeblocks(fscp->fs_cache, 1,
1169 			    cp->c_metadata.md_rltype);
1170 		}
1171 
1172 		/*
1173 		 * n is the number of bytes required to satisfy the request
1174 		 * or the number of bytes to fill out the page.
1175 		 */
1176 		n = (int)(PAGESIZE - ((uintptr_t)base & PAGEOFFSET));
1177 		if (n > tcount)
1178 			n = tcount;
1179 
1180 		/*
1181 		 * The number of bytes of data in the last page can not
1182 		 * be accurately be determined while page is being
1183 		 * uiomove'd to and the size of the file being updated.
1184 		 * Thus, inform threads which need to know accurately
1185 		 * how much data is in the last page of the file.  They
1186 		 * will not do the i/o immediately, but will arrange for
1187 		 * the i/o to happen later when this modify operation
1188 		 * will have finished.
1189 		 *
1190 		 * in similar NFS code, this is done right before the
1191 		 * uiomove(), which is best.  but here in cachefs, we
1192 		 * have two uiomove()s, so we must do it here.
1193 		 */
1194 		ASSERT(!(cp->c_flags & CN_CMODINPROG));
1195 		mutex_enter(&cp->c_statelock);
1196 		cp->c_flags |= CN_CMODINPROG;
1197 		cp->c_modaddr = (offset & (offset_t)MAXBMASK);
1198 		mutex_exit(&cp->c_statelock);
1199 
1200 		/*
1201 		 * Check to see if we can skip reading in the page
1202 		 * and just allocate the memory.  We can do this
1203 		 * if we are going to rewrite the entire mapping
1204 		 * or if we are going to write to or beyond the current
1205 		 * end of file from the beginning of the mapping.
1206 		 */
1207 		if ((offset > (lastpage_off + PAGEOFFSET)) ||
1208 		    ((cp->c_size == 0) && (offset < PAGESIZE)) ||
1209 		    ((uintptr_t)base & PAGEOFFSET) == 0 && (n == PAGESIZE ||
1210 		    ((offset + n) >= cp->c_size))) {
1211 			pagecreate = 1;
1212 
1213 			/*
1214 			 * segmap_pagecreate() returns 1 if it calls
1215 			 * page_create_va() to allocate any pages.
1216 			 */
1217 			newpage = segmap_pagecreate(segkmap,
1218 			    (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK),
1219 			    PAGESIZE, 0);
1220 			/* do not zero page if we are overwriting all of it */
1221 			if (!((((uintptr_t)base & PAGEOFFSET) == 0) &&
1222 			    (n == PAGESIZE))) {
1223 				(void) kzero((void *)
1224 				    ((uintptr_t)base & (uintptr_t)PAGEMASK),
1225 				    PAGESIZE);
1226 			}
1227 			error = uiomove(base, n, UIO_WRITE, uiop);
1228 
1229 			/*
1230 			 * Unlock the page allocated by page_create_va()
1231 			 * in segmap_pagecreate()
1232 			 */
1233 			if (newpage)
1234 				segmap_pageunlock(segkmap,
1235 				    (caddr_t)((uintptr_t)base &
1236 				    (uintptr_t)PAGEMASK),
1237 				    PAGESIZE, S_WRITE);
1238 		} else {
1239 			/*
1240 			 * KLUDGE ! Use segmap_fault instead of faulting and
1241 			 * using as_fault() to avoid a recursive readers lock
1242 			 * on kas.
1243 			 */
1244 			error = segmap_fault(kas.a_hat, segkmap, (caddr_t)
1245 			    ((uintptr_t)base & (uintptr_t)PAGEMASK),
1246 			    PAGESIZE, F_SOFTLOCK, S_WRITE);
1247 			if (error) {
1248 				if (FC_CODE(error) == FC_OBJERR)
1249 					error =  FC_ERRNO(error);
1250 				else
1251 					error = EIO;
1252 				break;
1253 			}
1254 			error = uiomove(base, n, UIO_WRITE, uiop);
1255 			(void) segmap_fault(kas.a_hat, segkmap, (caddr_t)
1256 			    ((uintptr_t)base & (uintptr_t)PAGEMASK),
1257 			    PAGESIZE, F_SOFTUNLOCK, S_WRITE);
1258 		}
1259 		n = (int)(uiop->uio_loffset - offset); /* n = # bytes written */
1260 		base += n;
1261 		tcount -= n;
1262 
1263 		/* get access to the file system */
1264 		if ((terror = cachefs_cd_access(fscp, 0, 1)) != 0) {
1265 			error = terror;
1266 			break;
1267 		}
1268 
1269 		/*
1270 		 * cp->c_attr.va_size is the maximum number of
1271 		 * bytes known to be in the file.
1272 		 * Make sure it is at least as high as the
1273 		 * last byte we just wrote into the buffer.
1274 		 */
1275 		mutex_enter(&cp->c_statelock);
1276 		if (cp->c_size < uiop->uio_loffset) {
1277 			cp->c_size = uiop->uio_loffset;
1278 		}
1279 		if (cp->c_size != cp->c_attr.va_size) {
1280 			cp->c_attr.va_size = cp->c_size;
1281 			cp->c_flags |= CN_UPDATED;
1282 		}
1283 		/* c_size is now correct, so we can clear modinprog */
1284 		cp->c_flags &= ~CN_CMODINPROG;
1285 		if (error == 0) {
1286 			cp->c_flags |= CDIRTY;
1287 			if (pagecreate && (cp->c_flags & CN_NOCACHE) == 0) {
1288 				/*
1289 				 * if we're not in NOCACHE mode
1290 				 * (i.e., single-writer), we update the
1291 				 * allocmap here rather than waiting until
1292 				 * cachefspush is called.  This prevents
1293 				 * getpage from clustering up pages from
1294 				 * the backfile and stomping over the changes
1295 				 * we make here.
1296 				 */
1297 				if (cachefs_charge_page(cp, offset) == 0) {
1298 					cachefs_update_allocmap(cp,
1299 					    offset & (offset_t)PAGEMASK,
1300 					    (size_t)PAGESIZE);
1301 				}
1302 
1303 				/* else we ran out of space */
1304 				else {
1305 					/* nocache file if connected */
1306 					if (fscp->fs_cdconnected ==
1307 					    CFS_CD_CONNECTED)
1308 						cachefs_nocache(cp);
1309 					/*
1310 					 * If disconnected then cannot
1311 					 * nocache the file.  Let it have
1312 					 * the space.
1313 					 */
1314 					else {
1315 						cp->c_metadata.md_frontblks++;
1316 						cp->c_flags |= CN_UPDATED;
1317 						cachefs_update_allocmap(cp,
1318 						    offset & (offset_t)PAGEMASK,
1319 						    (size_t)PAGESIZE);
1320 					}
1321 				}
1322 			}
1323 		}
1324 		mutex_exit(&cp->c_statelock);
1325 		cachefs_cd_release(fscp);
1326 	} while (tcount > 0 && error == 0);
1327 
1328 	if (cp->c_flags & CN_CMODINPROG) {
1329 		/* XXX assert error != 0?  FC_ERRNO() makes this more risky. */
1330 		mutex_enter(&cp->c_statelock);
1331 		cp->c_flags &= ~CN_CMODINPROG;
1332 		mutex_exit(&cp->c_statelock);
1333 	}
1334 
1335 #ifdef CFS_CD_DEBUG
1336 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
1337 #endif
1338 
1339 #ifdef CFSDEBUG
1340 	CFS_DEBUG(CFSDEBUG_VOPS)
1341 		printf("cachefs_writepage: EXIT error %d\n", error);
1342 #endif
1343 
1344 	return (error);
1345 }
1346 
1347 /*
1348  * Pushes out pages to the back and/or front file system.
1349  */
1350 static int
1351 cachefs_push(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
1352     int flags, cred_t *cr)
1353 {
1354 	struct cnode *cp = VTOC(vp);
1355 	struct buf *bp;
1356 	int error;
1357 	fscache_t *fscp = C_TO_FSCACHE(cp);
1358 	u_offset_t iooff;
1359 	size_t iolen;
1360 	u_offset_t lbn;
1361 	u_offset_t lbn_off;
1362 	uint_t bsize;
1363 
1364 	ASSERT((flags & B_ASYNC) == 0);
1365 	ASSERT(!vn_is_readonly(vp));
1366 	ASSERT(pp != NULL);
1367 	ASSERT(cr != NULL);
1368 
1369 	bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
1370 	lbn = pp->p_offset / bsize;
1371 	lbn_off = lbn * bsize;
1372 
1373 	/*
1374 	 * Find a kluster that fits in one block, or in
1375 	 * one page if pages are bigger than blocks.  If
1376 	 * there is less file space allocated than a whole
1377 	 * page, we'll shorten the i/o request below.
1378 	 */
1379 
1380 	pp = pvn_write_kluster(vp, pp, &iooff, &iolen, lbn_off,
1381 	    roundup(bsize, PAGESIZE), flags);
1382 
1383 	/*
1384 	 * The CN_CMODINPROG flag makes sure that we use a correct
1385 	 * value of c_size, below.  CN_CMODINPROG is set in
1386 	 * cachefs_writepage().  When CN_CMODINPROG is set it
1387 	 * indicates that a uiomove() is in progress and the c_size
1388 	 * has not been made consistent with the new size of the
1389 	 * file. When the uiomove() completes the c_size is updated
1390 	 * and the CN_CMODINPROG flag is cleared.
1391 	 *
1392 	 * The CN_CMODINPROG flag makes sure that cachefs_push_front
1393 	 * and cachefs_push_connected see a consistent value of
1394 	 * c_size.  Without this handshaking, it is possible that
1395 	 * these routines will pick up the old value of c_size before
1396 	 * the uiomove() in cachefs_writepage() completes.  This will
1397 	 * result in the vn_rdwr() being too small, and data loss.
1398 	 *
1399 	 * More precisely, there is a window between the time the
1400 	 * uiomove() completes and the time the c_size is updated. If
1401 	 * a VOP_PUTPAGE() operation intervenes in this window, the
1402 	 * page will be picked up, because it is dirty; it will be
1403 	 * unlocked, unless it was pagecreate'd. When the page is
1404 	 * picked up as dirty, the dirty bit is reset
1405 	 * (pvn_getdirty()). In cachefs_push_connected(), c_size is
1406 	 * checked.  This will still be the old size.  Therefore, the
1407 	 * page will not be written out to the correct length, and the
1408 	 * page will be clean, so the data may disappear.
1409 	 */
1410 	if (cp->c_flags & CN_CMODINPROG) {
1411 		mutex_enter(&cp->c_statelock);
1412 		if ((cp->c_flags & CN_CMODINPROG) &&
1413 		    cp->c_modaddr + MAXBSIZE > iooff &&
1414 		    cp->c_modaddr < iooff + iolen) {
1415 			page_t *plist;
1416 
1417 			/*
1418 			 * A write is in progress for this region of
1419 			 * the file.  If we did not detect
1420 			 * CN_CMODINPROG here then this path through
1421 			 * cachefs_push_connected() would eventually
1422 			 * do the vn_rdwr() and may not write out all
1423 			 * of the data in the pages.  We end up losing
1424 			 * data. So we decide to set the modified bit
1425 			 * on each page in the page list and mark the
1426 			 * cnode with CDIRTY.  This push will be
1427 			 * restarted at some later time.
1428 			 */
1429 
1430 			plist = pp;
1431 			while (plist != NULL) {
1432 				pp = plist;
1433 				page_sub(&plist, pp);
1434 				hat_setmod(pp);
1435 				page_io_unlock(pp);
1436 				page_unlock(pp);
1437 			}
1438 			cp->c_flags |= CDIRTY;
1439 			mutex_exit(&cp->c_statelock);
1440 			if (offp)
1441 				*offp = iooff;
1442 			if (lenp)
1443 				*lenp = iolen;
1444 			return (0);
1445 		}
1446 		mutex_exit(&cp->c_statelock);
1447 	}
1448 
1449 	/*
1450 	 * Set the pages up for pageout.
1451 	 */
1452 	bp = pageio_setup(pp, iolen, CTOV(cp), B_WRITE | flags);
1453 	if (bp == NULL) {
1454 
1455 		/*
1456 		 * currently, there is no way for pageio_setup() to
1457 		 * return NULL, since it uses its own scheme for
1458 		 * kmem_alloc()ing that shouldn't return NULL, and
1459 		 * since pageio_setup() itself dereferences the thing
1460 		 * it's about to return.  still, we need to be ready
1461 		 * in case this ever does start happening.
1462 		 */
1463 
1464 		error = ENOMEM;
1465 		goto writedone;
1466 	}
1467 	/*
1468 	 * pageio_setup should have set b_addr to 0.  This
1469 	 * is correct since we want to do I/O on a page
1470 	 * boundary.  bp_mapin will use this addr to calculate
1471 	 * an offset, and then set b_addr to the kernel virtual
1472 	 * address it allocated for us.
1473 	 */
1474 	bp->b_edev = 0;
1475 	bp->b_dev = 0;
1476 	bp->b_lblkno = (diskaddr_t)lbtodb(iooff);
1477 	bp_mapin(bp);
1478 
1479 	iolen  = cp->c_size - ldbtob(bp->b_blkno);
1480 	if (iolen > bp->b_bcount)
1481 		iolen  = bp->b_bcount;
1482 
1483 	/* if connected */
1484 	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1485 		/* write to the back file first */
1486 		error = cachefs_push_connected(vp, bp, iolen, iooff, cr);
1487 
1488 		/* write to the front file if allowed */
1489 		if ((error == 0) && CFS_ISFS_NONSHARED(fscp) &&
1490 		    ((cp->c_flags & CN_NOCACHE) == 0)) {
1491 			/* try to write to the front file */
1492 			(void) cachefs_push_front(vp, bp, iolen, iooff, cr);
1493 		}
1494 	}
1495 
1496 	/* else if disconnected */
1497 	else {
1498 		/* try to write to the front file */
1499 		error = cachefs_push_front(vp, bp, iolen, iooff, cr);
1500 	}
1501 
1502 	bp_mapout(bp);
1503 	pageio_done(bp);
1504 
1505 writedone:
1506 
1507 	pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
1508 	if (offp)
1509 		*offp = iooff;
1510 	if (lenp)
1511 		*lenp = iolen;
1512 
1513 	/* XXX ask bob mastors how to fix this someday */
1514 	mutex_enter(&cp->c_statelock);
1515 	if (error) {
1516 		if (error == ENOSPC) {
1517 			if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) ||
1518 			    CFS_ISFS_SOFT(fscp)) {
1519 				CFSOP_INVALIDATE_COBJECT(fscp, cp, cr);
1520 				cp->c_error = error;
1521 			}
1522 		} else if ((CFS_TIMEOUT(fscp, error) == 0) &&
1523 		    (error != EINTR)) {
1524 			CFSOP_INVALIDATE_COBJECT(fscp, cp, cr);
1525 			cp->c_error = error;
1526 		}
1527 	} else if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1528 		CFSOP_MODIFY_COBJECT(fscp, cp, cr);
1529 	}
1530 	mutex_exit(&cp->c_statelock);
1531 
1532 	return (error);
1533 }
1534 
1535 /*
1536  * Pushes out pages to the back file system.
1537  */
1538 static int
1539 cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen,
1540     u_offset_t iooff, cred_t *cr)
1541 {
1542 	struct cnode *cp = VTOC(vp);
1543 	int error = 0;
1544 	int mode = 0;
1545 	fscache_t *fscp = C_TO_FSCACHE(cp);
1546 	ssize_t resid;
1547 	vnode_t *backvp;
1548 
1549 	/* get the back file if necessary */
1550 	mutex_enter(&cp->c_statelock);
1551 	if (cp->c_backvp == NULL) {
1552 		error = cachefs_getbackvp(fscp, cp);
1553 		if (error) {
1554 			mutex_exit(&cp->c_statelock);
1555 			goto out;
1556 		}
1557 	}
1558 	backvp = cp->c_backvp;
1559 	VN_HOLD(backvp);
1560 	mutex_exit(&cp->c_statelock);
1561 
1562 	if (CFS_ISFS_NONSHARED(fscp) && CFS_ISFS_SNR(fscp))
1563 		mode = FSYNC;
1564 
1565 	/* write to the back file */
1566 	error = bp->b_error = vn_rdwr(UIO_WRITE, backvp, bp->b_un.b_addr,
1567 	    iolen, iooff, UIO_SYSSPACE, mode,
1568 	    RLIM64_INFINITY, cr, &resid);
1569 	if (error) {
1570 #ifdef CFSDEBUG
1571 		CFS_DEBUG(CFSDEBUG_VOPS | CFSDEBUG_BACK)
1572 			printf("cachefspush: error %d cr %p\n",
1573 			    error, (void *)cr);
1574 #endif
1575 		bp->b_flags |= B_ERROR;
1576 	}
1577 	VN_RELE(backvp);
1578 out:
1579 	return (error);
1580 }
1581 
1582 /*
1583  * Pushes out pages to the front file system.
1584  * Called for both connected and disconnected states.
1585  */
1586 static int
1587 cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen,
1588     u_offset_t iooff, cred_t *cr)
1589 {
1590 	struct cnode *cp = VTOC(vp);
1591 	fscache_t *fscp = C_TO_FSCACHE(cp);
1592 	int error = 0;
1593 	ssize_t resid;
1594 	u_offset_t popoff;
1595 	off_t commit = 0;
1596 	uint_t seq;
1597 	enum cachefs_rl_type type;
1598 	vnode_t *frontvp = NULL;
1599 
1600 	mutex_enter(&cp->c_statelock);
1601 
1602 	if (!CFS_ISFS_NONSHARED(fscp)) {
1603 		error = ETIMEDOUT;
1604 		goto out;
1605 	}
1606 
1607 	/* get the front file if necessary */
1608 	if ((cp->c_frontvp == NULL) &&
1609 	    ((cp->c_flags & CN_NOCACHE) == 0)) {
1610 		(void) cachefs_getfrontfile(cp);
1611 	}
1612 	if (cp->c_flags & CN_NOCACHE) {
1613 		error = ETIMEDOUT;
1614 		goto out;
1615 	}
1616 
1617 	/* if disconnected, needs to be populated and have good attributes */
1618 	if ((fscp->fs_cdconnected != CFS_CD_CONNECTED) &&
1619 	    (((cp->c_metadata.md_flags & MD_POPULATED) == 0) ||
1620 	    (cp->c_metadata.md_flags & MD_NEEDATTRS))) {
1621 		error = ETIMEDOUT;
1622 		goto out;
1623 	}
1624 
1625 	for (popoff = iooff; popoff < (iooff + iolen); popoff += MAXBSIZE) {
1626 		if (cachefs_charge_page(cp, popoff)) {
1627 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1628 				cachefs_nocache(cp);
1629 				goto out;
1630 			} else {
1631 				error = ENOSPC;
1632 				goto out;
1633 			}
1634 		}
1635 	}
1636 
1637 	if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
1638 		/* log the first putpage to a file */
1639 		if ((cp->c_metadata.md_flags & MD_PUTPAGE) == 0) {
1640 			/* uses open's creds if we have them */
1641 			if (cp->c_cred)
1642 				cr = cp->c_cred;
1643 
1644 			if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
1645 				error = cachefs_dlog_cidmap(fscp);
1646 				if (error) {
1647 					error = ENOSPC;
1648 					goto out;
1649 				}
1650 				cp->c_metadata.md_flags |= MD_MAPPING;
1651 			}
1652 
1653 			commit = cachefs_dlog_modify(fscp, cp, cr, &seq);
1654 			if (commit == 0) {
1655 				/* out of space */
1656 				error = ENOSPC;
1657 				goto out;
1658 			}
1659 
1660 			cp->c_metadata.md_seq = seq;
1661 			type = cp->c_metadata.md_rltype;
1662 			cachefs_modified(cp);
1663 			cp->c_metadata.md_flags |= MD_PUTPAGE;
1664 			cp->c_metadata.md_flags &= ~MD_PUSHDONE;
1665 			cp->c_flags |= CN_UPDATED;
1666 		}
1667 
1668 		/* subsequent putpages just get a new sequence number */
1669 		else {
1670 			/* but only if it matters */
1671 			if (cp->c_metadata.md_seq != fscp->fs_dlogseq) {
1672 				seq = cachefs_dlog_seqnext(fscp);
1673 				if (seq == 0) {
1674 					error = ENOSPC;
1675 					goto out;
1676 				}
1677 				cp->c_metadata.md_seq = seq;
1678 				cp->c_flags |= CN_UPDATED;
1679 				/* XXX maybe should do write_metadata here */
1680 			}
1681 		}
1682 	}
1683 
1684 	frontvp = cp->c_frontvp;
1685 	VN_HOLD(frontvp);
1686 	mutex_exit(&cp->c_statelock);
1687 	error = bp->b_error = vn_rdwr(UIO_WRITE, frontvp,
1688 	    bp->b_un.b_addr, iolen, iooff, UIO_SYSSPACE, 0,
1689 	    RLIM64_INFINITY, kcred, &resid);
1690 	mutex_enter(&cp->c_statelock);
1691 	VN_RELE(frontvp);
1692 	frontvp = NULL;
1693 	if (error) {
1694 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1695 			cachefs_nocache(cp);
1696 			error = 0;
1697 			goto out;
1698 		} else {
1699 			goto out;
1700 		}
1701 	}
1702 
1703 	(void) cachefs_update_allocmap(cp, iooff, iolen);
1704 	cp->c_flags |= (CN_UPDATED | CN_NEED_FRONT_SYNC |
1705 	    CN_POPULATION_PENDING);
1706 	if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
1707 		gethrestime(&cp->c_metadata.md_localmtime);
1708 		cp->c_metadata.md_flags |= MD_LOCALMTIME;
1709 	}
1710 
1711 out:
1712 	if (commit) {
1713 		/* commit the log record */
1714 		ASSERT(fscp->fs_cdconnected == CFS_CD_DISCONNECTED);
1715 		if (cachefs_dlog_commit(fscp, commit, error)) {
1716 			/*EMPTY*/
1717 			/* XXX fix on panic */
1718 		}
1719 	}
1720 
1721 	if (error && commit) {
1722 		cp->c_metadata.md_flags &= ~MD_PUTPAGE;
1723 		cachefs_rlent_moveto(fscp->fs_cache, type,
1724 		    cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks);
1725 		cp->c_metadata.md_rltype = type;
1726 		cp->c_flags |= CN_UPDATED;
1727 	}
1728 	mutex_exit(&cp->c_statelock);
1729 	return (error);
1730 }
1731 
1732 /*ARGSUSED*/
1733 static int
1734 cachefs_dump(struct vnode *vp, caddr_t foo1, offset_t foo2, offset_t foo3,
1735     caller_context_t *ct)
1736 {
1737 	return (ENOSYS); /* should we panic if we get here? */
1738 }
1739 
1740 /*ARGSUSED*/
1741 static int
1742 cachefs_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, cred_t *cred,
1743 	int *rvalp, caller_context_t *ct)
1744 {
1745 	int error;
1746 	struct cnode *cp = VTOC(vp);
1747 	struct fscache *fscp = C_TO_FSCACHE(cp);
1748 	struct cachefscache *cachep;
1749 	extern kmutex_t cachefs_cachelock;
1750 	extern cachefscache_t *cachefs_cachelist;
1751 	cachefsio_pack_t *packp;
1752 	STRUCT_DECL(cachefsio_dcmd, dcmd);
1753 	int	inlen, outlen;	/* LP64: generic int for struct in/out len */
1754 	void *dinp, *doutp;
1755 	int (*dcmd_routine)(vnode_t *, void *, void *);
1756 
1757 	if (getzoneid() != GLOBAL_ZONEID)
1758 		return (EPERM);
1759 
1760 	/*
1761 	 * Cachefs only provides pass-through support for NFSv4,
1762 	 * and all vnode operations are passed through to the
1763 	 * back file system. For NFSv4 pass-through to work, only
1764 	 * connected operation is supported, the cnode backvp must
1765 	 * exist, and cachefs optional (eg., disconnectable) flags
1766 	 * are turned off. Assert these conditions which ensure
1767 	 * that only a subset of the ioctls are "truly supported"
1768 	 * for NFSv4 (these are CFSDCMD_DAEMONID and CFSDCMD_GETSTATS.
1769 	 * The packing operations are meaningless since there is
1770 	 * no caching for NFSv4, and the called functions silently
1771 	 * return if the backfilesystem is NFSv4. The daemon
1772 	 * commands except for those above are essentially used
1773 	 * for disconnectable operation support (including log
1774 	 * rolling), so in each called function, we assert that
1775 	 * NFSv4 is not in use. The _FIO* calls (except _FIOCOD)
1776 	 * are from "cfsfstype" which is not a documented
1777 	 * command. However, the command is visible in
1778 	 * /usr/lib/fs/cachefs so the commands are simply let
1779 	 * through (don't seem to impact pass-through functionality).
1780 	 */
1781 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
1782 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
1783 
1784 	switch (cmd) {
1785 	case CACHEFSIO_PACK:
1786 		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1787 		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1788 		if (!error)
1789 			error = cachefs_pack(vp, packp->p_name, cred);
1790 		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1791 		break;
1792 
1793 	case CACHEFSIO_UNPACK:
1794 		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1795 		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1796 		if (!error)
1797 			error = cachefs_unpack(vp, packp->p_name, cred);
1798 		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1799 		break;
1800 
1801 	case CACHEFSIO_PACKINFO:
1802 		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1803 		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1804 		if (!error)
1805 			error = cachefs_packinfo(vp, packp->p_name,
1806 			    &packp->p_status, cred);
1807 		if (!error)
1808 			error = xcopyout(packp, (void *)arg,
1809 			    sizeof (cachefsio_pack_t));
1810 		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1811 		break;
1812 
1813 	case CACHEFSIO_UNPACKALL:
1814 		error = cachefs_unpackall(vp);
1815 		break;
1816 
1817 	case CACHEFSIO_DCMD:
1818 		/*
1819 		 * This is a private interface between the cachefsd and
1820 		 * this file system.
1821 		 */
1822 
1823 		/* must be root to use these commands */
1824 		if (secpolicy_fs_config(cred, vp->v_vfsp) != 0)
1825 			return (EPERM);
1826 
1827 		/* get the command packet */
1828 		STRUCT_INIT(dcmd, flag & DATAMODEL_MASK);
1829 		error = xcopyin((void *)arg, STRUCT_BUF(dcmd),
1830 		    SIZEOF_STRUCT(cachefsio_dcmd, DATAMODEL_NATIVE));
1831 		if (error)
1832 			return (error);
1833 
1834 		/* copy in the data for the operation */
1835 		dinp = NULL;
1836 		if ((inlen = STRUCT_FGET(dcmd, d_slen)) > 0) {
1837 			dinp = cachefs_kmem_alloc(inlen, KM_SLEEP);
1838 			error = xcopyin(STRUCT_FGETP(dcmd, d_sdata), dinp,
1839 			    inlen);
1840 			if (error)
1841 				return (error);
1842 		}
1843 
1844 		/* allocate space for the result */
1845 		doutp = NULL;
1846 		if ((outlen = STRUCT_FGET(dcmd, d_rlen)) > 0)
1847 			doutp = cachefs_kmem_alloc(outlen, KM_SLEEP);
1848 
1849 		/*
1850 		 * Assert NFSv4 only allows the daemonid and getstats
1851 		 * daemon requests
1852 		 */
1853 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0 ||
1854 		    STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_DAEMONID ||
1855 		    STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_GETSTATS);
1856 
1857 		/* get the routine to execute */
1858 		dcmd_routine = NULL;
1859 		switch (STRUCT_FGET(dcmd, d_cmd)) {
1860 		case CFSDCMD_DAEMONID:
1861 			dcmd_routine = cachefs_io_daemonid;
1862 			break;
1863 		case CFSDCMD_STATEGET:
1864 			dcmd_routine = cachefs_io_stateget;
1865 			break;
1866 		case CFSDCMD_STATESET:
1867 			dcmd_routine = cachefs_io_stateset;
1868 			break;
1869 		case CFSDCMD_XWAIT:
1870 			dcmd_routine = cachefs_io_xwait;
1871 			break;
1872 		case CFSDCMD_EXISTS:
1873 			dcmd_routine = cachefs_io_exists;
1874 			break;
1875 		case CFSDCMD_LOSTFOUND:
1876 			dcmd_routine = cachefs_io_lostfound;
1877 			break;
1878 		case CFSDCMD_GETINFO:
1879 			dcmd_routine = cachefs_io_getinfo;
1880 			break;
1881 		case CFSDCMD_CIDTOFID:
1882 			dcmd_routine = cachefs_io_cidtofid;
1883 			break;
1884 		case CFSDCMD_GETATTRFID:
1885 			dcmd_routine = cachefs_io_getattrfid;
1886 			break;
1887 		case CFSDCMD_GETATTRNAME:
1888 			dcmd_routine = cachefs_io_getattrname;
1889 			break;
1890 		case CFSDCMD_GETSTATS:
1891 			dcmd_routine = cachefs_io_getstats;
1892 			break;
1893 		case CFSDCMD_ROOTFID:
1894 			dcmd_routine = cachefs_io_rootfid;
1895 			break;
1896 		case CFSDCMD_CREATE:
1897 			dcmd_routine = cachefs_io_create;
1898 			break;
1899 		case CFSDCMD_REMOVE:
1900 			dcmd_routine = cachefs_io_remove;
1901 			break;
1902 		case CFSDCMD_LINK:
1903 			dcmd_routine = cachefs_io_link;
1904 			break;
1905 		case CFSDCMD_RENAME:
1906 			dcmd_routine = cachefs_io_rename;
1907 			break;
1908 		case CFSDCMD_MKDIR:
1909 			dcmd_routine = cachefs_io_mkdir;
1910 			break;
1911 		case CFSDCMD_RMDIR:
1912 			dcmd_routine = cachefs_io_rmdir;
1913 			break;
1914 		case CFSDCMD_SYMLINK:
1915 			dcmd_routine = cachefs_io_symlink;
1916 			break;
1917 		case CFSDCMD_SETATTR:
1918 			dcmd_routine = cachefs_io_setattr;
1919 			break;
1920 		case CFSDCMD_SETSECATTR:
1921 			dcmd_routine = cachefs_io_setsecattr;
1922 			break;
1923 		case CFSDCMD_PUSHBACK:
1924 			dcmd_routine = cachefs_io_pushback;
1925 			break;
1926 		default:
1927 			error = ENOTTY;
1928 			break;
1929 		}
1930 
1931 		/* execute the routine */
1932 		if (dcmd_routine)
1933 			error = (*dcmd_routine)(vp, dinp, doutp);
1934 
1935 		/* copy out the result */
1936 		if ((error == 0) && doutp)
1937 			error = xcopyout(doutp, STRUCT_FGETP(dcmd, d_rdata),
1938 			    outlen);
1939 
1940 		/* free allocated memory */
1941 		if (dinp)
1942 			cachefs_kmem_free(dinp, inlen);
1943 		if (doutp)
1944 			cachefs_kmem_free(doutp, outlen);
1945 
1946 		break;
1947 
1948 	case _FIOCOD:
1949 		if (secpolicy_fs_config(cred, vp->v_vfsp) != 0) {
1950 			error = EPERM;
1951 			break;
1952 		}
1953 
1954 		error = EBUSY;
1955 		if (arg) {
1956 			/* non-zero arg means do all filesystems */
1957 			mutex_enter(&cachefs_cachelock);
1958 			for (cachep = cachefs_cachelist; cachep != NULL;
1959 			    cachep = cachep->c_next) {
1960 				mutex_enter(&cachep->c_fslistlock);
1961 				for (fscp = cachep->c_fslist;
1962 				    fscp != NULL;
1963 				    fscp = fscp->fs_next) {
1964 					if (CFS_ISFS_CODCONST(fscp)) {
1965 						gethrestime(&fscp->fs_cod_time);
1966 						error = 0;
1967 					}
1968 				}
1969 				mutex_exit(&cachep->c_fslistlock);
1970 			}
1971 			mutex_exit(&cachefs_cachelock);
1972 		} else {
1973 			if (CFS_ISFS_CODCONST(fscp)) {
1974 				gethrestime(&fscp->fs_cod_time);
1975 				error = 0;
1976 			}
1977 		}
1978 		break;
1979 
1980 	case _FIOSTOPCACHE:
1981 		error = cachefs_stop_cache(cp);
1982 		break;
1983 
1984 	default:
1985 		error = ENOTTY;
1986 		break;
1987 	}
1988 
1989 	/* return the result */
1990 	return (error);
1991 }
1992 
1993 ino64_t
1994 cachefs_fileno_conflict(fscache_t *fscp, ino64_t old)
1995 {
1996 	ino64_t new;
1997 
1998 	ASSERT(MUTEX_HELD(&fscp->fs_fslock));
1999 
2000 	for (;;) {
2001 		fscp->fs_info.fi_localfileno++;
2002 		if (fscp->fs_info.fi_localfileno == 0)
2003 			fscp->fs_info.fi_localfileno = 3;
2004 		fscp->fs_flags |= CFS_FS_DIRTYINFO;
2005 
2006 		new = fscp->fs_info.fi_localfileno;
2007 		if (! cachefs_fileno_inuse(fscp, new))
2008 			break;
2009 	}
2010 
2011 	cachefs_inum_register(fscp, old, new);
2012 	cachefs_inum_register(fscp, new, 0);
2013 	return (new);
2014 }
2015 
2016 /*ARGSUSED*/
2017 static int
2018 cachefs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
2019 	caller_context_t *ct)
2020 {
2021 	struct cnode *cp = VTOC(vp);
2022 	fscache_t *fscp = C_TO_FSCACHE(cp);
2023 	int error = 0;
2024 	int held = 0;
2025 	int connected = 0;
2026 
2027 #ifdef CFSDEBUG
2028 	CFS_DEBUG(CFSDEBUG_VOPS)
2029 		printf("cachefs_getattr: ENTER vp %p\n", (void *)vp);
2030 #endif
2031 
2032 	if (getzoneid() != GLOBAL_ZONEID)
2033 		return (EPERM);
2034 
2035 	/* Call backfilesystem getattr if NFSv4 */
2036 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
2037 		error = cachefs_getattr_backfs_nfsv4(vp, vap, flags, cr, ct);
2038 		goto out;
2039 	}
2040 
2041 	/*
2042 	 * If it has been specified that the return value will
2043 	 * just be used as a hint, and we are only being asked
2044 	 * for size, fsid or rdevid, then return the client's
2045 	 * notion of these values without checking to make sure
2046 	 * that the attribute cache is up to date.
2047 	 * The whole point is to avoid an over the wire GETATTR
2048 	 * call.
2049 	 */
2050 	if (flags & ATTR_HINT) {
2051 		if (vap->va_mask ==
2052 		    (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
2053 			if (vap->va_mask | AT_SIZE)
2054 				vap->va_size = cp->c_size;
2055 			/*
2056 			 * Return the FSID of the cachefs filesystem,
2057 			 * not the back filesystem
2058 			 */
2059 			if (vap->va_mask | AT_FSID)
2060 				vap->va_fsid = vp->v_vfsp->vfs_dev;
2061 			if (vap->va_mask | AT_RDEV)
2062 				vap->va_rdev = cp->c_attr.va_rdev;
2063 			return (0);
2064 		}
2065 	}
2066 
2067 	/*
2068 	 * Only need to flush pages if asking for the mtime
2069 	 * and if there any dirty pages.
2070 	 */
2071 	if (vap->va_mask & AT_MTIME) {
2072 		/*EMPTY*/
2073 #if 0
2074 		/*
2075 		 * XXX bob: stolen from nfs code, need to do something similar
2076 		 */
2077 		rp = VTOR(vp);
2078 		if ((rp->r_flags & RDIRTY) || rp->r_iocnt > 0)
2079 			(void) nfs3_putpage(vp, (offset_t)0, 0, 0, cr);
2080 #endif
2081 	}
2082 
2083 	for (;;) {
2084 		/* get (or renew) access to the file system */
2085 		if (held) {
2086 			cachefs_cd_release(fscp);
2087 			held = 0;
2088 		}
2089 		error = cachefs_cd_access(fscp, connected, 0);
2090 		if (error)
2091 			goto out;
2092 		held = 1;
2093 
2094 		/*
2095 		 * If it has been specified that the return value will
2096 		 * just be used as a hint, and we are only being asked
2097 		 * for size, fsid or rdevid, then return the client's
2098 		 * notion of these values without checking to make sure
2099 		 * that the attribute cache is up to date.
2100 		 * The whole point is to avoid an over the wire GETATTR
2101 		 * call.
2102 		 */
2103 		if (flags & ATTR_HINT) {
2104 			if (vap->va_mask ==
2105 			    (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
2106 				if (vap->va_mask | AT_SIZE)
2107 					vap->va_size = cp->c_size;
2108 				/*
2109 				 * Return the FSID of the cachefs filesystem,
2110 				 * not the back filesystem
2111 				 */
2112 				if (vap->va_mask | AT_FSID)
2113 					vap->va_fsid = vp->v_vfsp->vfs_dev;
2114 				if (vap->va_mask | AT_RDEV)
2115 					vap->va_rdev = cp->c_attr.va_rdev;
2116 				goto out;
2117 			}
2118 		}
2119 
2120 		mutex_enter(&cp->c_statelock);
2121 		if ((cp->c_metadata.md_flags & MD_NEEDATTRS) &&
2122 		    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
2123 			mutex_exit(&cp->c_statelock);
2124 			connected = 1;
2125 			continue;
2126 		}
2127 
2128 		error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2129 		if (CFS_TIMEOUT(fscp, error)) {
2130 			mutex_exit(&cp->c_statelock);
2131 			cachefs_cd_release(fscp);
2132 			held = 0;
2133 			cachefs_cd_timedout(fscp);
2134 			continue;
2135 		}
2136 		if (error) {
2137 			mutex_exit(&cp->c_statelock);
2138 			break;
2139 		}
2140 
2141 		/* check for fileno conflict */
2142 		if ((fscp->fs_inum_size > 0) &&
2143 		    ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) {
2144 			ino64_t fakenum;
2145 
2146 			mutex_exit(&cp->c_statelock);
2147 			mutex_enter(&fscp->fs_fslock);
2148 			fakenum = cachefs_inum_real2fake(fscp,
2149 			    cp->c_attr.va_nodeid);
2150 			if (fakenum == 0) {
2151 				fakenum = cachefs_fileno_conflict(fscp,
2152 				    cp->c_attr.va_nodeid);
2153 			}
2154 			mutex_exit(&fscp->fs_fslock);
2155 
2156 			mutex_enter(&cp->c_statelock);
2157 			cp->c_metadata.md_flags |= MD_LOCALFILENO;
2158 			cp->c_metadata.md_localfileno = fakenum;
2159 			cp->c_flags |= CN_UPDATED;
2160 		}
2161 
2162 		/* copy out the attributes */
2163 		*vap = cp->c_attr;
2164 
2165 		/*
2166 		 * return the FSID of the cachefs filesystem,
2167 		 * not the back filesystem
2168 		 */
2169 		vap->va_fsid = vp->v_vfsp->vfs_dev;
2170 
2171 		/* return our idea of the size */
2172 		if (cp->c_size > vap->va_size)
2173 			vap->va_size = cp->c_size;
2174 
2175 		/* overwrite with our version of fileno and timestamps */
2176 		vap->va_nodeid = cp->c_metadata.md_localfileno;
2177 		vap->va_mtime = cp->c_metadata.md_localmtime;
2178 		vap->va_ctime = cp->c_metadata.md_localctime;
2179 
2180 		mutex_exit(&cp->c_statelock);
2181 		break;
2182 	}
2183 out:
2184 	if (held)
2185 		cachefs_cd_release(fscp);
2186 #ifdef CFS_CD_DEBUG
2187 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2188 #endif
2189 
2190 #ifdef CFSDEBUG
2191 	CFS_DEBUG(CFSDEBUG_VOPS)
2192 		printf("cachefs_getattr: EXIT error = %d\n", error);
2193 #endif
2194 	return (error);
2195 }
2196 
2197 /*
2198  * cachefs_getattr_backfs_nfsv4
2199  *
2200  * Call NFSv4 back filesystem to handle the getattr (cachefs
2201  * pass-through support for NFSv4).
2202  */
2203 static int
2204 cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap,
2205     int flags, cred_t *cr, caller_context_t *ct)
2206 {
2207 	cnode_t *cp = VTOC(vp);
2208 	fscache_t *fscp = C_TO_FSCACHE(cp);
2209 	vnode_t *backvp;
2210 	int error;
2211 
2212 	/*
2213 	 * For NFSv4 pass-through to work, only connected operation
2214 	 * is supported, the cnode backvp must exist, and cachefs
2215 	 * optional (eg., disconnectable) flags are turned off. Assert
2216 	 * these conditions for the getattr operation.
2217 	 */
2218 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2219 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2220 
2221 	/* Call backfs vnode op after extracting backvp */
2222 	mutex_enter(&cp->c_statelock);
2223 	backvp = cp->c_backvp;
2224 	mutex_exit(&cp->c_statelock);
2225 
2226 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_getattr_backfs_nfsv4: cnode %p,"
2227 	    " backvp %p\n", cp, backvp));
2228 	error = VOP_GETATTR(backvp, vap, flags, cr, ct);
2229 
2230 	/* Update attributes */
2231 	cp->c_attr = *vap;
2232 
2233 	/*
2234 	 * return the FSID of the cachefs filesystem,
2235 	 * not the back filesystem
2236 	 */
2237 	vap->va_fsid = vp->v_vfsp->vfs_dev;
2238 
2239 	return (error);
2240 }
2241 
2242 /*ARGSUSED4*/
2243 static int
2244 cachefs_setattr(
2245 	vnode_t *vp,
2246 	vattr_t *vap,
2247 	int flags,
2248 	cred_t *cr,
2249 	caller_context_t *ct)
2250 {
2251 	cnode_t *cp = VTOC(vp);
2252 	fscache_t *fscp = C_TO_FSCACHE(cp);
2253 	int error;
2254 	int connected;
2255 	int held = 0;
2256 
2257 	if (getzoneid() != GLOBAL_ZONEID)
2258 		return (EPERM);
2259 
2260 	/*
2261 	 * Cachefs only provides pass-through support for NFSv4,
2262 	 * and all vnode operations are passed through to the
2263 	 * back file system. For NFSv4 pass-through to work, only
2264 	 * connected operation is supported, the cnode backvp must
2265 	 * exist, and cachefs optional (eg., disconnectable) flags
2266 	 * are turned off. Assert these conditions to ensure that
2267 	 * the backfilesystem is called for the setattr operation.
2268 	 */
2269 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2270 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2271 
2272 	connected = 0;
2273 	for (;;) {
2274 		/* drop hold on file system */
2275 		if (held) {
2276 			/* Won't loop with NFSv4 connected behavior */
2277 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2278 			cachefs_cd_release(fscp);
2279 			held = 0;
2280 		}
2281 
2282 		/* acquire access to the file system */
2283 		error = cachefs_cd_access(fscp, connected, 1);
2284 		if (error)
2285 			break;
2286 		held = 1;
2287 
2288 		/* perform the setattr */
2289 		error = cachefs_setattr_common(vp, vap, flags, cr, ct);
2290 		if (error) {
2291 			/* if connected */
2292 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2293 				if (CFS_TIMEOUT(fscp, error)) {
2294 					cachefs_cd_release(fscp);
2295 					held = 0;
2296 					cachefs_cd_timedout(fscp);
2297 					connected = 0;
2298 					continue;
2299 				}
2300 			}
2301 
2302 			/* else must be disconnected */
2303 			else {
2304 				if (CFS_TIMEOUT(fscp, error)) {
2305 					connected = 1;
2306 					continue;
2307 				}
2308 			}
2309 		}
2310 		break;
2311 	}
2312 
2313 	if (held) {
2314 		cachefs_cd_release(fscp);
2315 	}
2316 #ifdef CFS_CD_DEBUG
2317 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2318 #endif
2319 	return (error);
2320 }
2321 
2322 static int
2323 cachefs_setattr_common(
2324 	vnode_t *vp,
2325 	vattr_t *vap,
2326 	int flags,
2327 	cred_t *cr,
2328 	caller_context_t *ct)
2329 {
2330 	cnode_t *cp = VTOC(vp);
2331 	fscache_t *fscp = C_TO_FSCACHE(cp);
2332 	cachefscache_t *cachep = fscp->fs_cache;
2333 	uint_t mask = vap->va_mask;
2334 	int error = 0;
2335 	uint_t bcnt;
2336 
2337 	/* Cannot set these attributes. */
2338 	if (mask & AT_NOSET)
2339 		return (EINVAL);
2340 
2341 	/*
2342 	 * Truncate file.  Must have write permission and not be a directory.
2343 	 */
2344 	if (mask & AT_SIZE) {
2345 		if (vp->v_type == VDIR) {
2346 			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE))
2347 				cachefs_log_truncate(cachep, EISDIR,
2348 				    fscp->fs_cfsvfsp,
2349 				    &cp->c_metadata.md_cookie,
2350 				    cp->c_id.cid_fileno,
2351 				    crgetuid(cr), vap->va_size);
2352 			return (EISDIR);
2353 		}
2354 	}
2355 
2356 	/*
2357 	 * Gotta deal with one special case here, where we're setting the
2358 	 * size of the file. First, we zero out part of the page after the
2359 	 * new size of the file. Then we toss (not write) all pages after
2360 	 * page in which the new offset occurs. Note that the NULL passed
2361 	 * in instead of a putapage() fn parameter is correct, since
2362 	 * no dirty pages will be found (B_TRUNC | B_INVAL).
2363 	 */
2364 
2365 	rw_enter(&cp->c_rwlock, RW_WRITER);
2366 
2367 	/* sync dirty pages */
2368 	if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
2369 		error = cachefs_putpage_common(vp, (offset_t)0, 0, 0, cr);
2370 		if (error == EINTR)
2371 			goto out;
2372 	}
2373 	error = 0;
2374 
2375 	/* if connected */
2376 	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2377 		error = cachefs_setattr_connected(vp, vap, flags, cr, ct);
2378 	}
2379 	/* else must be disconnected */
2380 	else {
2381 		error = cachefs_setattr_disconnected(vp, vap, flags, cr, ct);
2382 	}
2383 	if (error)
2384 		goto out;
2385 
2386 	/*
2387 	 * If the file size has been changed then
2388 	 * toss whole pages beyond the end of the file and zero
2389 	 * the portion of the last page that is beyond the end of the file.
2390 	 */
2391 	if (mask & AT_SIZE && !CFS_ISFS_BACKFS_NFSV4(fscp)) {
2392 		bcnt = (uint_t)(cp->c_size & PAGEOFFSET);
2393 		if (bcnt)
2394 			pvn_vpzero(vp, cp->c_size, PAGESIZE - bcnt);
2395 		(void) pvn_vplist_dirty(vp, cp->c_size, cachefs_push,
2396 		    B_TRUNC | B_INVAL, cr);
2397 	}
2398 
2399 out:
2400 	rw_exit(&cp->c_rwlock);
2401 
2402 	if ((mask & AT_SIZE) &&
2403 	    (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE)))
2404 		cachefs_log_truncate(cachep, error, fscp->fs_cfsvfsp,
2405 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
2406 		    crgetuid(cr), vap->va_size);
2407 
2408 	return (error);
2409 }
2410 
2411 static int
2412 cachefs_setattr_connected(
2413 	vnode_t *vp,
2414 	vattr_t *vap,
2415 	int flags,
2416 	cred_t *cr,
2417 	caller_context_t *ct)
2418 {
2419 	cnode_t *cp = VTOC(vp);
2420 	fscache_t *fscp = C_TO_FSCACHE(cp);
2421 	uint_t mask = vap->va_mask;
2422 	int error = 0;
2423 	int setsize;
2424 
2425 	mutex_enter(&cp->c_statelock);
2426 
2427 	if (cp->c_backvp == NULL) {
2428 		error = cachefs_getbackvp(fscp, cp);
2429 		if (error)
2430 			goto out;
2431 	}
2432 
2433 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2434 	if (error)
2435 		goto out;
2436 
2437 	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_setattr (nfsv4): cnode %p, "
2438 	    "backvp %p\n", cp, cp->c_backvp));
2439 	error = VOP_SETATTR(cp->c_backvp, vap, flags, cr, ct);
2440 	if (error) {
2441 		goto out;
2442 	}
2443 
2444 	/* if the size of the file is being changed */
2445 	if (mask & AT_SIZE) {
2446 		cp->c_size = vap->va_size;
2447 		error = 0;
2448 		setsize = 0;
2449 
2450 		/* see if okay to try to set the file size */
2451 		if (((cp->c_flags & CN_NOCACHE) == 0) &&
2452 		    CFS_ISFS_NONSHARED(fscp)) {
2453 			/* okay to set size if file is populated */
2454 			if (cp->c_metadata.md_flags & MD_POPULATED)
2455 				setsize = 1;
2456 
2457 			/*
2458 			 * Okay to set size if front file exists and setting
2459 			 * file size to zero.
2460 			 */
2461 			if ((cp->c_metadata.md_flags & MD_FILE) &&
2462 			    (vap->va_size == 0))
2463 				setsize = 1;
2464 		}
2465 
2466 		/* if okay to try to set the file size */
2467 		if (setsize) {
2468 			error = 0;
2469 			if (cp->c_frontvp == NULL)
2470 				error = cachefs_getfrontfile(cp);
2471 			if (error == 0)
2472 				error = cachefs_frontfile_size(cp, cp->c_size);
2473 		} else if (cp->c_metadata.md_flags & MD_FILE) {
2474 			/* make sure file gets nocached */
2475 			error = EEXIST;
2476 		}
2477 
2478 		/* if we have to nocache the file */
2479 		if (error) {
2480 			if ((cp->c_flags & CN_NOCACHE) == 0 &&
2481 			    !CFS_ISFS_BACKFS_NFSV4(fscp))
2482 				cachefs_nocache(cp);
2483 			error = 0;
2484 		}
2485 	}
2486 
2487 	cp->c_flags |= CN_UPDATED;
2488 
2489 	/* XXX bob: given what modify_cobject does this seems unnecessary */
2490 	cp->c_attr.va_mask = AT_ALL;
2491 	error = VOP_GETATTR(cp->c_backvp, &cp->c_attr, 0, cr, ct);
2492 	if (error)
2493 		goto out;
2494 
2495 	cp->c_attr.va_size = MAX(cp->c_attr.va_size, cp->c_size);
2496 	cp->c_size = cp->c_attr.va_size;
2497 
2498 	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
2499 out:
2500 	mutex_exit(&cp->c_statelock);
2501 	return (error);
2502 }
2503 
2504 /*
2505  * perform the setattr on the local file system
2506  */
2507 /*ARGSUSED4*/
2508 static int
2509 cachefs_setattr_disconnected(
2510 	vnode_t *vp,
2511 	vattr_t *vap,
2512 	int flags,
2513 	cred_t *cr,
2514 	caller_context_t *ct)
2515 {
2516 	cnode_t *cp = VTOC(vp);
2517 	fscache_t *fscp = C_TO_FSCACHE(cp);
2518 	int mask;
2519 	int error;
2520 	int newfile;
2521 	off_t commit = 0;
2522 
2523 	if (CFS_ISFS_WRITE_AROUND(fscp))
2524 		return (ETIMEDOUT);
2525 
2526 	/* if we do not have good attributes */
2527 	if (cp->c_metadata.md_flags & MD_NEEDATTRS)
2528 		return (ETIMEDOUT);
2529 
2530 	/* primary concern is to keep this routine as much like ufs_setattr */
2531 
2532 	mutex_enter(&cp->c_statelock);
2533 
2534 	error = secpolicy_vnode_setattr(cr, vp, vap, &cp->c_attr, flags,
2535 	    cachefs_access_local, cp);
2536 
2537 	if (error)
2538 		goto out;
2539 
2540 	mask = vap->va_mask;
2541 
2542 	/* if changing the size of the file */
2543 	if (mask & AT_SIZE) {
2544 		if (vp->v_type == VDIR) {
2545 			error = EISDIR;
2546 			goto out;
2547 		}
2548 
2549 		if (vp->v_type == VFIFO) {
2550 			error = 0;
2551 			goto out;
2552 		}
2553 
2554 		if ((vp->v_type != VREG) &&
2555 		    !((vp->v_type == VLNK) && (vap->va_size == 0))) {
2556 			error = EINVAL;
2557 			goto out;
2558 		}
2559 
2560 		if (vap->va_size > fscp->fs_offmax) {
2561 			error = EFBIG;
2562 			goto out;
2563 		}
2564 
2565 		/* if the file is not populated and we are not truncating it */
2566 		if (((cp->c_metadata.md_flags & MD_POPULATED) == 0) &&
2567 		    (vap->va_size != 0)) {
2568 			error = ETIMEDOUT;
2569 			goto out;
2570 		}
2571 
2572 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2573 			error = cachefs_dlog_cidmap(fscp);
2574 			if (error) {
2575 				error = ENOSPC;
2576 				goto out;
2577 			}
2578 			cp->c_metadata.md_flags |= MD_MAPPING;
2579 		}
2580 
2581 		/* log the operation */
2582 		commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2583 		if (commit == 0) {
2584 			error = ENOSPC;
2585 			goto out;
2586 		}
2587 		cp->c_flags &= ~CN_NOCACHE;
2588 
2589 		/* special case truncating fast sym links */
2590 		if ((vp->v_type == VLNK) &&
2591 		    (cp->c_metadata.md_flags & MD_FASTSYMLNK)) {
2592 			/* XXX how can we get here */
2593 			/* XXX should update mtime */
2594 			cp->c_size = 0;
2595 			error = 0;
2596 			goto out;
2597 		}
2598 
2599 		/* get the front file, this may create one */
2600 		newfile = (cp->c_metadata.md_flags & MD_FILE) ? 0 : 1;
2601 		if (cp->c_frontvp == NULL) {
2602 			error = cachefs_getfrontfile(cp);
2603 			if (error)
2604 				goto out;
2605 		}
2606 		ASSERT(cp->c_frontvp);
2607 		if (newfile && (cp->c_flags & CN_UPDATED)) {
2608 			/* allocate space for the metadata */
2609 			ASSERT((cp->c_flags & CN_ALLOC_PENDING) == 0);
2610 			ASSERT((cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR)
2611 			    == 0);
2612 			error = filegrp_write_metadata(cp->c_filegrp,
2613 			    &cp->c_id, &cp->c_metadata);
2614 			if (error)
2615 				goto out;
2616 		}
2617 
2618 		/* change the size of the front file */
2619 		error = cachefs_frontfile_size(cp, vap->va_size);
2620 		if (error)
2621 			goto out;
2622 		cp->c_attr.va_size = cp->c_size = vap->va_size;
2623 		gethrestime(&cp->c_metadata.md_localmtime);
2624 		cp->c_metadata.md_flags |= MD_POPULATED | MD_LOCALMTIME;
2625 		cachefs_modified(cp);
2626 		cp->c_flags |= CN_UPDATED;
2627 	}
2628 
2629 	if (mask & AT_MODE) {
2630 		/* mark as modified */
2631 		if (cachefs_modified_alloc(cp)) {
2632 			error = ENOSPC;
2633 			goto out;
2634 		}
2635 
2636 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2637 			error = cachefs_dlog_cidmap(fscp);
2638 			if (error) {
2639 				error = ENOSPC;
2640 				goto out;
2641 			}
2642 			cp->c_metadata.md_flags |= MD_MAPPING;
2643 		}
2644 
2645 		/* log the operation if not already logged */
2646 		if (commit == 0) {
2647 			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2648 			if (commit == 0) {
2649 				error = ENOSPC;
2650 				goto out;
2651 			}
2652 		}
2653 
2654 		cp->c_attr.va_mode &= S_IFMT;
2655 		cp->c_attr.va_mode |= vap->va_mode & ~S_IFMT;
2656 		gethrestime(&cp->c_metadata.md_localctime);
2657 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
2658 		cp->c_flags |= CN_UPDATED;
2659 	}
2660 
2661 	if (mask & (AT_UID|AT_GID)) {
2662 
2663 		/* mark as modified */
2664 		if (cachefs_modified_alloc(cp)) {
2665 			error = ENOSPC;
2666 			goto out;
2667 		}
2668 
2669 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2670 			error = cachefs_dlog_cidmap(fscp);
2671 			if (error) {
2672 				error = ENOSPC;
2673 				goto out;
2674 			}
2675 			cp->c_metadata.md_flags |= MD_MAPPING;
2676 		}
2677 
2678 		/* log the operation if not already logged */
2679 		if (commit == 0) {
2680 			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2681 			if (commit == 0) {
2682 				error = ENOSPC;
2683 				goto out;
2684 			}
2685 		}
2686 
2687 		if (mask & AT_UID)
2688 			cp->c_attr.va_uid = vap->va_uid;
2689 
2690 		if (mask & AT_GID)
2691 			cp->c_attr.va_gid = vap->va_gid;
2692 		gethrestime(&cp->c_metadata.md_localctime);
2693 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
2694 		cp->c_flags |= CN_UPDATED;
2695 	}
2696 
2697 
2698 	if (mask & (AT_MTIME|AT_ATIME)) {
2699 		/* mark as modified */
2700 		if (cachefs_modified_alloc(cp)) {
2701 			error = ENOSPC;
2702 			goto out;
2703 		}
2704 
2705 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2706 			error = cachefs_dlog_cidmap(fscp);
2707 			if (error) {
2708 				error = ENOSPC;
2709 				goto out;
2710 			}
2711 			cp->c_metadata.md_flags |= MD_MAPPING;
2712 		}
2713 
2714 		/* log the operation if not already logged */
2715 		if (commit == 0) {
2716 			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2717 			if (commit == 0) {
2718 				error = ENOSPC;
2719 				goto out;
2720 			}
2721 		}
2722 
2723 		if (mask & AT_MTIME) {
2724 			cp->c_metadata.md_localmtime = vap->va_mtime;
2725 			cp->c_metadata.md_flags |= MD_LOCALMTIME;
2726 		}
2727 		if (mask & AT_ATIME)
2728 			cp->c_attr.va_atime = vap->va_atime;
2729 		gethrestime(&cp->c_metadata.md_localctime);
2730 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
2731 		cp->c_flags |= CN_UPDATED;
2732 	}
2733 
2734 out:
2735 	mutex_exit(&cp->c_statelock);
2736 
2737 	/* commit the log entry */
2738 	if (commit) {
2739 		if (cachefs_dlog_commit(fscp, commit, error)) {
2740 			/*EMPTY*/
2741 			/* XXX bob: fix on panic */
2742 		}
2743 	}
2744 	return (error);
2745 }
2746 
2747 /* ARGSUSED */
2748 static int
2749 cachefs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
2750 	caller_context_t *ct)
2751 {
2752 	cnode_t *cp = VTOC(vp);
2753 	fscache_t *fscp = C_TO_FSCACHE(cp);
2754 	int error;
2755 	int held = 0;
2756 	int connected = 0;
2757 
2758 #ifdef CFSDEBUG
2759 	CFS_DEBUG(CFSDEBUG_VOPS)
2760 		printf("cachefs_access: ENTER vp %p\n", (void *)vp);
2761 #endif
2762 	if (getzoneid() != GLOBAL_ZONEID) {
2763 		error = EPERM;
2764 		goto out;
2765 	}
2766 
2767 	/*
2768 	 * Cachefs only provides pass-through support for NFSv4,
2769 	 * and all vnode operations are passed through to the
2770 	 * back file system. For NFSv4 pass-through to work, only
2771 	 * connected operation is supported, the cnode backvp must
2772 	 * exist, and cachefs optional (eg., disconnectable) flags
2773 	 * are turned off. Assert these conditions to ensure that
2774 	 * the backfilesystem is called for the access operation.
2775 	 */
2776 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2777 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2778 
2779 	for (;;) {
2780 		/* get (or renew) access to the file system */
2781 		if (held) {
2782 			/* Won't loop with NFSv4 connected behavior */
2783 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2784 			cachefs_cd_release(fscp);
2785 			held = 0;
2786 		}
2787 		error = cachefs_cd_access(fscp, connected, 0);
2788 		if (error)
2789 			break;
2790 		held = 1;
2791 
2792 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2793 			error = cachefs_access_connected(vp, mode, flags,
2794 			    cr);
2795 			if (CFS_TIMEOUT(fscp, error)) {
2796 				cachefs_cd_release(fscp);
2797 				held = 0;
2798 				cachefs_cd_timedout(fscp);
2799 				connected = 0;
2800 				continue;
2801 			}
2802 		} else {
2803 			mutex_enter(&cp->c_statelock);
2804 			error = cachefs_access_local(cp, mode, cr);
2805 			mutex_exit(&cp->c_statelock);
2806 			if (CFS_TIMEOUT(fscp, error)) {
2807 				if (cachefs_cd_access_miss(fscp)) {
2808 					mutex_enter(&cp->c_statelock);
2809 					if (cp->c_backvp == NULL) {
2810 						(void) cachefs_getbackvp(fscp,
2811 						    cp);
2812 					}
2813 					mutex_exit(&cp->c_statelock);
2814 					error = cachefs_access_connected(vp,
2815 					    mode, flags, cr);
2816 					if (!CFS_TIMEOUT(fscp, error))
2817 						break;
2818 					delay(5*hz);
2819 					connected = 0;
2820 					continue;
2821 				}
2822 				connected = 1;
2823 				continue;
2824 			}
2825 		}
2826 		break;
2827 	}
2828 	if (held)
2829 		cachefs_cd_release(fscp);
2830 #ifdef CFS_CD_DEBUG
2831 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2832 #endif
2833 out:
2834 #ifdef CFSDEBUG
2835 	CFS_DEBUG(CFSDEBUG_VOPS)
2836 		printf("cachefs_access: EXIT error = %d\n", error);
2837 #endif
2838 	return (error);
2839 }
2840 
2841 static int
2842 cachefs_access_connected(struct vnode *vp, int mode, int flags, cred_t *cr)
2843 {
2844 	cnode_t *cp = VTOC(vp);
2845 	fscache_t *fscp = C_TO_FSCACHE(cp);
2846 	int error = 0;
2847 
2848 	mutex_enter(&cp->c_statelock);
2849 
2850 	/* Make sure the cnode attrs are valid first. */
2851 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2852 	if (error)
2853 		goto out;
2854 
2855 	/* see if can do a local file system check */
2856 	if ((fscp->fs_info.fi_mntflags & CFS_ACCESS_BACKFS) == 0 &&
2857 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
2858 		error = cachefs_access_local(cp, mode, cr);
2859 		goto out;
2860 	}
2861 
2862 	/* else do a remote file system check */
2863 	else {
2864 		if (cp->c_backvp == NULL) {
2865 			error = cachefs_getbackvp(fscp, cp);
2866 			if (error)
2867 				goto out;
2868 		}
2869 
2870 		CFS_DPRINT_BACKFS_NFSV4(fscp,
2871 		    ("cachefs_access (nfsv4): cnode %p, backvp %p\n",
2872 		    cp, cp->c_backvp));
2873 		error = VOP_ACCESS(cp->c_backvp, mode, flags, cr, NULL);
2874 
2875 		/*
2876 		 * even though we don't `need' the ACL to do access
2877 		 * via the backvp, we should cache it here to make our
2878 		 * behavior more reasonable if we go disconnected.
2879 		 */
2880 
2881 		if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
2882 		    (cachefs_vtype_aclok(vp)) &&
2883 		    ((cp->c_flags & CN_NOCACHE) == 0) &&
2884 		    (!CFS_ISFS_BACKFS_NFSV4(fscp)) &&
2885 		    ((cp->c_metadata.md_flags & MD_ACL) == 0))
2886 			(void) cachefs_cacheacl(cp, NULL);
2887 	}
2888 out:
2889 	/*
2890 	 * If NFS returned ESTALE, mark this cnode as stale, so that
2891 	 * the vn_open retry will read the file anew from backfs
2892 	 */
2893 	if (error == ESTALE)
2894 		cachefs_cnode_stale(cp);
2895 
2896 	mutex_exit(&cp->c_statelock);
2897 	return (error);
2898 }
2899 
2900 /*
2901  * CFS has a fastsymlink scheme. If the size of the link is < C_FSL_SIZE, then
2902  * the link is placed in the metadata itself (no front file is allocated).
2903  */
2904 /*ARGSUSED*/
2905 static int
2906 cachefs_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
2907 {
2908 	int error = 0;
2909 	cnode_t *cp = VTOC(vp);
2910 	fscache_t *fscp = C_TO_FSCACHE(cp);
2911 	cachefscache_t *cachep = fscp->fs_cache;
2912 	int held = 0;
2913 	int connected = 0;
2914 
2915 	if (getzoneid() != GLOBAL_ZONEID)
2916 		return (EPERM);
2917 
2918 	if (vp->v_type != VLNK)
2919 		return (EINVAL);
2920 
2921 	/*
2922 	 * Cachefs only provides pass-through support for NFSv4,
2923 	 * and all vnode operations are passed through to the
2924 	 * back file system. For NFSv4 pass-through to work, only
2925 	 * connected operation is supported, the cnode backvp must
2926 	 * exist, and cachefs optional (eg., disconnectable) flags
2927 	 * are turned off. Assert these conditions to ensure that
2928 	 * the backfilesystem is called for the readlink operation.
2929 	 */
2930 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2931 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2932 
2933 	for (;;) {
2934 		/* get (or renew) access to the file system */
2935 		if (held) {
2936 			/* Won't loop with NFSv4 connected behavior */
2937 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2938 			cachefs_cd_release(fscp);
2939 			held = 0;
2940 		}
2941 		error = cachefs_cd_access(fscp, connected, 0);
2942 		if (error)
2943 			break;
2944 		held = 1;
2945 
2946 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2947 			/*
2948 			 * since readlink_connected will call stuffsymlink
2949 			 * on success, have to serialize access
2950 			 */
2951 			if (!rw_tryenter(&cp->c_rwlock, RW_WRITER)) {
2952 				cachefs_cd_release(fscp);
2953 				rw_enter(&cp->c_rwlock, RW_WRITER);
2954 				error = cachefs_cd_access(fscp, connected, 0);
2955 				if (error) {
2956 					held = 0;
2957 					rw_exit(&cp->c_rwlock);
2958 					break;
2959 				}
2960 			}
2961 			error = cachefs_readlink_connected(vp, uiop, cr);
2962 			rw_exit(&cp->c_rwlock);
2963 			if (CFS_TIMEOUT(fscp, error)) {
2964 				cachefs_cd_release(fscp);
2965 				held = 0;
2966 				cachefs_cd_timedout(fscp);
2967 				connected = 0;
2968 				continue;
2969 			}
2970 		} else {
2971 			error = cachefs_readlink_disconnected(vp, uiop);
2972 			if (CFS_TIMEOUT(fscp, error)) {
2973 				if (cachefs_cd_access_miss(fscp)) {
2974 					/* as above */
2975 					if (!rw_tryenter(&cp->c_rwlock,
2976 					    RW_WRITER)) {
2977 						cachefs_cd_release(fscp);
2978 						rw_enter(&cp->c_rwlock,
2979 						    RW_WRITER);
2980 						error = cachefs_cd_access(fscp,
2981 						    connected, 0);
2982 						if (error) {
2983 							held = 0;
2984 							rw_exit(&cp->c_rwlock);
2985 							break;
2986 						}
2987 					}
2988 					error = cachefs_readlink_connected(vp,
2989 					    uiop, cr);
2990 					rw_exit(&cp->c_rwlock);
2991 					if (!CFS_TIMEOUT(fscp, error))
2992 						break;
2993 					delay(5*hz);
2994 					connected = 0;
2995 					continue;
2996 				}
2997 				connected = 1;
2998 				continue;
2999 			}
3000 		}
3001 		break;
3002 	}
3003 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READLINK))
3004 		cachefs_log_readlink(cachep, error, fscp->fs_cfsvfsp,
3005 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
3006 		    crgetuid(cr), cp->c_size);
3007 
3008 	if (held)
3009 		cachefs_cd_release(fscp);
3010 #ifdef CFS_CD_DEBUG
3011 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3012 #endif
3013 
3014 	/*
3015 	 * The over the wire error for attempting to readlink something
3016 	 * other than a symbolic link is ENXIO.  However, we need to
3017 	 * return EINVAL instead of ENXIO, so we map it here.
3018 	 */
3019 	return (error == ENXIO ? EINVAL : error);
3020 }
3021 
3022 static int
3023 cachefs_readlink_connected(vnode_t *vp, uio_t *uiop, cred_t *cr)
3024 {
3025 	int error;
3026 	cnode_t *cp = VTOC(vp);
3027 	fscache_t *fscp = C_TO_FSCACHE(cp);
3028 	caddr_t buf;
3029 	int buflen;
3030 	int readcache = 0;
3031 
3032 	mutex_enter(&cp->c_statelock);
3033 
3034 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
3035 	if (error)
3036 		goto out;
3037 
3038 	/* if the sym link is cached as a fast sym link */
3039 	if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
3040 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3041 		error = uiomove(cp->c_metadata.md_allocinfo,
3042 		    MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
3043 #ifdef CFSDEBUG
3044 		readcache = 1;
3045 		goto out;
3046 #else /* CFSDEBUG */
3047 		/* XXX KLUDGE! correct for insidious 0-len symlink */
3048 		if (cp->c_size != 0) {
3049 			readcache = 1;
3050 			goto out;
3051 		}
3052 #endif /* CFSDEBUG */
3053 	}
3054 
3055 	/* if the sym link is cached in a front file */
3056 	if (cp->c_metadata.md_flags & MD_POPULATED) {
3057 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3058 		ASSERT(cp->c_metadata.md_flags & MD_FILE);
3059 		if (cp->c_frontvp == NULL) {
3060 			(void) cachefs_getfrontfile(cp);
3061 		}
3062 		if (cp->c_metadata.md_flags & MD_POPULATED) {
3063 			/* read symlink data from frontfile */
3064 			uiop->uio_offset = 0;
3065 			(void) VOP_RWLOCK(cp->c_frontvp,
3066 			    V_WRITELOCK_FALSE, NULL);
3067 			error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
3068 			VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);
3069 
3070 			/* XXX KLUDGE! correct for insidious 0-len symlink */
3071 			if (cp->c_size != 0) {
3072 				readcache = 1;
3073 				goto out;
3074 			}
3075 		}
3076 	}
3077 
3078 	/* get the sym link contents from the back fs */
3079 	error = cachefs_readlink_back(cp, cr, &buf, &buflen);
3080 	if (error)
3081 		goto out;
3082 
3083 	/* copy the contents out to the user */
3084 	error = uiomove(buf, MIN(buflen, uiop->uio_resid), UIO_READ, uiop);
3085 
3086 	/*
3087 	 * try to cache the sym link, note that its a noop if NOCACHE is set
3088 	 * or if NFSv4 pass-through is enabled.
3089 	 */
3090 	if (cachefs_stuffsymlink(cp, buf, buflen)) {
3091 		cachefs_nocache(cp);
3092 	}
3093 
3094 	cachefs_kmem_free(buf, MAXPATHLEN);
3095 
3096 out:
3097 	mutex_exit(&cp->c_statelock);
3098 	if (error == 0) {
3099 		if (readcache)
3100 			fscp->fs_stats.st_hits++;
3101 		else
3102 			fscp->fs_stats.st_misses++;
3103 	}
3104 	return (error);
3105 }
3106 
3107 static int
3108 cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop)
3109 {
3110 	int error;
3111 	cnode_t *cp = VTOC(vp);
3112 	fscache_t *fscp = C_TO_FSCACHE(cp);
3113 	int readcache = 0;
3114 
3115 	mutex_enter(&cp->c_statelock);
3116 
3117 	/* if the sym link is cached as a fast sym link */
3118 	if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
3119 		error = uiomove(cp->c_metadata.md_allocinfo,
3120 		    MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
3121 		readcache = 1;
3122 		goto out;
3123 	}
3124 
3125 	/* if the sym link is cached in a front file */
3126 	if (cp->c_metadata.md_flags & MD_POPULATED) {
3127 		ASSERT(cp->c_metadata.md_flags & MD_FILE);
3128 		if (cp->c_frontvp == NULL) {
3129 			(void) cachefs_getfrontfile(cp);
3130 		}
3131 		if (cp->c_metadata.md_flags & MD_POPULATED) {
3132 			/* read symlink data from frontfile */
3133 			uiop->uio_offset = 0;
3134 			(void) VOP_RWLOCK(cp->c_frontvp,
3135 			    V_WRITELOCK_FALSE, NULL);
3136 			error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
3137 			VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);
3138 			readcache = 1;
3139 			goto out;
3140 		}
3141 	}
3142 	error = ETIMEDOUT;
3143 
3144 out:
3145 	mutex_exit(&cp->c_statelock);
3146 	if (error == 0) {
3147 		if (readcache)
3148 			fscp->fs_stats.st_hits++;
3149 		else
3150 			fscp->fs_stats.st_misses++;
3151 	}
3152 	return (error);
3153 }
3154 
3155 /*ARGSUSED*/
3156 static int
3157 cachefs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
3158 {
3159 	cnode_t *cp = VTOC(vp);
3160 	int error = 0;
3161 	fscache_t *fscp = C_TO_FSCACHE(cp);
3162 	int held = 0;
3163 	int connected = 0;
3164 
3165 #ifdef CFSDEBUG
3166 	CFS_DEBUG(CFSDEBUG_VOPS)
3167 		printf("cachefs_fsync: ENTER vp %p\n", (void *)vp);
3168 #endif
3169 
3170 	if (getzoneid() != GLOBAL_ZONEID) {
3171 		error = EPERM;
3172 		goto out;
3173 	}
3174 
3175 	if (fscp->fs_backvfsp && fscp->fs_backvfsp->vfs_flag & VFS_RDONLY)
3176 		goto out;
3177 
3178 	/*
3179 	 * Cachefs only provides pass-through support for NFSv4,
3180 	 * and all vnode operations are passed through to the
3181 	 * back file system. For NFSv4 pass-through to work, only
3182 	 * connected operation is supported, the cnode backvp must
3183 	 * exist, and cachefs optional (eg., disconnectable) flags
3184 	 * are turned off. Assert these conditions to ensure that
3185 	 * the backfilesystem is called for the fsync operation.
3186 	 */
3187 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3188 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
3189 
3190 	for (;;) {
3191 		/* get (or renew) access to the file system */
3192 		if (held) {
3193 			/* Won't loop with NFSv4 connected behavior */
3194 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3195 			cachefs_cd_release(fscp);
3196 			held = 0;
3197 		}
3198 		error = cachefs_cd_access(fscp, connected, 1);
3199 		if (error)
3200 			break;
3201 		held = 1;
3202 		connected = 0;
3203 
3204 		/* if a regular file, write out the pages */
3205 		if ((vp->v_type == VREG) && vn_has_cached_data(vp) &&
3206 		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
3207 			error = cachefs_putpage_common(vp, (offset_t)0,
3208 			    0, 0, cr);
3209 			if (CFS_TIMEOUT(fscp, error)) {
3210 				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3211 					cachefs_cd_release(fscp);
3212 					held = 0;
3213 					cachefs_cd_timedout(fscp);
3214 					continue;
3215 				} else {
3216 					connected = 1;
3217 					continue;
3218 				}
3219 			}
3220 
3221 			/* if no space left in cache, wait until connected */
3222 			if ((error == ENOSPC) &&
3223 			    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
3224 				connected = 1;
3225 				continue;
3226 			}
3227 
3228 			/* clear the cnode error if putpage worked */
3229 			if ((error == 0) && cp->c_error) {
3230 				mutex_enter(&cp->c_statelock);
3231 				cp->c_error = 0;
3232 				mutex_exit(&cp->c_statelock);
3233 			}
3234 
3235 			if (error)
3236 				break;
3237 		}
3238 
3239 		/* if connected, sync the backvp */
3240 		if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) &&
3241 		    cp->c_backvp) {
3242 			mutex_enter(&cp->c_statelock);
3243 			if (cp->c_backvp) {
3244 				CFS_DPRINT_BACKFS_NFSV4(fscp,
3245 				    ("cachefs_fsync (nfsv4): cnode %p, "
3246 				    "backvp %p\n", cp, cp->c_backvp));
3247 				error = VOP_FSYNC(cp->c_backvp, syncflag, cr,
3248 				    ct);
3249 				if (CFS_TIMEOUT(fscp, error)) {
3250 					mutex_exit(&cp->c_statelock);
3251 					cachefs_cd_release(fscp);
3252 					held = 0;
3253 					cachefs_cd_timedout(fscp);
3254 					continue;
3255 				} else if (error && (error != EINTR))
3256 					cp->c_error = error;
3257 			}
3258 			mutex_exit(&cp->c_statelock);
3259 		}
3260 
3261 		/* sync the metadata and the front file to the front fs */
3262 		if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
3263 			error = cachefs_sync_metadata(cp);
3264 			if (error &&
3265 			    (fscp->fs_cdconnected == CFS_CD_CONNECTED))
3266 				error = 0;
3267 		}
3268 		break;
3269 	}
3270 
3271 	if (error == 0)
3272 		error = cp->c_error;
3273 
3274 	if (held)
3275 		cachefs_cd_release(fscp);
3276 
3277 out:
3278 #ifdef CFS_CD_DEBUG
3279 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3280 #endif
3281 
3282 #ifdef CFSDEBUG
3283 	CFS_DEBUG(CFSDEBUG_VOPS)
3284 		printf("cachefs_fsync: EXIT vp %p\n", (void *)vp);
3285 #endif
3286 	return (error);
3287 }
3288 
3289 /*
3290  * Called from cachefs_inactive(), to make sure all the data goes out to disk.
3291  */
3292 int
3293 cachefs_sync_metadata(cnode_t *cp)
3294 {
3295 	int error = 0;
3296 	struct filegrp *fgp;
3297 	struct vattr va;
3298 	fscache_t *fscp = C_TO_FSCACHE(cp);
3299 
3300 #ifdef CFSDEBUG
3301 	CFS_DEBUG(CFSDEBUG_VOPS)
3302 		printf("c_sync_metadata: ENTER cp %p cflag %x\n",
3303 		    (void *)cp, cp->c_flags);
3304 #endif
3305 
3306 	mutex_enter(&cp->c_statelock);
3307 	if ((cp->c_flags & CN_UPDATED) == 0)
3308 		goto out;
3309 	if (cp->c_flags & (CN_STALE | CN_DESTROY))
3310 		goto out;
3311 	fgp = cp->c_filegrp;
3312 	if ((fgp->fg_flags & CFS_FG_WRITE) == 0)
3313 		goto out;
3314 	if (CFS_ISFS_BACKFS_NFSV4(fscp))
3315 		goto out;
3316 
3317 	if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
3318 		mutex_exit(&cp->c_statelock);
3319 		error = filegrp_allocattr(fgp);
3320 		mutex_enter(&cp->c_statelock);
3321 		if (error) {
3322 			error = 0;
3323 			goto out;
3324 		}
3325 	}
3326 
3327 	if (cp->c_flags & CN_ALLOC_PENDING) {
3328 		error = filegrp_create_metadata(fgp, &cp->c_metadata,
3329 		    &cp->c_id);
3330 		if (error)
3331 			goto out;
3332 		cp->c_flags &= ~CN_ALLOC_PENDING;
3333 	}
3334 
3335 	if (cp->c_flags & CN_NEED_FRONT_SYNC) {
3336 		if (cp->c_frontvp != NULL) {
3337 			error = VOP_FSYNC(cp->c_frontvp, FSYNC, kcred, NULL);
3338 			if (error) {
3339 				cp->c_metadata.md_timestamp.tv_sec = 0;
3340 			} else {
3341 				va.va_mask = AT_MTIME;
3342 				error = VOP_GETATTR(cp->c_frontvp, &va, 0,
3343 				    kcred, NULL);
3344 				if (error)
3345 					goto out;
3346 				cp->c_metadata.md_timestamp = va.va_mtime;
3347 				cp->c_flags &=
3348 				    ~(CN_NEED_FRONT_SYNC |
3349 				    CN_POPULATION_PENDING);
3350 			}
3351 		} else {
3352 			cp->c_flags &=
3353 			    ~(CN_NEED_FRONT_SYNC | CN_POPULATION_PENDING);
3354 		}
3355 	}
3356 
3357 	/*
3358 	 * XXX tony: How can CN_ALLOC_PENDING still be set??
3359 	 * XXX tony: How can CN_UPDATED not be set?????
3360 	 */
3361 	if ((cp->c_flags & CN_ALLOC_PENDING) == 0 &&
3362 	    (cp->c_flags & CN_UPDATED)) {
3363 		error = filegrp_write_metadata(fgp, &cp->c_id,
3364 		    &cp->c_metadata);
3365 		if (error)
3366 			goto out;
3367 	}
3368 out:
3369 	if (error) {
3370 		/* XXX modified files? */
3371 		if (cp->c_metadata.md_rlno) {
3372 			cachefs_removefrontfile(&cp->c_metadata,
3373 			    &cp->c_id, fgp);
3374 			cachefs_rlent_moveto(C_TO_FSCACHE(cp)->fs_cache,
3375 			    CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
3376 			cp->c_metadata.md_rlno = 0;
3377 			cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
3378 			if (cp->c_frontvp) {
3379 				VN_RELE(cp->c_frontvp);
3380 				cp->c_frontvp = NULL;
3381 			}
3382 		}
3383 		if ((cp->c_flags & CN_ALLOC_PENDING) == 0)
3384 			(void) filegrp_destroy_metadata(fgp, &cp->c_id);
3385 		cp->c_flags |= CN_ALLOC_PENDING;
3386 		cachefs_nocache(cp);
3387 	}
3388 	/*
3389 	 * we clear the updated bit even on errors because a retry
3390 	 * will probably fail also.
3391 	 */
3392 	cp->c_flags &= ~CN_UPDATED;
3393 	mutex_exit(&cp->c_statelock);
3394 
3395 #ifdef CFSDEBUG
3396 	CFS_DEBUG(CFSDEBUG_VOPS)
3397 		printf("c_sync_metadata: EXIT cp %p cflag %x\n",
3398 		    (void *)cp, cp->c_flags);
3399 #endif
3400 
3401 	return (error);
3402 }
3403 
3404 /*
3405  * This is the vop entry point for inactivating a vnode.
3406  * It just queues the request for the async thread which
3407  * calls cachefs_inactive.
3408  * Because of the dnlc, it is not safe to grab most locks here.
3409  */
3410 /*ARGSUSED*/
3411 static void
3412 cachefs_inactive(struct vnode *vp, cred_t *cr, caller_context_t *ct)
3413 {
3414 	cnode_t *cp;
3415 	struct cachefs_req *rp;
3416 	fscache_t *fscp;
3417 
3418 #ifdef CFSDEBUG
3419 	CFS_DEBUG(CFSDEBUG_VOPS)
3420 		printf("cachefs_inactive: ENTER vp %p\n", (void *)vp);
3421 #endif
3422 
3423 	cp = VTOC(vp);
3424 	fscp = C_TO_FSCACHE(cp);
3425 
3426 	ASSERT((cp->c_flags & CN_IDLE) == 0);
3427 
3428 	/*
3429 	 * Cachefs only provides pass-through support for NFSv4,
3430 	 * and all vnode operations are passed through to the
3431 	 * back file system. For NFSv4 pass-through to work, only
3432 	 * connected operation is supported, the cnode backvp must
3433 	 * exist, and cachefs optional (eg., disconnectable) flags
3434 	 * are turned off. Assert these conditions to ensure that
3435 	 * the backfilesystem is called for the inactive operation.
3436 	 */
3437 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3438 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
3439 
3440 	/* vn_rele() set the v_count == 1 */
3441 
3442 	cp->c_ipending = 1;
3443 
3444 	rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
3445 	rp->cfs_cmd = CFS_IDLE;
3446 	rp->cfs_cr = cr;
3447 	crhold(rp->cfs_cr);
3448 	rp->cfs_req_u.cu_idle.ci_vp = vp;
3449 	cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));
3450 
3451 #ifdef CFSDEBUG
3452 	CFS_DEBUG(CFSDEBUG_VOPS)
3453 		printf("cachefs_inactive: EXIT vp %p\n", (void *)vp);
3454 #endif
3455 }
3456 
3457 /* ARGSUSED */
3458 static int
3459 cachefs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp,
3460     struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr,
3461     caller_context_t *ct, int *direntflags, pathname_t *realpnp)
3462 
3463 {
3464 	int error = 0;
3465 	cnode_t *dcp = VTOC(dvp);
3466 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3467 	int held = 0;
3468 	int connected = 0;
3469 
3470 #ifdef CFSDEBUG
3471 	CFS_DEBUG(CFSDEBUG_VOPS)
3472 		printf("cachefs_lookup: ENTER dvp %p nm %s\n", (void *)dvp, nm);
3473 #endif
3474 
3475 	if (getzoneid() != GLOBAL_ZONEID) {
3476 		error = EPERM;
3477 		goto out;
3478 	}
3479 
3480 	/*
3481 	 * Cachefs only provides pass-through support for NFSv4,
3482 	 * and all vnode operations are passed through to the
3483 	 * back file system. For NFSv4 pass-through to work, only
3484 	 * connected operation is supported, the cnode backvp must
3485 	 * exist, and cachefs optional (eg., disconnectable) flags
3486 	 * are turned off. Assert these conditions to ensure that
3487 	 * the backfilesystem is called for the lookup operation.
3488 	 */
3489 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3490 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
3491 
3492 	for (;;) {
3493 		/* get (or renew) access to the file system */
3494 		if (held) {
3495 			/* Won't loop with NFSv4 connected behavior */
3496 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3497 			cachefs_cd_release(fscp);
3498 			held = 0;
3499 		}
3500 		error = cachefs_cd_access(fscp, connected, 0);
3501 		if (error)
3502 			break;
3503 		held = 1;
3504 
3505 		error = cachefs_lookup_common(dvp, nm, vpp, pnp,
3506 			flags, rdir, cr);
3507 		if (CFS_TIMEOUT(fscp, error)) {
3508 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3509 				cachefs_cd_release(fscp);
3510 				held = 0;
3511 				cachefs_cd_timedout(fscp);
3512 				connected = 0;
3513 				continue;
3514 			} else {
3515 				if (cachefs_cd_access_miss(fscp)) {
3516 					rw_enter(&dcp->c_rwlock, RW_READER);
3517 					error = cachefs_lookup_back(dvp, nm,
3518 					    vpp, cr);
3519 					rw_exit(&dcp->c_rwlock);
3520 					if (!CFS_TIMEOUT(fscp, error))
3521 						break;
3522 					delay(5*hz);
3523 					connected = 0;
3524 					continue;
3525 				}
3526 				connected = 1;
3527 				continue;
3528 			}
3529 		}
3530 		break;
3531 	}
3532 	if (held)
3533 		cachefs_cd_release(fscp);
3534 
3535 	if (error == 0 && IS_DEVVP(*vpp)) {
3536 		struct vnode *newvp;
3537 		newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
3538 		VN_RELE(*vpp);
3539 		if (newvp == NULL) {
3540 			error = ENOSYS;
3541 		} else {
3542 			*vpp = newvp;
3543 		}
3544 	}
3545 
3546 #ifdef CFS_CD_DEBUG
3547 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3548 #endif
3549 out:
3550 #ifdef CFSDEBUG
3551 	CFS_DEBUG(CFSDEBUG_VOPS)
3552 		printf("cachefs_lookup: EXIT error = %d\n", error);
3553 #endif
3554 
3555 	return (error);
3556 }
3557 
3558 /* ARGSUSED */
3559 int
3560 cachefs_lookup_common(vnode_t *dvp, char *nm, vnode_t **vpp,
3561     struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr)
3562 {
3563 	int error = 0;
3564 	cnode_t *cp, *dcp = VTOC(dvp);
3565 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3566 	struct fid cookie;
3567 	u_offset_t d_offset;
3568 	struct cachefs_req *rp;
3569 	cfs_cid_t cid, dircid;
3570 	uint_t flag;
3571 	uint_t uncached = 0;
3572 
3573 	*vpp = NULL;
3574 
3575 	/*
3576 	 * If lookup is for "", just return dvp.  Don't need
3577 	 * to send it over the wire, look it up in the dnlc,
3578 	 * or perform any access checks.
3579 	 */
3580 	if (*nm == '\0') {
3581 		VN_HOLD(dvp);
3582 		*vpp = dvp;
3583 		return (0);
3584 	}
3585 
3586 	/* can't do lookups in non-directories */
3587 	if (dvp->v_type != VDIR)
3588 		return (ENOTDIR);
3589 
3590 	/* perform access check, also does consistency check if connected */
3591 	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3592 		error = cachefs_access_connected(dvp, VEXEC, 0, cr);
3593 	} else {
3594 		mutex_enter(&dcp->c_statelock);
3595 		error = cachefs_access_local(dcp, VEXEC, cr);
3596 		mutex_exit(&dcp->c_statelock);
3597 	}
3598 	if (error)
3599 		return (error);
3600 
3601 	/*
3602 	 * If lookup is for ".", just return dvp.  Don't need
3603 	 * to send it over the wire or look it up in the dnlc,
3604 	 * just need to check access.
3605 	 */
3606 	if (strcmp(nm, ".") == 0) {
3607 		VN_HOLD(dvp);
3608 		*vpp = dvp;
3609 		return (0);
3610 	}
3611 
3612 	/* check the dnlc */
3613 	*vpp = (vnode_t *)dnlc_lookup(dvp, nm);
3614 	if (*vpp)
3615 		return (0);
3616 
3617 	/* read lock the dir before starting the search */
3618 	rw_enter(&dcp->c_rwlock, RW_READER);
3619 
3620 	mutex_enter(&dcp->c_statelock);
3621 	dircid = dcp->c_id;
3622 
3623 	dcp->c_usage++;
3624 
3625 	/* if front file is not usable, lookup on the back fs */
3626 	if ((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
3627 	    CFS_ISFS_BACKFS_NFSV4(fscp) ||
3628 	    ((dcp->c_filegrp->fg_flags & CFS_FG_READ) == 0)) {
3629 		mutex_exit(&dcp->c_statelock);
3630 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3631 			error = cachefs_lookup_back(dvp, nm, vpp, cr);
3632 		else
3633 			error = ETIMEDOUT;
3634 		goto out;
3635 	}
3636 
3637 	/* if the front file is not populated, try to populate it */
3638 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
3639 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
3640 			error = ETIMEDOUT;
3641 			mutex_exit(&dcp->c_statelock);
3642 			goto out;
3643 		}
3644 
3645 		if (cachefs_async_okay()) {
3646 			/* cannot populate if cache is not writable */
3647 			ASSERT((dcp->c_flags &
3648 			    (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0);
3649 			dcp->c_flags |= CN_ASYNC_POPULATE;
3650 
3651 			rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
3652 			rp->cfs_cmd = CFS_POPULATE;
3653 			rp->cfs_req_u.cu_populate.cpop_vp = dvp;
3654 			rp->cfs_cr = cr;
3655 
3656 			crhold(cr);
3657 			VN_HOLD(dvp);
3658 
3659 			cachefs_addqueue(rp, &fscp->fs_workq);
3660 		} else if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
3661 			error = cachefs_dir_fill(dcp, cr);
3662 			if (error != 0) {
3663 				mutex_exit(&dcp->c_statelock);
3664 				goto out;
3665 			}
3666 		}
3667 		/* no populate if too many asyncs and we have to cache ACLs */
3668 
3669 		mutex_exit(&dcp->c_statelock);
3670 
3671 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3672 			error = cachefs_lookup_back(dvp, nm, vpp, cr);
3673 		else
3674 			error = ETIMEDOUT;
3675 		goto out;
3676 	}
3677 
3678 	/* by now we have a valid cached front file that we can search */
3679 
3680 	ASSERT((dcp->c_flags & CN_ASYNC_POPULATE) == 0);
3681 	error = cachefs_dir_look(dcp, nm, &cookie, &flag,
3682 	    &d_offset, &cid);
3683 	mutex_exit(&dcp->c_statelock);
3684 
3685 	if (error) {
3686 		/* if the entry does not have the fid, go get it */
3687 		if (error == EINVAL) {
3688 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3689 				error = cachefs_lookup_back(dvp, nm, vpp, cr);
3690 			else
3691 				error = ETIMEDOUT;
3692 		}
3693 
3694 		/* errors other than does not exist */
3695 		else if (error != ENOENT) {
3696 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
3697 				error = cachefs_lookup_back(dvp, nm, vpp, cr);
3698 			else
3699 				error = ETIMEDOUT;
3700 		}
3701 		goto out;
3702 	}
3703 
3704 	/*
3705 	 * Else we found the entry in the cached directory.
3706 	 * Make a cnode for it.
3707 	 */
3708 	error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
3709 	    cr, 0, &cp);
3710 	if (error == ESTALE) {
3711 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3712 		mutex_enter(&dcp->c_statelock);
3713 		cachefs_nocache(dcp);
3714 		mutex_exit(&dcp->c_statelock);
3715 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3716 			error = cachefs_lookup_back(dvp, nm, vpp, cr);
3717 			uncached = 1;
3718 		} else
3719 			error = ETIMEDOUT;
3720 	} else if (error == 0) {
3721 		*vpp = CTOV(cp);
3722 	}
3723 
3724 out:
3725 	if (error == 0) {
3726 		/* put the entry in the dnlc */
3727 		if (cachefs_dnlc)
3728 			dnlc_enter(dvp, nm, *vpp);
3729 
3730 		/* save the cid of the parent so can find the name */
3731 		cp = VTOC(*vpp);
3732 		if (bcmp(&cp->c_metadata.md_parent, &dircid,
3733 		    sizeof (cfs_cid_t)) != 0) {
3734 			mutex_enter(&cp->c_statelock);
3735 			cp->c_metadata.md_parent = dircid;
3736 			cp->c_flags |= CN_UPDATED;
3737 			mutex_exit(&cp->c_statelock);
3738 		}
3739 	}
3740 
3741 	rw_exit(&dcp->c_rwlock);
3742 	if (uncached && dcp->c_metadata.md_flags & MD_PACKED)
3743 		(void) cachefs_pack_common(dvp, cr);
3744 	return (error);
3745 }
3746 
3747 /*
3748  * Called from cachefs_lookup_common when the back file system needs to be
3749  * examined to perform the lookup.
3750  */
3751 static int
3752 cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp,
3753     cred_t *cr)
3754 {
3755 	int error = 0;
3756 	cnode_t *cp, *dcp = VTOC(dvp);
3757 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3758 	vnode_t *backvp = NULL;
3759 	struct vattr va;
3760 	struct fid cookie;
3761 	cfs_cid_t cid;
3762 	uint32_t valid_fid;
3763 
3764 	mutex_enter(&dcp->c_statelock);
3765 
3766 	/* do a lookup on the back FS to get the back vnode */
3767 	if (dcp->c_backvp == NULL) {
3768 		error = cachefs_getbackvp(fscp, dcp);
3769 		if (error)
3770 			goto out;
3771 	}
3772 
3773 	CFS_DPRINT_BACKFS_NFSV4(fscp,
3774 	    ("cachefs_lookup (nfsv4): dcp %p, dbackvp %p, name %s\n",
3775 	    dcp, dcp->c_backvp, nm));
3776 	error = VOP_LOOKUP(dcp->c_backvp, nm, &backvp, (struct pathname *)NULL,
3777 	    0, (vnode_t *)NULL, cr, NULL, NULL, NULL);
3778 	if (error)
3779 		goto out;
3780 	if (IS_DEVVP(backvp)) {
3781 		struct vnode *devvp = backvp;
3782 
3783 		if (VOP_REALVP(devvp, &backvp, NULL) == 0) {
3784 			VN_HOLD(backvp);
3785 			VN_RELE(devvp);
3786 		}
3787 	}
3788 
3789 	/* get the fid and attrs from the back fs */
3790 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
3791 	error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
3792 	if (error)
3793 		goto out;
3794 
3795 	cid.cid_fileno = va.va_nodeid;
3796 	cid.cid_flags = 0;
3797 
3798 #if 0
3799 	/* XXX bob: this is probably no longer necessary */
3800 	/* if the directory entry was incomplete, we can complete it now */
3801 	if ((dcp->c_metadata.md_flags & MD_POPULATED) &&
3802 	    ((dcp->c_flags & CN_ASYNC_POPULATE) == 0) &&
3803 	    (dcp->c_filegrp->fg_flags & CFS_FG_WRITE)) {
3804 		cachefs_dir_modentry(dcp, d_offset, &cookie, &cid);
3805 	}
3806 #endif
3807 
3808 out:
3809 	mutex_exit(&dcp->c_statelock);
3810 
3811 	/* create the cnode */
3812 	if (error == 0) {
3813 		error = cachefs_cnode_make(&cid, fscp,
3814 		    (valid_fid ? &cookie : NULL),
3815 		    &va, backvp, cr, 0, &cp);
3816 		if (error == 0) {
3817 			*vpp = CTOV(cp);
3818 		}
3819 	}
3820 
3821 	if (backvp)
3822 		VN_RELE(backvp);
3823 
3824 	return (error);
3825 }
3826 
3827 /*ARGSUSED7*/
3828 static int
3829 cachefs_create(vnode_t *dvp, char *nm, vattr_t *vap,
3830     vcexcl_t exclusive, int mode, vnode_t **vpp, cred_t *cr, int flag,
3831     caller_context_t *ct, vsecattr_t *vsecp)
3832 
3833 {
3834 	cnode_t *dcp = VTOC(dvp);
3835 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3836 	cachefscache_t *cachep = fscp->fs_cache;
3837 	int error;
3838 	int connected = 0;
3839 	int held = 0;
3840 
3841 #ifdef CFSDEBUG
3842 	CFS_DEBUG(CFSDEBUG_VOPS)
3843 		printf("cachefs_create: ENTER dvp %p, nm %s\n",
3844 		    (void *)dvp, nm);
3845 #endif
3846 	if (getzoneid() != GLOBAL_ZONEID) {
3847 		error = EPERM;
3848 		goto out;
3849 	}
3850 
3851 	/*
3852 	 * Cachefs only provides pass-through support for NFSv4,
3853 	 * and all vnode operations are passed through to the
3854 	 * back file system. For NFSv4 pass-through to work, only
3855 	 * connected operation is supported, the cnode backvp must
3856 	 * exist, and cachefs optional (eg., disconnectable) flags
3857 	 * are turned off. Assert these conditions to ensure that
3858 	 * the backfilesystem is called for the create operation.
3859 	 */
3860 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3861 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
3862 
3863 	for (;;) {
3864 		/* get (or renew) access to the file system */
3865 		if (held) {
3866 			/* Won't loop with NFSv4 connected behavior */
3867 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
3868 			cachefs_cd_release(fscp);
3869 			held = 0;
3870 		}
3871 		error = cachefs_cd_access(fscp, connected, 1);
3872 		if (error)
3873 			break;
3874 		held = 1;
3875 
3876 		/*
3877 		 * if we are connected, perform the remote portion of the
3878 		 * create.
3879 		 */
3880 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
3881 			error = cachefs_create_connected(dvp, nm, vap,
3882 			    exclusive, mode, vpp, cr);
3883 			if (CFS_TIMEOUT(fscp, error)) {
3884 				cachefs_cd_release(fscp);
3885 				held = 0;
3886 				cachefs_cd_timedout(fscp);
3887 				connected = 0;
3888 				continue;
3889 			} else if (error) {
3890 				break;
3891 			}
3892 		}
3893 
3894 		/* else we must be disconnected */
3895 		else {
3896 			error = cachefs_create_disconnected(dvp, nm, vap,
3897 			    exclusive, mode, vpp, cr);
3898 			if (CFS_TIMEOUT(fscp, error)) {
3899 				connected = 1;
3900 				continue;
3901 			} else if (error) {
3902 				break;
3903 			}
3904 		}
3905 		break;
3906 	}
3907 
3908 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_CREATE)) {
3909 		fid_t *fidp = NULL;
3910 		ino64_t fileno = 0;
3911 		cnode_t *cp = NULL;
3912 		if (error == 0)
3913 			cp = VTOC(*vpp);
3914 
3915 		if (cp != NULL) {
3916 			fidp = &cp->c_metadata.md_cookie;
3917 			fileno = cp->c_id.cid_fileno;
3918 		}
3919 		cachefs_log_create(cachep, error, fscp->fs_cfsvfsp,
3920 		    fidp, fileno, crgetuid(cr));
3921 	}
3922 
3923 	if (held)
3924 		cachefs_cd_release(fscp);
3925 
3926 	if (error == 0 && CFS_ISFS_NONSHARED(fscp))
3927 		(void) cachefs_pack(dvp, nm, cr);
3928 	if (error == 0 && IS_DEVVP(*vpp)) {
3929 		struct vnode *spcvp;
3930 
3931 		spcvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
3932 		VN_RELE(*vpp);
3933 		if (spcvp == NULL) {
3934 			error = ENOSYS;
3935 		} else {
3936 			*vpp = spcvp;
3937 		}
3938 	}
3939 
3940 #ifdef CFS_CD_DEBUG
3941 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
3942 #endif
3943 out:
3944 #ifdef CFSDEBUG
3945 	CFS_DEBUG(CFSDEBUG_VOPS)
3946 		printf("cachefs_create: EXIT error %d\n", error);
3947 #endif
3948 	return (error);
3949 }
3950 
3951 
3952 static int
3953 cachefs_create_connected(vnode_t *dvp, char *nm, vattr_t *vap,
3954     enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr)
3955 {
3956 	cnode_t *dcp = VTOC(dvp);
3957 	fscache_t *fscp = C_TO_FSCACHE(dcp);
3958 	int error;
3959 	vnode_t *tvp = NULL;
3960 	vnode_t *devvp;
3961 	fid_t cookie;
3962 	vattr_t va;
3963 	cnode_t *ncp;
3964 	cfs_cid_t cid;
3965 	vnode_t *vp;
3966 	uint32_t valid_fid;
3967 
3968 	/* special case if file already exists */
3969 	error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
3970 	if (CFS_TIMEOUT(fscp, error))
3971 		return (error);
3972 	if (error == 0) {
3973 		if (exclusive == EXCL)
3974 			error = EEXIST;
3975 		else if (vp->v_type == VDIR && (mode & VWRITE))
3976 			error = EISDIR;
3977 		else if ((error =
3978 		    cachefs_access_connected(vp, mode, 0, cr)) == 0) {
3979 			if ((vap->va_mask & AT_SIZE) && (vp->v_type == VREG)) {
3980 				vap->va_mask = AT_SIZE;
3981 				error = cachefs_setattr_common(vp, vap, 0,
3982 				    cr, NULL);
3983 			}
3984 		}
3985 		if (error) {
3986 			VN_RELE(vp);
3987 		} else
3988 			*vpp = vp;
3989 		return (error);
3990 	}
3991 
3992 	rw_enter(&dcp->c_rwlock, RW_WRITER);
3993 	mutex_enter(&dcp->c_statelock);
3994 
3995 	/* consistency check the directory */
3996 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
3997 	if (error) {
3998 		mutex_exit(&dcp->c_statelock);
3999 		goto out;
4000 	}
4001 
4002 	/* get the backvp if necessary */
4003 	if (dcp->c_backvp == NULL) {
4004 		error = cachefs_getbackvp(fscp, dcp);
4005 		if (error) {
4006 			mutex_exit(&dcp->c_statelock);
4007 			goto out;
4008 		}
4009 	}
4010 
4011 	/* create the file on the back fs */
4012 	CFS_DPRINT_BACKFS_NFSV4(fscp,
4013 	    ("cachefs_create (nfsv4): dcp %p, dbackvp %p,"
4014 	    "name %s\n", dcp, dcp->c_backvp, nm));
4015 	error = VOP_CREATE(dcp->c_backvp, nm, vap, exclusive, mode,
4016 	    &devvp, cr, 0, NULL, NULL);
4017 	mutex_exit(&dcp->c_statelock);
4018 	if (error)
4019 		goto out;
4020 	if (VOP_REALVP(devvp, &tvp, NULL) == 0) {
4021 		VN_HOLD(tvp);
4022 		VN_RELE(devvp);
4023 	} else {
4024 		tvp = devvp;
4025 	}
4026 
4027 	/* get the fid and attrs from the back fs */
4028 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
4029 	error = cachefs_getcookie(tvp, &cookie, &va, cr, valid_fid);
4030 	if (error)
4031 		goto out;
4032 
4033 	/* make the cnode */
4034 	cid.cid_fileno = va.va_nodeid;
4035 	cid.cid_flags = 0;
4036 	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
4037 	    &va, tvp, cr, 0, &ncp);
4038 	if (error)
4039 		goto out;
4040 
4041 	*vpp = CTOV(ncp);
4042 
4043 	/* enter it in the parent directory */
4044 	mutex_enter(&dcp->c_statelock);
4045 	if (CFS_ISFS_NONSHARED(fscp) &&
4046 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
4047 		/* see if entry already exists */
4048 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
4049 		error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL);
4050 		if (error == ENOENT) {
4051 			/* entry, does not exist, add the new file */
4052 			error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie,
4053 			    &ncp->c_id, SM_ASYNC);
4054 			if (error) {
4055 				cachefs_nocache(dcp);
4056 				error = 0;
4057 			}
4058 			/* XXX should this be done elsewhere, too? */
4059 			dnlc_enter(dvp, nm, *vpp);
4060 		} else {
4061 			/* entry exists or some other problem */
4062 			cachefs_nocache(dcp);
4063 			error = 0;
4064 		}
4065 	}
4066 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
4067 	mutex_exit(&dcp->c_statelock);
4068 
4069 out:
4070 	rw_exit(&dcp->c_rwlock);
4071 	if (tvp)
4072 		VN_RELE(tvp);
4073 
4074 	return (error);
4075 }
4076 
4077 static int
4078 cachefs_create_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
4079 	enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr)
4080 {
4081 	cnode_t *dcp = VTOC(dvp);
4082 	cnode_t *cp;
4083 	cnode_t *ncp = NULL;
4084 	vnode_t *vp;
4085 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4086 	int error = 0;
4087 	struct vattr va;
4088 	timestruc_t current_time;
4089 	off_t commit = 0;
4090 	fid_t cookie;
4091 	cfs_cid_t cid;
4092 
4093 	rw_enter(&dcp->c_rwlock, RW_WRITER);
4094 	mutex_enter(&dcp->c_statelock);
4095 
4096 	/* give up if the directory is not populated */
4097 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
4098 		mutex_exit(&dcp->c_statelock);
4099 		rw_exit(&dcp->c_rwlock);
4100 		return (ETIMEDOUT);
4101 	}
4102 
4103 	/* special case if file already exists */
4104 	error = cachefs_dir_look(dcp, nm, &cookie, NULL, NULL, &cid);
4105 	if (error == EINVAL) {
4106 		mutex_exit(&dcp->c_statelock);
4107 		rw_exit(&dcp->c_rwlock);
4108 		return (ETIMEDOUT);
4109 	}
4110 	if (error == 0) {
4111 		mutex_exit(&dcp->c_statelock);
4112 		rw_exit(&dcp->c_rwlock);
4113 		error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
4114 		    cr, 0, &cp);
4115 		if (error) {
4116 			return (error);
4117 		}
4118 		vp = CTOV(cp);
4119 
4120 		if (cp->c_metadata.md_flags & MD_NEEDATTRS)
4121 			error = ETIMEDOUT;
4122 		else if (exclusive == EXCL)
4123 			error = EEXIST;
4124 		else if (vp->v_type == VDIR && (mode & VWRITE))
4125 			error = EISDIR;
4126 		else {
4127 			mutex_enter(&cp->c_statelock);
4128 			error = cachefs_access_local(cp, mode, cr);
4129 			mutex_exit(&cp->c_statelock);
4130 			if (!error) {
4131 				if ((vap->va_mask & AT_SIZE) &&
4132 				    (vp->v_type == VREG)) {
4133 					vap->va_mask = AT_SIZE;
4134 					error = cachefs_setattr_common(vp,
4135 					    vap, 0, cr, NULL);
4136 				}
4137 			}
4138 		}
4139 		if (error) {
4140 			VN_RELE(vp);
4141 		} else
4142 			*vpp = vp;
4143 		return (error);
4144 	}
4145 
4146 	/* give up if cannot modify the cache */
4147 	if (CFS_ISFS_WRITE_AROUND(fscp)) {
4148 		mutex_exit(&dcp->c_statelock);
4149 		error = ETIMEDOUT;
4150 		goto out;
4151 	}
4152 
4153 	/* check access */
4154 	if (error = cachefs_access_local(dcp, VWRITE, cr)) {
4155 		mutex_exit(&dcp->c_statelock);
4156 		goto out;
4157 	}
4158 
4159 	/* mark dir as modified */
4160 	cachefs_modified(dcp);
4161 	mutex_exit(&dcp->c_statelock);
4162 
4163 	/* must be privileged to set sticky bit */
4164 	if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
4165 		vap->va_mode &= ~VSVTX;
4166 
4167 	/* make up a reasonable set of attributes */
4168 	cachefs_attr_setup(vap, &va, dcp, cr);
4169 
4170 	/* create the cnode */
4171 	error = cachefs_cnode_create(fscp, &va, 0, &ncp);
4172 	if (error)
4173 		goto out;
4174 
4175 	mutex_enter(&ncp->c_statelock);
4176 
4177 	/* get the front file now instead of later */
4178 	if (vap->va_type == VREG) {
4179 		error = cachefs_getfrontfile(ncp);
4180 		if (error) {
4181 			mutex_exit(&ncp->c_statelock);
4182 			goto out;
4183 		}
4184 		ASSERT(ncp->c_frontvp != NULL);
4185 		ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
4186 		ncp->c_metadata.md_flags |= MD_POPULATED;
4187 	} else {
4188 		ASSERT(ncp->c_flags & CN_ALLOC_PENDING);
4189 		if (ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
4190 			(void) filegrp_allocattr(ncp->c_filegrp);
4191 		}
4192 		error = filegrp_create_metadata(ncp->c_filegrp,
4193 		    &ncp->c_metadata, &ncp->c_id);
4194 		if (error) {
4195 			mutex_exit(&ncp->c_statelock);
4196 			goto out;
4197 		}
4198 		ncp->c_flags &= ~CN_ALLOC_PENDING;
4199 	}
4200 	mutex_enter(&dcp->c_statelock);
4201 	cachefs_creategid(dcp, ncp, vap, cr);
4202 	cachefs_createacl(dcp, ncp);
4203 	mutex_exit(&dcp->c_statelock);
4204 
4205 	/* set times on the file */
4206 	gethrestime(&current_time);
4207 	ncp->c_metadata.md_vattr.va_atime = current_time;
4208 	ncp->c_metadata.md_localctime = current_time;
4209 	ncp->c_metadata.md_localmtime = current_time;
4210 	ncp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME;
4211 
4212 	/* reserve space for the daemon cid mapping */
4213 	error = cachefs_dlog_cidmap(fscp);
4214 	if (error) {
4215 		mutex_exit(&ncp->c_statelock);
4216 		goto out;
4217 	}
4218 	ncp->c_metadata.md_flags |= MD_MAPPING;
4219 
4220 	/* mark the new file as modified */
4221 	if (cachefs_modified_alloc(ncp)) {
4222 		mutex_exit(&ncp->c_statelock);
4223 		error = ENOSPC;
4224 		goto out;
4225 	}
4226 	ncp->c_flags |= CN_UPDATED;
4227 
4228 	/*
4229 	 * write the metadata now rather than waiting until
4230 	 * inactive so that if there's no space we can let
4231 	 * the caller know.
4232 	 */
4233 	ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
4234 	ASSERT((ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
4235 	error = filegrp_write_metadata(ncp->c_filegrp,
4236 	    &ncp->c_id, &ncp->c_metadata);
4237 	if (error) {
4238 		mutex_exit(&ncp->c_statelock);
4239 		goto out;
4240 	}
4241 
4242 	/* log the operation */
4243 	commit = cachefs_dlog_create(fscp, dcp, nm, vap, exclusive,
4244 	    mode, ncp, 0, cr);
4245 	if (commit == 0) {
4246 		mutex_exit(&ncp->c_statelock);
4247 		error = ENOSPC;
4248 		goto out;
4249 	}
4250 
4251 	mutex_exit(&ncp->c_statelock);
4252 
4253 	mutex_enter(&dcp->c_statelock);
4254 
4255 	/* update parent dir times */
4256 	dcp->c_metadata.md_localmtime = current_time;
4257 	dcp->c_metadata.md_flags |= MD_LOCALMTIME;
4258 	dcp->c_flags |= CN_UPDATED;
4259 
4260 	/* enter new file name in the parent directory */
4261 	if (dcp->c_metadata.md_flags & MD_POPULATED) {
4262 		error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie,
4263 		    &ncp->c_id, 0);
4264 		if (error) {
4265 			cachefs_nocache(dcp);
4266 			mutex_exit(&dcp->c_statelock);
4267 			error = ETIMEDOUT;
4268 			goto out;
4269 		}
4270 		dnlc_enter(dvp, nm, CTOV(ncp));
4271 	} else {
4272 		mutex_exit(&dcp->c_statelock);
4273 		error = ETIMEDOUT;
4274 		goto out;
4275 	}
4276 	mutex_exit(&dcp->c_statelock);
4277 
4278 out:
4279 	rw_exit(&dcp->c_rwlock);
4280 
4281 	if (commit) {
4282 		if (cachefs_dlog_commit(fscp, commit, error)) {
4283 			/*EMPTY*/
4284 			/* XXX bob: fix on panic */
4285 		}
4286 	}
4287 	if (error) {
4288 		/* destroy the cnode we created */
4289 		if (ncp) {
4290 			mutex_enter(&ncp->c_statelock);
4291 			ncp->c_flags |= CN_DESTROY;
4292 			mutex_exit(&ncp->c_statelock);
4293 			VN_RELE(CTOV(ncp));
4294 		}
4295 	} else {
4296 		*vpp = CTOV(ncp);
4297 	}
4298 	return (error);
4299 }
4300 
4301 /*ARGSUSED*/
4302 static int
4303 cachefs_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
4304     int flags)
4305 {
4306 	cnode_t *dcp = VTOC(dvp);
4307 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4308 	cachefscache_t *cachep = fscp->fs_cache;
4309 	int error = 0;
4310 	int held = 0;
4311 	int connected = 0;
4312 	size_t namlen;
4313 	vnode_t *vp = NULL;
4314 	int vfslock = 0;
4315 
4316 #ifdef CFSDEBUG
4317 	CFS_DEBUG(CFSDEBUG_VOPS)
4318 		printf("cachefs_remove: ENTER dvp %p name %s\n",
4319 		    (void *)dvp, nm);
4320 #endif
4321 	if (getzoneid() != GLOBAL_ZONEID) {
4322 		error = EPERM;
4323 		goto out;
4324 	}
4325 
4326 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
4327 		ASSERT(dcp->c_flags & CN_NOCACHE);
4328 
4329 	/*
4330 	 * Cachefs only provides pass-through support for NFSv4,
4331 	 * and all vnode operations are passed through to the
4332 	 * back file system. For NFSv4 pass-through to work, only
4333 	 * connected operation is supported, the cnode backvp must
4334 	 * exist, and cachefs optional (eg., disconnectable) flags
4335 	 * are turned off. Assert these conditions to ensure that
4336 	 * the backfilesystem is called for the remove operation.
4337 	 */
4338 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4339 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
4340 
4341 	for (;;) {
4342 		if (vfslock) {
4343 			vn_vfsunlock(vp);
4344 			vfslock = 0;
4345 		}
4346 		if (vp) {
4347 			VN_RELE(vp);
4348 			vp = NULL;
4349 		}
4350 
4351 		/* get (or renew) access to the file system */
4352 		if (held) {
4353 			/* Won't loop with NFSv4 connected behavior */
4354 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
4355 			cachefs_cd_release(fscp);
4356 			held = 0;
4357 		}
4358 		error = cachefs_cd_access(fscp, connected, 1);
4359 		if (error)
4360 			break;
4361 		held = 1;
4362 
4363 		/* if disconnected, do some extra error checking */
4364 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
4365 			/* check permissions */
4366 			mutex_enter(&dcp->c_statelock);
4367 			error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
4368 			mutex_exit(&dcp->c_statelock);
4369 			if (CFS_TIMEOUT(fscp, error)) {
4370 				connected = 1;
4371 				continue;
4372 			}
4373 			if (error)
4374 				break;
4375 
4376 			namlen = strlen(nm);
4377 			if (namlen == 0) {
4378 				error = EINVAL;
4379 				break;
4380 			}
4381 
4382 			/* cannot remove . and .. */
4383 			if (nm[0] == '.') {
4384 				if (namlen == 1) {
4385 					error = EINVAL;
4386 					break;
4387 				} else if (namlen == 2 && nm[1] == '.') {
4388 					error = EEXIST;
4389 					break;
4390 				}
4391 			}
4392 
4393 		}
4394 
4395 		/* get the cnode of the file to delete */
4396 		error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
4397 		if (error) {
4398 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
4399 				if (CFS_TIMEOUT(fscp, error)) {
4400 					cachefs_cd_release(fscp);
4401 					held = 0;
4402 					cachefs_cd_timedout(fscp);
4403 					connected = 0;
4404 					continue;
4405 				}
4406 			} else {
4407 				if (CFS_TIMEOUT(fscp, error)) {
4408 					connected = 1;
4409 					continue;
4410 				}
4411 			}
4412 			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE)) {
4413 				struct fid foo;
4414 
4415 				bzero(&foo, sizeof (foo));
4416 				cachefs_log_remove(cachep, error,
4417 				    fscp->fs_cfsvfsp, &foo, 0, crgetuid(cr));
4418 			}
4419 			break;
4420 		}
4421 
4422 		if (vp->v_type == VDIR) {
4423 			/* must be privileged to remove dirs with unlink() */
4424 			if ((error = secpolicy_fs_linkdir(cr, vp->v_vfsp)) != 0)
4425 				break;
4426 
4427 			/* see ufs_dirremove for why this is done, mount race */
4428 			if (vn_vfswlock(vp)) {
4429 				error = EBUSY;
4430 				break;
4431 			}
4432 			vfslock = 1;
4433 			if (vn_mountedvfs(vp) != NULL) {
4434 				error = EBUSY;
4435 				break;
4436 			}
4437 		}
4438 
4439 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
4440 			error = cachefs_remove_connected(dvp, nm, cr, vp);
4441 			if (CFS_TIMEOUT(fscp, error)) {
4442 				cachefs_cd_release(fscp);
4443 				held = 0;
4444 				cachefs_cd_timedout(fscp);
4445 				connected = 0;
4446 				continue;
4447 			}
4448 		} else {
4449 			error = cachefs_remove_disconnected(dvp, nm, cr,
4450 			    vp);
4451 			if (CFS_TIMEOUT(fscp, error)) {
4452 				connected = 1;
4453 				continue;
4454 			}
4455 		}
4456 		break;
4457 	}
4458 
4459 #if 0
4460 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE))
4461 		cachefs_log_remove(cachep, error, fscp->fs_cfsvfsp,
4462 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
4463 		    crgetuid(cr));
4464 #endif
4465 
4466 	if (held)
4467 		cachefs_cd_release(fscp);
4468 
4469 	if (vfslock)
4470 		vn_vfsunlock(vp);
4471 
4472 	if (vp)
4473 		VN_RELE(vp);
4474 
4475 #ifdef CFS_CD_DEBUG
4476 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
4477 #endif
4478 out:
4479 #ifdef CFSDEBUG
4480 	CFS_DEBUG(CFSDEBUG_VOPS)
4481 		printf("cachefs_remove: EXIT dvp %p\n", (void *)dvp);
4482 #endif
4483 
4484 	return (error);
4485 }
4486 
4487 int
4488 cachefs_remove_connected(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
4489 {
4490 	cnode_t *dcp = VTOC(dvp);
4491 	cnode_t *cp = VTOC(vp);
4492 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4493 	int error = 0;
4494 
4495 	/*
4496 	 * Acquire the rwlock (WRITER) on the directory to prevent other
4497 	 * activity on the directory.
4498 	 */
4499 	rw_enter(&dcp->c_rwlock, RW_WRITER);
4500 
4501 	/* purge dnlc of this entry so can get accurate vnode count */
4502 	dnlc_purge_vp(vp);
4503 
4504 	/*
4505 	 * If the cnode is active, make a link to the file
4506 	 * so operations on the file will continue.
4507 	 */
4508 	if ((vp->v_type != VDIR) &&
4509 	    !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) {
4510 		error = cachefs_remove_dolink(dvp, vp, nm, cr);
4511 		if (error)
4512 			goto out;
4513 	}
4514 
4515 	/* else call backfs NFSv4 handler if NFSv4 */
4516 	else if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
4517 		error = cachefs_remove_backfs_nfsv4(dvp, nm, cr, vp);
4518 		goto out;
4519 	}
4520 
4521 	/* else drop the backvp so nfs does not do rename */
4522 	else if (cp->c_backvp) {
4523 		mutex_enter(&cp->c_statelock);
4524 		if (cp->c_backvp) {
4525 			VN_RELE(cp->c_backvp);
4526 			cp->c_backvp = NULL;
4527 		}
4528 		mutex_exit(&cp->c_statelock);
4529 	}
4530 
4531 	mutex_enter(&dcp->c_statelock);
4532 
4533 	/* get the backvp */
4534 	if (dcp->c_backvp == NULL) {
4535 		error = cachefs_getbackvp(fscp, dcp);
4536 		if (error) {
4537 			mutex_exit(&dcp->c_statelock);
4538 			goto out;
4539 		}
4540 	}
4541 
4542 	/* check directory consistency */
4543 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
4544 	if (error) {
4545 		mutex_exit(&dcp->c_statelock);
4546 		goto out;
4547 	}
4548 
4549 	/* perform the remove on the back fs */
4550 	error = VOP_REMOVE(dcp->c_backvp, nm, cr, NULL, 0);
4551 	if (error) {
4552 		mutex_exit(&dcp->c_statelock);
4553 		goto out;
4554 	}
4555 
4556 	/* the dir has been modified */
4557 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
4558 
4559 	/* remove the entry from the populated directory */
4560 	if (CFS_ISFS_NONSHARED(fscp) &&
4561 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
4562 		error = cachefs_dir_rmentry(dcp, nm);
4563 		if (error) {
4564 			cachefs_nocache(dcp);
4565 			error = 0;
4566 		}
4567 	}
4568 	mutex_exit(&dcp->c_statelock);
4569 
4570 	/* fix up the file we deleted */
4571 	mutex_enter(&cp->c_statelock);
4572 	if (cp->c_attr.va_nlink == 1)
4573 		cp->c_flags |= CN_DESTROY;
4574 	else
4575 		cp->c_flags |= CN_UPDATED;
4576 
4577 	cp->c_attr.va_nlink--;
4578 	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
4579 	mutex_exit(&cp->c_statelock);
4580 
4581 out:
4582 	rw_exit(&dcp->c_rwlock);
4583 	return (error);
4584 }
4585 
4586 /*
4587  * cachefs_remove_backfs_nfsv4
4588  *
4589  * Call NFSv4 back filesystem to handle the remove (cachefs
4590  * pass-through support for NFSv4).
4591  */
4592 int
4593 cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
4594 {
4595 	cnode_t *dcp = VTOC(dvp);
4596 	cnode_t *cp = VTOC(vp);
4597 	vnode_t *dbackvp;
4598 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4599 	int error = 0;
4600 
4601 	/*
4602 	 * For NFSv4 pass-through to work, only connected operation
4603 	 * is supported, the cnode backvp must exist, and cachefs
4604 	 * optional (eg., disconnectable) flags are turned off. Assert
4605 	 * these conditions for the getattr operation.
4606 	 */
4607 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4608 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
4609 
4610 	/* Should hold the directory readwrite lock to update directory */
4611 	ASSERT(RW_WRITE_HELD(&dcp->c_rwlock));
4612 
4613 	/*
4614 	 * Update attributes for directory. Note that
4615 	 * CFSOP_CHECK_COBJECT asserts for c_statelock being
4616 	 * held, so grab it before calling the routine.
4617 	 */
4618 	mutex_enter(&dcp->c_statelock);
4619 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
4620 	mutex_exit(&dcp->c_statelock);
4621 	if (error)
4622 		goto out;
4623 
4624 	/*
4625 	 * Update attributes for cp. Note that CFSOP_CHECK_COBJECT
4626 	 * asserts for c_statelock being held, so grab it before
4627 	 * calling the routine.
4628 	 */
4629 	mutex_enter(&cp->c_statelock);
4630 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
4631 	if (error) {
4632 		mutex_exit(&cp->c_statelock);
4633 		goto out;
4634 	}
4635 
4636 	/*
4637 	 * Drop the backvp so nfs if the link count is 1 so that
4638 	 * nfs does not do rename. Ensure that we will destroy the cnode
4639 	 * since this cnode no longer contains the backvp. Note that we
4640 	 * maintain lock on this cnode to prevent change till the remove
4641 	 * completes, otherwise other operations will encounter an ESTALE
4642 	 * if they try to use the cnode with CN_DESTROY set (see
4643 	 * cachefs_get_backvp()), or change the state of the cnode
4644 	 * while we're removing it.
4645 	 */
4646 	if (cp->c_attr.va_nlink == 1) {
4647 		/*
4648 		 * The unldvp information is created for the case
4649 		 * when there is more than one reference on the
4650 		 * vnode when a remove operation is called. If the
4651 		 * remove itself was holding a reference to the
4652 		 * vnode, then a subsequent remove will remove the
4653 		 * backvp, so we need to get rid of the unldvp
4654 		 * before removing the backvp. An alternate would
4655 		 * be to simply ignore the remove and let the
4656 		 * inactivation routine do the deletion of the
4657 		 * unldvp.
4658 		 */
4659 		if (cp->c_unldvp) {
4660 			VN_RELE(cp->c_unldvp);
4661 			cachefs_kmem_free(cp->c_unlname, MAXNAMELEN);
4662 			crfree(cp->c_unlcred);
4663 			cp->c_unldvp = NULL;
4664 			cp->c_unlcred = NULL;
4665 		}
4666 		cp->c_flags |= CN_DESTROY;
4667 		cp->c_attr.va_nlink = 0;
4668 		VN_RELE(cp->c_backvp);
4669 		cp->c_backvp = NULL;
4670 	}
4671 
4672 	/* perform the remove on back fs after extracting directory backvp */
4673 	mutex_enter(&dcp->c_statelock);
4674 	dbackvp = dcp->c_backvp;
4675 	mutex_exit(&dcp->c_statelock);
4676 
4677 	CFS_DPRINT_BACKFS_NFSV4(fscp,
4678 	    ("cachefs_remove (nfsv4): dcp %p, dbackvp %p, name %s\n",
4679 	    dcp, dbackvp, nm));
4680 	error = VOP_REMOVE(dbackvp, nm, cr, NULL, 0);
4681 	if (error) {
4682 		mutex_exit(&cp->c_statelock);
4683 		goto out;
4684 	}
4685 
4686 	/* fix up the file we deleted, if not destroying the cnode */
4687 	if ((cp->c_flags & CN_DESTROY) == 0) {
4688 		cp->c_attr.va_nlink--;
4689 		cp->c_flags |= CN_UPDATED;
4690 	}
4691 
4692 	mutex_exit(&cp->c_statelock);
4693 
4694 out:
4695 	return (error);
4696 }
4697 
4698 int
4699 cachefs_remove_disconnected(vnode_t *dvp, char *nm, cred_t *cr,
4700     vnode_t *vp)
4701 {
4702 	cnode_t *dcp = VTOC(dvp);
4703 	cnode_t *cp = VTOC(vp);
4704 	fscache_t *fscp = C_TO_FSCACHE(dcp);
4705 	int error = 0;
4706 	off_t commit = 0;
4707 	timestruc_t current_time;
4708 
4709 	if (CFS_ISFS_WRITE_AROUND(fscp))
4710 		return (ETIMEDOUT);
4711 
4712 	if (cp->c_metadata.md_flags & MD_NEEDATTRS)
4713 		return (ETIMEDOUT);
4714 
4715 	/*
4716 	 * Acquire the rwlock (WRITER) on the directory to prevent other
4717 	 * activity on the directory.
4718 	 */
4719 	rw_enter(&dcp->c_rwlock, RW_WRITER);
4720 
4721 	/* dir must be populated */
4722 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
4723 		error = ETIMEDOUT;
4724 		goto out;
4725 	}
4726 
4727 	mutex_enter(&dcp->c_statelock);
4728 	mutex_enter(&cp->c_statelock);
4729 
4730 	error = cachefs_stickyrmchk(dcp, cp, cr);
4731 
4732 	mutex_exit(&cp->c_statelock);
4733 	mutex_exit(&dcp->c_statelock);
4734 	if (error)
4735 		goto out;
4736 
4737 	/* purge dnlc of this entry so can get accurate vnode count */
4738 	dnlc_purge_vp(vp);
4739 
4740 	/*
4741 	 * If the cnode is active, make a link to the file
4742 	 * so operations on the file will continue.
4743 	 */
4744 	if ((vp->v_type != VDIR) &&
4745 	    !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) {
4746 		error = cachefs_remove_dolink(dvp, vp, nm, cr);
4747 		if (error)
4748 			goto out;
4749 	}
4750 
4751 	if (cp->c_attr.va_nlink > 1) {
4752 		mutex_enter(&cp->c_statelock);
4753 		if (cachefs_modified_alloc(cp)) {
4754 			mutex_exit(&cp->c_statelock);
4755 			error = ENOSPC;
4756 			goto out;
4757 		}
4758 		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
4759 			error = cachefs_dlog_cidmap(fscp);
4760 			if (error) {
4761 				mutex_exit(&cp->c_statelock);
4762 				error = ENOSPC;
4763 				goto out;
4764 			}
4765 			cp->c_metadata.md_flags |= MD_MAPPING;
4766 			cp->c_flags |= CN_UPDATED;
4767 		}
4768 		mutex_exit(&cp->c_statelock);
4769 	}
4770 
4771 	/* log the remove */
4772 	commit = cachefs_dlog_remove(fscp, dcp, nm, cp, cr);
4773 	if (commit == 0) {
4774 		error = ENOSPC;
4775 		goto out;
4776 	}
4777 
4778 	/* remove the file from the dir */
4779 	mutex_enter(&dcp->c_statelock);
4780 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
4781 		mutex_exit(&dcp->c_statelock);
4782 		error = ETIMEDOUT;
4783 		goto out;
4784 
4785 	}
4786 	cachefs_modified(dcp);
4787 	error = cachefs_dir_rmentry(dcp, nm);
4788 	if (error) {
4789 		mutex_exit(&dcp->c_statelock);
4790 		if (error == ENOTDIR)
4791 			error = ETIMEDOUT;
4792 		goto out;
4793 	}
4794 
4795 	/* update parent dir times */
4796 	gethrestime(&current_time);
4797 	dcp->c_metadata.md_localctime = current_time;
4798 	dcp->c_metadata.md_localmtime = current_time;
4799 	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
4800 	dcp->c_flags |= CN_UPDATED;
4801 	mutex_exit(&dcp->c_statelock);
4802 
4803 	/* adjust file we are deleting */
4804 	mutex_enter(&cp->c_statelock);
4805 	cp->c_attr.va_nlink--;
4806 	cp->c_metadata.md_localctime = current_time;
4807 	cp->c_metadata.md_flags |= MD_LOCALCTIME;
4808 	if (cp->c_attr.va_nlink == 0) {
4809 		cp->c_flags |= CN_DESTROY;
4810 	} else {
4811 		cp->c_flags |= CN_UPDATED;
4812 	}
4813 	mutex_exit(&cp->c_statelock);
4814 
4815 out:
4816 	if (commit) {
4817 		/* commit the log entry */
4818 		if (cachefs_dlog_commit(fscp, commit, error)) {
4819 			/*EMPTY*/
4820 			/* XXX bob: fix on panic */
4821 		}
4822 	}
4823 
4824 	rw_exit(&dcp->c_rwlock);
4825 	return (error);
4826 }
4827 
4828 /*ARGSUSED*/
4829 static int
4830 cachefs_link(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr,
4831     caller_context_t *ct, int flags)
4832 {
4833 	fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
4834 	cnode_t *tdcp = VTOC(tdvp);
4835 	struct vnode *realvp;
4836 	int error = 0;
4837 	int held = 0;
4838 	int connected = 0;
4839 
4840 #ifdef CFSDEBUG
4841 	CFS_DEBUG(CFSDEBUG_VOPS)
4842 		printf("cachefs_link: ENTER fvp %p tdvp %p tnm %s\n",
4843 		    (void *)fvp, (void *)tdvp, tnm);
4844 #endif
4845 
4846 	if (getzoneid() != GLOBAL_ZONEID) {
4847 		error = EPERM;
4848 		goto out;
4849 	}
4850 
4851 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
4852 		ASSERT(tdcp->c_flags & CN_NOCACHE);
4853 
4854 	if (VOP_REALVP(fvp, &realvp, ct) == 0) {
4855 		fvp = realvp;
4856 	}
4857 
4858 	/*
4859 	 * Cachefs only provides pass-through support for NFSv4,
4860 	 * and all vnode operations are passed through to the
4861 	 * back file system. For NFSv4 pass-through to work, only
4862 	 * connected operation is supported, the cnode backvp must
4863 	 * exist, and cachefs optional (eg., disconnectable) flags
4864 	 * are turned off. Assert these conditions to ensure that
4865 	 * the backfilesystem is called for the link operation.
4866 	 */
4867 
4868 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4869 	CFS_BACKFS_NFSV4_ASSERT_CNODE(tdcp);
4870 
4871 	for (;;) {
4872 		/* get (or renew) access to the file system */
4873 		if (held) {
4874 			/* Won't loop with NFSv4 connected behavior */
4875 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
4876 			rw_exit(&tdcp->c_rwlock);
4877 			cachefs_cd_release(fscp);
4878 			held = 0;
4879 		}
4880 		error = cachefs_cd_access(fscp, connected, 1);
4881 		if (error)
4882 			break;
4883 		rw_enter(&tdcp->c_rwlock, RW_WRITER);
4884 		held = 1;
4885 
4886 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
4887 			error = cachefs_link_connected(tdvp, fvp, tnm, cr);
4888 			if (CFS_TIMEOUT(fscp, error)) {
4889 				rw_exit(&tdcp->c_rwlock);
4890 				cachefs_cd_release(fscp);
4891 				held = 0;
4892 				cachefs_cd_timedout(fscp);
4893 				connected = 0;
4894 				continue;
4895 			}
4896 		} else {
4897 			error = cachefs_link_disconnected(tdvp, fvp, tnm,
4898 			    cr);
4899 			if (CFS_TIMEOUT(fscp, error)) {
4900 				connected = 1;
4901 				continue;
4902 			}
4903 		}
4904 		break;
4905 	}
4906 
4907 	if (held) {
4908 		rw_exit(&tdcp->c_rwlock);
4909 		cachefs_cd_release(fscp);
4910 	}
4911 
4912 #ifdef CFS_CD_DEBUG
4913 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
4914 #endif
4915 out:
4916 #ifdef CFSDEBUG
4917 	CFS_DEBUG(CFSDEBUG_VOPS)
4918 		printf("cachefs_link: EXIT fvp %p tdvp %p tnm %s\n",
4919 		    (void *)fvp, (void *)tdvp, tnm);
4920 #endif
4921 	return (error);
4922 }
4923 
4924 static int
4925 cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr)
4926 {
4927 	cnode_t *tdcp = VTOC(tdvp);
4928 	cnode_t *fcp = VTOC(fvp);
4929 	fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
4930 	int error = 0;
4931 	vnode_t *backvp = NULL;
4932 
4933 	if (tdcp != fcp) {
4934 		mutex_enter(&fcp->c_statelock);
4935 
4936 		if (fcp->c_backvp == NULL) {
4937 			error = cachefs_getbackvp(fscp, fcp);
4938 			if (error) {
4939 				mutex_exit(&fcp->c_statelock);
4940 				goto out;
4941 			}
4942 		}
4943 
4944 		error = CFSOP_CHECK_COBJECT(fscp, fcp, 0, cr);
4945 		if (error) {
4946 			mutex_exit(&fcp->c_statelock);
4947 			goto out;
4948 		}
4949 		backvp = fcp->c_backvp;
4950 		VN_HOLD(backvp);
4951 		mutex_exit(&fcp->c_statelock);
4952 	}
4953 
4954 	mutex_enter(&tdcp->c_statelock);
4955 
4956 	/* get backvp of target directory */
4957 	if (tdcp->c_backvp == NULL) {
4958 		error = cachefs_getbackvp(fscp, tdcp);
4959 		if (error) {
4960 			mutex_exit(&tdcp->c_statelock);
4961 			goto out;
4962 		}
4963 	}
4964 
4965 	/* consistency check target directory */
4966 	error = CFSOP_CHECK_COBJECT(fscp, tdcp, 0, cr);
4967 	if (error) {
4968 		mutex_exit(&tdcp->c_statelock);
4969 		goto out;
4970 	}
4971 	if (backvp == NULL) {
4972 		backvp = tdcp->c_backvp;
4973 		VN_HOLD(backvp);
4974 	}
4975 
4976 	/* perform the link on the back fs */
4977 	CFS_DPRINT_BACKFS_NFSV4(fscp,
4978 	    ("cachefs_link (nfsv4): tdcp %p, tdbackvp %p, "
4979 	    "name %s\n", tdcp, tdcp->c_backvp, tnm));
4980 	error = VOP_LINK(tdcp->c_backvp, backvp, tnm, cr, NULL, 0);
4981 	if (error) {
4982 		mutex_exit(&tdcp->c_statelock);
4983 		goto out;
4984 	}
4985 
4986 	CFSOP_MODIFY_COBJECT(fscp, tdcp, cr);
4987 
4988 	/* if the dir is populated, add the new link */
4989 	if (CFS_ISFS_NONSHARED(fscp) &&
4990 	    (tdcp->c_metadata.md_flags & MD_POPULATED)) {
4991 		error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie,
4992 		    &fcp->c_id, SM_ASYNC);
4993 		if (error) {
4994 			cachefs_nocache(tdcp);
4995 			error = 0;
4996 		}
4997 	}
4998 	mutex_exit(&tdcp->c_statelock);
4999 
5000 	/* get the new link count on the file */
5001 	mutex_enter(&fcp->c_statelock);
5002 	fcp->c_flags |= CN_UPDATED;
5003 	CFSOP_MODIFY_COBJECT(fscp, fcp, cr);
5004 	if (fcp->c_backvp == NULL) {
5005 		error = cachefs_getbackvp(fscp, fcp);
5006 		if (error) {
5007 			mutex_exit(&fcp->c_statelock);
5008 			goto out;
5009 		}
5010 	}
5011 
5012 	/* XXX bob: given what modify_cobject does this seems unnecessary */
5013 	fcp->c_attr.va_mask = AT_ALL;
5014 	error = VOP_GETATTR(fcp->c_backvp, &fcp->c_attr, 0, cr, NULL);
5015 	mutex_exit(&fcp->c_statelock);
5016 out:
5017 	if (backvp)
5018 		VN_RELE(backvp);
5019 
5020 	return (error);
5021 }
5022 
5023 static int
5024 cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp, char *tnm,
5025     cred_t *cr)
5026 {
5027 	cnode_t *tdcp = VTOC(tdvp);
5028 	cnode_t *fcp = VTOC(fvp);
5029 	fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
5030 	int error = 0;
5031 	timestruc_t current_time;
5032 	off_t commit = 0;
5033 
5034 	if (fvp->v_type == VDIR && secpolicy_fs_linkdir(cr, fvp->v_vfsp) != 0 ||
5035 	    fcp->c_attr.va_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)
5036 		return (EPERM);
5037 
5038 	if (CFS_ISFS_WRITE_AROUND(fscp))
5039 		return (ETIMEDOUT);
5040 
5041 	if (fcp->c_metadata.md_flags & MD_NEEDATTRS)
5042 		return (ETIMEDOUT);
5043 
5044 	mutex_enter(&tdcp->c_statelock);
5045 
5046 	/* check permissions */
5047 	if (error = cachefs_access_local(tdcp, (VEXEC|VWRITE), cr)) {
5048 		mutex_exit(&tdcp->c_statelock);
5049 		goto out;
5050 	}
5051 
5052 	/* the directory front file must be populated */
5053 	if ((tdcp->c_metadata.md_flags & MD_POPULATED) == 0) {
5054 		error = ETIMEDOUT;
5055 		mutex_exit(&tdcp->c_statelock);
5056 		goto out;
5057 	}
5058 
5059 	/* make sure tnm does not already exist in the directory */
5060 	error = cachefs_dir_look(tdcp, tnm, NULL, NULL, NULL, NULL);
5061 	if (error == ENOTDIR) {
5062 		error = ETIMEDOUT;
5063 		mutex_exit(&tdcp->c_statelock);
5064 		goto out;
5065 	}
5066 	if (error != ENOENT) {
5067 		error = EEXIST;
5068 		mutex_exit(&tdcp->c_statelock);
5069 		goto out;
5070 	}
5071 
5072 	mutex_enter(&fcp->c_statelock);
5073 
5074 	/* create a mapping for the file if necessary */
5075 	if ((fcp->c_metadata.md_flags & MD_MAPPING) == 0) {
5076 		error = cachefs_dlog_cidmap(fscp);
5077 		if (error) {
5078 			mutex_exit(&fcp->c_statelock);
5079 			mutex_exit(&tdcp->c_statelock);
5080 			error = ENOSPC;
5081 			goto out;
5082 		}
5083 		fcp->c_metadata.md_flags |= MD_MAPPING;
5084 		fcp->c_flags |= CN_UPDATED;
5085 	}
5086 
5087 	/* mark file as modified */
5088 	if (cachefs_modified_alloc(fcp)) {
5089 		mutex_exit(&fcp->c_statelock);
5090 		mutex_exit(&tdcp->c_statelock);
5091 		error = ENOSPC;
5092 		goto out;
5093 	}
5094 	mutex_exit(&fcp->c_statelock);
5095 
5096 	/* log the operation */
5097 	commit = cachefs_dlog_link(fscp, tdcp, tnm, fcp, cr);
5098 	if (commit == 0) {
5099 		mutex_exit(&tdcp->c_statelock);
5100 		error = ENOSPC;
5101 		goto out;
5102 	}
5103 
5104 	gethrestime(&current_time);
5105 
5106 	/* make the new link */
5107 	cachefs_modified(tdcp);
5108 	error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie,
5109 	    &fcp->c_id, SM_ASYNC);
5110 	if (error) {
5111 		error = 0;
5112 		mutex_exit(&tdcp->c_statelock);
5113 		goto out;
5114 	}
5115 
5116 	/* Update mtime/ctime of parent dir */
5117 	tdcp->c_metadata.md_localmtime = current_time;
5118 	tdcp->c_metadata.md_localctime = current_time;
5119 	tdcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5120 	tdcp->c_flags |= CN_UPDATED;
5121 	mutex_exit(&tdcp->c_statelock);
5122 
5123 	/* update the file we linked to */
5124 	mutex_enter(&fcp->c_statelock);
5125 	fcp->c_attr.va_nlink++;
5126 	fcp->c_metadata.md_localctime = current_time;
5127 	fcp->c_metadata.md_flags |= MD_LOCALCTIME;
5128 	fcp->c_flags |= CN_UPDATED;
5129 	mutex_exit(&fcp->c_statelock);
5130 
5131 out:
5132 	if (commit) {
5133 		/* commit the log entry */
5134 		if (cachefs_dlog_commit(fscp, commit, error)) {
5135 			/*EMPTY*/
5136 			/* XXX bob: fix on panic */
5137 		}
5138 	}
5139 
5140 	return (error);
5141 }
5142 
5143 /*
5144  * Serialize all renames in CFS, to avoid deadlocks - We have to hold two
5145  * cnodes atomically.
5146  */
5147 kmutex_t cachefs_rename_lock;
5148 
5149 /*ARGSUSED*/
5150 static int
5151 cachefs_rename(vnode_t *odvp, char *onm, vnode_t *ndvp,
5152     char *nnm, cred_t *cr, caller_context_t *ct, int flags)
5153 {
5154 	fscache_t *fscp = C_TO_FSCACHE(VTOC(odvp));
5155 	cachefscache_t *cachep = fscp->fs_cache;
5156 	int error = 0;
5157 	int held = 0;
5158 	int connected = 0;
5159 	vnode_t *delvp = NULL;
5160 	vnode_t *tvp = NULL;
5161 	int vfslock = 0;
5162 	struct vnode *realvp;
5163 
5164 	if (getzoneid() != GLOBAL_ZONEID)
5165 		return (EPERM);
5166 
5167 	if (VOP_REALVP(ndvp, &realvp, ct) == 0)
5168 		ndvp = realvp;
5169 
5170 	/*
5171 	 * if the fs NOFILL or NOCACHE flags are on, then the old and new
5172 	 * directory cnodes better indicate NOCACHE mode as well.
5173 	 */
5174 	ASSERT(
5175 	    (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE)) == 0 ||
5176 	    ((VTOC(odvp)->c_flags & CN_NOCACHE) &&
5177 	    (VTOC(ndvp)->c_flags & CN_NOCACHE)));
5178 
5179 	/*
5180 	 * Cachefs only provides pass-through support for NFSv4,
5181 	 * and all vnode operations are passed through to the
5182 	 * back file system. For NFSv4 pass-through to work, only
5183 	 * connected operation is supported, the cnode backvp must
5184 	 * exist, and cachefs optional (eg., disconnectable) flags
5185 	 * are turned off. Assert these conditions to ensure that
5186 	 * the backfilesystem is called for the rename operation.
5187 	 */
5188 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
5189 	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(odvp));
5190 	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(ndvp));
5191 
5192 	for (;;) {
5193 		if (vfslock) {
5194 			vn_vfsunlock(delvp);
5195 			vfslock = 0;
5196 		}
5197 		if (delvp) {
5198 			VN_RELE(delvp);
5199 			delvp = NULL;
5200 		}
5201 
5202 		/* get (or renew) access to the file system */
5203 		if (held) {
5204 			/* Won't loop for NFSv4 connected support */
5205 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
5206 			cachefs_cd_release(fscp);
5207 			held = 0;
5208 		}
5209 		error = cachefs_cd_access(fscp, connected, 1);
5210 		if (error)
5211 			break;
5212 		held = 1;
5213 
5214 		/* sanity check */
5215 		if ((odvp->v_type != VDIR) || (ndvp->v_type != VDIR)) {
5216 			error = EINVAL;
5217 			break;
5218 		}
5219 
5220 		/* cannot rename from or to . or .. */
5221 		if (strcmp(onm, ".") == 0 || strcmp(onm, "..") == 0 ||
5222 		    strcmp(nnm, ".") == 0 || strcmp(nnm, "..") == 0) {
5223 			error = EINVAL;
5224 			break;
5225 		}
5226 
5227 		if (odvp != ndvp) {
5228 			/*
5229 			 * if moving a directory, its notion
5230 			 * of ".." will change
5231 			 */
5232 			error = cachefs_lookup_common(odvp, onm, &tvp,
5233 			    NULL, 0, NULL, cr);
5234 			if (error == 0) {
5235 				ASSERT(tvp != NULL);
5236 				if (tvp->v_type == VDIR) {
5237 					cnode_t *cp = VTOC(tvp);
5238 
5239 					dnlc_remove(tvp, "..");
5240 
5241 					mutex_enter(&cp->c_statelock);
5242 					CFSOP_MODIFY_COBJECT(fscp, cp, cr);
5243 					mutex_exit(&cp->c_statelock);
5244 				}
5245 			} else {
5246 				tvp = NULL;
5247 				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5248 					if (CFS_TIMEOUT(fscp, error)) {
5249 						cachefs_cd_release(fscp);
5250 						held = 0;
5251 						cachefs_cd_timedout(fscp);
5252 						connected = 0;
5253 						continue;
5254 					}
5255 				} else {
5256 					if (CFS_TIMEOUT(fscp, error)) {
5257 						connected = 1;
5258 						continue;
5259 					}
5260 				}
5261 				break;
5262 			}
5263 		}
5264 
5265 		/* get the cnode if file being deleted */
5266 		error = cachefs_lookup_common(ndvp, nnm, &delvp, NULL, 0,
5267 		    NULL, cr);
5268 		if (error) {
5269 			delvp = NULL;
5270 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5271 				if (CFS_TIMEOUT(fscp, error)) {
5272 					cachefs_cd_release(fscp);
5273 					held = 0;
5274 					cachefs_cd_timedout(fscp);
5275 					connected = 0;
5276 					continue;
5277 				}
5278 			} else {
5279 				if (CFS_TIMEOUT(fscp, error)) {
5280 					connected = 1;
5281 					continue;
5282 				}
5283 			}
5284 			if (error != ENOENT)
5285 				break;
5286 		}
5287 
5288 		if (delvp && delvp->v_type == VDIR) {
5289 			/* see ufs_dirremove for why this is done, mount race */
5290 			if (vn_vfswlock(delvp)) {
5291 				error = EBUSY;
5292 				break;
5293 			}
5294 			vfslock = 1;
5295 			if (vn_mountedvfs(delvp) != NULL) {
5296 				error = EBUSY;
5297 				break;
5298 			}
5299 		}
5300 
5301 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5302 			error = cachefs_rename_connected(odvp, onm,
5303 			    ndvp, nnm, cr, delvp);
5304 			if (CFS_TIMEOUT(fscp, error)) {
5305 				cachefs_cd_release(fscp);
5306 				held = 0;
5307 				cachefs_cd_timedout(fscp);
5308 				connected = 0;
5309 				continue;
5310 			}
5311 		} else {
5312 			error = cachefs_rename_disconnected(odvp, onm,
5313 			    ndvp, nnm, cr, delvp);
5314 			if (CFS_TIMEOUT(fscp, error)) {
5315 				connected = 1;
5316 				continue;
5317 			}
5318 		}
5319 		break;
5320 	}
5321 
5322 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RENAME)) {
5323 		struct fid gone;
5324 
5325 		bzero(&gone, sizeof (gone));
5326 		gone.fid_len = MAXFIDSZ;
5327 		if (delvp != NULL)
5328 			(void) VOP_FID(delvp, &gone, ct);
5329 
5330 		cachefs_log_rename(cachep, error, fscp->fs_cfsvfsp,
5331 		    &gone, 0, (delvp != NULL), crgetuid(cr));
5332 	}
5333 
5334 	if (held)
5335 		cachefs_cd_release(fscp);
5336 
5337 	if (vfslock)
5338 		vn_vfsunlock(delvp);
5339 
5340 	if (delvp)
5341 		VN_RELE(delvp);
5342 	if (tvp)
5343 		VN_RELE(tvp);
5344 
5345 #ifdef CFS_CD_DEBUG
5346 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
5347 #endif
5348 	return (error);
5349 }
5350 
5351 static int
5352 cachefs_rename_connected(vnode_t *odvp, char *onm, vnode_t *ndvp,
5353     char *nnm, cred_t *cr, vnode_t *delvp)
5354 {
5355 	cnode_t *odcp = VTOC(odvp);
5356 	cnode_t *ndcp = VTOC(ndvp);
5357 	vnode_t *revp = NULL;
5358 	cnode_t *recp;
5359 	cnode_t *delcp;
5360 	fscache_t *fscp = C_TO_FSCACHE(odcp);
5361 	int error = 0;
5362 	struct fid cookie;
5363 	struct fid *cookiep;
5364 	cfs_cid_t cid;
5365 	int gotdirent;
5366 
5367 	/* find the file we are renaming */
5368 	error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr);
5369 	if (error)
5370 		return (error);
5371 	recp = VTOC(revp);
5372 
5373 	/*
5374 	 * To avoid deadlock, we acquire this global rename lock before
5375 	 * we try to get the locks for the source and target directories.
5376 	 */
5377 	mutex_enter(&cachefs_rename_lock);
5378 	rw_enter(&odcp->c_rwlock, RW_WRITER);
5379 	if (odcp != ndcp) {
5380 		rw_enter(&ndcp->c_rwlock, RW_WRITER);
5381 	}
5382 	mutex_exit(&cachefs_rename_lock);
5383 
5384 	ASSERT((odcp->c_flags & CN_ASYNC_POP_WORKING) == 0);
5385 	ASSERT((ndcp->c_flags & CN_ASYNC_POP_WORKING) == 0);
5386 
5387 	mutex_enter(&odcp->c_statelock);
5388 	if (odcp->c_backvp == NULL) {
5389 		error = cachefs_getbackvp(fscp, odcp);
5390 		if (error) {
5391 			mutex_exit(&odcp->c_statelock);
5392 			goto out;
5393 		}
5394 	}
5395 
5396 	error = CFSOP_CHECK_COBJECT(fscp, odcp, 0, cr);
5397 	if (error) {
5398 		mutex_exit(&odcp->c_statelock);
5399 		goto out;
5400 	}
5401 	mutex_exit(&odcp->c_statelock);
5402 
5403 	if (odcp != ndcp) {
5404 		mutex_enter(&ndcp->c_statelock);
5405 		if (ndcp->c_backvp == NULL) {
5406 			error = cachefs_getbackvp(fscp, ndcp);
5407 			if (error) {
5408 				mutex_exit(&ndcp->c_statelock);
5409 				goto out;
5410 			}
5411 		}
5412 
5413 		error = CFSOP_CHECK_COBJECT(fscp, ndcp, 0, cr);
5414 		if (error) {
5415 			mutex_exit(&ndcp->c_statelock);
5416 			goto out;
5417 		}
5418 		mutex_exit(&ndcp->c_statelock);
5419 	}
5420 
5421 	/* if a file is being deleted because of this rename */
5422 	if (delvp) {
5423 		/* if src and dest file are same */
5424 		if (delvp == revp) {
5425 			error = 0;
5426 			goto out;
5427 		}
5428 
5429 		/*
5430 		 * If the cnode is active, make a link to the file
5431 		 * so operations on the file will continue.
5432 		 */
5433 		dnlc_purge_vp(delvp);
5434 		delcp = VTOC(delvp);
5435 		if ((delvp->v_type != VDIR) &&
5436 		    !((delvp->v_count == 1) ||
5437 		    ((delvp->v_count == 2) && delcp->c_ipending))) {
5438 			error = cachefs_remove_dolink(ndvp, delvp, nnm, cr);
5439 			if (error)
5440 				goto out;
5441 		}
5442 	}
5443 
5444 	/* do the rename on the back fs */
5445 	CFS_DPRINT_BACKFS_NFSV4(fscp,
5446 	    ("cachefs_rename (nfsv4): odcp %p, odbackvp %p, "
5447 	    " ndcp %p, ndbackvp %p, onm %s, nnm %s\n",
5448 	    odcp, odcp->c_backvp, ndcp, ndcp->c_backvp, onm, nnm));
5449 	error = VOP_RENAME(odcp->c_backvp, onm, ndcp->c_backvp, nnm, cr, NULL,
5450 	    0);
5451 	if (error)
5452 		goto out;
5453 
5454 	/* purge mappings to file in the old directory */
5455 	dnlc_purge_vp(odvp);
5456 
5457 	/* purge mappings in the new dir if we deleted a file */
5458 	if (delvp && (odvp != ndvp))
5459 		dnlc_purge_vp(ndvp);
5460 
5461 	/* update the file we just deleted */
5462 	if (delvp) {
5463 		mutex_enter(&delcp->c_statelock);
5464 		if (delcp->c_attr.va_nlink == 1) {
5465 			delcp->c_flags |= CN_DESTROY;
5466 		} else {
5467 			delcp->c_flags |= CN_UPDATED;
5468 		}
5469 		delcp->c_attr.va_nlink--;
5470 		CFSOP_MODIFY_COBJECT(fscp, delcp, cr);
5471 		mutex_exit(&delcp->c_statelock);
5472 	}
5473 
5474 	/* find the entry in the old directory */
5475 	mutex_enter(&odcp->c_statelock);
5476 	gotdirent = 0;
5477 	cookiep = NULL;
5478 	if (CFS_ISFS_NONSHARED(fscp) &&
5479 	    (odcp->c_metadata.md_flags & MD_POPULATED)) {
5480 		error = cachefs_dir_look(odcp, onm, &cookie,
5481 		    NULL, NULL, &cid);
5482 		if (error == 0 || error == EINVAL) {
5483 			gotdirent = 1;
5484 			if (error == 0)
5485 				cookiep = &cookie;
5486 		} else {
5487 			cachefs_inval_object(odcp);
5488 		}
5489 	}
5490 	error = 0;
5491 
5492 	/* remove the directory entry from the old directory */
5493 	if (gotdirent) {
5494 		error = cachefs_dir_rmentry(odcp, onm);
5495 		if (error) {
5496 			cachefs_nocache(odcp);
5497 			error = 0;
5498 		}
5499 	}
5500 	CFSOP_MODIFY_COBJECT(fscp, odcp, cr);
5501 	mutex_exit(&odcp->c_statelock);
5502 
5503 	/* install the directory entry in the new directory */
5504 	mutex_enter(&ndcp->c_statelock);
5505 	if (CFS_ISFS_NONSHARED(fscp) &&
5506 	    (ndcp->c_metadata.md_flags & MD_POPULATED)) {
5507 		error = 1;
5508 		if (gotdirent) {
5509 			ASSERT(cid.cid_fileno != 0);
5510 			error = 0;
5511 			if (delvp) {
5512 				error = cachefs_dir_rmentry(ndcp, nnm);
5513 			}
5514 			if (error == 0) {
5515 				error = cachefs_dir_enter(ndcp, nnm, cookiep,
5516 				    &cid, SM_ASYNC);
5517 			}
5518 		}
5519 		if (error) {
5520 			cachefs_nocache(ndcp);
5521 			error = 0;
5522 		}
5523 	}
5524 	if (odcp != ndcp)
5525 		CFSOP_MODIFY_COBJECT(fscp, ndcp, cr);
5526 	mutex_exit(&ndcp->c_statelock);
5527 
5528 	/* ctime of renamed file has changed */
5529 	mutex_enter(&recp->c_statelock);
5530 	CFSOP_MODIFY_COBJECT(fscp, recp, cr);
5531 	mutex_exit(&recp->c_statelock);
5532 
5533 out:
5534 	if (odcp != ndcp)
5535 		rw_exit(&ndcp->c_rwlock);
5536 	rw_exit(&odcp->c_rwlock);
5537 
5538 	VN_RELE(revp);
5539 
5540 	return (error);
5541 }
5542 
5543 static int
5544 cachefs_rename_disconnected(vnode_t *odvp, char *onm, vnode_t *ndvp,
5545     char *nnm, cred_t *cr, vnode_t *delvp)
5546 {
5547 	cnode_t *odcp = VTOC(odvp);
5548 	cnode_t *ndcp = VTOC(ndvp);
5549 	cnode_t *delcp = NULL;
5550 	vnode_t *revp = NULL;
5551 	cnode_t *recp;
5552 	fscache_t *fscp = C_TO_FSCACHE(odcp);
5553 	int error = 0;
5554 	struct fid cookie;
5555 	struct fid *cookiep;
5556 	cfs_cid_t cid;
5557 	off_t commit = 0;
5558 	timestruc_t current_time;
5559 
5560 	if (CFS_ISFS_WRITE_AROUND(fscp))
5561 		return (ETIMEDOUT);
5562 
5563 	/* find the file we are renaming */
5564 	error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr);
5565 	if (error)
5566 		return (error);
5567 	recp = VTOC(revp);
5568 
5569 	/*
5570 	 * To avoid deadlock, we acquire this global rename lock before
5571 	 * we try to get the locks for the source and target directories.
5572 	 */
5573 	mutex_enter(&cachefs_rename_lock);
5574 	rw_enter(&odcp->c_rwlock, RW_WRITER);
5575 	if (odcp != ndcp) {
5576 		rw_enter(&ndcp->c_rwlock, RW_WRITER);
5577 	}
5578 	mutex_exit(&cachefs_rename_lock);
5579 
5580 	if (recp->c_metadata.md_flags & MD_NEEDATTRS) {
5581 		error = ETIMEDOUT;
5582 		goto out;
5583 	}
5584 
5585 	if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) {
5586 		mutex_enter(&recp->c_statelock);
5587 		if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) {
5588 			error = cachefs_dlog_cidmap(fscp);
5589 			if (error) {
5590 				mutex_exit(&recp->c_statelock);
5591 				error = ENOSPC;
5592 				goto out;
5593 			}
5594 			recp->c_metadata.md_flags |= MD_MAPPING;
5595 			recp->c_flags |= CN_UPDATED;
5596 		}
5597 		mutex_exit(&recp->c_statelock);
5598 	}
5599 
5600 	/* check permissions */
5601 	/* XXX clean up this mutex junk sometime */
5602 	mutex_enter(&odcp->c_statelock);
5603 	error = cachefs_access_local(odcp, (VEXEC|VWRITE), cr);
5604 	mutex_exit(&odcp->c_statelock);
5605 	if (error != 0)
5606 		goto out;
5607 	mutex_enter(&ndcp->c_statelock);
5608 	error = cachefs_access_local(ndcp, (VEXEC|VWRITE), cr);
5609 	mutex_exit(&ndcp->c_statelock);
5610 	if (error != 0)
5611 		goto out;
5612 	mutex_enter(&odcp->c_statelock);
5613 	error = cachefs_stickyrmchk(odcp, recp, cr);
5614 	mutex_exit(&odcp->c_statelock);
5615 	if (error != 0)
5616 		goto out;
5617 
5618 	/* dirs must be populated */
5619 	if (((odcp->c_metadata.md_flags & MD_POPULATED) == 0) ||
5620 	    ((ndcp->c_metadata.md_flags & MD_POPULATED) == 0)) {
5621 		error = ETIMEDOUT;
5622 		goto out;
5623 	}
5624 
5625 	/* for now do not allow moving dirs because could cause cycles */
5626 	if ((((revp->v_type == VDIR) && (odvp != ndvp))) ||
5627 	    (revp == odvp)) {
5628 		error = ETIMEDOUT;
5629 		goto out;
5630 	}
5631 
5632 	/* if a file is being deleted because of this rename */
5633 	if (delvp) {
5634 		delcp = VTOC(delvp);
5635 
5636 		/* if src and dest file are the same */
5637 		if (delvp == revp) {
5638 			error = 0;
5639 			goto out;
5640 		}
5641 
5642 		if (delcp->c_metadata.md_flags & MD_NEEDATTRS) {
5643 			error = ETIMEDOUT;
5644 			goto out;
5645 		}
5646 
5647 		/* if there are hard links to this file */
5648 		if (delcp->c_attr.va_nlink > 1) {
5649 			mutex_enter(&delcp->c_statelock);
5650 			if (cachefs_modified_alloc(delcp)) {
5651 				mutex_exit(&delcp->c_statelock);
5652 				error = ENOSPC;
5653 				goto out;
5654 			}
5655 
5656 			if ((delcp->c_metadata.md_flags & MD_MAPPING) == 0) {
5657 				error = cachefs_dlog_cidmap(fscp);
5658 				if (error) {
5659 					mutex_exit(&delcp->c_statelock);
5660 					error = ENOSPC;
5661 					goto out;
5662 				}
5663 				delcp->c_metadata.md_flags |= MD_MAPPING;
5664 				delcp->c_flags |= CN_UPDATED;
5665 			}
5666 			mutex_exit(&delcp->c_statelock);
5667 		}
5668 
5669 		/* make sure we can delete file */
5670 		mutex_enter(&ndcp->c_statelock);
5671 		error = cachefs_stickyrmchk(ndcp, delcp, cr);
5672 		mutex_exit(&ndcp->c_statelock);
5673 		if (error != 0)
5674 			goto out;
5675 
5676 		/*
5677 		 * If the cnode is active, make a link to the file
5678 		 * so operations on the file will continue.
5679 		 */
5680 		dnlc_purge_vp(delvp);
5681 		if ((delvp->v_type != VDIR) &&
5682 		    !((delvp->v_count == 1) ||
5683 		    ((delvp->v_count == 2) && delcp->c_ipending))) {
5684 			error = cachefs_remove_dolink(ndvp, delvp, nnm, cr);
5685 			if (error)
5686 				goto out;
5687 		}
5688 	}
5689 
5690 	/* purge mappings to file in the old directory */
5691 	dnlc_purge_vp(odvp);
5692 
5693 	/* purge mappings in the new dir if we deleted a file */
5694 	if (delvp && (odvp != ndvp))
5695 		dnlc_purge_vp(ndvp);
5696 
5697 	/* find the entry in the old directory */
5698 	mutex_enter(&odcp->c_statelock);
5699 	if ((odcp->c_metadata.md_flags & MD_POPULATED) == 0) {
5700 		mutex_exit(&odcp->c_statelock);
5701 		error = ETIMEDOUT;
5702 		goto out;
5703 	}
5704 	cookiep = NULL;
5705 	error = cachefs_dir_look(odcp, onm, &cookie, NULL, NULL, &cid);
5706 	if (error == 0 || error == EINVAL) {
5707 		if (error == 0)
5708 			cookiep = &cookie;
5709 	} else {
5710 		mutex_exit(&odcp->c_statelock);
5711 		if (error == ENOTDIR)
5712 			error = ETIMEDOUT;
5713 		goto out;
5714 	}
5715 	error = 0;
5716 
5717 	/* write the log entry */
5718 	commit = cachefs_dlog_rename(fscp, odcp, onm, ndcp, nnm, cr,
5719 	    recp, delcp);
5720 	if (commit == 0) {
5721 		mutex_exit(&odcp->c_statelock);
5722 		error = ENOSPC;
5723 		goto out;
5724 	}
5725 
5726 	/* remove the directory entry from the old directory */
5727 	cachefs_modified(odcp);
5728 	error = cachefs_dir_rmentry(odcp, onm);
5729 	if (error) {
5730 		mutex_exit(&odcp->c_statelock);
5731 		if (error == ENOTDIR)
5732 			error = ETIMEDOUT;
5733 		goto out;
5734 	}
5735 	mutex_exit(&odcp->c_statelock);
5736 
5737 	/* install the directory entry in the new directory */
5738 	mutex_enter(&ndcp->c_statelock);
5739 	error = ENOTDIR;
5740 	if (ndcp->c_metadata.md_flags & MD_POPULATED) {
5741 		ASSERT(cid.cid_fileno != 0);
5742 		cachefs_modified(ndcp);
5743 		error = 0;
5744 		if (delvp) {
5745 			error = cachefs_dir_rmentry(ndcp, nnm);
5746 		}
5747 		if (error == 0) {
5748 			error = cachefs_dir_enter(ndcp, nnm, cookiep,
5749 			    &cid, SM_ASYNC);
5750 		}
5751 	}
5752 	if (error) {
5753 		cachefs_nocache(ndcp);
5754 		mutex_exit(&ndcp->c_statelock);
5755 		mutex_enter(&odcp->c_statelock);
5756 		cachefs_nocache(odcp);
5757 		mutex_exit(&odcp->c_statelock);
5758 		if (error == ENOTDIR)
5759 			error = ETIMEDOUT;
5760 		goto out;
5761 	}
5762 	mutex_exit(&ndcp->c_statelock);
5763 
5764 	gethrestime(&current_time);
5765 
5766 	/* update the file we just deleted */
5767 	if (delvp) {
5768 		mutex_enter(&delcp->c_statelock);
5769 		delcp->c_attr.va_nlink--;
5770 		delcp->c_metadata.md_localctime = current_time;
5771 		delcp->c_metadata.md_flags |= MD_LOCALCTIME;
5772 		if (delcp->c_attr.va_nlink == 0) {
5773 			delcp->c_flags |= CN_DESTROY;
5774 		} else {
5775 			delcp->c_flags |= CN_UPDATED;
5776 		}
5777 		mutex_exit(&delcp->c_statelock);
5778 	}
5779 
5780 	/* update the file we renamed */
5781 	mutex_enter(&recp->c_statelock);
5782 	recp->c_metadata.md_localctime = current_time;
5783 	recp->c_metadata.md_flags |= MD_LOCALCTIME;
5784 	recp->c_flags |= CN_UPDATED;
5785 	mutex_exit(&recp->c_statelock);
5786 
5787 	/* update the source directory */
5788 	mutex_enter(&odcp->c_statelock);
5789 	odcp->c_metadata.md_localctime = current_time;
5790 	odcp->c_metadata.md_localmtime = current_time;
5791 	odcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5792 	odcp->c_flags |= CN_UPDATED;
5793 	mutex_exit(&odcp->c_statelock);
5794 
5795 	/* update the destination directory */
5796 	if (odcp != ndcp) {
5797 		mutex_enter(&ndcp->c_statelock);
5798 		ndcp->c_metadata.md_localctime = current_time;
5799 		ndcp->c_metadata.md_localmtime = current_time;
5800 		ndcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
5801 		ndcp->c_flags |= CN_UPDATED;
5802 		mutex_exit(&ndcp->c_statelock);
5803 	}
5804 
5805 out:
5806 	if (commit) {
5807 		/* commit the log entry */
5808 		if (cachefs_dlog_commit(fscp, commit, error)) {
5809 			/*EMPTY*/
5810 			/* XXX bob: fix on panic */
5811 		}
5812 	}
5813 
5814 	if (odcp != ndcp)
5815 		rw_exit(&ndcp->c_rwlock);
5816 	rw_exit(&odcp->c_rwlock);
5817 
5818 	VN_RELE(revp);
5819 
5820 	return (error);
5821 }
5822 
5823 /*ARGSUSED*/
5824 static int
5825 cachefs_mkdir(vnode_t *dvp, char *nm, vattr_t *vap, vnode_t **vpp,
5826     cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
5827 {
5828 	cnode_t *dcp = VTOC(dvp);
5829 	fscache_t *fscp = C_TO_FSCACHE(dcp);
5830 	cachefscache_t *cachep = fscp->fs_cache;
5831 	int error = 0;
5832 	int held = 0;
5833 	int connected = 0;
5834 
5835 #ifdef CFSDEBUG
5836 	CFS_DEBUG(CFSDEBUG_VOPS)
5837 		printf("cachefs_mkdir: ENTER dvp %p\n", (void *)dvp);
5838 #endif
5839 
5840 	if (getzoneid() != GLOBAL_ZONEID) {
5841 		error = EPERM;
5842 		goto out;
5843 	}
5844 
5845 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
5846 		ASSERT(dcp->c_flags & CN_NOCACHE);
5847 
5848 	/*
5849 	 * Cachefs only provides pass-through support for NFSv4,
5850 	 * and all vnode operations are passed through to the
5851 	 * back file system. For NFSv4 pass-through to work, only
5852 	 * connected operation is supported, the cnode backvp must
5853 	 * exist, and cachefs optional (eg., disconnectable) flags
5854 	 * are turned off. Assert these conditions to ensure that
5855 	 * the backfilesystem is called for the mkdir operation.
5856 	 */
5857 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
5858 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
5859 
5860 	for (;;) {
5861 		/* get (or renew) access to the file system */
5862 		if (held) {
5863 			/* Won't loop with NFSv4 connected behavior */
5864 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
5865 			rw_exit(&dcp->c_rwlock);
5866 			cachefs_cd_release(fscp);
5867 			held = 0;
5868 		}
5869 		error = cachefs_cd_access(fscp, connected, 1);
5870 		if (error)
5871 			break;
5872 		rw_enter(&dcp->c_rwlock, RW_WRITER);
5873 		held = 1;
5874 
5875 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
5876 			error = cachefs_mkdir_connected(dvp, nm, vap,
5877 			    vpp, cr);
5878 			if (CFS_TIMEOUT(fscp, error)) {
5879 				rw_exit(&dcp->c_rwlock);
5880 				cachefs_cd_release(fscp);
5881 				held = 0;
5882 				cachefs_cd_timedout(fscp);
5883 				connected = 0;
5884 				continue;
5885 			}
5886 		} else {
5887 			error = cachefs_mkdir_disconnected(dvp, nm, vap,
5888 			    vpp, cr);
5889 			if (CFS_TIMEOUT(fscp, error)) {
5890 				connected = 1;
5891 				continue;
5892 			}
5893 		}
5894 		break;
5895 	}
5896 
5897 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_MKDIR)) {
5898 		fid_t *fidp = NULL;
5899 		ino64_t fileno = 0;
5900 		cnode_t *cp = NULL;
5901 		if (error == 0)
5902 			cp = VTOC(*vpp);
5903 
5904 		if (cp != NULL) {
5905 			fidp = &cp->c_metadata.md_cookie;
5906 			fileno = cp->c_id.cid_fileno;
5907 		}
5908 
5909 		cachefs_log_mkdir(cachep, error, fscp->fs_cfsvfsp,
5910 		    fidp, fileno, crgetuid(cr));
5911 	}
5912 
5913 	if (held) {
5914 		rw_exit(&dcp->c_rwlock);
5915 		cachefs_cd_release(fscp);
5916 	}
5917 	if (error == 0 && CFS_ISFS_NONSHARED(fscp))
5918 		(void) cachefs_pack(dvp, nm, cr);
5919 
5920 #ifdef CFS_CD_DEBUG
5921 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
5922 #endif
5923 out:
5924 #ifdef CFSDEBUG
5925 	CFS_DEBUG(CFSDEBUG_VOPS)
5926 		printf("cachefs_mkdir: EXIT error = %d\n", error);
5927 #endif
5928 	return (error);
5929 }
5930 
5931 static int
5932 cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap,
5933     vnode_t **vpp, cred_t *cr)
5934 {
5935 	cnode_t *newcp = NULL, *dcp = VTOC(dvp);
5936 	struct vnode *vp = NULL;
5937 	int error = 0;
5938 	fscache_t *fscp = C_TO_FSCACHE(dcp);
5939 	struct fid cookie;
5940 	struct vattr attr;
5941 	cfs_cid_t cid, dircid;
5942 	uint32_t valid_fid;
5943 
5944 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
5945 		ASSERT(dcp->c_flags & CN_NOCACHE);
5946 
5947 	mutex_enter(&dcp->c_statelock);
5948 
5949 	/* get backvp of dir */
5950 	if (dcp->c_backvp == NULL) {
5951 		error = cachefs_getbackvp(fscp, dcp);
5952 		if (error) {
5953 			mutex_exit(&dcp->c_statelock);
5954 			goto out;
5955 		}
5956 	}
5957 
5958 	/* consistency check the directory */
5959 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
5960 	if (error) {
5961 		mutex_exit(&dcp->c_statelock);
5962 		goto out;
5963 	}
5964 	dircid = dcp->c_id;
5965 
5966 	/* make the dir on the back fs */
5967 	CFS_DPRINT_BACKFS_NFSV4(fscp,
5968 	    ("cachefs_mkdir (nfsv4): dcp %p, dbackvp %p, "
5969 	    "name %s\n", dcp, dcp->c_backvp, nm));
5970 	error = VOP_MKDIR(dcp->c_backvp, nm, vap, &vp, cr, NULL, 0, NULL);
5971 	mutex_exit(&dcp->c_statelock);
5972 	if (error) {
5973 		goto out;
5974 	}
5975 
5976 	/* get the cookie and make the cnode */
5977 	attr.va_mask = AT_ALL;
5978 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
5979 	error = cachefs_getcookie(vp, &cookie, &attr, cr, valid_fid);
5980 	if (error) {
5981 		goto out;
5982 	}
5983 	cid.cid_flags = 0;
5984 	cid.cid_fileno = attr.va_nodeid;
5985 	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
5986 	    &attr, vp, cr, 0, &newcp);
5987 	if (error) {
5988 		goto out;
5989 	}
5990 	ASSERT(CTOV(newcp)->v_type == VDIR);
5991 	*vpp = CTOV(newcp);
5992 
5993 	/* if the dir is populated, add the new entry */
5994 	mutex_enter(&dcp->c_statelock);
5995 	if (CFS_ISFS_NONSHARED(fscp) &&
5996 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
5997 		error = cachefs_dir_enter(dcp, nm, &cookie, &newcp->c_id,
5998 		    SM_ASYNC);
5999 		if (error) {
6000 			cachefs_nocache(dcp);
6001 			error = 0;
6002 		}
6003 	}
6004 	dcp->c_attr.va_nlink++;
6005 	dcp->c_flags |= CN_UPDATED;
6006 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
6007 	mutex_exit(&dcp->c_statelock);
6008 
6009 	/* XXX bob: should we do a filldir here? or just add . and .. */
6010 	/* maybe should kick off an async filldir so caller does not wait */
6011 
6012 	/* put the entry in the dnlc */
6013 	if (cachefs_dnlc)
6014 		dnlc_enter(dvp, nm, *vpp);
6015 
6016 	/* save the fileno of the parent so can find the name */
6017 	if (bcmp(&newcp->c_metadata.md_parent, &dircid,
6018 	    sizeof (cfs_cid_t)) != 0) {
6019 		mutex_enter(&newcp->c_statelock);
6020 		newcp->c_metadata.md_parent = dircid;
6021 		newcp->c_flags |= CN_UPDATED;
6022 		mutex_exit(&newcp->c_statelock);
6023 	}
6024 out:
6025 	if (vp)
6026 		VN_RELE(vp);
6027 
6028 	return (error);
6029 }
6030 
6031 static int
6032 cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
6033     vnode_t **vpp, cred_t *cr)
6034 {
6035 	cnode_t *dcp = VTOC(dvp);
6036 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6037 	int error;
6038 	cnode_t *newcp = NULL;
6039 	struct vattr va;
6040 	timestruc_t current_time;
6041 	off_t commit = 0;
6042 	char *s;
6043 	int namlen;
6044 
6045 	/* don't allow '/' characters in pathname component */
6046 	for (s = nm, namlen = 0; *s; s++, namlen++)
6047 		if (*s == '/')
6048 			return (EACCES);
6049 	if (namlen == 0)
6050 		return (EINVAL);
6051 
6052 	if (CFS_ISFS_WRITE_AROUND(fscp))
6053 		return (ETIMEDOUT);
6054 
6055 	mutex_enter(&dcp->c_statelock);
6056 
6057 	/* check permissions */
6058 	if (error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) {
6059 		mutex_exit(&dcp->c_statelock);
6060 		goto out;
6061 	}
6062 
6063 	/* the directory front file must be populated */
6064 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6065 		error = ETIMEDOUT;
6066 		mutex_exit(&dcp->c_statelock);
6067 		goto out;
6068 	}
6069 
6070 	/* make sure nm does not already exist in the directory */
6071 	error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL);
6072 	if (error == ENOTDIR) {
6073 		error = ETIMEDOUT;
6074 		mutex_exit(&dcp->c_statelock);
6075 		goto out;
6076 	}
6077 	if (error != ENOENT) {
6078 		error = EEXIST;
6079 		mutex_exit(&dcp->c_statelock);
6080 		goto out;
6081 	}
6082 
6083 	/* make up a reasonable set of attributes */
6084 	cachefs_attr_setup(vap, &va, dcp, cr);
6085 	va.va_type = VDIR;
6086 	va.va_mode |= S_IFDIR;
6087 	va.va_nlink = 2;
6088 
6089 	mutex_exit(&dcp->c_statelock);
6090 
6091 	/* create the cnode */
6092 	error = cachefs_cnode_create(fscp, &va, 0, &newcp);
6093 	if (error)
6094 		goto out;
6095 
6096 	mutex_enter(&newcp->c_statelock);
6097 
6098 	error = cachefs_dlog_cidmap(fscp);
6099 	if (error) {
6100 		mutex_exit(&newcp->c_statelock);
6101 		goto out;
6102 	}
6103 
6104 	cachefs_creategid(dcp, newcp, vap, cr);
6105 	mutex_enter(&dcp->c_statelock);
6106 	cachefs_createacl(dcp, newcp);
6107 	mutex_exit(&dcp->c_statelock);
6108 	gethrestime(&current_time);
6109 	newcp->c_metadata.md_vattr.va_atime = current_time;
6110 	newcp->c_metadata.md_localctime = current_time;
6111 	newcp->c_metadata.md_localmtime = current_time;
6112 	newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME |
6113 	    MD_LOCALCTIME;
6114 	newcp->c_flags |= CN_UPDATED;
6115 
6116 	/* make a front file for the new directory, add . and .. */
6117 	error = cachefs_dir_new(dcp, newcp);
6118 	if (error) {
6119 		mutex_exit(&newcp->c_statelock);
6120 		goto out;
6121 	}
6122 	cachefs_modified(newcp);
6123 
6124 	/*
6125 	 * write the metadata now rather than waiting until
6126 	 * inactive so that if there's no space we can let
6127 	 * the caller know.
6128 	 */
6129 	ASSERT(newcp->c_frontvp);
6130 	ASSERT((newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
6131 	ASSERT((newcp->c_flags & CN_ALLOC_PENDING) == 0);
6132 	error = filegrp_write_metadata(newcp->c_filegrp,
6133 	    &newcp->c_id, &newcp->c_metadata);
6134 	if (error) {
6135 		mutex_exit(&newcp->c_statelock);
6136 		goto out;
6137 	}
6138 	mutex_exit(&newcp->c_statelock);
6139 
6140 	/* log the operation */
6141 	commit = cachefs_dlog_mkdir(fscp, dcp, newcp, nm, &va, cr);
6142 	if (commit == 0) {
6143 		error = ENOSPC;
6144 		goto out;
6145 	}
6146 
6147 	mutex_enter(&dcp->c_statelock);
6148 
6149 	/* make sure directory is still populated */
6150 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6151 		mutex_exit(&dcp->c_statelock);
6152 		error = ETIMEDOUT;
6153 		goto out;
6154 	}
6155 	cachefs_modified(dcp);
6156 
6157 	/* enter the new file in the directory */
6158 	error = cachefs_dir_enter(dcp, nm, &newcp->c_metadata.md_cookie,
6159 	    &newcp->c_id, SM_ASYNC);
6160 	if (error) {
6161 		mutex_exit(&dcp->c_statelock);
6162 		goto out;
6163 	}
6164 
6165 	/* update parent dir times */
6166 	dcp->c_metadata.md_localctime = current_time;
6167 	dcp->c_metadata.md_localmtime = current_time;
6168 	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
6169 	dcp->c_attr.va_nlink++;
6170 	dcp->c_flags |= CN_UPDATED;
6171 	mutex_exit(&dcp->c_statelock);
6172 
6173 out:
6174 	if (commit) {
6175 		/* commit the log entry */
6176 		if (cachefs_dlog_commit(fscp, commit, error)) {
6177 			/*EMPTY*/
6178 			/* XXX bob: fix on panic */
6179 		}
6180 	}
6181 	if (error) {
6182 		if (newcp) {
6183 			mutex_enter(&newcp->c_statelock);
6184 			newcp->c_flags |= CN_DESTROY;
6185 			mutex_exit(&newcp->c_statelock);
6186 			VN_RELE(CTOV(newcp));
6187 		}
6188 	} else {
6189 		*vpp = CTOV(newcp);
6190 	}
6191 	return (error);
6192 }
6193 
6194 /*ARGSUSED*/
6195 static int
6196 cachefs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
6197     caller_context_t *ct, int flags)
6198 {
6199 	cnode_t *dcp = VTOC(dvp);
6200 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6201 	cachefscache_t *cachep = fscp->fs_cache;
6202 	int error = 0;
6203 	int held = 0;
6204 	int connected = 0;
6205 	size_t namlen;
6206 	vnode_t *vp = NULL;
6207 	int vfslock = 0;
6208 
6209 #ifdef CFSDEBUG
6210 	CFS_DEBUG(CFSDEBUG_VOPS)
6211 		printf("cachefs_rmdir: ENTER vp %p\n", (void *)dvp);
6212 #endif
6213 
6214 	if (getzoneid() != GLOBAL_ZONEID) {
6215 		error = EPERM;
6216 		goto out;
6217 	}
6218 
6219 	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
6220 		ASSERT(dcp->c_flags & CN_NOCACHE);
6221 
6222 	/*
6223 	 * Cachefs only provides pass-through support for NFSv4,
6224 	 * and all vnode operations are passed through to the
6225 	 * back file system. For NFSv4 pass-through to work, only
6226 	 * connected operation is supported, the cnode backvp must
6227 	 * exist, and cachefs optional (eg., disconnectable) flags
6228 	 * are turned off. Assert these conditions to ensure that
6229 	 * the backfilesystem is called for the rmdir operation.
6230 	 */
6231 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6232 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6233 
6234 	for (;;) {
6235 		if (vfslock) {
6236 			vn_vfsunlock(vp);
6237 			vfslock = 0;
6238 		}
6239 		if (vp) {
6240 			VN_RELE(vp);
6241 			vp = NULL;
6242 		}
6243 
6244 		/* get (or renew) access to the file system */
6245 		if (held) {
6246 			/* Won't loop with NFSv4 connected behavior */
6247 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6248 			cachefs_cd_release(fscp);
6249 			held = 0;
6250 		}
6251 		error = cachefs_cd_access(fscp, connected, 1);
6252 		if (error)
6253 			break;
6254 		held = 1;
6255 
6256 		/* if disconnected, do some extra error checking */
6257 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
6258 			/* check permissions */
6259 			mutex_enter(&dcp->c_statelock);
6260 			error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
6261 			mutex_exit(&dcp->c_statelock);
6262 			if (CFS_TIMEOUT(fscp, error)) {
6263 				connected = 1;
6264 				continue;
6265 			}
6266 			if (error)
6267 				break;
6268 
6269 			namlen = strlen(nm);
6270 			if (namlen == 0) {
6271 				error = EINVAL;
6272 				break;
6273 			}
6274 
6275 			/* cannot remove . and .. */
6276 			if (nm[0] == '.') {
6277 				if (namlen == 1) {
6278 					error = EINVAL;
6279 					break;
6280 				} else if (namlen == 2 && nm[1] == '.') {
6281 					error = EEXIST;
6282 					break;
6283 				}
6284 			}
6285 
6286 		}
6287 
6288 		/* get the cnode of the dir to remove */
6289 		error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
6290 		if (error) {
6291 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6292 				if (CFS_TIMEOUT(fscp, error)) {
6293 					cachefs_cd_release(fscp);
6294 					held = 0;
6295 					cachefs_cd_timedout(fscp);
6296 					connected = 0;
6297 					continue;
6298 				}
6299 			} else {
6300 				if (CFS_TIMEOUT(fscp, error)) {
6301 					connected = 1;
6302 					continue;
6303 				}
6304 			}
6305 			break;
6306 		}
6307 
6308 		/* must be a dir */
6309 		if (vp->v_type != VDIR) {
6310 			error = ENOTDIR;
6311 			break;
6312 		}
6313 
6314 		/* must not be current dir */
6315 		if (VOP_CMP(vp, cdir, ct)) {
6316 			error = EINVAL;
6317 			break;
6318 		}
6319 
6320 		/* see ufs_dirremove for why this is done, mount race */
6321 		if (vn_vfswlock(vp)) {
6322 			error = EBUSY;
6323 			break;
6324 		}
6325 		vfslock = 1;
6326 		if (vn_mountedvfs(vp) != NULL) {
6327 			error = EBUSY;
6328 			break;
6329 		}
6330 
6331 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6332 			error = cachefs_rmdir_connected(dvp, nm, cdir,
6333 			    cr, vp);
6334 			if (CFS_TIMEOUT(fscp, error)) {
6335 				cachefs_cd_release(fscp);
6336 				held = 0;
6337 				cachefs_cd_timedout(fscp);
6338 				connected = 0;
6339 				continue;
6340 			}
6341 		} else {
6342 			error = cachefs_rmdir_disconnected(dvp, nm, cdir,
6343 			    cr, vp);
6344 			if (CFS_TIMEOUT(fscp, error)) {
6345 				connected = 1;
6346 				continue;
6347 			}
6348 		}
6349 		break;
6350 	}
6351 
6352 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RMDIR)) {
6353 		ino64_t fileno = 0;
6354 		fid_t *fidp = NULL;
6355 		cnode_t *cp = NULL;
6356 		if (vp)
6357 			cp = VTOC(vp);
6358 
6359 		if (cp != NULL) {
6360 			fidp = &cp->c_metadata.md_cookie;
6361 			fileno = cp->c_id.cid_fileno;
6362 		}
6363 
6364 		cachefs_log_rmdir(cachep, error, fscp->fs_cfsvfsp,
6365 		    fidp, fileno, crgetuid(cr));
6366 	}
6367 
6368 	if (held) {
6369 		cachefs_cd_release(fscp);
6370 	}
6371 
6372 	if (vfslock)
6373 		vn_vfsunlock(vp);
6374 
6375 	if (vp)
6376 		VN_RELE(vp);
6377 
6378 #ifdef CFS_CD_DEBUG
6379 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
6380 #endif
6381 out:
6382 #ifdef CFSDEBUG
6383 	CFS_DEBUG(CFSDEBUG_VOPS)
6384 		printf("cachefs_rmdir: EXIT error = %d\n", error);
6385 #endif
6386 
6387 	return (error);
6388 }
6389 
6390 static int
6391 cachefs_rmdir_connected(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
6392     vnode_t *vp)
6393 {
6394 	cnode_t *dcp = VTOC(dvp);
6395 	cnode_t *cp = VTOC(vp);
6396 	int error = 0;
6397 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6398 
6399 	rw_enter(&dcp->c_rwlock, RW_WRITER);
6400 	mutex_enter(&dcp->c_statelock);
6401 	mutex_enter(&cp->c_statelock);
6402 
6403 	if (dcp->c_backvp == NULL) {
6404 		error = cachefs_getbackvp(fscp, dcp);
6405 		if (error) {
6406 			goto out;
6407 		}
6408 	}
6409 
6410 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
6411 	if (error)
6412 		goto out;
6413 
6414 	/* rmdir on the back fs */
6415 	CFS_DPRINT_BACKFS_NFSV4(fscp,
6416 	    ("cachefs_rmdir (nfsv4): dcp %p, dbackvp %p, "
6417 	    "name %s\n", dcp, dcp->c_backvp, nm));
6418 	error = VOP_RMDIR(dcp->c_backvp, nm, cdir, cr, NULL, 0);
6419 	if (error)
6420 		goto out;
6421 
6422 	/* if the dir is populated, remove the entry from it */
6423 	if (CFS_ISFS_NONSHARED(fscp) &&
6424 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
6425 		error = cachefs_dir_rmentry(dcp, nm);
6426 		if (error) {
6427 			cachefs_nocache(dcp);
6428 			error = 0;
6429 		}
6430 	}
6431 
6432 	/*
6433 	 * *if* the (hard) link count goes to 0, then we set the CDESTROY
6434 	 * flag on the cnode. The cached object will then be destroyed
6435 	 * at inactive time where the chickens come home to roost :-)
6436 	 * The link cnt for directories is bumped down by 2 'cause the "."
6437 	 * entry has to be elided too ! The link cnt for the parent goes down
6438 	 * by 1 (because of "..").
6439 	 */
6440 	cp->c_attr.va_nlink -= 2;
6441 	dcp->c_attr.va_nlink--;
6442 	if (cp->c_attr.va_nlink == 0) {
6443 		cp->c_flags |= CN_DESTROY;
6444 	} else {
6445 		cp->c_flags |= CN_UPDATED;
6446 	}
6447 	dcp->c_flags |= CN_UPDATED;
6448 
6449 	dnlc_purge_vp(vp);
6450 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
6451 
6452 out:
6453 	mutex_exit(&cp->c_statelock);
6454 	mutex_exit(&dcp->c_statelock);
6455 	rw_exit(&dcp->c_rwlock);
6456 
6457 	return (error);
6458 }
6459 
6460 static int
6461 /*ARGSUSED*/
6462 cachefs_rmdir_disconnected(vnode_t *dvp, char *nm, vnode_t *cdir,
6463     cred_t *cr, vnode_t *vp)
6464 {
6465 	cnode_t *dcp = VTOC(dvp);
6466 	cnode_t *cp = VTOC(vp);
6467 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6468 	int error = 0;
6469 	off_t commit = 0;
6470 	timestruc_t current_time;
6471 
6472 	if (CFS_ISFS_WRITE_AROUND(fscp))
6473 		return (ETIMEDOUT);
6474 
6475 	rw_enter(&dcp->c_rwlock, RW_WRITER);
6476 	mutex_enter(&dcp->c_statelock);
6477 	mutex_enter(&cp->c_statelock);
6478 
6479 	/* both directories must be populated */
6480 	if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) ||
6481 	    ((cp->c_metadata.md_flags & MD_POPULATED) == 0)) {
6482 		error = ETIMEDOUT;
6483 		goto out;
6484 	}
6485 
6486 	/* if sticky bit set on the dir, more access checks to perform */
6487 	if (error = cachefs_stickyrmchk(dcp, cp, cr)) {
6488 		goto out;
6489 	}
6490 
6491 	/* make sure dir is empty */
6492 	if (cp->c_attr.va_nlink > 2) {
6493 		error = cachefs_dir_empty(cp);
6494 		if (error) {
6495 			if (error == ENOTDIR)
6496 				error = ETIMEDOUT;
6497 			goto out;
6498 		}
6499 		cachefs_modified(cp);
6500 	}
6501 	cachefs_modified(dcp);
6502 
6503 	/* log the operation */
6504 	commit = cachefs_dlog_rmdir(fscp, dcp, nm, cp, cr);
6505 	if (commit == 0) {
6506 		error = ENOSPC;
6507 		goto out;
6508 	}
6509 
6510 	/* remove name from parent dir */
6511 	error = cachefs_dir_rmentry(dcp, nm);
6512 	if (error == ENOTDIR) {
6513 		error = ETIMEDOUT;
6514 		goto out;
6515 	}
6516 	if (error)
6517 		goto out;
6518 
6519 	gethrestime(&current_time);
6520 
6521 	/* update deleted dir values */
6522 	cp->c_attr.va_nlink -= 2;
6523 	if (cp->c_attr.va_nlink == 0)
6524 		cp->c_flags |= CN_DESTROY;
6525 	else {
6526 		cp->c_metadata.md_localctime = current_time;
6527 		cp->c_metadata.md_flags |= MD_LOCALCTIME;
6528 		cp->c_flags |= CN_UPDATED;
6529 	}
6530 
6531 	/* update parent values */
6532 	dcp->c_metadata.md_localctime = current_time;
6533 	dcp->c_metadata.md_localmtime = current_time;
6534 	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
6535 	dcp->c_attr.va_nlink--;
6536 	dcp->c_flags |= CN_UPDATED;
6537 
6538 out:
6539 	mutex_exit(&cp->c_statelock);
6540 	mutex_exit(&dcp->c_statelock);
6541 	rw_exit(&dcp->c_rwlock);
6542 	if (commit) {
6543 		/* commit the log entry */
6544 		if (cachefs_dlog_commit(fscp, commit, error)) {
6545 			/*EMPTY*/
6546 			/* XXX bob: fix on panic */
6547 		}
6548 		dnlc_purge_vp(vp);
6549 	}
6550 	return (error);
6551 }
6552 
6553 /*ARGSUSED*/
6554 static int
6555 cachefs_symlink(vnode_t *dvp, char *lnm, vattr_t *tva,
6556     char *tnm, cred_t *cr, caller_context_t *ct, int flags)
6557 {
6558 	cnode_t *dcp = VTOC(dvp);
6559 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6560 	cachefscache_t *cachep = fscp->fs_cache;
6561 	int error = 0;
6562 	int held = 0;
6563 	int connected = 0;
6564 
6565 #ifdef CFSDEBUG
6566 	CFS_DEBUG(CFSDEBUG_VOPS)
6567 		printf("cachefs_symlink: ENTER dvp %p lnm %s tnm %s\n",
6568 		    (void *)dvp, lnm, tnm);
6569 #endif
6570 
6571 	if (getzoneid() != GLOBAL_ZONEID) {
6572 		error = EPERM;
6573 		goto out;
6574 	}
6575 
6576 	if (fscp->fs_cache->c_flags & CACHE_NOCACHE)
6577 		ASSERT(dcp->c_flags & CN_NOCACHE);
6578 
6579 	/*
6580 	 * Cachefs only provides pass-through support for NFSv4,
6581 	 * and all vnode operations are passed through to the
6582 	 * back file system. For NFSv4 pass-through to work, only
6583 	 * connected operation is supported, the cnode backvp must
6584 	 * exist, and cachefs optional (eg., disconnectable) flags
6585 	 * are turned off. Assert these conditions to ensure that
6586 	 * the backfilesystem is called for the symlink operation.
6587 	 */
6588 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6589 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6590 
6591 	for (;;) {
6592 		/* get (or renew) access to the file system */
6593 		if (held) {
6594 			/* Won't loop with NFSv4 connected behavior */
6595 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6596 			rw_exit(&dcp->c_rwlock);
6597 			cachefs_cd_release(fscp);
6598 			held = 0;
6599 		}
6600 		error = cachefs_cd_access(fscp, connected, 1);
6601 		if (error)
6602 			break;
6603 		rw_enter(&dcp->c_rwlock, RW_WRITER);
6604 		held = 1;
6605 
6606 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6607 			error = cachefs_symlink_connected(dvp, lnm, tva,
6608 			    tnm, cr);
6609 			if (CFS_TIMEOUT(fscp, error)) {
6610 				rw_exit(&dcp->c_rwlock);
6611 				cachefs_cd_release(fscp);
6612 				held = 0;
6613 				cachefs_cd_timedout(fscp);
6614 				connected = 0;
6615 				continue;
6616 			}
6617 		} else {
6618 			error = cachefs_symlink_disconnected(dvp, lnm, tva,
6619 			    tnm, cr);
6620 			if (CFS_TIMEOUT(fscp, error)) {
6621 				connected = 1;
6622 				continue;
6623 			}
6624 		}
6625 		break;
6626 	}
6627 
6628 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_SYMLINK))
6629 		cachefs_log_symlink(cachep, error, fscp->fs_cfsvfsp,
6630 		    &dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno,
6631 		    crgetuid(cr), (uint_t)strlen(tnm));
6632 
6633 	if (held) {
6634 		rw_exit(&dcp->c_rwlock);
6635 		cachefs_cd_release(fscp);
6636 	}
6637 
6638 #ifdef CFS_CD_DEBUG
6639 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
6640 #endif
6641 out:
6642 #ifdef CFSDEBUG
6643 	CFS_DEBUG(CFSDEBUG_VOPS)
6644 		printf("cachefs_symlink: EXIT error = %d\n", error);
6645 #endif
6646 	return (error);
6647 }
6648 
6649 static int
6650 cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva,
6651     char *tnm, cred_t *cr)
6652 {
6653 	cnode_t *dcp = VTOC(dvp);
6654 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6655 	int error = 0;
6656 	vnode_t *backvp = NULL;
6657 	cnode_t *newcp = NULL;
6658 	struct vattr va;
6659 	struct fid cookie;
6660 	cfs_cid_t cid;
6661 	uint32_t valid_fid;
6662 
6663 	mutex_enter(&dcp->c_statelock);
6664 
6665 	if (dcp->c_backvp == NULL) {
6666 		error = cachefs_getbackvp(fscp, dcp);
6667 		if (error) {
6668 			cachefs_nocache(dcp);
6669 			mutex_exit(&dcp->c_statelock);
6670 			goto out;
6671 		}
6672 	}
6673 
6674 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
6675 	if (error) {
6676 		mutex_exit(&dcp->c_statelock);
6677 		goto out;
6678 	}
6679 	CFS_DPRINT_BACKFS_NFSV4(fscp,
6680 	    ("cachefs_symlink (nfsv4): dcp %p, dbackvp %p, "
6681 	    "lnm %s, tnm %s\n", dcp, dcp->c_backvp, lnm, tnm));
6682 	error = VOP_SYMLINK(dcp->c_backvp, lnm, tva, tnm, cr, NULL, 0);
6683 	if (error) {
6684 		mutex_exit(&dcp->c_statelock);
6685 		goto out;
6686 	}
6687 	if ((dcp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
6688 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
6689 		cachefs_nocache(dcp);
6690 		mutex_exit(&dcp->c_statelock);
6691 		goto out;
6692 	}
6693 
6694 	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
6695 
6696 	/* lookup the symlink we just created and get its fid and attrs */
6697 	(void) VOP_LOOKUP(dcp->c_backvp, lnm, &backvp, NULL, 0, NULL, cr,
6698 	    NULL, NULL, NULL);
6699 	if (backvp == NULL) {
6700 		if (CFS_ISFS_BACKFS_NFSV4(fscp) == 0)
6701 			cachefs_nocache(dcp);
6702 		mutex_exit(&dcp->c_statelock);
6703 		goto out;
6704 	}
6705 
6706 	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
6707 	error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
6708 	if (error) {
6709 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6710 		error = 0;
6711 		cachefs_nocache(dcp);
6712 		mutex_exit(&dcp->c_statelock);
6713 		goto out;
6714 	}
6715 	cid.cid_fileno = va.va_nodeid;
6716 	cid.cid_flags = 0;
6717 
6718 	/* if the dir is cached, add the symlink to it */
6719 	if (CFS_ISFS_NONSHARED(fscp) &&
6720 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
6721 		error = cachefs_dir_enter(dcp, lnm, &cookie, &cid, SM_ASYNC);
6722 		if (error) {
6723 			cachefs_nocache(dcp);
6724 			error = 0;
6725 		}
6726 	}
6727 	mutex_exit(&dcp->c_statelock);
6728 
6729 	/* make the cnode for the sym link */
6730 	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
6731 	    &va, backvp, cr, 0, &newcp);
6732 	if (error) {
6733 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6734 		cachefs_nocache(dcp);
6735 		error = 0;
6736 		goto out;
6737 	}
6738 
6739 	/* try to cache the symlink contents */
6740 	rw_enter(&newcp->c_rwlock, RW_WRITER);
6741 	mutex_enter(&newcp->c_statelock);
6742 
6743 	/*
6744 	 * try to cache the sym link, note that its a noop if NOCACHE
6745 	 * or NFSv4 is set
6746 	 */
6747 	error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size);
6748 	if (error) {
6749 		cachefs_nocache(newcp);
6750 		error = 0;
6751 	}
6752 	mutex_exit(&newcp->c_statelock);
6753 	rw_exit(&newcp->c_rwlock);
6754 
6755 out:
6756 	if (backvp)
6757 		VN_RELE(backvp);
6758 	if (newcp)
6759 		VN_RELE(CTOV(newcp));
6760 	return (error);
6761 }
6762 
6763 static int
6764 cachefs_symlink_disconnected(vnode_t *dvp, char *lnm, vattr_t *tva,
6765     char *tnm, cred_t *cr)
6766 {
6767 	cnode_t *dcp = VTOC(dvp);
6768 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6769 	int error;
6770 	cnode_t *newcp = NULL;
6771 	struct vattr va;
6772 	timestruc_t current_time;
6773 	off_t commit = 0;
6774 
6775 	if (CFS_ISFS_WRITE_AROUND(fscp))
6776 		return (ETIMEDOUT);
6777 
6778 	mutex_enter(&dcp->c_statelock);
6779 
6780 	/* check permissions */
6781 	if (error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) {
6782 		mutex_exit(&dcp->c_statelock);
6783 		goto out;
6784 	}
6785 
6786 	/* the directory front file must be populated */
6787 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6788 		error = ETIMEDOUT;
6789 		mutex_exit(&dcp->c_statelock);
6790 		goto out;
6791 	}
6792 
6793 	/* make sure lnm does not already exist in the directory */
6794 	error = cachefs_dir_look(dcp, lnm, NULL, NULL, NULL, NULL);
6795 	if (error == ENOTDIR) {
6796 		error = ETIMEDOUT;
6797 		mutex_exit(&dcp->c_statelock);
6798 		goto out;
6799 	}
6800 	if (error != ENOENT) {
6801 		error = EEXIST;
6802 		mutex_exit(&dcp->c_statelock);
6803 		goto out;
6804 	}
6805 
6806 	/* make up a reasonable set of attributes */
6807 	cachefs_attr_setup(tva, &va, dcp, cr);
6808 	va.va_type = VLNK;
6809 	va.va_mode |= S_IFLNK;
6810 	va.va_size = strlen(tnm);
6811 
6812 	mutex_exit(&dcp->c_statelock);
6813 
6814 	/* create the cnode */
6815 	error = cachefs_cnode_create(fscp, &va, 0, &newcp);
6816 	if (error)
6817 		goto out;
6818 
6819 	rw_enter(&newcp->c_rwlock, RW_WRITER);
6820 	mutex_enter(&newcp->c_statelock);
6821 
6822 	error = cachefs_dlog_cidmap(fscp);
6823 	if (error) {
6824 		mutex_exit(&newcp->c_statelock);
6825 		rw_exit(&newcp->c_rwlock);
6826 		error = ENOSPC;
6827 		goto out;
6828 	}
6829 
6830 	cachefs_creategid(dcp, newcp, tva, cr);
6831 	mutex_enter(&dcp->c_statelock);
6832 	cachefs_createacl(dcp, newcp);
6833 	mutex_exit(&dcp->c_statelock);
6834 	gethrestime(&current_time);
6835 	newcp->c_metadata.md_vattr.va_atime = current_time;
6836 	newcp->c_metadata.md_localctime = current_time;
6837 	newcp->c_metadata.md_localmtime = current_time;
6838 	newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME |
6839 	    MD_LOCALCTIME;
6840 	newcp->c_flags |= CN_UPDATED;
6841 
6842 	/* log the operation */
6843 	commit = cachefs_dlog_symlink(fscp, dcp, newcp, lnm, tva, tnm, cr);
6844 	if (commit == 0) {
6845 		mutex_exit(&newcp->c_statelock);
6846 		rw_exit(&newcp->c_rwlock);
6847 		error = ENOSPC;
6848 		goto out;
6849 	}
6850 
6851 	/* store the symlink contents */
6852 	error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size);
6853 	if (error) {
6854 		mutex_exit(&newcp->c_statelock);
6855 		rw_exit(&newcp->c_rwlock);
6856 		goto out;
6857 	}
6858 	if (cachefs_modified_alloc(newcp)) {
6859 		mutex_exit(&newcp->c_statelock);
6860 		rw_exit(&newcp->c_rwlock);
6861 		error = ENOSPC;
6862 		goto out;
6863 	}
6864 
6865 	/*
6866 	 * write the metadata now rather than waiting until
6867 	 * inactive so that if there's no space we can let
6868 	 * the caller know.
6869 	 */
6870 	if (newcp->c_flags & CN_ALLOC_PENDING) {
6871 		if (newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
6872 			(void) filegrp_allocattr(newcp->c_filegrp);
6873 		}
6874 		error = filegrp_create_metadata(newcp->c_filegrp,
6875 		    &newcp->c_metadata, &newcp->c_id);
6876 		if (error) {
6877 			mutex_exit(&newcp->c_statelock);
6878 			rw_exit(&newcp->c_rwlock);
6879 			goto out;
6880 		}
6881 		newcp->c_flags &= ~CN_ALLOC_PENDING;
6882 	}
6883 	error = filegrp_write_metadata(newcp->c_filegrp,
6884 	    &newcp->c_id, &newcp->c_metadata);
6885 	if (error) {
6886 		mutex_exit(&newcp->c_statelock);
6887 		rw_exit(&newcp->c_rwlock);
6888 		goto out;
6889 	}
6890 	mutex_exit(&newcp->c_statelock);
6891 	rw_exit(&newcp->c_rwlock);
6892 
6893 	mutex_enter(&dcp->c_statelock);
6894 
6895 	/* enter the new file in the directory */
6896 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
6897 		error = ETIMEDOUT;
6898 		mutex_exit(&dcp->c_statelock);
6899 		goto out;
6900 	}
6901 	cachefs_modified(dcp);
6902 	error = cachefs_dir_enter(dcp, lnm, &newcp->c_metadata.md_cookie,
6903 	    &newcp->c_id, SM_ASYNC);
6904 	if (error) {
6905 		mutex_exit(&dcp->c_statelock);
6906 		goto out;
6907 	}
6908 
6909 	/* update parent dir times */
6910 	dcp->c_metadata.md_localctime = current_time;
6911 	dcp->c_metadata.md_localmtime = current_time;
6912 	dcp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME;
6913 	dcp->c_flags |= CN_UPDATED;
6914 	mutex_exit(&dcp->c_statelock);
6915 
6916 out:
6917 	if (commit) {
6918 		/* commit the log entry */
6919 		if (cachefs_dlog_commit(fscp, commit, error)) {
6920 			/*EMPTY*/
6921 			/* XXX bob: fix on panic */
6922 		}
6923 	}
6924 
6925 	if (error) {
6926 		if (newcp) {
6927 			mutex_enter(&newcp->c_statelock);
6928 			newcp->c_flags |= CN_DESTROY;
6929 			mutex_exit(&newcp->c_statelock);
6930 		}
6931 	}
6932 	if (newcp) {
6933 		VN_RELE(CTOV(newcp));
6934 	}
6935 
6936 	return (error);
6937 }
6938 
6939 /*ARGSUSED*/
6940 static int
6941 cachefs_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
6942     caller_context_t *ct, int flags)
6943 {
6944 	cnode_t *dcp = VTOC(vp);
6945 	fscache_t *fscp = C_TO_FSCACHE(dcp);
6946 	cachefscache_t *cachep = fscp->fs_cache;
6947 	int error = 0;
6948 	int held = 0;
6949 	int connected = 0;
6950 
6951 #ifdef CFSDEBUG
6952 	CFS_DEBUG(CFSDEBUG_VOPS)
6953 		printf("cachefs_readdir: ENTER vp %p\n", (void *)vp);
6954 #endif
6955 	if (getzoneid() != GLOBAL_ZONEID) {
6956 		error = EPERM;
6957 		goto out;
6958 	}
6959 
6960 	/*
6961 	 * Cachefs only provides pass-through support for NFSv4,
6962 	 * and all vnode operations are passed through to the
6963 	 * back file system. For NFSv4 pass-through to work, only
6964 	 * connected operation is supported, the cnode backvp must
6965 	 * exist, and cachefs optional (eg., disconnectable) flags
6966 	 * are turned off. Assert these conditions to ensure that
6967 	 * the backfilesystem is called for the readdir operation.
6968 	 */
6969 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6970 	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6971 
6972 	for (;;) {
6973 		/* get (or renew) access to the file system */
6974 		if (held) {
6975 			/* Won't loop with NFSv4 connected behavior */
6976 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6977 			rw_exit(&dcp->c_rwlock);
6978 			cachefs_cd_release(fscp);
6979 			held = 0;
6980 		}
6981 		error = cachefs_cd_access(fscp, connected, 0);
6982 		if (error)
6983 			break;
6984 		rw_enter(&dcp->c_rwlock, RW_READER);
6985 		held = 1;
6986 
6987 		/* quit if link count of zero (posix) */
6988 		if (dcp->c_attr.va_nlink == 0) {
6989 			if (eofp)
6990 				*eofp = 1;
6991 			error = 0;
6992 			break;
6993 		}
6994 
6995 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6996 			error = cachefs_readdir_connected(vp, uiop, cr,
6997 			    eofp);
6998 			if (CFS_TIMEOUT(fscp, error)) {
6999 				rw_exit(&dcp->c_rwlock);
7000 				cachefs_cd_release(fscp);
7001 				held = 0;
7002 				cachefs_cd_timedout(fscp);
7003 				connected = 0;
7004 				continue;
7005 			}
7006 		} else {
7007 			error = cachefs_readdir_disconnected(vp, uiop, cr,
7008 			    eofp);
7009 			if (CFS_TIMEOUT(fscp, error)) {
7010 				if (cachefs_cd_access_miss(fscp)) {
7011 					error = cachefs_readdir_connected(vp,
7012 					    uiop, cr, eofp);
7013 					if (!CFS_TIMEOUT(fscp, error))
7014 						break;
7015 					delay(5*hz);
7016 					connected = 0;
7017 					continue;
7018 				}
7019 				connected = 1;
7020 				continue;
7021 			}
7022 		}
7023 		break;
7024 	}
7025 
7026 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READDIR))
7027 		cachefs_log_readdir(cachep, error, fscp->fs_cfsvfsp,
7028 		    &dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno,
7029 		    crgetuid(cr), uiop->uio_loffset, *eofp);
7030 
7031 	if (held) {
7032 		rw_exit(&dcp->c_rwlock);
7033 		cachefs_cd_release(fscp);
7034 	}
7035 
7036 #ifdef CFS_CD_DEBUG
7037 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7038 #endif
7039 out:
7040 #ifdef CFSDEBUG
7041 	CFS_DEBUG(CFSDEBUG_VOPS)
7042 		printf("cachefs_readdir: EXIT error = %d\n", error);
7043 #endif
7044 
7045 	return (error);
7046 }
7047 
7048 static int
7049 cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp)
7050 {
7051 	cnode_t *dcp = VTOC(vp);
7052 	int error;
7053 	fscache_t *fscp = C_TO_FSCACHE(dcp);
7054 	struct cachefs_req *rp;
7055 
7056 	mutex_enter(&dcp->c_statelock);
7057 
7058 	/* check directory consistency */
7059 	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
7060 	if (error)
7061 		goto out;
7062 	dcp->c_usage++;
7063 
7064 	/* if dir was modified, toss old contents */
7065 	if (dcp->c_metadata.md_flags & MD_INVALREADDIR) {
7066 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
7067 		cachefs_inval_object(dcp);
7068 	}
7069 
7070 	error = 0;
7071 	if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) &&
7072 	    ((dcp->c_flags & (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0) &&
7073 	    !CFS_ISFS_BACKFS_NFSV4(fscp) &&
7074 	    (fscp->fs_cdconnected == CFS_CD_CONNECTED)) {
7075 
7076 		if (cachefs_async_okay()) {
7077 
7078 			/*
7079 			 * Set up asynchronous request to fill this
7080 			 * directory.
7081 			 */
7082 
7083 			dcp->c_flags |= CN_ASYNC_POPULATE;
7084 
7085 			rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
7086 			rp->cfs_cmd = CFS_POPULATE;
7087 			rp->cfs_req_u.cu_populate.cpop_vp = vp;
7088 			rp->cfs_cr = cr;
7089 
7090 			crhold(cr);
7091 			VN_HOLD(vp);
7092 
7093 			cachefs_addqueue(rp, &fscp->fs_workq);
7094 		} else {
7095 			error = cachefs_dir_fill(dcp, cr);
7096 			if (error != 0)
7097 				cachefs_nocache(dcp);
7098 		}
7099 	}
7100 
7101 	/* if front file is populated */
7102 	if (((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) == 0) &&
7103 	    !CFS_ISFS_BACKFS_NFSV4(fscp) &&
7104 	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
7105 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
7106 		error = cachefs_dir_read(dcp, uiop, eofp);
7107 		if (error == 0)
7108 			fscp->fs_stats.st_hits++;
7109 	}
7110 
7111 	/* if front file could not be used */
7112 	if ((error != 0) ||
7113 	    CFS_ISFS_BACKFS_NFSV4(fscp) ||
7114 	    (dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
7115 	    ((dcp->c_metadata.md_flags & MD_POPULATED) == 0)) {
7116 
7117 		if (error && !(dcp->c_flags & CN_NOCACHE) &&
7118 		    !CFS_ISFS_BACKFS_NFSV4(fscp))
7119 			cachefs_nocache(dcp);
7120 
7121 		/* get the back vp */
7122 		if (dcp->c_backvp == NULL) {
7123 			error = cachefs_getbackvp(fscp, dcp);
7124 			if (error)
7125 				goto out;
7126 		}
7127 
7128 		if (fscp->fs_inum_size > 0) {
7129 			error = cachefs_readback_translate(dcp, uiop, cr, eofp);
7130 		} else {
7131 			/* do the dir read from the back fs */
7132 			(void) VOP_RWLOCK(dcp->c_backvp,
7133 			    V_WRITELOCK_FALSE, NULL);
7134 			CFS_DPRINT_BACKFS_NFSV4(fscp,
7135 			    ("cachefs_readdir (nfsv4): "
7136 			    "dcp %p, dbackvp %p\n", dcp, dcp->c_backvp));
7137 			error = VOP_READDIR(dcp->c_backvp, uiop, cr, eofp,
7138 			    NULL, 0);
7139 			VOP_RWUNLOCK(dcp->c_backvp, V_WRITELOCK_FALSE, NULL);
7140 		}
7141 
7142 		if (error == 0)
7143 			fscp->fs_stats.st_misses++;
7144 	}
7145 
7146 out:
7147 	mutex_exit(&dcp->c_statelock);
7148 
7149 	return (error);
7150 }
7151 
7152 static int
7153 cachefs_readback_translate(cnode_t *cp, uio_t *uiop, cred_t *cr, int *eofp)
7154 {
7155 	int error = 0;
7156 	fscache_t *fscp = C_TO_FSCACHE(cp);
7157 	caddr_t buffy = NULL;
7158 	int buffysize = MAXBSIZE;
7159 	caddr_t chrp, end;
7160 	ino64_t newinum;
7161 	struct dirent64 *de;
7162 	uio_t uioin;
7163 	iovec_t iov;
7164 
7165 	ASSERT(cp->c_backvp != NULL);
7166 	ASSERT(fscp->fs_inum_size > 0);
7167 
7168 	if (uiop->uio_resid < buffysize)
7169 		buffysize = (int)uiop->uio_resid;
7170 	buffy = cachefs_kmem_alloc(buffysize, KM_SLEEP);
7171 
7172 	iov.iov_base = buffy;
7173 	iov.iov_len = buffysize;
7174 	uioin.uio_iov = &iov;
7175 	uioin.uio_iovcnt = 1;
7176 	uioin.uio_segflg = UIO_SYSSPACE;
7177 	uioin.uio_fmode = 0;
7178 	uioin.uio_extflg = UIO_COPY_CACHED;
7179 	uioin.uio_loffset = uiop->uio_loffset;
7180 	uioin.uio_resid = buffysize;
7181 
7182 	(void) VOP_RWLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);
7183 	error = VOP_READDIR(cp->c_backvp, &uioin, cr, eofp, NULL, 0);
7184 	VOP_RWUNLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);
7185 
7186 	if (error != 0)
7187 		goto out;
7188 
7189 	end = buffy + buffysize - uioin.uio_resid;
7190 
7191 	mutex_exit(&cp->c_statelock);
7192 	mutex_enter(&fscp->fs_fslock);
7193 
7194 
7195 	for (chrp = buffy; chrp < end; chrp += de->d_reclen) {
7196 		de = (dirent64_t *)chrp;
7197 		newinum = cachefs_inum_real2fake(fscp, de->d_ino);
7198 		if (newinum == 0)
7199 			newinum = cachefs_fileno_conflict(fscp, de->d_ino);
7200 		de->d_ino = newinum;
7201 	}
7202 	mutex_exit(&fscp->fs_fslock);
7203 	mutex_enter(&cp->c_statelock);
7204 
7205 	error = uiomove(buffy, end - buffy, UIO_READ, uiop);
7206 	uiop->uio_loffset = uioin.uio_loffset;
7207 
7208 out:
7209 
7210 	if (buffy != NULL)
7211 		cachefs_kmem_free(buffy, buffysize);
7212 
7213 	return (error);
7214 }
7215 
7216 static int
7217 /*ARGSUSED*/
7218 cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop, cred_t *cr,
7219     int *eofp)
7220 {
7221 	cnode_t *dcp = VTOC(vp);
7222 	int error;
7223 
7224 	mutex_enter(&dcp->c_statelock);
7225 	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
7226 		error = ETIMEDOUT;
7227 	} else {
7228 		error = cachefs_dir_read(dcp, uiop, eofp);
7229 		if (error == ENOTDIR)
7230 			error = ETIMEDOUT;
7231 	}
7232 	mutex_exit(&dcp->c_statelock);
7233 
7234 	return (error);
7235 }
7236 
7237 /*ARGSUSED*/
7238 static int
7239 cachefs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
7240 {
7241 	int error = 0;
7242 	struct cnode *cp = VTOC(vp);
7243 	fscache_t *fscp = C_TO_FSCACHE(cp);
7244 
7245 	/*
7246 	 * Cachefs only provides pass-through support for NFSv4,
7247 	 * and all vnode operations are passed through to the
7248 	 * back file system. For NFSv4 pass-through to work, only
7249 	 * connected operation is supported, the cnode backvp must
7250 	 * exist, and cachefs optional (eg., disconnectable) flags
7251 	 * are turned off. Assert these conditions, then bail
7252 	 * as  NFSv4 doesn't support VOP_FID.
7253 	 */
7254 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7255 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7256 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7257 		return (ENOTSUP);
7258 	}
7259 
7260 	mutex_enter(&cp->c_statelock);
7261 	if (fidp->fid_len < cp->c_metadata.md_cookie.fid_len) {
7262 		fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
7263 		error = ENOSPC;
7264 	} else {
7265 		bcopy(cp->c_metadata.md_cookie.fid_data, fidp->fid_data,
7266 		    cp->c_metadata.md_cookie.fid_len);
7267 		fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
7268 	}
7269 	mutex_exit(&cp->c_statelock);
7270 	return (error);
7271 }
7272 
7273 /* ARGSUSED2 */
7274 static int
7275 cachefs_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
7276 {
7277 	cnode_t *cp = VTOC(vp);
7278 
7279 	/*
7280 	 * XXX - This is ifdef'ed out for now. The problem -
7281 	 * getdents() acquires the read version of rwlock, then we come
7282 	 * into cachefs_readdir() and that wants to acquire the write version
7283 	 * of this lock (if its going to populate the directory). This is
7284 	 * a problem, this can be solved by introducing another lock in the
7285 	 * cnode.
7286 	 */
7287 /* XXX */
7288 	if (vp->v_type != VREG)
7289 		return (-1);
7290 	if (write_lock)
7291 		rw_enter(&cp->c_rwlock, RW_WRITER);
7292 	else
7293 		rw_enter(&cp->c_rwlock, RW_READER);
7294 	return (write_lock);
7295 }
7296 
7297 /* ARGSUSED */
7298 static void
7299 cachefs_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
7300 {
7301 	cnode_t *cp = VTOC(vp);
7302 	if (vp->v_type != VREG)
7303 		return;
7304 	rw_exit(&cp->c_rwlock);
7305 }
7306 
7307 /* ARGSUSED */
7308 static int
7309 cachefs_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
7310     caller_context_t *ct)
7311 {
7312 	return (0);
7313 }
7314 
7315 static int cachefs_lostpage = 0;
7316 /*
7317  * Return all the pages from [off..off+len] in file
7318  */
7319 /*ARGSUSED*/
7320 static int
7321 cachefs_getpage(struct vnode *vp, offset_t off, size_t len,
7322 	uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
7323 	caddr_t addr, enum seg_rw rw, cred_t *cr, caller_context_t *ct)
7324 {
7325 	cnode_t *cp = VTOC(vp);
7326 	int error;
7327 	fscache_t *fscp = C_TO_FSCACHE(cp);
7328 	cachefscache_t *cachep = fscp->fs_cache;
7329 	int held = 0;
7330 	int connected = 0;
7331 
7332 #ifdef CFSDEBUG
7333 	u_offset_t offx = (u_offset_t)off;
7334 
7335 	CFS_DEBUG(CFSDEBUG_VOPS)
7336 		printf("cachefs_getpage: ENTER vp %p off %lld len %lu rw %d\n",
7337 		    (void *)vp, offx, len, rw);
7338 #endif
7339 	if (getzoneid() != GLOBAL_ZONEID) {
7340 		error = EPERM;
7341 		goto out;
7342 	}
7343 
7344 	if (vp->v_flag & VNOMAP) {
7345 		error = ENOSYS;
7346 		goto out;
7347 	}
7348 
7349 	/* Call backfilesystem if NFSv4 */
7350 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7351 		error = cachefs_getpage_backfs_nfsv4(vp, off, len, protp, pl,
7352 		    plsz, seg, addr, rw, cr);
7353 		goto out;
7354 	}
7355 
7356 	/* XXX sam: make this do an async populate? */
7357 	if (pl == NULL) {
7358 		error = 0;
7359 		goto out;
7360 	}
7361 	if (protp != NULL)
7362 		*protp = PROT_ALL;
7363 
7364 	for (;;) {
7365 		/* get (or renew) access to the file system */
7366 		if (held) {
7367 			cachefs_cd_release(fscp);
7368 			held = 0;
7369 		}
7370 		error = cachefs_cd_access(fscp, connected, 0);
7371 		if (error)
7372 			break;
7373 		held = 1;
7374 
7375 		/*
7376 		 * If we are getting called as a side effect of a
7377 		 * cachefs_write()
7378 		 * operation the local file size might not be extended yet.
7379 		 * In this case we want to be able to return pages of zeroes.
7380 		 */
7381 		if ((u_offset_t)off + len >
7382 		    ((cp->c_size + PAGEOFFSET) & (offset_t)PAGEMASK)) {
7383 			if (seg != segkmap) {
7384 				error = EFAULT;
7385 				break;
7386 			}
7387 		}
7388 		if (len <= PAGESIZE)
7389 			error = cachefs_getapage(vp, (u_offset_t)off, len,
7390 			    protp, pl, plsz, seg, addr, rw, cr);
7391 		else
7392 			error = pvn_getpages(cachefs_getapage, vp,
7393 			    (u_offset_t)off, len, protp, pl, plsz, seg, addr,
7394 			    rw, cr);
7395 		if (error == 0)
7396 			break;
7397 
7398 		if (((cp->c_flags & CN_NOCACHE) && (error == ENOSPC)) ||
7399 		    error == EAGAIN) {
7400 			connected = 0;
7401 			continue;
7402 		}
7403 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
7404 			if (CFS_TIMEOUT(fscp, error)) {
7405 				cachefs_cd_release(fscp);
7406 				held = 0;
7407 				cachefs_cd_timedout(fscp);
7408 				connected = 0;
7409 				continue;
7410 			}
7411 		} else {
7412 			if (CFS_TIMEOUT(fscp, error)) {
7413 				if (cachefs_cd_access_miss(fscp)) {
7414 					if (len <= PAGESIZE)
7415 						error = cachefs_getapage_back(
7416 						    vp, (u_offset_t)off,
7417 						    len, protp, pl,
7418 						    plsz, seg, addr, rw, cr);
7419 					else
7420 						error = pvn_getpages(
7421 						    cachefs_getapage_back, vp,
7422 						    (u_offset_t)off, len,
7423 						    protp, pl,
7424 						    plsz, seg, addr, rw, cr);
7425 					if (!CFS_TIMEOUT(fscp, error) &&
7426 					    (error != EAGAIN))
7427 						break;
7428 					delay(5*hz);
7429 					connected = 0;
7430 					continue;
7431 				}
7432 				connected = 1;
7433 				continue;
7434 			}
7435 		}
7436 		break;
7437 	}
7438 
7439 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GETPAGE))
7440 		cachefs_log_getpage(cachep, error, vp->v_vfsp,
7441 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
7442 		    crgetuid(cr), off, len);
7443 
7444 	if (held) {
7445 		cachefs_cd_release(fscp);
7446 	}
7447 
7448 out:
7449 #ifdef CFS_CD_DEBUG
7450 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7451 #endif
7452 #ifdef CFSDEBUG
7453 	CFS_DEBUG(CFSDEBUG_VOPS)
7454 		printf("cachefs_getpage: EXIT vp %p error %d\n",
7455 		    (void *)vp, error);
7456 #endif
7457 	return (error);
7458 }
7459 
7460 /*
7461  * cachefs_getpage_backfs_nfsv4
7462  *
7463  * Call NFSv4 back filesystem to handle the getpage (cachefs
7464  * pass-through support for NFSv4).
7465  */
7466 static int
7467 cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off, size_t len,
7468 			uint_t *protp, struct page *pl[], size_t plsz,
7469 			struct seg *seg, caddr_t addr, enum seg_rw rw,
7470 			cred_t *cr)
7471 {
7472 	cnode_t *cp = VTOC(vp);
7473 	fscache_t *fscp = C_TO_FSCACHE(cp);
7474 	vnode_t *backvp;
7475 	int error;
7476 
7477 	/*
7478 	 * For NFSv4 pass-through to work, only connected operation is
7479 	 * supported, the cnode backvp must exist, and cachefs optional
7480 	 * (eg., disconnectable) flags are turned off. Assert these
7481 	 * conditions for the getpage operation.
7482 	 */
7483 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7484 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7485 
7486 	/* Call backfs vnode op after extracting backvp */
7487 	mutex_enter(&cp->c_statelock);
7488 	backvp = cp->c_backvp;
7489 	mutex_exit(&cp->c_statelock);
7490 
7491 	CFS_DPRINT_BACKFS_NFSV4(fscp,
7492 	    ("cachefs_getpage_backfs_nfsv4: cnode %p, backvp %p\n",
7493 	    cp, backvp));
7494 	error = VOP_GETPAGE(backvp, off, len, protp, pl, plsz, seg,
7495 	    addr, rw, cr, NULL);
7496 
7497 	return (error);
7498 }
7499 
7500 /*
7501  * Called from pvn_getpages or cachefs_getpage to get a particular page.
7502  */
7503 /*ARGSUSED*/
7504 static int
7505 cachefs_getapage(struct vnode *vp, u_offset_t off, size_t len, uint_t *protp,
7506 	struct page *pl[], size_t plsz, struct seg *seg, caddr_t addr,
7507 	enum seg_rw rw, cred_t *cr)
7508 {
7509 	cnode_t *cp = VTOC(vp);
7510 	page_t **ppp, *pp = NULL;
7511 	fscache_t *fscp = C_TO_FSCACHE(cp);
7512 	cachefscache_t *cachep = fscp->fs_cache;
7513 	int error = 0;
7514 	struct page **ourpl;
7515 	struct page *ourstackpl[17]; /* see ASSERT() below for 17 */
7516 	int index = 0;
7517 	int downgrade;
7518 	int have_statelock = 0;
7519 	u_offset_t popoff;
7520 	size_t popsize = 0;
7521 
7522 	/*LINTED*/
7523 	ASSERT(((DEF_POP_SIZE / PAGESIZE) + 1) <= 17);
7524 
7525 	if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
7526 		ourpl = cachefs_kmem_alloc(sizeof (struct page *) *
7527 		    ((fscp->fs_info.fi_popsize / PAGESIZE) + 1), KM_SLEEP);
7528 	else
7529 		ourpl = ourstackpl;
7530 
7531 	ourpl[0] = NULL;
7532 	off = off & (offset_t)PAGEMASK;
7533 again:
7534 	/*
7535 	 * Look for the page
7536 	 */
7537 	if (page_exists(vp, off) == 0) {
7538 		/*
7539 		 * Need to do work to get the page.
7540 		 * Grab our lock because we are going to
7541 		 * modify the state of the cnode.
7542 		 */
7543 		if (! have_statelock) {
7544 			mutex_enter(&cp->c_statelock);
7545 			have_statelock = 1;
7546 		}
7547 		/*
7548 		 * If we're in NOCACHE mode, we will need a backvp
7549 		 */
7550 		if (cp->c_flags & CN_NOCACHE) {
7551 			if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
7552 				error = ETIMEDOUT;
7553 				goto out;
7554 			}
7555 			if (cp->c_backvp == NULL) {
7556 				error = cachefs_getbackvp(fscp, cp);
7557 				if (error)
7558 					goto out;
7559 			}
7560 			error = VOP_GETPAGE(cp->c_backvp, off,
7561 			    PAGESIZE, protp, ourpl, PAGESIZE, seg,
7562 			    addr, S_READ, cr, NULL);
7563 			/*
7564 			 * backfs returns EFAULT when we are trying for a
7565 			 * page beyond EOF but cachefs has the knowledge that
7566 			 * it is not beyond EOF be cause cp->c_size is
7567 			 * greater then the offset requested.
7568 			 */
7569 			if (error == EFAULT) {
7570 				error = 0;
7571 				pp = page_create_va(vp, off, PAGESIZE,
7572 				    PG_EXCL | PG_WAIT, seg, addr);
7573 				if (pp == NULL)
7574 					goto again;
7575 				pagezero(pp, 0, PAGESIZE);
7576 				pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
7577 				goto out;
7578 			}
7579 			if (error)
7580 				goto out;
7581 			goto getpages;
7582 		}
7583 		/*
7584 		 * We need a front file. If we can't get it,
7585 		 * put the cnode in NOCACHE mode and try again.
7586 		 */
7587 		if (cp->c_frontvp == NULL) {
7588 			error = cachefs_getfrontfile(cp);
7589 			if (error) {
7590 				cachefs_nocache(cp);
7591 				error = EAGAIN;
7592 				goto out;
7593 			}
7594 		}
7595 		/*
7596 		 * Check if the front file needs population.
7597 		 * If population is necessary, make sure we have a
7598 		 * backvp as well. We will get the page from the backvp.
7599 		 * bug 4152459-
7600 		 * But if the file system is in disconnected mode
7601 		 * and the file is a local file then do not check the
7602 		 * allocmap.
7603 		 */
7604 		if (((fscp->fs_cdconnected == CFS_CD_CONNECTED) ||
7605 		    ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) &&
7606 		    (cachefs_check_allocmap(cp, off) == 0)) {
7607 			if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
7608 				error = ETIMEDOUT;
7609 				goto out;
7610 			}
7611 			if (cp->c_backvp == NULL) {
7612 				error = cachefs_getbackvp(fscp, cp);
7613 				if (error)
7614 					goto out;
7615 			}
7616 			if (cp->c_filegrp->fg_flags & CFS_FG_WRITE) {
7617 				cachefs_cluster_allocmap(off, &popoff,
7618 				    &popsize,
7619 				    fscp->fs_info.fi_popsize, cp);
7620 				if (popsize != 0) {
7621 					error = cachefs_populate(cp,
7622 					    popoff, popsize,
7623 					    cp->c_frontvp, cp->c_backvp,
7624 					    cp->c_size, cr);
7625 					if (error) {
7626 						cachefs_nocache(cp);
7627 						error = EAGAIN;
7628 						goto out;
7629 					} else {
7630 						cp->c_flags |=
7631 						    CN_UPDATED |
7632 						    CN_NEED_FRONT_SYNC |
7633 						    CN_POPULATION_PENDING;
7634 					}
7635 					popsize = popsize - (off - popoff);
7636 				} else {
7637 					popsize = PAGESIZE;
7638 				}
7639 			}
7640 			/* else XXX assert CN_NOCACHE? */
7641 			error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
7642 			    PAGESIZE, protp, ourpl, popsize,
7643 			    seg, addr, S_READ, cr, NULL);
7644 			if (error)
7645 				goto out;
7646 			fscp->fs_stats.st_misses++;
7647 		} else {
7648 			if (cp->c_flags & CN_POPULATION_PENDING) {
7649 				error = VOP_FSYNC(cp->c_frontvp, FSYNC, cr,
7650 				    NULL);
7651 				cp->c_flags &= ~CN_POPULATION_PENDING;
7652 				if (error) {
7653 					cachefs_nocache(cp);
7654 					error = EAGAIN;
7655 					goto out;
7656 				}
7657 			}
7658 			/*
7659 			 * File was populated so we get the page from the
7660 			 * frontvp
7661 			 */
7662 			error = VOP_GETPAGE(cp->c_frontvp, (offset_t)off,
7663 			    PAGESIZE, protp, ourpl, PAGESIZE, seg, addr,
7664 			    rw, cr, NULL);
7665 			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GPFRONT))
7666 				cachefs_log_gpfront(cachep, error,
7667 				    fscp->fs_cfsvfsp,
7668 				    &cp->c_metadata.md_cookie, cp->c_fileno,
7669 				    crgetuid(cr), off, PAGESIZE);
7670 			if (error) {
7671 				cachefs_nocache(cp);
7672 				error = EAGAIN;
7673 				goto out;
7674 			}
7675 			fscp->fs_stats.st_hits++;
7676 		}
7677 getpages:
7678 		ASSERT(have_statelock);
7679 		if (have_statelock) {
7680 			mutex_exit(&cp->c_statelock);
7681 			have_statelock = 0;
7682 		}
7683 		downgrade = 0;
7684 		for (ppp = ourpl; *ppp; ppp++) {
7685 			if ((*ppp)->p_offset < off) {
7686 				index++;
7687 				page_unlock(*ppp);
7688 				continue;
7689 			}
7690 			if (PAGE_SHARED(*ppp)) {
7691 				if (page_tryupgrade(*ppp) == 0) {
7692 					for (ppp = &ourpl[index]; *ppp; ppp++)
7693 						page_unlock(*ppp);
7694 					error = EAGAIN;
7695 					goto out;
7696 				}
7697 				downgrade = 1;
7698 			}
7699 			ASSERT(PAGE_EXCL(*ppp));
7700 			(void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
7701 			page_rename(*ppp, vp, (*ppp)->p_offset);
7702 		}
7703 		pl[0] = ourpl[index];
7704 		pl[1] = NULL;
7705 		if (downgrade) {
7706 			page_downgrade(ourpl[index]);
7707 		}
7708 		/* Unlock the rest of the pages from the cluster */
7709 		for (ppp = &ourpl[index+1]; *ppp; ppp++)
7710 			page_unlock(*ppp);
7711 	} else {
7712 		ASSERT(! have_statelock);
7713 		if (have_statelock) {
7714 			mutex_exit(&cp->c_statelock);
7715 			have_statelock = 0;
7716 		}
7717 		/* XXX SE_SHARED probably isn't what we *always* want */
7718 		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
7719 			cachefs_lostpage++;
7720 			goto again;
7721 		}
7722 		pl[0] = pp;
7723 		pl[1] = NULL;
7724 		/* XXX increment st_hits?  i don't think so, but... */
7725 	}
7726 
7727 out:
7728 	if (have_statelock) {
7729 		mutex_exit(&cp->c_statelock);
7730 		have_statelock = 0;
7731 	}
7732 	if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
7733 		cachefs_kmem_free(ourpl, sizeof (struct page *) *
7734 		    ((fscp->fs_info.fi_popsize / PAGESIZE) + 1));
7735 	return (error);
7736 }
7737 
7738 /* gets a page but only from the back fs */
7739 /*ARGSUSED*/
7740 static int
7741 cachefs_getapage_back(struct vnode *vp, u_offset_t off, size_t len,
7742     uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
7743     caddr_t addr, enum seg_rw rw, cred_t *cr)
7744 {
7745 	cnode_t *cp = VTOC(vp);
7746 	page_t **ppp, *pp = NULL;
7747 	fscache_t *fscp = C_TO_FSCACHE(cp);
7748 	int error = 0;
7749 	struct page *ourpl[17];
7750 	int index = 0;
7751 	int have_statelock = 0;
7752 	int downgrade;
7753 
7754 	/*
7755 	 * Grab the cnode statelock so the cnode state won't change
7756 	 * while we're in here.
7757 	 */
7758 	ourpl[0] = NULL;
7759 	off = off & (offset_t)PAGEMASK;
7760 again:
7761 	if (page_exists(vp, off) == 0) {
7762 		if (! have_statelock) {
7763 			mutex_enter(&cp->c_statelock);
7764 			have_statelock = 1;
7765 		}
7766 
7767 		if (cp->c_backvp == NULL) {
7768 			error = cachefs_getbackvp(fscp, cp);
7769 			if (error)
7770 				goto out;
7771 		}
7772 		error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
7773 		    PAGESIZE, protp, ourpl, PAGESIZE, seg,
7774 		    addr, S_READ, cr, NULL);
7775 		if (error)
7776 			goto out;
7777 
7778 		if (have_statelock) {
7779 			mutex_exit(&cp->c_statelock);
7780 			have_statelock = 0;
7781 		}
7782 		downgrade = 0;
7783 		for (ppp = ourpl; *ppp; ppp++) {
7784 			if ((*ppp)->p_offset < off) {
7785 				index++;
7786 				page_unlock(*ppp);
7787 				continue;
7788 			}
7789 			if (PAGE_SHARED(*ppp)) {
7790 				if (page_tryupgrade(*ppp) == 0) {
7791 					for (ppp = &ourpl[index]; *ppp; ppp++)
7792 						page_unlock(*ppp);
7793 					error = EAGAIN;
7794 					goto out;
7795 				}
7796 				downgrade = 1;
7797 			}
7798 			ASSERT(PAGE_EXCL(*ppp));
7799 			(void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
7800 			page_rename(*ppp, vp, (*ppp)->p_offset);
7801 		}
7802 		pl[0] = ourpl[index];
7803 		pl[1] = NULL;
7804 		if (downgrade) {
7805 			page_downgrade(ourpl[index]);
7806 		}
7807 		/* Unlock the rest of the pages from the cluster */
7808 		for (ppp = &ourpl[index+1]; *ppp; ppp++)
7809 			page_unlock(*ppp);
7810 	} else {
7811 		ASSERT(! have_statelock);
7812 		if (have_statelock) {
7813 			mutex_exit(&cp->c_statelock);
7814 			have_statelock = 0;
7815 		}
7816 		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
7817 			cachefs_lostpage++;
7818 			goto again;
7819 		}
7820 		pl[0] = pp;
7821 		pl[1] = NULL;
7822 	}
7823 
7824 out:
7825 	if (have_statelock) {
7826 		mutex_exit(&cp->c_statelock);
7827 		have_statelock = 0;
7828 	}
7829 	return (error);
7830 }
7831 
7832 /*ARGSUSED*/
7833 static int
7834 cachefs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
7835     caller_context_t *ct)
7836 {
7837 	cnode_t *cp = VTOC(vp);
7838 	int error = 0;
7839 	fscache_t *fscp = C_TO_FSCACHE(cp);
7840 	int held = 0;
7841 	int connected = 0;
7842 
7843 	if (getzoneid() != GLOBAL_ZONEID)
7844 		return (EPERM);
7845 
7846 	/* Call backfilesytem if NFSv4 */
7847 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7848 		error = cachefs_putpage_backfs_nfsv4(vp, off, len, flags, cr);
7849 		goto out;
7850 	}
7851 
7852 	for (;;) {
7853 		/* get (or renew) access to the file system */
7854 		if (held) {
7855 			cachefs_cd_release(fscp);
7856 			held = 0;
7857 		}
7858 		error = cachefs_cd_access(fscp, connected, 1);
7859 		if (error)
7860 			break;
7861 		held = 1;
7862 
7863 		error = cachefs_putpage_common(vp, off, len, flags, cr);
7864 		if (error == 0)
7865 			break;
7866 
7867 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
7868 			if (CFS_TIMEOUT(fscp, error)) {
7869 				cachefs_cd_release(fscp);
7870 				held = 0;
7871 				cachefs_cd_timedout(fscp);
7872 				connected = 0;
7873 				continue;
7874 			}
7875 		} else {
7876 			if (NOMEMWAIT()) {
7877 				error = 0;
7878 				goto out;
7879 			}
7880 			if (CFS_TIMEOUT(fscp, error)) {
7881 				connected = 1;
7882 				continue;
7883 			}
7884 		}
7885 		break;
7886 	}
7887 
7888 out:
7889 
7890 	if (held) {
7891 		cachefs_cd_release(fscp);
7892 	}
7893 
7894 #ifdef CFS_CD_DEBUG
7895 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7896 #endif
7897 	return (error);
7898 }
7899 
7900 /*
7901  * cachefs_putpage_backfs_nfsv4
7902  *
7903  * Call NFSv4 back filesystem to handle the putpage (cachefs
7904  * pass-through support for NFSv4).
7905  */
7906 static int
7907 cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off, size_t len, int flags,
7908 			cred_t *cr)
7909 {
7910 	cnode_t *cp = VTOC(vp);
7911 	fscache_t *fscp = C_TO_FSCACHE(cp);
7912 	vnode_t *backvp;
7913 	int error;
7914 
7915 	/*
7916 	 * For NFSv4 pass-through to work, only connected operation is
7917 	 * supported, the cnode backvp must exist, and cachefs optional
7918 	 * (eg., disconnectable) flags are turned off. Assert these
7919 	 * conditions for the putpage operation.
7920 	 */
7921 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7922 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7923 
7924 	/* Call backfs vnode op after extracting backvp */
7925 	mutex_enter(&cp->c_statelock);
7926 	backvp = cp->c_backvp;
7927 	mutex_exit(&cp->c_statelock);
7928 
7929 	CFS_DPRINT_BACKFS_NFSV4(fscp,
7930 	    ("cachefs_putpage_backfs_nfsv4: cnode %p, backvp %p\n",
7931 	    cp, backvp));
7932 	error = VOP_PUTPAGE(backvp, off, len, flags, cr, NULL);
7933 
7934 	return (error);
7935 }
7936 
7937 /*
7938  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
7939  * If len == 0, do from off to EOF.
7940  *
7941  * The normal cases should be len == 0 & off == 0 (entire vp list),
7942  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
7943  * (from pageout).
7944  */
7945 
7946 /*ARGSUSED*/
7947 int
7948 cachefs_putpage_common(struct vnode *vp, offset_t off, size_t len,
7949     int flags, cred_t *cr)
7950 {
7951 	struct cnode *cp  = VTOC(vp);
7952 	struct page *pp;
7953 	size_t io_len;
7954 	u_offset_t eoff, io_off;
7955 	int error = 0;
7956 	fscache_t *fscp = C_TO_FSCACHE(cp);
7957 	cachefscache_t *cachep = fscp->fs_cache;
7958 
7959 	if (len == 0 && (flags & B_INVAL) == 0 && vn_is_readonly(vp)) {
7960 		return (0);
7961 	}
7962 	if (!vn_has_cached_data(vp) || (off >= cp->c_size &&
7963 	    (flags & B_INVAL) == 0))
7964 		return (0);
7965 
7966 	/*
7967 	 * Should never have cached data for the cachefs vnode
7968 	 * if NFSv4 is in use.
7969 	 */
7970 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
7971 
7972 	/*
7973 	 * If this is an async putpage let a thread handle it.
7974 	 */
7975 	if (flags & B_ASYNC) {
7976 		struct cachefs_req *rp;
7977 		int tflags = (flags & ~(B_ASYNC|B_DONTNEED));
7978 
7979 		if (ttoproc(curthread) == proc_pageout) {
7980 			/*
7981 			 * If this is the page daemon we
7982 			 * do the push synchronously (Dangerous!) and hope
7983 			 * we can free enough to keep running...
7984 			 */
7985 			flags &= ~B_ASYNC;
7986 			goto again;
7987 		}
7988 
7989 		if (! cachefs_async_okay()) {
7990 
7991 			/*
7992 			 * this is somewhat like NFS's behavior.  keep
7993 			 * the system from thrashing.  we've seen
7994 			 * cases where async queues get out of
7995 			 * control, especially if
7996 			 * madvise(MADV_SEQUENTIAL) is done on a large
7997 			 * mmap()ed file that is read sequentially.
7998 			 */
7999 
8000 			flags &= ~B_ASYNC;
8001 			goto again;
8002 		}
8003 
8004 		/*
8005 		 * if no flags other than B_ASYNC were set,
8006 		 * we coalesce putpage requests into a single one for the
8007 		 * whole file (len = off = 0).  If such a request is
8008 		 * already queued, we're done.
8009 		 *
8010 		 * If there are other flags set (e.g., B_INVAL), we don't
8011 		 * attempt to coalesce and we use the specified length and
8012 		 * offset.
8013 		 */
8014 		rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
8015 		mutex_enter(&cp->c_iomutex);
8016 		if ((cp->c_ioflags & CIO_PUTPAGES) == 0 || tflags != 0) {
8017 			rp->cfs_cmd = CFS_PUTPAGE;
8018 			rp->cfs_req_u.cu_putpage.cp_vp = vp;
8019 			if (tflags == 0) {
8020 				off = len = 0;
8021 				cp->c_ioflags |= CIO_PUTPAGES;
8022 			}
8023 			rp->cfs_req_u.cu_putpage.cp_off = off;
8024 			rp->cfs_req_u.cu_putpage.cp_len = (uint_t)len;
8025 			rp->cfs_req_u.cu_putpage.cp_flags = flags & ~B_ASYNC;
8026 			rp->cfs_cr = cr;
8027 			crhold(rp->cfs_cr);
8028 			VN_HOLD(vp);
8029 			cp->c_nio++;
8030 			cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));
8031 		} else {
8032 			kmem_cache_free(cachefs_req_cache, rp);
8033 		}
8034 
8035 		mutex_exit(&cp->c_iomutex);
8036 		return (0);
8037 	}
8038 
8039 
8040 again:
8041 	if (len == 0) {
8042 		/*
8043 		 * Search the entire vp list for pages >= off
8044 		 */
8045 		error = pvn_vplist_dirty(vp, off, cachefs_push, flags, cr);
8046 	} else {
8047 		/*
8048 		 * Do a range from [off...off + len] looking for pages
8049 		 * to deal with.
8050 		 */
8051 		eoff = (u_offset_t)off + len;
8052 		for (io_off = off; io_off < eoff && io_off < cp->c_size;
8053 		    io_off += io_len) {
8054 			/*
8055 			 * If we are not invalidating, synchronously
8056 			 * freeing or writing pages use the routine
8057 			 * page_lookup_nowait() to prevent reclaiming
8058 			 * them from the free list.
8059 			 */
8060 			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
8061 				pp = page_lookup(vp, io_off,
8062 				    (flags & (B_INVAL | B_FREE)) ?
8063 				    SE_EXCL : SE_SHARED);
8064 			} else {
8065 				/* XXX this looks like dead code */
8066 				pp = page_lookup_nowait(vp, io_off,
8067 				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
8068 			}
8069 
8070 			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
8071 				io_len = PAGESIZE;
8072 			else {
8073 				error = cachefs_push(vp, pp, &io_off,
8074 				    &io_len, flags, cr);
8075 				if (error != 0)
8076 					break;
8077 				/*
8078 				 * "io_off" and "io_len" are returned as
8079 				 * the range of pages we actually wrote.
8080 				 * This allows us to skip ahead more quickly
8081 				 * since several pages may've been dealt
8082 				 * with by this iteration of the loop.
8083 				 */
8084 			}
8085 		}
8086 	}
8087 
8088 	if (error == 0 && off == 0 && (len == 0 || len >= cp->c_size)) {
8089 		cp->c_flags &= ~CDIRTY;
8090 	}
8091 
8092 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_PUTPAGE))
8093 		cachefs_log_putpage(cachep, error, fscp->fs_cfsvfsp,
8094 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
8095 		    crgetuid(cr), off, len);
8096 
8097 	return (error);
8098 
8099 }
8100 
8101 /*ARGSUSED*/
8102 static int
8103 cachefs_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
8104     size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
8105     caller_context_t *ct)
8106 {
8107 	cnode_t *cp = VTOC(vp);
8108 	fscache_t *fscp = C_TO_FSCACHE(cp);
8109 	struct segvn_crargs vn_a;
8110 	int error;
8111 	int held = 0;
8112 	int writing;
8113 	int connected = 0;
8114 
8115 #ifdef CFSDEBUG
8116 	u_offset_t offx = (u_offset_t)off;
8117 
8118 	CFS_DEBUG(CFSDEBUG_VOPS)
8119 		printf("cachefs_map: ENTER vp %p off %lld len %lu flags %d\n",
8120 		    (void *)vp, offx, len, flags);
8121 #endif
8122 	if (getzoneid() != GLOBAL_ZONEID) {
8123 		error = EPERM;
8124 		goto out;
8125 	}
8126 
8127 	if (vp->v_flag & VNOMAP) {
8128 		error = ENOSYS;
8129 		goto out;
8130 	}
8131 	if (off < 0 || (offset_t)(off + len) < 0) {
8132 		error = ENXIO;
8133 		goto out;
8134 	}
8135 	if (vp->v_type != VREG) {
8136 		error = ENODEV;
8137 		goto out;
8138 	}
8139 
8140 	/*
8141 	 * Check to see if the vnode is currently marked as not cachable.
8142 	 * If so, we have to refuse the map request as this violates the
8143 	 * don't cache attribute.
8144 	 */
8145 	if (vp->v_flag & VNOCACHE)
8146 		return (EAGAIN);
8147 
8148 #ifdef OBSOLETE
8149 	/*
8150 	 * If file is being locked, disallow mapping.
8151 	 */
8152 	if (vn_has_flocks(vp)) {
8153 		error = EAGAIN;
8154 		goto out;
8155 	}
8156 #endif
8157 
8158 	/* call backfilesystem if NFSv4 */
8159 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
8160 		error = cachefs_map_backfs_nfsv4(vp, off, as, addrp, len, prot,
8161 		    maxprot, flags, cr);
8162 		goto out;
8163 	}
8164 
8165 	writing = (prot & PROT_WRITE && ((flags & MAP_PRIVATE) == 0));
8166 
8167 	for (;;) {
8168 		/* get (or renew) access to the file system */
8169 		if (held) {
8170 			cachefs_cd_release(fscp);
8171 			held = 0;
8172 		}
8173 		error = cachefs_cd_access(fscp, connected, writing);
8174 		if (error)
8175 			break;
8176 		held = 1;
8177 
8178 		if (writing) {
8179 			mutex_enter(&cp->c_statelock);
8180 			if (CFS_ISFS_WRITE_AROUND(fscp)) {
8181 				if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
8182 					connected = 1;
8183 					continue;
8184 				} else {
8185 					cachefs_nocache(cp);
8186 				}
8187 			}
8188 
8189 			/*
8190 			 * CN_MAPWRITE is for an optimization in cachefs_delmap.
8191 			 * If CN_MAPWRITE is not set then cachefs_delmap does
8192 			 * not need to try to push out any pages.
8193 			 * This bit gets cleared when the cnode goes inactive.
8194 			 */
8195 			cp->c_flags |= CN_MAPWRITE;
8196 
8197 			mutex_exit(&cp->c_statelock);
8198 		}
8199 		break;
8200 	}
8201 
8202 	if (held) {
8203 		cachefs_cd_release(fscp);
8204 	}
8205 
8206 	as_rangelock(as);
8207 	error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
8208 	if (error != 0) {
8209 		as_rangeunlock(as);
8210 		goto out;
8211 	}
8212 
8213 	/*
8214 	 * package up all the data passed in into a segvn_args struct and
8215 	 * call as_map with segvn_create function to create a new segment
8216 	 * in the address space.
8217 	 */
8218 	vn_a.vp = vp;
8219 	vn_a.offset = off;
8220 	vn_a.type = flags & MAP_TYPE;
8221 	vn_a.prot = (uchar_t)prot;
8222 	vn_a.maxprot = (uchar_t)maxprot;
8223 	vn_a.cred = cr;
8224 	vn_a.amp = NULL;
8225 	vn_a.flags = flags & ~MAP_TYPE;
8226 	vn_a.szc = 0;
8227 	vn_a.lgrp_mem_policy_flags = 0;
8228 	error = as_map(as, *addrp, len, segvn_create, &vn_a);
8229 	as_rangeunlock(as);
8230 out:
8231 
8232 #ifdef CFS_CD_DEBUG
8233 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8234 #endif
8235 #ifdef CFSDEBUG
8236 	CFS_DEBUG(CFSDEBUG_VOPS)
8237 		printf("cachefs_map: EXIT vp %p error %d\n", (void *)vp, error);
8238 #endif
8239 	return (error);
8240 }
8241 
8242 /*
8243  * cachefs_map_backfs_nfsv4
8244  *
8245  * Call NFSv4 back filesystem to handle the map (cachefs
8246  * pass-through support for NFSv4).
8247  */
8248 static int
8249 cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off, struct as *as,
8250 			caddr_t *addrp, size_t len, uchar_t prot,
8251 			uchar_t maxprot, uint_t flags, cred_t *cr)
8252 {
8253 	cnode_t *cp = VTOC(vp);
8254 	fscache_t *fscp = C_TO_FSCACHE(cp);
8255 	vnode_t *backvp;
8256 	int error;
8257 
8258 	/*
8259 	 * For NFSv4 pass-through to work, only connected operation is
8260 	 * supported, the cnode backvp must exist, and cachefs optional
8261 	 * (eg., disconnectable) flags are turned off. Assert these
8262 	 * conditions for the map operation.
8263 	 */
8264 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8265 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8266 
8267 	/* Call backfs vnode op after extracting backvp */
8268 	mutex_enter(&cp->c_statelock);
8269 	backvp = cp->c_backvp;
8270 	mutex_exit(&cp->c_statelock);
8271 
8272 	CFS_DPRINT_BACKFS_NFSV4(fscp,
8273 	    ("cachefs_map_backfs_nfsv4: cnode %p, backvp %p\n",
8274 	    cp, backvp));
8275 	error = VOP_MAP(backvp, off, as, addrp, len, prot, maxprot, flags, cr,
8276 	    NULL);
8277 
8278 	return (error);
8279 }
8280 
8281 /*ARGSUSED*/
8282 static int
8283 cachefs_addmap(struct vnode *vp, offset_t off, struct as *as,
8284     caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
8285     cred_t *cr, caller_context_t *ct)
8286 {
8287 	cnode_t *cp = VTOC(vp);
8288 	fscache_t *fscp = C_TO_FSCACHE(cp);
8289 
8290 	if (getzoneid() != GLOBAL_ZONEID)
8291 		return (EPERM);
8292 
8293 	if (vp->v_flag & VNOMAP)
8294 		return (ENOSYS);
8295 
8296 	/*
8297 	 * Check this is not an NFSv4 filesystem, as the mapping
8298 	 * is not done on the cachefs filesystem if NFSv4 is in
8299 	 * use.
8300 	 */
8301 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8302 
8303 	mutex_enter(&cp->c_statelock);
8304 	cp->c_mapcnt += btopr(len);
8305 	mutex_exit(&cp->c_statelock);
8306 	return (0);
8307 }
8308 
8309 /*ARGSUSED*/
8310 static int
8311 cachefs_delmap(struct vnode *vp, offset_t off, struct as *as,
8312 	caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
8313 	cred_t *cr, caller_context_t *ct)
8314 {
8315 	cnode_t *cp = VTOC(vp);
8316 	fscache_t *fscp = C_TO_FSCACHE(cp);
8317 	int error;
8318 	int connected = 0;
8319 	int held = 0;
8320 
8321 	/*
8322 	 * The file may be passed in to (or inherited into) the zone, so we
8323 	 * need to let this operation go through since it happens as part of
8324 	 * exiting.
8325 	 */
8326 	if (vp->v_flag & VNOMAP)
8327 		return (ENOSYS);
8328 
8329 	/*
8330 	 * Check this is not an NFSv4 filesystem, as the mapping
8331 	 * is not done on the cachefs filesystem if NFSv4 is in
8332 	 * use.
8333 	 */
8334 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8335 
8336 	mutex_enter(&cp->c_statelock);
8337 	cp->c_mapcnt -= btopr(len);
8338 	ASSERT(cp->c_mapcnt >= 0);
8339 	mutex_exit(&cp->c_statelock);
8340 
8341 	if (cp->c_mapcnt || !vn_has_cached_data(vp) ||
8342 	    ((cp->c_flags & CN_MAPWRITE) == 0))
8343 		return (0);
8344 
8345 	for (;;) {
8346 		/* get (or renew) access to the file system */
8347 		if (held) {
8348 			cachefs_cd_release(fscp);
8349 			held = 0;
8350 		}
8351 		error = cachefs_cd_access(fscp, connected, 1);
8352 		if (error)
8353 			break;
8354 		held = 1;
8355 		connected = 0;
8356 
8357 		error = cachefs_putpage_common(vp, (offset_t)0,
8358 		    (uint_t)0, 0, cr);
8359 		if (CFS_TIMEOUT(fscp, error)) {
8360 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8361 				cachefs_cd_release(fscp);
8362 				held = 0;
8363 				cachefs_cd_timedout(fscp);
8364 				continue;
8365 			} else {
8366 				connected = 1;
8367 				continue;
8368 			}
8369 		}
8370 
8371 		/* if no space left in cache, wait until connected */
8372 		if ((error == ENOSPC) &&
8373 		    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
8374 			connected = 1;
8375 			continue;
8376 		}
8377 
8378 		mutex_enter(&cp->c_statelock);
8379 		if (!error)
8380 			error = cp->c_error;
8381 		cp->c_error = 0;
8382 		mutex_exit(&cp->c_statelock);
8383 		break;
8384 	}
8385 
8386 	if (held)
8387 		cachefs_cd_release(fscp);
8388 
8389 #ifdef CFS_CD_DEBUG
8390 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8391 #endif
8392 	return (error);
8393 }
8394 
8395 /* ARGSUSED */
8396 static int
8397 cachefs_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
8398 	offset_t offset, struct flk_callback *flk_cbp, cred_t *cr,
8399 	caller_context_t *ct)
8400 {
8401 	struct cnode *cp = VTOC(vp);
8402 	int error;
8403 	struct fscache *fscp = C_TO_FSCACHE(cp);
8404 	vnode_t *backvp;
8405 	int held = 0;
8406 	int connected = 0;
8407 
8408 	if (getzoneid() != GLOBAL_ZONEID)
8409 		return (EPERM);
8410 
8411 	if ((cmd != F_GETLK) && (cmd != F_SETLK) && (cmd != F_SETLKW))
8412 		return (EINVAL);
8413 
8414 	/* Disallow locking of files that are currently mapped */
8415 	if (((cmd == F_SETLK) || (cmd == F_SETLKW)) && (cp->c_mapcnt > 0)) {
8416 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8417 		return (EAGAIN);
8418 	}
8419 
8420 	/*
8421 	 * Cachefs only provides pass-through support for NFSv4,
8422 	 * and all vnode operations are passed through to the
8423 	 * back file system. For NFSv4 pass-through to work, only
8424 	 * connected operation is supported, the cnode backvp must
8425 	 * exist, and cachefs optional (eg., disconnectable) flags
8426 	 * are turned off. Assert these conditions to ensure that
8427 	 * the backfilesystem is called for the frlock operation.
8428 	 */
8429 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8430 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8431 
8432 	/* XXX bob: nfs does a bunch more checks than we do */
8433 	if (CFS_ISFS_LLOCK(fscp)) {
8434 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8435 		return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
8436 	}
8437 
8438 	for (;;) {
8439 		/* get (or renew) access to the file system */
8440 		if (held) {
8441 			/* Won't loop with NFSv4 connected behavior */
8442 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8443 			cachefs_cd_release(fscp);
8444 			held = 0;
8445 		}
8446 		error = cachefs_cd_access(fscp, connected, 0);
8447 		if (error)
8448 			break;
8449 		held = 1;
8450 
8451 		/* if not connected, quit or wait */
8452 		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
8453 			connected = 1;
8454 			continue;
8455 		}
8456 
8457 		/* nocache the file */
8458 		if ((cp->c_flags & CN_NOCACHE) == 0 &&
8459 		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8460 			mutex_enter(&cp->c_statelock);
8461 			cachefs_nocache(cp);
8462 			mutex_exit(&cp->c_statelock);
8463 		}
8464 
8465 		/*
8466 		 * XXX bob: probably should do a consistency check
8467 		 * Pass arguments unchanged if NFSv4 is the backfs.
8468 		 */
8469 		if (bfp->l_whence == 2 && CFS_ISFS_BACKFS_NFSV4(fscp) == 0) {
8470 			bfp->l_start += cp->c_size;
8471 			bfp->l_whence = 0;
8472 		}
8473 
8474 		/* get the back vp */
8475 		mutex_enter(&cp->c_statelock);
8476 		if (cp->c_backvp == NULL) {
8477 			error = cachefs_getbackvp(fscp, cp);
8478 			if (error) {
8479 				mutex_exit(&cp->c_statelock);
8480 				break;
8481 			}
8482 		}
8483 		backvp = cp->c_backvp;
8484 		VN_HOLD(backvp);
8485 		mutex_exit(&cp->c_statelock);
8486 
8487 		/*
8488 		 * make sure we can flush currently dirty pages before
8489 		 * allowing the lock
8490 		 */
8491 		if (bfp->l_type != F_UNLCK && cmd != F_GETLK &&
8492 		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8493 			error = cachefs_putpage(
8494 			    vp, (offset_t)0, 0, B_INVAL, cr, ct);
8495 			if (error) {
8496 				error = ENOLCK;
8497 				VN_RELE(backvp);
8498 				break;
8499 			}
8500 		}
8501 
8502 		/* do lock on the back file */
8503 		CFS_DPRINT_BACKFS_NFSV4(fscp,
8504 		    ("cachefs_frlock (nfsv4): cp %p, backvp %p\n",
8505 		    cp, backvp));
8506 		error = VOP_FRLOCK(backvp, cmd, bfp, flag, offset, NULL, cr,
8507 		    ct);
8508 		VN_RELE(backvp);
8509 		if (CFS_TIMEOUT(fscp, error)) {
8510 			connected = 1;
8511 			continue;
8512 		}
8513 		break;
8514 	}
8515 
8516 	if (held) {
8517 		cachefs_cd_release(fscp);
8518 	}
8519 
8520 	/*
8521 	 * If we are setting a lock mark the vnode VNOCACHE so the page
8522 	 * cache does not give inconsistent results on locked files shared
8523 	 * between clients.  The VNOCACHE flag is never turned off as long
8524 	 * as the vnode is active because it is hard to figure out when the
8525 	 * last lock is gone.
8526 	 * XXX - what if some already has the vnode mapped in?
8527 	 * XXX bob: see nfs3_frlock, do not allow locking if vnode mapped in.
8528 	 */
8529 	if ((error == 0) && (bfp->l_type != F_UNLCK) && (cmd != F_GETLK) &&
8530 	    !CFS_ISFS_BACKFS_NFSV4(fscp))
8531 		vp->v_flag |= VNOCACHE;
8532 
8533 #ifdef CFS_CD_DEBUG
8534 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8535 #endif
8536 	return (error);
8537 }
8538 
8539 /*
8540  * Free storage space associated with the specified vnode.  The portion
8541  * to be freed is specified by bfp->l_start and bfp->l_len (already
8542  * normalized to a "whence" of 0).
8543  *
8544  * This is an experimental facility whose continued existence is not
8545  * guaranteed.  Currently, we only support the special case
8546  * of l_len == 0, meaning free to end of file.
8547  */
8548 /* ARGSUSED */
8549 static int
8550 cachefs_space(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
8551 	offset_t offset, cred_t *cr, caller_context_t *ct)
8552 {
8553 	cnode_t *cp = VTOC(vp);
8554 	fscache_t *fscp = C_TO_FSCACHE(cp);
8555 	int error;
8556 
8557 	ASSERT(vp->v_type == VREG);
8558 	if (getzoneid() != GLOBAL_ZONEID)
8559 		return (EPERM);
8560 	if (cmd != F_FREESP)
8561 		return (EINVAL);
8562 
8563 	/* call backfilesystem if NFSv4 */
8564 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
8565 		error = cachefs_space_backfs_nfsv4(vp, cmd, bfp, flag,
8566 		    offset, cr, ct);
8567 		goto out;
8568 	}
8569 
8570 	if ((error = convoff(vp, bfp, 0, offset)) == 0) {
8571 		ASSERT(bfp->l_start >= 0);
8572 		if (bfp->l_len == 0) {
8573 			struct vattr va;
8574 
8575 			va.va_size = bfp->l_start;
8576 			va.va_mask = AT_SIZE;
8577 			error = cachefs_setattr(vp, &va, 0, cr, ct);
8578 		} else
8579 			error = EINVAL;
8580 	}
8581 
8582 out:
8583 	return (error);
8584 }
8585 
8586 /*
8587  * cachefs_space_backfs_nfsv4
8588  *
8589  * Call NFSv4 back filesystem to handle the space (cachefs
8590  * pass-through support for NFSv4).
8591  */
8592 static int
8593 cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd, struct flock64 *bfp,
8594 		int flag, offset_t offset, cred_t *cr, caller_context_t *ct)
8595 {
8596 	cnode_t *cp = VTOC(vp);
8597 	fscache_t *fscp = C_TO_FSCACHE(cp);
8598 	vnode_t *backvp;
8599 	int error;
8600 
8601 	/*
8602 	 * For NFSv4 pass-through to work, only connected operation is
8603 	 * supported, the cnode backvp must exist, and cachefs optional
8604 	 * (eg., disconnectable) flags are turned off. Assert these
8605 	 * conditions for the space operation.
8606 	 */
8607 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8608 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8609 
8610 	/* Call backfs vnode op after extracting backvp */
8611 	mutex_enter(&cp->c_statelock);
8612 	backvp = cp->c_backvp;
8613 	mutex_exit(&cp->c_statelock);
8614 
8615 	CFS_DPRINT_BACKFS_NFSV4(fscp,
8616 	    ("cachefs_space_backfs_nfsv4: cnode %p, backvp %p\n",
8617 	    cp, backvp));
8618 	error = VOP_SPACE(backvp, cmd, bfp, flag, offset, cr, ct);
8619 
8620 	return (error);
8621 }
8622 
8623 /*ARGSUSED*/
8624 static int
8625 cachefs_realvp(struct vnode *vp, struct vnode **vpp, caller_context_t *ct)
8626 {
8627 	return (EINVAL);
8628 }
8629 
8630 /*ARGSUSED*/
8631 static int
8632 cachefs_pageio(struct vnode *vp, page_t *pp, u_offset_t io_off, size_t io_len,
8633 	int flags, cred_t *cr, caller_context_t *ct)
8634 {
8635 	return (ENOSYS);
8636 }
8637 
8638 static int
8639 cachefs_setsecattr_connected(cnode_t *cp,
8640     vsecattr_t *vsec, int flag, cred_t *cr)
8641 {
8642 	fscache_t *fscp = C_TO_FSCACHE(cp);
8643 	int error = 0;
8644 
8645 	ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
8646 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
8647 
8648 	mutex_enter(&cp->c_statelock);
8649 
8650 	if (cp->c_backvp == NULL) {
8651 		error = cachefs_getbackvp(fscp, cp);
8652 		if (error) {
8653 			cachefs_nocache(cp);
8654 			goto out;
8655 		}
8656 	}
8657 
8658 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
8659 	if (error)
8660 		goto out;
8661 
8662 	/* only owner can set acl */
8663 	if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
8664 		error = EINVAL;
8665 		goto out;
8666 	}
8667 
8668 
8669 	CFS_DPRINT_BACKFS_NFSV4(fscp,
8670 	    ("cachefs_setsecattr (nfsv4): cp %p, backvp %p",
8671 	    cp, cp->c_backvp));
8672 	error = VOP_SETSECATTR(cp->c_backvp, vsec, flag, cr, NULL);
8673 	if (error) {
8674 		goto out;
8675 	}
8676 
8677 	if ((cp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
8678 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8679 		cachefs_nocache(cp);
8680 		goto out;
8681 	}
8682 
8683 	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
8684 
8685 	/* acl may have changed permissions -- handle this. */
8686 	if (!CFS_ISFS_BACKFS_NFSV4(fscp))
8687 		cachefs_acl2perm(cp, vsec);
8688 
8689 	if ((cp->c_flags & CN_NOCACHE) == 0 &&
8690 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
8691 		error = cachefs_cacheacl(cp, vsec);
8692 		if (error != 0) {
8693 #ifdef CFSDEBUG
8694 			CFS_DEBUG(CFSDEBUG_VOPS)
8695 				printf("cachefs_setacl: cacheacl: error %d\n",
8696 				    error);
8697 #endif /* CFSDEBUG */
8698 			error = 0;
8699 			cachefs_nocache(cp);
8700 		}
8701 	}
8702 
8703 out:
8704 	mutex_exit(&cp->c_statelock);
8705 
8706 	return (error);
8707 }
8708 
8709 static int
8710 cachefs_setsecattr_disconnected(cnode_t *cp,
8711     vsecattr_t *vsec, int flag, cred_t *cr)
8712 {
8713 	fscache_t *fscp = C_TO_FSCACHE(cp);
8714 	mode_t failmode = cp->c_metadata.md_vattr.va_mode;
8715 	off_t commit = 0;
8716 	int error = 0;
8717 
8718 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
8719 
8720 	if (CFS_ISFS_WRITE_AROUND(fscp))
8721 		return (ETIMEDOUT);
8722 
8723 	mutex_enter(&cp->c_statelock);
8724 
8725 	/* only owner can set acl */
8726 	if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
8727 		error = EINVAL;
8728 		goto out;
8729 	}
8730 
8731 	if (cp->c_metadata.md_flags & MD_NEEDATTRS) {
8732 		error = ETIMEDOUT;
8733 		goto out;
8734 	}
8735 
8736 	/* XXX do i need this?  is this right? */
8737 	if (cp->c_flags & CN_ALLOC_PENDING) {
8738 		if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
8739 			(void) filegrp_allocattr(cp->c_filegrp);
8740 		}
8741 		error = filegrp_create_metadata(cp->c_filegrp,
8742 		    &cp->c_metadata, &cp->c_id);
8743 		if (error) {
8744 			goto out;
8745 		}
8746 		cp->c_flags &= ~CN_ALLOC_PENDING;
8747 	}
8748 
8749 	/* XXX is this right? */
8750 	if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
8751 		error = cachefs_dlog_cidmap(fscp);
8752 		if (error) {
8753 			error = ENOSPC;
8754 			goto out;
8755 		}
8756 		cp->c_metadata.md_flags |= MD_MAPPING;
8757 		cp->c_flags |= CN_UPDATED;
8758 	}
8759 
8760 	commit = cachefs_dlog_setsecattr(fscp, vsec, flag, cp, cr);
8761 	if (commit == 0)
8762 		goto out;
8763 
8764 	/* fix modes in metadata */
8765 	cachefs_acl2perm(cp, vsec);
8766 
8767 	if ((cp->c_flags & CN_NOCACHE) == 0) {
8768 		error = cachefs_cacheacl(cp, vsec);
8769 		if (error != 0) {
8770 			goto out;
8771 		}
8772 	}
8773 
8774 	/* XXX is this right? */
8775 	if (cachefs_modified_alloc(cp)) {
8776 		error = ENOSPC;
8777 		goto out;
8778 	}
8779 
8780 out:
8781 	if (error != 0)
8782 		cp->c_metadata.md_vattr.va_mode = failmode;
8783 
8784 	mutex_exit(&cp->c_statelock);
8785 
8786 	if (commit) {
8787 		if (cachefs_dlog_commit(fscp, commit, error)) {
8788 			/*EMPTY*/
8789 			/* XXX fix on panic? */
8790 		}
8791 	}
8792 
8793 	return (error);
8794 }
8795 
8796 /*ARGSUSED*/
8797 static int
8798 cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr,
8799     caller_context_t *ct)
8800 {
8801 	cnode_t *cp = VTOC(vp);
8802 	fscache_t *fscp = C_TO_FSCACHE(cp);
8803 	int connected = 0;
8804 	int held = 0;
8805 	int error = 0;
8806 
8807 #ifdef CFSDEBUG
8808 	CFS_DEBUG(CFSDEBUG_VOPS)
8809 		printf("cachefs_setsecattr: ENTER vp %p\n", (void *)vp);
8810 #endif
8811 	if (getzoneid() != GLOBAL_ZONEID) {
8812 		error = EPERM;
8813 		goto out;
8814 	}
8815 
8816 	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
8817 		error = ENOSYS;
8818 		goto out;
8819 	}
8820 
8821 	if (! cachefs_vtype_aclok(vp)) {
8822 		error = EINVAL;
8823 		goto out;
8824 	}
8825 
8826 	/*
8827 	 * Cachefs only provides pass-through support for NFSv4,
8828 	 * and all vnode operations are passed through to the
8829 	 * back file system. For NFSv4 pass-through to work, only
8830 	 * connected operation is supported, the cnode backvp must
8831 	 * exist, and cachefs optional (eg., disconnectable) flags
8832 	 * are turned off. Assert these conditions to ensure that
8833 	 * the backfilesystem is called for the setsecattr operation.
8834 	 */
8835 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8836 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8837 
8838 	for (;;) {
8839 		/* drop hold on file system */
8840 		if (held) {
8841 			/* Won't loop with NFSv4 connected operation */
8842 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8843 			cachefs_cd_release(fscp);
8844 			held = 0;
8845 		}
8846 
8847 		/* acquire access to the file system */
8848 		error = cachefs_cd_access(fscp, connected, 1);
8849 		if (error)
8850 			break;
8851 		held = 1;
8852 
8853 		/* perform the setattr */
8854 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
8855 			error = cachefs_setsecattr_connected(cp,
8856 			    vsec, flag, cr);
8857 		else
8858 			error = cachefs_setsecattr_disconnected(cp,
8859 			    vsec, flag, cr);
8860 		if (error) {
8861 			/* if connected */
8862 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8863 				if (CFS_TIMEOUT(fscp, error)) {
8864 					cachefs_cd_release(fscp);
8865 					held = 0;
8866 					cachefs_cd_timedout(fscp);
8867 					connected = 0;
8868 					continue;
8869 				}
8870 			}
8871 
8872 			/* else must be disconnected */
8873 			else {
8874 				if (CFS_TIMEOUT(fscp, error)) {
8875 					connected = 1;
8876 					continue;
8877 				}
8878 			}
8879 		}
8880 		break;
8881 	}
8882 
8883 	if (held) {
8884 		cachefs_cd_release(fscp);
8885 	}
8886 	return (error);
8887 
8888 out:
8889 #ifdef CFS_CD_DEBUG
8890 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8891 #endif
8892 
8893 #ifdef CFSDEBUG
8894 	CFS_DEBUG(CFSDEBUG_VOPS)
8895 		printf("cachefs_setsecattr: EXIT error = %d\n", error);
8896 #endif
8897 	return (error);
8898 }
8899 
8900 /*
8901  * call this BEFORE calling cachefs_cacheacl(), as the latter will
8902  * sanitize the acl.
8903  */
8904 
8905 static void
8906 cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec)
8907 {
8908 	aclent_t *aclp;
8909 	int i;
8910 
8911 	for (i = 0; i < vsec->vsa_aclcnt; i++) {
8912 		aclp = ((aclent_t *)vsec->vsa_aclentp) + i;
8913 		switch (aclp->a_type) {
8914 		case USER_OBJ:
8915 			cp->c_metadata.md_vattr.va_mode &= (~0700);
8916 			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 6);
8917 			break;
8918 
8919 		case GROUP_OBJ:
8920 			cp->c_metadata.md_vattr.va_mode &= (~070);
8921 			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 3);
8922 			break;
8923 
8924 		case OTHER_OBJ:
8925 			cp->c_metadata.md_vattr.va_mode &= (~07);
8926 			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm);
8927 			break;
8928 
8929 		case CLASS_OBJ:
8930 			cp->c_metadata.md_aclclass = aclp->a_perm;
8931 			break;
8932 		}
8933 	}
8934 
8935 	cp->c_flags |= CN_UPDATED;
8936 }
8937 
8938 static int
8939 cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr,
8940     caller_context_t *ct)
8941 {
8942 	cnode_t *cp = VTOC(vp);
8943 	fscache_t *fscp = C_TO_FSCACHE(cp);
8944 	int held = 0, connected = 0;
8945 	int error = 0;
8946 
8947 #ifdef CFSDEBUG
8948 	CFS_DEBUG(CFSDEBUG_VOPS)
8949 		printf("cachefs_getsecattr: ENTER vp %p\n", (void *)vp);
8950 #endif
8951 
8952 	if (getzoneid() != GLOBAL_ZONEID) {
8953 		error = EPERM;
8954 		goto out;
8955 	}
8956 
8957 	/*
8958 	 * Cachefs only provides pass-through support for NFSv4,
8959 	 * and all vnode operations are passed through to the
8960 	 * back file system. For NFSv4 pass-through to work, only
8961 	 * connected operation is supported, the cnode backvp must
8962 	 * exist, and cachefs optional (eg., disconnectable) flags
8963 	 * are turned off. Assert these conditions to ensure that
8964 	 * the backfilesystem is called for the getsecattr operation.
8965 	 */
8966 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8967 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8968 
8969 	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
8970 		error = fs_fab_acl(vp, vsec, flag, cr, ct);
8971 		goto out;
8972 	}
8973 
8974 	for (;;) {
8975 		if (held) {
8976 			/* Won't loop with NFSv4 connected behavior */
8977 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8978 			cachefs_cd_release(fscp);
8979 			held = 0;
8980 		}
8981 		error = cachefs_cd_access(fscp, connected, 0);
8982 		if (error)
8983 			break;
8984 		held = 1;
8985 
8986 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
8987 			error = cachefs_getsecattr_connected(vp, vsec, flag,
8988 			    cr);
8989 			if (CFS_TIMEOUT(fscp, error)) {
8990 				cachefs_cd_release(fscp);
8991 				held = 0;
8992 				cachefs_cd_timedout(fscp);
8993 				connected = 0;
8994 				continue;
8995 			}
8996 		} else {
8997 			error = cachefs_getsecattr_disconnected(vp, vsec, flag,
8998 			    cr);
8999 			if (CFS_TIMEOUT(fscp, error)) {
9000 				if (cachefs_cd_access_miss(fscp)) {
9001 					error = cachefs_getsecattr_connected(vp,
9002 					    vsec, flag, cr);
9003 					if (!CFS_TIMEOUT(fscp, error))
9004 						break;
9005 					delay(5*hz);
9006 					connected = 0;
9007 					continue;
9008 				}
9009 				connected = 1;
9010 				continue;
9011 			}
9012 		}
9013 		break;
9014 	}
9015 
9016 out:
9017 	if (held)
9018 		cachefs_cd_release(fscp);
9019 
9020 #ifdef CFS_CD_DEBUG
9021 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
9022 #endif
9023 #ifdef CFSDEBUG
9024 	CFS_DEBUG(CFSDEBUG_VOPS)
9025 		printf("cachefs_getsecattr: EXIT error = %d\n", error);
9026 #endif
9027 	return (error);
9028 }
9029 
9030 static int
9031 cachefs_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
9032     caller_context_t *ct)
9033 {
9034 	cnode_t *cp = VTOC(vp);
9035 	fscache_t *fscp = C_TO_FSCACHE(cp);
9036 	int error = 0;
9037 	vnode_t *backvp;
9038 
9039 #ifdef CFSDEBUG
9040 	CFS_DEBUG(CFSDEBUG_VOPS)
9041 		printf("cachefs_shrlock: ENTER vp %p\n", (void *)vp);
9042 #endif
9043 
9044 	if (getzoneid() != GLOBAL_ZONEID) {
9045 		error = EPERM;
9046 		goto out;
9047 	}
9048 
9049 	/*
9050 	 * Cachefs only provides pass-through support for NFSv4,
9051 	 * and all vnode operations are passed through to the
9052 	 * back file system. For NFSv4 pass-through to work, only
9053 	 * connected operation is supported, the cnode backvp must
9054 	 * exist, and cachefs optional (eg., disconnectable) flags
9055 	 * are turned off. Assert these conditions to ensure that
9056 	 * the backfilesystem is called for the shrlock operation.
9057 	 */
9058 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
9059 	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
9060 
9061 	mutex_enter(&cp->c_statelock);
9062 	if (cp->c_backvp == NULL)
9063 		error = cachefs_getbackvp(fscp, cp);
9064 	backvp = cp->c_backvp;
9065 	mutex_exit(&cp->c_statelock);
9066 	ASSERT((error != 0) || (backvp != NULL));
9067 
9068 	if (error == 0) {
9069 		CFS_DPRINT_BACKFS_NFSV4(fscp,
9070 		    ("cachefs_shrlock (nfsv4): cp %p, backvp %p",
9071 		    cp, backvp));
9072 		error = VOP_SHRLOCK(backvp, cmd, shr, flag, cr, ct);
9073 	}
9074 
9075 out:
9076 #ifdef CFSDEBUG
9077 	CFS_DEBUG(CFSDEBUG_VOPS)
9078 		printf("cachefs_shrlock: EXIT error = %d\n", error);
9079 #endif
9080 	return (error);
9081 }
9082 
9083 static int
9084 cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
9085     cred_t *cr)
9086 {
9087 	cnode_t *cp = VTOC(vp);
9088 	fscache_t *fscp = C_TO_FSCACHE(cp);
9089 	int hit = 0;
9090 	int error = 0;
9091 
9092 
9093 	mutex_enter(&cp->c_statelock);
9094 	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
9095 	if (error)
9096 		goto out;
9097 
9098 	/* read from the cache if we can */
9099 	if ((cp->c_metadata.md_flags & MD_ACL) &&
9100 	    ((cp->c_flags & CN_NOCACHE) == 0) &&
9101 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
9102 		ASSERT((cp->c_flags & CN_NOCACHE) == 0);
9103 		error = cachefs_getaclfromcache(cp, vsec);
9104 		if (error) {
9105 			cachefs_nocache(cp);
9106 			ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
9107 			error = 0;
9108 		} else {
9109 			hit = 1;
9110 			goto out;
9111 		}
9112 	}
9113 
9114 	ASSERT(error == 0);
9115 	if (cp->c_backvp == NULL)
9116 		error = cachefs_getbackvp(fscp, cp);
9117 	if (error)
9118 		goto out;
9119 
9120 	CFS_DPRINT_BACKFS_NFSV4(fscp,
9121 	    ("cachefs_getsecattr (nfsv4): cp %p, backvp %p",
9122 	    cp, cp->c_backvp));
9123 	error = VOP_GETSECATTR(cp->c_backvp, vsec, flag, cr, NULL);
9124 	if (error)
9125 		goto out;
9126 
9127 	if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
9128 	    (cachefs_vtype_aclok(vp)) &&
9129 	    ((cp->c_flags & CN_NOCACHE) == 0) &&
9130 	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
9131 		error = cachefs_cacheacl(cp, vsec);
9132 		if (error) {
9133 			error = 0;
9134 			cachefs_nocache(cp);
9135 		}
9136 	}
9137 
9138 out:
9139 	if (error == 0) {
9140 		if (hit)
9141 			fscp->fs_stats.st_hits++;
9142 		else
9143 			fscp->fs_stats.st_misses++;
9144 	}
9145 	mutex_exit(&cp->c_statelock);
9146 
9147 	return (error);
9148 }
9149 
9150 static int
9151 /*ARGSUSED*/
9152 cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec, int flag,
9153     cred_t *cr)
9154 {
9155 	cnode_t *cp = VTOC(vp);
9156 	fscache_t *fscp = C_TO_FSCACHE(cp);
9157 	int hit = 0;
9158 	int error = 0;
9159 
9160 
9161 	mutex_enter(&cp->c_statelock);
9162 
9163 	/* read from the cache if we can */
9164 	if (((cp->c_flags & CN_NOCACHE) == 0) &&
9165 	    (cp->c_metadata.md_flags & MD_ACL)) {
9166 		error = cachefs_getaclfromcache(cp, vsec);
9167 		if (error) {
9168 			cachefs_nocache(cp);
9169 			ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
9170 			error = 0;
9171 		} else {
9172 			hit = 1;
9173 			goto out;
9174 		}
9175 	}
9176 	error = ETIMEDOUT;
9177 
9178 out:
9179 	if (error == 0) {
9180 		if (hit)
9181 			fscp->fs_stats.st_hits++;
9182 		else
9183 			fscp->fs_stats.st_misses++;
9184 	}
9185 	mutex_exit(&cp->c_statelock);
9186 
9187 	return (error);
9188 }
9189 
9190 /*
9191  * cachefs_cacheacl() -- cache an ACL, which we do by applying it to
9192  * the frontfile if possible; otherwise, the adjunct directory.
9193  *
9194  * inputs:
9195  * cp - the cnode, with its statelock already held
9196  * vsecp - a pointer to a vsecattr_t you'd like us to cache as-is,
9197  *  or NULL if you want us to do the VOP_GETSECATTR(backvp).
9198  *
9199  * returns:
9200  * 0 - all is well
9201  * nonzero - errno
9202  */
9203 
9204 int
9205 cachefs_cacheacl(cnode_t *cp, vsecattr_t *vsecp)
9206 {
9207 	fscache_t *fscp = C_TO_FSCACHE(cp);
9208 	vsecattr_t vsec;
9209 	aclent_t *aclp;
9210 	int gotvsec = 0;
9211 	int error = 0;
9212 	vnode_t *vp = NULL;
9213 	void *aclkeep = NULL;
9214 	int i;
9215 
9216 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9217 	ASSERT((cp->c_flags & CN_NOCACHE) == 0);
9218 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
9219 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
9220 	ASSERT(cachefs_vtype_aclok(CTOV(cp)));
9221 
9222 	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
9223 		error = ENOSYS;
9224 		goto out;
9225 	}
9226 
9227 	if (vsecp == NULL) {
9228 		if (cp->c_backvp == NULL)
9229 			error = cachefs_getbackvp(fscp, cp);
9230 		if (error != 0)
9231 			goto out;
9232 		vsecp = &vsec;
9233 		bzero(&vsec, sizeof (vsec));
9234 		vsecp->vsa_mask =
9235 		    VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9236 		error = VOP_GETSECATTR(cp->c_backvp, vsecp, 0, kcred, NULL);
9237 		if (error != 0) {
9238 			goto out;
9239 		}
9240 		gotvsec = 1;
9241 	} else if (vsecp->vsa_mask & VSA_ACL) {
9242 		aclkeep = vsecp->vsa_aclentp;
9243 		vsecp->vsa_aclentp = cachefs_kmem_alloc(vsecp->vsa_aclcnt *
9244 		    sizeof (aclent_t), KM_SLEEP);
9245 		bcopy(aclkeep, vsecp->vsa_aclentp, vsecp->vsa_aclcnt *
9246 		    sizeof (aclent_t));
9247 	} else if ((vsecp->vsa_mask & (VSA_ACL | VSA_DFACL)) == 0) {
9248 		/* unless there's real data, we can cache nothing. */
9249 		return (0);
9250 	}
9251 
9252 	/*
9253 	 * prevent the ACL from chmoding our frontfile, and
9254 	 * snarf the class info
9255 	 */
9256 
9257 	if ((vsecp->vsa_mask & (VSA_ACL | VSA_ACLCNT)) ==
9258 	    (VSA_ACL | VSA_ACLCNT)) {
9259 		for (i = 0; i < vsecp->vsa_aclcnt; i++) {
9260 			aclp = ((aclent_t *)vsecp->vsa_aclentp) + i;
9261 			switch (aclp->a_type) {
9262 			case CLASS_OBJ:
9263 				cp->c_metadata.md_aclclass =
9264 				    aclp->a_perm;
9265 				/*FALLTHROUGH*/
9266 			case USER_OBJ:
9267 			case GROUP_OBJ:
9268 			case OTHER_OBJ:
9269 				aclp->a_perm = 06;
9270 			}
9271 		}
9272 	}
9273 
9274 	/*
9275 	 * if the frontfile exists, then we always do the work.  but,
9276 	 * if there's no frontfile, and the ACL isn't a `real' ACL,
9277 	 * then we don't want to do the work.  otherwise, an `ls -l'
9278 	 * will create tons of emtpy frontfiles.
9279 	 */
9280 
9281 	if (((cp->c_metadata.md_flags & MD_FILE) == 0) &&
9282 	    ((vsecp->vsa_aclcnt + vsecp->vsa_dfaclcnt)
9283 	    <= MIN_ACL_ENTRIES)) {
9284 		cp->c_metadata.md_flags |= MD_ACL;
9285 		cp->c_flags |= CN_UPDATED;
9286 		goto out;
9287 	}
9288 
9289 	/*
9290 	 * if we have a default ACL, then we need a
9291 	 * real live directory in the frontfs that we
9292 	 * can apply the ACL to.  if not, then we just
9293 	 * use the frontfile.  we get the frontfile
9294 	 * regardless -- that way, we know the
9295 	 * directory for the frontfile exists.
9296 	 */
9297 
9298 	if (vsecp->vsa_dfaclcnt > 0) {
9299 		if (cp->c_acldirvp == NULL)
9300 			error = cachefs_getacldirvp(cp);
9301 		if (error != 0)
9302 			goto out;
9303 		vp = cp->c_acldirvp;
9304 	} else {
9305 		if (cp->c_frontvp == NULL)
9306 			error = cachefs_getfrontfile(cp);
9307 		if (error != 0)
9308 			goto out;
9309 		vp = cp->c_frontvp;
9310 	}
9311 	ASSERT(vp != NULL);
9312 
9313 	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
9314 	error = VOP_SETSECATTR(vp, vsecp, 0, kcred, NULL);
9315 	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
9316 	if (error != 0) {
9317 #ifdef CFSDEBUG
9318 		CFS_DEBUG(CFSDEBUG_VOPS)
9319 			printf("cachefs_cacheacl: setsecattr: error %d\n",
9320 			    error);
9321 #endif /* CFSDEBUG */
9322 		/*
9323 		 * If there was an error, we don't want to call
9324 		 * cachefs_nocache(); so, set error to 0.
9325 		 * We will call cachefs_purgeacl(), in order to
9326 		 * clean such things as adjunct ACL directories.
9327 		 */
9328 		cachefs_purgeacl(cp);
9329 		error = 0;
9330 		goto out;
9331 	}
9332 	if (vp == cp->c_frontvp)
9333 		cp->c_flags |= CN_NEED_FRONT_SYNC;
9334 
9335 	cp->c_metadata.md_flags |= MD_ACL;
9336 	cp->c_flags |= CN_UPDATED;
9337 
9338 out:
9339 	if ((error) && (fscp->fs_cdconnected == CFS_CD_CONNECTED))
9340 		cachefs_nocache(cp);
9341 
9342 	if (gotvsec) {
9343 		if (vsec.vsa_aclcnt)
9344 			kmem_free(vsec.vsa_aclentp,
9345 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9346 		if (vsec.vsa_dfaclcnt)
9347 			kmem_free(vsec.vsa_dfaclentp,
9348 			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
9349 	} else if (aclkeep != NULL) {
9350 		cachefs_kmem_free(vsecp->vsa_aclentp,
9351 		    vsecp->vsa_aclcnt * sizeof (aclent_t));
9352 		vsecp->vsa_aclentp = aclkeep;
9353 	}
9354 
9355 	return (error);
9356 }
9357 
9358 void
9359 cachefs_purgeacl(cnode_t *cp)
9360 {
9361 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9362 
9363 	ASSERT(!CFS_ISFS_BACKFS_NFSV4(C_TO_FSCACHE(cp)));
9364 
9365 	if (cp->c_acldirvp != NULL) {
9366 		VN_RELE(cp->c_acldirvp);
9367 		cp->c_acldirvp = NULL;
9368 	}
9369 
9370 	if (cp->c_metadata.md_flags & MD_ACLDIR) {
9371 		char name[CFS_FRONTFILE_NAME_SIZE + 2];
9372 
9373 		ASSERT(cp->c_filegrp->fg_dirvp != NULL);
9374 		make_ascii_name(&cp->c_id, name);
9375 		(void) strcat(name, ".d");
9376 
9377 		(void) VOP_RMDIR(cp->c_filegrp->fg_dirvp, name,
9378 		    cp->c_filegrp->fg_dirvp, kcred, NULL, 0);
9379 	}
9380 
9381 	cp->c_metadata.md_flags &= ~(MD_ACL | MD_ACLDIR);
9382 	cp->c_flags |= CN_UPDATED;
9383 }
9384 
9385 static int
9386 cachefs_getacldirvp(cnode_t *cp)
9387 {
9388 	char name[CFS_FRONTFILE_NAME_SIZE + 2];
9389 	int error = 0;
9390 
9391 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9392 	ASSERT(cp->c_acldirvp == NULL);
9393 
9394 	if (cp->c_frontvp == NULL)
9395 		error = cachefs_getfrontfile(cp);
9396 	if (error != 0)
9397 		goto out;
9398 
9399 	ASSERT(cp->c_filegrp->fg_dirvp != NULL);
9400 	make_ascii_name(&cp->c_id, name);
9401 	(void) strcat(name, ".d");
9402 	error = VOP_LOOKUP(cp->c_filegrp->fg_dirvp,
9403 	    name, &cp->c_acldirvp, NULL, 0, NULL, kcred, NULL, NULL, NULL);
9404 	if ((error != 0) && (error != ENOENT))
9405 		goto out;
9406 
9407 	if (error != 0) {
9408 		vattr_t va;
9409 
9410 		va.va_mode = S_IFDIR | 0777;
9411 		va.va_uid = 0;
9412 		va.va_gid = 0;
9413 		va.va_type = VDIR;
9414 		va.va_mask = AT_TYPE | AT_MODE |
9415 		    AT_UID | AT_GID;
9416 		error =
9417 		    VOP_MKDIR(cp->c_filegrp->fg_dirvp,
9418 		    name, &va, &cp->c_acldirvp, kcred, NULL, 0, NULL);
9419 		if (error != 0)
9420 			goto out;
9421 	}
9422 
9423 	ASSERT(cp->c_acldirvp != NULL);
9424 	cp->c_metadata.md_flags |= MD_ACLDIR;
9425 	cp->c_flags |= CN_UPDATED;
9426 
9427 out:
9428 	if (error != 0)
9429 		cp->c_acldirvp = NULL;
9430 	return (error);
9431 }
9432 
9433 static int
9434 cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec)
9435 {
9436 	aclent_t *aclp;
9437 	int error = 0;
9438 	vnode_t *vp = NULL;
9439 	int i;
9440 
9441 	ASSERT(cp->c_metadata.md_flags & MD_ACL);
9442 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9443 	ASSERT(vsec->vsa_aclentp == NULL);
9444 
9445 	if (cp->c_metadata.md_flags & MD_ACLDIR) {
9446 		if (cp->c_acldirvp == NULL)
9447 			error = cachefs_getacldirvp(cp);
9448 		if (error != 0)
9449 			goto out;
9450 		vp = cp->c_acldirvp;
9451 	} else if (cp->c_metadata.md_flags & MD_FILE) {
9452 		if (cp->c_frontvp == NULL)
9453 			error = cachefs_getfrontfile(cp);
9454 		if (error != 0)
9455 			goto out;
9456 		vp = cp->c_frontvp;
9457 	} else {
9458 
9459 		/*
9460 		 * if we get here, then we know that MD_ACL is on,
9461 		 * meaning an ACL was successfully cached.  we also
9462 		 * know that neither MD_ACLDIR nor MD_FILE are on, so
9463 		 * this has to be an entry without a `real' ACL.
9464 		 * thus, we forge whatever is necessary.
9465 		 */
9466 
9467 		if (vsec->vsa_mask & VSA_ACLCNT)
9468 			vsec->vsa_aclcnt = MIN_ACL_ENTRIES;
9469 
9470 		if (vsec->vsa_mask & VSA_ACL) {
9471 			vsec->vsa_aclentp =
9472 			    kmem_zalloc(MIN_ACL_ENTRIES *
9473 			    sizeof (aclent_t), KM_SLEEP);
9474 			aclp = (aclent_t *)vsec->vsa_aclentp;
9475 			aclp->a_type = USER_OBJ;
9476 			++aclp;
9477 			aclp->a_type = GROUP_OBJ;
9478 			++aclp;
9479 			aclp->a_type = OTHER_OBJ;
9480 			++aclp;
9481 			aclp->a_type = CLASS_OBJ;
9482 			ksort((caddr_t)vsec->vsa_aclentp, MIN_ACL_ENTRIES,
9483 			    sizeof (aclent_t), cmp2acls);
9484 		}
9485 
9486 		ASSERT(vp == NULL);
9487 	}
9488 
9489 	if (vp != NULL) {
9490 		if ((error = VOP_GETSECATTR(vp, vsec, 0, kcred, NULL)) != 0) {
9491 #ifdef CFSDEBUG
9492 			CFS_DEBUG(CFSDEBUG_VOPS)
9493 				printf("cachefs_getaclfromcache: error %d\n",
9494 				    error);
9495 #endif /* CFSDEBUG */
9496 			goto out;
9497 		}
9498 	}
9499 
9500 	if (vsec->vsa_aclentp != NULL) {
9501 		for (i = 0; i < vsec->vsa_aclcnt; i++) {
9502 			aclp = ((aclent_t *)vsec->vsa_aclentp) + i;
9503 			switch (aclp->a_type) {
9504 			case USER_OBJ:
9505 				aclp->a_id = cp->c_metadata.md_vattr.va_uid;
9506 				aclp->a_perm =
9507 				    cp->c_metadata.md_vattr.va_mode & 0700;
9508 				aclp->a_perm >>= 6;
9509 				break;
9510 
9511 			case GROUP_OBJ:
9512 				aclp->a_id = cp->c_metadata.md_vattr.va_gid;
9513 				aclp->a_perm =
9514 				    cp->c_metadata.md_vattr.va_mode & 070;
9515 				aclp->a_perm >>= 3;
9516 				break;
9517 
9518 			case OTHER_OBJ:
9519 				aclp->a_perm =
9520 				    cp->c_metadata.md_vattr.va_mode & 07;
9521 				break;
9522 
9523 			case CLASS_OBJ:
9524 				aclp->a_perm =
9525 				    cp->c_metadata.md_aclclass;
9526 				break;
9527 			}
9528 		}
9529 	}
9530 
9531 out:
9532 
9533 	if (error != 0)
9534 		cachefs_nocache(cp);
9535 
9536 	return (error);
9537 }
9538 
9539 /*
9540  * Fills in targp with attribute information from srcp, cp
9541  * and if necessary the system.
9542  */
9543 static void
9544 cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp, cred_t *cr)
9545 {
9546 	time_t	now;
9547 
9548 	ASSERT((srcp->va_mask & (AT_TYPE | AT_MODE)) == (AT_TYPE | AT_MODE));
9549 
9550 	/*
9551 	 * Add code to fill in the va struct.  We use the fields from
9552 	 * the srcp struct if they are populated, otherwise we guess
9553 	 */
9554 
9555 	targp->va_mask = 0;	/* initialize all fields */
9556 	targp->va_mode = srcp->va_mode;
9557 	targp->va_type = srcp->va_type;
9558 	targp->va_nlink = 1;
9559 	targp->va_nodeid = 0;
9560 
9561 	if (srcp->va_mask & AT_UID)
9562 		targp->va_uid = srcp->va_uid;
9563 	else
9564 		targp->va_uid = crgetuid(cr);
9565 
9566 	if (srcp->va_mask & AT_GID)
9567 		targp->va_gid = srcp->va_gid;
9568 	else
9569 		targp->va_gid = crgetgid(cr);
9570 
9571 	if (srcp->va_mask & AT_FSID)
9572 		targp->va_fsid = srcp->va_fsid;
9573 	else
9574 		targp->va_fsid = 0;	/* initialize all fields */
9575 
9576 	now = gethrestime_sec();
9577 	if (srcp->va_mask & AT_ATIME)
9578 		targp->va_atime = srcp->va_atime;
9579 	else
9580 		targp->va_atime.tv_sec = now;
9581 
9582 	if (srcp->va_mask & AT_MTIME)
9583 		targp->va_mtime = srcp->va_mtime;
9584 	else
9585 		targp->va_mtime.tv_sec = now;
9586 
9587 	if (srcp->va_mask & AT_CTIME)
9588 		targp->va_ctime = srcp->va_ctime;
9589 	else
9590 		targp->va_ctime.tv_sec = now;
9591 
9592 
9593 	if (srcp->va_mask & AT_SIZE)
9594 		targp->va_size = srcp->va_size;
9595 	else
9596 		targp->va_size = 0;
9597 
9598 	/*
9599 	 * the remaing fields are set by the fs and not changable.
9600 	 * we populate these entries useing the parent directory
9601 	 * values.  It's a small hack, but should work.
9602 	 */
9603 	targp->va_blksize = cp->c_metadata.md_vattr.va_blksize;
9604 	targp->va_rdev = cp->c_metadata.md_vattr.va_rdev;
9605 	targp->va_nblocks = cp->c_metadata.md_vattr.va_nblocks;
9606 	targp->va_seq = 0; /* Never keep the sequence number */
9607 }
9608 
9609 /*
9610  * set the gid for a newly created file.  The algorithm is as follows:
9611  *
9612  *	1) If the gid is set in the attribute list, then use it if
9613  *	   the caller is privileged, belongs to the target group, or
9614  *	   the group is the same as the parent directory.
9615  *
9616  *	2) If the parent directory's set-gid bit is clear, then use
9617  *	   the process gid
9618  *
9619  *	3) Otherwise, use the gid of the parent directory.
9620  *
9621  * Note: newcp->c_attr.va_{mode,type} must already be set before calling
9622  * this routine.
9623  */
9624 static void
9625 cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap, cred_t *cr)
9626 {
9627 	if ((vap->va_mask & AT_GID) &&
9628 	    ((vap->va_gid == dcp->c_attr.va_gid) ||
9629 	    groupmember(vap->va_gid, cr) ||
9630 	    secpolicy_vnode_create_gid(cr) != 0)) {
9631 		newcp->c_attr.va_gid = vap->va_gid;
9632 	} else {
9633 		if (dcp->c_attr.va_mode & S_ISGID)
9634 			newcp->c_attr.va_gid = dcp->c_attr.va_gid;
9635 		else
9636 			newcp->c_attr.va_gid = crgetgid(cr);
9637 	}
9638 
9639 	/*
9640 	 * if we're creating a directory, and the parent directory has the
9641 	 * set-GID bit set, set it on the new directory.
9642 	 * Otherwise, if the user is neither privileged nor a member of the
9643 	 * file's new group, clear the file's set-GID bit.
9644 	 */
9645 	if (dcp->c_attr.va_mode & S_ISGID && newcp->c_attr.va_type == VDIR) {
9646 		newcp->c_attr.va_mode |= S_ISGID;
9647 	} else if ((newcp->c_attr.va_mode & S_ISGID) &&
9648 	    secpolicy_vnode_setids_setgids(cr, newcp->c_attr.va_gid) != 0)
9649 		newcp->c_attr.va_mode &= ~S_ISGID;
9650 }
9651 
9652 /*
9653  * create an acl for the newly created file.  should be called right
9654  * after cachefs_creategid.
9655  */
9656 
9657 static void
9658 cachefs_createacl(cnode_t *dcp, cnode_t *newcp)
9659 {
9660 	fscache_t *fscp = C_TO_FSCACHE(dcp);
9661 	vsecattr_t vsec;
9662 	int gotvsec = 0;
9663 	int error = 0; /* placeholder */
9664 	aclent_t *aclp;
9665 	o_mode_t *classp = NULL;
9666 	o_mode_t gunion = 0;
9667 	int i;
9668 
9669 	if ((fscp->fs_info.fi_mntflags & CFS_NOACL) ||
9670 	    (! cachefs_vtype_aclok(CTOV(newcp))))
9671 		return;
9672 
9673 	ASSERT(dcp->c_metadata.md_flags & MD_ACL);
9674 	ASSERT(MUTEX_HELD(&dcp->c_statelock));
9675 	ASSERT(MUTEX_HELD(&newcp->c_statelock));
9676 
9677 	/*
9678 	 * XXX should probably not do VSA_ACL and VSA_ACLCNT, but that
9679 	 * would hit code paths that isn't hit anywhere else.
9680 	 */
9681 
9682 	bzero(&vsec, sizeof (vsec));
9683 	vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9684 	error = cachefs_getaclfromcache(dcp, &vsec);
9685 	if (error != 0)
9686 		goto out;
9687 	gotvsec = 1;
9688 
9689 	if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL)) {
9690 		if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL))
9691 			kmem_free(vsec.vsa_aclentp,
9692 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9693 
9694 		vsec.vsa_aclcnt = vsec.vsa_dfaclcnt;
9695 		vsec.vsa_aclentp = vsec.vsa_dfaclentp;
9696 		vsec.vsa_dfaclcnt = 0;
9697 		vsec.vsa_dfaclentp = NULL;
9698 
9699 		if (newcp->c_attr.va_type == VDIR) {
9700 			vsec.vsa_dfaclentp = kmem_alloc(vsec.vsa_aclcnt *
9701 			    sizeof (aclent_t), KM_SLEEP);
9702 			vsec.vsa_dfaclcnt = vsec.vsa_aclcnt;
9703 			bcopy(vsec.vsa_aclentp, vsec.vsa_dfaclentp,
9704 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9705 		}
9706 
9707 		/*
9708 		 * this function should be called pretty much after
9709 		 * the rest of the file creation stuff is done.  so,
9710 		 * uid, gid, etc. should be `right'.  we'll go with
9711 		 * that, rather than trying to determine whether to
9712 		 * get stuff from cr or va.
9713 		 */
9714 
9715 		for (i = 0; i < vsec.vsa_aclcnt; i++) {
9716 			aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9717 			switch (aclp->a_type) {
9718 			case DEF_USER_OBJ:
9719 				aclp->a_type = USER_OBJ;
9720 				aclp->a_id = newcp->c_metadata.md_vattr.va_uid;
9721 				aclp->a_perm =
9722 				    newcp->c_metadata.md_vattr.va_mode;
9723 				aclp->a_perm &= 0700;
9724 				aclp->a_perm >>= 6;
9725 				break;
9726 
9727 			case DEF_GROUP_OBJ:
9728 				aclp->a_type = GROUP_OBJ;
9729 				aclp->a_id = newcp->c_metadata.md_vattr.va_gid;
9730 				aclp->a_perm =
9731 				    newcp->c_metadata.md_vattr.va_mode;
9732 				aclp->a_perm &= 070;
9733 				aclp->a_perm >>= 3;
9734 				gunion |= aclp->a_perm;
9735 				break;
9736 
9737 			case DEF_OTHER_OBJ:
9738 				aclp->a_type = OTHER_OBJ;
9739 				aclp->a_perm =
9740 				    newcp->c_metadata.md_vattr.va_mode & 07;
9741 				break;
9742 
9743 			case DEF_CLASS_OBJ:
9744 				aclp->a_type = CLASS_OBJ;
9745 				classp = &(aclp->a_perm);
9746 				break;
9747 
9748 			case DEF_USER:
9749 				aclp->a_type = USER;
9750 				gunion |= aclp->a_perm;
9751 				break;
9752 
9753 			case DEF_GROUP:
9754 				aclp->a_type = GROUP;
9755 				gunion |= aclp->a_perm;
9756 				break;
9757 			}
9758 		}
9759 
9760 		/* XXX is this the POSIX thing to do? */
9761 		if (classp != NULL)
9762 			*classp &= gunion;
9763 
9764 		/*
9765 		 * we don't need to log this; rather, we clear the
9766 		 * MD_ACL bit when we reconnect.
9767 		 */
9768 
9769 		error = cachefs_cacheacl(newcp, &vsec);
9770 		if (error != 0)
9771 			goto out;
9772 	}
9773 
9774 	newcp->c_metadata.md_aclclass = 07; /* XXX check posix */
9775 	newcp->c_metadata.md_flags |= MD_ACL;
9776 	newcp->c_flags |= CN_UPDATED;
9777 
9778 out:
9779 
9780 	if (gotvsec) {
9781 		if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL))
9782 			kmem_free(vsec.vsa_aclentp,
9783 			    vsec.vsa_aclcnt * sizeof (aclent_t));
9784 		if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL))
9785 			kmem_free(vsec.vsa_dfaclentp,
9786 			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
9787 	}
9788 }
9789 
9790 /*
9791  * this is translated from the UFS code for access checking.
9792  */
9793 
9794 static int
9795 cachefs_access_local(void *vcp, int mode, cred_t *cr)
9796 {
9797 	cnode_t *cp = vcp;
9798 	fscache_t *fscp = C_TO_FSCACHE(cp);
9799 	int shift = 0;
9800 
9801 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9802 
9803 	if (mode & VWRITE) {
9804 		/*
9805 		 * Disallow write attempts on read-only
9806 		 * file systems, unless the file is special.
9807 		 */
9808 		struct vnode *vp = CTOV(cp);
9809 		if (vn_is_readonly(vp)) {
9810 			if (!IS_DEVVP(vp)) {
9811 				return (EROFS);
9812 			}
9813 		}
9814 	}
9815 
9816 	/*
9817 	 * if we need to do ACLs, do it.  this works whether anyone
9818 	 * has explicitly made an ACL or not.
9819 	 */
9820 
9821 	if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
9822 	    (cachefs_vtype_aclok(CTOV(cp))))
9823 		return (cachefs_acl_access(cp, mode, cr));
9824 
9825 	if (crgetuid(cr) != cp->c_attr.va_uid) {
9826 		shift += 3;
9827 		if (!groupmember(cp->c_attr.va_gid, cr))
9828 			shift += 3;
9829 	}
9830 
9831 	return (secpolicy_vnode_access2(cr, CTOV(cp), cp->c_attr.va_uid,
9832 	    cp->c_attr.va_mode << shift, mode));
9833 }
9834 
9835 /*
9836  * This is transcribed from ufs_acl_access().  If that changes, then
9837  * this should, too.
9838  *
9839  * Check the cnode's ACL's to see if this mode of access is
9840  * allowed; return 0 if allowed, EACCES if not.
9841  *
9842  * We follow the procedure defined in Sec. 3.3.5, ACL Access
9843  * Check Algorithm, of the POSIX 1003.6 Draft Standard.
9844  */
9845 
9846 #define	ACL_MODE_CHECK(M, PERM, C, I) \
9847     secpolicy_vnode_access2(C, CTOV(I), owner, (PERM), (M))
9848 
9849 static int
9850 cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr)
9851 {
9852 	int error = 0;
9853 
9854 	fscache_t *fscp = C_TO_FSCACHE(cp);
9855 
9856 	int mask = ~0;
9857 	int ismask = 0;
9858 
9859 	int gperm = 0;
9860 	int ngroup = 0;
9861 
9862 	vsecattr_t vsec;
9863 	int gotvsec = 0;
9864 	aclent_t *aclp;
9865 
9866 	uid_t owner = cp->c_attr.va_uid;
9867 
9868 	int i;
9869 
9870 	ASSERT(MUTEX_HELD(&cp->c_statelock));
9871 	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
9872 
9873 	/*
9874 	 * strictly speaking, we shouldn't set VSA_DFACL and DFACLCNT,
9875 	 * but then i believe we'd be the only thing exercising those
9876 	 * code paths -- probably a bad thing.
9877 	 */
9878 
9879 	bzero(&vsec, sizeof (vsec));
9880 	vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9881 
9882 	/* XXX KLUDGE! correct insidious 0-class problem */
9883 	if (cp->c_metadata.md_aclclass == 0 &&
9884 	    fscp->fs_cdconnected == CFS_CD_CONNECTED)
9885 		cachefs_purgeacl(cp);
9886 again:
9887 	if (cp->c_metadata.md_flags & MD_ACL) {
9888 		error = cachefs_getaclfromcache(cp, &vsec);
9889 		if (error != 0) {
9890 #ifdef CFSDEBUG
9891 			if (error != ETIMEDOUT)
9892 				CFS_DEBUG(CFSDEBUG_VOPS)
9893 					printf("cachefs_acl_access():"
9894 					    "error %d from getaclfromcache()\n",
9895 					    error);
9896 #endif /* CFSDEBUG */
9897 			if ((cp->c_metadata.md_flags & MD_ACL) == 0) {
9898 				goto again;
9899 			} else {
9900 				goto out;
9901 			}
9902 		}
9903 	} else {
9904 		if (cp->c_backvp == NULL) {
9905 			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
9906 				error = cachefs_getbackvp(fscp, cp);
9907 			else
9908 				error = ETIMEDOUT;
9909 		}
9910 		if (error == 0)
9911 			error = VOP_GETSECATTR(cp->c_backvp, &vsec, 0, cr,
9912 			    NULL);
9913 		if (error != 0) {
9914 #ifdef CFSDEBUG
9915 			CFS_DEBUG(CFSDEBUG_VOPS)
9916 				printf("cachefs_acl_access():"
9917 				    "error %d from getsecattr(backvp)\n",
9918 				    error);
9919 #endif /* CFSDEBUG */
9920 			goto out;
9921 		}
9922 		if ((cp->c_flags & CN_NOCACHE) == 0 &&
9923 		    !CFS_ISFS_BACKFS_NFSV4(fscp))
9924 			(void) cachefs_cacheacl(cp, &vsec);
9925 	}
9926 	gotvsec = 1;
9927 
9928 	ASSERT(error == 0);
9929 	for (i = 0; i < vsec.vsa_aclcnt; i++) {
9930 		aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9931 		switch (aclp->a_type) {
9932 		case USER_OBJ:
9933 			/*
9934 			 * this might look cleaner in the 2nd loop
9935 			 * below, but we do it here as an
9936 			 * optimization.
9937 			 */
9938 
9939 			owner = aclp->a_id;
9940 			if (crgetuid(cr) == owner) {
9941 				error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
9942 				    cr, cp);
9943 				goto out;
9944 			}
9945 			break;
9946 
9947 		case CLASS_OBJ:
9948 			mask = aclp->a_perm;
9949 			ismask = 1;
9950 			break;
9951 		}
9952 	}
9953 
9954 	ASSERT(error == 0);
9955 	for (i = 0; i < vsec.vsa_aclcnt; i++) {
9956 		aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9957 		switch (aclp->a_type) {
9958 		case USER:
9959 			if (crgetuid(cr) == aclp->a_id) {
9960 				error = ACL_MODE_CHECK(mode,
9961 				    (aclp->a_perm & mask) << 6, cr, cp);
9962 				goto out;
9963 			}
9964 			break;
9965 
9966 		case GROUP_OBJ:
9967 			if (groupmember(aclp->a_id, cr)) {
9968 				++ngroup;
9969 				gperm |= aclp->a_perm;
9970 				if (! ismask) {
9971 					error = ACL_MODE_CHECK(mode,
9972 					    aclp->a_perm << 6,
9973 					    cr, cp);
9974 					goto out;
9975 				}
9976 			}
9977 			break;
9978 
9979 		case GROUP:
9980 			if (groupmember(aclp->a_id, cr)) {
9981 				++ngroup;
9982 				gperm |= aclp->a_perm;
9983 			}
9984 			break;
9985 
9986 		case OTHER_OBJ:
9987 			if (ngroup == 0) {
9988 				error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
9989 				    cr, cp);
9990 				goto out;
9991 			}
9992 			break;
9993 
9994 		default:
9995 			break;
9996 		}
9997 	}
9998 
9999 	ASSERT(ngroup > 0);
10000 	error = ACL_MODE_CHECK(mode, (gperm & mask) << 6, cr, cp);
10001 
10002 out:
10003 	if (gotvsec) {
10004 		if (vsec.vsa_aclcnt && vsec.vsa_aclentp)
10005 			kmem_free(vsec.vsa_aclentp,
10006 			    vsec.vsa_aclcnt * sizeof (aclent_t));
10007 		if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp)
10008 			kmem_free(vsec.vsa_dfaclentp,
10009 			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
10010 	}
10011 
10012 	return (error);
10013 }
10014 
10015 /*
10016  * see if permissions allow for removal of the given file from
10017  * the given directory.
10018  */
10019 static int
10020 cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr)
10021 {
10022 	uid_t uid;
10023 	/*
10024 	 * If the containing directory is sticky, the user must:
10025 	 *  - own the directory, or
10026 	 *  - own the file, or
10027 	 *  - be able to write the file (if it's a plain file), or
10028 	 *  - be sufficiently privileged.
10029 	 */
10030 	if ((dcp->c_attr.va_mode & S_ISVTX) &&
10031 	    ((uid = crgetuid(cr)) != dcp->c_attr.va_uid) &&
10032 	    (uid != cp->c_attr.va_uid) &&
10033 	    (cp->c_attr.va_type != VREG ||
10034 	    cachefs_access_local(cp, VWRITE, cr) != 0))
10035 		return (secpolicy_vnode_remove(cr));
10036 
10037 	return (0);
10038 }
10039 
10040 /*
10041  * Returns a new name, may even be unique.
10042  * Stolen from nfs code.
10043  * Since now we will use renaming to .cfs* in place of .nfs*
10044  * for CacheFS. Both NFS and CacheFS will rename opened files.
10045  */
10046 static char cachefs_prefix[] = ".cfs";
10047 kmutex_t cachefs_newnum_lock;
10048 
10049 static char *
10050 cachefs_newname(void)
10051 {
10052 	static uint_t newnum = 0;
10053 	char *news;
10054 	char *s, *p;
10055 	uint_t id;
10056 
10057 	mutex_enter(&cachefs_newnum_lock);
10058 	if (newnum == 0) {
10059 		newnum = gethrestime_sec() & 0xfffff;
10060 		newnum |= 0x10000;
10061 	}
10062 	id = newnum++;
10063 	mutex_exit(&cachefs_newnum_lock);
10064 
10065 	news = cachefs_kmem_alloc(MAXNAMELEN, KM_SLEEP);
10066 	s = news;
10067 	p = cachefs_prefix;
10068 	while (*p != '\0')
10069 		*s++ = *p++;
10070 	while (id != 0) {
10071 		*s++ = "0123456789ABCDEF"[id & 0x0f];
10072 		id >>= 4;
10073 	}
10074 	*s = '\0';
10075 	return (news);
10076 }
10077 
10078 /*
10079  * Called to rename the specified file to a temporary file so
10080  * operations to the file after remove work.
10081  * Must call this routine with the dir c_rwlock held as a writer.
10082  */
10083 static int
10084 /*ARGSUSED*/
10085 cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm, cred_t *cr)
10086 {
10087 	cnode_t *cp = VTOC(vp);
10088 	char *tmpname;
10089 	fscache_t *fscp = C_TO_FSCACHE(cp);
10090 	int error;
10091 
10092 	ASSERT(RW_WRITE_HELD(&(VTOC(dvp)->c_rwlock)));
10093 
10094 	/* get the new name for the file */
10095 	tmpname = cachefs_newname();
10096 
10097 	/* do the link */
10098 	if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
10099 		error = cachefs_link_connected(dvp, vp, tmpname, cr);
10100 	else
10101 		error = cachefs_link_disconnected(dvp, vp, tmpname, cr);
10102 	if (error) {
10103 		cachefs_kmem_free(tmpname, MAXNAMELEN);
10104 		return (error);
10105 	}
10106 
10107 	mutex_enter(&cp->c_statelock);
10108 	if (cp->c_unldvp) {
10109 		VN_RELE(cp->c_unldvp);
10110 		cachefs_kmem_free(cp->c_unlname, MAXNAMELEN);
10111 		crfree(cp->c_unlcred);
10112 	}
10113 
10114 	VN_HOLD(dvp);
10115 	cp->c_unldvp = dvp;
10116 	crhold(cr);
10117 	cp->c_unlcred = cr;
10118 	cp->c_unlname = tmpname;
10119 
10120 	/* drop the backvp so NFS does not also do a rename */
10121 	mutex_exit(&cp->c_statelock);
10122 
10123 	return (0);
10124 }
10125 
10126 /*
10127  * Marks the cnode as modified.
10128  */
10129 static void
10130 cachefs_modified(cnode_t *cp)
10131 {
10132 	fscache_t *fscp = C_TO_FSCACHE(cp);
10133 	struct vattr va;
10134 	int error;
10135 
10136 	ASSERT(MUTEX_HELD(&cp->c_statelock));
10137 	ASSERT(cp->c_metadata.md_rlno);
10138 
10139 	/* if not on the modify list */
10140 	if (cp->c_metadata.md_rltype != CACHEFS_RL_MODIFIED) {
10141 		/* put on modified list, also marks the file as modified */
10142 		cachefs_rlent_moveto(fscp->fs_cache, CACHEFS_RL_MODIFIED,
10143 		    cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks);
10144 		cp->c_metadata.md_rltype = CACHEFS_RL_MODIFIED;
10145 		cp->c_flags |= CN_UPDATED;
10146 
10147 		/* if a modified regular file that is not local */
10148 		if (((cp->c_id.cid_flags & CFS_CID_LOCAL) == 0) &&
10149 		    (cp->c_metadata.md_flags & MD_FILE) &&
10150 		    (cp->c_attr.va_type == VREG)) {
10151 
10152 			if (cp->c_frontvp == NULL)
10153 				(void) cachefs_getfrontfile(cp);
10154 			if (cp->c_frontvp) {
10155 				/* identify file so fsck knows it is modified */
10156 				va.va_mode = 0766;
10157 				va.va_mask = AT_MODE;
10158 				error = VOP_SETATTR(cp->c_frontvp,
10159 				    &va, 0, kcred, NULL);
10160 				if (error) {
10161 					cmn_err(CE_WARN,
10162 					    "Cannot change ff mode.\n");
10163 				}
10164 			}
10165 		}
10166 	}
10167 }
10168 
10169 /*
10170  * Marks the cnode as modified.
10171  * Allocates a rl slot for the cnode if necessary.
10172  * Returns 0 for success, !0 if cannot get an rl slot.
10173  */
10174 static int
10175 cachefs_modified_alloc(cnode_t *cp)
10176 {
10177 	fscache_t *fscp = C_TO_FSCACHE(cp);
10178 	filegrp_t *fgp = cp->c_filegrp;
10179 	int error;
10180 	rl_entry_t rl_ent;
10181 
10182 	ASSERT(MUTEX_HELD(&cp->c_statelock));
10183 
10184 	/* get the rl slot if needed */
10185 	if (cp->c_metadata.md_rlno == 0) {
10186 		/* get a metadata slot if we do not have one yet */
10187 		if (cp->c_flags & CN_ALLOC_PENDING) {
10188 			if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
10189 				(void) filegrp_allocattr(cp->c_filegrp);
10190 			}
10191 			error = filegrp_create_metadata(cp->c_filegrp,
10192 			    &cp->c_metadata, &cp->c_id);
10193 			if (error)
10194 				return (error);
10195 			cp->c_flags &= ~CN_ALLOC_PENDING;
10196 		}
10197 
10198 		/* get a free rl entry */
10199 		rl_ent.rl_fileno = cp->c_id.cid_fileno;
10200 		rl_ent.rl_local = (cp->c_id.cid_flags & CFS_CID_LOCAL) ? 1 : 0;
10201 		rl_ent.rl_fsid = fscp->fs_cfsid;
10202 		rl_ent.rl_attrc = 0;
10203 		error = cachefs_rl_alloc(fscp->fs_cache, &rl_ent,
10204 		    &cp->c_metadata.md_rlno);
10205 		if (error)
10206 			return (error);
10207 		cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
10208 
10209 		/* hold the filegrp so the attrcache file is not gc */
10210 		error = filegrp_ffhold(fgp);
10211 		if (error) {
10212 			cachefs_rlent_moveto(fscp->fs_cache,
10213 			    CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
10214 			cp->c_metadata.md_rlno = 0;
10215 			return (error);
10216 		}
10217 	}
10218 	cachefs_modified(cp);
10219 	return (0);
10220 }
10221 
10222 int
10223 cachefs_vtype_aclok(vnode_t *vp)
10224 {
10225 	vtype_t *vtp, oktypes[] = {VREG, VDIR, VFIFO, VNON};
10226 
10227 	if (vp->v_type == VNON)
10228 		return (0);
10229 
10230 	for (vtp = oktypes; *vtp != VNON; vtp++)
10231 		if (vp->v_type == *vtp)
10232 			break;
10233 
10234 	return (*vtp != VNON);
10235 }
10236 
10237 static int
10238 cachefs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
10239     caller_context_t *ct)
10240 {
10241 	int error = 0;
10242 	fscache_t *fscp = C_TO_FSCACHE(VTOC(vp));
10243 
10244 	/* Assert cachefs compatibility if NFSv4 is in use */
10245 	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
10246 	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(vp));
10247 
10248 	if (cmd == _PC_FILESIZEBITS) {
10249 		u_offset_t maxsize = fscp->fs_offmax;
10250 		(*valp) = 0;
10251 		while (maxsize != 0) {
10252 			maxsize >>= 1;
10253 			(*valp)++;
10254 		}
10255 		(*valp)++;
10256 	} else
10257 		error = fs_pathconf(vp, cmd, valp, cr, ct);
10258 
10259 	return (error);
10260 }
10261