1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/param.h> 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/cred.h> 32 #include <sys/proc.h> 33 #include <sys/user.h> 34 #include <sys/time.h> 35 #include <sys/vnode.h> 36 #include <sys/vfs.h> 37 #include <sys/vfs_opreg.h> 38 #include <sys/file.h> 39 #include <sys/filio.h> 40 #include <sys/uio.h> 41 #include <sys/buf.h> 42 #include <sys/mman.h> 43 #include <sys/tiuser.h> 44 #include <sys/pathname.h> 45 #include <sys/dirent.h> 46 #include <sys/conf.h> 47 #include <sys/debug.h> 48 #include <sys/vmsystm.h> 49 #include <sys/fcntl.h> 50 #include <sys/flock.h> 51 #include <sys/swap.h> 52 #include <sys/errno.h> 53 #include <sys/sysmacros.h> 54 #include <sys/disp.h> 55 #include <sys/kmem.h> 56 #include <sys/cmn_err.h> 57 #include <sys/vtrace.h> 58 #include <sys/mount.h> 59 #include <sys/bootconf.h> 60 #include <sys/dnlc.h> 61 #include <sys/stat.h> 62 #include <sys/acl.h> 63 #include <sys/policy.h> 64 #include <rpc/types.h> 65 66 #include <vm/hat.h> 
67 #include <vm/as.h> 68 #include <vm/page.h> 69 #include <vm/pvn.h> 70 #include <vm/seg.h> 71 #include <vm/seg_map.h> 72 #include <vm/seg_vn.h> 73 #include <vm/rm.h> 74 #include <sys/fs/cachefs_fs.h> 75 #include <sys/fs/cachefs_dir.h> 76 #include <sys/fs/cachefs_dlog.h> 77 #include <sys/fs/cachefs_ioctl.h> 78 #include <sys/fs/cachefs_log.h> 79 #include <fs/fs_subr.h> 80 81 int cachefs_dnlc; /* use dnlc, debugging */ 82 83 static void cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp, 84 cred_t *cr); 85 static void cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap, 86 cred_t *cr); 87 static void cachefs_createacl(cnode_t *dcp, cnode_t *newcp); 88 static int cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec); 89 static int cachefs_getacldirvp(cnode_t *cp); 90 static void cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec); 91 static int cachefs_access_local(void *cp, int mode, cred_t *cr); 92 static int cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr); 93 static int cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen, 94 u_offset_t iooff, cred_t *cr); 95 static int cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen, 96 u_offset_t iooff, cred_t *cr); 97 static int cachefs_setattr_connected(vnode_t *vp, vattr_t *vap, int flags, 98 cred_t *cr, caller_context_t *ct); 99 static int cachefs_setattr_disconnected(vnode_t *vp, vattr_t *vap, 100 int flags, cred_t *cr, caller_context_t *ct); 101 static int cachefs_access_connected(struct vnode *vp, int mode, 102 int flags, cred_t *cr); 103 static int cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp, 104 cred_t *cr); 105 static int cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva, 106 char *tnm, cred_t *cr); 107 static int cachefs_symlink_disconnected(vnode_t *dvp, char *lnm, 108 vattr_t *tva, char *tnm, cred_t *cr); 109 static int cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm, 110 cred_t *cr); 111 static int 
cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp, 112 char *tnm, cred_t *cr); 113 static int cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap, 114 vnode_t **vpp, cred_t *cr); 115 static int cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap, 116 vnode_t **vpp, cred_t *cr); 117 static int cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr); 118 static int cachefs_rmdir_connected(vnode_t *dvp, char *nm, 119 vnode_t *cdir, cred_t *cr, vnode_t *vp); 120 static int cachefs_rmdir_disconnected(vnode_t *dvp, char *nm, 121 vnode_t *cdir, cred_t *cr, vnode_t *vp); 122 static char *cachefs_newname(void); 123 static int cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm, 124 cred_t *cr); 125 static int cachefs_rename_connected(vnode_t *odvp, char *onm, 126 vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp); 127 static int cachefs_rename_disconnected(vnode_t *odvp, char *onm, 128 vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp); 129 static int cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr, 130 int *eofp); 131 static int cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop, 132 cred_t *cr, int *eofp); 133 static int cachefs_readback_translate(cnode_t *cp, uio_t *uiop, 134 cred_t *cr, int *eofp); 135 136 static int cachefs_setattr_common(vnode_t *vp, vattr_t *vap, int flags, 137 cred_t *cr, caller_context_t *ct); 138 139 static int cachefs_open(struct vnode **, int, cred_t *, 140 caller_context_t *); 141 static int cachefs_close(struct vnode *, int, int, offset_t, 142 cred_t *, caller_context_t *); 143 static int cachefs_read(struct vnode *, struct uio *, int, cred_t *, 144 caller_context_t *); 145 static int cachefs_write(struct vnode *, struct uio *, int, cred_t *, 146 caller_context_t *); 147 static int cachefs_ioctl(struct vnode *, int, intptr_t, int, cred_t *, 148 int *, caller_context_t *); 149 static int cachefs_getattr(struct vnode *, struct vattr *, int, 150 cred_t *, caller_context_t *); 151 
static int cachefs_setattr(struct vnode *, struct vattr *, 152 int, cred_t *, caller_context_t *); 153 static int cachefs_access(struct vnode *, int, int, cred_t *, 154 caller_context_t *); 155 static int cachefs_lookup(struct vnode *, char *, struct vnode **, 156 struct pathname *, int, struct vnode *, cred_t *, 157 caller_context_t *, int *, pathname_t *); 158 static int cachefs_create(struct vnode *, char *, struct vattr *, 159 enum vcexcl, int, struct vnode **, cred_t *, int, 160 caller_context_t *, vsecattr_t *); 161 static int cachefs_create_connected(vnode_t *dvp, char *nm, 162 vattr_t *vap, enum vcexcl exclusive, int mode, 163 vnode_t **vpp, cred_t *cr); 164 static int cachefs_create_disconnected(vnode_t *dvp, char *nm, 165 vattr_t *vap, enum vcexcl exclusive, int mode, 166 vnode_t **vpp, cred_t *cr); 167 static int cachefs_remove(struct vnode *, char *, cred_t *, 168 caller_context_t *, int); 169 static int cachefs_link(struct vnode *, struct vnode *, char *, 170 cred_t *, caller_context_t *, int); 171 static int cachefs_rename(struct vnode *, char *, struct vnode *, 172 char *, cred_t *, caller_context_t *, int); 173 static int cachefs_mkdir(struct vnode *, char *, struct 174 vattr *, struct vnode **, cred_t *, caller_context_t *, 175 int, vsecattr_t *); 176 static int cachefs_rmdir(struct vnode *, char *, struct vnode *, 177 cred_t *, caller_context_t *, int); 178 static int cachefs_readdir(struct vnode *, struct uio *, 179 cred_t *, int *, caller_context_t *, int); 180 static int cachefs_symlink(struct vnode *, char *, struct vattr *, 181 char *, cred_t *, caller_context_t *, int); 182 static int cachefs_readlink(struct vnode *, struct uio *, cred_t *, 183 caller_context_t *); 184 static int cachefs_readlink_connected(vnode_t *vp, uio_t *uiop, cred_t *cr); 185 static int cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop); 186 static int cachefs_fsync(struct vnode *, int, cred_t *, 187 caller_context_t *); 188 static void cachefs_inactive(struct 
vnode *, cred_t *, caller_context_t *); 189 static int cachefs_fid(struct vnode *, struct fid *, caller_context_t *); 190 static int cachefs_rwlock(struct vnode *, int, caller_context_t *); 191 static void cachefs_rwunlock(struct vnode *, int, caller_context_t *); 192 static int cachefs_seek(struct vnode *, offset_t, offset_t *, 193 caller_context_t *); 194 static int cachefs_frlock(struct vnode *, int, struct flock64 *, 195 int, offset_t, struct flk_callback *, cred_t *, 196 caller_context_t *); 197 static int cachefs_space(struct vnode *, int, struct flock64 *, int, 198 offset_t, cred_t *, caller_context_t *); 199 static int cachefs_realvp(struct vnode *, struct vnode **, 200 caller_context_t *); 201 static int cachefs_getpage(struct vnode *, offset_t, size_t, uint_t *, 202 struct page *[], size_t, struct seg *, caddr_t, 203 enum seg_rw, cred_t *, caller_context_t *); 204 static int cachefs_getapage(struct vnode *, u_offset_t, size_t, uint_t *, 205 struct page *[], size_t, struct seg *, caddr_t, 206 enum seg_rw, cred_t *); 207 static int cachefs_getapage_back(struct vnode *, u_offset_t, size_t, 208 uint_t *, struct page *[], size_t, struct seg *, caddr_t, 209 enum seg_rw, cred_t *); 210 static int cachefs_putpage(struct vnode *, offset_t, size_t, int, 211 cred_t *, caller_context_t *); 212 static int cachefs_map(struct vnode *, offset_t, struct as *, 213 caddr_t *, size_t, uchar_t, uchar_t, uint_t, cred_t *, 214 caller_context_t *); 215 static int cachefs_addmap(struct vnode *, offset_t, struct as *, 216 caddr_t, size_t, uchar_t, uchar_t, uint_t, cred_t *, 217 caller_context_t *); 218 static int cachefs_delmap(struct vnode *, offset_t, struct as *, 219 caddr_t, size_t, uint_t, uint_t, uint_t, cred_t *, 220 caller_context_t *); 221 static int cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec, 222 int flag, cred_t *cr, caller_context_t *); 223 static int cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec, 224 int flag, cred_t *cr, caller_context_t *); 225 static int 
cachefs_shrlock(vnode_t *, int, struct shrlock *, int,
			cred_t *, caller_context_t *);
static int cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
			cred_t *cr);
static int cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec,
			int flag, cred_t *cr);

static int cachefs_dump(struct vnode *, caddr_t, offset_t, offset_t,
			caller_context_t *);
static int cachefs_pageio(struct vnode *, page_t *,
			u_offset_t, size_t, int, cred_t *, caller_context_t *);
static int cachefs_writepage(struct vnode *vp, caddr_t base,
			int tcount, struct uio *uiop);
static int cachefs_pathconf(vnode_t *, int, ulong_t *, cred_t *,
			caller_context_t *);

/*
 * NFSv4 pass-through variants: these hand the operation directly to
 * the back file system vnode instead of going through the cache.
 */
static int cachefs_read_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
			cred_t *cr, caller_context_t *ct);
static int cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
			cred_t *cr, caller_context_t *ct);
static int cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap,
			int flags, cred_t *cr, caller_context_t *ct);
static int cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr,
			vnode_t *vp);
static int cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off,
			size_t len, uint_t *protp, struct page *pl[],
			size_t plsz, struct seg *seg, caddr_t addr,
			enum seg_rw rw, cred_t *cr);
static int cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off,
			size_t len, int flags, cred_t *cr);
static int cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off,
			struct as *as, caddr_t *addrp, size_t len, uchar_t prot,
			uchar_t maxprot, uint_t flags, cred_t *cr);
static int cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd,
			struct flock64 *bfp, int flag, offset_t offset,
			cred_t *cr, caller_context_t *ct);

/* vnode operations vector; filled in by cachefs_init_vnops() */
struct vnodeops *cachefs_vnodeops;

/*
 * Template mapping generic vnode operation names to the cachefs
 * implementations; consumed by vn_make_ops() below.
 */
static const fs_operation_def_t cachefs_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = cachefs_open },
	VOPNAME_CLOSE,		{ .vop_close = cachefs_close },
	VOPNAME_READ,		{ .vop_read = cachefs_read },
	VOPNAME_WRITE,		{ .vop_write = cachefs_write },
	VOPNAME_IOCTL,		{ .vop_ioctl = cachefs_ioctl },
	VOPNAME_GETATTR,	{ .vop_getattr = cachefs_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = cachefs_setattr },
	VOPNAME_ACCESS,		{ .vop_access = cachefs_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = cachefs_lookup },
	VOPNAME_CREATE,		{ .vop_create = cachefs_create },
	VOPNAME_REMOVE,		{ .vop_remove = cachefs_remove },
	VOPNAME_LINK,		{ .vop_link = cachefs_link },
	VOPNAME_RENAME,		{ .vop_rename = cachefs_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = cachefs_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = cachefs_rmdir },
	VOPNAME_READDIR,	{ .vop_readdir = cachefs_readdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = cachefs_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = cachefs_readlink },
	VOPNAME_FSYNC,		{ .vop_fsync = cachefs_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = cachefs_inactive },
	VOPNAME_FID,		{ .vop_fid = cachefs_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = cachefs_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = cachefs_rwunlock },
	VOPNAME_SEEK,		{ .vop_seek = cachefs_seek },
	VOPNAME_FRLOCK,		{ .vop_frlock = cachefs_frlock },
	VOPNAME_SPACE,		{ .vop_space = cachefs_space },
	VOPNAME_REALVP,		{ .vop_realvp = cachefs_realvp },
	VOPNAME_GETPAGE,	{ .vop_getpage = cachefs_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = cachefs_putpage },
	VOPNAME_MAP,		{ .vop_map = cachefs_map },
	VOPNAME_ADDMAP,		{ .vop_addmap = cachefs_addmap },
	VOPNAME_DELMAP,		{ .vop_delmap = cachefs_delmap },
	VOPNAME_DUMP,		{ .vop_dump = cachefs_dump },
	VOPNAME_PATHCONF,	{ .vop_pathconf = cachefs_pathconf },
	VOPNAME_PAGEIO,		{ .vop_pageio = cachefs_pageio },
	VOPNAME_SETSECATTR,	{ .vop_setsecattr = cachefs_setsecattr },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = cachefs_getsecattr },
	VOPNAME_SHRLOCK,	{ .vop_shrlock = cachefs_shrlock },
	NULL,			NULL
};

/* forward declarations of statics */
static void cachefs_modified(cnode_t *cp);
static int cachefs_modified_alloc(cnode_t *cp);

/*
 * cachefs_init_vnops
 *
 * Build the cachefs vnodeops vector from the template above.
 * Returns the vn_make_ops() result (0 on success).
 */
int
cachefs_init_vnops(char *name)
{
	return (vn_make_ops(name,
	    cachefs_vnodeops_template, &cachefs_vnodeops));
}

/* Accessor for the vnodeops vector built by cachefs_init_vnops(). */
struct vnodeops *
cachefs_getvnodeops(void)
{
	return (cachefs_vnodeops);
}
/*
 * cachefs_open
 *
 * VOP_OPEN entry point.  Acquires (and re-acquires on timeout) access
 * to the fscache, records the opener's credentials on the cnode, and
 * either passes the open to the back file system (connected) or
 * tracks read/write open counts for a deferred back open
 * (disconnected).  Returns 0 or an errno.
 */
static int
cachefs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
{
	int error = 0;
	cnode_t *cp = VTOC(*vpp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int held = 0;		/* non-zero while we hold cd access */
	int type;
	int connected = 0;	/* request connected operation on retry */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_open: ENTER vpp %p flag %x\n",
		    (void *)vpp, flag);
#endif
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}
	if ((flag & FWRITE) &&
	    ((*vpp)->v_type == VDIR || (*vpp)->v_type == VLNK)) {
		error = EISDIR;
		goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the open operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			goto out;
		held = 1;

		mutex_enter(&cp->c_statelock);

		/* grab creds if we do not have any yet */
		if (cp->c_cred == NULL) {
			crhold(cr);
			cp->c_cred = cr;
		}
		cp->c_flags |= CN_NEEDOPEN;

		/* if we are disconnected */
		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
			/* if we cannot write to the file system */
			if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp)) {
				mutex_exit(&cp->c_statelock);
				connected = 1;
				continue;
			}
			/*
			 * Allow read only requests to continue
			 */
			if ((flag & (FWRITE|FREAD)) == FREAD) {
				/* track the flag for opening the backvp */
				cp->c_rdcnt++;
				mutex_exit(&cp->c_statelock);
				error = 0;
				break;
			}

			/*
			 * check credentials - if this procs
			 * credentials don't match the creds in the
			 * cnode disallow writing while disconnected.
			 */
			if (crcmp(cp->c_cred, CRED()) != 0 &&
			    secpolicy_vnode_access(CRED(), *vpp,
			    cp->c_attr.va_uid, VWRITE) != 0) {
				mutex_exit(&cp->c_statelock);
				connected = 1;
				continue;
			}
			/* to get here, we know that the WRITE flag is on */
			cp->c_wrcnt++;
			if (flag & FREAD)
				cp->c_rdcnt++;
		}

		/* else if we are connected */
		else {
			/* if cannot use the cached copy of the file */
			if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp) &&
			    ((cp->c_flags & CN_NOCACHE) == 0))
				cachefs_nocache(cp);

			/* pass open to the back file */
			if (cp->c_backvp) {
				cp->c_flags &= ~CN_NEEDOPEN;
				CFS_DPRINT_BACKFS_NFSV4(fscp,
				    ("cachefs_open (nfsv4): cnode %p, "
				    "backvp %p\n", cp, cp->c_backvp));
				error = VOP_OPEN(&cp->c_backvp, flag, cr, ct);
				if (CFS_TIMEOUT(fscp, error)) {
					mutex_exit(&cp->c_statelock);
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					continue;
				} else if (error) {
					mutex_exit(&cp->c_statelock);
					break;
				}
			} else {
				/* backvp will be VOP_OPEN'd later */
				if (flag & FREAD)
					cp->c_rdcnt++;
				if (flag & FWRITE)
					cp->c_wrcnt++;
			}

			/*
			 * Now perform a consistency check on the file.
			 * If strict consistency then force a check to
			 * the backfs even if the timeout has not expired
			 * for close-to-open consistency.
			 */
			type = 0;
			if (fscp->fs_consttype == CFS_FS_CONST_STRICT)
				type = C_BACK_CHECK;
			error = CFSOP_CHECK_COBJECT(fscp, cp, type, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				mutex_exit(&cp->c_statelock);
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				continue;
			}
		}
		mutex_exit(&cp->c_statelock);
		break;
	}
	if (held)
		cachefs_cd_release(fscp);
out:
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_open: EXIT vpp %p error %d\n",
		    (void *)vpp, error);
#endif
	return (error);
}
/*
 * cachefs_close
 *
 * VOP_CLOSE entry point.  Cleans local locks, handles the cache
 * daemon closing the root vnode (flipping connected/disconnected
 * state), pushes dirty pages on a last writer close, and passes the
 * close on to the back file system when connected.  Returns 0 or an
 * errno (the cnode's saved c_error if nothing else failed).
 */
/* ARGSUSED */
static int
cachefs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
	caller_context_t *ct)
{
	int error = 0;
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int held = 0;		/* non-zero while we hold cd access */
	int connected = 0;	/* request connected operation on retry */
	int close_cnt = 1;	/* count passed to back fs VOP_CLOSE */
	cachefscache_t *cachep;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_close: ENTER vp %p\n", (void *)vp);
#endif
	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the close operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	/*
	 * File could have been passed in or inherited from the global zone, so
	 * we don't want to flat out reject the request; we'll just leave things
	 * the way they are and let the backfs (NFS) deal with it.
	 */
	/* get rid of any local locks */
	if (CFS_ISFS_LLOCK(fscp)) {
		(void) cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
	}

	/* clean up if this is the daemon closing down */
	if ((fscp->fs_cddaemonid == ttoproc(curthread)->p_pid) &&
	    ((ttoproc(curthread)->p_pid) != 0) &&
	    (vp == fscp->fs_rootvp) &&
	    (count == 1)) {
		mutex_enter(&fscp->fs_cdlock);
		fscp->fs_cddaemonid = 0;
		/* with no daemon, a dlog file means disconnected operation */
		if (fscp->fs_dlogfile)
			fscp->fs_cdconnected = CFS_CD_DISCONNECTED;
		else
			fscp->fs_cdconnected = CFS_CD_CONNECTED;
		cv_broadcast(&fscp->fs_cdwaitcv);
		mutex_exit(&fscp->fs_cdlock);
		if (fscp->fs_flags & CFS_FS_ROOTFS) {
			cachep = fscp->fs_cache;
			mutex_enter(&cachep->c_contentslock);
			ASSERT(cachep->c_rootdaemonid != 0);
			cachep->c_rootdaemonid = 0;
			mutex_exit(&cachep->c_contentslock);
		}
		return (0);
	}

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			goto out;
		held = 1;
		connected = 0;

		/* if not the last close */
		if (count > 1) {
			if (fscp->fs_cdconnected != CFS_CD_CONNECTED)
				goto out;
			mutex_enter(&cp->c_statelock);
			if (cp->c_backvp) {
				CFS_DPRINT_BACKFS_NFSV4(fscp,
				    ("cachefs_close (nfsv4): cnode %p, "
				    "backvp %p\n", cp, cp->c_backvp));
				error = VOP_CLOSE(cp->c_backvp, flag, count,
				    offset, cr, ct);
				if (CFS_TIMEOUT(fscp, error)) {
					mutex_exit(&cp->c_statelock);
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					continue;
				}
			}
			mutex_exit(&cp->c_statelock);
			goto out;
		}

		/*
		 * If the file is an unlinked file, then flush the lookup
		 * cache so that inactive will be called if this is
		 * the last reference.  It will invalidate all of the
		 * cached pages, without writing them out.  Writing them
		 * out is not required because they will be written to a
		 * file which will be immediately removed.
		 */
		if (cp->c_unldvp != NULL) {
			dnlc_purge_vp(vp);
			mutex_enter(&cp->c_statelock);
			error = cp->c_error;
			cp->c_error = 0;
			mutex_exit(&cp->c_statelock);
			/* always call VOP_CLOSE() for back fs vnode */
		}

		/* force dirty data to stable storage */
		else if ((vp->v_type == VREG) && (flag & FWRITE) &&
		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
			/* clean the cachefs pages synchronously */
			error = cachefs_putpage_common(vp, (offset_t)0,
			    0, 0, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					continue;
				} else {
					connected = 1;
					continue;
				}
			}

			/* if no space left in cache, wait until connected */
			if ((error == ENOSPC) &&
			    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
				connected = 1;
				continue;
			}

			/* clear the cnode error if putpage worked */
			if ((error == 0) && cp->c_error) {
				mutex_enter(&cp->c_statelock);
				cp->c_error = 0;
				mutex_exit(&cp->c_statelock);
			}

			/* if any other important error */
			if (cp->c_error) {
				/* get rid of the pages */
				(void) cachefs_putpage_common(vp,
				    (offset_t)0, 0, B_INVAL | B_FORCE, cr);
				dnlc_purge_vp(vp);
			}
		}

		mutex_enter(&cp->c_statelock);
		if (cp->c_backvp &&
		    (fscp->fs_cdconnected == CFS_CD_CONNECTED)) {
			error = VOP_CLOSE(cp->c_backvp, flag, close_cnt,
			    offset, cr, ct);
			if (CFS_TIMEOUT(fscp, error)) {
				mutex_exit(&cp->c_statelock);
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				/* don't decrement the vnode counts again */
				close_cnt = 0;
				continue;
			}
		}
		mutex_exit(&cp->c_statelock);
		break;
	}

	/* report any saved cnode error if the close itself succeeded */
	mutex_enter(&cp->c_statelock);
	if (!error)
		error = cp->c_error;
	cp->c_error = 0;
	mutex_exit(&cp->c_statelock);

out:
	if (held)
		cachefs_cd_release(fscp);
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_close: EXIT vp %p\n", (void *)vp);
#endif
	return (error);
}
/*
 * cachefs_read
 *
 * VOP_READ entry point.  NFSv4 back file systems are handled by
 * direct pass-through; otherwise data is copied out through the
 * segmap window in MAXBSIZE chunks, with a consistency check of the
 * cached object before each chunk.  Returns 0 or an errno.
 */
/*ARGSUSED*/
static int
cachefs_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
	caller_context_t *ct)
{
	struct cnode *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	register u_offset_t off;
	register int mapoff;
	register caddr_t base;
	int n;
	offset_t diff;
	uint_t flags = 0;
	int error = 0;

#if 0
	if (vp->v_flag & VNOCACHE)
		flags = SM_INVAL;
#endif
	if (getzoneid() != GLOBAL_ZONEID)
		return (EPERM);
	if (vp->v_type != VREG)
		return (EISDIR);

	ASSERT(RW_READ_HELD(&cp->c_rwlock));

	if (uiop->uio_resid == 0)
		return (0);


	if (uiop->uio_loffset < (offset_t)0)
		return (EINVAL);

	/*
	 * Call backfilesystem to read if NFSv4, the cachefs code
	 * does the read from the back filesystem asynchronously
	 * which is not supported by pass-through functionality.
	 */
	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_read_backfs_nfsv4(vp, uiop, ioflag, cr, ct);
		goto out;
	}

	if (MANDLOCK(vp, cp->c_attr.va_mode)) {
		error = chklock(vp, FREAD, (offset_t)uiop->uio_loffset,
		    uiop->uio_resid, uiop->uio_fmode, ct);
		if (error)
			return (error);
	}

	/*
	 * Sit in a loop and transfer (uiomove) the data in up to
	 * MAXBSIZE chunks.  Each chunk is mapped into the kernel's
	 * address space as needed and then released.
	 */
	do {
		/*
		 * off	Offset of current MAXBSIZE chunk
		 * mapoff	Offset within the current chunk
		 * n	Number of bytes to move from this chunk
		 * base	kernel address of mapped in chunk
		 */
		off = uiop->uio_loffset & (offset_t)MAXBMASK;
		mapoff = uiop->uio_loffset & MAXBOFFSET;
		n = MAXBSIZE - mapoff;
		if (n > uiop->uio_resid)
			n = (uint_t)uiop->uio_resid;

		/* perform consistency check */
		error = cachefs_cd_access(fscp, 0, 0);
		if (error)
			break;
		mutex_enter(&cp->c_statelock);
		error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
		diff = cp->c_size - uiop->uio_loffset;
		mutex_exit(&cp->c_statelock);
		if (CFS_TIMEOUT(fscp, error)) {
			/* cd timeout: renew access and retry this chunk */
			cachefs_cd_release(fscp);
			cachefs_cd_timedout(fscp);
			error = 0;
			continue;
		}
		cachefs_cd_release(fscp);

		if (error)
			break;

		/* at or past end of file */
		if (diff <= (offset_t)0)
			break;
		if (diff < (offset_t)n)
			n = diff;

		base = segmap_getmapflt(segkmap, vp, off, (uint_t)n, 1, S_READ);

		error = segmap_fault(kas.a_hat, segkmap, base, n,
		    F_SOFTLOCK, S_READ);
		if (error) {
			(void) segmap_release(segkmap, base, 0);
			/* map fault code back onto an errno */
			if (FC_CODE(error) == FC_OBJERR)
				error = FC_ERRNO(error);
			else
				error = EIO;
			break;
		}
		error = uiomove(base+mapoff, n, UIO_READ, uiop);
		(void) segmap_fault(kas.a_hat, segkmap, base, n,
		    F_SOFTUNLOCK, S_READ);
		if (error == 0) {
			/*
			 * if we read a whole page(s), or to eof,
			 * we won't need this page(s) again soon.
			 */
			if (n + mapoff == MAXBSIZE ||
			    uiop->uio_loffset == cp->c_size)
				flags |= SM_DONTNEED;
		}
		(void) segmap_release(segkmap, base, flags);
	} while (error == 0 && uiop->uio_resid > 0);

out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_read: EXIT error %d resid %ld\n", error,
		    uiop->uio_resid);
#endif
	return (error);
}
830 */ 831 CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp); 832 CFS_BACKFS_NFSV4_ASSERT_CNODE(cp); 833 834 /* Call backfs vnode op after extracting backvp */ 835 mutex_enter(&cp->c_statelock); 836 backvp = cp->c_backvp; 837 mutex_exit(&cp->c_statelock); 838 839 CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_read_backfs_nfsv4: cnode %p, " 840 "backvp %p\n", cp, backvp)); 841 842 (void) VOP_RWLOCK(backvp, V_WRITELOCK_FALSE, ct); 843 error = VOP_READ(backvp, uiop, ioflag, cr, ct); 844 VOP_RWUNLOCK(backvp, V_WRITELOCK_FALSE, ct); 845 846 /* Increment cache miss counter */ 847 fscp->fs_stats.st_misses++; 848 849 return (error); 850 } 851 852 /*ARGSUSED*/ 853 static int 854 cachefs_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, 855 caller_context_t *ct) 856 { 857 struct cnode *cp = VTOC(vp); 858 fscache_t *fscp = C_TO_FSCACHE(cp); 859 int error = 0; 860 u_offset_t off; 861 caddr_t base; 862 uint_t bsize; 863 uint_t flags; 864 int n, on; 865 rlim64_t limit = uiop->uio_llimit; 866 ssize_t resid; 867 offset_t offset; 868 offset_t remainder; 869 870 #ifdef CFSDEBUG 871 CFS_DEBUG(CFSDEBUG_VOPS) 872 printf( 873 "cachefs_write: ENTER vp %p offset %llu count %ld cflags %x\n", 874 (void *)vp, uiop->uio_loffset, uiop->uio_resid, 875 cp->c_flags); 876 #endif 877 if (getzoneid() != GLOBAL_ZONEID) { 878 error = EPERM; 879 goto out; 880 } 881 if (vp->v_type != VREG) { 882 error = EISDIR; 883 goto out; 884 } 885 886 ASSERT(RW_WRITE_HELD(&cp->c_rwlock)); 887 888 if (uiop->uio_resid == 0) { 889 goto out; 890 } 891 892 /* Call backfilesystem to write if NFSv4 */ 893 if (CFS_ISFS_BACKFS_NFSV4(fscp)) { 894 error = cachefs_write_backfs_nfsv4(vp, uiop, ioflag, cr, ct); 895 goto out2; 896 } 897 898 if (MANDLOCK(vp, cp->c_attr.va_mode)) { 899 error = chklock(vp, FWRITE, (offset_t)uiop->uio_loffset, 900 uiop->uio_resid, uiop->uio_fmode, ct); 901 if (error) 902 goto out; 903 } 904 905 if (ioflag & FAPPEND) { 906 for (;;) { 907 /* do consistency check to get correct file size */ 908 error = 
cachefs_cd_access(fscp, 0, 1); 909 if (error) 910 goto out; 911 mutex_enter(&cp->c_statelock); 912 error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr); 913 uiop->uio_loffset = cp->c_size; 914 mutex_exit(&cp->c_statelock); 915 if (CFS_TIMEOUT(fscp, error)) { 916 cachefs_cd_release(fscp); 917 cachefs_cd_timedout(fscp); 918 continue; 919 } 920 cachefs_cd_release(fscp); 921 if (error) 922 goto out; 923 break; 924 } 925 } 926 927 if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 928 limit = MAXOFFSET_T; 929 930 if (uiop->uio_loffset >= limit) { 931 proc_t *p = ttoproc(curthread); 932 933 mutex_enter(&p->p_lock); 934 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls, 935 p, RCA_UNSAFE_SIGINFO); 936 mutex_exit(&p->p_lock); 937 error = EFBIG; 938 goto out; 939 } 940 if (uiop->uio_loffset > fscp->fs_offmax) { 941 error = EFBIG; 942 goto out; 943 } 944 945 if (limit > fscp->fs_offmax) 946 limit = fscp->fs_offmax; 947 948 if (uiop->uio_loffset < (offset_t)0) { 949 error = EINVAL; 950 goto out; 951 } 952 953 offset = uiop->uio_loffset + uiop->uio_resid; 954 /* 955 * Check to make sure that the process will not exceed 956 * its limit on file size. It is okay to write up to 957 * the limit, but not beyond. Thus, the write which 958 * reaches the limit will be short and the next write 959 * will return an error. 
960 */ 961 remainder = 0; 962 if (offset > limit) { 963 remainder = (int)(offset - (u_offset_t)limit); 964 uiop->uio_resid = limit - uiop->uio_loffset; 965 if (uiop->uio_resid <= 0) { 966 proc_t *p = ttoproc(curthread); 967 968 uiop->uio_resid += remainder; 969 mutex_enter(&p->p_lock); 970 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 971 p->p_rctls, p, RCA_UNSAFE_SIGINFO); 972 mutex_exit(&p->p_lock); 973 error = EFBIG; 974 goto out; 975 } 976 } 977 978 resid = uiop->uio_resid; 979 offset = uiop->uio_loffset; 980 bsize = vp->v_vfsp->vfs_bsize; 981 982 /* loop around and do the write in MAXBSIZE chunks */ 983 do { 984 /* mapping offset */ 985 off = uiop->uio_loffset & (offset_t)MAXBMASK; 986 on = uiop->uio_loffset & MAXBOFFSET; /* Rel. offset */ 987 n = MAXBSIZE - on; 988 if (n > uiop->uio_resid) 989 n = (int)uiop->uio_resid; 990 base = segmap_getmap(segkmap, vp, off); 991 error = cachefs_writepage(vp, (base + on), n, uiop); 992 if (error == 0) { 993 flags = 0; 994 /* 995 * Have written a whole block.Start an 996 * asynchronous write and mark the buffer to 997 * indicate that it won't be needed again 998 * soon. 
			 */
			if (n + on == bsize) {
				flags = SM_WRITE |SM_ASYNC |SM_DONTNEED;
			}
#if 0
			/* XXX need to understand this */
			if ((ioflag & (FSYNC|FDSYNC)) ||
			    (cp->c_backvp && vn_has_flocks(cp->c_backvp))) {
				flags &= ~SM_ASYNC;
				flags |= SM_WRITE;
			}
#else
			/* synchronous writes force the segmap flush now */
			if (ioflag & (FSYNC|FDSYNC)) {
				flags &= ~SM_ASYNC;
				flags |= SM_WRITE;
			}
#endif
			error = segmap_release(segkmap, base, flags);
		} else {
			(void) segmap_release(segkmap, base, 0);
		}
	} while (error == 0 && uiop->uio_resid > 0);

out:
	/*
	 * An interrupted sync write is restartable: restore the uio to
	 * its starting state.  Otherwise give back the bytes trimmed off
	 * by the rlimit check so the caller sees the true shortfall.
	 */
	if (error == EINTR && (ioflag & (FSYNC|FDSYNC))) {
		uiop->uio_resid = resid;
		uiop->uio_loffset = offset;
	} else
		uiop->uio_resid += remainder;

out2:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_write: EXIT error %d\n", error);
#endif
	return (error);
}

/*
 * cachefs_write_backfs_nfsv4
 *
 * Call NFSv4 back filesystem to handle the write (cachefs
 * pass-through support for NFSv4).
 * Returns the error from VOP_WRITE() on the back vnode.
 */
static int
cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
    caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	vnode_t *backvp;
	int error;

	/*
	 * For NFSv4 pass-through to work, only connected operation
	 * is supported, the cnode backvp must exist, and cachefs
	 * optional (eg., disconnectable) flags are turned off. Assert
	 * these conditions for the read operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	/* Call backfs vnode op after extracting the backvp */
	mutex_enter(&cp->c_statelock);
	backvp = cp->c_backvp;
	mutex_exit(&cp->c_statelock);

	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_write_backfs_nfsv4: cnode %p, "
	    "backvp %p\n", cp, backvp));
	/* take the back vnode's rwlock around the write, as VOP_WRITE requires */
	(void) VOP_RWLOCK(backvp, V_WRITELOCK_TRUE, ct);
	error = VOP_WRITE(backvp, uiop, ioflag, cr, ct);
	VOP_RWUNLOCK(backvp, V_WRITELOCK_TRUE, ct);

	return (error);
}

/*
 * see if we've charged ourselves for frontfile data at
 * the given offset.  If not, allocate a block for it now.
 *
 * Returns 0 if the block is already charged or was charged now,
 * 1 if the cnode is in NOCACHE mode, or the error from
 * cachefs_allocblocks().  Caller must hold c_statelock.
 */
static int
cachefs_charge_page(struct cnode *cp, u_offset_t offset)
{
	u_offset_t blockoff;
	int error;
	int inc;

	ASSERT(MUTEX_HELD(&cp->c_statelock));
	ASSERT(PAGESIZE <= MAXBSIZE);

	error = 0;
	blockoff = offset & (offset_t)MAXBMASK;

	/* get the front file if necessary so allocblocks works */
	if ((cp->c_frontvp == NULL) &&
	    ((cp->c_flags & CN_NOCACHE) == 0)) {
		(void) cachefs_getfrontfile(cp);
	}
	if (cp->c_flags & CN_NOCACHE)
		return (1);

	/* already charged if any page of this MAXBSIZE block is mapped */
	if (cachefs_check_allocmap(cp, blockoff))
		return (0);

	for (inc = PAGESIZE; inc < MAXBSIZE; inc += PAGESIZE)
		if (cachefs_check_allocmap(cp, blockoff+inc))
			return (0);

	error = cachefs_allocblocks(C_TO_FSCACHE(cp)->fs_cache, 1,
	    cp->c_metadata.md_rltype);
	if (error == 0) {
		cp->c_metadata.md_frontblks++;
		cp->c_flags |= CN_UPDATED;
	}
	return (error);
}

/*
 * Called only by cachefs_write to write 1 page or less of data.
 * base - base address kernel addr space
 * tcount - Total bytes to move - < MAXBSIZE
 *
 * Moves tcount bytes from uiop into the segmap window at base,
 * maintaining the CN_CMODINPROG handshake so pushers see a
 * consistent c_size.  Returns 0 or an errno.
 */
static int
cachefs_writepage(vnode_t *vp, caddr_t base, int tcount, uio_t *uiop)
{
	struct cnode *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	register int n;
	register u_offset_t offset;
	int error = 0, terror;
	extern struct as kas;
	u_offset_t lastpage_off;
	int pagecreate = 0;
	int newpage;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf(
		    "cachefs_writepage: ENTER vp %p offset %llu len %ld\\\n",
		    (void *)vp, uiop->uio_loffset, uiop->uio_resid);
#endif

	/*
	 * Move bytes in PAGESIZE chunks. We must avoid spanning pages in
	 * uiomove() because page faults may cause the cache to be invalidated
	 * out from under us.
	 */
	do {
		offset = uiop->uio_loffset;
		lastpage_off = (cp->c_size - 1) & (offset_t)PAGEMASK;

		/*
		 * If not connected then need to make sure we have space
		 * to perform the write.  We could make this check
		 * a little tighter by only doing it if we are growing the file.
		 */
		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
			/* probe: alloc then immediately free one block */
			error = cachefs_allocblocks(fscp->fs_cache, 1,
			    cp->c_metadata.md_rltype);
			if (error)
				break;
			cachefs_freeblocks(fscp->fs_cache, 1,
			    cp->c_metadata.md_rltype);
		}

		/*
		 * n is the number of bytes required to satisfy the request
		 * or the number of bytes to fill out the page.
		 */
		n = (int)(PAGESIZE - ((uintptr_t)base & PAGEOFFSET));
		if (n > tcount)
			n = tcount;

		/*
		 * The number of bytes of data in the last page can not
		 * be accurately be determined while page is being
		 * uiomove'd to and the size of the file being updated.
		 * Thus, inform threads which need to know accurately
		 * how much data is in the last page of the file.  They
		 * will not do the i/o immediately, but will arrange for
		 * the i/o to happen later when this modify operation
		 * will have finished.
		 *
		 * in similar NFS code, this is done right before the
		 * uiomove(), which is best.  but here in cachefs, we
		 * have two uiomove()s, so we must do it here.
		 */
		ASSERT(!(cp->c_flags & CN_CMODINPROG));
		mutex_enter(&cp->c_statelock);
		cp->c_flags |= CN_CMODINPROG;
		cp->c_modaddr = (offset & (offset_t)MAXBMASK);
		mutex_exit(&cp->c_statelock);

		/*
		 * Check to see if we can skip reading in the page
		 * and just allocate the memory.  We can do this
		 * if we are going to rewrite the entire mapping
		 * or if we are going to write to or beyond the current
		 * end of file from the beginning of the mapping.
		 */
		if ((offset > (lastpage_off + PAGEOFFSET)) ||
		    ((cp->c_size == 0) && (offset < PAGESIZE)) ||
		    ((uintptr_t)base & PAGEOFFSET) == 0 && (n == PAGESIZE ||
		    ((offset + n) >= cp->c_size))) {
			pagecreate = 1;

			/*
			 * segmap_pagecreate() returns 1 if it calls
			 * page_create_va() to allocate any pages.
			 */
			newpage = segmap_pagecreate(segkmap,
			    (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK),
			    PAGESIZE, 0);
			/* do not zero page if we are overwriting all of it */
			if (!((((uintptr_t)base & PAGEOFFSET) == 0) &&
			    (n == PAGESIZE))) {
				(void) kzero((void *)
				    ((uintptr_t)base & (uintptr_t)PAGEMASK),
				    PAGESIZE);
			}
			error = uiomove(base, n, UIO_WRITE, uiop);

			/*
			 * Unlock the page allocated by page_create_va()
			 * in segmap_pagecreate()
			 */
			if (newpage)
				segmap_pageunlock(segkmap,
				    (caddr_t)((uintptr_t)base &
				    (uintptr_t)PAGEMASK),
				    PAGESIZE, S_WRITE);
		} else {
			/*
			 * KLUDGE ! Use segmap_fault instead of faulting and
			 * using as_fault() to avoid a recursive readers lock
			 * on kas.
			 */
			error = segmap_fault(kas.a_hat, segkmap, (caddr_t)
			    ((uintptr_t)base & (uintptr_t)PAGEMASK),
			    PAGESIZE, F_SOFTLOCK, S_WRITE);
			if (error) {
				/* segmap_fault returns an fc code, not errno */
				if (FC_CODE(error) == FC_OBJERR)
					error = FC_ERRNO(error);
				else
					error = EIO;
				break;
			}
			error = uiomove(base, n, UIO_WRITE, uiop);
			(void) segmap_fault(kas.a_hat, segkmap, (caddr_t)
			    ((uintptr_t)base & (uintptr_t)PAGEMASK),
			    PAGESIZE, F_SOFTUNLOCK, S_WRITE);
		}
		n = (int)(uiop->uio_loffset - offset); /* n = # bytes written */
		base += n;
		tcount -= n;

		/* get access to the file system */
		if ((terror = cachefs_cd_access(fscp, 0, 1)) != 0) {
			/* NOTE: CN_CMODINPROG cleared by the loop-exit code below */
			error = terror;
			break;
		}

		/*
		 * cp->c_attr.va_size is the maximum number of
		 * bytes known to be in the file.
		 * Make sure it is at least as high as the
		 * last byte we just wrote into the buffer.
		 */
		mutex_enter(&cp->c_statelock);
		if (cp->c_size < uiop->uio_loffset) {
			cp->c_size = uiop->uio_loffset;
		}
		if (cp->c_size != cp->c_attr.va_size) {
			cp->c_attr.va_size = cp->c_size;
			cp->c_flags |= CN_UPDATED;
		}
		/* c_size is now correct, so we can clear modinprog */
		cp->c_flags &= ~CN_CMODINPROG;
		if (error == 0) {
			cp->c_flags |= CDIRTY;
			if (pagecreate && (cp->c_flags & CN_NOCACHE) == 0) {
				/*
				 * if we're not in NOCACHE mode
				 * (i.e., single-writer), we update the
				 * allocmap here rather than waiting until
				 * cachefspush is called.  This prevents
				 * getpage from clustering up pages from
				 * the backfile and stomping over the changes
				 * we make here.
				 */
				if (cachefs_charge_page(cp, offset) == 0) {
					cachefs_update_allocmap(cp,
					    offset & (offset_t)PAGEMASK,
					    (size_t)PAGESIZE);
				}

				/* else we ran out of space */
				else {
					/* nocache file if connected */
					if (fscp->fs_cdconnected ==
					    CFS_CD_CONNECTED)
						cachefs_nocache(cp);
					/*
					 * If disconnected then cannot
					 * nocache the file.  Let it have
					 * the space.
					 */
					else {
						cp->c_metadata.md_frontblks++;
						cp->c_flags |= CN_UPDATED;
						cachefs_update_allocmap(cp,
						    offset & (offset_t)PAGEMASK,
						    (size_t)PAGESIZE);
					}
				}
			}
		}
		mutex_exit(&cp->c_statelock);
		cachefs_cd_release(fscp);
	} while (tcount > 0 && error == 0);

	/* error exits above may leave CN_CMODINPROG set; clear it here */
	if (cp->c_flags & CN_CMODINPROG) {
		/* XXX assert error != 0?  FC_ERRNO() makes this more risky. */
		mutex_enter(&cp->c_statelock);
		cp->c_flags &= ~CN_CMODINPROG;
		mutex_exit(&cp->c_statelock);
	}

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_writepage: EXIT error %d\n", error);
#endif

	return (error);
}

/*
 * Pushes out pages to the back and/or front file system.
 */
static int
cachefs_push(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
    int flags, cred_t *cr)
{
	struct cnode *cp = VTOC(vp);
	struct buf *bp;
	int error;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	u_offset_t iooff;
	size_t iolen;
	u_offset_t lbn;
	u_offset_t lbn_off;
	uint_t bsize;

	ASSERT((flags & B_ASYNC) == 0);
	ASSERT(!vn_is_readonly(vp));
	ASSERT(pp != NULL);
	ASSERT(cr != NULL);

	bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
	lbn = pp->p_offset / bsize;
	lbn_off = lbn * bsize;

	/*
	 * Find a kluster that fits in one block, or in
	 * one page if pages are bigger than blocks.  If
	 * there is less file space allocated than a whole
	 * page, we'll shorten the i/o request below.
	 */

	pp = pvn_write_kluster(vp, pp, &iooff, &iolen, lbn_off,
	    roundup(bsize, PAGESIZE), flags);

	/*
	 * The CN_CMODINPROG flag makes sure that we use a correct
	 * value of c_size, below.  CN_CMODINPROG is set in
	 * cachefs_writepage().  When CN_CMODINPROG is set it
	 * indicates that a uiomove() is in progress and the c_size
	 * has not been made consistent with the new size of the
	 * file.  When the uiomove() completes the c_size is updated
	 * and the CN_CMODINPROG flag is cleared.
	 *
	 * The CN_CMODINPROG flag makes sure that cachefs_push_front
	 * and cachefs_push_connected see a consistent value of
	 * c_size.  Without this handshaking, it is possible that
	 * these routines will pick up the old value of c_size before
	 * the uiomove() in cachefs_writepage() completes.  This will
	 * result in the vn_rdwr() being too small, and data loss.
	 *
	 * More precisely, there is a window between the time the
	 * uiomove() completes and the time the c_size is updated.  If
	 * a VOP_PUTPAGE() operation intervenes in this window, the
	 * page will be picked up, because it is dirty; it will be
	 * unlocked, unless it was pagecreate'd.  When the page is
	 * picked up as dirty, the dirty bit is reset
	 * (pvn_getdirty()).  In cachefs_push_connected(), c_size is
	 * checked.  This will still be the old size.  Therefore, the
	 * page will not be written out to the correct length, and the
	 * page will be clean, so the data may disappear.
	 */
	if (cp->c_flags & CN_CMODINPROG) {
		/* unlocked peek above; recheck under c_statelock */
		mutex_enter(&cp->c_statelock);
		if ((cp->c_flags & CN_CMODINPROG) &&
		    cp->c_modaddr + MAXBSIZE > iooff &&
		    cp->c_modaddr < iooff + iolen) {
			page_t *plist;

			/*
			 * A write is in progress for this region of
			 * the file.  If we did not detect
			 * CN_CMODINPROG here then this path through
			 * cachefs_push_connected() would eventually
			 * do the vn_rdwr() and may not write out all
			 * of the data in the pages.  We end up losing
			 * data.  So we decide to set the modified bit
			 * on each page in the page list and mark the
			 * cnode with CDIRTY.  This push will be
			 * restarted at some later time.
			 */

			plist = pp;
			while (plist != NULL) {
				pp = plist;
				page_sub(&plist, pp);
				hat_setmod(pp);
				page_io_unlock(pp);
				page_unlock(pp);
			}
			cp->c_flags |= CDIRTY;
			mutex_exit(&cp->c_statelock);
			if (offp)
				*offp = iooff;
			if (lenp)
				*lenp = iolen;
			return (0);
		}
		mutex_exit(&cp->c_statelock);
	}

	/*
	 * Set the pages up for pageout.
	 */
	bp = pageio_setup(pp, iolen, CTOV(cp), B_WRITE | flags);
	if (bp == NULL) {

		/*
		 * currently, there is no way for pageio_setup() to
		 * return NULL, since it uses its own scheme for
		 * kmem_alloc()ing that shouldn't return NULL, and
		 * since pageio_setup() itself dereferences the thing
		 * it's about to return.  still, we need to be ready
		 * in case this ever does start happening.
		 */

		error = ENOMEM;
		goto writedone;
	}
	/*
	 * pageio_setup should have set b_addr to 0.  This
	 * is correct since we want to do I/O on a page
	 * boundary.  bp_mapin will use this addr to calculate
	 * an offset, and then set b_addr to the kernel virtual
	 * address it allocated for us.
	 */
	bp->b_edev = 0;
	bp->b_dev = 0;
	bp->b_lblkno = (diskaddr_t)lbtodb(iooff);
	bp_mapin(bp);

	/* shorten the i/o so we don't write past end-of-file */
	iolen = cp->c_size - ldbtob(bp->b_blkno);
	if (iolen > bp->b_bcount)
		iolen = bp->b_bcount;

	/* if connected */
	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
		/* write to the back file first */
		error = cachefs_push_connected(vp, bp, iolen, iooff, cr);

		/* write to the front file if allowed */
		if ((error == 0) && CFS_ISFS_NONSHARED(fscp) &&
		    ((cp->c_flags & CN_NOCACHE) == 0)) {
			/* try to write to the front file */
			(void) cachefs_push_front(vp, bp, iolen, iooff, cr);
		}
	}

	/* else if disconnected */
	else {
		/* try to write to the front file */
		error = cachefs_push_front(vp, bp, iolen, iooff, cr);
	}

	bp_mapout(bp);
	pageio_done(bp);

writedone:

	pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
	if (offp)
		*offp = iooff;
	if (lenp)
		*lenp = iolen;

	/* XXX ask bob mastors how to fix this someday */
	mutex_enter(&cp->c_statelock);
	if (error) {
		if (error == ENOSPC) {
			if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) ||
			    CFS_ISFS_SOFT(fscp)) {
				CFSOP_INVALIDATE_COBJECT(fscp, cp, cr);
				cp->c_error = error;
			}
		} else if ((CFS_TIMEOUT(fscp, error) == 0) &&
		    (error != EINTR)) {
			CFSOP_INVALIDATE_COBJECT(fscp, cp, cr);
			cp->c_error = error;
		}
	} else if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
		CFSOP_MODIFY_COBJECT(fscp, cp, cr);
	}
	mutex_exit(&cp->c_statelock);

	return (error);
}

/*
 * Pushes out pages to the back file system.
 * Returns the vn_rdwr() error; also latches it into bp->b_error
 * and B_ERROR so the buf reflects the failure.
 */
static int
cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen,
    u_offset_t iooff, cred_t *cr)
{
	struct cnode *cp = VTOC(vp);
	int error = 0;
	int mode = 0;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	ssize_t resid;
	vnode_t *backvp;

	/* get the back file if necessary */
	mutex_enter(&cp->c_statelock);
	if (cp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, cp);
		if (error) {
			mutex_exit(&cp->c_statelock);
			goto out;
		}
	}
	backvp = cp->c_backvp;
	VN_HOLD(backvp);
	mutex_exit(&cp->c_statelock);

	if (CFS_ISFS_NONSHARED(fscp) && CFS_ISFS_SNR(fscp))
		mode = FSYNC;

	/* write to the back file */
	error = bp->b_error = vn_rdwr(UIO_WRITE, backvp, bp->b_un.b_addr,
	    iolen, iooff, UIO_SYSSPACE, mode,
	    RLIM64_INFINITY, cr, &resid);
	if (error) {
#ifdef CFSDEBUG
		CFS_DEBUG(CFSDEBUG_VOPS | CFSDEBUG_BACK)
			printf("cachefspush: error %d cr %p\n",
			    error, (void *)cr);
#endif
		bp->b_flags |= B_ERROR;
	}
	VN_RELE(backvp);
out:
	return (error);
}

/*
 * Pushes out pages to the front file
 * system.
 * Called for both connected and disconnected states.
 * ETIMEDOUT here means "cannot cache in front file"; callers in
 * connected mode treat it as a soft failure.
 */
static int
cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen,
    u_offset_t iooff, cred_t *cr)
{
	struct cnode *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error = 0;
	ssize_t resid;
	u_offset_t popoff;
	off_t commit = 0;	/* dlog record offset; 0 = no record taken */
	uint_t seq;
	enum cachefs_rl_type type;
	vnode_t *frontvp = NULL;

	mutex_enter(&cp->c_statelock);

	if (!CFS_ISFS_NONSHARED(fscp)) {
		error = ETIMEDOUT;
		goto out;
	}

	/* get the front file if necessary */
	if ((cp->c_frontvp == NULL) &&
	    ((cp->c_flags & CN_NOCACHE) == 0)) {
		(void) cachefs_getfrontfile(cp);
	}
	if (cp->c_flags & CN_NOCACHE) {
		error = ETIMEDOUT;
		goto out;
	}

	/* if disconnected, needs to be populated and have good attributes */
	if ((fscp->fs_cdconnected != CFS_CD_CONNECTED) &&
	    (((cp->c_metadata.md_flags & MD_POPULATED) == 0) ||
	    (cp->c_metadata.md_flags & MD_NEEDATTRS))) {
		error = ETIMEDOUT;
		goto out;
	}

	/* charge every MAXBSIZE block covered by this i/o up front */
	for (popoff = iooff; popoff < (iooff + iolen); popoff += MAXBSIZE) {
		if (cachefs_charge_page(cp, popoff)) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				cachefs_nocache(cp);
				goto out;
			} else {
				error = ENOSPC;
				goto out;
			}
		}
	}

	if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
		/* log the first putpage to a file */
		if ((cp->c_metadata.md_flags & MD_PUTPAGE) == 0) {
			/* uses open's creds if we have them */
			if (cp->c_cred)
				cr = cp->c_cred;

			if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
				error = cachefs_dlog_cidmap(fscp);
				if (error) {
					error = ENOSPC;
					goto out;
				}
				cp->c_metadata.md_flags |= MD_MAPPING;
			}

			commit = cachefs_dlog_modify(fscp, cp, cr, &seq);
			if (commit == 0) {
				/* out of space */
				error = ENOSPC;
				goto out;
			}

			cp->c_metadata.md_seq = seq;
			/* remember rl type so a failed commit can roll back */
			type = cp->c_metadata.md_rltype;
			cachefs_modified(cp);
			cp->c_metadata.md_flags |= MD_PUTPAGE;
			cp->c_metadata.md_flags &= ~MD_PUSHDONE;
			cp->c_flags |= CN_UPDATED;
		}

		/* subsequent putpages just get a new sequence number */
		else {
			/* but only if it matters */
			if (cp->c_metadata.md_seq != fscp->fs_dlogseq) {
				seq = cachefs_dlog_seqnext(fscp);
				if (seq == 0) {
					error = ENOSPC;
					goto out;
				}
				cp->c_metadata.md_seq = seq;
				cp->c_flags |= CN_UPDATED;
				/* XXX maybe should do write_metadata here */
			}
		}
	}

	/* drop c_statelock across the front-file write, holding frontvp */
	frontvp = cp->c_frontvp;
	VN_HOLD(frontvp);
	mutex_exit(&cp->c_statelock);
	error = bp->b_error = vn_rdwr(UIO_WRITE, frontvp,
	    bp->b_un.b_addr, iolen, iooff, UIO_SYSSPACE, 0,
	    RLIM64_INFINITY, kcred, &resid);
	mutex_enter(&cp->c_statelock);
	VN_RELE(frontvp);
	frontvp = NULL;
	if (error) {
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			/* connected: give up on caching, not on the push */
			cachefs_nocache(cp);
			error = 0;
			goto out;
		} else {
			goto out;
		}
	}

	(void) cachefs_update_allocmap(cp, iooff, iolen);
	cp->c_flags |= (CN_UPDATED | CN_NEED_FRONT_SYNC |
	    CN_POPULATION_PENDING);
	if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
		gethrestime(&cp->c_metadata.md_localmtime);
		cp->c_metadata.md_flags |= MD_LOCALMTIME;
	}

out:
	if (commit) {
		/* commit the log record */
		ASSERT(fscp->fs_cdconnected == CFS_CD_DISCONNECTED);
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX fix on panic */
		}
	}

	/* roll back the modify bookkeeping if the logged push failed */
	if (error && commit) {
		cp->c_metadata.md_flags &= ~MD_PUTPAGE;
		cachefs_rlent_moveto(fscp->fs_cache, type,
		    cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks);
		cp->c_metadata.md_rltype = type;
		cp->c_flags |= CN_UPDATED;
	}
	mutex_exit(&cp->c_statelock);
	return (error);
}

/*ARGSUSED*/
static int
cachefs_dump(struct vnode *vp, caddr_t foo1, offset_t foo2, offset_t foo3,
    caller_context_t *ct)
{
	/* cachefs does not support kernel crash dumps to its files */
	return (ENOSYS); /* should we panic if we get here? */
}

/*ARGSUSED*/
static int
cachefs_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, cred_t *cred,
    int *rvalp, caller_context_t *ct)
{
	int error;
	struct cnode *cp = VTOC(vp);
	struct fscache *fscp = C_TO_FSCACHE(cp);
	struct cachefscache *cachep;
	extern kmutex_t cachefs_cachelock;
	extern cachefscache_t *cachefs_cachelist;
	cachefsio_pack_t *packp;
	STRUCT_DECL(cachefsio_dcmd, dcmd);
	int inlen, outlen; /* LP64: generic int for struct in/out len */
	void *dinp, *doutp;
	int (*dcmd_routine)(vnode_t *, void *, void *);

	if (getzoneid() != GLOBAL_ZONEID)
		return (EPERM);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system.  For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off.  Assert these conditions which ensure
	 * that only a subset of the ioctls are "truly supported"
	 * for NFSv4 (these are CFSDCMD_DAEMONID and CFSDCMD_GETSTATS.
	 * The packing operations are meaningless since there is
	 * no caching for NFSv4, and the called functions silently
	 * return if the backfilesystem is NFSv4. The daemon
	 * commands except for those above are essentially used
	 * for disconnectable operation support (including log
	 * rolling), so in each called function, we assert that
	 * NFSv4 is not in use.  The _FIO* calls (except _FIOCOD)
	 * are from "cfsfstype" which is not a documented
	 * command.
	 * However, the command is visible in
	 * /usr/lib/fs/cachefs so the commands are simply let
	 * through (don't seem to impact pass-through functionality).
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	switch (cmd) {
	case CACHEFSIO_PACK:
		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
		if (!error)
			error = cachefs_pack(vp, packp->p_name, cred);
		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
		break;

	case CACHEFSIO_UNPACK:
		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
		if (!error)
			error = cachefs_unpack(vp, packp->p_name, cred);
		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
		break;

	case CACHEFSIO_PACKINFO:
		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
		if (!error)
			error = cachefs_packinfo(vp, packp->p_name,
			    &packp->p_status, cred);
		/* packinfo fills in p_status; copy the packet back out */
		if (!error)
			error = xcopyout(packp, (void *)arg,
			    sizeof (cachefsio_pack_t));
		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
		break;

	case CACHEFSIO_UNPACKALL:
		error = cachefs_unpackall(vp);
		break;

	case CACHEFSIO_DCMD:
		/*
		 * This is a private interface between the cachefsd and
		 * this file system.
		 */

		/* must be root to use these commands */
		if (secpolicy_fs_config(cred, vp->v_vfsp) != 0)
			return (EPERM);

		/* get the command packet */
		STRUCT_INIT(dcmd, flag & DATAMODEL_MASK);
		error = xcopyin((void *)arg, STRUCT_BUF(dcmd),
		    SIZEOF_STRUCT(cachefsio_dcmd, DATAMODEL_NATIVE));
		if (error)
			return (error);

		/* copy in the data for the operation */
		dinp = NULL;
		if ((inlen = STRUCT_FGET(dcmd, d_slen)) > 0) {
			dinp = cachefs_kmem_alloc(inlen, KM_SLEEP);
			error = xcopyin(STRUCT_FGETP(dcmd, d_sdata), dinp,
			    inlen);
			/* NOTE(review): dinp is not freed on this early return */
			if (error)
				return (error);
		}

		/* allocate space for the result */
		doutp = NULL;
		if ((outlen = STRUCT_FGET(dcmd, d_rlen)) > 0)
			doutp = cachefs_kmem_alloc(outlen, KM_SLEEP);

		/*
		 * Assert NFSv4 only allows the daemonid and getstats
		 * daemon requests
		 */
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0 ||
		    STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_DAEMONID ||
		    STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_GETSTATS);

		/* get the routine to execute */
		dcmd_routine = NULL;
		switch (STRUCT_FGET(dcmd, d_cmd)) {
		case CFSDCMD_DAEMONID:
			dcmd_routine = cachefs_io_daemonid;
			break;
		case CFSDCMD_STATEGET:
			dcmd_routine = cachefs_io_stateget;
			break;
		case CFSDCMD_STATESET:
			dcmd_routine = cachefs_io_stateset;
			break;
		case CFSDCMD_XWAIT:
			dcmd_routine = cachefs_io_xwait;
			break;
		case CFSDCMD_EXISTS:
			dcmd_routine = cachefs_io_exists;
			break;
		case CFSDCMD_LOSTFOUND:
			dcmd_routine = cachefs_io_lostfound;
			break;
		case CFSDCMD_GETINFO:
			dcmd_routine = cachefs_io_getinfo;
			break;
		case CFSDCMD_CIDTOFID:
			dcmd_routine = cachefs_io_cidtofid;
			break;
		case CFSDCMD_GETATTRFID:
			dcmd_routine = cachefs_io_getattrfid;
			break;
		case CFSDCMD_GETATTRNAME:
			dcmd_routine = cachefs_io_getattrname;
			break;
		case CFSDCMD_GETSTATS:
			dcmd_routine = cachefs_io_getstats;
			break;
		case CFSDCMD_ROOTFID:
			dcmd_routine = cachefs_io_rootfid;
			break;
		case CFSDCMD_CREATE:
			dcmd_routine = cachefs_io_create;
			break;
		case CFSDCMD_REMOVE:
			dcmd_routine = cachefs_io_remove;
			break;
		case CFSDCMD_LINK:
			dcmd_routine = cachefs_io_link;
			break;
		case CFSDCMD_RENAME:
			dcmd_routine = cachefs_io_rename;
			break;
		case CFSDCMD_MKDIR:
			dcmd_routine = cachefs_io_mkdir;
			break;
		case CFSDCMD_RMDIR:
			dcmd_routine = cachefs_io_rmdir;
			break;
		case CFSDCMD_SYMLINK:
			dcmd_routine = cachefs_io_symlink;
			break;
		case CFSDCMD_SETATTR:
			dcmd_routine = cachefs_io_setattr;
			break;
		case CFSDCMD_SETSECATTR:
			dcmd_routine = cachefs_io_setsecattr;
			break;
		case CFSDCMD_PUSHBACK:
			dcmd_routine = cachefs_io_pushback;
			break;
		default:
			error = ENOTTY;
			break;
		}

		/* execute the routine */
		if (dcmd_routine)
			error = (*dcmd_routine)(vp, dinp, doutp);

		/* copy out the result */
		if ((error == 0) && doutp)
			error = xcopyout(doutp, STRUCT_FGETP(dcmd, d_rdata),
			    outlen);

		/* free allocated memory */
		if (dinp)
			cachefs_kmem_free(dinp, inlen);
		if (doutp)
			cachefs_kmem_free(doutp, outlen);

		break;

	case _FIOCOD:
		if (secpolicy_fs_config(cred, vp->v_vfsp) != 0) {
			error = EPERM;
			break;
		}

		error = EBUSY;
		if (arg) {
			/* non-zero arg means do all filesystems */
			mutex_enter(&cachefs_cachelock);
			for (cachep = cachefs_cachelist; cachep != NULL;
			    cachep = cachep->c_next) {
				mutex_enter(&cachep->c_fslistlock);
				for (fscp = cachep->c_fslist;
				    fscp != NULL;
				    fscp = fscp->fs_next) {
					if (CFS_ISFS_CODCONST(fscp)) {
						gethrestime(&fscp->fs_cod_time);
						error = 0;
					}
				}
				mutex_exit(&cachep->c_fslistlock);
			}
			mutex_exit(&cachefs_cachelock);
		} else {
			if (CFS_ISFS_CODCONST(fscp)) {
				gethrestime(&fscp->fs_cod_time);
				error = 0;
			}
		}
		break;

	case _FIOSTOPCACHE:
		error = cachefs_stop_cache(cp);
		break;

	default:
		error = ENOTTY;
		break;
	}

	/* return the result */
	return (error);
}

/*
 * Allocate a new local fileno for `old', which collided with an
 * existing one, and register the old->new and new->0 mappings.
 * Caller must hold fs_fslock.  Filenos 0-2 are never handed out.
 */
ino64_t
cachefs_fileno_conflict(fscache_t *fscp, ino64_t old)
{
	ino64_t new;

	ASSERT(MUTEX_HELD(&fscp->fs_fslock));

	for (;;) {
		fscp->fs_info.fi_localfileno++;
		if (fscp->fs_info.fi_localfileno == 0)
			fscp->fs_info.fi_localfileno = 3;
		fscp->fs_flags |= CFS_FS_DIRTYINFO;

		new = fscp->fs_info.fi_localfileno;
		if (! cachefs_fileno_inuse(fscp, new))
			break;
	}

	cachefs_inum_register(fscp, old, new);
	cachefs_inum_register(fscp, new, 0);
	return (new);
}

/*ARGSUSED*/
static int
cachefs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	struct cnode *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error = 0;
	int held = 0;
	int connected = 0;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_getattr: ENTER vp %p\n", (void *)vp);
#endif

	if (getzoneid() != GLOBAL_ZONEID)
		return (EPERM);

	/* Call backfilesystem getattr if NFSv4 */
	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_getattr_backfs_nfsv4(vp, vap, flags, cr, ct);
		goto out;
	}

	/*
	 * If it has been specified that the return value will
	 * just be used as a hint, and we are only being asked
	 * for size, fsid or rdevid, then return the client's
	 * notion of these values without checking to make sure
	 * that the attribute cache is up to date.
	 * The whole point is to avoid an over the wire GETATTR
	 * call.
2041 */ 2042 if (flags & ATTR_HINT) { 2043 if (vap->va_mask == 2044 (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) { 2045 if (vap->va_mask | AT_SIZE) 2046 vap->va_size = cp->c_size; 2047 /* 2048 * Return the FSID of the cachefs filesystem, 2049 * not the back filesystem 2050 */ 2051 if (vap->va_mask | AT_FSID) 2052 vap->va_fsid = vp->v_vfsp->vfs_dev; 2053 if (vap->va_mask | AT_RDEV) 2054 vap->va_rdev = cp->c_attr.va_rdev; 2055 return (0); 2056 } 2057 } 2058 2059 /* 2060 * Only need to flush pages if asking for the mtime 2061 * and if there any dirty pages. 2062 */ 2063 if (vap->va_mask & AT_MTIME) { 2064 /*EMPTY*/ 2065 #if 0 2066 /* 2067 * XXX bob: stolen from nfs code, need to do something similar 2068 */ 2069 rp = VTOR(vp); 2070 if ((rp->r_flags & RDIRTY) || rp->r_iocnt > 0) 2071 (void) nfs3_putpage(vp, (offset_t)0, 0, 0, cr); 2072 #endif 2073 } 2074 2075 for (;;) { 2076 /* get (or renew) access to the file system */ 2077 if (held) { 2078 cachefs_cd_release(fscp); 2079 held = 0; 2080 } 2081 error = cachefs_cd_access(fscp, connected, 0); 2082 if (error) 2083 goto out; 2084 held = 1; 2085 2086 /* 2087 * If it has been specified that the return value will 2088 * just be used as a hint, and we are only being asked 2089 * for size, fsid or rdevid, then return the client's 2090 * notion of these values without checking to make sure 2091 * that the attribute cache is up to date. 2092 * The whole point is to avoid an over the wire GETATTR 2093 * call. 
2094 */ 2095 if (flags & ATTR_HINT) { 2096 if (vap->va_mask == 2097 (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) { 2098 if (vap->va_mask | AT_SIZE) 2099 vap->va_size = cp->c_size; 2100 /* 2101 * Return the FSID of the cachefs filesystem, 2102 * not the back filesystem 2103 */ 2104 if (vap->va_mask | AT_FSID) 2105 vap->va_fsid = vp->v_vfsp->vfs_dev; 2106 if (vap->va_mask | AT_RDEV) 2107 vap->va_rdev = cp->c_attr.va_rdev; 2108 goto out; 2109 } 2110 } 2111 2112 mutex_enter(&cp->c_statelock); 2113 if ((cp->c_metadata.md_flags & MD_NEEDATTRS) && 2114 (fscp->fs_cdconnected != CFS_CD_CONNECTED)) { 2115 mutex_exit(&cp->c_statelock); 2116 connected = 1; 2117 continue; 2118 } 2119 2120 error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr); 2121 if (CFS_TIMEOUT(fscp, error)) { 2122 mutex_exit(&cp->c_statelock); 2123 cachefs_cd_release(fscp); 2124 held = 0; 2125 cachefs_cd_timedout(fscp); 2126 continue; 2127 } 2128 if (error) { 2129 mutex_exit(&cp->c_statelock); 2130 break; 2131 } 2132 2133 /* check for fileno conflict */ 2134 if ((fscp->fs_inum_size > 0) && 2135 ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) { 2136 ino64_t fakenum; 2137 2138 mutex_exit(&cp->c_statelock); 2139 mutex_enter(&fscp->fs_fslock); 2140 fakenum = cachefs_inum_real2fake(fscp, 2141 cp->c_attr.va_nodeid); 2142 if (fakenum == 0) { 2143 fakenum = cachefs_fileno_conflict(fscp, 2144 cp->c_attr.va_nodeid); 2145 } 2146 mutex_exit(&fscp->fs_fslock); 2147 2148 mutex_enter(&cp->c_statelock); 2149 cp->c_metadata.md_flags |= MD_LOCALFILENO; 2150 cp->c_metadata.md_localfileno = fakenum; 2151 cp->c_flags |= CN_UPDATED; 2152 } 2153 2154 /* copy out the attributes */ 2155 *vap = cp->c_attr; 2156 2157 /* 2158 * return the FSID of the cachefs filesystem, 2159 * not the back filesystem 2160 */ 2161 vap->va_fsid = vp->v_vfsp->vfs_dev; 2162 2163 /* return our idea of the size */ 2164 if (cp->c_size > vap->va_size) 2165 vap->va_size = cp->c_size; 2166 2167 /* overwrite with our version of fileno and timestamps */ 2168 
vap->va_nodeid = cp->c_metadata.md_localfileno; 2169 vap->va_mtime = cp->c_metadata.md_localmtime; 2170 vap->va_ctime = cp->c_metadata.md_localctime; 2171 2172 mutex_exit(&cp->c_statelock); 2173 break; 2174 } 2175 out: 2176 if (held) 2177 cachefs_cd_release(fscp); 2178 #ifdef CFS_CD_DEBUG 2179 ASSERT((curthread->t_flag & T_CD_HELD) == 0); 2180 #endif 2181 2182 #ifdef CFSDEBUG 2183 CFS_DEBUG(CFSDEBUG_VOPS) 2184 printf("cachefs_getattr: EXIT error = %d\n", error); 2185 #endif 2186 return (error); 2187 } 2188 2189 /* 2190 * cachefs_getattr_backfs_nfsv4 2191 * 2192 * Call NFSv4 back filesystem to handle the getattr (cachefs 2193 * pass-through support for NFSv4). 2194 */ 2195 static int 2196 cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap, 2197 int flags, cred_t *cr, caller_context_t *ct) 2198 { 2199 cnode_t *cp = VTOC(vp); 2200 fscache_t *fscp = C_TO_FSCACHE(cp); 2201 vnode_t *backvp; 2202 int error; 2203 2204 /* 2205 * For NFSv4 pass-through to work, only connected operation 2206 * is supported, the cnode backvp must exist, and cachefs 2207 * optional (eg., disconnectable) flags are turned off. Assert 2208 * these conditions for the getattr operation. 
2209 */ 2210 CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp); 2211 CFS_BACKFS_NFSV4_ASSERT_CNODE(cp); 2212 2213 /* Call backfs vnode op after extracting backvp */ 2214 mutex_enter(&cp->c_statelock); 2215 backvp = cp->c_backvp; 2216 mutex_exit(&cp->c_statelock); 2217 2218 CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_getattr_backfs_nfsv4: cnode %p," 2219 " backvp %p\n", cp, backvp)); 2220 error = VOP_GETATTR(backvp, vap, flags, cr, ct); 2221 2222 /* Update attributes */ 2223 cp->c_attr = *vap; 2224 2225 /* 2226 * return the FSID of the cachefs filesystem, 2227 * not the back filesystem 2228 */ 2229 vap->va_fsid = vp->v_vfsp->vfs_dev; 2230 2231 return (error); 2232 } 2233 2234 /*ARGSUSED4*/ 2235 static int 2236 cachefs_setattr( 2237 vnode_t *vp, 2238 vattr_t *vap, 2239 int flags, 2240 cred_t *cr, 2241 caller_context_t *ct) 2242 { 2243 cnode_t *cp = VTOC(vp); 2244 fscache_t *fscp = C_TO_FSCACHE(cp); 2245 int error; 2246 int connected; 2247 int held = 0; 2248 2249 if (getzoneid() != GLOBAL_ZONEID) 2250 return (EPERM); 2251 2252 /* 2253 * Cachefs only provides pass-through support for NFSv4, 2254 * and all vnode operations are passed through to the 2255 * back file system. For NFSv4 pass-through to work, only 2256 * connected operation is supported, the cnode backvp must 2257 * exist, and cachefs optional (eg., disconnectable) flags 2258 * are turned off. Assert these conditions to ensure that 2259 * the backfilesystem is called for the setattr operation. 
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	connected = 0;
	for (;;) {
		/* drop hold on file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}

		/* acquire access to the file system */
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;

		/* perform the setattr */
		error = cachefs_setattr_common(vp, vap, flags, cr, ct);
		if (error) {
			/* if connected */
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				/* timed out while connected: drop and retry */
				if (CFS_TIMEOUT(fscp, error)) {
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					connected = 0;
					continue;
				}
			}

			/* else must be disconnected */
			else {
				/* timed out disconnected: retry connected */
				if (CFS_TIMEOUT(fscp, error)) {
					connected = 1;
					continue;
				}
			}
		}
		break;
	}

	if (held) {
		cachefs_cd_release(fscp);
	}
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
	return (error);
}

/*
 * cachefs_setattr_common
 *
 * Worker for cachefs_setattr(): rejects AT_NOSET attributes, flushes
 * dirty pages, dispatches to the connected or disconnected setattr
 * path, and on a successful size change tosses pages beyond the new
 * EOF and zeroes the tail of the last page.
 */
static int
cachefs_setattr_common(
	vnode_t *vp,
	vattr_t *vap,
	int flags,
	cred_t *cr,
	caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	cachefscache_t *cachep = fscp->fs_cache;
	uint_t mask = vap->va_mask;
	int error = 0;
	uint_t bcnt;

	/* Cannot set these attributes. */
	if (mask & AT_NOSET)
		return (EINVAL);

	/*
	 * Truncate file. Must have write permission and not be a directory.
	 */
	if (mask & AT_SIZE) {
		if (vp->v_type == VDIR) {
			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE))
				cachefs_log_truncate(cachep, EISDIR,
				    fscp->fs_cfsvfsp,
				    &cp->c_metadata.md_cookie,
				    cp->c_id.cid_fileno,
				    crgetuid(cr), vap->va_size);
			return (EISDIR);
		}
	}

	/*
	 * Gotta deal with one special case here, where we're setting the
	 * size of the file. First, we zero out part of the page after the
	 * new size of the file. Then we toss (not write) all pages after
	 * page in which the new offset occurs. Note that the NULL passed
	 * in instead of a putapage() fn parameter is correct, since
	 * no dirty pages will be found (B_TRUNC | B_INVAL).
	 */

	rw_enter(&cp->c_rwlock, RW_WRITER);

	/* sync dirty pages */
	if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_putpage_common(vp, (offset_t)0, 0, 0, cr);
		if (error == EINTR)
			goto out;
	}
	/* non-EINTR putpage errors are deliberately ignored here */
	error = 0;

	/* if connected */
	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
		error = cachefs_setattr_connected(vp, vap, flags, cr, ct);
	}
	/* else must be disconnected */
	else {
		error = cachefs_setattr_disconnected(vp, vap, flags, cr, ct);
	}
	if (error)
		goto out;

	/*
	 * If the file size has been changed then
	 * toss whole pages beyond the end of the file and zero
	 * the portion of the last page that is beyond the end of the file.
	 */
	if (mask & AT_SIZE && !CFS_ISFS_BACKFS_NFSV4(fscp)) {
		bcnt = (uint_t)(cp->c_size & PAGEOFFSET);
		if (bcnt)
			pvn_vpzero(vp, cp->c_size, PAGESIZE - bcnt);
		(void) pvn_vplist_dirty(vp, cp->c_size, cachefs_push,
		    B_TRUNC | B_INVAL, cr);
	}

out:
	rw_exit(&cp->c_rwlock);

	if ((mask & AT_SIZE) &&
	    (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE)))
		cachefs_log_truncate(cachep, error, fscp->fs_cfsvfsp,
		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
		    crgetuid(cr), vap->va_size);

	return (error);
}

/*
 * cachefs_setattr_connected
 *
 * Apply the setattr to the back file system, then refresh the cached
 * attributes from the back vnode.  On a size change, tries to resize
 * the front file; if that is not possible the file is nocached.
 */
static int
cachefs_setattr_connected(
	vnode_t *vp,
	vattr_t *vap,
	int flags,
	cred_t *cr,
	caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	uint_t mask = vap->va_mask;
	int error = 0;
	int setsize;

	mutex_enter(&cp->c_statelock);

	if (cp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, cp);
		if (error)
			goto out;
	}

	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
	if (error)
		goto out;

	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_setattr (nfsv4): cnode %p, "
	    "backvp %p\n", cp, cp->c_backvp));
	error = VOP_SETATTR(cp->c_backvp, vap, flags, cr, ct);
	if (error) {
		goto out;
	}

	/* if the size of the file is being changed */
	if (mask & AT_SIZE) {
		cp->c_size = vap->va_size;
		error = 0;
		setsize = 0;

		/* see if okay to try to set the file size */
		if (((cp->c_flags & CN_NOCACHE) == 0) &&
		    CFS_ISFS_NONSHARED(fscp)) {
			/* okay to set size if file is populated */
			if (cp->c_metadata.md_flags & MD_POPULATED)
				setsize = 1;

			/*
			 * Okay to set size if front file exists and setting
			 * file size to zero.
			 */
			if ((cp->c_metadata.md_flags & MD_FILE) &&
			    (vap->va_size == 0))
				setsize = 1;
		}

		/* if okay to try to set the file size */
		if (setsize) {
			error = 0;
			if (cp->c_frontvp == NULL)
				error = cachefs_getfrontfile(cp);
			if (error == 0)
				error = cachefs_frontfile_size(cp, cp->c_size);
		} else if (cp->c_metadata.md_flags & MD_FILE) {
			/* make sure file gets nocached */
			error = EEXIST;
		}

		/* if we have to nocache the file */
		if (error) {
			if ((cp->c_flags & CN_NOCACHE) == 0 &&
			    !CFS_ISFS_BACKFS_NFSV4(fscp))
				cachefs_nocache(cp);
			/* nocaching is the recovery; error is consumed */
			error = 0;
		}
	}

	cp->c_flags |= CN_UPDATED;

	/* XXX bob: given what modify_cobject does this seems unnecessary */
	cp->c_attr.va_mask = AT_ALL;
	error = VOP_GETATTR(cp->c_backvp, &cp->c_attr, 0, cr, ct);
	if (error)
		goto out;

	cp->c_attr.va_size = MAX(cp->c_attr.va_size, cp->c_size);
	cp->c_size = cp->c_attr.va_size;

	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
out:
	mutex_exit(&cp->c_statelock);
	return (error);
}

/*
 * perform the setattr on the local file system
 *
 * Disconnected setattr: every change is validated locally, applied to
 * the cached attributes/front file, and logged to the dlog so it can
 * be rolled to the back file system at reconnect.  Returns ETIMEDOUT
 * when the operation cannot be performed while disconnected.
 */
/*ARGSUSED4*/
static int
cachefs_setattr_disconnected(
	vnode_t *vp,
	vattr_t *vap,
	int flags,
	cred_t *cr,
	caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int mask;
	int error;
	int newfile;
	off_t commit = 0;

	if (CFS_ISFS_WRITE_AROUND(fscp))
		return (ETIMEDOUT);

	/* if we do not have good attributes */
	if (cp->c_metadata.md_flags & MD_NEEDATTRS)
		return (ETIMEDOUT);

	/* primary concern is to keep this routine as much like ufs_setattr */

	mutex_enter(&cp->c_statelock);

	error = secpolicy_vnode_setattr(cr, vp, vap, &cp->c_attr, flags,
	    cachefs_access_local, cp);

	if (error)
		goto out;

	mask = vap->va_mask;

	/* if changing the size of the file */
	if (mask & AT_SIZE) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}

		if (vp->v_type == VFIFO) {
			error = 0;
			goto out;
		}

		if ((vp->v_type != VREG) &&
		    !((vp->v_type == VLNK) && (vap->va_size == 0))) {
			error = EINVAL;
			goto out;
		}

		if (vap->va_size > fscp->fs_offmax) {
			error = EFBIG;
			goto out;
		}

		/* if the file is not populated and we are not truncating it */
		if (((cp->c_metadata.md_flags & MD_POPULATED) == 0) &&
		    (vap->va_size != 0)) {
			error = ETIMEDOUT;
			goto out;
		}

		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
			error = cachefs_dlog_cidmap(fscp);
			if (error) {
				error = ENOSPC;
				goto out;
			}
			cp->c_metadata.md_flags |= MD_MAPPING;
		}

		/* log the operation */
		commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
		if (commit == 0) {
			error = ENOSPC;
			goto out;
		}
		cp->c_flags &= ~CN_NOCACHE;

		/* special case truncating fast sym links */
		if ((vp->v_type == VLNK) &&
		    (cp->c_metadata.md_flags & MD_FASTSYMLNK)) {
			/* XXX how can we get here */
			/* XXX should update mtime */
			cp->c_size = 0;
			error = 0;
			goto out;
		}

		/* get the front file, this may create one */
		newfile = (cp->c_metadata.md_flags & MD_FILE) ? 0 : 1;
		if (cp->c_frontvp == NULL) {
			error = cachefs_getfrontfile(cp);
			if (error)
				goto out;
		}
		ASSERT(cp->c_frontvp);
		if (newfile && (cp->c_flags & CN_UPDATED)) {
			/* allocate space for the metadata */
			ASSERT((cp->c_flags & CN_ALLOC_PENDING) == 0);
			ASSERT((cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR)
			    == 0);
			error = filegrp_write_metadata(cp->c_filegrp,
			    &cp->c_id, &cp->c_metadata);
			if (error)
				goto out;
		}

		/* change the size of the front file */
		error = cachefs_frontfile_size(cp, vap->va_size);
		if (error)
			goto out;
		cp->c_attr.va_size = cp->c_size = vap->va_size;
		gethrestime(&cp->c_metadata.md_localmtime);
		cp->c_metadata.md_flags |= MD_POPULATED | MD_LOCALMTIME;
		cachefs_modified(cp);
		cp->c_flags |= CN_UPDATED;
	}

	if (mask & AT_MODE) {
		/* mark as modified */
		if (cachefs_modified_alloc(cp)) {
			error = ENOSPC;
			goto out;
		}

		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
			error = cachefs_dlog_cidmap(fscp);
			if (error) {
				error = ENOSPC;
				goto out;
			}
			cp->c_metadata.md_flags |= MD_MAPPING;
		}

		/* log the operation if not already logged */
		if (commit == 0) {
			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
			if (commit == 0) {
				error = ENOSPC;
				goto out;
			}
		}

		/* keep the file type bits, replace the permission bits */
		cp->c_attr.va_mode &= S_IFMT;
		cp->c_attr.va_mode |= vap->va_mode & ~S_IFMT;
		gethrestime(&cp->c_metadata.md_localctime);
		cp->c_metadata.md_flags |= MD_LOCALCTIME;
		cp->c_flags |= CN_UPDATED;
	}

	if (mask & (AT_UID|AT_GID)) {

		/* mark as modified */
		if (cachefs_modified_alloc(cp)) {
			error = ENOSPC;
			goto out;
		}

		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
			error = cachefs_dlog_cidmap(fscp);
			if (error) {
				error = ENOSPC;
				goto out;
			}
			cp->c_metadata.md_flags |= MD_MAPPING;
		}

		/* log the operation if not already logged */
		if (commit == 0) {
			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
			if (commit == 0) {
				error = ENOSPC;
				goto out;
			}
		}

		if (mask & AT_UID)
			cp->c_attr.va_uid = vap->va_uid;

		if (mask & AT_GID)
			cp->c_attr.va_gid = vap->va_gid;
		gethrestime(&cp->c_metadata.md_localctime);
		cp->c_metadata.md_flags |= MD_LOCALCTIME;
		cp->c_flags |= CN_UPDATED;
	}


	if (mask & (AT_MTIME|AT_ATIME)) {
		/* mark as modified */
		if (cachefs_modified_alloc(cp)) {
			error = ENOSPC;
			goto out;
		}

		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
			error = cachefs_dlog_cidmap(fscp);
			if (error) {
				error = ENOSPC;
				goto out;
			}
			cp->c_metadata.md_flags |= MD_MAPPING;
		}

		/* log the operation if not already logged */
		if (commit == 0) {
			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
			if (commit == 0) {
				error = ENOSPC;
				goto out;
			}
		}

		if (mask & AT_MTIME) {
			cp->c_metadata.md_localmtime = vap->va_mtime;
			cp->c_metadata.md_flags |= MD_LOCALMTIME;
		}
		if (mask & AT_ATIME)
			cp->c_attr.va_atime = vap->va_atime;
		gethrestime(&cp->c_metadata.md_localctime);
		cp->c_metadata.md_flags |= MD_LOCALCTIME;
		cp->c_flags |= CN_UPDATED;
	}

out:
	mutex_exit(&cp->c_statelock);

	/* commit the log entry */
	if (commit) {
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
	}
	return (error);
}

/*
 * cachefs_access
 *
 * VOP_ACCESS entry point: retries the connected or disconnected
 * access check across file system timeouts.
 */
/* ARGSUSED */
static int
cachefs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
	caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error;
	int held = 0;
	int connected = 0;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_access: ENTER vp %p\n", (void *)vp);
#endif
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the access operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			break;
		held = 1;

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_access_connected(vp, mode, flags,
			    cr);
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			mutex_enter(&cp->c_statelock);
			error = cachefs_access_local(cp, mode, cr);
			mutex_exit(&cp->c_statelock);
			if (CFS_TIMEOUT(fscp, error)) {
				/* try the back fs directly if allowed */
				if (cachefs_cd_access_miss(fscp)) {
					mutex_enter(&cp->c_statelock);
					if (cp->c_backvp == NULL) {
						(void) cachefs_getbackvp(fscp,
						    cp);
					}
					mutex_exit(&cp->c_statelock);
					error = cachefs_access_connected(vp,
					    mode, flags, cr);
					if (!CFS_TIMEOUT(fscp, error))
						break;
					delay(5*hz);
					connected = 0;
					continue;
				}
				connected = 1;
				continue;
			}
		}
		break;
	}
	if (held)
		cachefs_cd_release(fscp);
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_access: EXIT error = %d\n", error);
#endif
	return (error);
}

/*
 * cachefs_access_connected
 *
 * Access check while connected: validates the cached object, then
 * either checks locally against cached attributes or asks the back
 * file system (caching the ACL on the side for disconnected use).
 */
static int
cachefs_access_connected(struct vnode *vp, int mode, int flags, cred_t *cr)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error = 0;

	mutex_enter(&cp->c_statelock);

	/* Make sure the cnode attrs are valid first. */
	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
	if (error)
		goto out;

	/* see if can do a local file system check */
	if ((fscp->fs_info.fi_mntflags & CFS_ACCESS_BACKFS) == 0 &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_access_local(cp, mode, cr);
		goto out;
	}

	/* else do a remote file system check */
	else {
		if (cp->c_backvp == NULL) {
			error = cachefs_getbackvp(fscp, cp);
			if (error)
				goto out;
		}

		CFS_DPRINT_BACKFS_NFSV4(fscp,
		    ("cachefs_access (nfsv4): cnode %p, backvp %p\n",
		    cp, cp->c_backvp));
		error = VOP_ACCESS(cp->c_backvp, mode, flags, cr, NULL);

		/*
		 * even though we don't `need' the ACL to do access
		 * via the backvp, we should cache it here to make our
		 * behavior more reasonable if we go disconnected.
		 */

		if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
		    (cachefs_vtype_aclok(vp)) &&
		    ((cp->c_flags & CN_NOCACHE) == 0) &&
		    (!CFS_ISFS_BACKFS_NFSV4(fscp)) &&
		    ((cp->c_metadata.md_flags & MD_ACL) == 0))
			(void) cachefs_cacheacl(cp, NULL);
	}
out:
	/*
	 * If NFS returned ESTALE, mark this cnode as stale, so that
	 * the vn_open retry will read the file anew from backfs
	 */
	if (error == ESTALE)
		cachefs_cnode_stale(cp);

	mutex_exit(&cp->c_statelock);
	return (error);
}

/*
 * CFS has a fastsymlink scheme.
 * If the size of the link is < C_FSL_SIZE, then
 * the link is placed in the metadata itself (no front file is allocated).
 */
/*ARGSUSED*/
static int
cachefs_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
{
	int error = 0;
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	cachefscache_t *cachep = fscp->fs_cache;
	int held = 0;
	int connected = 0;

	if (getzoneid() != GLOBAL_ZONEID)
		return (EPERM);

	if (vp->v_type != VLNK)
		return (EINVAL);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the readlink operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			break;
		held = 1;

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			/*
			 * since readlink_connected will call stuffsymlink
			 * on success, have to serialize access
			 */
			if (!rw_tryenter(&cp->c_rwlock, RW_WRITER)) {
				/*
				 * drop fs access before blocking on the
				 * rwlock, then re-acquire it
				 */
				cachefs_cd_release(fscp);
				rw_enter(&cp->c_rwlock, RW_WRITER);
				error = cachefs_cd_access(fscp, connected, 0);
				if (error) {
					held = 0;
					rw_exit(&cp->c_rwlock);
					break;
				}
			}
			error = cachefs_readlink_connected(vp, uiop, cr);
			rw_exit(&cp->c_rwlock);
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_readlink_disconnected(vp, uiop);
			if (CFS_TIMEOUT(fscp, error)) {
				if (cachefs_cd_access_miss(fscp)) {
					/* as above */
					if (!rw_tryenter(&cp->c_rwlock,
					    RW_WRITER)) {
						cachefs_cd_release(fscp);
						rw_enter(&cp->c_rwlock,
						    RW_WRITER);
						error = cachefs_cd_access(fscp,
						    connected, 0);
						if (error) {
							held = 0;
							rw_exit(&cp->c_rwlock);
							break;
						}
					}
					error = cachefs_readlink_connected(vp,
					    uiop, cr);
					rw_exit(&cp->c_rwlock);
					if (!CFS_TIMEOUT(fscp, error))
						break;
					delay(5*hz);
					connected = 0;
					continue;
				}
				connected = 1;
				continue;
			}
		}
		break;
	}
	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READLINK))
		cachefs_log_readlink(cachep, error, fscp->fs_cfsvfsp,
		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
		    crgetuid(cr), cp->c_size);

	if (held)
		cachefs_cd_release(fscp);
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif

	/*
	 * The over the wire error for attempting to readlink something
	 * other than a symbolic link is ENXIO. However, we need to
	 * return EINVAL instead of ENXIO, so we map it here.
	 */
	return (error == ENXIO ? EINVAL : error);
}

/*
 * cachefs_readlink_connected
 *
 * Satisfy readlink from the fast-symlink metadata or the front file if
 * cached; otherwise fetch the contents from the back fs and try to
 * cache them.  Caller holds c_rwlock as writer.
 */
static int
cachefs_readlink_connected(vnode_t *vp, uio_t *uiop, cred_t *cr)
{
	int error;
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	caddr_t buf;
	int buflen;
	int readcache = 0;

	mutex_enter(&cp->c_statelock);

	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
	if (error)
		goto out;

	/* if the sym link is cached as a fast sym link */
	if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		error = uiomove(cp->c_metadata.md_allocinfo,
		    MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
#ifdef CFSDEBUG
		readcache = 1;
		goto out;
#else /* CFSDEBUG */
		/* XXX KLUDGE! correct for insidious 0-len symlink */
		if (cp->c_size != 0) {
			readcache = 1;
			goto out;
		}
#endif /* CFSDEBUG */
	}

	/* if the sym link is cached in a front file */
	if (cp->c_metadata.md_flags & MD_POPULATED) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		ASSERT(cp->c_metadata.md_flags & MD_FILE);
		if (cp->c_frontvp == NULL) {
			(void) cachefs_getfrontfile(cp);
		}
		if (cp->c_metadata.md_flags & MD_POPULATED) {
			/* read symlink data from frontfile */
			uiop->uio_offset = 0;
			(void) VOP_RWLOCK(cp->c_frontvp,
			    V_WRITELOCK_FALSE, NULL);
			error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
			VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);

			/* XXX KLUDGE!
			   correct for insidious 0-len symlink */
			if (cp->c_size != 0) {
				readcache = 1;
				goto out;
			}
		}
	}

	/* get the sym link contents from the back fs */
	error = cachefs_readlink_back(cp, cr, &buf, &buflen);
	if (error)
		goto out;

	/* copy the contents out to the user */
	error = uiomove(buf, MIN(buflen, uiop->uio_resid), UIO_READ, uiop);

	/*
	 * try to cache the sym link, note that its a noop if NOCACHE is set
	 * or if NFSv4 pass-through is enabled.
	 */
	if (cachefs_stuffsymlink(cp, buf, buflen)) {
		cachefs_nocache(cp);
	}

	cachefs_kmem_free(buf, MAXPATHLEN);

out:
	mutex_exit(&cp->c_statelock);
	if (error == 0) {
		if (readcache)
			fscp->fs_stats.st_hits++;
		else
			fscp->fs_stats.st_misses++;
	}
	return (error);
}

/*
 * cachefs_readlink_disconnected
 *
 * Satisfy readlink entirely from the cache (fast-symlink metadata or
 * the front file); returns ETIMEDOUT when the link is not cached so
 * the caller can retry connected.
 */
static int
cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop)
{
	int error;
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int readcache = 0;

	mutex_enter(&cp->c_statelock);

	/* if the sym link is cached as a fast sym link */
	if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
		error = uiomove(cp->c_metadata.md_allocinfo,
		    MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
		readcache = 1;
		goto out;
	}

	/* if the sym link is cached in a front file */
	if (cp->c_metadata.md_flags & MD_POPULATED) {
		ASSERT(cp->c_metadata.md_flags & MD_FILE);
		if (cp->c_frontvp == NULL) {
			(void) cachefs_getfrontfile(cp);
		}
		if (cp->c_metadata.md_flags & MD_POPULATED) {
			/* read symlink data from frontfile */
			uiop->uio_offset = 0;
			(void) VOP_RWLOCK(cp->c_frontvp,
			    V_WRITELOCK_FALSE, NULL);
			error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
			VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);
			readcache = 1;
			goto out;
		}
	}
	error = ETIMEDOUT;

out:
	mutex_exit(&cp->c_statelock);
	if (error == 0) {
		if (readcache)
			fscp->fs_stats.st_hits++;
		else
			fscp->fs_stats.st_misses++;
	}
	return (error);
}

/*
 * cachefs_fsync
 *
 * VOP_FSYNC entry point: flushes dirty pages, syncs the back vnode
 * when connected, and syncs the cnode metadata to the front fs,
 * retrying across connected/disconnected timeouts.
 */
/*ARGSUSED*/
static int
cachefs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	int error = 0;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int held = 0;
	int connected = 0;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_fsync: ENTER vp %p\n", (void *)vp);
#endif

	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	/* nothing to sync if the back file system is read-only */
	if (fscp->fs_backvfsp && fscp->fs_backvfsp->vfs_flag & VFS_RDONLY)
		goto out;

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the fsync operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;
		connected = 0;

		/* if a regular file, write out the pages */
		if ((vp->v_type == VREG) && vn_has_cached_data(vp) &&
		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
			error = cachefs_putpage_common(vp, (offset_t)0,
			    0, 0, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					continue;
				} else {
					connected = 1;
					continue;
				}
			}

			/* if no space left in cache, wait until connected */
			if ((error == ENOSPC) &&
			    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
				connected = 1;
				continue;
			}

			/* clear the cnode error if putpage worked */
			if ((error == 0) && cp->c_error) {
				mutex_enter(&cp->c_statelock);
				cp->c_error = 0;
				mutex_exit(&cp->c_statelock);
			}

			if (error)
				break;
		}

		/* if connected, sync the backvp */
		if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) &&
		    cp->c_backvp) {
			mutex_enter(&cp->c_statelock);
			/* re-check backvp now that c_statelock is held */
			if (cp->c_backvp) {
				CFS_DPRINT_BACKFS_NFSV4(fscp,
				    ("cachefs_fsync (nfsv4): cnode %p, "
				    "backvp %p\n", cp, cp->c_backvp));
				error = VOP_FSYNC(cp->c_backvp, syncflag, cr,
				    ct);
				if (CFS_TIMEOUT(fscp, error)) {
					mutex_exit(&cp->c_statelock);
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					continue;
				} else if (error && (error != EINTR))
					cp->c_error = error;
			}
			mutex_exit(&cp->c_statelock);
		}

		/* sync the metadata and the front file to the front fs */
		if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
			error = cachefs_sync_metadata(cp);
			if (error &&
			    (fscp->fs_cdconnected == CFS_CD_CONNECTED))
				error = 0;
		}
		break;
	}

	if (error == 0)
		error = cp->c_error;

	if (held)
		cachefs_cd_release(fscp);

out:
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_fsync: EXIT vp %p\n", (void *)vp);
#endif
	return (error);
}

/*
 * Called from cachefs_inactive(), to make sure all the data goes out to disk.
 *
 * Writes the cnode's metadata (and front file sync state) to the front
 * file system; on failure the front file and metadata are thrown away
 * and the cnode is nocached.  Returns 0 or an errno.
 */
int
cachefs_sync_metadata(cnode_t *cp)
{
	int error = 0;
	struct filegrp *fgp;
	struct vattr va;
	fscache_t *fscp = C_TO_FSCACHE(cp);

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("c_sync_metadata: ENTER cp %p cflag %x\n",
		    (void *)cp, cp->c_flags);
#endif

	mutex_enter(&cp->c_statelock);
	/* nothing to do unless the cnode is dirty and writable */
	if ((cp->c_flags & CN_UPDATED) == 0)
		goto out;
	if (cp->c_flags & (CN_STALE | CN_DESTROY))
		goto out;
	fgp = cp->c_filegrp;
	if ((fgp->fg_flags & CFS_FG_WRITE) == 0)
		goto out;
	if (CFS_ISFS_BACKFS_NFSV4(fscp))
		goto out;

	if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
		mutex_exit(&cp->c_statelock);
		error = filegrp_allocattr(fgp);
		mutex_enter(&cp->c_statelock);
		if (error) {
			error = 0;
			goto out;
		}
	}

	if (cp->c_flags & CN_ALLOC_PENDING) {
		error = filegrp_create_metadata(fgp, &cp->c_metadata,
		    &cp->c_id);
		if (error)
			goto out;
		cp->c_flags &= ~CN_ALLOC_PENDING;
	}

	if (cp->c_flags & CN_NEED_FRONT_SYNC) {
		if (cp->c_frontvp != NULL) {
			error = VOP_FSYNC(cp->c_frontvp, FSYNC, kcred, NULL);
			if (error) {
				/* invalidate timestamp so it repopulates */
				cp->c_metadata.md_timestamp.tv_sec = 0;
			} else {
				va.va_mask = AT_MTIME;
				error = VOP_GETATTR(cp->c_frontvp, &va, 0,
				    kcred, NULL);
				if (error)
					goto out;
				cp->c_metadata.md_timestamp = va.va_mtime;
				cp->c_flags &=
				    ~(CN_NEED_FRONT_SYNC |
				    CN_POPULATION_PENDING);
			}
		} else {
			cp->c_flags &=
			    ~(CN_NEED_FRONT_SYNC | CN_POPULATION_PENDING);
		}
	}

	/*
	 * XXX tony: How can CN_ALLOC_PENDING still be set??
	 * XXX tony: How can CN_UPDATED not be set?????
	 */
	if ((cp->c_flags & CN_ALLOC_PENDING) == 0 &&
	    (cp->c_flags & CN_UPDATED)) {
		error = filegrp_write_metadata(fgp, &cp->c_id,
		    &cp->c_metadata);
		if (error)
			goto out;
	}
out:
	if (error) {
		/* XXX modified files? */
		if (cp->c_metadata.md_rlno) {
			cachefs_removefrontfile(&cp->c_metadata,
			    &cp->c_id, fgp);
			cachefs_rlent_moveto(C_TO_FSCACHE(cp)->fs_cache,
			    CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
			cp->c_metadata.md_rlno = 0;
			cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
			if (cp->c_frontvp) {
				VN_RELE(cp->c_frontvp);
				cp->c_frontvp = NULL;
			}
		}
		if ((cp->c_flags & CN_ALLOC_PENDING) == 0)
			(void) filegrp_destroy_metadata(fgp, &cp->c_id);
		cp->c_flags |= CN_ALLOC_PENDING;
		cachefs_nocache(cp);
	}
	/*
	 * we clear the updated bit even on errors because a retry
	 * will probably fail also.
	 */
	cp->c_flags &= ~CN_UPDATED;
	mutex_exit(&cp->c_statelock);

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("c_sync_metadata: EXIT cp %p cflag %x\n",
		    (void *)cp, cp->c_flags);
#endif

	return (error);
}

/*
 * This is the vop entry point for inactivating a vnode.
 * It just queues the request for the async thread which
 * calls cachefs_inactive.
 * Because of the dnlc, it is not safe to grab most locks here.
 */
/*ARGSUSED*/
static void
cachefs_inactive(struct vnode *vp, cred_t *cr, caller_context_t *ct)
{
	cnode_t *cp;
	struct cachefs_req *rp;
	fscache_t *fscp;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_inactive: ENTER vp %p\n", (void *)vp);
#endif

	cp = VTOC(vp);
	fscp = C_TO_FSCACHE(cp);

	ASSERT((cp->c_flags & CN_IDLE) == 0);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the inactive operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	/* vn_rele() set the v_count == 1 */

	cp->c_ipending = 1;

	/*
	 * Queue a CFS_IDLE request for the async worker thread; the
	 * request carries a hold on the cred and the vnode pointer.
	 */
	rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
	rp->cfs_cmd = CFS_IDLE;
	rp->cfs_cr = cr;
	crhold(rp->cfs_cr);
	rp->cfs_req_u.cu_idle.ci_vp = vp;
	cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_inactive: EXIT vp %p\n", (void *)vp);
#endif
}

/*
 * VOP_LOOKUP entry point.  Loops acquiring connected/disconnected
 * access to the fscache and retrying cachefs_lookup_common() until the
 * operation completes without a timeout.  Device nodes are wrapped in a
 * specvp before being returned.
 */
/* ARGSUSED */
static int
cachefs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp,
    struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr,
    caller_context_t *ct, int *direntflags, pathname_t *realpnp)

{
	int error = 0;
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int held = 0;		/* nonzero if we hold cd access to fscp */
	int connected = 0;	/* nonzero to request connected access */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_lookup: ENTER dvp %p nm %s\n", (void *)dvp, nm);
#endif

	/* cachefs is only supported in the global zone */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the lookup operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			break;
		held = 1;

		error = cachefs_lookup_common(dvp, nm, vpp, pnp,
		    flags, rdir, cr);
		if (CFS_TIMEOUT(fscp, error)) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				/* connection timed out; retry disconnected */
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			} else {
				if (cachefs_cd_access_miss(fscp)) {
					/* try the back fs directly */
					rw_enter(&dcp->c_rwlock, RW_READER);
					error = cachefs_lookup_back(dvp, nm,
					    vpp, cr);
					rw_exit(&dcp->c_rwlock);
					if (!CFS_TIMEOUT(fscp, error))
						break;
					delay(5*hz);
					connected = 0;
					continue;
				}
				connected = 1;
				continue;
			}
		}
		break;
	}
	if (held)
		cachefs_cd_release(fscp);

	/* wrap device nodes in a specfs vnode before returning them */
	if (error == 0 && IS_DEVVP(*vpp)) {
		struct vnode *newvp;
		newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
		VN_RELE(*vpp);
		if (newvp == NULL) {
			error = ENOSYS;
		} else {
			*vpp = newvp;
		}
	}

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_lookup: EXIT error = %d\n", error);
#endif

	return (error);
}

/*
 * Guts of the lookup: checks the trivial cases ("", "."), the dnlc,
 * then the cached (front) directory, falling back to the back file
 * system via cachefs_lookup_back() when the front file is unusable or
 * incomplete.  Returns 0 with a held vnode in *vpp, or an errno
 * (ETIMEDOUT when disconnected and the answer is not cached).
 */
/* ARGSUSED */
int
cachefs_lookup_common(vnode_t *dvp, char *nm, vnode_t **vpp,
    struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr)
{
	int error = 0;
	cnode_t *cp, *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	struct fid cookie;
	u_offset_t d_offset;
	struct cachefs_req *rp;
	cfs_cid_t cid, dircid;
	uint_t flag;
	uint_t uncached = 0;

	*vpp = NULL;

	/*
	 * If lookup is for "", just return dvp.  Don't need
	 * to send it over the wire, look it up in the dnlc,
	 * or perform any access checks.
	 */
	if (*nm == '\0') {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	/* can't do lookups in non-directories */
	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	/* perform access check, also does consistency check if connected */
	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
		error = cachefs_access_connected(dvp, VEXEC, 0, cr);
	} else {
		mutex_enter(&dcp->c_statelock);
		error = cachefs_access_local(dcp, VEXEC, cr);
		mutex_exit(&dcp->c_statelock);
	}
	if (error)
		return (error);

	/*
	 * If lookup is for ".", just return dvp.  Don't need
	 * to send it over the wire or look it up in the dnlc,
	 * just need to check access.
	 */
	if (strcmp(nm, ".") == 0) {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	/* check the dnlc */
	*vpp = (vnode_t *)dnlc_lookup(dvp, nm);
	if (*vpp)
		return (0);

	/* read lock the dir before starting the search */
	rw_enter(&dcp->c_rwlock, RW_READER);

	mutex_enter(&dcp->c_statelock);
	dircid = dcp->c_id;

	dcp->c_usage++;

	/* if front file is not usable, lookup on the back fs */
	if ((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
	    CFS_ISFS_BACKFS_NFSV4(fscp) ||
	    ((dcp->c_filegrp->fg_flags & CFS_FG_READ) == 0)) {
		mutex_exit(&dcp->c_statelock);
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
			error = cachefs_lookup_back(dvp, nm, vpp, cr);
		else
			error = ETIMEDOUT;
		goto out;
	}

	/* if the front file is not populated, try to populate it */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
			error = ETIMEDOUT;
			mutex_exit(&dcp->c_statelock);
			goto out;
		}

		if (cachefs_async_okay()) {
			/* cannot populate if cache is not writable */
			ASSERT((dcp->c_flags &
			    (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0);
			dcp->c_flags |= CN_ASYNC_POPULATE;

			/*
			 * Queue an async populate of the directory; the
			 * request holds the cred and the directory vnode.
			 */
			rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
			rp->cfs_cmd = CFS_POPULATE;
			rp->cfs_req_u.cu_populate.cpop_vp = dvp;
			rp->cfs_cr = cr;

			crhold(cr);
			VN_HOLD(dvp);

			cachefs_addqueue(rp, &fscp->fs_workq);
		} else if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
			/* synchronous populate when ACLs need not be cached */
			error = cachefs_dir_fill(dcp, cr);
			if (error != 0) {
				mutex_exit(&dcp->c_statelock);
				goto out;
			}
		}
		/* no populate if too many asyncs and we have to cache ACLs */

		mutex_exit(&dcp->c_statelock);

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
			error = cachefs_lookup_back(dvp, nm, vpp, cr);
		else
			error = ETIMEDOUT;
		goto out;
	}

	/* by now we have a valid cached front file that we can search */

	ASSERT((dcp->c_flags & CN_ASYNC_POPULATE) == 0);
	error = cachefs_dir_look(dcp, nm, &cookie, &flag,
	    &d_offset, &cid);
	mutex_exit(&dcp->c_statelock);

	if (error) {
		/* if the entry does not have the fid, go get it */
		if (error == EINVAL) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
				error = cachefs_lookup_back(dvp, nm, vpp, cr);
			else
				error = ETIMEDOUT;
		}

		/* errors other than does not exist */
		else if (error != ENOENT) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
				error = cachefs_lookup_back(dvp, nm, vpp, cr);
			else
				error = ETIMEDOUT;
		}
		goto out;
	}

	/*
	 * Else we found the entry in the cached directory.
	 * Make a cnode for it.
	 */
	error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
	    cr, 0, &cp);
	if (error == ESTALE) {
		/* cached entry is stale; stop caching this dir and retry */
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		mutex_enter(&dcp->c_statelock);
		cachefs_nocache(dcp);
		mutex_exit(&dcp->c_statelock);
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_lookup_back(dvp, nm, vpp, cr);
			uncached = 1;
		} else
			error = ETIMEDOUT;
	} else if (error == 0) {
		*vpp = CTOV(cp);
	}

out:
	if (error == 0) {
		/* put the entry in the dnlc */
		if (cachefs_dnlc)
			dnlc_enter(dvp, nm, *vpp);

		/* save the cid of the parent so can find the name */
		cp = VTOC(*vpp);
		if (bcmp(&cp->c_metadata.md_parent, &dircid,
		    sizeof (cfs_cid_t)) != 0) {
			mutex_enter(&cp->c_statelock);
			cp->c_metadata.md_parent = dircid;
			cp->c_flags |= CN_UPDATED;
			mutex_exit(&cp->c_statelock);
		}
	}

	rw_exit(&dcp->c_rwlock);
	if (uncached && dcp->c_metadata.md_flags & MD_PACKED)
		(void) cachefs_pack_common(dvp, cr);
	return (error);
}

/*
 * Called from
cachefs_lookup_common when the back file system needs to be
 * examined to perform the lookup.
 */
static int
cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp,
    cred_t *cr)
{
	int error = 0;
	cnode_t *cp, *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	vnode_t *backvp = NULL;
	struct vattr va;
	struct fid cookie;
	cfs_cid_t cid;
	uint32_t valid_fid;

	mutex_enter(&dcp->c_statelock);

	/* do a lookup on the back FS to get the back vnode */
	if (dcp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, dcp);
		if (error)
			goto out;
	}

	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_lookup (nfsv4): dcp %p, dbackvp %p, name %s\n",
	    dcp, dcp->c_backvp, nm));
	error = VOP_LOOKUP(dcp->c_backvp, nm, &backvp, (struct pathname *)NULL,
	    0, (vnode_t *)NULL, cr, NULL, NULL, NULL);
	if (error)
		goto out;
	/* strip a specfs node down to the real underlying vnode */
	if (IS_DEVVP(backvp)) {
		struct vnode *devvp = backvp;

		if (VOP_REALVP(devvp, &backvp, NULL) == 0) {
			VN_HOLD(backvp);
			VN_RELE(devvp);
		}
	}

	/* get the fid and attrs from the back fs */
	/* NFSv4 pass-through mode does not use fids */
	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
	error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
	if (error)
		goto out;

	cid.cid_fileno = va.va_nodeid;
	cid.cid_flags = 0;

#if 0
	/* XXX bob: this is probably no longer necessary */
	/*
	 * NOTE(review): d_offset is not declared in this function, so
	 * this disabled block would not compile if re-enabled as-is.
	 */
	/* if the directory entry was incomplete, we can complete it now */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) &&
	    ((dcp->c_flags & CN_ASYNC_POPULATE) == 0) &&
	    (dcp->c_filegrp->fg_flags & CFS_FG_WRITE)) {
		cachefs_dir_modentry(dcp, d_offset, &cookie, &cid);
	}
#endif

out:
	mutex_exit(&dcp->c_statelock);

	/* create the cnode */
	if (error == 0) {
		error = cachefs_cnode_make(&cid, fscp,
		    (valid_fid ? &cookie : NULL),
		    &va, backvp, cr, 0, &cp);
		if (error == 0) {
			*vpp = CTOV(cp);
		}
	}

	if (backvp)
		VN_RELE(backvp);

	return (error);
}

/*
 * VOP_CREATE entry point.  Like cachefs_lookup(), loops over the
 * connected/disconnected access states retrying until the create
 * completes without a timeout.
 */
/*ARGSUSED7*/
static int
cachefs_create(vnode_t *dvp, char *nm, vattr_t *vap,
    vcexcl_t exclusive, int mode, vnode_t **vpp, cred_t *cr, int flag,
    caller_context_t *ct, vsecattr_t *vsecp)

{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	cachefscache_t *cachep = fscp->fs_cache;
	int error;
	int connected = 0;
	int held = 0;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_create: ENTER dvp %p, nm %s\n",
		    (void *)dvp, nm);
#endif
	/* cachefs is only supported in the global zone */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the create operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;

		/*
		 * if we are connected, perform the remote portion of the
		 * create.
		 */
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_create_connected(dvp, nm, vap,
			    exclusive, mode, vpp, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			} else if (error) {
				break;
			}
		}

		/* else we must be disconnected */
		else {
			error = cachefs_create_disconnected(dvp, nm, vap,
			    exclusive, mode, vpp, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			} else if (error) {
				break;
			}
		}
		break;
	}

	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_CREATE)) {
		fid_t *fidp = NULL;
		ino64_t fileno = 0;
		cnode_t *cp = NULL;
		if (error == 0)
			cp = VTOC(*vpp);

		if (cp != NULL) {
			fidp = &cp->c_metadata.md_cookie;
			fileno = cp->c_id.cid_fileno;
		}
		cachefs_log_create(cachep, error, fscp->fs_cfsvfsp,
		    fidp, fileno, crgetuid(cr));
	}

	if (held)
		cachefs_cd_release(fscp);

	if (error == 0 && CFS_ISFS_NONSHARED(fscp))
		(void) cachefs_pack(dvp, nm, cr);
	/* wrap device nodes in a specfs vnode before returning them */
	if (error == 0 && IS_DEVVP(*vpp)) {
		struct vnode *spcvp;

		spcvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
		VN_RELE(*vpp);
		if (spcvp == NULL) {
			error = ENOSYS;
		} else {
			*vpp = spcvp;
		}
	}

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_create: EXIT error %d\n", error);
#endif
	return (error);
}


/*
 * Connected-mode create: performs the create on the back file system
 * and builds a cnode for the result, entering it in the cached parent
 * directory when possible.
 */
static int
cachefs_create_connected(vnode_t *dvp, char *nm, vattr_t *vap,
    enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error;
	vnode_t *tvp = NULL;
	vnode_t *devvp;
	fid_t cookie;
	vattr_t va;
	cnode_t *ncp;
	cfs_cid_t cid;
	vnode_t *vp;
	uint32_t valid_fid;

	/* special case if file already exists */
	error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
	if (CFS_TIMEOUT(fscp, error))
		return (error);
	if (error == 0) {
		if (exclusive == EXCL)
			error = EEXIST;
		else if (vp->v_type == VDIR && (mode & VWRITE))
			error = EISDIR;
		else if ((error =
		    cachefs_access_connected(vp, mode, 0, cr)) == 0) {
			/* non-exclusive create of existing file truncates */
			if ((vap->va_mask & AT_SIZE) && (vp->v_type == VREG)) {
				vap->va_mask = AT_SIZE;
				error = cachefs_setattr_common(vp, vap, 0,
				    cr, NULL);
			}
		}
		if (error) {
			VN_RELE(vp);
		} else
			*vpp = vp;
		return (error);
	}

	rw_enter(&dcp->c_rwlock, RW_WRITER);
	mutex_enter(&dcp->c_statelock);

	/* consistency check the directory */
	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* get the backvp if necessary */
	if (dcp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, dcp);
		if (error) {
			mutex_exit(&dcp->c_statelock);
			goto out;
		}
	}

	/* create the file on the back fs */
	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_create (nfsv4): dcp %p, dbackvp %p,"
	    "name %s\n", dcp, dcp->c_backvp, nm));
	error = VOP_CREATE(dcp->c_backvp, nm, vap, exclusive, mode,
	    &devvp, cr, 0, NULL, NULL);
	mutex_exit(&dcp->c_statelock);
	if (error)
		goto out;
	/* strip a specfs node down to the real underlying vnode */
	if (VOP_REALVP(devvp, &tvp, NULL) == 0) {
		VN_HOLD(tvp);
		VN_RELE(devvp);
	} else {
		tvp = devvp;
	}

	/* get the fid and attrs from the back fs */
	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ?
FALSE : TRUE);
	error = cachefs_getcookie(tvp, &cookie, &va, cr, valid_fid);
	if (error)
		goto out;

	/* make the cnode */
	cid.cid_fileno = va.va_nodeid;
	cid.cid_flags = 0;
	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
	    &va, tvp, cr, 0, &ncp);
	if (error)
		goto out;

	*vpp = CTOV(ncp);

	/* enter it in the parent directory */
	mutex_enter(&dcp->c_statelock);
	if (CFS_ISFS_NONSHARED(fscp) &&
	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
		/* see if entry already exists */
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL);
		if (error == ENOENT) {
			/* entry, does not exist, add the new file */
			error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie,
			    &ncp->c_id, SM_ASYNC);
			if (error) {
				/* cannot cache the dir; not fatal */
				cachefs_nocache(dcp);
				error = 0;
			}
			/* XXX should this be done elsewhere, too? */
			dnlc_enter(dvp, nm, *vpp);
		} else {
			/* entry exists or some other problem */
			cachefs_nocache(dcp);
			error = 0;
		}
	}
	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
	mutex_exit(&dcp->c_statelock);

out:
	rw_exit(&dcp->c_rwlock);
	if (tvp)
		VN_RELE(tvp);

	return (error);
}

/*
 * Disconnected-mode create: builds the new file entirely in the front
 * file system and logs the operation (cachefs_dlog_create) so it can
 * be replayed against the back fs on reconnect.  Returns ETIMEDOUT
 * when the cache does not hold enough state to do this locally.
 */
static int
cachefs_create_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
    enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr)
{
	cnode_t *dcp = VTOC(dvp);
	cnode_t *cp;
	cnode_t *ncp = NULL;
	vnode_t *vp;
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error = 0;
	struct vattr va;
	timestruc_t current_time;
	off_t commit = 0;	/* nonzero once the dlog record is written */
	fid_t cookie;
	cfs_cid_t cid;

	rw_enter(&dcp->c_rwlock, RW_WRITER);
	mutex_enter(&dcp->c_statelock);

	/* give up if the directory is not populated */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		mutex_exit(&dcp->c_statelock);
		rw_exit(&dcp->c_rwlock);
		return (ETIMEDOUT);
	}

	/* special case if file already exists */
	error = cachefs_dir_look(dcp, nm, &cookie, NULL, NULL, &cid);
	if (error == EINVAL) {
		/* entry is incomplete; need the back fs */
		mutex_exit(&dcp->c_statelock);
		rw_exit(&dcp->c_rwlock);
		return (ETIMEDOUT);
	}
	if (error == 0) {
		mutex_exit(&dcp->c_statelock);
		rw_exit(&dcp->c_rwlock);
		error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
		    cr, 0, &cp);
		if (error) {
			return (error);
		}
		vp = CTOV(cp);

		if (cp->c_metadata.md_flags & MD_NEEDATTRS)
			error = ETIMEDOUT;
		else if (exclusive == EXCL)
			error = EEXIST;
		else if (vp->v_type == VDIR && (mode & VWRITE))
			error = EISDIR;
		else {
			mutex_enter(&cp->c_statelock);
			error = cachefs_access_local(cp, mode, cr);
			mutex_exit(&cp->c_statelock);
			if (!error) {
				/* non-exclusive create truncates */
				if ((vap->va_mask & AT_SIZE) &&
				    (vp->v_type == VREG)) {
					vap->va_mask = AT_SIZE;
					error = cachefs_setattr_common(vp,
					    vap, 0, cr, NULL);
				}
			}
		}
		if (error) {
			VN_RELE(vp);
		} else
			*vpp = vp;
		return (error);
	}

	/* give up if cannot modify the cache */
	if (CFS_ISFS_WRITE_AROUND(fscp)) {
		mutex_exit(&dcp->c_statelock);
		error = ETIMEDOUT;
		goto out;
	}

	/* check access */
	if (error = cachefs_access_local(dcp, VWRITE, cr)) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* mark dir as modified */
	cachefs_modified(dcp);
	mutex_exit(&dcp->c_statelock);

	/* must be privileged to set sticky bit */
	if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
		vap->va_mode &= ~VSVTX;

	/* make up a reasonable set of attributes */
	cachefs_attr_setup(vap, &va, dcp, cr);

	/* create the cnode */
	error = cachefs_cnode_create(fscp, &va, 0, &ncp);
	if (error)
		goto out;

	mutex_enter(&ncp->c_statelock);

	/* get the front file now instead of later */
	if (vap->va_type == VREG) {
		error = cachefs_getfrontfile(ncp);
		if (error) {
			mutex_exit(&ncp->c_statelock);
			goto out;
		}
		ASSERT(ncp->c_frontvp != NULL);
		ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
		ncp->c_metadata.md_flags |= MD_POPULATED;
	} else {
		/* non-regular files get metadata only, no front file */
		ASSERT(ncp->c_flags & CN_ALLOC_PENDING);
		if (ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
			(void) filegrp_allocattr(ncp->c_filegrp);
		}
		error = filegrp_create_metadata(ncp->c_filegrp,
		    &ncp->c_metadata, &ncp->c_id);
		if (error) {
			mutex_exit(&ncp->c_statelock);
			goto out;
		}
		ncp->c_flags &= ~CN_ALLOC_PENDING;
	}
	mutex_enter(&dcp->c_statelock);
	cachefs_creategid(dcp, ncp, vap, cr);
	cachefs_createacl(dcp, ncp);
	mutex_exit(&dcp->c_statelock);

	/* set times on the file */
	gethrestime(&current_time);
	ncp->c_metadata.md_vattr.va_atime = current_time;
	ncp->c_metadata.md_localctime = current_time;
	ncp->c_metadata.md_localmtime = current_time;
	ncp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME;

	/* reserve space for the daemon cid mapping */
	error = cachefs_dlog_cidmap(fscp);
	if (error) {
		mutex_exit(&ncp->c_statelock);
		goto out;
	}
	ncp->c_metadata.md_flags |= MD_MAPPING;

	/* mark the new file as modified */
	if (cachefs_modified_alloc(ncp)) {
		mutex_exit(&ncp->c_statelock);
		error = ENOSPC;
		goto out;
	}
	ncp->c_flags |= CN_UPDATED;

	/*
	 * write the metadata now rather than waiting until
	 * inactive so that if there's no space we can let
	 * the caller know.
	 */
	ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
	ASSERT((ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
	error = filegrp_write_metadata(ncp->c_filegrp,
	    &ncp->c_id, &ncp->c_metadata);
	if (error) {
		mutex_exit(&ncp->c_statelock);
		goto out;
	}

	/* log the operation */
	commit = cachefs_dlog_create(fscp, dcp, nm, vap, exclusive,
	    mode, ncp, 0, cr);
	if (commit == 0) {
		mutex_exit(&ncp->c_statelock);
		error = ENOSPC;
		goto out;
	}

	mutex_exit(&ncp->c_statelock);

	mutex_enter(&dcp->c_statelock);

	/* update parent dir times */
	dcp->c_metadata.md_localmtime = current_time;
	dcp->c_metadata.md_flags |= MD_LOCALMTIME;
	dcp->c_flags |= CN_UPDATED;

	/* enter new file name in the parent directory */
	if (dcp->c_metadata.md_flags & MD_POPULATED) {
		error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie,
		    &ncp->c_id, 0);
		if (error) {
			cachefs_nocache(dcp);
			mutex_exit(&dcp->c_statelock);
			error = ETIMEDOUT;
			goto out;
		}
		dnlc_enter(dvp, nm, CTOV(ncp));
	} else {
		mutex_exit(&dcp->c_statelock);
		error = ETIMEDOUT;
		goto out;
	}
	mutex_exit(&dcp->c_statelock);

out:
	rw_exit(&dcp->c_rwlock);

	/* seal the dlog record with the final status of the operation */
	if (commit) {
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
	}
	if (error) {
		/* destroy the cnode we created */
		if (ncp) {
			mutex_enter(&ncp->c_statelock);
			ncp->c_flags |= CN_DESTROY;
			mutex_exit(&ncp->c_statelock);
			VN_RELE(CTOV(ncp));
		}
	} else {
		*vpp = CTOV(ncp);
	}
	return (error);
}

/*
 * VOP_REMOVE entry point.  Loops over connected/disconnected access
 * states; in disconnected mode does extra permission and name checks
 * locally before dispatching to the connected or disconnected remove
 * helper.
 */
/*ARGSUSED*/
static int
cachefs_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
    int flags)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	cachefscache_t *cachep = fscp->fs_cache;
	int error = 0;
	int held = 0;
	int connected = 0;
	size_t namlen;
	vnode_t *vp = NULL;
	int vfslock = 0;	/* nonzero if we hold vp's vfs lock */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_remove: ENTER dvp %p name %s\n",
		    (void *)dvp, nm);
#endif
	/* cachefs is only supported in the global zone */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
		ASSERT(dcp->c_flags & CN_NOCACHE);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the remove operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	for (;;) {
		/* drop state acquired on a previous pass before retrying */
		if (vfslock) {
			vn_vfsunlock(vp);
			vfslock = 0;
		}
		if (vp) {
			VN_RELE(vp);
			vp = NULL;
		}

		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;

		/* if disconnected, do some extra error checking */
		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
			/* check permissions */
			mutex_enter(&dcp->c_statelock);
			error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
			mutex_exit(&dcp->c_statelock);
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			}
			if (error)
				break;

			namlen = strlen(nm);
			if (namlen == 0) {
				error = EINVAL;
				break;
			}

			/* cannot remove . and .. */
			if (nm[0] == '.') {
				if (namlen == 1) {
					error = EINVAL;
					break;
				} else if (namlen == 2 && nm[1] == '.') {
					error = EEXIST;
					break;
				}
			}

		}

		/* get the cnode of the file to delete */
		error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
		if (error) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				if (CFS_TIMEOUT(fscp, error)) {
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					connected = 0;
					continue;
				}
			} else {
				if (CFS_TIMEOUT(fscp, error)) {
					connected = 1;
					continue;
				}
			}
			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE)) {
				struct fid foo;

				bzero(&foo, sizeof (foo));
				cachefs_log_remove(cachep, error,
				    fscp->fs_cfsvfsp, &foo, 0, crgetuid(cr));
			}
			break;
		}

		if (vp->v_type == VDIR) {
			/* must be privileged to remove dirs with unlink() */
			if ((error = secpolicy_fs_linkdir(cr, vp->v_vfsp)) != 0)
				break;

			/* see ufs_dirremove for why this is done, mount race */
			if (vn_vfswlock(vp)) {
				error = EBUSY;
				break;
			}
			vfslock = 1;
			if (vn_mountedvfs(vp) != NULL) {
				error = EBUSY;
				break;
			}
		}

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_remove_connected(dvp, nm, cr, vp);
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_remove_disconnected(dvp, nm, cr,
			    vp);
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			}
		}
		break;
	}

#if 0
	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE))
		cachefs_log_remove(cachep, error, fscp->fs_cfsvfsp,
		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
		    crgetuid(cr));
#endif

	if (held)
		cachefs_cd_release(fscp);

	if (vfslock)
		vn_vfsunlock(vp);

	if (vp)
		VN_RELE(vp);

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_remove: EXIT dvp %p\n", (void *)dvp);
#endif

	return (error);
}

/*
 * Connected-mode remove: performs VOP_REMOVE on the back file system
 * and updates the cached directory and the removed file's cnode.
 * Active files (extra vnode references) are renamed via a link first
 * so operations on them can continue.
 */
int
cachefs_remove_connected(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
{
	cnode_t *dcp = VTOC(dvp);
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error = 0;

	/*
	 * Acquire the rwlock (WRITER) on the directory to prevent other
	 * activity on the directory.
	 */
	rw_enter(&dcp->c_rwlock, RW_WRITER);

	/* purge dnlc of this entry so can get accurate vnode count */
	dnlc_purge_vp(vp);

	/*
	 * If the cnode is active, make a link to the file
	 * so operations on the file will continue.
	 */
	if ((vp->v_type != VDIR) &&
	    !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) {
		error = cachefs_remove_dolink(dvp, vp, nm, cr);
		if (error)
			goto out;
	}

	/* else call backfs NFSv4 handler if NFSv4 */
	else if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_remove_backfs_nfsv4(dvp, nm, cr, vp);
		goto out;
	}

	/* else drop the backvp so nfs does not do rename */
	else if (cp->c_backvp) {
		mutex_enter(&cp->c_statelock);
		/* re-check under the lock before releasing */
		if (cp->c_backvp) {
			VN_RELE(cp->c_backvp);
			cp->c_backvp = NULL;
		}
		mutex_exit(&cp->c_statelock);
	}

	mutex_enter(&dcp->c_statelock);

	/* get the backvp */
	if (dcp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, dcp);
		if (error) {
			mutex_exit(&dcp->c_statelock);
			goto out;
		}
	}

	/* check directory consistency */
	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* perform the remove on the back fs */
	error = VOP_REMOVE(dcp->c_backvp, nm, cr, NULL, 0);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* the dir has been modified */
	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);

	/* remove the entry from the populated directory */
	if (CFS_ISFS_NONSHARED(fscp) &&
	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
		error = cachefs_dir_rmentry(dcp, nm);
		if (error) {
			/* cannot keep the cached dir consistent; drop it */
			cachefs_nocache(dcp);
			error = 0;
		}
	}
	mutex_exit(&dcp->c_statelock);

	/* fix up the file we deleted */
	mutex_enter(&cp->c_statelock);
	/* last link gone: destroy the cnode on inactive */
	if (cp->c_attr.va_nlink == 1)
		cp->c_flags |= CN_DESTROY;
	else
		cp->c_flags |= CN_UPDATED;

	cp->c_attr.va_nlink--;
	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
	mutex_exit(&cp->c_statelock);

out:
	rw_exit(&dcp->c_rwlock);
	return (error);
}

/*
 * cachefs_remove_backfs_nfsv4
 *
 * Call NFSv4 back filesystem to handle the remove (cachefs
 * pass-through support for NFSv4).
 */
int
cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
{
	cnode_t *dcp = VTOC(dvp);
	cnode_t *cp = VTOC(vp);
	vnode_t *dbackvp;
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error = 0;

	/*
	 * For NFSv4 pass-through to work, only connected operation
	 * is supported, the cnode backvp must exist, and cachefs
	 * optional (eg., disconnectable) flags are turned off. Assert
	 * these conditions for the getattr operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	/* Should hold the directory readwrite lock to update directory */
	ASSERT(RW_WRITE_HELD(&dcp->c_rwlock));

	/*
	 * Update attributes for directory. Note that
	 * CFSOP_CHECK_COBJECT asserts for c_statelock being
	 * held, so grab it before calling the routine.
4609 */ 4610 mutex_enter(&dcp->c_statelock); 4611 error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr); 4612 mutex_exit(&dcp->c_statelock); 4613 if (error) 4614 goto out; 4615 4616 /* 4617 * Update attributes for cp. Note that CFSOP_CHECK_COBJECT 4618 * asserts for c_statelock being held, so grab it before 4619 * calling the routine. 4620 */ 4621 mutex_enter(&cp->c_statelock); 4622 error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr); 4623 if (error) { 4624 mutex_exit(&cp->c_statelock); 4625 goto out; 4626 } 4627 4628 /* 4629 * Drop the backvp so nfs if the link count is 1 so that 4630 * nfs does not do rename. Ensure that we will destroy the cnode 4631 * since this cnode no longer contains the backvp. Note that we 4632 * maintain lock on this cnode to prevent change till the remove 4633 * completes, otherwise other operations will encounter an ESTALE 4634 * if they try to use the cnode with CN_DESTROY set (see 4635 * cachefs_get_backvp()), or change the state of the cnode 4636 * while we're removing it. 4637 */ 4638 if (cp->c_attr.va_nlink == 1) { 4639 /* 4640 * The unldvp information is created for the case 4641 * when there is more than one reference on the 4642 * vnode when a remove operation is called. If the 4643 * remove itself was holding a reference to the 4644 * vnode, then a subsequent remove will remove the 4645 * backvp, so we need to get rid of the unldvp 4646 * before removing the backvp. An alternate would 4647 * be to simply ignore the remove and let the 4648 * inactivation routine do the deletion of the 4649 * unldvp. 
4650 */ 4651 if (cp->c_unldvp) { 4652 VN_RELE(cp->c_unldvp); 4653 cachefs_kmem_free(cp->c_unlname, MAXNAMELEN); 4654 crfree(cp->c_unlcred); 4655 cp->c_unldvp = NULL; 4656 cp->c_unlcred = NULL; 4657 } 4658 cp->c_flags |= CN_DESTROY; 4659 cp->c_attr.va_nlink = 0; 4660 VN_RELE(cp->c_backvp); 4661 cp->c_backvp = NULL; 4662 } 4663 4664 /* perform the remove on back fs after extracting directory backvp */ 4665 mutex_enter(&dcp->c_statelock); 4666 dbackvp = dcp->c_backvp; 4667 mutex_exit(&dcp->c_statelock); 4668 4669 CFS_DPRINT_BACKFS_NFSV4(fscp, 4670 ("cachefs_remove (nfsv4): dcp %p, dbackvp %p, name %s\n", 4671 dcp, dbackvp, nm)); 4672 error = VOP_REMOVE(dbackvp, nm, cr, NULL, 0); 4673 if (error) { 4674 mutex_exit(&cp->c_statelock); 4675 goto out; 4676 } 4677 4678 /* fix up the file we deleted, if not destroying the cnode */ 4679 if ((cp->c_flags & CN_DESTROY) == 0) { 4680 cp->c_attr.va_nlink--; 4681 cp->c_flags |= CN_UPDATED; 4682 } 4683 4684 mutex_exit(&cp->c_statelock); 4685 4686 out: 4687 return (error); 4688 } 4689 4690 int 4691 cachefs_remove_disconnected(vnode_t *dvp, char *nm, cred_t *cr, 4692 vnode_t *vp) 4693 { 4694 cnode_t *dcp = VTOC(dvp); 4695 cnode_t *cp = VTOC(vp); 4696 fscache_t *fscp = C_TO_FSCACHE(dcp); 4697 int error = 0; 4698 off_t commit = 0; 4699 timestruc_t current_time; 4700 4701 if (CFS_ISFS_WRITE_AROUND(fscp)) 4702 return (ETIMEDOUT); 4703 4704 if (cp->c_metadata.md_flags & MD_NEEDATTRS) 4705 return (ETIMEDOUT); 4706 4707 /* 4708 * Acquire the rwlock (WRITER) on the directory to prevent other 4709 * activity on the directory. 
4710 */ 4711 rw_enter(&dcp->c_rwlock, RW_WRITER); 4712 4713 /* dir must be populated */ 4714 if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) { 4715 error = ETIMEDOUT; 4716 goto out; 4717 } 4718 4719 mutex_enter(&dcp->c_statelock); 4720 mutex_enter(&cp->c_statelock); 4721 4722 error = cachefs_stickyrmchk(dcp, cp, cr); 4723 4724 mutex_exit(&cp->c_statelock); 4725 mutex_exit(&dcp->c_statelock); 4726 if (error) 4727 goto out; 4728 4729 /* purge dnlc of this entry so can get accurate vnode count */ 4730 dnlc_purge_vp(vp); 4731 4732 /* 4733 * If the cnode is active, make a link to the file 4734 * so operations on the file will continue. 4735 */ 4736 if ((vp->v_type != VDIR) && 4737 !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) { 4738 error = cachefs_remove_dolink(dvp, vp, nm, cr); 4739 if (error) 4740 goto out; 4741 } 4742 4743 if (cp->c_attr.va_nlink > 1) { 4744 mutex_enter(&cp->c_statelock); 4745 if (cachefs_modified_alloc(cp)) { 4746 mutex_exit(&cp->c_statelock); 4747 error = ENOSPC; 4748 goto out; 4749 } 4750 if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) { 4751 error = cachefs_dlog_cidmap(fscp); 4752 if (error) { 4753 mutex_exit(&cp->c_statelock); 4754 error = ENOSPC; 4755 goto out; 4756 } 4757 cp->c_metadata.md_flags |= MD_MAPPING; 4758 cp->c_flags |= CN_UPDATED; 4759 } 4760 mutex_exit(&cp->c_statelock); 4761 } 4762 4763 /* log the remove */ 4764 commit = cachefs_dlog_remove(fscp, dcp, nm, cp, cr); 4765 if (commit == 0) { 4766 error = ENOSPC; 4767 goto out; 4768 } 4769 4770 /* remove the file from the dir */ 4771 mutex_enter(&dcp->c_statelock); 4772 if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) { 4773 mutex_exit(&dcp->c_statelock); 4774 error = ETIMEDOUT; 4775 goto out; 4776 4777 } 4778 cachefs_modified(dcp); 4779 error = cachefs_dir_rmentry(dcp, nm); 4780 if (error) { 4781 mutex_exit(&dcp->c_statelock); 4782 if (error == ENOTDIR) 4783 error = ETIMEDOUT; 4784 goto out; 4785 } 4786 4787 /* update parent dir times */ 4788 
gethrestime(¤t_time); 4789 dcp->c_metadata.md_localctime = current_time; 4790 dcp->c_metadata.md_localmtime = current_time; 4791 dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME; 4792 dcp->c_flags |= CN_UPDATED; 4793 mutex_exit(&dcp->c_statelock); 4794 4795 /* adjust file we are deleting */ 4796 mutex_enter(&cp->c_statelock); 4797 cp->c_attr.va_nlink--; 4798 cp->c_metadata.md_localctime = current_time; 4799 cp->c_metadata.md_flags |= MD_LOCALCTIME; 4800 if (cp->c_attr.va_nlink == 0) { 4801 cp->c_flags |= CN_DESTROY; 4802 } else { 4803 cp->c_flags |= CN_UPDATED; 4804 } 4805 mutex_exit(&cp->c_statelock); 4806 4807 out: 4808 if (commit) { 4809 /* commit the log entry */ 4810 if (cachefs_dlog_commit(fscp, commit, error)) { 4811 /*EMPTY*/ 4812 /* XXX bob: fix on panic */ 4813 } 4814 } 4815 4816 rw_exit(&dcp->c_rwlock); 4817 return (error); 4818 } 4819 4820 /*ARGSUSED*/ 4821 static int 4822 cachefs_link(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr, 4823 caller_context_t *ct, int flags) 4824 { 4825 fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp); 4826 cnode_t *tdcp = VTOC(tdvp); 4827 struct vnode *realvp; 4828 int error = 0; 4829 int held = 0; 4830 int connected = 0; 4831 4832 #ifdef CFSDEBUG 4833 CFS_DEBUG(CFSDEBUG_VOPS) 4834 printf("cachefs_link: ENTER fvp %p tdvp %p tnm %s\n", 4835 (void *)fvp, (void *)tdvp, tnm); 4836 #endif 4837 4838 if (getzoneid() != GLOBAL_ZONEID) { 4839 error = EPERM; 4840 goto out; 4841 } 4842 4843 if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE)) 4844 ASSERT(tdcp->c_flags & CN_NOCACHE); 4845 4846 if (VOP_REALVP(fvp, &realvp, ct) == 0) { 4847 fvp = realvp; 4848 } 4849 4850 /* 4851 * Cachefs only provides pass-through support for NFSv4, 4852 * and all vnode operations are passed through to the 4853 * back file system. For NFSv4 pass-through to work, only 4854 * connected operation is supported, the cnode backvp must 4855 * exist, and cachefs optional (eg., disconnectable) flags 4856 * are turned off. 
Assert these conditions to ensure that 4857 * the backfilesystem is called for the link operation. 4858 */ 4859 4860 CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp); 4861 CFS_BACKFS_NFSV4_ASSERT_CNODE(tdcp); 4862 4863 for (;;) { 4864 /* get (or renew) access to the file system */ 4865 if (held) { 4866 /* Won't loop with NFSv4 connected behavior */ 4867 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0); 4868 rw_exit(&tdcp->c_rwlock); 4869 cachefs_cd_release(fscp); 4870 held = 0; 4871 } 4872 error = cachefs_cd_access(fscp, connected, 1); 4873 if (error) 4874 break; 4875 rw_enter(&tdcp->c_rwlock, RW_WRITER); 4876 held = 1; 4877 4878 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) { 4879 error = cachefs_link_connected(tdvp, fvp, tnm, cr); 4880 if (CFS_TIMEOUT(fscp, error)) { 4881 rw_exit(&tdcp->c_rwlock); 4882 cachefs_cd_release(fscp); 4883 held = 0; 4884 cachefs_cd_timedout(fscp); 4885 connected = 0; 4886 continue; 4887 } 4888 } else { 4889 error = cachefs_link_disconnected(tdvp, fvp, tnm, 4890 cr); 4891 if (CFS_TIMEOUT(fscp, error)) { 4892 connected = 1; 4893 continue; 4894 } 4895 } 4896 break; 4897 } 4898 4899 if (held) { 4900 rw_exit(&tdcp->c_rwlock); 4901 cachefs_cd_release(fscp); 4902 } 4903 4904 #ifdef CFS_CD_DEBUG 4905 ASSERT((curthread->t_flag & T_CD_HELD) == 0); 4906 #endif 4907 out: 4908 #ifdef CFSDEBUG 4909 CFS_DEBUG(CFSDEBUG_VOPS) 4910 printf("cachefs_link: EXIT fvp %p tdvp %p tnm %s\n", 4911 (void *)fvp, (void *)tdvp, tnm); 4912 #endif 4913 return (error); 4914 } 4915 4916 static int 4917 cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr) 4918 { 4919 cnode_t *tdcp = VTOC(tdvp); 4920 cnode_t *fcp = VTOC(fvp); 4921 fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp); 4922 int error = 0; 4923 vnode_t *backvp = NULL; 4924 4925 if (tdcp != fcp) { 4926 mutex_enter(&fcp->c_statelock); 4927 4928 if (fcp->c_backvp == NULL) { 4929 error = cachefs_getbackvp(fscp, fcp); 4930 if (error) { 4931 mutex_exit(&fcp->c_statelock); 4932 goto out; 4933 } 4934 } 4935 4936 error = 
CFSOP_CHECK_COBJECT(fscp, fcp, 0, cr); 4937 if (error) { 4938 mutex_exit(&fcp->c_statelock); 4939 goto out; 4940 } 4941 backvp = fcp->c_backvp; 4942 VN_HOLD(backvp); 4943 mutex_exit(&fcp->c_statelock); 4944 } 4945 4946 mutex_enter(&tdcp->c_statelock); 4947 4948 /* get backvp of target directory */ 4949 if (tdcp->c_backvp == NULL) { 4950 error = cachefs_getbackvp(fscp, tdcp); 4951 if (error) { 4952 mutex_exit(&tdcp->c_statelock); 4953 goto out; 4954 } 4955 } 4956 4957 /* consistency check target directory */ 4958 error = CFSOP_CHECK_COBJECT(fscp, tdcp, 0, cr); 4959 if (error) { 4960 mutex_exit(&tdcp->c_statelock); 4961 goto out; 4962 } 4963 if (backvp == NULL) { 4964 backvp = tdcp->c_backvp; 4965 VN_HOLD(backvp); 4966 } 4967 4968 /* perform the link on the back fs */ 4969 CFS_DPRINT_BACKFS_NFSV4(fscp, 4970 ("cachefs_link (nfsv4): tdcp %p, tdbackvp %p, " 4971 "name %s\n", tdcp, tdcp->c_backvp, tnm)); 4972 error = VOP_LINK(tdcp->c_backvp, backvp, tnm, cr, NULL, 0); 4973 if (error) { 4974 mutex_exit(&tdcp->c_statelock); 4975 goto out; 4976 } 4977 4978 CFSOP_MODIFY_COBJECT(fscp, tdcp, cr); 4979 4980 /* if the dir is populated, add the new link */ 4981 if (CFS_ISFS_NONSHARED(fscp) && 4982 (tdcp->c_metadata.md_flags & MD_POPULATED)) { 4983 error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie, 4984 &fcp->c_id, SM_ASYNC); 4985 if (error) { 4986 cachefs_nocache(tdcp); 4987 error = 0; 4988 } 4989 } 4990 mutex_exit(&tdcp->c_statelock); 4991 4992 /* get the new link count on the file */ 4993 mutex_enter(&fcp->c_statelock); 4994 fcp->c_flags |= CN_UPDATED; 4995 CFSOP_MODIFY_COBJECT(fscp, fcp, cr); 4996 if (fcp->c_backvp == NULL) { 4997 error = cachefs_getbackvp(fscp, fcp); 4998 if (error) { 4999 mutex_exit(&fcp->c_statelock); 5000 goto out; 5001 } 5002 } 5003 5004 /* XXX bob: given what modify_cobject does this seems unnecessary */ 5005 fcp->c_attr.va_mask = AT_ALL; 5006 error = VOP_GETATTR(fcp->c_backvp, &fcp->c_attr, 0, cr, NULL); 5007 mutex_exit(&fcp->c_statelock); 5008 out: 
5009 if (backvp) 5010 VN_RELE(backvp); 5011 5012 return (error); 5013 } 5014 5015 static int 5016 cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp, char *tnm, 5017 cred_t *cr) 5018 { 5019 cnode_t *tdcp = VTOC(tdvp); 5020 cnode_t *fcp = VTOC(fvp); 5021 fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp); 5022 int error = 0; 5023 timestruc_t current_time; 5024 off_t commit = 0; 5025 5026 if (fvp->v_type == VDIR && secpolicy_fs_linkdir(cr, fvp->v_vfsp) != 0 || 5027 fcp->c_attr.va_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0) 5028 return (EPERM); 5029 5030 if (CFS_ISFS_WRITE_AROUND(fscp)) 5031 return (ETIMEDOUT); 5032 5033 if (fcp->c_metadata.md_flags & MD_NEEDATTRS) 5034 return (ETIMEDOUT); 5035 5036 mutex_enter(&tdcp->c_statelock); 5037 5038 /* check permissions */ 5039 if (error = cachefs_access_local(tdcp, (VEXEC|VWRITE), cr)) { 5040 mutex_exit(&tdcp->c_statelock); 5041 goto out; 5042 } 5043 5044 /* the directory front file must be populated */ 5045 if ((tdcp->c_metadata.md_flags & MD_POPULATED) == 0) { 5046 error = ETIMEDOUT; 5047 mutex_exit(&tdcp->c_statelock); 5048 goto out; 5049 } 5050 5051 /* make sure tnm does not already exist in the directory */ 5052 error = cachefs_dir_look(tdcp, tnm, NULL, NULL, NULL, NULL); 5053 if (error == ENOTDIR) { 5054 error = ETIMEDOUT; 5055 mutex_exit(&tdcp->c_statelock); 5056 goto out; 5057 } 5058 if (error != ENOENT) { 5059 error = EEXIST; 5060 mutex_exit(&tdcp->c_statelock); 5061 goto out; 5062 } 5063 5064 mutex_enter(&fcp->c_statelock); 5065 5066 /* create a mapping for the file if necessary */ 5067 if ((fcp->c_metadata.md_flags & MD_MAPPING) == 0) { 5068 error = cachefs_dlog_cidmap(fscp); 5069 if (error) { 5070 mutex_exit(&fcp->c_statelock); 5071 mutex_exit(&tdcp->c_statelock); 5072 error = ENOSPC; 5073 goto out; 5074 } 5075 fcp->c_metadata.md_flags |= MD_MAPPING; 5076 fcp->c_flags |= CN_UPDATED; 5077 } 5078 5079 /* mark file as modified */ 5080 if (cachefs_modified_alloc(fcp)) { 5081 mutex_exit(&fcp->c_statelock); 
5082 mutex_exit(&tdcp->c_statelock); 5083 error = ENOSPC; 5084 goto out; 5085 } 5086 mutex_exit(&fcp->c_statelock); 5087 5088 /* log the operation */ 5089 commit = cachefs_dlog_link(fscp, tdcp, tnm, fcp, cr); 5090 if (commit == 0) { 5091 mutex_exit(&tdcp->c_statelock); 5092 error = ENOSPC; 5093 goto out; 5094 } 5095 5096 gethrestime(¤t_time); 5097 5098 /* make the new link */ 5099 cachefs_modified(tdcp); 5100 error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie, 5101 &fcp->c_id, SM_ASYNC); 5102 if (error) { 5103 error = 0; 5104 mutex_exit(&tdcp->c_statelock); 5105 goto out; 5106 } 5107 5108 /* Update mtime/ctime of parent dir */ 5109 tdcp->c_metadata.md_localmtime = current_time; 5110 tdcp->c_metadata.md_localctime = current_time; 5111 tdcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME; 5112 tdcp->c_flags |= CN_UPDATED; 5113 mutex_exit(&tdcp->c_statelock); 5114 5115 /* update the file we linked to */ 5116 mutex_enter(&fcp->c_statelock); 5117 fcp->c_attr.va_nlink++; 5118 fcp->c_metadata.md_localctime = current_time; 5119 fcp->c_metadata.md_flags |= MD_LOCALCTIME; 5120 fcp->c_flags |= CN_UPDATED; 5121 mutex_exit(&fcp->c_statelock); 5122 5123 out: 5124 if (commit) { 5125 /* commit the log entry */ 5126 if (cachefs_dlog_commit(fscp, commit, error)) { 5127 /*EMPTY*/ 5128 /* XXX bob: fix on panic */ 5129 } 5130 } 5131 5132 return (error); 5133 } 5134 5135 /* 5136 * Serialize all renames in CFS, to avoid deadlocks - We have to hold two 5137 * cnodes atomically. 
5138 */ 5139 kmutex_t cachefs_rename_lock; 5140 5141 /*ARGSUSED*/ 5142 static int 5143 cachefs_rename(vnode_t *odvp, char *onm, vnode_t *ndvp, 5144 char *nnm, cred_t *cr, caller_context_t *ct, int flags) 5145 { 5146 fscache_t *fscp = C_TO_FSCACHE(VTOC(odvp)); 5147 cachefscache_t *cachep = fscp->fs_cache; 5148 int error = 0; 5149 int held = 0; 5150 int connected = 0; 5151 vnode_t *delvp = NULL; 5152 vnode_t *tvp = NULL; 5153 int vfslock = 0; 5154 struct vnode *realvp; 5155 5156 if (getzoneid() != GLOBAL_ZONEID) 5157 return (EPERM); 5158 5159 if (VOP_REALVP(ndvp, &realvp, ct) == 0) 5160 ndvp = realvp; 5161 5162 /* 5163 * if the fs NOFILL or NOCACHE flags are on, then the old and new 5164 * directory cnodes better indicate NOCACHE mode as well. 5165 */ 5166 ASSERT( 5167 (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE)) == 0 || 5168 ((VTOC(odvp)->c_flags & CN_NOCACHE) && 5169 (VTOC(ndvp)->c_flags & CN_NOCACHE))); 5170 5171 /* 5172 * Cachefs only provides pass-through support for NFSv4, 5173 * and all vnode operations are passed through to the 5174 * back file system. For NFSv4 pass-through to work, only 5175 * connected operation is supported, the cnode backvp must 5176 * exist, and cachefs optional (eg., disconnectable) flags 5177 * are turned off. Assert these conditions to ensure that 5178 * the backfilesystem is called for the rename operation. 
5179 */ 5180 CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp); 5181 CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(odvp)); 5182 CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(ndvp)); 5183 5184 for (;;) { 5185 if (vfslock) { 5186 vn_vfsunlock(delvp); 5187 vfslock = 0; 5188 } 5189 if (delvp) { 5190 VN_RELE(delvp); 5191 delvp = NULL; 5192 } 5193 5194 /* get (or renew) access to the file system */ 5195 if (held) { 5196 /* Won't loop for NFSv4 connected support */ 5197 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0); 5198 cachefs_cd_release(fscp); 5199 held = 0; 5200 } 5201 error = cachefs_cd_access(fscp, connected, 1); 5202 if (error) 5203 break; 5204 held = 1; 5205 5206 /* sanity check */ 5207 if ((odvp->v_type != VDIR) || (ndvp->v_type != VDIR)) { 5208 error = EINVAL; 5209 break; 5210 } 5211 5212 /* cannot rename from or to . or .. */ 5213 if (strcmp(onm, ".") == 0 || strcmp(onm, "..") == 0 || 5214 strcmp(nnm, ".") == 0 || strcmp(nnm, "..") == 0) { 5215 error = EINVAL; 5216 break; 5217 } 5218 5219 if (odvp != ndvp) { 5220 /* 5221 * if moving a directory, its notion 5222 * of ".." 
will change 5223 */ 5224 error = cachefs_lookup_common(odvp, onm, &tvp, 5225 NULL, 0, NULL, cr); 5226 if (error == 0) { 5227 ASSERT(tvp != NULL); 5228 if (tvp->v_type == VDIR) { 5229 cnode_t *cp = VTOC(tvp); 5230 5231 dnlc_remove(tvp, ".."); 5232 5233 mutex_enter(&cp->c_statelock); 5234 CFSOP_MODIFY_COBJECT(fscp, cp, cr); 5235 mutex_exit(&cp->c_statelock); 5236 } 5237 } else { 5238 tvp = NULL; 5239 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) { 5240 if (CFS_TIMEOUT(fscp, error)) { 5241 cachefs_cd_release(fscp); 5242 held = 0; 5243 cachefs_cd_timedout(fscp); 5244 connected = 0; 5245 continue; 5246 } 5247 } else { 5248 if (CFS_TIMEOUT(fscp, error)) { 5249 connected = 1; 5250 continue; 5251 } 5252 } 5253 break; 5254 } 5255 } 5256 5257 /* get the cnode if file being deleted */ 5258 error = cachefs_lookup_common(ndvp, nnm, &delvp, NULL, 0, 5259 NULL, cr); 5260 if (error) { 5261 delvp = NULL; 5262 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) { 5263 if (CFS_TIMEOUT(fscp, error)) { 5264 cachefs_cd_release(fscp); 5265 held = 0; 5266 cachefs_cd_timedout(fscp); 5267 connected = 0; 5268 continue; 5269 } 5270 } else { 5271 if (CFS_TIMEOUT(fscp, error)) { 5272 connected = 1; 5273 continue; 5274 } 5275 } 5276 if (error != ENOENT) 5277 break; 5278 } 5279 5280 if (delvp && delvp->v_type == VDIR) { 5281 /* see ufs_dirremove for why this is done, mount race */ 5282 if (vn_vfswlock(delvp)) { 5283 error = EBUSY; 5284 break; 5285 } 5286 vfslock = 1; 5287 if (vn_mountedvfs(delvp) != NULL) { 5288 error = EBUSY; 5289 break; 5290 } 5291 } 5292 5293 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) { 5294 error = cachefs_rename_connected(odvp, onm, 5295 ndvp, nnm, cr, delvp); 5296 if (CFS_TIMEOUT(fscp, error)) { 5297 cachefs_cd_release(fscp); 5298 held = 0; 5299 cachefs_cd_timedout(fscp); 5300 connected = 0; 5301 continue; 5302 } 5303 } else { 5304 error = cachefs_rename_disconnected(odvp, onm, 5305 ndvp, nnm, cr, delvp); 5306 if (CFS_TIMEOUT(fscp, error)) { 5307 connected = 1; 5308 continue; 
5309 } 5310 } 5311 break; 5312 } 5313 5314 if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RENAME)) { 5315 struct fid gone; 5316 5317 bzero(&gone, sizeof (gone)); 5318 gone.fid_len = MAXFIDSZ; 5319 if (delvp != NULL) 5320 (void) VOP_FID(delvp, &gone, ct); 5321 5322 cachefs_log_rename(cachep, error, fscp->fs_cfsvfsp, 5323 &gone, 0, (delvp != NULL), crgetuid(cr)); 5324 } 5325 5326 if (held) 5327 cachefs_cd_release(fscp); 5328 5329 if (vfslock) 5330 vn_vfsunlock(delvp); 5331 5332 if (delvp) 5333 VN_RELE(delvp); 5334 if (tvp) 5335 VN_RELE(tvp); 5336 5337 #ifdef CFS_CD_DEBUG 5338 ASSERT((curthread->t_flag & T_CD_HELD) == 0); 5339 #endif 5340 return (error); 5341 } 5342 5343 static int 5344 cachefs_rename_connected(vnode_t *odvp, char *onm, vnode_t *ndvp, 5345 char *nnm, cred_t *cr, vnode_t *delvp) 5346 { 5347 cnode_t *odcp = VTOC(odvp); 5348 cnode_t *ndcp = VTOC(ndvp); 5349 vnode_t *revp = NULL; 5350 cnode_t *recp; 5351 cnode_t *delcp; 5352 fscache_t *fscp = C_TO_FSCACHE(odcp); 5353 int error = 0; 5354 struct fid cookie; 5355 struct fid *cookiep; 5356 cfs_cid_t cid; 5357 int gotdirent; 5358 5359 /* find the file we are renaming */ 5360 error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr); 5361 if (error) 5362 return (error); 5363 recp = VTOC(revp); 5364 5365 /* 5366 * To avoid deadlock, we acquire this global rename lock before 5367 * we try to get the locks for the source and target directories. 
5368 */ 5369 mutex_enter(&cachefs_rename_lock); 5370 rw_enter(&odcp->c_rwlock, RW_WRITER); 5371 if (odcp != ndcp) { 5372 rw_enter(&ndcp->c_rwlock, RW_WRITER); 5373 } 5374 mutex_exit(&cachefs_rename_lock); 5375 5376 ASSERT((odcp->c_flags & CN_ASYNC_POP_WORKING) == 0); 5377 ASSERT((ndcp->c_flags & CN_ASYNC_POP_WORKING) == 0); 5378 5379 mutex_enter(&odcp->c_statelock); 5380 if (odcp->c_backvp == NULL) { 5381 error = cachefs_getbackvp(fscp, odcp); 5382 if (error) { 5383 mutex_exit(&odcp->c_statelock); 5384 goto out; 5385 } 5386 } 5387 5388 error = CFSOP_CHECK_COBJECT(fscp, odcp, 0, cr); 5389 if (error) { 5390 mutex_exit(&odcp->c_statelock); 5391 goto out; 5392 } 5393 mutex_exit(&odcp->c_statelock); 5394 5395 if (odcp != ndcp) { 5396 mutex_enter(&ndcp->c_statelock); 5397 if (ndcp->c_backvp == NULL) { 5398 error = cachefs_getbackvp(fscp, ndcp); 5399 if (error) { 5400 mutex_exit(&ndcp->c_statelock); 5401 goto out; 5402 } 5403 } 5404 5405 error = CFSOP_CHECK_COBJECT(fscp, ndcp, 0, cr); 5406 if (error) { 5407 mutex_exit(&ndcp->c_statelock); 5408 goto out; 5409 } 5410 mutex_exit(&ndcp->c_statelock); 5411 } 5412 5413 /* if a file is being deleted because of this rename */ 5414 if (delvp) { 5415 /* if src and dest file are same */ 5416 if (delvp == revp) { 5417 error = 0; 5418 goto out; 5419 } 5420 5421 /* 5422 * If the cnode is active, make a link to the file 5423 * so operations on the file will continue. 
5424 */ 5425 dnlc_purge_vp(delvp); 5426 delcp = VTOC(delvp); 5427 if ((delvp->v_type != VDIR) && 5428 !((delvp->v_count == 1) || 5429 ((delvp->v_count == 2) && delcp->c_ipending))) { 5430 error = cachefs_remove_dolink(ndvp, delvp, nnm, cr); 5431 if (error) 5432 goto out; 5433 } 5434 } 5435 5436 /* do the rename on the back fs */ 5437 CFS_DPRINT_BACKFS_NFSV4(fscp, 5438 ("cachefs_rename (nfsv4): odcp %p, odbackvp %p, " 5439 " ndcp %p, ndbackvp %p, onm %s, nnm %s\n", 5440 odcp, odcp->c_backvp, ndcp, ndcp->c_backvp, onm, nnm)); 5441 error = VOP_RENAME(odcp->c_backvp, onm, ndcp->c_backvp, nnm, cr, NULL, 5442 0); 5443 if (error) 5444 goto out; 5445 5446 /* purge mappings to file in the old directory */ 5447 dnlc_purge_vp(odvp); 5448 5449 /* purge mappings in the new dir if we deleted a file */ 5450 if (delvp && (odvp != ndvp)) 5451 dnlc_purge_vp(ndvp); 5452 5453 /* update the file we just deleted */ 5454 if (delvp) { 5455 mutex_enter(&delcp->c_statelock); 5456 if (delcp->c_attr.va_nlink == 1) { 5457 delcp->c_flags |= CN_DESTROY; 5458 } else { 5459 delcp->c_flags |= CN_UPDATED; 5460 } 5461 delcp->c_attr.va_nlink--; 5462 CFSOP_MODIFY_COBJECT(fscp, delcp, cr); 5463 mutex_exit(&delcp->c_statelock); 5464 } 5465 5466 /* find the entry in the old directory */ 5467 mutex_enter(&odcp->c_statelock); 5468 gotdirent = 0; 5469 cookiep = NULL; 5470 if (CFS_ISFS_NONSHARED(fscp) && 5471 (odcp->c_metadata.md_flags & MD_POPULATED)) { 5472 error = cachefs_dir_look(odcp, onm, &cookie, 5473 NULL, NULL, &cid); 5474 if (error == 0 || error == EINVAL) { 5475 gotdirent = 1; 5476 if (error == 0) 5477 cookiep = &cookie; 5478 } else { 5479 cachefs_inval_object(odcp); 5480 } 5481 } 5482 error = 0; 5483 5484 /* remove the directory entry from the old directory */ 5485 if (gotdirent) { 5486 error = cachefs_dir_rmentry(odcp, onm); 5487 if (error) { 5488 cachefs_nocache(odcp); 5489 error = 0; 5490 } 5491 } 5492 CFSOP_MODIFY_COBJECT(fscp, odcp, cr); 5493 mutex_exit(&odcp->c_statelock); 5494 5495 /* 
install the directory entry in the new directory */ 5496 mutex_enter(&ndcp->c_statelock); 5497 if (CFS_ISFS_NONSHARED(fscp) && 5498 (ndcp->c_metadata.md_flags & MD_POPULATED)) { 5499 error = 1; 5500 if (gotdirent) { 5501 ASSERT(cid.cid_fileno != 0); 5502 error = 0; 5503 if (delvp) { 5504 error = cachefs_dir_rmentry(ndcp, nnm); 5505 } 5506 if (error == 0) { 5507 error = cachefs_dir_enter(ndcp, nnm, cookiep, 5508 &cid, SM_ASYNC); 5509 } 5510 } 5511 if (error) { 5512 cachefs_nocache(ndcp); 5513 error = 0; 5514 } 5515 } 5516 if (odcp != ndcp) 5517 CFSOP_MODIFY_COBJECT(fscp, ndcp, cr); 5518 mutex_exit(&ndcp->c_statelock); 5519 5520 /* ctime of renamed file has changed */ 5521 mutex_enter(&recp->c_statelock); 5522 CFSOP_MODIFY_COBJECT(fscp, recp, cr); 5523 mutex_exit(&recp->c_statelock); 5524 5525 out: 5526 if (odcp != ndcp) 5527 rw_exit(&ndcp->c_rwlock); 5528 rw_exit(&odcp->c_rwlock); 5529 5530 VN_RELE(revp); 5531 5532 return (error); 5533 } 5534 5535 static int 5536 cachefs_rename_disconnected(vnode_t *odvp, char *onm, vnode_t *ndvp, 5537 char *nnm, cred_t *cr, vnode_t *delvp) 5538 { 5539 cnode_t *odcp = VTOC(odvp); 5540 cnode_t *ndcp = VTOC(ndvp); 5541 cnode_t *delcp = NULL; 5542 vnode_t *revp = NULL; 5543 cnode_t *recp; 5544 fscache_t *fscp = C_TO_FSCACHE(odcp); 5545 int error = 0; 5546 struct fid cookie; 5547 struct fid *cookiep; 5548 cfs_cid_t cid; 5549 off_t commit = 0; 5550 timestruc_t current_time; 5551 5552 if (CFS_ISFS_WRITE_AROUND(fscp)) 5553 return (ETIMEDOUT); 5554 5555 /* find the file we are renaming */ 5556 error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr); 5557 if (error) 5558 return (error); 5559 recp = VTOC(revp); 5560 5561 /* 5562 * To avoid deadlock, we acquire this global rename lock before 5563 * we try to get the locks for the source and target directories. 
5564 */ 5565 mutex_enter(&cachefs_rename_lock); 5566 rw_enter(&odcp->c_rwlock, RW_WRITER); 5567 if (odcp != ndcp) { 5568 rw_enter(&ndcp->c_rwlock, RW_WRITER); 5569 } 5570 mutex_exit(&cachefs_rename_lock); 5571 5572 if (recp->c_metadata.md_flags & MD_NEEDATTRS) { 5573 error = ETIMEDOUT; 5574 goto out; 5575 } 5576 5577 if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) { 5578 mutex_enter(&recp->c_statelock); 5579 if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) { 5580 error = cachefs_dlog_cidmap(fscp); 5581 if (error) { 5582 mutex_exit(&recp->c_statelock); 5583 error = ENOSPC; 5584 goto out; 5585 } 5586 recp->c_metadata.md_flags |= MD_MAPPING; 5587 recp->c_flags |= CN_UPDATED; 5588 } 5589 mutex_exit(&recp->c_statelock); 5590 } 5591 5592 /* check permissions */ 5593 /* XXX clean up this mutex junk sometime */ 5594 mutex_enter(&odcp->c_statelock); 5595 error = cachefs_access_local(odcp, (VEXEC|VWRITE), cr); 5596 mutex_exit(&odcp->c_statelock); 5597 if (error != 0) 5598 goto out; 5599 mutex_enter(&ndcp->c_statelock); 5600 error = cachefs_access_local(ndcp, (VEXEC|VWRITE), cr); 5601 mutex_exit(&ndcp->c_statelock); 5602 if (error != 0) 5603 goto out; 5604 mutex_enter(&odcp->c_statelock); 5605 error = cachefs_stickyrmchk(odcp, recp, cr); 5606 mutex_exit(&odcp->c_statelock); 5607 if (error != 0) 5608 goto out; 5609 5610 /* dirs must be populated */ 5611 if (((odcp->c_metadata.md_flags & MD_POPULATED) == 0) || 5612 ((ndcp->c_metadata.md_flags & MD_POPULATED) == 0)) { 5613 error = ETIMEDOUT; 5614 goto out; 5615 } 5616 5617 /* for now do not allow moving dirs because could cause cycles */ 5618 if ((((revp->v_type == VDIR) && (odvp != ndvp))) || 5619 (revp == odvp)) { 5620 error = ETIMEDOUT; 5621 goto out; 5622 } 5623 5624 /* if a file is being deleted because of this rename */ 5625 if (delvp) { 5626 delcp = VTOC(delvp); 5627 5628 /* if src and dest file are the same */ 5629 if (delvp == revp) { 5630 error = 0; 5631 goto out; 5632 } 5633 5634 if (delcp->c_metadata.md_flags & 
MD_NEEDATTRS) { 5635 error = ETIMEDOUT; 5636 goto out; 5637 } 5638 5639 /* if there are hard links to this file */ 5640 if (delcp->c_attr.va_nlink > 1) { 5641 mutex_enter(&delcp->c_statelock); 5642 if (cachefs_modified_alloc(delcp)) { 5643 mutex_exit(&delcp->c_statelock); 5644 error = ENOSPC; 5645 goto out; 5646 } 5647 5648 if ((delcp->c_metadata.md_flags & MD_MAPPING) == 0) { 5649 error = cachefs_dlog_cidmap(fscp); 5650 if (error) { 5651 mutex_exit(&delcp->c_statelock); 5652 error = ENOSPC; 5653 goto out; 5654 } 5655 delcp->c_metadata.md_flags |= MD_MAPPING; 5656 delcp->c_flags |= CN_UPDATED; 5657 } 5658 mutex_exit(&delcp->c_statelock); 5659 } 5660 5661 /* make sure we can delete file */ 5662 mutex_enter(&ndcp->c_statelock); 5663 error = cachefs_stickyrmchk(ndcp, delcp, cr); 5664 mutex_exit(&ndcp->c_statelock); 5665 if (error != 0) 5666 goto out; 5667 5668 /* 5669 * If the cnode is active, make a link to the file 5670 * so operations on the file will continue. 5671 */ 5672 dnlc_purge_vp(delvp); 5673 if ((delvp->v_type != VDIR) && 5674 !((delvp->v_count == 1) || 5675 ((delvp->v_count == 2) && delcp->c_ipending))) { 5676 error = cachefs_remove_dolink(ndvp, delvp, nnm, cr); 5677 if (error) 5678 goto out; 5679 } 5680 } 5681 5682 /* purge mappings to file in the old directory */ 5683 dnlc_purge_vp(odvp); 5684 5685 /* purge mappings in the new dir if we deleted a file */ 5686 if (delvp && (odvp != ndvp)) 5687 dnlc_purge_vp(ndvp); 5688 5689 /* find the entry in the old directory */ 5690 mutex_enter(&odcp->c_statelock); 5691 if ((odcp->c_metadata.md_flags & MD_POPULATED) == 0) { 5692 mutex_exit(&odcp->c_statelock); 5693 error = ETIMEDOUT; 5694 goto out; 5695 } 5696 cookiep = NULL; 5697 error = cachefs_dir_look(odcp, onm, &cookie, NULL, NULL, &cid); 5698 if (error == 0 || error == EINVAL) { 5699 if (error == 0) 5700 cookiep = &cookie; 5701 } else { 5702 mutex_exit(&odcp->c_statelock); 5703 if (error == ENOTDIR) 5704 error = ETIMEDOUT; 5705 goto out; 5706 } 5707 error = 0; 
5708 5709 /* write the log entry */ 5710 commit = cachefs_dlog_rename(fscp, odcp, onm, ndcp, nnm, cr, 5711 recp, delcp); 5712 if (commit == 0) { 5713 mutex_exit(&odcp->c_statelock); 5714 error = ENOSPC; 5715 goto out; 5716 } 5717 5718 /* remove the directory entry from the old directory */ 5719 cachefs_modified(odcp); 5720 error = cachefs_dir_rmentry(odcp, onm); 5721 if (error) { 5722 mutex_exit(&odcp->c_statelock); 5723 if (error == ENOTDIR) 5724 error = ETIMEDOUT; 5725 goto out; 5726 } 5727 mutex_exit(&odcp->c_statelock); 5728 5729 /* install the directory entry in the new directory */ 5730 mutex_enter(&ndcp->c_statelock); 5731 error = ENOTDIR; 5732 if (ndcp->c_metadata.md_flags & MD_POPULATED) { 5733 ASSERT(cid.cid_fileno != 0); 5734 cachefs_modified(ndcp); 5735 error = 0; 5736 if (delvp) { 5737 error = cachefs_dir_rmentry(ndcp, nnm); 5738 } 5739 if (error == 0) { 5740 error = cachefs_dir_enter(ndcp, nnm, cookiep, 5741 &cid, SM_ASYNC); 5742 } 5743 } 5744 if (error) { 5745 cachefs_nocache(ndcp); 5746 mutex_exit(&ndcp->c_statelock); 5747 mutex_enter(&odcp->c_statelock); 5748 cachefs_nocache(odcp); 5749 mutex_exit(&odcp->c_statelock); 5750 if (error == ENOTDIR) 5751 error = ETIMEDOUT; 5752 goto out; 5753 } 5754 mutex_exit(&ndcp->c_statelock); 5755 5756 gethrestime(¤t_time); 5757 5758 /* update the file we just deleted */ 5759 if (delvp) { 5760 mutex_enter(&delcp->c_statelock); 5761 delcp->c_attr.va_nlink--; 5762 delcp->c_metadata.md_localctime = current_time; 5763 delcp->c_metadata.md_flags |= MD_LOCALCTIME; 5764 if (delcp->c_attr.va_nlink == 0) { 5765 delcp->c_flags |= CN_DESTROY; 5766 } else { 5767 delcp->c_flags |= CN_UPDATED; 5768 } 5769 mutex_exit(&delcp->c_statelock); 5770 } 5771 5772 /* update the file we renamed */ 5773 mutex_enter(&recp->c_statelock); 5774 recp->c_metadata.md_localctime = current_time; 5775 recp->c_metadata.md_flags |= MD_LOCALCTIME; 5776 recp->c_flags |= CN_UPDATED; 5777 mutex_exit(&recp->c_statelock); 5778 5779 /* update the source 
directory */ 5780 mutex_enter(&odcp->c_statelock); 5781 odcp->c_metadata.md_localctime = current_time; 5782 odcp->c_metadata.md_localmtime = current_time; 5783 odcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME; 5784 odcp->c_flags |= CN_UPDATED; 5785 mutex_exit(&odcp->c_statelock); 5786 5787 /* update the destination directory */ 5788 if (odcp != ndcp) { 5789 mutex_enter(&ndcp->c_statelock); 5790 ndcp->c_metadata.md_localctime = current_time; 5791 ndcp->c_metadata.md_localmtime = current_time; 5792 ndcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME; 5793 ndcp->c_flags |= CN_UPDATED; 5794 mutex_exit(&ndcp->c_statelock); 5795 } 5796 5797 out: 5798 if (commit) { 5799 /* commit the log entry */ 5800 if (cachefs_dlog_commit(fscp, commit, error)) { 5801 /*EMPTY*/ 5802 /* XXX bob: fix on panic */ 5803 } 5804 } 5805 5806 if (odcp != ndcp) 5807 rw_exit(&ndcp->c_rwlock); 5808 rw_exit(&odcp->c_rwlock); 5809 5810 VN_RELE(revp); 5811 5812 return (error); 5813 } 5814 5815 /*ARGSUSED*/ 5816 static int 5817 cachefs_mkdir(vnode_t *dvp, char *nm, vattr_t *vap, vnode_t **vpp, 5818 cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp) 5819 { 5820 cnode_t *dcp = VTOC(dvp); 5821 fscache_t *fscp = C_TO_FSCACHE(dcp); 5822 cachefscache_t *cachep = fscp->fs_cache; 5823 int error = 0; 5824 int held = 0; 5825 int connected = 0; 5826 5827 #ifdef CFSDEBUG 5828 CFS_DEBUG(CFSDEBUG_VOPS) 5829 printf("cachefs_mkdir: ENTER dvp %p\n", (void *)dvp); 5830 #endif 5831 5832 if (getzoneid() != GLOBAL_ZONEID) { 5833 error = EPERM; 5834 goto out; 5835 } 5836 5837 if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE)) 5838 ASSERT(dcp->c_flags & CN_NOCACHE); 5839 5840 /* 5841 * Cachefs only provides pass-through support for NFSv4, 5842 * and all vnode operations are passed through to the 5843 * back file system. 
	 * For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the mkdir operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	/* retry loop: re-dispatch on connected/disconnected transitions */
	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			rw_exit(&dcp->c_rwlock);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		rw_enter(&dcp->c_rwlock, RW_WRITER);
		held = 1;

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_mkdir_connected(dvp, nm, vap,
			    vpp, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/* server timed out; drop to disconnected */
				rw_exit(&dcp->c_rwlock);
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_mkdir_disconnected(dvp, nm, vap,
			    vpp, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/* operation needs the server; reconnect */
				connected = 1;
				continue;
			}
		}
		break;
	}

	/* log the result of the mkdir if logging is enabled */
	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_MKDIR)) {
		fid_t *fidp = NULL;
		ino64_t fileno = 0;
		cnode_t *cp = NULL;
		if (error == 0)
			cp = VTOC(*vpp);

		if (cp != NULL) {
			fidp = &cp->c_metadata.md_cookie;
			fileno = cp->c_id.cid_fileno;
		}

		cachefs_log_mkdir(cachep, error, fscp->fs_cfsvfsp,
		    fidp, fileno, crgetuid(cr));
	}

	if (held) {
		rw_exit(&dcp->c_rwlock);
		cachefs_cd_release(fscp);
	}
	if (error == 0 && CFS_ISFS_NONSHARED(fscp))
		(void) cachefs_pack(dvp, nm, cr);

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_mkdir: EXIT error = %d\n", error);
#endif
	return (error);
}

/*
 * cachefs_mkdir_connected - make a directory while connected to the
 * back file system.  Performs the VOP_MKDIR on the back fs, builds a
 * cnode for the result, and opportunistically updates the cached
 * (front file) copy of the parent directory.
 */
static int
cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap,
    vnode_t **vpp, cred_t *cr)
{
	cnode_t *newcp = NULL, *dcp = VTOC(dvp);
	struct vnode *vp = NULL;
	int error = 0;
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	struct fid cookie;
	struct vattr attr;
	cfs_cid_t cid, dircid;
	uint32_t valid_fid;

	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
		ASSERT(dcp->c_flags & CN_NOCACHE);

	mutex_enter(&dcp->c_statelock);

	/* get backvp of dir */
	if (dcp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, dcp);
		if (error) {
			mutex_exit(&dcp->c_statelock);
			goto out;
		}
	}

	/* consistency check the directory */
	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	dircid = dcp->c_id;

	/* make the dir on the back fs */
	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_mkdir (nfsv4): dcp %p, dbackvp %p, "
	    "name %s\n", dcp, dcp->c_backvp, nm));
	error = VOP_MKDIR(dcp->c_backvp, nm, vap, &vp, cr, NULL, 0, NULL);
	mutex_exit(&dcp->c_statelock);
	if (error) {
		goto out;
	}

	/* get the cookie and make the cnode */
	attr.va_mask = AT_ALL;
	/* NFSv4 pass-through cnodes are not addressed by fid */
	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
	error = cachefs_getcookie(vp, &cookie, &attr, cr, valid_fid);
	if (error) {
		goto out;
	}
	cid.cid_flags = 0;
	cid.cid_fileno = attr.va_nodeid;
	error = cachefs_cnode_make(&cid, fscp, (valid_fid ?
&cookie : NULL), 5978 &attr, vp, cr, 0, &newcp); 5979 if (error) { 5980 goto out; 5981 } 5982 ASSERT(CTOV(newcp)->v_type == VDIR); 5983 *vpp = CTOV(newcp); 5984 5985 /* if the dir is populated, add the new entry */ 5986 mutex_enter(&dcp->c_statelock); 5987 if (CFS_ISFS_NONSHARED(fscp) && 5988 (dcp->c_metadata.md_flags & MD_POPULATED)) { 5989 error = cachefs_dir_enter(dcp, nm, &cookie, &newcp->c_id, 5990 SM_ASYNC); 5991 if (error) { 5992 cachefs_nocache(dcp); 5993 error = 0; 5994 } 5995 } 5996 dcp->c_attr.va_nlink++; 5997 dcp->c_flags |= CN_UPDATED; 5998 CFSOP_MODIFY_COBJECT(fscp, dcp, cr); 5999 mutex_exit(&dcp->c_statelock); 6000 6001 /* XXX bob: should we do a filldir here? or just add . and .. */ 6002 /* maybe should kick off an async filldir so caller does not wait */ 6003 6004 /* put the entry in the dnlc */ 6005 if (cachefs_dnlc) 6006 dnlc_enter(dvp, nm, *vpp); 6007 6008 /* save the fileno of the parent so can find the name */ 6009 if (bcmp(&newcp->c_metadata.md_parent, &dircid, 6010 sizeof (cfs_cid_t)) != 0) { 6011 mutex_enter(&newcp->c_statelock); 6012 newcp->c_metadata.md_parent = dircid; 6013 newcp->c_flags |= CN_UPDATED; 6014 mutex_exit(&newcp->c_statelock); 6015 } 6016 out: 6017 if (vp) 6018 VN_RELE(vp); 6019 6020 return (error); 6021 } 6022 6023 static int 6024 cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap, 6025 vnode_t **vpp, cred_t *cr) 6026 { 6027 cnode_t *dcp = VTOC(dvp); 6028 fscache_t *fscp = C_TO_FSCACHE(dcp); 6029 int error; 6030 cnode_t *newcp = NULL; 6031 struct vattr va; 6032 timestruc_t current_time; 6033 off_t commit = 0; 6034 char *s; 6035 int namlen; 6036 6037 /* don't allow '/' characters in pathname component */ 6038 for (s = nm, namlen = 0; *s; s++, namlen++) 6039 if (*s == '/') 6040 return (EACCES); 6041 if (namlen == 0) 6042 return (EINVAL); 6043 6044 if (CFS_ISFS_WRITE_AROUND(fscp)) 6045 return (ETIMEDOUT); 6046 6047 mutex_enter(&dcp->c_statelock); 6048 6049 /* check permissions */ 6050 if (error = 
cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) { 6051 mutex_exit(&dcp->c_statelock); 6052 goto out; 6053 } 6054 6055 /* the directory front file must be populated */ 6056 if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) { 6057 error = ETIMEDOUT; 6058 mutex_exit(&dcp->c_statelock); 6059 goto out; 6060 } 6061 6062 /* make sure nm does not already exist in the directory */ 6063 error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL); 6064 if (error == ENOTDIR) { 6065 error = ETIMEDOUT; 6066 mutex_exit(&dcp->c_statelock); 6067 goto out; 6068 } 6069 if (error != ENOENT) { 6070 error = EEXIST; 6071 mutex_exit(&dcp->c_statelock); 6072 goto out; 6073 } 6074 6075 /* make up a reasonable set of attributes */ 6076 cachefs_attr_setup(vap, &va, dcp, cr); 6077 va.va_type = VDIR; 6078 va.va_mode |= S_IFDIR; 6079 va.va_nlink = 2; 6080 6081 mutex_exit(&dcp->c_statelock); 6082 6083 /* create the cnode */ 6084 error = cachefs_cnode_create(fscp, &va, 0, &newcp); 6085 if (error) 6086 goto out; 6087 6088 mutex_enter(&newcp->c_statelock); 6089 6090 error = cachefs_dlog_cidmap(fscp); 6091 if (error) { 6092 mutex_exit(&newcp->c_statelock); 6093 goto out; 6094 } 6095 6096 cachefs_creategid(dcp, newcp, vap, cr); 6097 mutex_enter(&dcp->c_statelock); 6098 cachefs_createacl(dcp, newcp); 6099 mutex_exit(&dcp->c_statelock); 6100 gethrestime(¤t_time); 6101 newcp->c_metadata.md_vattr.va_atime = current_time; 6102 newcp->c_metadata.md_localctime = current_time; 6103 newcp->c_metadata.md_localmtime = current_time; 6104 newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME | 6105 MD_LOCALCTIME; 6106 newcp->c_flags |= CN_UPDATED; 6107 6108 /* make a front file for the new directory, add . and .. */ 6109 error = cachefs_dir_new(dcp, newcp); 6110 if (error) { 6111 mutex_exit(&newcp->c_statelock); 6112 goto out; 6113 } 6114 cachefs_modified(newcp); 6115 6116 /* 6117 * write the metadata now rather than waiting until 6118 * inactive so that if there's no space we can let 6119 * the caller know. 
	 */
	ASSERT(newcp->c_frontvp);
	ASSERT((newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
	ASSERT((newcp->c_flags & CN_ALLOC_PENDING) == 0);
	error = filegrp_write_metadata(newcp->c_filegrp,
	    &newcp->c_id, &newcp->c_metadata);
	if (error) {
		mutex_exit(&newcp->c_statelock);
		goto out;
	}
	mutex_exit(&newcp->c_statelock);

	/* log the operation for later replay against the server */
	commit = cachefs_dlog_mkdir(fscp, dcp, newcp, nm, &va, cr);
	if (commit == 0) {
		error = ENOSPC;
		goto out;
	}

	mutex_enter(&dcp->c_statelock);

	/* make sure directory is still populated (lock was dropped) */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		mutex_exit(&dcp->c_statelock);
		error = ETIMEDOUT;
		goto out;
	}
	cachefs_modified(dcp);

	/* enter the new file in the directory */
	error = cachefs_dir_enter(dcp, nm, &newcp->c_metadata.md_cookie,
	    &newcp->c_id, SM_ASYNC);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* update parent dir times */
	dcp->c_metadata.md_localctime = current_time;
	dcp->c_metadata.md_localmtime = current_time;
	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
	dcp->c_attr.va_nlink++;
	dcp->c_flags |= CN_UPDATED;
	mutex_exit(&dcp->c_statelock);

out:
	if (commit) {
		/* commit the log entry; error tells dlog the outcome */
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
	}
	if (error) {
		/* on failure, destroy the partially-built cnode */
		if (newcp) {
			mutex_enter(&newcp->c_statelock);
			newcp->c_flags |= CN_DESTROY;
			mutex_exit(&newcp->c_statelock);
			VN_RELE(CTOV(newcp));
		}
	} else {
		/* success: hand the held vnode back to the caller */
		*vpp = CTOV(newcp);
	}
	return (error);
}

/*
 * cachefs_rmdir - VOP_RMDIR entry point for cachefs.
 *
 * Removes directory "nm" from dvp, retrying across connected/
 * disconnected mode transitions like the other directory operations.
 * cdir is the caller's current directory (removal of it is refused).
 */
/*ARGSUSED*/
static int
cachefs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
    caller_context_t *ct, int flags)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	cachefscache_t *cachep = fscp->fs_cache;
	int error = 0;
	int held = 0;		/* fs access held */
	int connected = 0;	/* require connected operation on retry */
	size_t namlen;
	vnode_t *vp = NULL;	/* vnode of the directory being removed */
	int vfslock = 0;	/* vn_vfswlock held on vp */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_rmdir: ENTER vp %p\n", (void *)dvp);
#endif

	/* cachefs is only supported in the global zone */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
		ASSERT(dcp->c_flags & CN_NOCACHE);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the rmdir operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	for (;;) {
		/* drop per-iteration state from a previous pass */
		if (vfslock) {
			vn_vfsunlock(vp);
			vfslock = 0;
		}
		if (vp) {
			VN_RELE(vp);
			vp = NULL;
		}

		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;

		/* if disconnected, do some extra error checking */
		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
			/* check permissions */
			mutex_enter(&dcp->c_statelock);
			error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
			mutex_exit(&dcp->c_statelock);
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			}
			if (error)
				break;

			namlen = strlen(nm);
			if (namlen == 0) {
				error = EINVAL;
				break;
			}

			/* cannot remove . and .. */
			if (nm[0] == '.') {
				if (namlen == 1) {
					error = EINVAL;
					break;
				} else if (namlen == 2 && nm[1] == '.') {
					error = EEXIST;
					break;
				}
			}

		}

		/* get the cnode of the dir to remove */
		error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
		if (error) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				if (CFS_TIMEOUT(fscp, error)) {
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					connected = 0;
					continue;
				}
			} else {
				if (CFS_TIMEOUT(fscp, error)) {
					connected = 1;
					continue;
				}
			}
			break;
		}

		/* must be a dir */
		if (vp->v_type != VDIR) {
			error = ENOTDIR;
			break;
		}

		/* must not be current dir */
		if (VOP_CMP(vp, cdir, ct)) {
			error = EINVAL;
			break;
		}

		/* see ufs_dirremove for why this is done, mount race */
		if (vn_vfswlock(vp)) {
			error = EBUSY;
			break;
		}
		vfslock = 1;
		if (vn_mountedvfs(vp) != NULL) {
			error = EBUSY;
			break;
		}

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_rmdir_connected(dvp, nm, cdir,
			    cr, vp);
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_rmdir_disconnected(dvp, nm, cdir,
			    cr, vp);
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			}
		}
		break;
	}

	/* log the result of the rmdir if logging is enabled */
	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RMDIR)) {
		ino64_t fileno = 0;
		fid_t *fidp = NULL;
		cnode_t *cp = NULL;
		if (vp)
			cp = VTOC(vp);

		if (cp != NULL) {
			fidp = &cp->c_metadata.md_cookie;
			fileno = cp->c_id.cid_fileno;
		}

		cachefs_log_rmdir(cachep, error, fscp->fs_cfsvfsp,
		    fidp, fileno, crgetuid(cr));
	}

	if (held) {
6361 cachefs_cd_release(fscp); 6362 } 6363 6364 if (vfslock) 6365 vn_vfsunlock(vp); 6366 6367 if (vp) 6368 VN_RELE(vp); 6369 6370 #ifdef CFS_CD_DEBUG 6371 ASSERT((curthread->t_flag & T_CD_HELD) == 0); 6372 #endif 6373 out: 6374 #ifdef CFSDEBUG 6375 CFS_DEBUG(CFSDEBUG_VOPS) 6376 printf("cachefs_rmdir: EXIT error = %d\n", error); 6377 #endif 6378 6379 return (error); 6380 } 6381 6382 static int 6383 cachefs_rmdir_connected(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr, 6384 vnode_t *vp) 6385 { 6386 cnode_t *dcp = VTOC(dvp); 6387 cnode_t *cp = VTOC(vp); 6388 int error = 0; 6389 fscache_t *fscp = C_TO_FSCACHE(dcp); 6390 6391 rw_enter(&dcp->c_rwlock, RW_WRITER); 6392 mutex_enter(&dcp->c_statelock); 6393 mutex_enter(&cp->c_statelock); 6394 6395 if (dcp->c_backvp == NULL) { 6396 error = cachefs_getbackvp(fscp, dcp); 6397 if (error) { 6398 goto out; 6399 } 6400 } 6401 6402 error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr); 6403 if (error) 6404 goto out; 6405 6406 /* rmdir on the back fs */ 6407 CFS_DPRINT_BACKFS_NFSV4(fscp, 6408 ("cachefs_rmdir (nfsv4): dcp %p, dbackvp %p, " 6409 "name %s\n", dcp, dcp->c_backvp, nm)); 6410 error = VOP_RMDIR(dcp->c_backvp, nm, cdir, cr, NULL, 0); 6411 if (error) 6412 goto out; 6413 6414 /* if the dir is populated, remove the entry from it */ 6415 if (CFS_ISFS_NONSHARED(fscp) && 6416 (dcp->c_metadata.md_flags & MD_POPULATED)) { 6417 error = cachefs_dir_rmentry(dcp, nm); 6418 if (error) { 6419 cachefs_nocache(dcp); 6420 error = 0; 6421 } 6422 } 6423 6424 /* 6425 * *if* the (hard) link count goes to 0, then we set the CDESTROY 6426 * flag on the cnode. The cached object will then be destroyed 6427 * at inactive time where the chickens come home to roost :-) 6428 * The link cnt for directories is bumped down by 2 'cause the "." 6429 * entry has to be elided too ! The link cnt for the parent goes down 6430 * by 1 (because of ".."). 
6431 */ 6432 cp->c_attr.va_nlink -= 2; 6433 dcp->c_attr.va_nlink--; 6434 if (cp->c_attr.va_nlink == 0) { 6435 cp->c_flags |= CN_DESTROY; 6436 } else { 6437 cp->c_flags |= CN_UPDATED; 6438 } 6439 dcp->c_flags |= CN_UPDATED; 6440 6441 dnlc_purge_vp(vp); 6442 CFSOP_MODIFY_COBJECT(fscp, dcp, cr); 6443 6444 out: 6445 mutex_exit(&cp->c_statelock); 6446 mutex_exit(&dcp->c_statelock); 6447 rw_exit(&dcp->c_rwlock); 6448 6449 return (error); 6450 } 6451 6452 static int 6453 /*ARGSUSED*/ 6454 cachefs_rmdir_disconnected(vnode_t *dvp, char *nm, vnode_t *cdir, 6455 cred_t *cr, vnode_t *vp) 6456 { 6457 cnode_t *dcp = VTOC(dvp); 6458 cnode_t *cp = VTOC(vp); 6459 fscache_t *fscp = C_TO_FSCACHE(dcp); 6460 int error = 0; 6461 off_t commit = 0; 6462 timestruc_t current_time; 6463 6464 if (CFS_ISFS_WRITE_AROUND(fscp)) 6465 return (ETIMEDOUT); 6466 6467 rw_enter(&dcp->c_rwlock, RW_WRITER); 6468 mutex_enter(&dcp->c_statelock); 6469 mutex_enter(&cp->c_statelock); 6470 6471 /* both directories must be populated */ 6472 if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) || 6473 ((cp->c_metadata.md_flags & MD_POPULATED) == 0)) { 6474 error = ETIMEDOUT; 6475 goto out; 6476 } 6477 6478 /* if sticky bit set on the dir, more access checks to perform */ 6479 if (error = cachefs_stickyrmchk(dcp, cp, cr)) { 6480 goto out; 6481 } 6482 6483 /* make sure dir is empty */ 6484 if (cp->c_attr.va_nlink > 2) { 6485 error = cachefs_dir_empty(cp); 6486 if (error) { 6487 if (error == ENOTDIR) 6488 error = ETIMEDOUT; 6489 goto out; 6490 } 6491 cachefs_modified(cp); 6492 } 6493 cachefs_modified(dcp); 6494 6495 /* log the operation */ 6496 commit = cachefs_dlog_rmdir(fscp, dcp, nm, cp, cr); 6497 if (commit == 0) { 6498 error = ENOSPC; 6499 goto out; 6500 } 6501 6502 /* remove name from parent dir */ 6503 error = cachefs_dir_rmentry(dcp, nm); 6504 if (error == ENOTDIR) { 6505 error = ETIMEDOUT; 6506 goto out; 6507 } 6508 if (error) 6509 goto out; 6510 6511 gethrestime(¤t_time); 6512 6513 /* update deleted dir 
	   values */
	cp->c_attr.va_nlink -= 2;	/* "." and parent's entry */
	if (cp->c_attr.va_nlink == 0)
		cp->c_flags |= CN_DESTROY;
	else {
		cp->c_metadata.md_localctime = current_time;
		cp->c_metadata.md_flags |= MD_LOCALCTIME;
		cp->c_flags |= CN_UPDATED;
	}

	/* update parent values */
	dcp->c_metadata.md_localctime = current_time;
	dcp->c_metadata.md_localmtime = current_time;
	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
	dcp->c_attr.va_nlink--;		/* lost the ".." entry */
	dcp->c_flags |= CN_UPDATED;

out:
	mutex_exit(&cp->c_statelock);
	mutex_exit(&dcp->c_statelock);
	rw_exit(&dcp->c_rwlock);
	if (commit) {
		/* commit the log entry */
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
		dnlc_purge_vp(vp);
	}
	return (error);
}

/*
 * cachefs_symlink - VOP_SYMLINK entry point for cachefs.
 *
 * Creates symlink "lnm" under dvp with target path "tnm", retrying
 * across connected/disconnected mode transitions like the other
 * directory operations.
 */
/*ARGSUSED*/
static int
cachefs_symlink(vnode_t *dvp, char *lnm, vattr_t *tva,
    char *tnm, cred_t *cr, caller_context_t *ct, int flags)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	cachefscache_t *cachep = fscp->fs_cache;
	int error = 0;
	int held = 0;		/* fs access (and dcp->c_rwlock) held */
	int connected = 0;	/* require connected operation on retry */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_symlink: ENTER dvp %p lnm %s tnm %s\n",
		    (void *)dvp, lnm, tnm);
#endif

	/* cachefs is only supported in the global zone */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (fscp->fs_cache->c_flags & CACHE_NOCACHE)
		ASSERT(dcp->c_flags & CN_NOCACHE);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the symlink operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			rw_exit(&dcp->c_rwlock);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		rw_enter(&dcp->c_rwlock, RW_WRITER);
		held = 1;

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_symlink_connected(dvp, lnm, tva,
			    tnm, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/* server timed out; drop to disconnected */
				rw_exit(&dcp->c_rwlock);
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_symlink_disconnected(dvp, lnm, tva,
			    tnm, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/* operation needs the server; reconnect */
				connected = 1;
				continue;
			}
		}
		break;
	}

	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_SYMLINK))
		cachefs_log_symlink(cachep, error, fscp->fs_cfsvfsp,
		    &dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno,
		    crgetuid(cr), (uint_t)strlen(tnm));

	if (held) {
		rw_exit(&dcp->c_rwlock);
		cachefs_cd_release(fscp);
	}

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_symlink: EXIT error = %d\n", error);
#endif
	return (error);
}

/*
 * cachefs_symlink_connected - create symlink while connected.
 * Performs VOP_SYMLINK on the back fs, looks the new link up to get
 * its fid/attributes, and opportunistically caches the directory
 * entry and the link contents.
 */
static int
cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva,
    char *tnm, cred_t *cr)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error = 0;
	vnode_t *backvp = NULL;
	cnode_t *newcp = NULL;
	struct vattr va;
	struct fid cookie;
	cfs_cid_t cid;
	uint32_t valid_fid;

	mutex_enter(&dcp->c_statelock);

	if
	    (dcp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, dcp);
		if (error) {
			cachefs_nocache(dcp);
			mutex_exit(&dcp->c_statelock);
			goto out;
		}
	}

	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_symlink (nfsv4): dcp %p, dbackvp %p, "
	    "lnm %s, tnm %s\n", dcp, dcp->c_backvp, lnm, tnm));
	error = VOP_SYMLINK(dcp->c_backvp, lnm, tva, tnm, cr, NULL, 0);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	if ((dcp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
		cachefs_nocache(dcp);
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);

	/* lookup the symlink we just created and get its fid and attrs */
	(void) VOP_LOOKUP(dcp->c_backvp, lnm, &backvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL);
	if (backvp == NULL) {
		/* lookup failed; give up on caching this directory */
		if (CFS_ISFS_BACKFS_NFSV4(fscp) == 0)
			cachefs_nocache(dcp);
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* NFSv4 pass-through cnodes are not addressed by fid */
	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
	error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
	if (error) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		/* link exists on the server; just stop caching the dir */
		error = 0;
		cachefs_nocache(dcp);
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	cid.cid_fileno = va.va_nodeid;
	cid.cid_flags = 0;

	/* if the dir is cached, add the symlink to it */
	if (CFS_ISFS_NONSHARED(fscp) &&
	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
		error = cachefs_dir_enter(dcp, lnm, &cookie, &cid, SM_ASYNC);
		if (error) {
			cachefs_nocache(dcp);
			error = 0;
		}
	}
	mutex_exit(&dcp->c_statelock);

	/* make the cnode for the sym link */
	error = cachefs_cnode_make(&cid, fscp, (valid_fid ?
	    &cookie : NULL),
	    &va, backvp, cr, 0, &newcp);
	if (error) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		/* could not build a cnode; degrade to nocache, not failure */
		cachefs_nocache(dcp);
		error = 0;
		goto out;
	}

	/* try to cache the symlink contents */
	rw_enter(&newcp->c_rwlock, RW_WRITER);
	mutex_enter(&newcp->c_statelock);

	/*
	 * try to cache the sym link, note that its a noop if NOCACHE
	 * or NFSv4 is set
	 */
	error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size);
	if (error) {
		cachefs_nocache(newcp);
		error = 0;
	}
	mutex_exit(&newcp->c_statelock);
	rw_exit(&newcp->c_rwlock);

out:
	if (backvp)
		VN_RELE(backvp);
	if (newcp)
		VN_RELE(CTOV(newcp));
	return (error);
}

/*
 * cachefs_symlink_disconnected - create symlink while disconnected.
 * The link is created locally in the cache and logged in the dlog for
 * later replay.  Returns ETIMEDOUT when local operation is not
 * possible (forces a reconnect by the caller).
 */
static int
cachefs_symlink_disconnected(vnode_t *dvp, char *lnm, vattr_t *tva,
    char *tnm, cred_t *cr)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error;
	cnode_t *newcp = NULL;
	struct vattr va;
	timestruc_t current_time;
	off_t commit = 0;	/* dlog offset; nonzero once logged */

	if (CFS_ISFS_WRITE_AROUND(fscp))
		return (ETIMEDOUT);

	mutex_enter(&dcp->c_statelock);

	/* check permissions (assignment in condition is intentional) */
	if (error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* the directory front file must be populated */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		error = ETIMEDOUT;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* make sure lnm does not already exist in the directory */
	error = cachefs_dir_look(dcp, lnm, NULL, NULL, NULL, NULL);
	if (error == ENOTDIR) {
		/* front file unusable; need the server */
		error = ETIMEDOUT;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	if (error != ENOENT) {
		error = EEXIST;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* make up a reasonable set of attributes */
	cachefs_attr_setup(tva, &va, dcp, cr);
6800 va.va_type = VLNK; 6801 va.va_mode |= S_IFLNK; 6802 va.va_size = strlen(tnm); 6803 6804 mutex_exit(&dcp->c_statelock); 6805 6806 /* create the cnode */ 6807 error = cachefs_cnode_create(fscp, &va, 0, &newcp); 6808 if (error) 6809 goto out; 6810 6811 rw_enter(&newcp->c_rwlock, RW_WRITER); 6812 mutex_enter(&newcp->c_statelock); 6813 6814 error = cachefs_dlog_cidmap(fscp); 6815 if (error) { 6816 mutex_exit(&newcp->c_statelock); 6817 rw_exit(&newcp->c_rwlock); 6818 error = ENOSPC; 6819 goto out; 6820 } 6821 6822 cachefs_creategid(dcp, newcp, tva, cr); 6823 mutex_enter(&dcp->c_statelock); 6824 cachefs_createacl(dcp, newcp); 6825 mutex_exit(&dcp->c_statelock); 6826 gethrestime(¤t_time); 6827 newcp->c_metadata.md_vattr.va_atime = current_time; 6828 newcp->c_metadata.md_localctime = current_time; 6829 newcp->c_metadata.md_localmtime = current_time; 6830 newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME | 6831 MD_LOCALCTIME; 6832 newcp->c_flags |= CN_UPDATED; 6833 6834 /* log the operation */ 6835 commit = cachefs_dlog_symlink(fscp, dcp, newcp, lnm, tva, tnm, cr); 6836 if (commit == 0) { 6837 mutex_exit(&newcp->c_statelock); 6838 rw_exit(&newcp->c_rwlock); 6839 error = ENOSPC; 6840 goto out; 6841 } 6842 6843 /* store the symlink contents */ 6844 error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size); 6845 if (error) { 6846 mutex_exit(&newcp->c_statelock); 6847 rw_exit(&newcp->c_rwlock); 6848 goto out; 6849 } 6850 if (cachefs_modified_alloc(newcp)) { 6851 mutex_exit(&newcp->c_statelock); 6852 rw_exit(&newcp->c_rwlock); 6853 error = ENOSPC; 6854 goto out; 6855 } 6856 6857 /* 6858 * write the metadata now rather than waiting until 6859 * inactive so that if there's no space we can let 6860 * the caller know. 
6861 */ 6862 if (newcp->c_flags & CN_ALLOC_PENDING) { 6863 if (newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) { 6864 (void) filegrp_allocattr(newcp->c_filegrp); 6865 } 6866 error = filegrp_create_metadata(newcp->c_filegrp, 6867 &newcp->c_metadata, &newcp->c_id); 6868 if (error) { 6869 mutex_exit(&newcp->c_statelock); 6870 rw_exit(&newcp->c_rwlock); 6871 goto out; 6872 } 6873 newcp->c_flags &= ~CN_ALLOC_PENDING; 6874 } 6875 error = filegrp_write_metadata(newcp->c_filegrp, 6876 &newcp->c_id, &newcp->c_metadata); 6877 if (error) { 6878 mutex_exit(&newcp->c_statelock); 6879 rw_exit(&newcp->c_rwlock); 6880 goto out; 6881 } 6882 mutex_exit(&newcp->c_statelock); 6883 rw_exit(&newcp->c_rwlock); 6884 6885 mutex_enter(&dcp->c_statelock); 6886 6887 /* enter the new file in the directory */ 6888 if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) { 6889 error = ETIMEDOUT; 6890 mutex_exit(&dcp->c_statelock); 6891 goto out; 6892 } 6893 cachefs_modified(dcp); 6894 error = cachefs_dir_enter(dcp, lnm, &newcp->c_metadata.md_cookie, 6895 &newcp->c_id, SM_ASYNC); 6896 if (error) { 6897 mutex_exit(&dcp->c_statelock); 6898 goto out; 6899 } 6900 6901 /* update parent dir times */ 6902 dcp->c_metadata.md_localctime = current_time; 6903 dcp->c_metadata.md_localmtime = current_time; 6904 dcp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME; 6905 dcp->c_flags |= CN_UPDATED; 6906 mutex_exit(&dcp->c_statelock); 6907 6908 out: 6909 if (commit) { 6910 /* commit the log entry */ 6911 if (cachefs_dlog_commit(fscp, commit, error)) { 6912 /*EMPTY*/ 6913 /* XXX bob: fix on panic */ 6914 } 6915 } 6916 6917 if (error) { 6918 if (newcp) { 6919 mutex_enter(&newcp->c_statelock); 6920 newcp->c_flags |= CN_DESTROY; 6921 mutex_exit(&newcp->c_statelock); 6922 } 6923 } 6924 if (newcp) { 6925 VN_RELE(CTOV(newcp)); 6926 } 6927 6928 return (error); 6929 } 6930 6931 /*ARGSUSED*/ 6932 static int 6933 cachefs_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp, 6934 caller_context_t *ct, int flags) 6935 { 
6936 cnode_t *dcp = VTOC(vp); 6937 fscache_t *fscp = C_TO_FSCACHE(dcp); 6938 cachefscache_t *cachep = fscp->fs_cache; 6939 int error = 0; 6940 int held = 0; 6941 int connected = 0; 6942 6943 #ifdef CFSDEBUG 6944 CFS_DEBUG(CFSDEBUG_VOPS) 6945 printf("cachefs_readdir: ENTER vp %p\n", (void *)vp); 6946 #endif 6947 if (getzoneid() != GLOBAL_ZONEID) { 6948 error = EPERM; 6949 goto out; 6950 } 6951 6952 /* 6953 * Cachefs only provides pass-through support for NFSv4, 6954 * and all vnode operations are passed through to the 6955 * back file system. For NFSv4 pass-through to work, only 6956 * connected operation is supported, the cnode backvp must 6957 * exist, and cachefs optional (eg., disconnectable) flags 6958 * are turned off. Assert these conditions to ensure that 6959 * the backfilesystem is called for the readdir operation. 6960 */ 6961 CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp); 6962 CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp); 6963 6964 for (;;) { 6965 /* get (or renew) access to the file system */ 6966 if (held) { 6967 /* Won't loop with NFSv4 connected behavior */ 6968 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0); 6969 rw_exit(&dcp->c_rwlock); 6970 cachefs_cd_release(fscp); 6971 held = 0; 6972 } 6973 error = cachefs_cd_access(fscp, connected, 0); 6974 if (error) 6975 break; 6976 rw_enter(&dcp->c_rwlock, RW_READER); 6977 held = 1; 6978 6979 /* quit if link count of zero (posix) */ 6980 if (dcp->c_attr.va_nlink == 0) { 6981 if (eofp) 6982 *eofp = 1; 6983 error = 0; 6984 break; 6985 } 6986 6987 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) { 6988 error = cachefs_readdir_connected(vp, uiop, cr, 6989 eofp); 6990 if (CFS_TIMEOUT(fscp, error)) { 6991 rw_exit(&dcp->c_rwlock); 6992 cachefs_cd_release(fscp); 6993 held = 0; 6994 cachefs_cd_timedout(fscp); 6995 connected = 0; 6996 continue; 6997 } 6998 } else { 6999 error = cachefs_readdir_disconnected(vp, uiop, cr, 7000 eofp); 7001 if (CFS_TIMEOUT(fscp, error)) { 7002 if (cachefs_cd_access_miss(fscp)) { 7003 error = 
cachefs_readdir_connected(vp, 7004 uiop, cr, eofp); 7005 if (!CFS_TIMEOUT(fscp, error)) 7006 break; 7007 delay(5*hz); 7008 connected = 0; 7009 continue; 7010 } 7011 connected = 1; 7012 continue; 7013 } 7014 } 7015 break; 7016 } 7017 7018 if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READDIR)) 7019 cachefs_log_readdir(cachep, error, fscp->fs_cfsvfsp, 7020 &dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno, 7021 crgetuid(cr), uiop->uio_loffset, *eofp); 7022 7023 if (held) { 7024 rw_exit(&dcp->c_rwlock); 7025 cachefs_cd_release(fscp); 7026 } 7027 7028 #ifdef CFS_CD_DEBUG 7029 ASSERT((curthread->t_flag & T_CD_HELD) == 0); 7030 #endif 7031 out: 7032 #ifdef CFSDEBUG 7033 CFS_DEBUG(CFSDEBUG_VOPS) 7034 printf("cachefs_readdir: EXIT error = %d\n", error); 7035 #endif 7036 7037 return (error); 7038 } 7039 7040 static int 7041 cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp) 7042 { 7043 cnode_t *dcp = VTOC(vp); 7044 int error; 7045 fscache_t *fscp = C_TO_FSCACHE(dcp); 7046 struct cachefs_req *rp; 7047 7048 mutex_enter(&dcp->c_statelock); 7049 7050 /* check directory consistency */ 7051 error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr); 7052 if (error) 7053 goto out; 7054 dcp->c_usage++; 7055 7056 /* if dir was modified, toss old contents */ 7057 if (dcp->c_metadata.md_flags & MD_INVALREADDIR) { 7058 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0); 7059 cachefs_inval_object(dcp); 7060 } 7061 7062 error = 0; 7063 if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) && 7064 ((dcp->c_flags & (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0) && 7065 !CFS_ISFS_BACKFS_NFSV4(fscp) && 7066 (fscp->fs_cdconnected == CFS_CD_CONNECTED)) { 7067 7068 if (cachefs_async_okay()) { 7069 7070 /* 7071 * Set up asynchronous request to fill this 7072 * directory. 
			 */

			dcp->c_flags |= CN_ASYNC_POPULATE;

			rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
			rp->cfs_cmd = CFS_POPULATE;
			rp->cfs_req_u.cu_populate.cpop_vp = vp;
			rp->cfs_cr = cr;

			/* the async worker owns these holds until done */
			crhold(cr);
			VN_HOLD(vp);

			cachefs_addqueue(rp, &fscp->fs_workq);
		} else {
			/* too many async requests; populate synchronously */
			error = cachefs_dir_fill(dcp, cr);
			if (error != 0)
				cachefs_nocache(dcp);
		}
	}

	/* if front file is populated */
	if (((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) == 0) &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp) &&
	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		error = cachefs_dir_read(dcp, uiop, eofp);
		if (error == 0)
			fscp->fs_stats.st_hits++;
	}

	/* if front file could not be used */
	if ((error != 0) ||
	    CFS_ISFS_BACKFS_NFSV4(fscp) ||
	    (dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
	    ((dcp->c_metadata.md_flags & MD_POPULATED) == 0)) {

		if (error && !(dcp->c_flags & CN_NOCACHE) &&
		    !CFS_ISFS_BACKFS_NFSV4(fscp))
			cachefs_nocache(dcp);

		/* get the back vp */
		if (dcp->c_backvp == NULL) {
			error = cachefs_getbackvp(fscp, dcp);
			if (error)
				goto out;
		}

		if (fscp->fs_inum_size > 0) {
			/* fileno translation is in effect for this fscache */
			error = cachefs_readback_translate(dcp, uiop, cr, eofp);
		} else {
			/* do the dir read from the back fs */
			(void) VOP_RWLOCK(dcp->c_backvp,
			    V_WRITELOCK_FALSE, NULL);
			CFS_DPRINT_BACKFS_NFSV4(fscp,
			    ("cachefs_readdir (nfsv4): "
			    "dcp %p, dbackvp %p\n", dcp, dcp->c_backvp));
			error = VOP_READDIR(dcp->c_backvp, uiop, cr, eofp,
			    NULL, 0);
			VOP_RWUNLOCK(dcp->c_backvp, V_WRITELOCK_FALSE, NULL);
		}

		if (error == 0)
			fscp->fs_stats.st_misses++;
	}

out:
	mutex_exit(&dcp->c_statelock);

	return (error);
}

/*
 * cachefs_readback_translate
 *
 * Read directory entries from the back file system into a scratch
 * buffer and rewrite each entry's d_ino from the back fs fileno into
 * the cachefs ("fake") fileno space before copying the entries out to
 * the caller's uio.  Called with cp->c_statelock held (note the bare
 * mutex_exit below); the statelock is dropped while fs_fslock is held
 * for the fileno translation, then reacquired.
 */
static int
cachefs_readback_translate(cnode_t *cp, uio_t *uiop, cred_t *cr, int *eofp)
{
	int error = 0;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	caddr_t buffy = NULL;
	int buffysize = MAXBSIZE;
	caddr_t chrp, end;
	ino64_t newinum;
	struct dirent64 *de;
	uio_t uioin;
	iovec_t iov;

	ASSERT(cp->c_backvp != NULL);
	ASSERT(fscp->fs_inum_size > 0);

	/* never read more than the caller asked for */
	if (uiop->uio_resid < buffysize)
		buffysize = (int)uiop->uio_resid;
	buffy = cachefs_kmem_alloc(buffysize, KM_SLEEP);

	/* kernel-space uio describing the scratch buffer */
	iov.iov_base = buffy;
	iov.iov_len = buffysize;
	uioin.uio_iov = &iov;
	uioin.uio_iovcnt = 1;
	uioin.uio_segflg = UIO_SYSSPACE;
	uioin.uio_fmode = 0;
	uioin.uio_extflg = UIO_COPY_CACHED;
	uioin.uio_loffset = uiop->uio_loffset;
	uioin.uio_resid = buffysize;

	(void) VOP_RWLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);
	error = VOP_READDIR(cp->c_backvp, &uioin, cr, eofp, NULL, 0);
	VOP_RWUNLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);

	if (error != 0)
		goto out;

	/* end of the data actually returned by the back fs */
	end = buffy + buffysize - uioin.uio_resid;

	mutex_exit(&cp->c_statelock);
	mutex_enter(&fscp->fs_fslock);


	/*
	 * Walk the entries just read, replacing each real fileno with
	 * its cachefs equivalent (allocating a new mapping on conflict).
	 */
	for (chrp = buffy; chrp < end; chrp += de->d_reclen) {
		de = (dirent64_t *)chrp;
		newinum = cachefs_inum_real2fake(fscp, de->d_ino);
		if (newinum == 0)
			newinum = cachefs_fileno_conflict(fscp, de->d_ino);
		de->d_ino = newinum;
	}
	mutex_exit(&fscp->fs_fslock);
	mutex_enter(&cp->c_statelock);

	/* copy the translated entries out to the caller */
	error = uiomove(buffy, end - buffy, UIO_READ, uiop);
	/* propagate the back fs directory offset to the caller's uio */
	uiop->uio_loffset = uioin.uio_loffset;

out:

	if (buffy != NULL)
		cachefs_kmem_free(buffy, buffysize);

	return (error);
}

/*
 * cachefs_readdir_disconnected
 *
 * Serve a readdir purely from the populated front file.  If the
 * directory has not been populated we cannot answer while
 * disconnected, so ETIMEDOUT is returned to drive the caller's
 * retry/reconnect logic.
 */
static int
/*ARGSUSED*/
cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop, cred_t *cr,
	int *eofp)
{
	cnode_t *dcp = VTOC(vp);
	int error;

	mutex_enter(&dcp->c_statelock);
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		error = ETIMEDOUT;
	} else {
		error = cachefs_dir_read(dcp, uiop, eofp);
		/* front file unusable; map to a timeout so caller retries */
		if (error == ENOTDIR)
			error = ETIMEDOUT;
	}
	mutex_exit(&dcp->c_statelock);

	return (error);
}

/*ARGSUSED*/
static int
cachefs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
{
	int error = 0;
	struct cnode *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions, then bail
	 * as NFSv4 doesn't support VOP_FID.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
		return (ENOTSUP);
	}

	mutex_enter(&cp->c_statelock);
	if (fidp->fid_len < cp->c_metadata.md_cookie.fid_len) {
		/* caller's buffer too small: report needed length, ENOSPC */
		fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
		error = ENOSPC;
	} else {
		bcopy(cp->c_metadata.md_cookie.fid_data, fidp->fid_data,
		    cp->c_metadata.md_cookie.fid_len);
		fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
	}
	mutex_exit(&cp->c_statelock);
	return (error);
}

/* ARGSUSED2 */
static int
cachefs_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
{
	cnode_t *cp = VTOC(vp);

	/*
	 * XXX - This is ifdef'ed out for now. The problem -
	 * getdents() acquires the read version of rwlock, then we come
	 * into cachefs_readdir() and that wants to acquire the write version
	 * of this lock (if its going to populate the directory). This is
	 * a problem, this can be solved by introducing another lock in the
	 * cnode.
	 */
	/* XXX */
	if (vp->v_type != VREG)
		return (-1);
	if (write_lock)
		rw_enter(&cp->c_rwlock, RW_WRITER);
	else
		rw_enter(&cp->c_rwlock, RW_READER);
	return (write_lock);
}

/* ARGSUSED */
static void
cachefs_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
{
	cnode_t *cp = VTOC(vp);
	/* rwlock above only takes c_rwlock for VREG vnodes; mirror that */
	if (vp->v_type != VREG)
		return;
	rw_exit(&cp->c_rwlock);
}

/* ARGSUSED */
static void
cachefs_seek_dummy(void) {}
/* (no-op marker removed) */
/* ARGSUSED */
static int
cachefs_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
    caller_context_t *ct)
{
	/* all offsets are valid; nothing to validate */
	return (0);
}

/*
 * Debug counter: bumped when a page found by page_exists() has
 * vanished by the time page_lookup() runs in cachefs_getapage().
 */
static int cachefs_lostpage = 0;
/*
 * Return all the pages from [off..off+len] in file
 */
/*ARGSUSED*/
static int
cachefs_getpage(struct vnode *vp, offset_t off, size_t len,
	uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
	caddr_t addr, enum seg_rw rw, cred_t *cr, caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	int error;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	cachefscache_t *cachep = fscp->fs_cache;
	int held = 0;
	int connected = 0;

#ifdef CFSDEBUG
	u_offset_t offx = (u_offset_t)off;

	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_getpage: ENTER vp %p off %lld len %lu rw %d\n",
		    (void *)vp, offx, len, rw);
#endif
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (vp->v_flag & VNOMAP) {
		error = ENOSYS;
		goto out;
	}

	/* Call backfilesystem if NFSv4 */
	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_getpage_backfs_nfsv4(vp, off, len, protp, pl,
		    plsz, seg, addr, rw, cr);
		goto out;
	}

	/* XXX sam: make this do an async populate? */
	if (pl == NULL) {
		error = 0;
		goto out;
	}
	if (protp != NULL)
		*protp = PROT_ALL;

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			break;
		held = 1;

		/*
		 * If we are getting called as a side effect of a
		 * cachefs_write()
		 * operation the local file size might not be extended yet.
		 * In this case we want to be able to return pages of zeroes.
		 */
		if ((u_offset_t)off + len >
		    ((cp->c_size + PAGEOFFSET) & (offset_t)PAGEMASK)) {
			if (seg != segkmap) {
				error = EFAULT;
				break;
			}
		}
		if (len <= PAGESIZE)
			error = cachefs_getapage(vp, (u_offset_t)off, len,
			    protp, pl, plsz, seg, addr, rw, cr);
		else
			error = pvn_getpages(cachefs_getapage, vp,
			    (u_offset_t)off, len, protp, pl, plsz, seg, addr,
			    rw, cr);
		if (error == 0)
			break;

		/* ENOSPC in nocache mode / EAGAIN: retry from scratch */
		if (((cp->c_flags & CN_NOCACHE) && (error == ENOSPC)) ||
		    error == EAGAIN) {
			connected = 0;
			continue;
		}
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			if (CFS_TIMEOUT(fscp, error)) {
				if (cachefs_cd_access_miss(fscp)) {
					/* bypass the cache; go to back fs */
					if (len <= PAGESIZE)
						error = cachefs_getapage_back(
						    vp, (u_offset_t)off,
						    len, protp, pl,
						    plsz, seg, addr, rw, cr);
					else
						error = pvn_getpages(
						    cachefs_getapage_back, vp,
						    (u_offset_t)off, len,
						    protp, pl,
						    plsz, seg, addr, rw, cr);
					if (!CFS_TIMEOUT(fscp, error) &&
					    (error != EAGAIN))
						break;
					delay(5*hz);
					connected = 0;
					continue;
				}
				connected = 1;
				continue;
			}
		}
		break;
	}

	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GETPAGE))
		cachefs_log_getpage(cachep, error, vp->v_vfsp,
		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
		    crgetuid(cr), off, len);

	if (held) {
		cachefs_cd_release(fscp);
	}

out:
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_getpage: EXIT vp %p error %d\n",
		    (void *)vp, error);
#endif
	return (error);
}

/*
 * cachefs_getpage_backfs_nfsv4
 *
 * Call NFSv4 back filesystem to handle the getpage (cachefs
 * pass-through support for NFSv4).
 */
static int
cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off, size_t len,
    uint_t *protp, struct page *pl[], size_t plsz,
    struct seg *seg, caddr_t addr, enum seg_rw rw,
    cred_t *cr)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	vnode_t *backvp;
	int error;

	/*
	 * For NFSv4 pass-through to work, only connected operation is
	 * supported, the cnode backvp must exist, and cachefs optional
	 * (eg., disconnectable) flags are turned off. Assert these
	 * conditions for the getpage operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	/* Call backfs vnode op after extracting backvp */
	mutex_enter(&cp->c_statelock);
	backvp = cp->c_backvp;
	mutex_exit(&cp->c_statelock);

	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_getpage_backfs_nfsv4: cnode %p, backvp %p\n",
	    cp, backvp));
	error = VOP_GETPAGE(backvp, off, len, protp, pl, plsz, seg,
	    addr, rw, cr, NULL);

	return (error);
}

/*
 * Called from pvn_getpages or cachefs_getpage to get a particular page.
 */
/*ARGSUSED*/
static int
cachefs_getapage(struct vnode *vp, u_offset_t off, size_t len, uint_t *protp,
	struct page *pl[], size_t plsz, struct seg *seg, caddr_t addr,
	enum seg_rw rw, cred_t *cr)
{
	cnode_t *cp = VTOC(vp);
	page_t **ppp, *pp = NULL;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	cachefscache_t *cachep = fscp->fs_cache;
	int error = 0;
	struct page **ourpl;
	struct page *ourstackpl[17]; /* see ASSERT() below for 17 */
	int index = 0;
	int downgrade;
	int have_statelock = 0;
	u_offset_t popoff;
	size_t popsize = 0;

	ASSERT(((DEF_POP_SIZE / PAGESIZE) + 1) <= 17);

	/* use the stack array unless the population size needs more slots */
	if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
		ourpl = cachefs_kmem_alloc(sizeof (struct page *) *
		    ((fscp->fs_info.fi_popsize / PAGESIZE) + 1), KM_SLEEP);
	else
		ourpl = ourstackpl;

	ourpl[0] = NULL;
	off = off & (offset_t)PAGEMASK;
again:
	/*
	 * Look for the page
	 */
	if (page_exists(vp, off) == 0) {
		/*
		 * Need to do work to get the page.
		 * Grab our lock because we are going to
		 * modify the state of the cnode.
		 */
		if (! have_statelock) {
			mutex_enter(&cp->c_statelock);
			have_statelock = 1;
		}
		/*
		 * If we're in NOCACHE mode, we will need a backvp
		 */
		if (cp->c_flags & CN_NOCACHE) {
			if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
				error = ETIMEDOUT;
				goto out;
			}
			if (cp->c_backvp == NULL) {
				error = cachefs_getbackvp(fscp, cp);
				if (error)
					goto out;
			}
			error = VOP_GETPAGE(cp->c_backvp, off,
			    PAGESIZE, protp, ourpl, PAGESIZE, seg,
			    addr, S_READ, cr, NULL);
			/*
			 * backfs returns EFAULT when we are trying for a
			 * page beyond EOF but cachefs has the knowledge that
			 * it is not beyond EOF be cause cp->c_size is
			 * greater then the offset requested.
			 */
			if (error == EFAULT) {
				error = 0;
				pp = page_create_va(vp, off, PAGESIZE,
				    PG_EXCL | PG_WAIT, seg, addr);
				if (pp == NULL)
					goto again;
				/* hand back a page of zeroes */
				pagezero(pp, 0, PAGESIZE);
				pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
				goto out;
			}
			if (error)
				goto out;
			goto getpages;
		}
		/*
		 * We need a front file. If we can't get it,
		 * put the cnode in NOCACHE mode and try again.
		 */
		if (cp->c_frontvp == NULL) {
			error = cachefs_getfrontfile(cp);
			if (error) {
				cachefs_nocache(cp);
				error = EAGAIN;
				goto out;
			}
		}
		/*
		 * Check if the front file needs population.
		 * If population is necessary, make sure we have a
		 * backvp as well. We will get the page from the backvp.
		 * bug 4152459-
		 * But if the file system is in disconnected mode
		 * and the file is a local file then do not check the
		 * allocmap.
		 */
		if (((fscp->fs_cdconnected == CFS_CD_CONNECTED) ||
		    ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) &&
		    (cachefs_check_allocmap(cp, off) == 0)) {
			if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
				error = ETIMEDOUT;
				goto out;
			}
			if (cp->c_backvp == NULL) {
				error = cachefs_getbackvp(fscp, cp);
				if (error)
					goto out;
			}
			if (cp->c_filegrp->fg_flags & CFS_FG_WRITE) {
				/* populate a cluster around off */
				cachefs_cluster_allocmap(off, &popoff,
				    &popsize,
				    fscp->fs_info.fi_popsize, cp);
				if (popsize != 0) {
					error = cachefs_populate(cp,
					    popoff, popsize,
					    cp->c_frontvp, cp->c_backvp,
					    cp->c_size, cr);
					if (error) {
						cachefs_nocache(cp);
						error = EAGAIN;
						goto out;
					} else {
						cp->c_flags |=
						    CN_UPDATED |
						    CN_NEED_FRONT_SYNC |
						    CN_POPULATION_PENDING;
					}
					/* remaining bytes from off onward */
					popsize = popsize - (off - popoff);
				} else {
					popsize = PAGESIZE;
				}
			}
			/* else XXX assert CN_NOCACHE? */
			error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
			    PAGESIZE, protp, ourpl, popsize,
			    seg, addr, S_READ, cr, NULL);
			if (error)
				goto out;
			fscp->fs_stats.st_misses++;
		} else {
			if (cp->c_flags & CN_POPULATION_PENDING) {
				/* flush pending population before reading */
				error = VOP_FSYNC(cp->c_frontvp, FSYNC, cr,
				    NULL);
				cp->c_flags &= ~CN_POPULATION_PENDING;
				if (error) {
					cachefs_nocache(cp);
					error = EAGAIN;
					goto out;
				}
			}
			/*
			 * File was populated so we get the page from the
			 * frontvp
			 */
			error = VOP_GETPAGE(cp->c_frontvp, (offset_t)off,
			    PAGESIZE, protp, ourpl, PAGESIZE, seg, addr,
			    rw, cr, NULL);
			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GPFRONT))
				cachefs_log_gpfront(cachep, error,
				    fscp->fs_cfsvfsp,
				    &cp->c_metadata.md_cookie, cp->c_fileno,
				    crgetuid(cr), off, PAGESIZE);
			if (error) {
				cachefs_nocache(cp);
				error = EAGAIN;
				goto out;
			}
			fscp->fs_stats.st_hits++;
		}
getpages:
		ASSERT(have_statelock);
		if (have_statelock) {
			mutex_exit(&cp->c_statelock);
			have_statelock = 0;
		}
		downgrade = 0;
		/*
		 * The pages came in named by the front/back vnode; rename
		 * each wanted page to this (cachefs) vnode.  "index" is
		 * left at the first page at or above the requested offset.
		 */
		for (ppp = ourpl; *ppp; ppp++) {
			if ((*ppp)->p_offset < off) {
				/* below the requested offset; release it */
				index++;
				page_unlock(*ppp);
				continue;
			}
			if (PAGE_SHARED(*ppp)) {
				/* rename requires the exclusive lock */
				if (page_tryupgrade(*ppp) == 0) {
					for (ppp = &ourpl[index]; *ppp; ppp++)
						page_unlock(*ppp);
					error = EAGAIN;
					goto out;
				}
				downgrade = 1;
			}
			ASSERT(PAGE_EXCL(*ppp));
			(void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
			page_rename(*ppp, vp, (*ppp)->p_offset);
		}
		pl[0] = ourpl[index];
		pl[1] = NULL;
		if (downgrade) {
			page_downgrade(ourpl[index]);
		}
		/* Unlock the rest of the pages from the cluster */
		for (ppp = &ourpl[index+1]; *ppp; ppp++)
			page_unlock(*ppp);
	} else {
		ASSERT(!
have_statelock);
		if (have_statelock) {
			mutex_exit(&cp->c_statelock);
			have_statelock = 0;
		}
		/* XXX SE_SHARED probably isn't what we *always* want */
		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
			/* page vanished after page_exists(); retry */
			cachefs_lostpage++;
			goto again;
		}
		pl[0] = pp;
		pl[1] = NULL;
		/* XXX increment st_hits? i don't think so, but... */
	}

out:
	if (have_statelock) {
		mutex_exit(&cp->c_statelock);
		have_statelock = 0;
	}
	/* free the page list only if it was kmem-allocated above */
	if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
		cachefs_kmem_free(ourpl, sizeof (struct page *) *
		    ((fscp->fs_info.fi_popsize / PAGESIZE) + 1));
	return (error);
}

/* gets a page but only from the back fs */
/*ARGSUSED*/
static int
cachefs_getapage_back(struct vnode *vp, u_offset_t off, size_t len,
    uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
    caddr_t addr, enum seg_rw rw, cred_t *cr)
{
	cnode_t *cp = VTOC(vp);
	page_t **ppp, *pp = NULL;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error = 0;
	struct page *ourpl[17];
	int index = 0;
	int have_statelock = 0;
	int downgrade;

	/*
	 * Grab the cnode statelock so the cnode state won't change
	 * while we're in here.
	 */
	ourpl[0] = NULL;
	off = off & (offset_t)PAGEMASK;
again:
	if (page_exists(vp, off) == 0) {
		if (! have_statelock) {
			mutex_enter(&cp->c_statelock);
			have_statelock = 1;
		}

		if (cp->c_backvp == NULL) {
			error = cachefs_getbackvp(fscp, cp);
			if (error)
				goto out;
		}
		error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
		    PAGESIZE, protp, ourpl, PAGESIZE, seg,
		    addr, S_READ, cr, NULL);
		if (error)
			goto out;

		if (have_statelock) {
			mutex_exit(&cp->c_statelock);
			have_statelock = 0;
		}
		downgrade = 0;
		/*
		 * Rename the pages from the back vnode to this vnode;
		 * same protocol as cachefs_getapage() above.
		 */
		for (ppp = ourpl; *ppp; ppp++) {
			if ((*ppp)->p_offset < off) {
				index++;
				page_unlock(*ppp);
				continue;
			}
			if (PAGE_SHARED(*ppp)) {
				if (page_tryupgrade(*ppp) == 0) {
					for (ppp = &ourpl[index]; *ppp; ppp++)
						page_unlock(*ppp);
					error = EAGAIN;
					goto out;
				}
				downgrade = 1;
			}
			ASSERT(PAGE_EXCL(*ppp));
			(void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
			page_rename(*ppp, vp, (*ppp)->p_offset);
		}
		pl[0] = ourpl[index];
		pl[1] = NULL;
		if (downgrade) {
			page_downgrade(ourpl[index]);
		}
		/* Unlock the rest of the pages from the cluster */
		for (ppp = &ourpl[index+1]; *ppp; ppp++)
			page_unlock(*ppp);
	} else {
		ASSERT(! have_statelock);
		if (have_statelock) {
			mutex_exit(&cp->c_statelock);
			have_statelock = 0;
		}
		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
			cachefs_lostpage++;
			goto again;
		}
		pl[0] = pp;
		pl[1] = NULL;
	}

out:
	if (have_statelock) {
		mutex_exit(&cp->c_statelock);
		have_statelock = 0;
	}
	return (error);
}

/*ARGSUSED*/
static int
cachefs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
	caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	int error = 0;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int held = 0;
	int connected = 0;

	if (getzoneid() != GLOBAL_ZONEID)
		return (EPERM);

	/* Call backfilesytem if NFSv4 */
	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_putpage_backfs_nfsv4(vp, off, len, flags, cr);
		goto out;
	}

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;

		error = cachefs_putpage_common(vp, off, len, flags, cr);
		if (error == 0)
			break;

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			/*
			 * NOTE(review): when memory is critically low the
			 * disconnected putpage is silently dropped
			 * (error forced to 0) rather than blocking.
			 */
			if (NOMEMWAIT()) {
				error = 0;
				goto out;
			}
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			}
		}
		break;
	}

out:

	if (held) {
		cachefs_cd_release(fscp);
	}

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
	return (error);
}

/*
 * cachefs_putpage_backfs_nfsv4
 *
 * Call NFSv4 back filesystem to handle the putpage (cachefs
 * pass-through
 * support for NFSv4).
 */
static int
cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off, size_t len, int flags,
    cred_t *cr)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	vnode_t *backvp;
	int error;

	/*
	 * For NFSv4 pass-through to work, only connected operation is
	 * supported, the cnode backvp must exist, and cachefs optional
	 * (eg., disconnectable) flags are turned off. Assert these
	 * conditions for the putpage operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	/* Call backfs vnode op after extracting backvp */
	mutex_enter(&cp->c_statelock);
	backvp = cp->c_backvp;
	mutex_exit(&cp->c_statelock);

	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_putpage_backfs_nfsv4: cnode %p, backvp %p\n",
	    cp, backvp));
	error = VOP_PUTPAGE(backvp, off, len, flags, cr, NULL);

	return (error);
}

/*
 * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
 * If len == 0, do from off to EOF.
 *
 * The normal cases should be len == 0 & off == 0 (entire vp list),
 * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
 * (from pageout).
 */

/*ARGSUSED*/
int
cachefs_putpage_common(struct vnode *vp, offset_t off, size_t len,
    int flags, cred_t *cr)
{
	struct cnode *cp = VTOC(vp);
	struct page *pp;
	size_t io_len;
	u_offset_t eoff, io_off;
	int error = 0;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	cachefscache_t *cachep = fscp->fs_cache;

	/* nothing to push on a read-only fs unless invalidating */
	if (len == 0 && (flags & B_INVAL) == 0 && vn_is_readonly(vp)) {
		return (0);
	}
	/* no cached pages, or range entirely past EOF and not invalidating */
	if (!vn_has_cached_data(vp) || (off >= cp->c_size &&
	    (flags & B_INVAL) == 0))
		return (0);

	/*
	 * Should never have cached data for the cachefs vnode
	 * if NFSv4 is in use.
	 */
	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);

	/*
	 * If this is an async putpage let a thread handle it.
	 */
	if (flags & B_ASYNC) {
		struct cachefs_req *rp;
		int tflags = (flags & ~(B_ASYNC|B_DONTNEED));

		if (ttoproc(curthread) == proc_pageout) {
			/*
			 * If this is the page daemon we
			 * do the push synchronously (Dangerous!) and hope
			 * we can free enough to keep running...
			 */
			flags &= ~B_ASYNC;
			goto again;
		}

		if (! cachefs_async_okay()) {

			/*
			 * this is somewhat like NFS's behavior. keep
			 * the system from thrashing. we've seen
			 * cases where async queues get out of
			 * control, especially if
			 * madvise(MADV_SEQUENTIAL) is done on a large
			 * mmap()ed file that is read sequentially.
			 */

			flags &= ~B_ASYNC;
			goto again;
		}

		/*
		 * if no flags other than B_ASYNC were set,
		 * we coalesce putpage requests into a single one for the
		 * whole file (len = off = 0). If such a request is
		 * already queued, we're done.
		 *
		 * If there are other flags set (e.g., B_INVAL), we don't
		 * attempt to coalesce and we use the specified length and
		 * offset.
		 */
		rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
		mutex_enter(&cp->c_iomutex);
		if ((cp->c_ioflags & CIO_PUTPAGES) == 0 || tflags != 0) {
			rp->cfs_cmd = CFS_PUTPAGE;
			rp->cfs_req_u.cu_putpage.cp_vp = vp;
			if (tflags == 0) {
				off = len = 0;
				cp->c_ioflags |= CIO_PUTPAGES;
			}
			rp->cfs_req_u.cu_putpage.cp_off = off;
			rp->cfs_req_u.cu_putpage.cp_len = (uint_t)len;
			rp->cfs_req_u.cu_putpage.cp_flags = flags & ~B_ASYNC;
			rp->cfs_cr = cr;
			crhold(rp->cfs_cr);
			VN_HOLD(vp);
			cp->c_nio++;
			cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));
		} else {
			/* a whole-file request is already queued */
			kmem_cache_free(cachefs_req_cache, rp);
		}

		mutex_exit(&cp->c_iomutex);
		return (0);
	}


again:
	if (len == 0) {
		/*
		 * Search the entire vp list for pages >= off
		 */
		error = pvn_vplist_dirty(vp, off, cachefs_push, flags, cr);
	} else {
		/*
		 * Do a range from [off...off + len] looking for pages
		 * to deal with.
		 */
		eoff = (u_offset_t)off + len;
		for (io_off = off; io_off < eoff && io_off < cp->c_size;
		    io_off += io_len) {
			/*
			 * If we are not invalidating, synchronously
			 * freeing or writing pages use the routine
			 * page_lookup_nowait() to prevent reclaiming
			 * them from the free list.
			 */
			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
				pp = page_lookup(vp, io_off,
				    (flags & (B_INVAL | B_FREE)) ?
				    SE_EXCL : SE_SHARED);
			} else {
				/* XXX this looks like dead code */
				pp = page_lookup_nowait(vp, io_off,
				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
			}

			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
				io_len = PAGESIZE;
			else {
				error = cachefs_push(vp, pp, &io_off,
				    &io_len, flags, cr);
				if (error != 0)
					break;
				/*
				 * "io_off" and "io_len" are returned as
				 * the range of pages we actually wrote.
				 * This allows us to skip ahead more quickly
				 * since several pages may've been dealt
				 * with by this iteration of the loop.
				 */
			}
		}
	}

	/* the whole file was pushed cleanly; clear the dirty bit */
	if (error == 0 && off == 0 && (len == 0 || len >= cp->c_size)) {
		cp->c_flags &= ~CDIRTY;
	}

	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_PUTPAGE))
		cachefs_log_putpage(cachep, error, fscp->fs_cfsvfsp,
		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
		    crgetuid(cr), off, len);

	return (error);

}

/*ARGSUSED*/
static int
cachefs_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
    size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
    caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	struct segvn_crargs vn_a;
	int error;
	int held = 0;
	int writing;
	int connected = 0;

#ifdef CFSDEBUG
	u_offset_t offx = (u_offset_t)off;

	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_map: ENTER vp %p off %lld len %lu flags %d\n",
		    (void *)vp, offx, len, flags);
#endif
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (vp->v_flag & VNOMAP) {
		error = ENOSYS;
		goto out;
	}
	/* reject negative offsets and ranges that wrap */
	if (off < 0 || (offset_t)(off + len) < 0) {
		error = ENXIO;
		goto out;
	}
	if (vp->v_type != VREG) {
		error = ENODEV;
		goto out;
	}

	/*
	 * Check to see if the vnode is currently marked as not cachable.
	 * If so, we have to refuse the map request as this violates the
	 * don't cache attribute.
	 */
	if (vp->v_flag & VNOCACHE)
		return (EAGAIN);

#ifdef OBSOLETE
	/*
	 * If file is being locked, disallow mapping.
8142 */ 8143 if (vn_has_flocks(vp)) { 8144 error = EAGAIN; 8145 goto out; 8146 } 8147 #endif 8148 8149 /* call backfilesystem if NFSv4 */ 8150 if (CFS_ISFS_BACKFS_NFSV4(fscp)) { 8151 error = cachefs_map_backfs_nfsv4(vp, off, as, addrp, len, prot, 8152 maxprot, flags, cr); 8153 goto out; 8154 } 8155 8156 writing = (prot & PROT_WRITE && ((flags & MAP_PRIVATE) == 0)); 8157 8158 for (;;) { 8159 /* get (or renew) access to the file system */ 8160 if (held) { 8161 cachefs_cd_release(fscp); 8162 held = 0; 8163 } 8164 error = cachefs_cd_access(fscp, connected, writing); 8165 if (error) 8166 break; 8167 held = 1; 8168 8169 if (writing) { 8170 mutex_enter(&cp->c_statelock); 8171 if (CFS_ISFS_WRITE_AROUND(fscp)) { 8172 if (fscp->fs_cdconnected != CFS_CD_CONNECTED) { 8173 connected = 1; 8174 continue; 8175 } else { 8176 cachefs_nocache(cp); 8177 } 8178 } 8179 8180 /* 8181 * CN_MAPWRITE is for an optimization in cachefs_delmap. 8182 * If CN_MAPWRITE is not set then cachefs_delmap does 8183 * not need to try to push out any pages. 8184 * This bit gets cleared when the cnode goes inactive. 8185 */ 8186 cp->c_flags |= CN_MAPWRITE; 8187 8188 mutex_exit(&cp->c_statelock); 8189 } 8190 break; 8191 } 8192 8193 if (held) { 8194 cachefs_cd_release(fscp); 8195 } 8196 8197 as_rangelock(as); 8198 error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 8199 if (error != 0) { 8200 as_rangeunlock(as); 8201 goto out; 8202 } 8203 8204 /* 8205 * package up all the data passed in into a segvn_args struct and 8206 * call as_map with segvn_create function to create a new segment 8207 * in the address space. 
8208 */ 8209 vn_a.vp = vp; 8210 vn_a.offset = off; 8211 vn_a.type = flags & MAP_TYPE; 8212 vn_a.prot = (uchar_t)prot; 8213 vn_a.maxprot = (uchar_t)maxprot; 8214 vn_a.cred = cr; 8215 vn_a.amp = NULL; 8216 vn_a.flags = flags & ~MAP_TYPE; 8217 vn_a.szc = 0; 8218 vn_a.lgrp_mem_policy_flags = 0; 8219 error = as_map(as, *addrp, len, segvn_create, &vn_a); 8220 as_rangeunlock(as); 8221 out: 8222 8223 #ifdef CFS_CD_DEBUG 8224 ASSERT((curthread->t_flag & T_CD_HELD) == 0); 8225 #endif 8226 #ifdef CFSDEBUG 8227 CFS_DEBUG(CFSDEBUG_VOPS) 8228 printf("cachefs_map: EXIT vp %p error %d\n", (void *)vp, error); 8229 #endif 8230 return (error); 8231 } 8232 8233 /* 8234 * cachefs_map_backfs_nfsv4 8235 * 8236 * Call NFSv4 back filesystem to handle the map (cachefs 8237 * pass-through support for NFSv4). 8238 */ 8239 static int 8240 cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off, struct as *as, 8241 caddr_t *addrp, size_t len, uchar_t prot, 8242 uchar_t maxprot, uint_t flags, cred_t *cr) 8243 { 8244 cnode_t *cp = VTOC(vp); 8245 fscache_t *fscp = C_TO_FSCACHE(cp); 8246 vnode_t *backvp; 8247 int error; 8248 8249 /* 8250 * For NFSv4 pass-through to work, only connected operation is 8251 * supported, the cnode backvp must exist, and cachefs optional 8252 * (eg., disconnectable) flags are turned off. Assert these 8253 * conditions for the map operation. 
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	/* Call backfs vnode op after extracting backvp */
	mutex_enter(&cp->c_statelock);
	backvp = cp->c_backvp;
	mutex_exit(&cp->c_statelock);

	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_map_backfs_nfsv4: cnode %p, backvp %p\n",
	    cp, backvp));
	error = VOP_MAP(backvp, off, as, addrp, len, prot, maxprot, flags, cr,
	    NULL);

	return (error);
}

/*
 * cachefs_addmap -- VOP_ADDMAP entry point.  Bumps the cnode's count
 * of mapped pages by the (page-rounded) length of the new mapping.
 * Global zone only; never called for an NFSv4 back filesystem.
 */
/*ARGSUSED*/
static int
cachefs_addmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
    cred_t *cr, caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);

	if (getzoneid() != GLOBAL_ZONEID)
		return (EPERM);

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	/*
	 * Check this is not an NFSv4 filesystem, as the mapping
	 * is not done on the cachefs filesystem if NFSv4 is in
	 * use.
	 */
	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);

	mutex_enter(&cp->c_statelock);
	cp->c_mapcnt += btopr(len);	/* btopr: bytes to pages, rounded up */
	mutex_exit(&cp->c_statelock);
	return (0);
}

/*
 * cachefs_delmap -- VOP_DELMAP entry point.  Drops the mapped-page
 * count and, once the last writable mapping goes away, pushes any
 * remaining dirty pages via cachefs_putpage_common, retrying across
 * connected/disconnected transitions.
 */
/*ARGSUSED*/
static int
cachefs_delmap(struct vnode *vp, offset_t off, struct as *as,
    caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
    cred_t *cr, caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error;
	int connected = 0;
	int held = 0;

	/*
	 * The file may be passed in to (or inherited into) the zone, so we
	 * need to let this operation go through since it happens as part of
	 * exiting.
	 */
	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	/*
	 * Check this is not an NFSv4 filesystem, as the mapping
	 * is not done on the cachefs filesystem if NFSv4 is in
	 * use.
	 */
	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);

	mutex_enter(&cp->c_statelock);
	cp->c_mapcnt -= btopr(len);
	ASSERT(cp->c_mapcnt >= 0);
	mutex_exit(&cp->c_statelock);

	/*
	 * Nothing to push unless this was the last mapping, there are
	 * cached pages, and some mapping was writable (CN_MAPWRITE is
	 * set by cachefs_map for shared-write mappings).
	 * NOTE(review): c_mapcnt is re-read here without c_statelock;
	 * presumably a benign race -- confirm.
	 */
	if (cp->c_mapcnt || !vn_has_cached_data(vp) ||
	    ((cp->c_flags & CN_MAPWRITE) == 0))
		return (0);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;
		connected = 0;

		/* push everything: offset 0, length 0 means "to EOF" */
		error = cachefs_putpage_common(vp, (offset_t)0,
		    (uint_t)0, 0, cr);
		if (CFS_TIMEOUT(fscp, error)) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				continue;
			} else {
				connected = 1;
				continue;
			}
		}

		/* if no space left in cache, wait until connected */
		if ((error == ENOSPC) &&
		    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
			connected = 1;
			continue;
		}

		/* pick up any async write error recorded on the cnode */
		mutex_enter(&cp->c_statelock);
		if (!error)
			error = cp->c_error;
		cp->c_error = 0;
		mutex_exit(&cp->c_statelock);
		break;
	}

	if (held)
		cachefs_cd_release(fscp);

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
	return (error);
}

/*
 * cachefs_frlock -- VOP_FRLOCK entry point.  Record/advisory locking
 * is delegated to the back filesystem; locking a file also turns off
 * caching for it (VNOCACHE) to avoid stale page-cache data shared
 * between clients.
 */
/* ARGSUSED */
static int
cachefs_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
    offset_t offset, struct flk_callback *flk_cbp, cred_t *cr,
    caller_context_t *ct)
{
	struct cnode *cp = VTOC(vp);
	int error;
	struct fscache *fscp = C_TO_FSCACHE(cp);
	vnode_t *backvp;
	int held = 0;
	int connected = 0;

	if (getzoneid() != GLOBAL_ZONEID)
		return (EPERM);

	if ((cmd != F_GETLK) && (cmd != F_SETLK) && (cmd != F_SETLKW))
		return (EINVAL);

	/* Disallow locking of files that
are currently mapped */
	if (((cmd == F_SETLK) || (cmd == F_SETLKW)) && (cp->c_mapcnt > 0)) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		return (EAGAIN);
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the frlock operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	/* XXX bob: nfs does a bunch more checks than we do */
	if (CFS_ISFS_LLOCK(fscp)) {
		/* local-locking mount: use the generic in-kernel lock code */
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
	}

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			break;
		held = 1;

		/* if not connected, quit or wait */
		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
			connected = 1;
			continue;
		}

		/* nocache the file */
		if ((cp->c_flags & CN_NOCACHE) == 0 &&
		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
			mutex_enter(&cp->c_statelock);
			cachefs_nocache(cp);
			mutex_exit(&cp->c_statelock);
		}

		/*
		 * XXX bob: probably should do a consistency check
		 * Pass arguments unchanged if NFSv4 is the backfs.
		 */
		/* convert SEEK_END-relative (whence == 2) to absolute */
		if (bfp->l_whence == 2 && CFS_ISFS_BACKFS_NFSV4(fscp) == 0) {
			bfp->l_start += cp->c_size;
			bfp->l_whence = 0;
		}

		/* get the back vp */
		mutex_enter(&cp->c_statelock);
		if (cp->c_backvp == NULL) {
			error = cachefs_getbackvp(fscp, cp);
			if (error) {
				mutex_exit(&cp->c_statelock);
				break;
			}
		}
		backvp = cp->c_backvp;
		VN_HOLD(backvp);	/* hold across the dropped statelock */
		mutex_exit(&cp->c_statelock);

		/*
		 * make sure we can flush currently dirty pages before
		 * allowing the lock
		 */
		if (bfp->l_type != F_UNLCK && cmd != F_GETLK &&
		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
			error = cachefs_putpage(
			    vp, (offset_t)0, 0, B_INVAL, cr, ct);
			if (error) {
				error = ENOLCK;
				VN_RELE(backvp);
				break;
			}
		}

		/* do lock on the back file */
		CFS_DPRINT_BACKFS_NFSV4(fscp,
		    ("cachefs_frlock (nfsv4): cp %p, backvp %p\n",
		    cp, backvp));
		error = VOP_FRLOCK(backvp, cmd, bfp, flag, offset, NULL, cr,
		    ct);
		VN_RELE(backvp);
		if (CFS_TIMEOUT(fscp, error)) {
			connected = 1;
			continue;
		}
		break;
	}

	if (held) {
		cachefs_cd_release(fscp);
	}

	/*
	 * If we are setting a lock mark the vnode VNOCACHE so the page
	 * cache does not give inconsistent results on locked files shared
	 * between clients. The VNOCACHE flag is never turned off as long
	 * as the vnode is active because it is hard to figure out when the
	 * last lock is gone.
	 * XXX - what if some already has the vnode mapped in?
	 * XXX bob: see nfs3_frlock, do not allow locking if vnode mapped in.
	 */
	if ((error == 0) && (bfp->l_type != F_UNLCK) && (cmd != F_GETLK) &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp))
		vp->v_flag |= VNOCACHE;

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
	return (error);
}

/*
 * Free storage space associated with the specified vnode.
 * The portion
 * to be freed is specified by bfp->l_start and bfp->l_len (already
 * normalized to a "whence" of 0).
 *
 * This is an experimental facility whose continued existence is not
 * guaranteed. Currently, we only support the special case
 * of l_len == 0, meaning free to end of file.
 */
/* ARGSUSED */
static int
cachefs_space(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
    offset_t offset, cred_t *cr, caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error;

	ASSERT(vp->v_type == VREG);
	if (getzoneid() != GLOBAL_ZONEID)
		return (EPERM);
	if (cmd != F_FREESP)
		return (EINVAL);

	/* call backfilesystem if NFSv4 */
	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_space_backfs_nfsv4(vp, cmd, bfp, flag,
		    offset, cr, ct);
		goto out;
	}

	/* normalize l_start to whence 0, then truncate via setattr */
	if ((error = convoff(vp, bfp, 0, offset)) == 0) {
		ASSERT(bfp->l_start >= 0);
		if (bfp->l_len == 0) {
			struct vattr va;

			va.va_size = bfp->l_start;
			va.va_mask = AT_SIZE;
			error = cachefs_setattr(vp, &va, 0, cr, ct);
		} else
			error = EINVAL;	/* only free-to-EOF is supported */
	}

out:
	return (error);
}

/*
 * cachefs_space_backfs_nfsv4
 *
 * Call NFSv4 back filesystem to handle the space (cachefs
 * pass-through support for NFSv4).
 */
static int
cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd, struct flock64 *bfp,
    int flag, offset_t offset, cred_t *cr, caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	vnode_t *backvp;
	int error;

	/*
	 * For NFSv4 pass-through to work, only connected operation is
	 * supported, the cnode backvp must exist, and cachefs optional
	 * (eg., disconnectable) flags are turned off. Assert these
	 * conditions for the space operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	/* Call backfs vnode op after extracting backvp */
	mutex_enter(&cp->c_statelock);
	backvp = cp->c_backvp;
	mutex_exit(&cp->c_statelock);

	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_space_backfs_nfsv4: cnode %p, backvp %p\n",
	    cp, backvp));
	error = VOP_SPACE(backvp, cmd, bfp, flag, offset, cr, ct);

	return (error);
}

/*
 * cachefs_realvp -- VOP_REALVP: cachefs never exposes an underlying
 * "real" vnode, so this always fails.
 */
/*ARGSUSED*/
static int
cachefs_realvp(struct vnode *vp, struct vnode **vpp, caller_context_t *ct)
{
	return (EINVAL);
}

/* cachefs_pageio -- VOP_PAGEIO is not supported by cachefs. */
/*ARGSUSED*/
static int
cachefs_pageio(struct vnode *vp, page_t *pp, u_offset_t io_off, size_t io_len,
    int flags, cred_t *cr, caller_context_t *ct)
{
	return (ENOSYS);
}

/*
 * cachefs_setsecattr_connected -- set an ACL while connected to the
 * back filesystem: apply it on the backvp, then update/invalidate the
 * locally cached copy.  Caller holds c_rwlock for writing; c_statelock
 * is taken here.
 */
static int
cachefs_setsecattr_connected(cnode_t *cp,
    vsecattr_t *vsec, int flag, cred_t *cr)
{
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error = 0;

	ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);

	mutex_enter(&cp->c_statelock);

	if (cp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, cp);
		if (error) {
			cachefs_nocache(cp);
			goto out;
		}
	}

	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
	if (error)
		goto out;

	/* only owner can set acl */
	if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
		error = EINVAL;
		goto out;
	}


	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_setsecattr (nfsv4): cp %p, backvp %p",
	    cp, cp->c_backvp));
	error = VOP_SETSECATTR(cp->c_backvp, vsec, flag, cr, NULL);
	if (error) {
		goto out;
	}

	/* filegrp not writable: can't cache the new ACL, stop caching */
	if ((cp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
		cachefs_nocache(cp);
		goto out;
	}

	CFSOP_MODIFY_COBJECT(fscp, cp, cr);

	/* acl may have changed
permissions -- handle this. */
	if (!CFS_ISFS_BACKFS_NFSV4(fscp))
		cachefs_acl2perm(cp, vsec);

	if ((cp->c_flags & CN_NOCACHE) == 0 &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_cacheacl(cp, vsec);
		if (error != 0) {
#ifdef CFSDEBUG
			CFS_DEBUG(CFSDEBUG_VOPS)
				printf("cachefs_setacl: cacheacl: error %d\n",
				    error);
#endif /* CFSDEBUG */
			/* caching failure is non-fatal; just stop caching */
			error = 0;
			cachefs_nocache(cp);
		}
	}

out:
	mutex_exit(&cp->c_statelock);

	return (error);
}

/*
 * cachefs_setsecattr_disconnected -- set an ACL while disconnected:
 * record the operation in the dlog for later roll to the server and
 * apply the change to the locally cached metadata/ACL.  On any error
 * the cached mode bits are restored to their original value.
 */
static int
cachefs_setsecattr_disconnected(cnode_t *cp,
    vsecattr_t *vsec, int flag, cred_t *cr)
{
	fscache_t *fscp = C_TO_FSCACHE(cp);
	mode_t failmode = cp->c_metadata.md_vattr.va_mode;
	off_t commit = 0;	/* dlog offset; nonzero once logged */
	int error = 0;

	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);

	/* write-around filesystems cannot modify while disconnected */
	if (CFS_ISFS_WRITE_AROUND(fscp))
		return (ETIMEDOUT);

	mutex_enter(&cp->c_statelock);

	/* only owner can set acl */
	if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
		error = EINVAL;
		goto out;
	}

	if (cp->c_metadata.md_flags & MD_NEEDATTRS) {
		error = ETIMEDOUT;
		goto out;
	}

	/* XXX do i need this? is this right? */
	if (cp->c_flags & CN_ALLOC_PENDING) {
		if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
			(void) filegrp_allocattr(cp->c_filegrp);
		}
		error = filegrp_create_metadata(cp->c_filegrp,
		    &cp->c_metadata, &cp->c_id);
		if (error) {
			goto out;
		}
		cp->c_flags &= ~CN_ALLOC_PENDING;
	}

	/* XXX is this right? */
	if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
		error = cachefs_dlog_cidmap(fscp);
		if (error) {
			error = ENOSPC;
			goto out;
		}
		cp->c_metadata.md_flags |= MD_MAPPING;
		cp->c_flags |= CN_UPDATED;
	}

	/*
	 * NOTE(review): if dlog_setsecattr fails (commit == 0) we fall
	 * through to out with error still 0, i.e. report success without
	 * having logged the change -- confirm whether ENOSPC was intended
	 * here as in other *_disconnected ops.
	 */
	commit = cachefs_dlog_setsecattr(fscp, vsec, flag, cp, cr);
	if (commit == 0)
		goto out;

	/* fix modes in metadata */
	cachefs_acl2perm(cp, vsec);

	if ((cp->c_flags & CN_NOCACHE) == 0) {
		error = cachefs_cacheacl(cp, vsec);
		if (error != 0) {
			goto out;
		}
	}

	/* XXX is this right? */
	if (cachefs_modified_alloc(cp)) {
		error = ENOSPC;
		goto out;
	}

out:
	if (error != 0)
		cp->c_metadata.md_vattr.va_mode = failmode;

	mutex_exit(&cp->c_statelock);

	if (commit) {
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX fix on panic? */
		}
	}

	return (error);
}

/*
 * cachefs_setsecattr -- VOP_SETSECATTR entry point.  Validates the
 * request, then dispatches to the connected or disconnected worker,
 * retrying across connected/disconnected state transitions.
 */
/*ARGSUSED*/
static int
cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr,
    caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int connected = 0;
	int held = 0;
	int error = 0;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_setsecattr: ENTER vp %p\n", (void *)vp);
#endif
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
		error = ENOSYS;
		goto out;
	}

	if (! cachefs_vtype_aclok(vp)) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the setsecattr operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	for (;;) {
		/* drop hold on file system */
		if (held) {
			/* Won't loop with NFSv4 connected operation */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}

		/* acquire access to the file system */
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;

		/* perform the setattr */
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
			error = cachefs_setsecattr_connected(cp,
			    vsec, flag, cr);
		else
			error = cachefs_setsecattr_disconnected(cp,
			    vsec, flag, cr);
		if (error) {
			/* if connected */
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				if (CFS_TIMEOUT(fscp, error)) {
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					connected = 0;
					continue;
				}
			}

			/* else must be disconnected */
			else {
				if (CFS_TIMEOUT(fscp, error)) {
					connected = 1;
					continue;
				}
			}
		}
		break;
	}

	if (held) {
		cachefs_cd_release(fscp);
	}
	return (error);

	/* out: only reached from the early validation failures above */
out:
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_setsecattr: EXIT error = %d\n", error);
#endif
	return (error);
}

/*
 * call this BEFORE calling cachefs_cacheacl(), as the latter will
 * sanitize the acl.
 */

/*
 * cachefs_acl2perm -- fold the USER_OBJ/GROUP_OBJ/OTHER_OBJ entries of
 * an ACL back into the cached va_mode permission bits, and record the
 * CLASS_OBJ mask in md_aclclass.  Marks the cnode metadata updated.
 */
static void
cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec)
{
	aclent_t *aclp;
	int i;

	for (i = 0; i < vsec->vsa_aclcnt; i++) {
		aclp = ((aclent_t *)vsec->vsa_aclentp) + i;
		switch (aclp->a_type) {
		case USER_OBJ:
			/* owner bits live in 0700 */
			cp->c_metadata.md_vattr.va_mode &= (~0700);
			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 6);
			break;

		case GROUP_OBJ:
			/* group bits live in 070 */
			cp->c_metadata.md_vattr.va_mode &= (~070);
			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 3);
			break;

		case OTHER_OBJ:
			/* other bits live in 07 */
			cp->c_metadata.md_vattr.va_mode &= (~07);
			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm);
			break;

		case CLASS_OBJ:
			cp->c_metadata.md_aclclass = aclp->a_perm;
			break;
		}
	}

	cp->c_flags |= CN_UPDATED;
}

/*
 * cachefs_getsecattr -- VOP_GETSECATTR entry point.  Fabricates an ACL
 * from the mode bits when the mount disallows ACLs; otherwise fetches
 * it via the connected or disconnected worker, retrying on timeouts.
 */
static int
cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr,
    caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int held = 0, connected = 0;
	int error = 0;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_getsecattr: ENTER vp %p\n", (void *)vp);
#endif

	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the getsecattr operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
		/* no ACL support: synthesize one from the mode bits */
		error = fs_fab_acl(vp, vsec, flag, cr, ct);
		goto out;
	}

	for (;;) {
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			break;
		held = 1;

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_getsecattr_connected(vp, vsec, flag,
			    cr);
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_getsecattr_disconnected(vp, vsec, flag,
			    cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/* maybe the server is back: try connected */
				if (cachefs_cd_access_miss(fscp)) {
					error = cachefs_getsecattr_connected(vp,
					    vsec, flag, cr);
					if (!CFS_TIMEOUT(fscp, error))
						break;
					delay(5*hz);
					connected = 0;
					continue;
				}
				connected = 1;
				continue;
			}
		}
		break;
	}

out:
	if (held)
		cachefs_cd_release(fscp);

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_getsecattr: EXIT error = %d\n", error);
#endif
	return (error);
}

/*
 * cachefs_shrlock -- VOP_SHRLOCK entry point.  Share reservations are
 * passed straight through to the back filesystem vnode.
 */
static int
cachefs_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
    caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error = 0;
	vnode_t *backvp;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_shrlock: ENTER vp %p\n", (void *)vp);
#endif

	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	/*
	 * Cachefs only
provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the shrlock operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	mutex_enter(&cp->c_statelock);
	if (cp->c_backvp == NULL)
		error = cachefs_getbackvp(fscp, cp);
	backvp = cp->c_backvp;
	mutex_exit(&cp->c_statelock);
	ASSERT((error != 0) || (backvp != NULL));

	if (error == 0) {
		CFS_DPRINT_BACKFS_NFSV4(fscp,
		    ("cachefs_shrlock (nfsv4): cp %p, backvp %p",
		    cp, backvp));
		error = VOP_SHRLOCK(backvp, cmd, shr, flag, cr, ct);
	}

out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_shrlock: EXIT error = %d\n", error);
#endif
	return (error);
}

/*
 * cachefs_getsecattr_connected -- fetch an ACL while connected: serve
 * it from the local cache when possible (counting a cache hit), else
 * get it from the backvp and try to cache the result.  Takes and
 * releases c_statelock.
 */
static int
cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
    cred_t *cr)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int hit = 0;
	int error = 0;


	mutex_enter(&cp->c_statelock);
	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
	if (error)
		goto out;

	/* read from the cache if we can */
	if ((cp->c_metadata.md_flags & MD_ACL) &&
	    ((cp->c_flags & CN_NOCACHE) == 0) &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
		ASSERT((cp->c_flags & CN_NOCACHE) == 0);
		error = cachefs_getaclfromcache(cp, vsec);
		if (error) {
			/* bad cached ACL: stop caching and fall through */
			cachefs_nocache(cp);
			ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
			error = 0;
		} else {
			hit = 1;
			goto out;
		}
	}

	ASSERT(error == 0);
	if (cp->c_backvp == NULL)
		error = cachefs_getbackvp(fscp, cp);
	if (error)
		goto out;

	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_getsecattr (nfsv4): cp %p, backvp %p",
	    cp, cp->c_backvp));
	error = VOP_GETSECATTR(cp->c_backvp, vsec, flag, cr, NULL);
	if (error)
		goto out;

	/* cache the ACL we just fetched, if caching is permitted */
	if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
	    (cachefs_vtype_aclok(vp)) &&
	    ((cp->c_flags & CN_NOCACHE) == 0) &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_cacheacl(cp, vsec);
		if (error) {
			/* caching failure is non-fatal */
			error = 0;
			cachefs_nocache(cp);
		}
	}

out:
	if (error == 0) {
		if (hit)
			fscp->fs_stats.st_hits++;
		else
			fscp->fs_stats.st_misses++;
	}
	mutex_exit(&cp->c_statelock);

	return (error);
}

/*
 * cachefs_getsecattr_disconnected -- fetch an ACL while disconnected:
 * only a locally cached ACL can satisfy the request; otherwise the
 * caller gets ETIMEDOUT so it can retry connected.
 */
static int
/*ARGSUSED*/
cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec, int flag,
    cred_t *cr)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int hit = 0;
	int error = 0;


	mutex_enter(&cp->c_statelock);

	/* read from the cache if we can */
	if (((cp->c_flags & CN_NOCACHE) == 0) &&
	    (cp->c_metadata.md_flags & MD_ACL)) {
		error = cachefs_getaclfromcache(cp, vsec);
		if (error) {
			cachefs_nocache(cp);
			ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
			error = 0;
		} else {
			hit = 1;
			goto out;
		}
	}
	/* not in the cache and no server: force a connected retry */
	error = ETIMEDOUT;

out:
	if (error == 0) {
		if (hit)
			fscp->fs_stats.st_hits++;
		else
			fscp->fs_stats.st_misses++;
	}
	mutex_exit(&cp->c_statelock);

	return (error);
}

/*
 * cachefs_cacheacl() -- cache an ACL, which we do by applying it to
 * the frontfile if possible; otherwise, the adjunct directory.
 *
 * inputs:
 * cp - the cnode, with its statelock already held
 * vsecp - a pointer to a vsecattr_t you'd like us to cache as-is,
 * or NULL if you want us to do the VOP_GETSECATTR(backvp).
9189 * 9190 * returns: 9191 * 0 - all is well 9192 * nonzero - errno 9193 */ 9194 9195 int 9196 cachefs_cacheacl(cnode_t *cp, vsecattr_t *vsecp) 9197 { 9198 fscache_t *fscp = C_TO_FSCACHE(cp); 9199 vsecattr_t vsec; 9200 aclent_t *aclp; 9201 int gotvsec = 0; 9202 int error = 0; 9203 vnode_t *vp = NULL; 9204 void *aclkeep = NULL; 9205 int i; 9206 9207 ASSERT(MUTEX_HELD(&cp->c_statelock)); 9208 ASSERT((cp->c_flags & CN_NOCACHE) == 0); 9209 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0); 9210 ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0); 9211 ASSERT(cachefs_vtype_aclok(CTOV(cp))); 9212 9213 if (fscp->fs_info.fi_mntflags & CFS_NOACL) { 9214 error = ENOSYS; 9215 goto out; 9216 } 9217 9218 if (vsecp == NULL) { 9219 if (cp->c_backvp == NULL) 9220 error = cachefs_getbackvp(fscp, cp); 9221 if (error != 0) 9222 goto out; 9223 vsecp = &vsec; 9224 bzero(&vsec, sizeof (vsec)); 9225 vsecp->vsa_mask = 9226 VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT; 9227 error = VOP_GETSECATTR(cp->c_backvp, vsecp, 0, kcred, NULL); 9228 if (error != 0) { 9229 goto out; 9230 } 9231 gotvsec = 1; 9232 } else if (vsecp->vsa_mask & VSA_ACL) { 9233 aclkeep = vsecp->vsa_aclentp; 9234 vsecp->vsa_aclentp = cachefs_kmem_alloc(vsecp->vsa_aclcnt * 9235 sizeof (aclent_t), KM_SLEEP); 9236 bcopy(aclkeep, vsecp->vsa_aclentp, vsecp->vsa_aclcnt * 9237 sizeof (aclent_t)); 9238 } else if ((vsecp->vsa_mask & (VSA_ACL | VSA_DFACL)) == 0) { 9239 /* unless there's real data, we can cache nothing. 
*/ 9240 return (0); 9241 } 9242 9243 /* 9244 * prevent the ACL from chmoding our frontfile, and 9245 * snarf the class info 9246 */ 9247 9248 if ((vsecp->vsa_mask & (VSA_ACL | VSA_ACLCNT)) == 9249 (VSA_ACL | VSA_ACLCNT)) { 9250 for (i = 0; i < vsecp->vsa_aclcnt; i++) { 9251 aclp = ((aclent_t *)vsecp->vsa_aclentp) + i; 9252 switch (aclp->a_type) { 9253 case CLASS_OBJ: 9254 cp->c_metadata.md_aclclass = 9255 aclp->a_perm; 9256 /*FALLTHROUGH*/ 9257 case USER_OBJ: 9258 case GROUP_OBJ: 9259 case OTHER_OBJ: 9260 aclp->a_perm = 06; 9261 } 9262 } 9263 } 9264 9265 /* 9266 * if the frontfile exists, then we always do the work. but, 9267 * if there's no frontfile, and the ACL isn't a `real' ACL, 9268 * then we don't want to do the work. otherwise, an `ls -l' 9269 * will create tons of emtpy frontfiles. 9270 */ 9271 9272 if (((cp->c_metadata.md_flags & MD_FILE) == 0) && 9273 ((vsecp->vsa_aclcnt + vsecp->vsa_dfaclcnt) 9274 <= MIN_ACL_ENTRIES)) { 9275 cp->c_metadata.md_flags |= MD_ACL; 9276 cp->c_flags |= CN_UPDATED; 9277 goto out; 9278 } 9279 9280 /* 9281 * if we have a default ACL, then we need a 9282 * real live directory in the frontfs that we 9283 * can apply the ACL to. if not, then we just 9284 * use the frontfile. we get the frontfile 9285 * regardless -- that way, we know the 9286 * directory for the frontfile exists. 
9287 */ 9288 9289 if (vsecp->vsa_dfaclcnt > 0) { 9290 if (cp->c_acldirvp == NULL) 9291 error = cachefs_getacldirvp(cp); 9292 if (error != 0) 9293 goto out; 9294 vp = cp->c_acldirvp; 9295 } else { 9296 if (cp->c_frontvp == NULL) 9297 error = cachefs_getfrontfile(cp); 9298 if (error != 0) 9299 goto out; 9300 vp = cp->c_frontvp; 9301 } 9302 ASSERT(vp != NULL); 9303 9304 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL); 9305 error = VOP_SETSECATTR(vp, vsecp, 0, kcred, NULL); 9306 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 9307 if (error != 0) { 9308 #ifdef CFSDEBUG 9309 CFS_DEBUG(CFSDEBUG_VOPS) 9310 printf("cachefs_cacheacl: setsecattr: error %d\n", 9311 error); 9312 #endif /* CFSDEBUG */ 9313 /* 9314 * If there was an error, we don't want to call 9315 * cachefs_nocache(); so, set error to 0. 9316 * We will call cachefs_purgeacl(), in order to 9317 * clean such things as adjunct ACL directories. 9318 */ 9319 cachefs_purgeacl(cp); 9320 error = 0; 9321 goto out; 9322 } 9323 if (vp == cp->c_frontvp) 9324 cp->c_flags |= CN_NEED_FRONT_SYNC; 9325 9326 cp->c_metadata.md_flags |= MD_ACL; 9327 cp->c_flags |= CN_UPDATED; 9328 9329 out: 9330 if ((error) && (fscp->fs_cdconnected == CFS_CD_CONNECTED)) 9331 cachefs_nocache(cp); 9332 9333 if (gotvsec) { 9334 if (vsec.vsa_aclcnt) 9335 kmem_free(vsec.vsa_aclentp, 9336 vsec.vsa_aclcnt * sizeof (aclent_t)); 9337 if (vsec.vsa_dfaclcnt) 9338 kmem_free(vsec.vsa_dfaclentp, 9339 vsec.vsa_dfaclcnt * sizeof (aclent_t)); 9340 } else if (aclkeep != NULL) { 9341 cachefs_kmem_free(vsecp->vsa_aclentp, 9342 vsecp->vsa_aclcnt * sizeof (aclent_t)); 9343 vsecp->vsa_aclentp = aclkeep; 9344 } 9345 9346 return (error); 9347 } 9348 9349 void 9350 cachefs_purgeacl(cnode_t *cp) 9351 { 9352 ASSERT(MUTEX_HELD(&cp->c_statelock)); 9353 9354 ASSERT(!CFS_ISFS_BACKFS_NFSV4(C_TO_FSCACHE(cp))); 9355 9356 if (cp->c_acldirvp != NULL) { 9357 VN_RELE(cp->c_acldirvp); 9358 cp->c_acldirvp = NULL; 9359 } 9360 9361 if (cp->c_metadata.md_flags & MD_ACLDIR) { 9362 char 
name[CFS_FRONTFILE_NAME_SIZE + 2];

		ASSERT(cp->c_filegrp->fg_dirvp != NULL);
		/* adjunct ACL directory is named "<frontfile>.d" */
		make_ascii_name(&cp->c_id, name);
		(void) strcat(name, ".d");

		(void) VOP_RMDIR(cp->c_filegrp->fg_dirvp, name,
		    cp->c_filegrp->fg_dirvp, kcred, NULL, 0);
	}

	cp->c_metadata.md_flags &= ~(MD_ACL | MD_ACLDIR);
	cp->c_flags |= CN_UPDATED;
}

/*
 * cachefs_getacldirvp -- look up (or create) the adjunct ACL directory
 * for the cnode in the frontfs and hang it on c_acldirvp.  Needed when
 * a default ACL must be cached, since that requires a real directory.
 * Caller holds c_statelock.
 */
static int
cachefs_getacldirvp(cnode_t *cp)
{
	char name[CFS_FRONTFILE_NAME_SIZE + 2];
	int error = 0;

	ASSERT(MUTEX_HELD(&cp->c_statelock));
	ASSERT(cp->c_acldirvp == NULL);

	/* ensure the frontfile (and hence its directory) exists */
	if (cp->c_frontvp == NULL)
		error = cachefs_getfrontfile(cp);
	if (error != 0)
		goto out;

	ASSERT(cp->c_filegrp->fg_dirvp != NULL);
	make_ascii_name(&cp->c_id, name);
	(void) strcat(name, ".d");
	error = VOP_LOOKUP(cp->c_filegrp->fg_dirvp,
	    name, &cp->c_acldirvp, NULL, 0, NULL, kcred, NULL, NULL, NULL);
	if ((error != 0) && (error != ENOENT))
		goto out;

	if (error != 0) {
		/* ENOENT: create the adjunct directory */
		vattr_t va;

		va.va_mode = S_IFDIR | 0777;
		va.va_uid = 0;
		va.va_gid = 0;
		va.va_type = VDIR;
		va.va_mask = AT_TYPE | AT_MODE |
		    AT_UID | AT_GID;
		error =
		    VOP_MKDIR(cp->c_filegrp->fg_dirvp,
		    name, &va, &cp->c_acldirvp, kcred, NULL, 0, NULL);
		if (error != 0)
			goto out;
	}

	ASSERT(cp->c_acldirvp != NULL);
	cp->c_metadata.md_flags |= MD_ACLDIR;
	cp->c_flags |= CN_UPDATED;

out:
	if (error != 0)
		cp->c_acldirvp = NULL;
	return (error);
}

/*
 * cachefs_getaclfromcache -- satisfy a getsecattr from the cached
 * front object (adjunct ACL directory or frontfile), or forge a
 * minimal ACL from the cached mode bits when no front object exists.
 * Caller holds c_statelock; MD_ACL must be set.
 */
static int
cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec)
{
	aclent_t *aclp;
	int error = 0;
	vnode_t *vp = NULL;
	int i;

	ASSERT(cp->c_metadata.md_flags & MD_ACL);
	ASSERT(MUTEX_HELD(&cp->c_statelock));
	ASSERT(vsec->vsa_aclentp == NULL);

	if (cp->c_metadata.md_flags & MD_ACLDIR) {
		if (cp->c_acldirvp == NULL)
			error = cachefs_getacldirvp(cp);
		if
(error != 0) 9440 goto out; 9441 vp = cp->c_acldirvp; 9442 } else if (cp->c_metadata.md_flags & MD_FILE) { 9443 if (cp->c_frontvp == NULL) 9444 error = cachefs_getfrontfile(cp); 9445 if (error != 0) 9446 goto out; 9447 vp = cp->c_frontvp; 9448 } else { 9449 9450 /* 9451 * if we get here, then we know that MD_ACL is on, 9452 * meaning an ACL was successfully cached. we also 9453 * know that neither MD_ACLDIR nor MD_FILE are on, so 9454 * this has to be an entry without a `real' ACL. 9455 * thus, we forge whatever is necessary. 9456 */ 9457 9458 if (vsec->vsa_mask & VSA_ACLCNT) 9459 vsec->vsa_aclcnt = MIN_ACL_ENTRIES; 9460 9461 if (vsec->vsa_mask & VSA_ACL) { 9462 vsec->vsa_aclentp = 9463 kmem_zalloc(MIN_ACL_ENTRIES * 9464 sizeof (aclent_t), KM_SLEEP); 9465 aclp = (aclent_t *)vsec->vsa_aclentp; 9466 aclp->a_type = USER_OBJ; 9467 ++aclp; 9468 aclp->a_type = GROUP_OBJ; 9469 ++aclp; 9470 aclp->a_type = OTHER_OBJ; 9471 ++aclp; 9472 aclp->a_type = CLASS_OBJ; 9473 ksort((caddr_t)vsec->vsa_aclentp, MIN_ACL_ENTRIES, 9474 sizeof (aclent_t), cmp2acls); 9475 } 9476 9477 ASSERT(vp == NULL); 9478 } 9479 9480 if (vp != NULL) { 9481 if ((error = VOP_GETSECATTR(vp, vsec, 0, kcred, NULL)) != 0) { 9482 #ifdef CFSDEBUG 9483 CFS_DEBUG(CFSDEBUG_VOPS) 9484 printf("cachefs_getaclfromcache: error %d\n", 9485 error); 9486 #endif /* CFSDEBUG */ 9487 goto out; 9488 } 9489 } 9490 9491 if (vsec->vsa_aclentp != NULL) { 9492 for (i = 0; i < vsec->vsa_aclcnt; i++) { 9493 aclp = ((aclent_t *)vsec->vsa_aclentp) + i; 9494 switch (aclp->a_type) { 9495 case USER_OBJ: 9496 aclp->a_id = cp->c_metadata.md_vattr.va_uid; 9497 aclp->a_perm = 9498 cp->c_metadata.md_vattr.va_mode & 0700; 9499 aclp->a_perm >>= 6; 9500 break; 9501 9502 case GROUP_OBJ: 9503 aclp->a_id = cp->c_metadata.md_vattr.va_gid; 9504 aclp->a_perm = 9505 cp->c_metadata.md_vattr.va_mode & 070; 9506 aclp->a_perm >>= 3; 9507 break; 9508 9509 case OTHER_OBJ: 9510 aclp->a_perm = 9511 cp->c_metadata.md_vattr.va_mode & 07; 9512 break; 9513 9514 case 
CLASS_OBJ: 9515 aclp->a_perm = 9516 cp->c_metadata.md_aclclass; 9517 break; 9518 } 9519 } 9520 } 9521 9522 out: 9523 9524 if (error != 0) 9525 cachefs_nocache(cp); 9526 9527 return (error); 9528 } 9529 9530 /* 9531 * Fills in targp with attribute information from srcp, cp 9532 * and if necessary the system. 9533 */ 9534 static void 9535 cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp, cred_t *cr) 9536 { 9537 time_t now; 9538 9539 ASSERT((srcp->va_mask & (AT_TYPE | AT_MODE)) == (AT_TYPE | AT_MODE)); 9540 9541 /* 9542 * Add code to fill in the va struct. We use the fields from 9543 * the srcp struct if they are populated, otherwise we guess 9544 */ 9545 9546 targp->va_mask = 0; /* initialize all fields */ 9547 targp->va_mode = srcp->va_mode; 9548 targp->va_type = srcp->va_type; 9549 targp->va_nlink = 1; 9550 targp->va_nodeid = 0; 9551 9552 if (srcp->va_mask & AT_UID) 9553 targp->va_uid = srcp->va_uid; 9554 else 9555 targp->va_uid = crgetuid(cr); 9556 9557 if (srcp->va_mask & AT_GID) 9558 targp->va_gid = srcp->va_gid; 9559 else 9560 targp->va_gid = crgetgid(cr); 9561 9562 if (srcp->va_mask & AT_FSID) 9563 targp->va_fsid = srcp->va_fsid; 9564 else 9565 targp->va_fsid = 0; /* initialize all fields */ 9566 9567 now = gethrestime_sec(); 9568 if (srcp->va_mask & AT_ATIME) 9569 targp->va_atime = srcp->va_atime; 9570 else 9571 targp->va_atime.tv_sec = now; 9572 9573 if (srcp->va_mask & AT_MTIME) 9574 targp->va_mtime = srcp->va_mtime; 9575 else 9576 targp->va_mtime.tv_sec = now; 9577 9578 if (srcp->va_mask & AT_CTIME) 9579 targp->va_ctime = srcp->va_ctime; 9580 else 9581 targp->va_ctime.tv_sec = now; 9582 9583 9584 if (srcp->va_mask & AT_SIZE) 9585 targp->va_size = srcp->va_size; 9586 else 9587 targp->va_size = 0; 9588 9589 /* 9590 * the remaing fields are set by the fs and not changable. 9591 * we populate these entries useing the parent directory 9592 * values. It's a small hack, but should work. 
9593 */ 9594 targp->va_blksize = cp->c_metadata.md_vattr.va_blksize; 9595 targp->va_rdev = cp->c_metadata.md_vattr.va_rdev; 9596 targp->va_nblocks = cp->c_metadata.md_vattr.va_nblocks; 9597 targp->va_seq = 0; /* Never keep the sequence number */ 9598 } 9599 9600 /* 9601 * set the gid for a newly created file. The algorithm is as follows: 9602 * 9603 * 1) If the gid is set in the attribute list, then use it if 9604 * the caller is privileged, belongs to the target group, or 9605 * the group is the same as the parent directory. 9606 * 9607 * 2) If the parent directory's set-gid bit is clear, then use 9608 * the process gid 9609 * 9610 * 3) Otherwise, use the gid of the parent directory. 9611 * 9612 * Note: newcp->c_attr.va_{mode,type} must already be set before calling 9613 * this routine. 9614 */ 9615 static void 9616 cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap, cred_t *cr) 9617 { 9618 if ((vap->va_mask & AT_GID) && 9619 ((vap->va_gid == dcp->c_attr.va_gid) || 9620 groupmember(vap->va_gid, cr) || 9621 secpolicy_vnode_create_gid(cr) != 0)) { 9622 newcp->c_attr.va_gid = vap->va_gid; 9623 } else { 9624 if (dcp->c_attr.va_mode & S_ISGID) 9625 newcp->c_attr.va_gid = dcp->c_attr.va_gid; 9626 else 9627 newcp->c_attr.va_gid = crgetgid(cr); 9628 } 9629 9630 /* 9631 * if we're creating a directory, and the parent directory has the 9632 * set-GID bit set, set it on the new directory. 9633 * Otherwise, if the user is neither privileged nor a member of the 9634 * file's new group, clear the file's set-GID bit. 9635 */ 9636 if (dcp->c_attr.va_mode & S_ISGID && newcp->c_attr.va_type == VDIR) { 9637 newcp->c_attr.va_mode |= S_ISGID; 9638 } else if ((newcp->c_attr.va_mode & S_ISGID) && 9639 secpolicy_vnode_setids_setgids(cr, newcp->c_attr.va_gid) != 0) 9640 newcp->c_attr.va_mode &= ~S_ISGID; 9641 } 9642 9643 /* 9644 * create an acl for the newly created file. should be called right 9645 * after cachefs_creategid. 
9646 */ 9647 9648 static void 9649 cachefs_createacl(cnode_t *dcp, cnode_t *newcp) 9650 { 9651 fscache_t *fscp = C_TO_FSCACHE(dcp); 9652 vsecattr_t vsec; 9653 int gotvsec = 0; 9654 int error = 0; /* placeholder */ 9655 aclent_t *aclp; 9656 o_mode_t *classp = NULL; 9657 o_mode_t gunion = 0; 9658 int i; 9659 9660 if ((fscp->fs_info.fi_mntflags & CFS_NOACL) || 9661 (! cachefs_vtype_aclok(CTOV(newcp)))) 9662 return; 9663 9664 ASSERT(dcp->c_metadata.md_flags & MD_ACL); 9665 ASSERT(MUTEX_HELD(&dcp->c_statelock)); 9666 ASSERT(MUTEX_HELD(&newcp->c_statelock)); 9667 9668 /* 9669 * XXX should probably not do VSA_ACL and VSA_ACLCNT, but that 9670 * would hit code paths that isn't hit anywhere else. 9671 */ 9672 9673 bzero(&vsec, sizeof (vsec)); 9674 vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT; 9675 error = cachefs_getaclfromcache(dcp, &vsec); 9676 if (error != 0) 9677 goto out; 9678 gotvsec = 1; 9679 9680 if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL)) { 9681 if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL)) 9682 kmem_free(vsec.vsa_aclentp, 9683 vsec.vsa_aclcnt * sizeof (aclent_t)); 9684 9685 vsec.vsa_aclcnt = vsec.vsa_dfaclcnt; 9686 vsec.vsa_aclentp = vsec.vsa_dfaclentp; 9687 vsec.vsa_dfaclcnt = 0; 9688 vsec.vsa_dfaclentp = NULL; 9689 9690 if (newcp->c_attr.va_type == VDIR) { 9691 vsec.vsa_dfaclentp = kmem_alloc(vsec.vsa_aclcnt * 9692 sizeof (aclent_t), KM_SLEEP); 9693 vsec.vsa_dfaclcnt = vsec.vsa_aclcnt; 9694 bcopy(vsec.vsa_aclentp, vsec.vsa_dfaclentp, 9695 vsec.vsa_aclcnt * sizeof (aclent_t)); 9696 } 9697 9698 /* 9699 * this function should be called pretty much after 9700 * the rest of the file creation stuff is done. so, 9701 * uid, gid, etc. should be `right'. we'll go with 9702 * that, rather than trying to determine whether to 9703 * get stuff from cr or va. 
9704 */ 9705 9706 for (i = 0; i < vsec.vsa_aclcnt; i++) { 9707 aclp = ((aclent_t *)vsec.vsa_aclentp) + i; 9708 switch (aclp->a_type) { 9709 case DEF_USER_OBJ: 9710 aclp->a_type = USER_OBJ; 9711 aclp->a_id = newcp->c_metadata.md_vattr.va_uid; 9712 aclp->a_perm = 9713 newcp->c_metadata.md_vattr.va_mode; 9714 aclp->a_perm &= 0700; 9715 aclp->a_perm >>= 6; 9716 break; 9717 9718 case DEF_GROUP_OBJ: 9719 aclp->a_type = GROUP_OBJ; 9720 aclp->a_id = newcp->c_metadata.md_vattr.va_gid; 9721 aclp->a_perm = 9722 newcp->c_metadata.md_vattr.va_mode; 9723 aclp->a_perm &= 070; 9724 aclp->a_perm >>= 3; 9725 gunion |= aclp->a_perm; 9726 break; 9727 9728 case DEF_OTHER_OBJ: 9729 aclp->a_type = OTHER_OBJ; 9730 aclp->a_perm = 9731 newcp->c_metadata.md_vattr.va_mode & 07; 9732 break; 9733 9734 case DEF_CLASS_OBJ: 9735 aclp->a_type = CLASS_OBJ; 9736 classp = &(aclp->a_perm); 9737 break; 9738 9739 case DEF_USER: 9740 aclp->a_type = USER; 9741 gunion |= aclp->a_perm; 9742 break; 9743 9744 case DEF_GROUP: 9745 aclp->a_type = GROUP; 9746 gunion |= aclp->a_perm; 9747 break; 9748 } 9749 } 9750 9751 /* XXX is this the POSIX thing to do? */ 9752 if (classp != NULL) 9753 *classp &= gunion; 9754 9755 /* 9756 * we don't need to log this; rather, we clear the 9757 * MD_ACL bit when we reconnect. 9758 */ 9759 9760 error = cachefs_cacheacl(newcp, &vsec); 9761 if (error != 0) 9762 goto out; 9763 } 9764 9765 newcp->c_metadata.md_aclclass = 07; /* XXX check posix */ 9766 newcp->c_metadata.md_flags |= MD_ACL; 9767 newcp->c_flags |= CN_UPDATED; 9768 9769 out: 9770 9771 if (gotvsec) { 9772 if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL)) 9773 kmem_free(vsec.vsa_aclentp, 9774 vsec.vsa_aclcnt * sizeof (aclent_t)); 9775 if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL)) 9776 kmem_free(vsec.vsa_dfaclentp, 9777 vsec.vsa_dfaclcnt * sizeof (aclent_t)); 9778 } 9779 } 9780 9781 /* 9782 * this is translated from the UFS code for access checking. 
 */

/*
 * Local (in-cache) access check against the cached attributes.
 * Returns 0 if access is allowed, an errno (EROFS/EACCES/...) if not.
 * Caller must hold cp->c_statelock.
 */
static int
cachefs_access_local(void *vcp, int mode, cred_t *cr)
{
	cnode_t *cp = vcp;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int shift = 0;

	ASSERT(MUTEX_HELD(&cp->c_statelock));

	if (mode & VWRITE) {
		/*
		 * Disallow write attempts on read-only
		 * file systems, unless the file is special.
		 */
		struct vnode *vp = CTOV(cp);
		if (vn_is_readonly(vp)) {
			if (!IS_DEVVP(vp)) {
				return (EROFS);
			}
		}
	}

	/*
	 * if we need to do ACLs, do it.  this works whether anyone
	 * has explicitly made an ACL or not.
	 */

	if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
	    (cachefs_vtype_aclok(CTOV(cp))))
		return (cachefs_acl_access(cp, mode, cr));

	/* classic owner/group/other check: pick the relevant mode triple */
	if (crgetuid(cr) != cp->c_attr.va_uid) {
		shift += 3;
		if (!groupmember(cp->c_attr.va_gid, cr))
			shift += 3;
	}

	/* compute missing mode bits */
	mode &= ~(cp->c_attr.va_mode << shift);

	if (mode == 0)
		return (0);

	return (secpolicy_vnode_access(cr, CTOV(cp), cp->c_attr.va_uid, mode));
}

/*
 * This is transcribed from ufs_acl_access().  If that changes, then
 * this should, too.
 *
 * Check the cnode's ACL's to see if this mode of access is
 * allowed; return 0 if allowed, EACCES if not.
 *
 * We follow the procedure defined in Sec. 3.3.5, ACL Access
 * Check Algorithm, of the POSIX 1003.6 Draft Standard.
 */

/*
 * Grant if all requested mode bits are covered by PERM; otherwise let
 * policy decide on the missing bits.  NOTE: expands `owner', a local
 * in cachefs_acl_access() below — usable only in that scope.
 */
#define	ACL_MODE_CHECK(M, PERM, C, I) ((((M) & (PERM)) == (M)) ? 0 : \
	    secpolicy_vnode_access(C, CTOV(I), owner, (M) & ~(PERM)))

static int
cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr)
{
	int error = 0;

	fscache_t *fscp = C_TO_FSCACHE(cp);

	int mask = ~0;		/* CLASS_OBJ mask; ~0 if no class entry */
	int ismask = 0;		/* true once a CLASS_OBJ entry is seen */

	int gperm = 0;		/* union of matching group-class perms */
	int ngroup = 0;		/* number of matching group entries */

	vsecattr_t vsec;
	int gotvsec = 0;
	aclent_t *aclp;

	uid_t owner = cp->c_attr.va_uid;	/* read by ACL_MODE_CHECK */

	int i;

	ASSERT(MUTEX_HELD(&cp->c_statelock));
	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);

	/*
	 * strictly speaking, we shouldn't set VSA_DFACL and DFACLCNT,
	 * but then i believe we'd be the only thing exercising those
	 * code paths -- probably a bad thing.
	 */

	bzero(&vsec, sizeof (vsec));
	vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;

	/* XXX KLUDGE! correct insidious 0-class problem */
	if (cp->c_metadata.md_aclclass == 0 &&
	    fscp->fs_cdconnected == CFS_CD_CONNECTED)
		cachefs_purgeacl(cp);
again:
	if (cp->c_metadata.md_flags & MD_ACL) {
		error = cachefs_getaclfromcache(cp, &vsec);
		if (error != 0) {
#ifdef CFSDEBUG
			if (error != ETIMEDOUT)
				CFS_DEBUG(CFSDEBUG_VOPS)
					printf("cachefs_acl_access():"
					    "error %d from getaclfromcache()\n",
					    error);
#endif /* CFSDEBUG */
			/*
			 * getaclfromcache() may have nocache'd the cnode
			 * and cleared MD_ACL; retry via the back fs path.
			 */
			if ((cp->c_metadata.md_flags & MD_ACL) == 0) {
				goto again;
			} else {
				goto out;
			}
		}
	} else {
		/* no cached ACL: fetch from the back file system */
		if (cp->c_backvp == NULL) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
				error = cachefs_getbackvp(fscp, cp);
			else
				error = ETIMEDOUT;
		}
		if (error == 0)
			error = VOP_GETSECATTR(cp->c_backvp, &vsec, 0, cr,
			    NULL);
		if (error != 0) {
#ifdef CFSDEBUG
			CFS_DEBUG(CFSDEBUG_VOPS)
				printf("cachefs_acl_access():"
				    "error %d from getsecattr(backvp)\n",
				    error);
#endif /* CFSDEBUG */
			goto out;
		}
		/* opportunistically cache what we just fetched */
		if ((cp->c_flags & CN_NOCACHE) == 0 &&
		    !CFS_ISFS_BACKFS_NFSV4(fscp))
			(void) cachefs_cacheacl(cp, &vsec);
	}
	gotvsec = 1;

	/* pass 1: owner short-circuit and CLASS_OBJ mask pickup */
	ASSERT(error == 0);
	for (i = 0; i < vsec.vsa_aclcnt; i++) {
		aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
		switch (aclp->a_type) {
		case USER_OBJ:
			/*
			 * this might look cleaner in the 2nd loop
			 * below, but we do it here as an
			 * optimization.
			 */

			owner = aclp->a_id;
			if (crgetuid(cr) == owner) {
				error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
				    cr, cp);
				goto out;
			}
			break;

		case CLASS_OBJ:
			mask = aclp->a_perm;
			ismask = 1;
			break;
		}
	}

	/* pass 2: named users, then group class, then other */
	ASSERT(error == 0);
	for (i = 0; i < vsec.vsa_aclcnt; i++) {
		aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
		switch (aclp->a_type) {
		case USER:
			if (crgetuid(cr) == aclp->a_id) {
				error = ACL_MODE_CHECK(mode,
				    (aclp->a_perm & mask) << 6, cr, cp);
				goto out;
			}
			break;

		case GROUP_OBJ:
			if (groupmember(aclp->a_id, cr)) {
				++ngroup;
				gperm |= aclp->a_perm;
				/* without a mask, GROUP_OBJ decides alone */
				if (! ismask) {
					error = ACL_MODE_CHECK(mode,
					    aclp->a_perm << 6,
					    cr, cp);
					goto out;
				}
			}
			break;

		case GROUP:
			if (groupmember(aclp->a_id, cr)) {
				++ngroup;
				gperm |= aclp->a_perm;
			}
			break;

		case OTHER_OBJ:
			if (ngroup == 0) {
				error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
				    cr, cp);
				goto out;
			}
			break;

		default:
			break;
		}
	}

	/* fell through: caller matched >= 1 group entry; use the union */
	ASSERT(ngroup > 0);
	error = ACL_MODE_CHECK(mode, (gperm & mask) << 6, cr, cp);

out:
	if (gotvsec) {
		if (vsec.vsa_aclcnt && vsec.vsa_aclentp)
			kmem_free(vsec.vsa_aclentp,
			    vsec.vsa_aclcnt * sizeof (aclent_t));
		if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp)
			kmem_free(vsec.vsa_dfaclentp,
			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
	}

	return (error);
}

/*
 * see if permissions allow for removal of the given file from
 * the given directory.
 */
static int
cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr)
{
	uid_t uid;
	/*
	 * If the containing directory is sticky, the user must:
	 *  - own the directory, or
	 *  - own the file, or
	 *  - be able to write the file (if it's a plain file), or
	 *  - be sufficiently privileged.
	 */
	if ((dcp->c_attr.va_mode & S_ISVTX) &&
	    ((uid = crgetuid(cr)) != dcp->c_attr.va_uid) &&
	    (uid != cp->c_attr.va_uid) &&
	    (cp->c_attr.va_type != VREG ||
	    cachefs_access_local(cp, VWRITE, cr) != 0))
		return (secpolicy_vnode_remove(cr));

	return (0);
}

/*
 * Returns a new name, may even be unique.
 * Stolen from nfs code.
 * Since now we will use renaming to .cfs* in place of .nfs*
 * for CacheFS. Both NFS and CacheFS will rename opened files.
 */
static char cachefs_prefix[] = ".cfs";
kmutex_t cachefs_newnum_lock;

/*
 * Generate a ".cfs<hex>" name from a monotonically increasing counter.
 * The counter is seeded from the clock on first use; the returned
 * MAXNAMELEN buffer is cachefs_kmem_alloc'd and owned by the caller.
 */
static char *
cachefs_newname(void)
{
	static uint_t newnum = 0;
	char *news;
	char *s, *p;
	uint_t id;

	mutex_enter(&cachefs_newnum_lock);
	if (newnum == 0) {
		/* seed from the clock; force nonzero so we seed only once */
		newnum = gethrestime_sec() & 0xfffff;
		newnum |= 0x10000;
	}
	id = newnum++;
	mutex_exit(&cachefs_newnum_lock);

	news = cachefs_kmem_alloc(MAXNAMELEN, KM_SLEEP);
	s = news;
	p = cachefs_prefix;
	while (*p != '\0')
		*s++ = *p++;
	/* append id as hex, least-significant nibble first */
	while (id != 0) {
		*s++ = "0123456789ABCDEF"[id & 0x0f];
		id >>= 4;
	}
	*s = '\0';
	return (news);
}

/*
 * Called to rename the specified file to a temporary file so
 * operations to the file after remove work.
 * Must call this routine with the dir c_rwlock held as a writer.
 */
static int
/*ARGSUSED*/
cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm, cred_t *cr)
{
	cnode_t *cp = VTOC(vp);
	char *tmpname;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error;

	ASSERT(RW_WRITE_HELD(&(VTOC(dvp)->c_rwlock)));

	/* get the new name for the file */
	tmpname = cachefs_newname();

	/* do the link */
	if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
		error = cachefs_link_connected(dvp, vp, tmpname, cr);
	else
		error = cachefs_link_disconnected(dvp, vp, tmpname, cr);
	if (error) {
		cachefs_kmem_free(tmpname, MAXNAMELEN);
		return (error);
	}

	/* record the unlink info; drop any previous pending unlink state */
	mutex_enter(&cp->c_statelock);
	if (cp->c_unldvp) {
		VN_RELE(cp->c_unldvp);
		cachefs_kmem_free(cp->c_unlname, MAXNAMELEN);
		crfree(cp->c_unlcred);
	}

	VN_HOLD(dvp);
	cp->c_unldvp = dvp;
	crhold(cr);
	cp->c_unlcred = cr;
	cp->c_unlname = tmpname;

	/*
	 * drop the backvp so NFS does not also do a rename
	 * NOTE(review): no code here actually releases c_backvp;
	 * the comment appears stale — confirm against history.
	 */
	mutex_exit(&cp->c_statelock);

	return (0);
}

/*
 * Marks the cnode as modified.
 */
static void
cachefs_modified(cnode_t *cp)
{
	fscache_t *fscp = C_TO_FSCACHE(cp);
	struct vattr va;
	int error;

	ASSERT(MUTEX_HELD(&cp->c_statelock));
	ASSERT(cp->c_metadata.md_rlno);

	/* if not on the modify list */
	if (cp->c_metadata.md_rltype != CACHEFS_RL_MODIFIED) {
		/* put on modified list, also marks the file as modified */
		cachefs_rlent_moveto(fscp->fs_cache, CACHEFS_RL_MODIFIED,
		    cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks);
		cp->c_metadata.md_rltype = CACHEFS_RL_MODIFIED;
		cp->c_flags |= CN_UPDATED;

		/* if a modified regular file that is not local */
		if (((cp->c_id.cid_flags & CFS_CID_LOCAL) == 0) &&
		    (cp->c_metadata.md_flags & MD_FILE) &&
		    (cp->c_attr.va_type == VREG)) {

			if (cp->c_frontvp == NULL)
				(void) cachefs_getfrontfile(cp);
			if (cp->c_frontvp) {
				/* identify file so fsck knows it is modified */
				va.va_mode = 0766;
				va.va_mask = AT_MODE;
				error = VOP_SETATTR(cp->c_frontvp,
				    &va, 0, kcred, NULL);
				if (error) {
					cmn_err(CE_WARN,
					    "Cannot change ff mode.\n");
				}
			}
		}
	}
}

/*
 * Marks the cnode as modified.
 * Allocates a rl slot for the cnode if necessary.
 * Returns 0 for success, !0 if cannot get an rl slot.
 */
static int
cachefs_modified_alloc(cnode_t *cp)
{
	fscache_t *fscp = C_TO_FSCACHE(cp);
	filegrp_t *fgp = cp->c_filegrp;
	int error;
	rl_entry_t rl_ent;

	ASSERT(MUTEX_HELD(&cp->c_statelock));

	/* get the rl slot if needed */
	if (cp->c_metadata.md_rlno == 0) {
		/* get a metadata slot if we do not have one yet */
		if (cp->c_flags & CN_ALLOC_PENDING) {
			if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
				(void) filegrp_allocattr(cp->c_filegrp);
			}
			error = filegrp_create_metadata(cp->c_filegrp,
			    &cp->c_metadata, &cp->c_id);
			if (error)
				return (error);
			cp->c_flags &= ~CN_ALLOC_PENDING;
		}

		/* get a free rl entry */
		rl_ent.rl_fileno = cp->c_id.cid_fileno;
		rl_ent.rl_local = (cp->c_id.cid_flags & CFS_CID_LOCAL) ? 1 : 0;
		rl_ent.rl_fsid = fscp->fs_cfsid;
		rl_ent.rl_attrc = 0;
		error = cachefs_rl_alloc(fscp->fs_cache, &rl_ent,
		    &cp->c_metadata.md_rlno);
		if (error)
			return (error);
		cp->c_metadata.md_rltype = CACHEFS_RL_NONE;

		/* hold the filegrp so the attrcache file is not gc */
		error = filegrp_ffhold(fgp);
		if (error) {
			/* roll back the rl slot we just allocated */
			cachefs_rlent_moveto(fscp->fs_cache,
			    CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
			cp->c_metadata.md_rlno = 0;
			return (error);
		}
	}
	cachefs_modified(cp);
	return (0);
}

/*
 * Returns nonzero if the vnode is of a type that may carry an ACL
 * (regular file, directory, or fifo); 0 otherwise.
 */
int
cachefs_vtype_aclok(vnode_t *vp)
{
	/* VNON terminates the table and doubles as the "no match" slot */
	vtype_t *vtp, oktypes[] = {VREG, VDIR, VFIFO, VNON};

	if (vp->v_type == VNON)
		return (0);

	for (vtp = oktypes; *vtp != VNON; vtp++)
		if (vp->v_type == *vtp)
			break;

	return (*vtp != VNON);
}

/*
 * VOP_PATHCONF for cachefs.  _PC_FILESIZEBITS is derived from the
 * fscache's maximum offset; everything else defers to fs_pathconf().
 */
static int
cachefs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
	caller_context_t *ct)
{
	int error = 0;
	fscache_t *fscp = C_TO_FSCACHE(VTOC(vp));

	/* Assert cachefs compatibility if NFSv4 is in use */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(vp));

	if (cmd == _PC_FILESIZEBITS) {
		/*
		 * Count the bits needed to represent fs_offmax, plus
		 * one for the sign, per FILESIZEBITS semantics.
		 */
		u_offset_t maxsize = fscp->fs_offmax;
		(*valp) = 0;
		while (maxsize != 0) {
			maxsize >>= 1;
			(*valp)++;
		}
		(*valp)++;
	} else
		error = fs_pathconf(vp, cmd, valp, cr, ct);

	return (error);
}