/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/proc.h>
#include <sys/user.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/file.h>
#include <sys/filio.h>
#include <sys/uio.h>
#include <sys/buf.h>
#include <sys/mman.h>
#include <sys/tiuser.h>
#include <sys/pathname.h>
#include <sys/dirent.h>
#include <sys/conf.h>
#include <sys/debug.h>
#include <sys/vmsystm.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/swap.h>
#include <sys/errno.h>
#include <sys/sysmacros.h>
#include <sys/disp.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/vtrace.h>
#include <sys/mount.h>
#include <sys/bootconf.h>
#include <sys/dnlc.h>
#include <sys/stat.h>
#include <sys/acl.h>
#include <sys/policy.h>
#include <rpc/types.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/seg_vn.h>
#include <vm/rm.h>
#include <sys/fs/cachefs_fs.h>
#include <sys/fs/cachefs_dir.h>
#include <sys/fs/cachefs_dlog.h>
#include <sys/fs/cachefs_ioctl.h>
#include <sys/fs/cachefs_log.h>
#include <fs/fs_subr.h>

int cachefs_dnlc;	/* use dnlc, debugging */

/*
 * Forward declarations for the internal helpers of this file.  Most
 * operations come in _connected/_disconnected pairs: the connected
 * variant talks to the back file system, the disconnected variant
 * operates out of the local cache (and the dlog) only.
 */
static void cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp,
    cred_t *cr);
static void cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap,
    cred_t *cr);
static void cachefs_createacl(cnode_t *dcp, cnode_t *newcp);
static int cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec);
static int cachefs_getacldirvp(cnode_t *cp);
static void cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec);
static int cachefs_access_local(void *cp, int mode, cred_t *cr);
static int cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr);
static int cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen,
    u_offset_t iooff, cred_t *cr);
static int cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen,
    u_offset_t iooff, cred_t *cr);
static int cachefs_setattr_connected(vnode_t *vp, vattr_t *vap, int flags,
    cred_t *cr, caller_context_t *ct);
static int cachefs_setattr_disconnected(vnode_t *vp, vattr_t *vap,
    int flags, cred_t *cr, caller_context_t *ct);
static int cachefs_access_connected(struct vnode *vp, int mode,
    int flags, cred_t *cr);
static int cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp,
    cred_t *cr);
static int cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva,
    char *tnm, cred_t *cr);
static int cachefs_symlink_disconnected(vnode_t *dvp, char *lnm,
    vattr_t *tva, char *tnm, cred_t *cr);
static int cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm,
    cred_t *cr);
static int cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp,
    char *tnm, cred_t *cr);
static int cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap,
    vnode_t **vpp, cred_t *cr);
static int cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
    vnode_t **vpp, cred_t *cr);
static int cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr);
static int cachefs_rmdir_connected(vnode_t *dvp, char *nm,
    vnode_t *cdir, cred_t *cr, vnode_t *vp);
static int cachefs_rmdir_disconnected(vnode_t *dvp, char *nm,
    vnode_t *cdir, cred_t *cr, vnode_t *vp);
static char *cachefs_newname(void);
static int cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm,
    cred_t *cr);
static int cachefs_rename_connected(vnode_t *odvp, char *onm,
    vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp);
static int cachefs_rename_disconnected(vnode_t *odvp, char *onm,
    vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp);
static int cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr,
    int *eofp);
static int cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop,
    cred_t *cr, int *eofp);
static int cachefs_readback_translate(cnode_t *cp, uio_t *uiop,
    cred_t *cr, int *eofp);

static int cachefs_setattr_common(vnode_t *vp, vattr_t *vap, int flags,
    cred_t *cr, caller_context_t *ct);

/* VOP entry points registered in cachefs_vnodeops_template below. */
static int	cachefs_open(struct vnode **, int, cred_t *,
			caller_context_t *);
static int	cachefs_close(struct vnode *, int, int, offset_t,
			cred_t *, caller_context_t *);
static int	cachefs_read(struct vnode *, struct uio *, int, cred_t *,
			caller_context_t *);
static int	cachefs_write(struct vnode *, struct uio *, int, cred_t *,
			caller_context_t *);
static int	cachefs_ioctl(struct vnode *, int, intptr_t, int, cred_t *,
			int *, caller_context_t *);
static int	cachefs_getattr(struct vnode *, struct vattr *, int,
			cred_t *, caller_context_t *);
static int	cachefs_setattr(struct vnode *, struct vattr *,
			int, cred_t *, caller_context_t *);
static int	cachefs_access(struct vnode *, int, int, cred_t *,
			caller_context_t *);
static int	cachefs_lookup(struct vnode *, char *, struct vnode **,
			struct pathname *, int, struct vnode *, cred_t *,
			caller_context_t *, int *, pathname_t *);
static int	cachefs_create(struct vnode *, char *, struct vattr *,
			enum vcexcl, int, struct vnode **, cred_t *, int,
			caller_context_t *, vsecattr_t *);
static int	cachefs_create_connected(vnode_t *dvp, char *nm,
			vattr_t *vap, enum vcexcl exclusive, int mode,
			vnode_t **vpp, cred_t *cr);
static int	cachefs_create_disconnected(vnode_t *dvp, char *nm,
			vattr_t *vap, enum vcexcl exclusive, int mode,
			vnode_t **vpp, cred_t *cr);
static int	cachefs_remove(struct vnode *, char *, cred_t *,
			caller_context_t *, int);
static int	cachefs_link(struct vnode *, struct vnode *, char *,
			cred_t *, caller_context_t *, int);
static int	cachefs_rename(struct vnode *, char *, struct vnode *,
			char *, cred_t *, caller_context_t *, int);
static int	cachefs_mkdir(struct vnode *, char *, struct
			vattr *, struct vnode **, cred_t *, caller_context_t *,
			int, vsecattr_t *);
static int	cachefs_rmdir(struct vnode *, char *, struct vnode *,
			cred_t *, caller_context_t *, int);
static int	cachefs_readdir(struct vnode *, struct uio *,
			cred_t *, int *, caller_context_t *, int);
static int	cachefs_symlink(struct vnode *, char *, struct vattr *,
			char *, cred_t *, caller_context_t *, int);
static int	cachefs_readlink(struct vnode *, struct uio *, cred_t *,
			caller_context_t *);
static int	cachefs_readlink_connected(vnode_t *vp, uio_t *uiop,
			cred_t *cr);
static int	cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop);
static int	cachefs_fsync(struct vnode *, int, cred_t *,
			caller_context_t *);
static void	cachefs_inactive(struct vnode *, cred_t *,
			caller_context_t *);
static int	cachefs_fid(struct vnode *, struct fid *,
			caller_context_t *);
static int	cachefs_rwlock(struct vnode *, int, caller_context_t *);
static void	cachefs_rwunlock(struct vnode *, int, caller_context_t *);
static int	cachefs_seek(struct vnode *, offset_t, offset_t *,
			caller_context_t *);
static int	cachefs_frlock(struct vnode *, int, struct flock64 *,
			int, offset_t, struct flk_callback *, cred_t *,
			caller_context_t *);
static int	cachefs_space(struct vnode *, int, struct flock64 *, int,
			offset_t, cred_t *, caller_context_t *);
static int	cachefs_realvp(struct vnode *, struct vnode **,
			caller_context_t *);
static int	cachefs_getpage(struct vnode *, offset_t, size_t, uint_t *,
			struct page *[], size_t, struct seg *, caddr_t,
			enum seg_rw, cred_t *, caller_context_t *);
static int	cachefs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
			struct page *[], size_t, struct seg *, caddr_t,
			enum seg_rw, cred_t *);
static int	cachefs_getapage_back(struct vnode *, u_offset_t, size_t,
			uint_t *, struct page *[], size_t, struct seg *,
			caddr_t, enum seg_rw, cred_t *);
static int	cachefs_putpage(struct vnode *, offset_t, size_t, int,
			cred_t *, caller_context_t *);
static int	cachefs_map(struct vnode *, offset_t, struct as *,
			caddr_t *, size_t, uchar_t, uchar_t, uint_t, cred_t *,
			caller_context_t *);
static int	cachefs_addmap(struct vnode *, offset_t, struct as *,
			caddr_t, size_t, uchar_t, uchar_t, uint_t, cred_t *,
			caller_context_t *);
static int	cachefs_delmap(struct vnode *, offset_t, struct as *,
			caddr_t, size_t, uint_t, uint_t, uint_t, cred_t *,
			caller_context_t *);
static int	cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec,
			int flag, cred_t *cr, caller_context_t *);
static int	cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec,
			int flag, cred_t *cr, caller_context_t *);
static int	cachefs_shrlock(vnode_t *, int, struct shrlock *, int,
			cred_t *, caller_context_t *);
static int cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
    cred_t *cr);
static int cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec,
    int flag, cred_t *cr);

static int	cachefs_dump(struct vnode *, caddr_t, offset_t, offset_t,
			caller_context_t *);
static int	cachefs_pageio(struct vnode *, page_t *,
			u_offset_t, size_t, int, cred_t *, caller_context_t *);
static int	cachefs_writepage(struct vnode *vp, caddr_t base,
			int tcount, struct uio *uiop);
static int	cachefs_pathconf(vnode_t *, int, ulong_t *, cred_t *,
			caller_context_t *);

/*
 * NFSv4 pass-through variants: when the back file system is NFSv4,
 * cachefs forwards these operations directly to the back vnode.
 */
static int	cachefs_read_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
			cred_t *cr, caller_context_t *ct);
static int	cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
			cred_t *cr, caller_context_t *ct);
static int	cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap,
			int flags, cred_t *cr, caller_context_t *ct);
static int	cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr,
			vnode_t *vp);
static int	cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off,
			size_t len, uint_t *protp, struct page *pl[],
			size_t plsz, struct seg *seg, caddr_t addr,
			enum seg_rw rw, cred_t *cr);
static int	cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off,
			size_t len, int flags, cred_t *cr);
static int	cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off,
			struct as *as, caddr_t *addrp, size_t len, uchar_t prot,
			uchar_t maxprot, uint_t flags, cred_t *cr);
static int	cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd,
			struct flock64 *bfp, int flag, offset_t offset,
			cred_t *cr, caller_context_t *ct);

/* Filled in by cachefs_init_vnops() via vn_make_ops(). */
struct vnodeops *cachefs_vnodeops;

/* VOP-name to handler mapping used to construct cachefs_vnodeops. */
static const fs_operation_def_t cachefs_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = cachefs_open },
	VOPNAME_CLOSE,		{ .vop_close = cachefs_close },
	VOPNAME_READ,		{ .vop_read = cachefs_read },
	VOPNAME_WRITE,		{ .vop_write = cachefs_write },
	VOPNAME_IOCTL,		{ .vop_ioctl = cachefs_ioctl },
	VOPNAME_GETATTR,	{ .vop_getattr = cachefs_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = cachefs_setattr },
	VOPNAME_ACCESS,		{ .vop_access = cachefs_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = cachefs_lookup },
	VOPNAME_CREATE,		{ .vop_create = cachefs_create },
	VOPNAME_REMOVE,		{ .vop_remove = cachefs_remove },
	VOPNAME_LINK,		{ .vop_link = cachefs_link },
	VOPNAME_RENAME,		{ .vop_rename = cachefs_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = cachefs_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = cachefs_rmdir },
	VOPNAME_READDIR,	{ .vop_readdir = cachefs_readdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = cachefs_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = cachefs_readlink },
	VOPNAME_FSYNC,		{ .vop_fsync = cachefs_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = cachefs_inactive },
	VOPNAME_FID,		{ .vop_fid = cachefs_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = cachefs_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = cachefs_rwunlock },
	VOPNAME_SEEK,		{ .vop_seek = cachefs_seek },
	VOPNAME_FRLOCK,		{ .vop_frlock = cachefs_frlock },
	VOPNAME_SPACE,		{ .vop_space = cachefs_space },
	VOPNAME_REALVP,		{ .vop_realvp = cachefs_realvp },
	VOPNAME_GETPAGE,	{ .vop_getpage = cachefs_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = cachefs_putpage },
	VOPNAME_MAP,		{ .vop_map = cachefs_map },
	VOPNAME_ADDMAP,		{ .vop_addmap = cachefs_addmap },
	VOPNAME_DELMAP,		{ .vop_delmap = cachefs_delmap },
	VOPNAME_DUMP,		{ .vop_dump = cachefs_dump },
	VOPNAME_PATHCONF,	{ .vop_pathconf = cachefs_pathconf },
	VOPNAME_PAGEIO,		{ .vop_pageio = cachefs_pageio },
	VOPNAME_SETSECATTR,	{ .vop_setsecattr = cachefs_setsecattr },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = cachefs_getsecattr },
	VOPNAME_SHRLOCK,	{ .vop_shrlock = cachefs_shrlock },
	NULL,			NULL
};

/* forward
declarations of statics */ 305 static void cachefs_modified(cnode_t *cp); 306 static int cachefs_modified_alloc(cnode_t *cp); 307 308 int 309 cachefs_init_vnops(char *name) 310 { 311 return (vn_make_ops(name, 312 cachefs_vnodeops_template, &cachefs_vnodeops)); 313 } 314 315 struct vnodeops * 316 cachefs_getvnodeops(void) 317 { 318 return (cachefs_vnodeops); 319 } 320 321 static int 322 cachefs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 323 { 324 int error = 0; 325 cnode_t *cp = VTOC(*vpp); 326 fscache_t *fscp = C_TO_FSCACHE(cp); 327 int held = 0; 328 int type; 329 int connected = 0; 330 331 #ifdef CFSDEBUG 332 CFS_DEBUG(CFSDEBUG_VOPS) 333 printf("cachefs_open: ENTER vpp %p flag %x\n", 334 (void *)vpp, flag); 335 #endif 336 if (getzoneid() != GLOBAL_ZONEID) { 337 error = EPERM; 338 goto out; 339 } 340 if ((flag & FWRITE) && 341 ((*vpp)->v_type == VDIR || (*vpp)->v_type == VLNK)) { 342 error = EISDIR; 343 goto out; 344 } 345 346 /* 347 * Cachefs only provides pass-through support for NFSv4, 348 * and all vnode operations are passed through to the 349 * back file system. For NFSv4 pass-through to work, only 350 * connected operation is supported, the cnode backvp must 351 * exist, and cachefs optional (eg., disconnectable) flags 352 * are turned off. Assert these conditions to ensure that 353 * the backfilesystem is called for the open operation. 
354 */ 355 CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp); 356 CFS_BACKFS_NFSV4_ASSERT_CNODE(cp); 357 358 for (;;) { 359 /* get (or renew) access to the file system */ 360 if (held) { 361 /* Won't loop with NFSv4 connected behavior */ 362 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0); 363 cachefs_cd_release(fscp); 364 held = 0; 365 } 366 error = cachefs_cd_access(fscp, connected, 0); 367 if (error) 368 goto out; 369 held = 1; 370 371 mutex_enter(&cp->c_statelock); 372 373 /* grab creds if we do not have any yet */ 374 if (cp->c_cred == NULL) { 375 crhold(cr); 376 cp->c_cred = cr; 377 } 378 cp->c_flags |= CN_NEEDOPEN; 379 380 /* if we are disconnected */ 381 if (fscp->fs_cdconnected != CFS_CD_CONNECTED) { 382 /* if we cannot write to the file system */ 383 if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp)) { 384 mutex_exit(&cp->c_statelock); 385 connected = 1; 386 continue; 387 } 388 /* 389 * Allow read only requests to continue 390 */ 391 if ((flag & (FWRITE|FREAD)) == FREAD) { 392 /* track the flag for opening the backvp */ 393 cp->c_rdcnt++; 394 mutex_exit(&cp->c_statelock); 395 error = 0; 396 break; 397 } 398 399 /* 400 * check credentials - if this procs 401 * credentials don't match the creds in the 402 * cnode disallow writing while disconnected. 
403 */ 404 if (crcmp(cp->c_cred, CRED()) != 0 && 405 secpolicy_vnode_access(CRED(), *vpp, 406 cp->c_attr.va_uid, VWRITE) != 0) { 407 mutex_exit(&cp->c_statelock); 408 connected = 1; 409 continue; 410 } 411 /* to get here, we know that the WRITE flag is on */ 412 cp->c_wrcnt++; 413 if (flag & FREAD) 414 cp->c_rdcnt++; 415 } 416 417 /* else if we are connected */ 418 else { 419 /* if cannot use the cached copy of the file */ 420 if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp) && 421 ((cp->c_flags & CN_NOCACHE) == 0)) 422 cachefs_nocache(cp); 423 424 /* pass open to the back file */ 425 if (cp->c_backvp) { 426 cp->c_flags &= ~CN_NEEDOPEN; 427 CFS_DPRINT_BACKFS_NFSV4(fscp, 428 ("cachefs_open (nfsv4): cnode %p, " 429 "backvp %p\n", cp, cp->c_backvp)); 430 error = VOP_OPEN(&cp->c_backvp, flag, cr, ct); 431 if (CFS_TIMEOUT(fscp, error)) { 432 mutex_exit(&cp->c_statelock); 433 cachefs_cd_release(fscp); 434 held = 0; 435 cachefs_cd_timedout(fscp); 436 continue; 437 } else if (error) { 438 mutex_exit(&cp->c_statelock); 439 break; 440 } 441 } else { 442 /* backvp will be VOP_OPEN'd later */ 443 if (flag & FREAD) 444 cp->c_rdcnt++; 445 if (flag & FWRITE) 446 cp->c_wrcnt++; 447 } 448 449 /* 450 * Now perform a consistency check on the file. 451 * If strict consistency then force a check to 452 * the backfs even if the timeout has not expired 453 * for close-to-open consistency. 
454 */ 455 type = 0; 456 if (fscp->fs_consttype == CFS_FS_CONST_STRICT) 457 type = C_BACK_CHECK; 458 error = CFSOP_CHECK_COBJECT(fscp, cp, type, cr); 459 if (CFS_TIMEOUT(fscp, error)) { 460 mutex_exit(&cp->c_statelock); 461 cachefs_cd_release(fscp); 462 held = 0; 463 cachefs_cd_timedout(fscp); 464 continue; 465 } 466 } 467 mutex_exit(&cp->c_statelock); 468 break; 469 } 470 if (held) 471 cachefs_cd_release(fscp); 472 out: 473 #ifdef CFS_CD_DEBUG 474 ASSERT((curthread->t_flag & T_CD_HELD) == 0); 475 #endif 476 #ifdef CFSDEBUG 477 CFS_DEBUG(CFSDEBUG_VOPS) 478 printf("cachefs_open: EXIT vpp %p error %d\n", 479 (void *)vpp, error); 480 #endif 481 return (error); 482 } 483 484 /* ARGSUSED */ 485 static int 486 cachefs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 487 caller_context_t *ct) 488 { 489 int error = 0; 490 cnode_t *cp = VTOC(vp); 491 fscache_t *fscp = C_TO_FSCACHE(cp); 492 int held = 0; 493 int connected = 0; 494 int close_cnt = 1; 495 cachefscache_t *cachep; 496 497 #ifdef CFSDEBUG 498 CFS_DEBUG(CFSDEBUG_VOPS) 499 printf("cachefs_close: ENTER vp %p\n", (void *)vp); 500 #endif 501 /* 502 * Cachefs only provides pass-through support for NFSv4, 503 * and all vnode operations are passed through to the 504 * back file system. For NFSv4 pass-through to work, only 505 * connected operation is supported, the cnode backvp must 506 * exist, and cachefs optional (eg., disconnectable) flags 507 * are turned off. Assert these conditions to ensure that 508 * the backfilesystem is called for the close operation. 509 */ 510 CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp); 511 CFS_BACKFS_NFSV4_ASSERT_CNODE(cp); 512 513 /* 514 * File could have been passed in or inherited from the global zone, so 515 * we don't want to flat out reject the request; we'll just leave things 516 * the way they are and let the backfs (NFS) deal with it. 
517 */ 518 /* get rid of any local locks */ 519 if (CFS_ISFS_LLOCK(fscp)) { 520 (void) cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 521 } 522 523 /* clean up if this is the daemon closing down */ 524 if ((fscp->fs_cddaemonid == ttoproc(curthread)->p_pid) && 525 ((ttoproc(curthread)->p_pid) != 0) && 526 (vp == fscp->fs_rootvp) && 527 (count == 1)) { 528 mutex_enter(&fscp->fs_cdlock); 529 fscp->fs_cddaemonid = 0; 530 if (fscp->fs_dlogfile) 531 fscp->fs_cdconnected = CFS_CD_DISCONNECTED; 532 else 533 fscp->fs_cdconnected = CFS_CD_CONNECTED; 534 cv_broadcast(&fscp->fs_cdwaitcv); 535 mutex_exit(&fscp->fs_cdlock); 536 if (fscp->fs_flags & CFS_FS_ROOTFS) { 537 cachep = fscp->fs_cache; 538 mutex_enter(&cachep->c_contentslock); 539 ASSERT(cachep->c_rootdaemonid != 0); 540 cachep->c_rootdaemonid = 0; 541 mutex_exit(&cachep->c_contentslock); 542 } 543 return (0); 544 } 545 546 for (;;) { 547 /* get (or renew) access to the file system */ 548 if (held) { 549 /* Won't loop with NFSv4 connected behavior */ 550 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0); 551 cachefs_cd_release(fscp); 552 held = 0; 553 } 554 error = cachefs_cd_access(fscp, connected, 0); 555 if (error) 556 goto out; 557 held = 1; 558 connected = 0; 559 560 /* if not the last close */ 561 if (count > 1) { 562 if (fscp->fs_cdconnected != CFS_CD_CONNECTED) 563 goto out; 564 mutex_enter(&cp->c_statelock); 565 if (cp->c_backvp) { 566 CFS_DPRINT_BACKFS_NFSV4(fscp, 567 ("cachefs_close (nfsv4): cnode %p, " 568 "backvp %p\n", cp, cp->c_backvp)); 569 error = VOP_CLOSE(cp->c_backvp, flag, count, 570 offset, cr, ct); 571 if (CFS_TIMEOUT(fscp, error)) { 572 mutex_exit(&cp->c_statelock); 573 cachefs_cd_release(fscp); 574 held = 0; 575 cachefs_cd_timedout(fscp); 576 continue; 577 } 578 } 579 mutex_exit(&cp->c_statelock); 580 goto out; 581 } 582 583 /* 584 * If the file is an unlinked file, then flush the lookup 585 * cache so that inactive will be called if this is 586 * the last reference. 
It will invalidate all of the 587 * cached pages, without writing them out. Writing them 588 * out is not required because they will be written to a 589 * file which will be immediately removed. 590 */ 591 if (cp->c_unldvp != NULL) { 592 dnlc_purge_vp(vp); 593 mutex_enter(&cp->c_statelock); 594 error = cp->c_error; 595 cp->c_error = 0; 596 mutex_exit(&cp->c_statelock); 597 /* always call VOP_CLOSE() for back fs vnode */ 598 } 599 600 /* force dirty data to stable storage */ 601 else if ((vp->v_type == VREG) && (flag & FWRITE) && 602 !CFS_ISFS_BACKFS_NFSV4(fscp)) { 603 /* clean the cachefs pages synchronously */ 604 error = cachefs_putpage_common(vp, (offset_t)0, 605 0, 0, cr); 606 if (CFS_TIMEOUT(fscp, error)) { 607 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) { 608 cachefs_cd_release(fscp); 609 held = 0; 610 cachefs_cd_timedout(fscp); 611 continue; 612 } else { 613 connected = 1; 614 continue; 615 } 616 } 617 618 /* if no space left in cache, wait until connected */ 619 if ((error == ENOSPC) && 620 (fscp->fs_cdconnected != CFS_CD_CONNECTED)) { 621 connected = 1; 622 continue; 623 } 624 625 /* clear the cnode error if putpage worked */ 626 if ((error == 0) && cp->c_error) { 627 mutex_enter(&cp->c_statelock); 628 cp->c_error = 0; 629 mutex_exit(&cp->c_statelock); 630 } 631 632 /* if any other important error */ 633 if (cp->c_error) { 634 /* get rid of the pages */ 635 (void) cachefs_putpage_common(vp, 636 (offset_t)0, 0, B_INVAL | B_FORCE, cr); 637 dnlc_purge_vp(vp); 638 } 639 } 640 641 mutex_enter(&cp->c_statelock); 642 if (cp->c_backvp && 643 (fscp->fs_cdconnected == CFS_CD_CONNECTED)) { 644 error = VOP_CLOSE(cp->c_backvp, flag, close_cnt, 645 offset, cr, ct); 646 if (CFS_TIMEOUT(fscp, error)) { 647 mutex_exit(&cp->c_statelock); 648 cachefs_cd_release(fscp); 649 held = 0; 650 cachefs_cd_timedout(fscp); 651 /* don't decrement the vnode counts again */ 652 close_cnt = 0; 653 continue; 654 } 655 } 656 mutex_exit(&cp->c_statelock); 657 break; 658 } 659 660 
mutex_enter(&cp->c_statelock); 661 if (!error) 662 error = cp->c_error; 663 cp->c_error = 0; 664 mutex_exit(&cp->c_statelock); 665 666 out: 667 if (held) 668 cachefs_cd_release(fscp); 669 #ifdef CFS_CD_DEBUG 670 ASSERT((curthread->t_flag & T_CD_HELD) == 0); 671 #endif 672 673 #ifdef CFSDEBUG 674 CFS_DEBUG(CFSDEBUG_VOPS) 675 printf("cachefs_close: EXIT vp %p\n", (void *)vp); 676 #endif 677 return (error); 678 } 679 680 /*ARGSUSED*/ 681 static int 682 cachefs_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, 683 caller_context_t *ct) 684 { 685 struct cnode *cp = VTOC(vp); 686 fscache_t *fscp = C_TO_FSCACHE(cp); 687 register u_offset_t off; 688 register int mapoff; 689 register caddr_t base; 690 int n; 691 offset_t diff; 692 uint_t flags = 0; 693 int error = 0; 694 695 #if 0 696 if (vp->v_flag & VNOCACHE) 697 flags = SM_INVAL; 698 #endif 699 if (getzoneid() != GLOBAL_ZONEID) 700 return (EPERM); 701 if (vp->v_type != VREG) 702 return (EISDIR); 703 704 ASSERT(RW_READ_HELD(&cp->c_rwlock)); 705 706 if (uiop->uio_resid == 0) 707 return (0); 708 709 710 if (uiop->uio_loffset < (offset_t)0) 711 return (EINVAL); 712 713 /* 714 * Call backfilesystem to read if NFSv4, the cachefs code 715 * does the read from the back filesystem asynchronously 716 * which is not supported by pass-through functionality. 717 */ 718 if (CFS_ISFS_BACKFS_NFSV4(fscp)) { 719 error = cachefs_read_backfs_nfsv4(vp, uiop, ioflag, cr, ct); 720 goto out; 721 } 722 723 if (MANDLOCK(vp, cp->c_attr.va_mode)) { 724 error = chklock(vp, FREAD, (offset_t)uiop->uio_loffset, 725 uiop->uio_resid, uiop->uio_fmode, ct); 726 if (error) 727 return (error); 728 } 729 730 /* 731 * Sit in a loop and transfer (uiomove) the data in up to 732 * MAXBSIZE chunks. Each chunk is mapped into the kernel's 733 * address space as needed and then released. 
734 */ 735 do { 736 /* 737 * off Offset of current MAXBSIZE chunk 738 * mapoff Offset within the current chunk 739 * n Number of bytes to move from this chunk 740 * base kernel address of mapped in chunk 741 */ 742 off = uiop->uio_loffset & (offset_t)MAXBMASK; 743 mapoff = uiop->uio_loffset & MAXBOFFSET; 744 n = MAXBSIZE - mapoff; 745 if (n > uiop->uio_resid) 746 n = (uint_t)uiop->uio_resid; 747 748 /* perform consistency check */ 749 error = cachefs_cd_access(fscp, 0, 0); 750 if (error) 751 break; 752 mutex_enter(&cp->c_statelock); 753 error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr); 754 diff = cp->c_size - uiop->uio_loffset; 755 mutex_exit(&cp->c_statelock); 756 if (CFS_TIMEOUT(fscp, error)) { 757 cachefs_cd_release(fscp); 758 cachefs_cd_timedout(fscp); 759 error = 0; 760 continue; 761 } 762 cachefs_cd_release(fscp); 763 764 if (error) 765 break; 766 767 if (diff <= (offset_t)0) 768 break; 769 if (diff < (offset_t)n) 770 n = diff; 771 772 base = segmap_getmapflt(segkmap, vp, off, (uint_t)n, 1, S_READ); 773 774 error = segmap_fault(kas.a_hat, segkmap, base, n, 775 F_SOFTLOCK, S_READ); 776 if (error) { 777 (void) segmap_release(segkmap, base, 0); 778 if (FC_CODE(error) == FC_OBJERR) 779 error = FC_ERRNO(error); 780 else 781 error = EIO; 782 break; 783 } 784 error = uiomove(base+mapoff, n, UIO_READ, uiop); 785 (void) segmap_fault(kas.a_hat, segkmap, base, n, 786 F_SOFTUNLOCK, S_READ); 787 if (error == 0) { 788 /* 789 * if we read a whole page(s), or to eof, 790 * we won't need this page(s) again soon. 
791 */ 792 if (n + mapoff == MAXBSIZE || 793 uiop->uio_loffset == cp->c_size) 794 flags |= SM_DONTNEED; 795 } 796 (void) segmap_release(segkmap, base, flags); 797 } while (error == 0 && uiop->uio_resid > 0); 798 799 out: 800 #ifdef CFSDEBUG 801 CFS_DEBUG(CFSDEBUG_VOPS) 802 printf("cachefs_read: EXIT error %d resid %ld\n", error, 803 uiop->uio_resid); 804 #endif 805 return (error); 806 } 807 808 /* 809 * cachefs_read_backfs_nfsv4 810 * 811 * Call NFSv4 back filesystem to handle the read (cachefs 812 * pass-through support for NFSv4). 813 */ 814 static int 815 cachefs_read_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, 816 caller_context_t *ct) 817 { 818 cnode_t *cp = VTOC(vp); 819 fscache_t *fscp = C_TO_FSCACHE(cp); 820 vnode_t *backvp; 821 int error; 822 823 /* 824 * For NFSv4 pass-through to work, only connected operation 825 * is supported, the cnode backvp must exist, and cachefs 826 * optional (eg., disconnectable) flags are turned off. Assert 827 * these conditions for the read operation. 
828 */ 829 CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp); 830 CFS_BACKFS_NFSV4_ASSERT_CNODE(cp); 831 832 /* Call backfs vnode op after extracting backvp */ 833 mutex_enter(&cp->c_statelock); 834 backvp = cp->c_backvp; 835 mutex_exit(&cp->c_statelock); 836 837 CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_read_backfs_nfsv4: cnode %p, " 838 "backvp %p\n", cp, backvp)); 839 840 (void) VOP_RWLOCK(backvp, V_WRITELOCK_FALSE, ct); 841 error = VOP_READ(backvp, uiop, ioflag, cr, ct); 842 VOP_RWUNLOCK(backvp, V_WRITELOCK_FALSE, ct); 843 844 /* Increment cache miss counter */ 845 fscp->fs_stats.st_misses++; 846 847 return (error); 848 } 849 850 /*ARGSUSED*/ 851 static int 852 cachefs_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, 853 caller_context_t *ct) 854 { 855 struct cnode *cp = VTOC(vp); 856 fscache_t *fscp = C_TO_FSCACHE(cp); 857 int error = 0; 858 u_offset_t off; 859 caddr_t base; 860 uint_t bsize; 861 uint_t flags; 862 int n, on; 863 rlim64_t limit = uiop->uio_llimit; 864 ssize_t resid; 865 offset_t offset; 866 offset_t remainder; 867 868 #ifdef CFSDEBUG 869 CFS_DEBUG(CFSDEBUG_VOPS) 870 printf( 871 "cachefs_write: ENTER vp %p offset %llu count %ld cflags %x\n", 872 (void *)vp, uiop->uio_loffset, uiop->uio_resid, 873 cp->c_flags); 874 #endif 875 if (getzoneid() != GLOBAL_ZONEID) { 876 error = EPERM; 877 goto out; 878 } 879 if (vp->v_type != VREG) { 880 error = EISDIR; 881 goto out; 882 } 883 884 ASSERT(RW_WRITE_HELD(&cp->c_rwlock)); 885 886 if (uiop->uio_resid == 0) { 887 goto out; 888 } 889 890 /* Call backfilesystem to write if NFSv4 */ 891 if (CFS_ISFS_BACKFS_NFSV4(fscp)) { 892 error = cachefs_write_backfs_nfsv4(vp, uiop, ioflag, cr, ct); 893 goto out2; 894 } 895 896 if (MANDLOCK(vp, cp->c_attr.va_mode)) { 897 error = chklock(vp, FWRITE, (offset_t)uiop->uio_loffset, 898 uiop->uio_resid, uiop->uio_fmode, ct); 899 if (error) 900 goto out; 901 } 902 903 if (ioflag & FAPPEND) { 904 for (;;) { 905 /* do consistency check to get correct file size */ 906 error = 
cachefs_cd_access(fscp, 0, 1); 907 if (error) 908 goto out; 909 mutex_enter(&cp->c_statelock); 910 error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr); 911 uiop->uio_loffset = cp->c_size; 912 mutex_exit(&cp->c_statelock); 913 if (CFS_TIMEOUT(fscp, error)) { 914 cachefs_cd_release(fscp); 915 cachefs_cd_timedout(fscp); 916 continue; 917 } 918 cachefs_cd_release(fscp); 919 if (error) 920 goto out; 921 break; 922 } 923 } 924 925 if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 926 limit = MAXOFFSET_T; 927 928 if (uiop->uio_loffset >= limit) { 929 proc_t *p = ttoproc(curthread); 930 931 mutex_enter(&p->p_lock); 932 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls, 933 p, RCA_UNSAFE_SIGINFO); 934 mutex_exit(&p->p_lock); 935 error = EFBIG; 936 goto out; 937 } 938 if (uiop->uio_loffset > fscp->fs_offmax) { 939 error = EFBIG; 940 goto out; 941 } 942 943 if (limit > fscp->fs_offmax) 944 limit = fscp->fs_offmax; 945 946 if (uiop->uio_loffset < (offset_t)0) { 947 error = EINVAL; 948 goto out; 949 } 950 951 offset = uiop->uio_loffset + uiop->uio_resid; 952 /* 953 * Check to make sure that the process will not exceed 954 * its limit on file size. It is okay to write up to 955 * the limit, but not beyond. Thus, the write which 956 * reaches the limit will be short and the next write 957 * will return an error. 
958 */ 959 remainder = 0; 960 if (offset > limit) { 961 remainder = (int)(offset - (u_offset_t)limit); 962 uiop->uio_resid = limit - uiop->uio_loffset; 963 if (uiop->uio_resid <= 0) { 964 proc_t *p = ttoproc(curthread); 965 966 uiop->uio_resid += remainder; 967 mutex_enter(&p->p_lock); 968 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 969 p->p_rctls, p, RCA_UNSAFE_SIGINFO); 970 mutex_exit(&p->p_lock); 971 error = EFBIG; 972 goto out; 973 } 974 } 975 976 resid = uiop->uio_resid; 977 offset = uiop->uio_loffset; 978 bsize = vp->v_vfsp->vfs_bsize; 979 980 /* loop around and do the write in MAXBSIZE chunks */ 981 do { 982 /* mapping offset */ 983 off = uiop->uio_loffset & (offset_t)MAXBMASK; 984 on = uiop->uio_loffset & MAXBOFFSET; /* Rel. offset */ 985 n = MAXBSIZE - on; 986 if (n > uiop->uio_resid) 987 n = (int)uiop->uio_resid; 988 989 /* 990 * Touch the page and fault it in if it is not in 991 * core before segmap_getmapflt can lock it. This 992 * is to avoid the deadlock if the buffer is mapped 993 * to the same file through mmap which we want to 994 * write to. 995 */ 996 uio_prefaultpages((long)n, uiop); 997 998 base = segmap_getmap(segkmap, vp, off); 999 error = cachefs_writepage(vp, (base + on), n, uiop); 1000 if (error == 0) { 1001 flags = 0; 1002 /* 1003 * Have written a whole block.Start an 1004 * asynchronous write and mark the buffer to 1005 * indicate that it won't be needed again 1006 * soon. 
1007 */ 1008 if (n + on == bsize) { 1009 flags = SM_WRITE |SM_ASYNC |SM_DONTNEED; 1010 } 1011 #if 0 1012 /* XXX need to understand this */ 1013 if ((ioflag & (FSYNC|FDSYNC)) || 1014 (cp->c_backvp && vn_has_flocks(cp->c_backvp))) { 1015 flags &= ~SM_ASYNC; 1016 flags |= SM_WRITE; 1017 } 1018 #else 1019 if (ioflag & (FSYNC|FDSYNC)) { 1020 flags &= ~SM_ASYNC; 1021 flags |= SM_WRITE; 1022 } 1023 #endif 1024 error = segmap_release(segkmap, base, flags); 1025 } else { 1026 (void) segmap_release(segkmap, base, 0); 1027 } 1028 } while (error == 0 && uiop->uio_resid > 0); 1029 1030 out: 1031 if (error == EINTR && (ioflag & (FSYNC|FDSYNC))) { 1032 uiop->uio_resid = resid; 1033 uiop->uio_loffset = offset; 1034 } else 1035 uiop->uio_resid += remainder; 1036 1037 out2: 1038 #ifdef CFSDEBUG 1039 CFS_DEBUG(CFSDEBUG_VOPS) 1040 printf("cachefs_write: EXIT error %d\n", error); 1041 #endif 1042 return (error); 1043 } 1044 1045 /* 1046 * cachefs_write_backfs_nfsv4 1047 * 1048 * Call NFSv4 back filesystem to handle the write (cachefs 1049 * pass-through support for NFSv4). 1050 */ 1051 static int 1052 cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, 1053 caller_context_t *ct) 1054 { 1055 cnode_t *cp = VTOC(vp); 1056 fscache_t *fscp = C_TO_FSCACHE(cp); 1057 vnode_t *backvp; 1058 int error; 1059 1060 /* 1061 * For NFSv4 pass-through to work, only connected operation 1062 * is supported, the cnode backvp must exist, and cachefs 1063 * optional (eg., disconnectable) flags are turned off. Assert 1064 * these conditions for the read operation. 
1065 */ 1066 CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp); 1067 CFS_BACKFS_NFSV4_ASSERT_CNODE(cp); 1068 1069 /* Call backfs vnode op after extracting the backvp */ 1070 mutex_enter(&cp->c_statelock); 1071 backvp = cp->c_backvp; 1072 mutex_exit(&cp->c_statelock); 1073 1074 CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_write_backfs_nfsv4: cnode %p, " 1075 "backvp %p\n", cp, backvp)); 1076 (void) VOP_RWLOCK(backvp, V_WRITELOCK_TRUE, ct); 1077 error = VOP_WRITE(backvp, uiop, ioflag, cr, ct); 1078 VOP_RWUNLOCK(backvp, V_WRITELOCK_TRUE, ct); 1079 1080 return (error); 1081 } 1082 1083 /* 1084 * see if we've charged ourselves for frontfile data at 1085 * the given offset. If not, allocate a block for it now. 1086 */ 1087 static int 1088 cachefs_charge_page(struct cnode *cp, u_offset_t offset) 1089 { 1090 u_offset_t blockoff; 1091 int error; 1092 int inc; 1093 1094 ASSERT(MUTEX_HELD(&cp->c_statelock)); 1095 /*LINTED*/ 1096 ASSERT(PAGESIZE <= MAXBSIZE); 1097 1098 error = 0; 1099 blockoff = offset & (offset_t)MAXBMASK; 1100 1101 /* get the front file if necessary so allocblocks works */ 1102 if ((cp->c_frontvp == NULL) && 1103 ((cp->c_flags & CN_NOCACHE) == 0)) { 1104 (void) cachefs_getfrontfile(cp); 1105 } 1106 if (cp->c_flags & CN_NOCACHE) 1107 return (1); 1108 1109 if (cachefs_check_allocmap(cp, blockoff)) 1110 return (0); 1111 1112 for (inc = PAGESIZE; inc < MAXBSIZE; inc += PAGESIZE) 1113 if (cachefs_check_allocmap(cp, blockoff+inc)) 1114 return (0); 1115 1116 error = cachefs_allocblocks(C_TO_FSCACHE(cp)->fs_cache, 1, 1117 cp->c_metadata.md_rltype); 1118 if (error == 0) { 1119 cp->c_metadata.md_frontblks++; 1120 cp->c_flags |= CN_UPDATED; 1121 } 1122 return (error); 1123 } 1124 1125 /* 1126 * Called only by cachefs_write to write 1 page or less of data. 
1127 * base - base address kernel addr space 1128 * tcount - Total bytes to move - < MAXBSIZE 1129 */ 1130 static int 1131 cachefs_writepage(vnode_t *vp, caddr_t base, int tcount, uio_t *uiop) 1132 { 1133 struct cnode *cp = VTOC(vp); 1134 fscache_t *fscp = C_TO_FSCACHE(cp); 1135 register int n; 1136 register u_offset_t offset; 1137 int error = 0, terror; 1138 extern struct as kas; 1139 u_offset_t lastpage_off; 1140 int pagecreate = 0; 1141 int newpage; 1142 1143 #ifdef CFSDEBUG 1144 CFS_DEBUG(CFSDEBUG_VOPS) 1145 printf( 1146 "cachefs_writepage: ENTER vp %p offset %llu len %ld\\\n", 1147 (void *)vp, uiop->uio_loffset, uiop->uio_resid); 1148 #endif 1149 1150 /* 1151 * Move bytes in PAGESIZE chunks. We must avoid spanning pages in 1152 * uiomove() because page faults may cause the cache to be invalidated 1153 * out from under us. 1154 */ 1155 do { 1156 offset = uiop->uio_loffset; 1157 lastpage_off = (cp->c_size - 1) & (offset_t)PAGEMASK; 1158 1159 /* 1160 * If not connected then need to make sure we have space 1161 * to perform the write. We could make this check 1162 * a little tighter by only doing it if we are growing the file. 1163 */ 1164 if (fscp->fs_cdconnected != CFS_CD_CONNECTED) { 1165 error = cachefs_allocblocks(fscp->fs_cache, 1, 1166 cp->c_metadata.md_rltype); 1167 if (error) 1168 break; 1169 cachefs_freeblocks(fscp->fs_cache, 1, 1170 cp->c_metadata.md_rltype); 1171 } 1172 1173 /* 1174 * n is the number of bytes required to satisfy the request 1175 * or the number of bytes to fill out the page. 1176 */ 1177 n = (int)(PAGESIZE - ((uintptr_t)base & PAGEOFFSET)); 1178 if (n > tcount) 1179 n = tcount; 1180 1181 /* 1182 * The number of bytes of data in the last page can not 1183 * be accurately be determined while page is being 1184 * uiomove'd to and the size of the file being updated. 1185 * Thus, inform threads which need to know accurately 1186 * how much data is in the last page of the file. 
They 1187 * will not do the i/o immediately, but will arrange for 1188 * the i/o to happen later when this modify operation 1189 * will have finished. 1190 * 1191 * in similar NFS code, this is done right before the 1192 * uiomove(), which is best. but here in cachefs, we 1193 * have two uiomove()s, so we must do it here. 1194 */ 1195 ASSERT(!(cp->c_flags & CN_CMODINPROG)); 1196 mutex_enter(&cp->c_statelock); 1197 cp->c_flags |= CN_CMODINPROG; 1198 cp->c_modaddr = (offset & (offset_t)MAXBMASK); 1199 mutex_exit(&cp->c_statelock); 1200 1201 /* 1202 * Check to see if we can skip reading in the page 1203 * and just allocate the memory. We can do this 1204 * if we are going to rewrite the entire mapping 1205 * or if we are going to write to or beyond the current 1206 * end of file from the beginning of the mapping. 1207 */ 1208 if ((offset > (lastpage_off + PAGEOFFSET)) || 1209 ((cp->c_size == 0) && (offset < PAGESIZE)) || 1210 ((uintptr_t)base & PAGEOFFSET) == 0 && (n == PAGESIZE || 1211 ((offset + n) >= cp->c_size))) { 1212 pagecreate = 1; 1213 1214 /* 1215 * segmap_pagecreate() returns 1 if it calls 1216 * page_create_va() to allocate any pages. 1217 */ 1218 newpage = segmap_pagecreate(segkmap, 1219 (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK), 1220 PAGESIZE, 0); 1221 /* do not zero page if we are overwriting all of it */ 1222 if (!((((uintptr_t)base & PAGEOFFSET) == 0) && 1223 (n == PAGESIZE))) { 1224 (void) kzero((void *) 1225 ((uintptr_t)base & (uintptr_t)PAGEMASK), 1226 PAGESIZE); 1227 } 1228 error = uiomove(base, n, UIO_WRITE, uiop); 1229 1230 /* 1231 * Unlock the page allocated by page_create_va() 1232 * in segmap_pagecreate() 1233 */ 1234 if (newpage) 1235 segmap_pageunlock(segkmap, 1236 (caddr_t)((uintptr_t)base & 1237 (uintptr_t)PAGEMASK), 1238 PAGESIZE, S_WRITE); 1239 } else { 1240 /* 1241 * KLUDGE ! Use segmap_fault instead of faulting and 1242 * using as_fault() to avoid a recursive readers lock 1243 * on kas. 
1244 */ 1245 error = segmap_fault(kas.a_hat, segkmap, (caddr_t) 1246 ((uintptr_t)base & (uintptr_t)PAGEMASK), 1247 PAGESIZE, F_SOFTLOCK, S_WRITE); 1248 if (error) { 1249 if (FC_CODE(error) == FC_OBJERR) 1250 error = FC_ERRNO(error); 1251 else 1252 error = EIO; 1253 break; 1254 } 1255 error = uiomove(base, n, UIO_WRITE, uiop); 1256 (void) segmap_fault(kas.a_hat, segkmap, (caddr_t) 1257 ((uintptr_t)base & (uintptr_t)PAGEMASK), 1258 PAGESIZE, F_SOFTUNLOCK, S_WRITE); 1259 } 1260 n = (int)(uiop->uio_loffset - offset); /* n = # bytes written */ 1261 base += n; 1262 tcount -= n; 1263 1264 /* get access to the file system */ 1265 if ((terror = cachefs_cd_access(fscp, 0, 1)) != 0) { 1266 error = terror; 1267 break; 1268 } 1269 1270 /* 1271 * cp->c_attr.va_size is the maximum number of 1272 * bytes known to be in the file. 1273 * Make sure it is at least as high as the 1274 * last byte we just wrote into the buffer. 1275 */ 1276 mutex_enter(&cp->c_statelock); 1277 if (cp->c_size < uiop->uio_loffset) { 1278 cp->c_size = uiop->uio_loffset; 1279 } 1280 if (cp->c_size != cp->c_attr.va_size) { 1281 cp->c_attr.va_size = cp->c_size; 1282 cp->c_flags |= CN_UPDATED; 1283 } 1284 /* c_size is now correct, so we can clear modinprog */ 1285 cp->c_flags &= ~CN_CMODINPROG; 1286 if (error == 0) { 1287 cp->c_flags |= CDIRTY; 1288 if (pagecreate && (cp->c_flags & CN_NOCACHE) == 0) { 1289 /* 1290 * if we're not in NOCACHE mode 1291 * (i.e., single-writer), we update the 1292 * allocmap here rather than waiting until 1293 * cachefspush is called. This prevents 1294 * getpage from clustering up pages from 1295 * the backfile and stomping over the changes 1296 * we make here. 
1297 */ 1298 if (cachefs_charge_page(cp, offset) == 0) { 1299 cachefs_update_allocmap(cp, 1300 offset & (offset_t)PAGEMASK, 1301 (size_t)PAGESIZE); 1302 } 1303 1304 /* else we ran out of space */ 1305 else { 1306 /* nocache file if connected */ 1307 if (fscp->fs_cdconnected == 1308 CFS_CD_CONNECTED) 1309 cachefs_nocache(cp); 1310 /* 1311 * If disconnected then cannot 1312 * nocache the file. Let it have 1313 * the space. 1314 */ 1315 else { 1316 cp->c_metadata.md_frontblks++; 1317 cp->c_flags |= CN_UPDATED; 1318 cachefs_update_allocmap(cp, 1319 offset & (offset_t)PAGEMASK, 1320 (size_t)PAGESIZE); 1321 } 1322 } 1323 } 1324 } 1325 mutex_exit(&cp->c_statelock); 1326 cachefs_cd_release(fscp); 1327 } while (tcount > 0 && error == 0); 1328 1329 if (cp->c_flags & CN_CMODINPROG) { 1330 /* XXX assert error != 0? FC_ERRNO() makes this more risky. */ 1331 mutex_enter(&cp->c_statelock); 1332 cp->c_flags &= ~CN_CMODINPROG; 1333 mutex_exit(&cp->c_statelock); 1334 } 1335 1336 #ifdef CFS_CD_DEBUG 1337 ASSERT((curthread->t_flag & T_CD_HELD) == 0); 1338 #endif 1339 1340 #ifdef CFSDEBUG 1341 CFS_DEBUG(CFSDEBUG_VOPS) 1342 printf("cachefs_writepage: EXIT error %d\n", error); 1343 #endif 1344 1345 return (error); 1346 } 1347 1348 /* 1349 * Pushes out pages to the back and/or front file system. 
1350 */ 1351 static int 1352 cachefs_push(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp, 1353 int flags, cred_t *cr) 1354 { 1355 struct cnode *cp = VTOC(vp); 1356 struct buf *bp; 1357 int error; 1358 fscache_t *fscp = C_TO_FSCACHE(cp); 1359 u_offset_t iooff; 1360 size_t iolen; 1361 u_offset_t lbn; 1362 u_offset_t lbn_off; 1363 uint_t bsize; 1364 1365 ASSERT((flags & B_ASYNC) == 0); 1366 ASSERT(!vn_is_readonly(vp)); 1367 ASSERT(pp != NULL); 1368 ASSERT(cr != NULL); 1369 1370 bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE); 1371 lbn = pp->p_offset / bsize; 1372 lbn_off = lbn * bsize; 1373 1374 /* 1375 * Find a kluster that fits in one block, or in 1376 * one page if pages are bigger than blocks. If 1377 * there is less file space allocated than a whole 1378 * page, we'll shorten the i/o request below. 1379 */ 1380 1381 pp = pvn_write_kluster(vp, pp, &iooff, &iolen, lbn_off, 1382 roundup(bsize, PAGESIZE), flags); 1383 1384 /* 1385 * The CN_CMODINPROG flag makes sure that we use a correct 1386 * value of c_size, below. CN_CMODINPROG is set in 1387 * cachefs_writepage(). When CN_CMODINPROG is set it 1388 * indicates that a uiomove() is in progress and the c_size 1389 * has not been made consistent with the new size of the 1390 * file. When the uiomove() completes the c_size is updated 1391 * and the CN_CMODINPROG flag is cleared. 1392 * 1393 * The CN_CMODINPROG flag makes sure that cachefs_push_front 1394 * and cachefs_push_connected see a consistent value of 1395 * c_size. Without this handshaking, it is possible that 1396 * these routines will pick up the old value of c_size before 1397 * the uiomove() in cachefs_writepage() completes. This will 1398 * result in the vn_rdwr() being too small, and data loss. 1399 * 1400 * More precisely, there is a window between the time the 1401 * uiomove() completes and the time the c_size is updated. 
If 1402 * a VOP_PUTPAGE() operation intervenes in this window, the 1403 * page will be picked up, because it is dirty; it will be 1404 * unlocked, unless it was pagecreate'd. When the page is 1405 * picked up as dirty, the dirty bit is reset 1406 * (pvn_getdirty()). In cachefs_push_connected(), c_size is 1407 * checked. This will still be the old size. Therefore, the 1408 * page will not be written out to the correct length, and the 1409 * page will be clean, so the data may disappear. 1410 */ 1411 if (cp->c_flags & CN_CMODINPROG) { 1412 mutex_enter(&cp->c_statelock); 1413 if ((cp->c_flags & CN_CMODINPROG) && 1414 cp->c_modaddr + MAXBSIZE > iooff && 1415 cp->c_modaddr < iooff + iolen) { 1416 page_t *plist; 1417 1418 /* 1419 * A write is in progress for this region of 1420 * the file. If we did not detect 1421 * CN_CMODINPROG here then this path through 1422 * cachefs_push_connected() would eventually 1423 * do the vn_rdwr() and may not write out all 1424 * of the data in the pages. We end up losing 1425 * data. So we decide to set the modified bit 1426 * on each page in the page list and mark the 1427 * cnode with CDIRTY. This push will be 1428 * restarted at some later time. 1429 */ 1430 1431 plist = pp; 1432 while (plist != NULL) { 1433 pp = plist; 1434 page_sub(&plist, pp); 1435 hat_setmod(pp); 1436 page_io_unlock(pp); 1437 page_unlock(pp); 1438 } 1439 cp->c_flags |= CDIRTY; 1440 mutex_exit(&cp->c_statelock); 1441 if (offp) 1442 *offp = iooff; 1443 if (lenp) 1444 *lenp = iolen; 1445 return (0); 1446 } 1447 mutex_exit(&cp->c_statelock); 1448 } 1449 1450 /* 1451 * Set the pages up for pageout. 1452 */ 1453 bp = pageio_setup(pp, iolen, CTOV(cp), B_WRITE | flags); 1454 if (bp == NULL) { 1455 1456 /* 1457 * currently, there is no way for pageio_setup() to 1458 * return NULL, since it uses its own scheme for 1459 * kmem_alloc()ing that shouldn't return NULL, and 1460 * since pageio_setup() itself dereferences the thing 1461 * it's about to return. 
still, we need to be ready 1462 * in case this ever does start happening. 1463 */ 1464 1465 error = ENOMEM; 1466 goto writedone; 1467 } 1468 /* 1469 * pageio_setup should have set b_addr to 0. This 1470 * is correct since we want to do I/O on a page 1471 * boundary. bp_mapin will use this addr to calculate 1472 * an offset, and then set b_addr to the kernel virtual 1473 * address it allocated for us. 1474 */ 1475 bp->b_edev = 0; 1476 bp->b_dev = 0; 1477 bp->b_lblkno = (diskaddr_t)lbtodb(iooff); 1478 bp_mapin(bp); 1479 1480 iolen = cp->c_size - ldbtob(bp->b_blkno); 1481 if (iolen > bp->b_bcount) 1482 iolen = bp->b_bcount; 1483 1484 /* if connected */ 1485 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) { 1486 /* write to the back file first */ 1487 error = cachefs_push_connected(vp, bp, iolen, iooff, cr); 1488 1489 /* write to the front file if allowed */ 1490 if ((error == 0) && CFS_ISFS_NONSHARED(fscp) && 1491 ((cp->c_flags & CN_NOCACHE) == 0)) { 1492 /* try to write to the front file */ 1493 (void) cachefs_push_front(vp, bp, iolen, iooff, cr); 1494 } 1495 } 1496 1497 /* else if disconnected */ 1498 else { 1499 /* try to write to the front file */ 1500 error = cachefs_push_front(vp, bp, iolen, iooff, cr); 1501 } 1502 1503 bp_mapout(bp); 1504 pageio_done(bp); 1505 1506 writedone: 1507 1508 pvn_write_done(pp, ((error) ? 
B_ERROR : 0) | B_WRITE | flags); 1509 if (offp) 1510 *offp = iooff; 1511 if (lenp) 1512 *lenp = iolen; 1513 1514 /* XXX ask bob mastors how to fix this someday */ 1515 mutex_enter(&cp->c_statelock); 1516 if (error) { 1517 if (error == ENOSPC) { 1518 if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) || 1519 CFS_ISFS_SOFT(fscp)) { 1520 CFSOP_INVALIDATE_COBJECT(fscp, cp, cr); 1521 cp->c_error = error; 1522 } 1523 } else if ((CFS_TIMEOUT(fscp, error) == 0) && 1524 (error != EINTR)) { 1525 CFSOP_INVALIDATE_COBJECT(fscp, cp, cr); 1526 cp->c_error = error; 1527 } 1528 } else if (fscp->fs_cdconnected == CFS_CD_CONNECTED) { 1529 CFSOP_MODIFY_COBJECT(fscp, cp, cr); 1530 } 1531 mutex_exit(&cp->c_statelock); 1532 1533 return (error); 1534 } 1535 1536 /* 1537 * Pushes out pages to the back file system. 1538 */ 1539 static int 1540 cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen, 1541 u_offset_t iooff, cred_t *cr) 1542 { 1543 struct cnode *cp = VTOC(vp); 1544 int error = 0; 1545 int mode = 0; 1546 fscache_t *fscp = C_TO_FSCACHE(cp); 1547 ssize_t resid; 1548 vnode_t *backvp; 1549 1550 /* get the back file if necessary */ 1551 mutex_enter(&cp->c_statelock); 1552 if (cp->c_backvp == NULL) { 1553 error = cachefs_getbackvp(fscp, cp); 1554 if (error) { 1555 mutex_exit(&cp->c_statelock); 1556 goto out; 1557 } 1558 } 1559 backvp = cp->c_backvp; 1560 VN_HOLD(backvp); 1561 mutex_exit(&cp->c_statelock); 1562 1563 if (CFS_ISFS_NONSHARED(fscp) && CFS_ISFS_SNR(fscp)) 1564 mode = FSYNC; 1565 1566 /* write to the back file */ 1567 error = bp->b_error = vn_rdwr(UIO_WRITE, backvp, bp->b_un.b_addr, 1568 iolen, iooff, UIO_SYSSPACE, mode, 1569 RLIM64_INFINITY, cr, &resid); 1570 if (error) { 1571 #ifdef CFSDEBUG 1572 CFS_DEBUG(CFSDEBUG_VOPS | CFSDEBUG_BACK) 1573 printf("cachefspush: error %d cr %p\n", 1574 error, (void *)cr); 1575 #endif 1576 bp->b_flags |= B_ERROR; 1577 } 1578 VN_RELE(backvp); 1579 out: 1580 return (error); 1581 } 1582 1583 /* 1584 * Pushes out pages to the front file 
system.
 * Called for both connected and disconnected states.
 */
static int
cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen,
    u_offset_t iooff, cred_t *cr)
{
	struct cnode *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error = 0;
	ssize_t resid;
	u_offset_t popoff;
	off_t commit = 0;
	uint_t seq;
	enum cachefs_rl_type type;
	vnode_t *frontvp = NULL;

	mutex_enter(&cp->c_statelock);

	/* the front file is only writable for non-shared filesystems */
	if (!CFS_ISFS_NONSHARED(fscp)) {
		error = ETIMEDOUT;
		goto out;
	}

	/* instantiate the front file if necessary */
	if ((cp->c_frontvp == NULL) &&
	    ((cp->c_flags & CN_NOCACHE) == 0)) {
		(void) cachefs_getfrontfile(cp);
	}
	if (cp->c_flags & CN_NOCACHE) {
		error = ETIMEDOUT;
		goto out;
	}

	/*
	 * If disconnected, the file must be populated and have good
	 * attributes.
	 */
	if ((fscp->fs_cdconnected != CFS_CD_CONNECTED) &&
	    (((cp->c_metadata.md_flags & MD_POPULATED) == 0) ||
	    (cp->c_metadata.md_flags & MD_NEEDATTRS))) {
		error = ETIMEDOUT;
		goto out;
	}

	/* charge cache blocks for every MAXBSIZE chunk of this i/o */
	for (popoff = iooff; popoff < (iooff + iolen); popoff += MAXBSIZE) {
		if (cachefs_charge_page(cp, popoff)) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				cachefs_nocache(cp);
				goto out;
			} else {
				error = ENOSPC;
				goto out;
			}
		}
	}

	if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
		if ((cp->c_metadata.md_flags & MD_PUTPAGE) == 0) {
			/*
			 * First putpage to this file: log it to the
			 * dlog, using open's creds if we have them.
			 */
			if (cp->c_cred)
				cr = cp->c_cred;

			if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
				error = cachefs_dlog_cidmap(fscp);
				if (error) {
					error = ENOSPC;
					goto out;
				}
				cp->c_metadata.md_flags |= MD_MAPPING;
			}

			commit = cachefs_dlog_modify(fscp, cp, cr, &seq);
			if (commit == 0) {
				/* out of space */
				error = ENOSPC;
				goto out;
			}

			cp->c_metadata.md_seq = seq;
			type = cp->c_metadata.md_rltype;
			cachefs_modified(cp);
			cp->c_metadata.md_flags |= MD_PUTPAGE;
			cp->c_metadata.md_flags &= ~MD_PUSHDONE;
			cp->c_flags |= CN_UPDATED;
		} else if (cp->c_metadata.md_seq != fscp->fs_dlogseq) {
			/*
			 * Subsequent putpages just take a new sequence
			 * number -- and only when it matters.
			 */
			seq = cachefs_dlog_seqnext(fscp);
			if (seq == 0) {
				error = ENOSPC;
				goto out;
			}
			cp->c_metadata.md_seq = seq;
			cp->c_flags |= CN_UPDATED;
			/* XXX maybe should do write_metadata here */
		}
	}

	/* write to the front file, dropping the state lock for the i/o */
	frontvp = cp->c_frontvp;
	VN_HOLD(frontvp);
	mutex_exit(&cp->c_statelock);
	error = bp->b_error = vn_rdwr(UIO_WRITE, frontvp,
	    bp->b_un.b_addr, iolen, iooff, UIO_SYSSPACE, 0,
	    RLIM64_INFINITY, kcred, &resid);
	mutex_enter(&cp->c_statelock);
	VN_RELE(frontvp);
	frontvp = NULL;
	if (error) {
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			/* connected: fall back to nocache mode */
			cachefs_nocache(cp);
			error = 0;
			goto out;
		} else {
			goto out;
		}
	}

	(void) cachefs_update_allocmap(cp, iooff, iolen);
	cp->c_flags |= (CN_UPDATED | CN_NEED_FRONT_SYNC |
	    CN_POPULATION_PENDING);
	if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
		gethrestime(&cp->c_metadata.md_localmtime);
		cp->c_metadata.md_flags |= MD_LOCALMTIME;
	}

out:
	if (commit) {
		/* commit the log record */
		ASSERT(fscp->fs_cdconnected == CFS_CD_DISCONNECTED);
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX fix on panic */
		}
	}

	if (error && commit) {
		/* back out the metadata changes made for the dlog */
		cp->c_metadata.md_flags &= ~MD_PUTPAGE;
		cachefs_rlent_moveto(fscp->fs_cache, type,
		    cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks);
		cp->c_metadata.md_rltype = type;
		cp->c_flags |= CN_UPDATED;
	}
	mutex_exit(&cp->c_statelock);
	return (error);
1731 } 1732 1733 /*ARGSUSED*/ 1734 static int 1735 cachefs_dump(struct vnode *vp, caddr_t foo1, offset_t foo2, offset_t foo3, 1736 caller_context_t *ct) 1737 { 1738 return (ENOSYS); /* should we panic if we get here? */ 1739 } 1740 1741 /*ARGSUSED*/ 1742 static int 1743 cachefs_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, cred_t *cred, 1744 int *rvalp, caller_context_t *ct) 1745 { 1746 int error; 1747 struct cnode *cp = VTOC(vp); 1748 struct fscache *fscp = C_TO_FSCACHE(cp); 1749 struct cachefscache *cachep; 1750 extern kmutex_t cachefs_cachelock; 1751 extern cachefscache_t *cachefs_cachelist; 1752 cachefsio_pack_t *packp; 1753 STRUCT_DECL(cachefsio_dcmd, dcmd); 1754 int inlen, outlen; /* LP64: generic int for struct in/out len */ 1755 void *dinp, *doutp; 1756 int (*dcmd_routine)(vnode_t *, void *, void *); 1757 1758 if (getzoneid() != GLOBAL_ZONEID) 1759 return (EPERM); 1760 1761 /* 1762 * Cachefs only provides pass-through support for NFSv4, 1763 * and all vnode operations are passed through to the 1764 * back file system. For NFSv4 pass-through to work, only 1765 * connected operation is supported, the cnode backvp must 1766 * exist, and cachefs optional (eg., disconnectable) flags 1767 * are turned off. Assert these conditions which ensure 1768 * that only a subset of the ioctls are "truly supported" 1769 * for NFSv4 (these are CFSDCMD_DAEMONID and CFSDCMD_GETSTATS. 1770 * The packing operations are meaningless since there is 1771 * no caching for NFSv4, and the called functions silently 1772 * return if the backfilesystem is NFSv4. The daemon 1773 * commands except for those above are essentially used 1774 * for disconnectable operation support (including log 1775 * rolling), so in each called function, we assert that 1776 * NFSv4 is not in use. The _FIO* calls (except _FIOCOD) 1777 * are from "cfsfstype" which is not a documented 1778 * command. 
However, the command is visible in 1779 * /usr/lib/fs/cachefs so the commands are simply let 1780 * through (don't seem to impact pass-through functionality). 1781 */ 1782 CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp); 1783 CFS_BACKFS_NFSV4_ASSERT_CNODE(cp); 1784 1785 switch (cmd) { 1786 case CACHEFSIO_PACK: 1787 packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP); 1788 error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t)); 1789 if (!error) 1790 error = cachefs_pack(vp, packp->p_name, cred); 1791 cachefs_kmem_free(packp, sizeof (cachefsio_pack_t)); 1792 break; 1793 1794 case CACHEFSIO_UNPACK: 1795 packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP); 1796 error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t)); 1797 if (!error) 1798 error = cachefs_unpack(vp, packp->p_name, cred); 1799 cachefs_kmem_free(packp, sizeof (cachefsio_pack_t)); 1800 break; 1801 1802 case CACHEFSIO_PACKINFO: 1803 packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP); 1804 error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t)); 1805 if (!error) 1806 error = cachefs_packinfo(vp, packp->p_name, 1807 &packp->p_status, cred); 1808 if (!error) 1809 error = xcopyout(packp, (void *)arg, 1810 sizeof (cachefsio_pack_t)); 1811 cachefs_kmem_free(packp, sizeof (cachefsio_pack_t)); 1812 break; 1813 1814 case CACHEFSIO_UNPACKALL: 1815 error = cachefs_unpackall(vp); 1816 break; 1817 1818 case CACHEFSIO_DCMD: 1819 /* 1820 * This is a private interface between the cachefsd and 1821 * this file system. 
1822 */ 1823 1824 /* must be root to use these commands */ 1825 if (secpolicy_fs_config(cred, vp->v_vfsp) != 0) 1826 return (EPERM); 1827 1828 /* get the command packet */ 1829 STRUCT_INIT(dcmd, flag & DATAMODEL_MASK); 1830 error = xcopyin((void *)arg, STRUCT_BUF(dcmd), 1831 SIZEOF_STRUCT(cachefsio_dcmd, DATAMODEL_NATIVE)); 1832 if (error) 1833 return (error); 1834 1835 /* copy in the data for the operation */ 1836 dinp = NULL; 1837 if ((inlen = STRUCT_FGET(dcmd, d_slen)) > 0) { 1838 dinp = cachefs_kmem_alloc(inlen, KM_SLEEP); 1839 error = xcopyin(STRUCT_FGETP(dcmd, d_sdata), dinp, 1840 inlen); 1841 if (error) 1842 return (error); 1843 } 1844 1845 /* allocate space for the result */ 1846 doutp = NULL; 1847 if ((outlen = STRUCT_FGET(dcmd, d_rlen)) > 0) 1848 doutp = cachefs_kmem_alloc(outlen, KM_SLEEP); 1849 1850 /* 1851 * Assert NFSv4 only allows the daemonid and getstats 1852 * daemon requests 1853 */ 1854 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0 || 1855 STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_DAEMONID || 1856 STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_GETSTATS); 1857 1858 /* get the routine to execute */ 1859 dcmd_routine = NULL; 1860 switch (STRUCT_FGET(dcmd, d_cmd)) { 1861 case CFSDCMD_DAEMONID: 1862 dcmd_routine = cachefs_io_daemonid; 1863 break; 1864 case CFSDCMD_STATEGET: 1865 dcmd_routine = cachefs_io_stateget; 1866 break; 1867 case CFSDCMD_STATESET: 1868 dcmd_routine = cachefs_io_stateset; 1869 break; 1870 case CFSDCMD_XWAIT: 1871 dcmd_routine = cachefs_io_xwait; 1872 break; 1873 case CFSDCMD_EXISTS: 1874 dcmd_routine = cachefs_io_exists; 1875 break; 1876 case CFSDCMD_LOSTFOUND: 1877 dcmd_routine = cachefs_io_lostfound; 1878 break; 1879 case CFSDCMD_GETINFO: 1880 dcmd_routine = cachefs_io_getinfo; 1881 break; 1882 case CFSDCMD_CIDTOFID: 1883 dcmd_routine = cachefs_io_cidtofid; 1884 break; 1885 case CFSDCMD_GETATTRFID: 1886 dcmd_routine = cachefs_io_getattrfid; 1887 break; 1888 case CFSDCMD_GETATTRNAME: 1889 dcmd_routine = cachefs_io_getattrname; 1890 break; 1891 case 
CFSDCMD_GETSTATS: 1892 dcmd_routine = cachefs_io_getstats; 1893 break; 1894 case CFSDCMD_ROOTFID: 1895 dcmd_routine = cachefs_io_rootfid; 1896 break; 1897 case CFSDCMD_CREATE: 1898 dcmd_routine = cachefs_io_create; 1899 break; 1900 case CFSDCMD_REMOVE: 1901 dcmd_routine = cachefs_io_remove; 1902 break; 1903 case CFSDCMD_LINK: 1904 dcmd_routine = cachefs_io_link; 1905 break; 1906 case CFSDCMD_RENAME: 1907 dcmd_routine = cachefs_io_rename; 1908 break; 1909 case CFSDCMD_MKDIR: 1910 dcmd_routine = cachefs_io_mkdir; 1911 break; 1912 case CFSDCMD_RMDIR: 1913 dcmd_routine = cachefs_io_rmdir; 1914 break; 1915 case CFSDCMD_SYMLINK: 1916 dcmd_routine = cachefs_io_symlink; 1917 break; 1918 case CFSDCMD_SETATTR: 1919 dcmd_routine = cachefs_io_setattr; 1920 break; 1921 case CFSDCMD_SETSECATTR: 1922 dcmd_routine = cachefs_io_setsecattr; 1923 break; 1924 case CFSDCMD_PUSHBACK: 1925 dcmd_routine = cachefs_io_pushback; 1926 break; 1927 default: 1928 error = ENOTTY; 1929 break; 1930 } 1931 1932 /* execute the routine */ 1933 if (dcmd_routine) 1934 error = (*dcmd_routine)(vp, dinp, doutp); 1935 1936 /* copy out the result */ 1937 if ((error == 0) && doutp) 1938 error = xcopyout(doutp, STRUCT_FGETP(dcmd, d_rdata), 1939 outlen); 1940 1941 /* free allocated memory */ 1942 if (dinp) 1943 cachefs_kmem_free(dinp, inlen); 1944 if (doutp) 1945 cachefs_kmem_free(doutp, outlen); 1946 1947 break; 1948 1949 case _FIOCOD: 1950 if (secpolicy_fs_config(cred, vp->v_vfsp) != 0) { 1951 error = EPERM; 1952 break; 1953 } 1954 1955 error = EBUSY; 1956 if (arg) { 1957 /* non-zero arg means do all filesystems */ 1958 mutex_enter(&cachefs_cachelock); 1959 for (cachep = cachefs_cachelist; cachep != NULL; 1960 cachep = cachep->c_next) { 1961 mutex_enter(&cachep->c_fslistlock); 1962 for (fscp = cachep->c_fslist; 1963 fscp != NULL; 1964 fscp = fscp->fs_next) { 1965 if (CFS_ISFS_CODCONST(fscp)) { 1966 gethrestime(&fscp->fs_cod_time); 1967 error = 0; 1968 } 1969 } 1970 mutex_exit(&cachep->c_fslistlock); 1971 } 
1972 mutex_exit(&cachefs_cachelock); 1973 } else { 1974 if (CFS_ISFS_CODCONST(fscp)) { 1975 gethrestime(&fscp->fs_cod_time); 1976 error = 0; 1977 } 1978 } 1979 break; 1980 1981 case _FIOSTOPCACHE: 1982 error = cachefs_stop_cache(cp); 1983 break; 1984 1985 default: 1986 error = ENOTTY; 1987 break; 1988 } 1989 1990 /* return the result */ 1991 return (error); 1992 } 1993 1994 ino64_t 1995 cachefs_fileno_conflict(fscache_t *fscp, ino64_t old) 1996 { 1997 ino64_t new; 1998 1999 ASSERT(MUTEX_HELD(&fscp->fs_fslock)); 2000 2001 for (;;) { 2002 fscp->fs_info.fi_localfileno++; 2003 if (fscp->fs_info.fi_localfileno == 0) 2004 fscp->fs_info.fi_localfileno = 3; 2005 fscp->fs_flags |= CFS_FS_DIRTYINFO; 2006 2007 new = fscp->fs_info.fi_localfileno; 2008 if (! cachefs_fileno_inuse(fscp, new)) 2009 break; 2010 } 2011 2012 cachefs_inum_register(fscp, old, new); 2013 cachefs_inum_register(fscp, new, 0); 2014 return (new); 2015 } 2016 2017 /*ARGSUSED*/ 2018 static int 2019 cachefs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 2020 caller_context_t *ct) 2021 { 2022 struct cnode *cp = VTOC(vp); 2023 fscache_t *fscp = C_TO_FSCACHE(cp); 2024 int error = 0; 2025 int held = 0; 2026 int connected = 0; 2027 2028 #ifdef CFSDEBUG 2029 CFS_DEBUG(CFSDEBUG_VOPS) 2030 printf("cachefs_getattr: ENTER vp %p\n", (void *)vp); 2031 #endif 2032 2033 if (getzoneid() != GLOBAL_ZONEID) 2034 return (EPERM); 2035 2036 /* Call backfilesystem getattr if NFSv4 */ 2037 if (CFS_ISFS_BACKFS_NFSV4(fscp)) { 2038 error = cachefs_getattr_backfs_nfsv4(vp, vap, flags, cr, ct); 2039 goto out; 2040 } 2041 2042 /* 2043 * If it has been specified that the return value will 2044 * just be used as a hint, and we are only being asked 2045 * for size, fsid or rdevid, then return the client's 2046 * notion of these values without checking to make sure 2047 * that the attribute cache is up to date. 2048 * The whole point is to avoid an over the wire GETATTR 2049 * call. 
2050 */ 2051 if (flags & ATTR_HINT) { 2052 if (vap->va_mask == 2053 (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) { 2054 if (vap->va_mask | AT_SIZE) 2055 vap->va_size = cp->c_size; 2056 /* 2057 * Return the FSID of the cachefs filesystem, 2058 * not the back filesystem 2059 */ 2060 if (vap->va_mask | AT_FSID) 2061 vap->va_fsid = vp->v_vfsp->vfs_dev; 2062 if (vap->va_mask | AT_RDEV) 2063 vap->va_rdev = cp->c_attr.va_rdev; 2064 return (0); 2065 } 2066 } 2067 2068 /* 2069 * Only need to flush pages if asking for the mtime 2070 * and if there any dirty pages. 2071 */ 2072 if (vap->va_mask & AT_MTIME) { 2073 /*EMPTY*/ 2074 #if 0 2075 /* 2076 * XXX bob: stolen from nfs code, need to do something similar 2077 */ 2078 rp = VTOR(vp); 2079 if ((rp->r_flags & RDIRTY) || rp->r_iocnt > 0) 2080 (void) nfs3_putpage(vp, (offset_t)0, 0, 0, cr); 2081 #endif 2082 } 2083 2084 for (;;) { 2085 /* get (or renew) access to the file system */ 2086 if (held) { 2087 cachefs_cd_release(fscp); 2088 held = 0; 2089 } 2090 error = cachefs_cd_access(fscp, connected, 0); 2091 if (error) 2092 goto out; 2093 held = 1; 2094 2095 /* 2096 * If it has been specified that the return value will 2097 * just be used as a hint, and we are only being asked 2098 * for size, fsid or rdevid, then return the client's 2099 * notion of these values without checking to make sure 2100 * that the attribute cache is up to date. 2101 * The whole point is to avoid an over the wire GETATTR 2102 * call. 
2103 */ 2104 if (flags & ATTR_HINT) { 2105 if (vap->va_mask == 2106 (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) { 2107 if (vap->va_mask | AT_SIZE) 2108 vap->va_size = cp->c_size; 2109 /* 2110 * Return the FSID of the cachefs filesystem, 2111 * not the back filesystem 2112 */ 2113 if (vap->va_mask | AT_FSID) 2114 vap->va_fsid = vp->v_vfsp->vfs_dev; 2115 if (vap->va_mask | AT_RDEV) 2116 vap->va_rdev = cp->c_attr.va_rdev; 2117 goto out; 2118 } 2119 } 2120 2121 mutex_enter(&cp->c_statelock); 2122 if ((cp->c_metadata.md_flags & MD_NEEDATTRS) && 2123 (fscp->fs_cdconnected != CFS_CD_CONNECTED)) { 2124 mutex_exit(&cp->c_statelock); 2125 connected = 1; 2126 continue; 2127 } 2128 2129 error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr); 2130 if (CFS_TIMEOUT(fscp, error)) { 2131 mutex_exit(&cp->c_statelock); 2132 cachefs_cd_release(fscp); 2133 held = 0; 2134 cachefs_cd_timedout(fscp); 2135 continue; 2136 } 2137 if (error) { 2138 mutex_exit(&cp->c_statelock); 2139 break; 2140 } 2141 2142 /* check for fileno conflict */ 2143 if ((fscp->fs_inum_size > 0) && 2144 ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) { 2145 ino64_t fakenum; 2146 2147 mutex_exit(&cp->c_statelock); 2148 mutex_enter(&fscp->fs_fslock); 2149 fakenum = cachefs_inum_real2fake(fscp, 2150 cp->c_attr.va_nodeid); 2151 if (fakenum == 0) { 2152 fakenum = cachefs_fileno_conflict(fscp, 2153 cp->c_attr.va_nodeid); 2154 } 2155 mutex_exit(&fscp->fs_fslock); 2156 2157 mutex_enter(&cp->c_statelock); 2158 cp->c_metadata.md_flags |= MD_LOCALFILENO; 2159 cp->c_metadata.md_localfileno = fakenum; 2160 cp->c_flags |= CN_UPDATED; 2161 } 2162 2163 /* copy out the attributes */ 2164 *vap = cp->c_attr; 2165 2166 /* 2167 * return the FSID of the cachefs filesystem, 2168 * not the back filesystem 2169 */ 2170 vap->va_fsid = vp->v_vfsp->vfs_dev; 2171 2172 /* return our idea of the size */ 2173 if (cp->c_size > vap->va_size) 2174 vap->va_size = cp->c_size; 2175 2176 /* overwrite with our version of fileno and timestamps */ 2177 
vap->va_nodeid = cp->c_metadata.md_localfileno; 2178 vap->va_mtime = cp->c_metadata.md_localmtime; 2179 vap->va_ctime = cp->c_metadata.md_localctime; 2180 2181 mutex_exit(&cp->c_statelock); 2182 break; 2183 } 2184 out: 2185 if (held) 2186 cachefs_cd_release(fscp); 2187 #ifdef CFS_CD_DEBUG 2188 ASSERT((curthread->t_flag & T_CD_HELD) == 0); 2189 #endif 2190 2191 #ifdef CFSDEBUG 2192 CFS_DEBUG(CFSDEBUG_VOPS) 2193 printf("cachefs_getattr: EXIT error = %d\n", error); 2194 #endif 2195 return (error); 2196 } 2197 2198 /* 2199 * cachefs_getattr_backfs_nfsv4 2200 * 2201 * Call NFSv4 back filesystem to handle the getattr (cachefs 2202 * pass-through support for NFSv4). 2203 */ 2204 static int 2205 cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap, 2206 int flags, cred_t *cr, caller_context_t *ct) 2207 { 2208 cnode_t *cp = VTOC(vp); 2209 fscache_t *fscp = C_TO_FSCACHE(cp); 2210 vnode_t *backvp; 2211 int error; 2212 2213 /* 2214 * For NFSv4 pass-through to work, only connected operation 2215 * is supported, the cnode backvp must exist, and cachefs 2216 * optional (eg., disconnectable) flags are turned off. Assert 2217 * these conditions for the getattr operation. 
2218 */ 2219 CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp); 2220 CFS_BACKFS_NFSV4_ASSERT_CNODE(cp); 2221 2222 /* Call backfs vnode op after extracting backvp */ 2223 mutex_enter(&cp->c_statelock); 2224 backvp = cp->c_backvp; 2225 mutex_exit(&cp->c_statelock); 2226 2227 CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_getattr_backfs_nfsv4: cnode %p," 2228 " backvp %p\n", cp, backvp)); 2229 error = VOP_GETATTR(backvp, vap, flags, cr, ct); 2230 2231 /* Update attributes */ 2232 cp->c_attr = *vap; 2233 2234 /* 2235 * return the FSID of the cachefs filesystem, 2236 * not the back filesystem 2237 */ 2238 vap->va_fsid = vp->v_vfsp->vfs_dev; 2239 2240 return (error); 2241 } 2242 2243 /*ARGSUSED4*/ 2244 static int 2245 cachefs_setattr( 2246 vnode_t *vp, 2247 vattr_t *vap, 2248 int flags, 2249 cred_t *cr, 2250 caller_context_t *ct) 2251 { 2252 cnode_t *cp = VTOC(vp); 2253 fscache_t *fscp = C_TO_FSCACHE(cp); 2254 int error; 2255 int connected; 2256 int held = 0; 2257 2258 if (getzoneid() != GLOBAL_ZONEID) 2259 return (EPERM); 2260 2261 /* 2262 * Cachefs only provides pass-through support for NFSv4, 2263 * and all vnode operations are passed through to the 2264 * back file system. For NFSv4 pass-through to work, only 2265 * connected operation is supported, the cnode backvp must 2266 * exist, and cachefs optional (eg., disconnectable) flags 2267 * are turned off. Assert these conditions to ensure that 2268 * the backfilesystem is called for the setattr operation. 
/*
 * VOP_SETATTR entry point.  Acquires cd (connected/disconnected) access
 * to the file system, delegates to cachefs_setattr_common(), and retries
 * on timeout: a timeout while connected drops access and waits out the
 * timeout; a timeout while disconnected retries demanding a connection.
 */
/*ARGSUSED4*/
static int
cachefs_setattr(
	vnode_t *vp,
	vattr_t *vap,
	int flags,
	cred_t *cr,
	caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error;
	int connected;		/* demand connected service on retry */
	int held = 0;		/* 1 while we hold cd access */

	if (getzoneid() != GLOBAL_ZONEID)
		return (EPERM);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the setattr operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	connected = 0;
	for (;;) {
		/* drop hold on file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}

		/* acquire access to the file system */
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;

		/* perform the setattr */
		error = cachefs_setattr_common(vp, vap, flags, cr, ct);
		if (error) {
			/* if connected */
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				if (CFS_TIMEOUT(fscp, error)) {
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					connected = 0;
					continue;
				}
			}

			/* else must be disconnected */
			else {
				if (CFS_TIMEOUT(fscp, error)) {
					connected = 1;
					continue;
				}
			}
		}
		break;
	}

	if (held) {
		cachefs_cd_release(fscp);
	}
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
	return (error);
}
/*
 * Shared setattr implementation: validates the request, syncs dirty
 * pages, dispatches to the connected or disconnected variant, and on a
 * size change trims/zeroes cached pages beyond the new EOF.  Truncate
 * attempts are logged via cachefs_log_truncate when logging is enabled.
 */
static int
cachefs_setattr_common(
	vnode_t *vp,
	vattr_t *vap,
	int flags,
	cred_t *cr,
	caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	cachefscache_t *cachep = fscp->fs_cache;
	uint_t mask = vap->va_mask;
	int error = 0;
	uint_t bcnt;

	/* Cannot set these attributes. */
	if (mask & AT_NOSET)
		return (EINVAL);

	/*
	 * Truncate file.  Must have write permission and not be a directory.
	 */
	if (mask & AT_SIZE) {
		if (vp->v_type == VDIR) {
			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE))
				cachefs_log_truncate(cachep, EISDIR,
				    fscp->fs_cfsvfsp,
				    &cp->c_metadata.md_cookie,
				    cp->c_id.cid_fileno,
				    crgetuid(cr), vap->va_size);
			return (EISDIR);
		}
	}

	/*
	 * Gotta deal with one special case here, where we're setting the
	 * size of the file. First, we zero out part of the page after the
	 * new size of the file. Then we toss (not write) all pages after
	 * page in which the new offset occurs. Note that the NULL passed
	 * in instead of a putapage() fn parameter is correct, since
	 * no dirty pages will be found (B_TRUNC | B_INVAL).
	 */

	rw_enter(&cp->c_rwlock, RW_WRITER);

	/* sync dirty pages */
	if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_putpage_common(vp, (offset_t)0, 0, 0, cr);
		if (error == EINTR)
			goto out;
	}
	/* non-EINTR putpage failures are deliberately ignored here */
	error = 0;

	/* if connected */
	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
		error = cachefs_setattr_connected(vp, vap, flags, cr, ct);
	}
	/* else must be disconnected */
	else {
		error = cachefs_setattr_disconnected(vp, vap, flags, cr, ct);
	}
	if (error)
		goto out;

	/*
	 * If the file size has been changed then
	 * toss whole pages beyond the end of the file and zero
	 * the portion of the last page that is beyond the end of the file.
	 */
	if (mask & AT_SIZE && !CFS_ISFS_BACKFS_NFSV4(fscp)) {
		bcnt = (uint_t)(cp->c_size & PAGEOFFSET);
		if (bcnt)
			pvn_vpzero(vp, cp->c_size, PAGESIZE - bcnt);
		(void) pvn_vplist_dirty(vp, cp->c_size, cachefs_push,
		    B_TRUNC | B_INVAL, cr);
	}

out:
	rw_exit(&cp->c_rwlock);

	/* log the result of the truncate, success or failure */
	if ((mask & AT_SIZE) &&
	    (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE)))
		cachefs_log_truncate(cachep, error, fscp->fs_cfsvfsp,
		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
		    crgetuid(cr), vap->va_size);

	return (error);
}
/*
 * Connected setattr: pushes the attribute change to the back file
 * system, then mirrors a size change onto the front (cache) file when
 * that is safe; otherwise the file is nocached.  Finally refreshes
 * c_attr from the back fs.  Called with cd access held; takes
 * c_statelock for the duration.
 */
static int
cachefs_setattr_connected(
	vnode_t *vp,
	vattr_t *vap,
	int flags,
	cred_t *cr,
	caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	uint_t mask = vap->va_mask;
	int error = 0;
	int setsize;

	mutex_enter(&cp->c_statelock);

	if (cp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, cp);
		if (error)
			goto out;
	}

	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
	if (error)
		goto out;

	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_setattr (nfsv4): cnode %p, "
	    "backvp %p\n", cp, cp->c_backvp));
	error = VOP_SETATTR(cp->c_backvp, vap, flags, cr, ct);
	if (error) {
		goto out;
	}

	/* if the size of the file is being changed */
	if (mask & AT_SIZE) {
		cp->c_size = vap->va_size;
		error = 0;
		setsize = 0;

		/* see if okay to try to set the file size */
		if (((cp->c_flags & CN_NOCACHE) == 0) &&
		    CFS_ISFS_NONSHARED(fscp)) {
			/* okay to set size if file is populated */
			if (cp->c_metadata.md_flags & MD_POPULATED)
				setsize = 1;

			/*
			 * Okay to set size if front file exists and setting
			 * file size to zero.
			 */
			if ((cp->c_metadata.md_flags & MD_FILE) &&
			    (vap->va_size == 0))
				setsize = 1;
		}

		/* if okay to try to set the file size */
		if (setsize) {
			error = 0;
			if (cp->c_frontvp == NULL)
				error = cachefs_getfrontfile(cp);
			if (error == 0)
				error = cachefs_frontfile_size(cp, cp->c_size);
		} else if (cp->c_metadata.md_flags & MD_FILE) {
			/* make sure file gets nocached */
			error = EEXIST;
		}

		/*
		 * if we have to nocache the file: any failure above means
		 * the front file no longer matches the back file.
		 */
		if (error) {
			if ((cp->c_flags & CN_NOCACHE) == 0 &&
			    !CFS_ISFS_BACKFS_NFSV4(fscp))
				cachefs_nocache(cp);
			error = 0;
		}
	}

	cp->c_flags |= CN_UPDATED;

	/* XXX bob: given what modify_cobject does this seems unnecessary */
	cp->c_attr.va_mask = AT_ALL;
	error = VOP_GETATTR(cp->c_backvp, &cp->c_attr, 0, cr, ct);
	if (error)
		goto out;

	/* keep our (possibly larger) notion of the size */
	cp->c_attr.va_size = MAX(cp->c_attr.va_size, cp->c_size);
	cp->c_size = cp->c_attr.va_size;

	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
out:
	mutex_exit(&cp->c_statelock);
	return (error);
}
/*
 * perform the setattr on the local file system
 *
 * Disconnected setattr: validates the change against the cached
 * attributes (secpolicy_vnode_setattr with cachefs_access_local as the
 * access checker), applies it locally, and writes a dlog record so the
 * change can be rolled to the back fs at reconnect.  Each attribute
 * group logs at most one dlog_setattr record (tracked via `commit').
 */
/*ARGSUSED4*/
static int
cachefs_setattr_disconnected(
	vnode_t *vp,
	vattr_t *vap,
	int flags,
	cred_t *cr,
	caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int mask;
	int error;
	int newfile;
	off_t commit = 0;	/* dlog offset to commit; 0 = none logged */

	/* write-around mounts cannot buffer changes locally */
	if (CFS_ISFS_WRITE_AROUND(fscp))
		return (ETIMEDOUT);

	/* if we do not have good attributes */
	if (cp->c_metadata.md_flags & MD_NEEDATTRS)
		return (ETIMEDOUT);

	/* primary concern is to keep this routine as much like ufs_setattr */

	mutex_enter(&cp->c_statelock);

	error = secpolicy_vnode_setattr(cr, vp, vap, &cp->c_attr, flags,
	    cachefs_access_local, cp);

	if (error)
		goto out;

	mask = vap->va_mask;

	/* if changing the size of the file */
	if (mask & AT_SIZE) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}

		if (vp->v_type == VFIFO) {
			error = 0;
			goto out;
		}

		if ((vp->v_type != VREG) &&
		    !((vp->v_type == VLNK) && (vap->va_size == 0))) {
			error = EINVAL;
			goto out;
		}

		if (vap->va_size > fscp->fs_offmax) {
			error = EFBIG;
			goto out;
		}

		/* if the file is not populated and we are not truncating it */
		if (((cp->c_metadata.md_flags & MD_POPULATED) == 0) &&
		    (vap->va_size != 0)) {
			error = ETIMEDOUT;
			goto out;
		}

		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
			error = cachefs_dlog_cidmap(fscp);
			if (error) {
				error = ENOSPC;
				goto out;
			}
			cp->c_metadata.md_flags |= MD_MAPPING;
		}

		/* log the operation */
		commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
		if (commit == 0) {
			error = ENOSPC;
			goto out;
		}
		cp->c_flags &= ~CN_NOCACHE;

		/* special case truncating fast sym links */
		if ((vp->v_type == VLNK) &&
		    (cp->c_metadata.md_flags & MD_FASTSYMLNK)) {
			/* XXX how can we get here */
			/* XXX should update mtime */
			cp->c_size = 0;
			error = 0;
			goto out;
		}

		/* get the front file, this may create one */
		newfile = (cp->c_metadata.md_flags & MD_FILE) ? 0 : 1;
		if (cp->c_frontvp == NULL) {
			error = cachefs_getfrontfile(cp);
			if (error)
				goto out;
		}
		ASSERT(cp->c_frontvp);
		if (newfile && (cp->c_flags & CN_UPDATED)) {
			/* allocate space for the metadata */
			ASSERT((cp->c_flags & CN_ALLOC_PENDING) == 0);
			ASSERT((cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR)
			    == 0);
			error = filegrp_write_metadata(cp->c_filegrp,
			    &cp->c_id, &cp->c_metadata);
			if (error)
				goto out;
		}

		/* change the size of the front file */
		error = cachefs_frontfile_size(cp, vap->va_size);
		if (error)
			goto out;
		cp->c_attr.va_size = cp->c_size = vap->va_size;
		gethrestime(&cp->c_metadata.md_localmtime);
		cp->c_metadata.md_flags |= MD_POPULATED | MD_LOCALMTIME;
		cachefs_modified(cp);
		cp->c_flags |= CN_UPDATED;
	}

	if (mask & AT_MODE) {
		/* mark as modified */
		if (cachefs_modified_alloc(cp)) {
			error = ENOSPC;
			goto out;
		}

		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
			error = cachefs_dlog_cidmap(fscp);
			if (error) {
				error = ENOSPC;
				goto out;
			}
			cp->c_metadata.md_flags |= MD_MAPPING;
		}

		/* log the operation if not already logged */
		if (commit == 0) {
			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
			if (commit == 0) {
				error = ENOSPC;
				goto out;
			}
		}

		/* preserve the file-type bits, replace the permission bits */
		cp->c_attr.va_mode &= S_IFMT;
		cp->c_attr.va_mode |= vap->va_mode & ~S_IFMT;
		gethrestime(&cp->c_metadata.md_localctime);
		cp->c_metadata.md_flags |= MD_LOCALCTIME;
		cp->c_flags |= CN_UPDATED;
	}

	if (mask & (AT_UID|AT_GID)) {

		/* mark as modified */
		if (cachefs_modified_alloc(cp)) {
			error = ENOSPC;
			goto out;
		}

		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
			error = cachefs_dlog_cidmap(fscp);
			if (error) {
				error = ENOSPC;
				goto out;
			}
			cp->c_metadata.md_flags |= MD_MAPPING;
		}

		/* log the operation if not already logged */
		if (commit == 0) {
			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
			if (commit == 0) {
				error = ENOSPC;
				goto out;
			}
		}

		if (mask & AT_UID)
			cp->c_attr.va_uid = vap->va_uid;

		if (mask & AT_GID)
			cp->c_attr.va_gid = vap->va_gid;
		gethrestime(&cp->c_metadata.md_localctime);
		cp->c_metadata.md_flags |= MD_LOCALCTIME;
		cp->c_flags |= CN_UPDATED;
	}


	if (mask & (AT_MTIME|AT_ATIME)) {
		/* mark as modified */
		if (cachefs_modified_alloc(cp)) {
			error = ENOSPC;
			goto out;
		}

		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
			error = cachefs_dlog_cidmap(fscp);
			if (error) {
				error = ENOSPC;
				goto out;
			}
			cp->c_metadata.md_flags |= MD_MAPPING;
		}

		/* log the operation if not already logged */
		if (commit == 0) {
			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
			if (commit == 0) {
				error = ENOSPC;
				goto out;
			}
		}

		if (mask & AT_MTIME) {
			cp->c_metadata.md_localmtime = vap->va_mtime;
			cp->c_metadata.md_flags |= MD_LOCALMTIME;
		}
		if (mask & AT_ATIME)
			cp->c_attr.va_atime = vap->va_atime;
		gethrestime(&cp->c_metadata.md_localctime);
		cp->c_metadata.md_flags |= MD_LOCALCTIME;
		cp->c_flags |= CN_UPDATED;
	}

out:
	mutex_exit(&cp->c_statelock);

	/* commit the log entry */
	if (commit) {
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
	}
	return (error);
}
/*
 * VOP_ACCESS entry point.  When connected, delegates to
 * cachefs_access_connected(); when disconnected, checks against the
 * cached attributes/ACL locally, escalating to a connected check (or a
 * retry demanding connection) on timeout.
 */
/* ARGSUSED */
static int
cachefs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
	caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error;
	int held = 0;		/* 1 while we hold cd access */
	int connected = 0;	/* demand connected service on retry */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_access: ENTER vp %p\n", (void *)vp);
#endif
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the access operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			break;
		held = 1;

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_access_connected(vp, mode, flags,
			    cr);
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			mutex_enter(&cp->c_statelock);
			error = cachefs_access_local(cp, mode, cr);
			mutex_exit(&cp->c_statelock);
			if (CFS_TIMEOUT(fscp, error)) {
				if (cachefs_cd_access_miss(fscp)) {
					/* opportunistic connected check */
					mutex_enter(&cp->c_statelock);
					if (cp->c_backvp == NULL) {
						(void) cachefs_getbackvp(fscp,
						    cp);
					}
					mutex_exit(&cp->c_statelock);
					error = cachefs_access_connected(vp,
					    mode, flags, cr);
					if (!CFS_TIMEOUT(fscp, error))
						break;
					delay(5*hz);
					connected = 0;
					continue;
				}
				connected = 1;
				continue;
			}
		}
		break;
	}
	if (held)
		cachefs_cd_release(fscp);
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_access: EXIT error = %d\n", error);
#endif
	return (error);
}
/*
 * Connected access check.  Validates the cnode attributes, then either
 * checks locally against the cached attributes/ACL, or (when the mount
 * demands it, or for NFSv4 pass-through) asks the back file system via
 * VOP_ACCESS.  An ESTALE answer marks the cnode stale so a reopen will
 * refetch from the back fs.
 */
static int
cachefs_access_connected(struct vnode *vp, int mode, int flags, cred_t *cr)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error = 0;

	mutex_enter(&cp->c_statelock);

	/* Make sure the cnode attrs are valid first. */
	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
	if (error)
		goto out;

	/* see if can do a local file system check */
	if ((fscp->fs_info.fi_mntflags & CFS_ACCESS_BACKFS) == 0 &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_access_local(cp, mode, cr);
		goto out;
	}

	/* else do a remote file system check */
	else {
		if (cp->c_backvp == NULL) {
			error = cachefs_getbackvp(fscp, cp);
			if (error)
				goto out;
		}

		CFS_DPRINT_BACKFS_NFSV4(fscp,
		    ("cachefs_access (nfsv4): cnode %p, backvp %p\n",
		    cp, cp->c_backvp));
		error = VOP_ACCESS(cp->c_backvp, mode, flags, cr, NULL);

		/*
		 * even though we don't `need' the ACL to do access
		 * via the backvp, we should cache it here to make our
		 * behavior more reasonable if we go disconnected.
		 */

		if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
		    (cachefs_vtype_aclok(vp)) &&
		    ((cp->c_flags & CN_NOCACHE) == 0) &&
		    (!CFS_ISFS_BACKFS_NFSV4(fscp)) &&
		    ((cp->c_metadata.md_flags & MD_ACL) == 0))
			(void) cachefs_cacheacl(cp, NULL);
	}
out:
	/*
	 * If NFS returned ESTALE, mark this cnode as stale, so that
	 * the vn_open retry will read the file anew from backfs
	 */
	if (error == ESTALE)
		cachefs_cnode_stale(cp);

	mutex_exit(&cp->c_statelock);
	return (error);
}
/*
 * CFS has a fastsymlink scheme. If the size of the link is < C_FSL_SIZE, then
 * the link is placed in the metadata itself (no front file is allocated).
 *
 * VOP_READLINK entry point.  The connected path takes c_rwlock as
 * writer (readlink_connected may stuff the symlink into the metadata),
 * and lock order requires dropping cd access before blocking on the
 * rwlock — hence the tryenter/re-acquire dance below.
 */
/*ARGSUSED*/
static int
cachefs_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
{
	int error = 0;
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	cachefscache_t *cachep = fscp->fs_cache;
	int held = 0;		/* 1 while we hold cd access */
	int connected = 0;	/* demand connected service on retry */

	if (getzoneid() != GLOBAL_ZONEID)
		return (EPERM);

	if (vp->v_type != VLNK)
		return (EINVAL);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the readlink operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			break;
		held = 1;

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			/*
			 * since readlink_connected will call stuffsymlink
			 * on success, have to serialize access
			 */
			if (!rw_tryenter(&cp->c_rwlock, RW_WRITER)) {
				cachefs_cd_release(fscp);
				rw_enter(&cp->c_rwlock, RW_WRITER);
				error = cachefs_cd_access(fscp, connected, 0);
				if (error) {
					held = 0;
					rw_exit(&cp->c_rwlock);
					break;
				}
			}
			error = cachefs_readlink_connected(vp, uiop, cr);
			rw_exit(&cp->c_rwlock);
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_readlink_disconnected(vp, uiop);
			if (CFS_TIMEOUT(fscp, error)) {
				if (cachefs_cd_access_miss(fscp)) {
					/* as above */
					if (!rw_tryenter(&cp->c_rwlock,
					    RW_WRITER)) {
						cachefs_cd_release(fscp);
						rw_enter(&cp->c_rwlock,
						    RW_WRITER);
						error = cachefs_cd_access(fscp,
						    connected, 0);
						if (error) {
							held = 0;
							rw_exit(&cp->c_rwlock);
							break;
						}
					}
					error = cachefs_readlink_connected(vp,
					    uiop, cr);
					rw_exit(&cp->c_rwlock);
					if (!CFS_TIMEOUT(fscp, error))
						break;
					delay(5*hz);
					connected = 0;
					continue;
				}
				connected = 1;
				continue;
			}
		}
		break;
	}
	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READLINK))
		cachefs_log_readlink(cachep, error, fscp->fs_cfsvfsp,
		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
		    crgetuid(cr), cp->c_size);

	if (held)
		cachefs_cd_release(fscp);
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif

	/*
	 * The over the wire error for attempting to readlink something
	 * other than a symbolic link is ENXIO.  However, we need to
	 * return EINVAL instead of ENXIO, so we map it here.
	 */
	return (error == ENXIO ? EINVAL : error);
}
/*
 * Connected readlink.  Serves the link text from, in order of
 * preference: the fast-symlink metadata, the front (cache) file, or the
 * back file system (in which case we also try to cache it via
 * cachefs_stuffsymlink).  Caller holds c_rwlock as writer.
 */
static int
cachefs_readlink_connected(vnode_t *vp, uio_t *uiop, cred_t *cr)
{
	int error;
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	caddr_t buf;
	int buflen;
	int readcache = 0;	/* 1 if served from the cache (hit) */

	mutex_enter(&cp->c_statelock);

	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
	if (error)
		goto out;

	/* if the sym link is cached as a fast sym link */
	if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		error = uiomove(cp->c_metadata.md_allocinfo,
		    MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
#ifdef CFSDEBUG
		readcache = 1;
		goto out;
#else /* CFSDEBUG */
		/* XXX KLUDGE! correct for insidious 0-len symlink */
		if (cp->c_size != 0) {
			readcache = 1;
			goto out;
		}
#endif /* CFSDEBUG */
	}

	/* if the sym link is cached in a front file */
	if (cp->c_metadata.md_flags & MD_POPULATED) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		ASSERT(cp->c_metadata.md_flags & MD_FILE);
		if (cp->c_frontvp == NULL) {
			(void) cachefs_getfrontfile(cp);
		}
		/* re-test: getfrontfile may have clobbered MD_POPULATED */
		if (cp->c_metadata.md_flags & MD_POPULATED) {
			/* read symlink data from frontfile */
			uiop->uio_offset = 0;
			(void) VOP_RWLOCK(cp->c_frontvp,
			    V_WRITELOCK_FALSE, NULL);
			error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
			VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);

			/* XXX KLUDGE! correct for insidious 0-len symlink */
			if (cp->c_size != 0) {
				readcache = 1;
				goto out;
			}
		}
	}

	/* get the sym link contents from the back fs */
	error = cachefs_readlink_back(cp, cr, &buf, &buflen);
	if (error)
		goto out;

	/* copy the contents out to the user */
	error = uiomove(buf, MIN(buflen, uiop->uio_resid), UIO_READ, uiop);

	/*
	 * try to cache the sym link, note that its a noop if NOCACHE is set
	 * or if NFSv4 pass-through is enabled.
	 */
	if (cachefs_stuffsymlink(cp, buf, buflen)) {
		cachefs_nocache(cp);
	}

	/* buf is a MAXPATHLEN allocation from cachefs_readlink_back */
	cachefs_kmem_free(buf, MAXPATHLEN);

out:
	mutex_exit(&cp->c_statelock);
	if (error == 0) {
		if (readcache)
			fscp->fs_stats.st_hits++;
		else
			fscp->fs_stats.st_misses++;
	}
	return (error);
}
/*
 * Disconnected readlink.  Can only serve the link from the cache (fast
 * symlink metadata or the front file); if neither is populated the
 * caller gets ETIMEDOUT so the vnode layer can retry connected.
 */
static int
cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop)
{
	int error;
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int readcache = 0;	/* 1 if served from the cache (hit) */

	mutex_enter(&cp->c_statelock);

	/* if the sym link is cached as a fast sym link */
	if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
		error = uiomove(cp->c_metadata.md_allocinfo,
		    MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
		readcache = 1;
		goto out;
	}

	/* if the sym link is cached in a front file */
	if (cp->c_metadata.md_flags & MD_POPULATED) {
		ASSERT(cp->c_metadata.md_flags & MD_FILE);
		if (cp->c_frontvp == NULL) {
			(void) cachefs_getfrontfile(cp);
		}
		/* re-test: getfrontfile may have clobbered MD_POPULATED */
		if (cp->c_metadata.md_flags & MD_POPULATED) {
			/* read symlink data from frontfile */
			uiop->uio_offset = 0;
			(void) VOP_RWLOCK(cp->c_frontvp,
			    V_WRITELOCK_FALSE, NULL);
			error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
			VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);
			readcache = 1;
			goto out;
		}
	}
	/* not cached; must wait for a connection */
	error = ETIMEDOUT;

out:
	mutex_exit(&cp->c_statelock);
	if (error == 0) {
		if (readcache)
			fscp->fs_stats.st_hits++;
		else
			fscp->fs_stats.st_misses++;
	}
	return (error);
}
/*
 * VOP_FSYNC entry point.  Flushes dirty pages, fsyncs the back vnode
 * when connected, and syncs the cnode metadata/front file to the front
 * fs.  Read-only back file systems are a no-op.  Timeouts restart the
 * loop, either after waiting out the timeout (connected) or demanding a
 * connection (disconnected).
 */
/*ARGSUSED*/
static int
cachefs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	int error = 0;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int held = 0;		/* 1 while we hold cd access */
	int connected = 0;	/* demand connected service on retry */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_fsync: ENTER vp %p\n", (void *)vp);
#endif

	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	/* nothing to sync if the back fs is read-only */
	if (fscp->fs_backvfsp && fscp->fs_backvfsp->vfs_flag & VFS_RDONLY)
		goto out;

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the fsync operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;
		connected = 0;

		/* if a regular file, write out the pages */
		if ((vp->v_type == VREG) && vn_has_cached_data(vp) &&
		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
			error = cachefs_putpage_common(vp, (offset_t)0,
			    0, 0, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					continue;
				} else {
					connected = 1;
					continue;
				}
			}

			/* if no space left in cache, wait until connected */
			if ((error == ENOSPC) &&
			    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
				connected = 1;
				continue;
			}

			/* clear the cnode error if putpage worked */
			if ((error == 0) && cp->c_error) {
				mutex_enter(&cp->c_statelock);
				cp->c_error = 0;
				mutex_exit(&cp->c_statelock);
			}

			if (error)
				break;
		}

		/* if connected, sync the backvp */
		if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) &&
		    cp->c_backvp) {
			mutex_enter(&cp->c_statelock);
			/* re-check c_backvp now that we hold the lock */
			if (cp->c_backvp) {
				CFS_DPRINT_BACKFS_NFSV4(fscp,
				    ("cachefs_fsync (nfsv4): cnode %p, "
				    "backvp %p\n", cp, cp->c_backvp));
				error = VOP_FSYNC(cp->c_backvp, syncflag, cr,
				    ct);
				if (CFS_TIMEOUT(fscp, error)) {
					mutex_exit(&cp->c_statelock);
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					continue;
				} else if (error && (error != EINTR))
					cp->c_error = error;
			}
			mutex_exit(&cp->c_statelock);
		}

		/* sync the metadata and the front file to the front fs */
		if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
			error = cachefs_sync_metadata(cp);
			if (error &&
			    (fscp->fs_cdconnected == CFS_CD_CONNECTED))
				error = 0;
		}
		break;
	}

	/* report any sticky cnode error if nothing else failed */
	if (error == 0)
		error = cp->c_error;

	if (held)
		cachefs_cd_release(fscp);

out:
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_fsync: EXIT vp %p\n", (void *)vp);
#endif
	return (error);
}
/*
 * Called from cachefs_inactive(), to make sure all the data goes out to disk.
 *
 * Writes the cnode metadata (and fsyncs the front file when
 * CN_NEED_FRONT_SYNC is set) to the front file system.  On failure the
 * front file and metadata are discarded and the cnode is nocached.
 * CN_UPDATED is cleared even on error, since a retry would likely fail
 * the same way.
 */
int
cachefs_sync_metadata(cnode_t *cp)
{
	int error = 0;
	struct filegrp *fgp;
	struct vattr va;
	fscache_t *fscp = C_TO_FSCACHE(cp);

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("c_sync_metadata: ENTER cp %p cflag %x\n",
		    (void *)cp, cp->c_flags);
#endif

	mutex_enter(&cp->c_statelock);

	/* nothing to do unless there is a pending, writable update */
	if ((cp->c_flags & CN_UPDATED) == 0)
		goto out;
	if (cp->c_flags & (CN_STALE | CN_DESTROY))
		goto out;
	fgp = cp->c_filegrp;
	if ((fgp->fg_flags & CFS_FG_WRITE) == 0)
		goto out;
	if (CFS_ISFS_BACKFS_NFSV4(fscp))
		goto out;

	if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
		/* must drop c_statelock across filegrp_allocattr */
		mutex_exit(&cp->c_statelock);
		error = filegrp_allocattr(fgp);
		mutex_enter(&cp->c_statelock);
		if (error) {
			/* allocation failure is not fatal to the caller */
			error = 0;
			goto out;
		}
	}

	if (cp->c_flags & CN_ALLOC_PENDING) {
		error = filegrp_create_metadata(fgp, &cp->c_metadata,
		    &cp->c_id);
		if (error)
			goto out;
		cp->c_flags &= ~CN_ALLOC_PENDING;
	}

	if (cp->c_flags & CN_NEED_FRONT_SYNC) {
		if (cp->c_frontvp != NULL) {
			error = VOP_FSYNC(cp->c_frontvp, FSYNC, kcred, NULL);
			if (error) {
				/* invalidate the cached timestamp */
				cp->c_metadata.md_timestamp.tv_sec = 0;
			} else {
				/* record the front file mtime for validation */
				va.va_mask = AT_MTIME;
				error = VOP_GETATTR(cp->c_frontvp, &va, 0,
				    kcred, NULL);
				if (error)
					goto out;
				cp->c_metadata.md_timestamp = va.va_mtime;
				cp->c_flags &=
				    ~(CN_NEED_FRONT_SYNC |
				    CN_POPULATION_PENDING);
			}
		} else {
			cp->c_flags &=
			    ~(CN_NEED_FRONT_SYNC | CN_POPULATION_PENDING);
		}
	}

	/*
	 * XXX tony: How can CN_ALLOC_PENDING still be set??
	 * XXX tony: How can CN_UPDATED not be set?????
	 */
	if ((cp->c_flags & CN_ALLOC_PENDING) == 0 &&
	    (cp->c_flags & CN_UPDATED)) {
		error = filegrp_write_metadata(fgp, &cp->c_id,
		    &cp->c_metadata);
		if (error)
			goto out;
	}
out:
	if (error) {
		/* XXX modified files? */
		if (cp->c_metadata.md_rlno) {
			/* give back the front file and its rl entry */
			cachefs_removefrontfile(&cp->c_metadata,
			    &cp->c_id, fgp);
			cachefs_rlent_moveto(C_TO_FSCACHE(cp)->fs_cache,
			    CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
			cp->c_metadata.md_rlno = 0;
			cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
			if (cp->c_frontvp) {
				VN_RELE(cp->c_frontvp);
				cp->c_frontvp = NULL;
			}
		}
		if ((cp->c_flags & CN_ALLOC_PENDING) == 0)
			(void) filegrp_destroy_metadata(fgp, &cp->c_id);
		cp->c_flags |= CN_ALLOC_PENDING;
		cachefs_nocache(cp);
	}
	/*
	 * we clear the updated bit even on errors because a retry
	 * will probably fail also.
	 */
	cp->c_flags &= ~CN_UPDATED;
	mutex_exit(&cp->c_statelock);

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("c_sync_metadata: EXIT cp %p cflag %x\n",
		    (void *)cp, cp->c_flags);
#endif

	return (error);
}
*/
/*ARGSUSED*/
static void
cachefs_inactive(struct vnode *vp, cred_t *cr, caller_context_t *ct)
{
	cnode_t *cp;
	struct cachefs_req *rp;
	fscache_t *fscp;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_inactive: ENTER vp %p\n", (void *)vp);
#endif

	cp = VTOC(vp);
	fscp = C_TO_FSCACHE(cp);

	ASSERT((cp->c_flags & CN_IDLE) == 0);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the inactive operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	/* vn_rele() set the v_count == 1 */

	/* mark the idle request as pending for the async worker */
	cp->c_ipending = 1;

	/*
	 * Queue a CFS_IDLE request; the actual teardown runs in the
	 * fscache work queue thread.  The cred is held until the
	 * request is serviced.
	 */
	rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
	rp->cfs_cmd = CFS_IDLE;
	rp->cfs_cr = cr;
	crhold(rp->cfs_cr);
	rp->cfs_req_u.cu_idle.ci_vp = vp;
	cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_inactive: EXIT vp %p\n", (void *)vp);
#endif
}

/*
 * Lookup entry point.  Loops renewing connected/disconnected access
 * until cachefs_lookup_common() completes without a timeout.
 */
/* ARGSUSED */
static int
cachefs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp,
	struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr,
	caller_context_t *ct, int *direntflags, pathname_t *realpnp)

{
	int error = 0;
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int held = 0;		/* have fscache access (cachefs_cd_access) */
	int connected = 0;	/* require connected access on next pass */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_lookup: ENTER dvp %p nm %s\n", (void *)dvp, nm);
#endif

	/* cachefs is not supported in non-global zones */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the lookup operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			break;
		held = 1;

		error = cachefs_lookup_common(dvp, nm, vpp, pnp,
		    flags, rdir, cr);
		if (CFS_TIMEOUT(fscp, error)) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				/* server timed out; retry disconnected */
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			} else {
				if (cachefs_cd_access_miss(fscp)) {
					/*
					 * Cache miss while disconnected:
					 * try the back fs directly.
					 */
					rw_enter(&dcp->c_rwlock, RW_READER);
					error = cachefs_lookup_back(dvp, nm,
					    vpp, cr);
					rw_exit(&dcp->c_rwlock);
					if (!CFS_TIMEOUT(fscp, error))
						break;
					delay(5*hz);
					connected = 0;
					continue;
				}
				/* must wait for a connection */
				connected = 1;
				continue;
			}
		}
		break;
	}
	if (held)
		cachefs_cd_release(fscp);

	/* device nodes must be returned as specvp-wrapped vnodes */
	if (error == 0 && IS_DEVVP(*vpp)) {
		struct vnode *newvp;
		newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
		VN_RELE(*vpp);
		if (newvp == NULL) {
			error = ENOSYS;
		} else {
			*vpp = newvp;
		}
	}

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_lookup: EXIT error = %d\n", error);
#endif

	return (error);
}

/*
 * Guts of the lookup: checks access, consults the dnlc and the cached
 * front directory, and falls back to cachefs_lookup_back() when the
 * cache cannot answer.  Returns the held vnode in *vpp on success.
 */
/* ARGSUSED */
int
cachefs_lookup_common(vnode_t *dvp, char *nm, vnode_t **vpp,
    struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr)
{
	int error = 0;
	cnode_t *cp, *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	struct fid cookie;
	u_offset_t d_offset;
	struct cachefs_req *rp;
	cfs_cid_t cid, dircid;
	uint_t flag;
	uint_t uncached = 0;

	*vpp = NULL;

	/*
	 * If lookup is for "", just return dvp.  Don't need
	 * to send it over the wire, look it up in the dnlc,
	 * or perform any access checks.
	 */
	if (*nm == '\0') {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	/* can't do lookups in non-directories */
	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	/* perform access check, also does consistency check if connected */
	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
		error = cachefs_access_connected(dvp, VEXEC, 0, cr);
	} else {
		mutex_enter(&dcp->c_statelock);
		error = cachefs_access_local(dcp, VEXEC, cr);
		mutex_exit(&dcp->c_statelock);
	}
	if (error)
		return (error);

	/*
	 * If lookup is for ".", just return dvp.  Don't need
	 * to send it over the wire or look it up in the dnlc,
	 * just need to check access.
*/
	if (strcmp(nm, ".") == 0) {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	/* check the dnlc */
	*vpp = (vnode_t *)dnlc_lookup(dvp, nm);
	if (*vpp)
		return (0);

	/* read lock the dir before starting the search */
	rw_enter(&dcp->c_rwlock, RW_READER);

	mutex_enter(&dcp->c_statelock);
	dircid = dcp->c_id;

	dcp->c_usage++;

	/* if front file is not usable, lookup on the back fs */
	if ((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
	    CFS_ISFS_BACKFS_NFSV4(fscp) ||
	    ((dcp->c_filegrp->fg_flags & CFS_FG_READ) == 0)) {
		mutex_exit(&dcp->c_statelock);
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
			error = cachefs_lookup_back(dvp, nm, vpp, cr);
		else
			error = ETIMEDOUT;
		goto out;
	}

	/* if the front file is not populated, try to populate it */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
			error = ETIMEDOUT;
			mutex_exit(&dcp->c_statelock);
			goto out;
		}

		if (cachefs_async_okay()) {
			/* cannot populate if cache is not writable */
			ASSERT((dcp->c_flags &
			    (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0);
			dcp->c_flags |= CN_ASYNC_POPULATE;

			/*
			 * Queue an async populate of this directory; the
			 * dvp hold and cred hold are released by the
			 * worker when the request completes.
			 */
			rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
			rp->cfs_cmd = CFS_POPULATE;
			rp->cfs_req_u.cu_populate.cpop_vp = dvp;
			rp->cfs_cr = cr;

			crhold(cr);
			VN_HOLD(dvp);

			cachefs_addqueue(rp, &fscp->fs_workq);
		} else if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
			/* no ACL caching needed; populate synchronously */
			error = cachefs_dir_fill(dcp, cr);
			if (error != 0) {
				mutex_exit(&dcp->c_statelock);
				goto out;
			}
		}
		/* no populate if too many asyncs and we have to cache ACLs */

		mutex_exit(&dcp->c_statelock);

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
			error = cachefs_lookup_back(dvp, nm, vpp, cr);
		else
			error = ETIMEDOUT;
		goto out;
	}

	/* by now we have a valid cached front file that we can search */

	ASSERT((dcp->c_flags & CN_ASYNC_POPULATE) == 0);
	error = cachefs_dir_look(dcp, nm, &cookie, &flag,
	    &d_offset, &cid);
	mutex_exit(&dcp->c_statelock);

	if (error) {
		/* if the entry does not have the fid, go get it */
		if (error == EINVAL) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
				error = cachefs_lookup_back(dvp, nm, vpp, cr);
			else
				error = ETIMEDOUT;
		}

		/* errors other than does not exist */
		else if (error != ENOENT) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
				error = cachefs_lookup_back(dvp, nm, vpp, cr);
			else
				error = ETIMEDOUT;
		}
		goto out;
	}

	/*
	 * Else we found the entry in the cached directory.
	 * Make a cnode for it.
	 */
	error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
	    cr, 0, &cp);
	if (error == ESTALE) {
		/* cached entry is stale; drop the cache and go to back fs */
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		mutex_enter(&dcp->c_statelock);
		cachefs_nocache(dcp);
		mutex_exit(&dcp->c_statelock);
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_lookup_back(dvp, nm, vpp, cr);
			uncached = 1;
		} else
			error = ETIMEDOUT;
	} else if (error == 0) {
		*vpp = CTOV(cp);
	}

out:
	if (error == 0) {
		/* put the entry in the dnlc */
		if (cachefs_dnlc)
			dnlc_enter(dvp, nm, *vpp);

		/* save the cid of the parent so can find the name */
		cp = VTOC(*vpp);
		if (bcmp(&cp->c_metadata.md_parent, &dircid,
		    sizeof (cfs_cid_t)) != 0) {
			mutex_enter(&cp->c_statelock);
			cp->c_metadata.md_parent = dircid;
			cp->c_flags |= CN_UPDATED;
			mutex_exit(&cp->c_statelock);
		}
	}

	rw_exit(&dcp->c_rwlock);
	/* re-pack the directory if we had to uncache a packed one */
	if (uncached && dcp->c_metadata.md_flags & MD_PACKED)
		(void) cachefs_pack_common(dvp, cr);
	return (error);
}

/*
 * Called from
cachefs_lookup_common when the back file system needs to be
 * examined to perform the lookup.
 *
 * Looks nm up via VOP_LOOKUP on the directory's back vnode, fetches
 * the fid and attributes, and builds (or finds) a cnode for the result.
 * Holds dcp->c_statelock across the back-fs calls.  On success the
 * held vnode is returned in *vpp.
 */
static int
cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp,
    cred_t *cr)
{
	int error = 0;
	cnode_t *cp, *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	vnode_t *backvp = NULL;
	struct vattr va;
	struct fid cookie;
	cfs_cid_t cid;
	uint32_t valid_fid;

	mutex_enter(&dcp->c_statelock);

	/* do a lookup on the back FS to get the back vnode */
	if (dcp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, dcp);
		if (error)
			goto out;
	}

	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_lookup (nfsv4): dcp %p, dbackvp %p, name %s\n",
	    dcp, dcp->c_backvp, nm));
	error = VOP_LOOKUP(dcp->c_backvp, nm, &backvp, (struct pathname *)NULL,
	    0, (vnode_t *)NULL, cr, NULL, NULL, NULL);
	if (error)
		goto out;
	if (IS_DEVVP(backvp)) {
		struct vnode *devvp = backvp;

		/* strip the spec layer; cache the underlying vnode */
		if (VOP_REALVP(devvp, &backvp, NULL) == 0) {
			VN_HOLD(backvp);
			VN_RELE(devvp);
		}
	}

	/* get the fid and attrs from the back fs */
	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
	error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
	if (error)
		goto out;

	cid.cid_fileno = va.va_nodeid;
	cid.cid_flags = 0;

#if 0
	/* XXX bob: this is probably no longer necessary */
	/* if the directory entry was incomplete, we can complete it now */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) &&
	    ((dcp->c_flags & CN_ASYNC_POPULATE) == 0) &&
	    (dcp->c_filegrp->fg_flags & CFS_FG_WRITE)) {
		cachefs_dir_modentry(dcp, d_offset, &cookie, &cid);
	}
#endif

out:
	mutex_exit(&dcp->c_statelock);

	/* create the cnode */
	if (error == 0) {
		error = cachefs_cnode_make(&cid, fscp,
		    (valid_fid ? &cookie : NULL),
		    &va, backvp, cr, 0, &cp);
		if (error == 0) {
			*vpp = CTOV(cp);
		}
	}

	/* cnode_make took its own hold on backvp if it needed one */
	if (backvp)
		VN_RELE(backvp);

	return (error);
}

/*
 * Create entry point.  Loops renewing connected/disconnected access,
 * dispatching to cachefs_create_connected()/_disconnected().
 */
/*ARGSUSED7*/
static int
cachefs_create(vnode_t *dvp, char *nm, vattr_t *vap,
    vcexcl_t exclusive, int mode, vnode_t **vpp, cred_t *cr, int flag,
    caller_context_t *ct, vsecattr_t *vsecp)

{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	cachefscache_t *cachep = fscp->fs_cache;
	int error;
	int connected = 0;	/* require connected access on next pass */
	int held = 0;		/* have fscache access */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_create: ENTER dvp %p, nm %s\n",
		    (void *)dvp, nm);
#endif
	/* cachefs is not supported in non-global zones */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the create operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;

		/*
		 * if we are connected, perform the remote portion of the
		 * create.
*/
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_create_connected(dvp, nm, vap,
			    exclusive, mode, vpp, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/* server timed out; retry disconnected */
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			} else if (error) {
				break;
			}
		}

		/* else we must be disconnected */
		else {
			error = cachefs_create_disconnected(dvp, nm, vap,
			    exclusive, mode, vpp, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/* need a connection; retry connected */
				connected = 1;
				continue;
			} else if (error) {
				break;
			}
		}
		break;
	}

	/* log the create, with fid/fileno when a cnode was produced */
	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_CREATE)) {
		fid_t *fidp = NULL;
		ino64_t fileno = 0;
		cnode_t *cp = NULL;
		if (error == 0)
			cp = VTOC(*vpp);

		if (cp != NULL) {
			fidp = &cp->c_metadata.md_cookie;
			fileno = cp->c_id.cid_fileno;
		}
		cachefs_log_create(cachep, error, fscp->fs_cfsvfsp,
		    fidp, fileno, crgetuid(cr));
	}

	if (held)
		cachefs_cd_release(fscp);

	if (error == 0 && CFS_ISFS_NONSHARED(fscp))
		(void) cachefs_pack(dvp, nm, cr);
	/* device nodes must be returned as specvp-wrapped vnodes */
	if (error == 0 && IS_DEVVP(*vpp)) {
		struct vnode *spcvp;

		spcvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
		VN_RELE(*vpp);
		if (spcvp == NULL) {
			error = ENOSYS;
		} else {
			*vpp = spcvp;
		}
	}

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_create: EXIT error %d\n", error);
#endif
	return (error);
}


/*
 * Connected-mode create: performs the create on the back fs, then
 * builds a cnode for the result and enters it in the cached parent
 * directory.  Returns 0 or an errno; *vpp holds the new vnode.
 */
static int
cachefs_create_connected(vnode_t *dvp, char *nm, vattr_t *vap,
    enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error;
	vnode_t *tvp = NULL;
	vnode_t *devvp;
	fid_t cookie;
	vattr_t
va; 3964 cnode_t *ncp; 3965 cfs_cid_t cid; 3966 vnode_t *vp; 3967 uint32_t valid_fid; 3968 3969 /* special case if file already exists */ 3970 error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr); 3971 if (CFS_TIMEOUT(fscp, error)) 3972 return (error); 3973 if (error == 0) { 3974 if (exclusive == EXCL) 3975 error = EEXIST; 3976 else if (vp->v_type == VDIR && (mode & VWRITE)) 3977 error = EISDIR; 3978 else if ((error = 3979 cachefs_access_connected(vp, mode, 0, cr)) == 0) { 3980 if ((vap->va_mask & AT_SIZE) && (vp->v_type == VREG)) { 3981 vap->va_mask = AT_SIZE; 3982 error = cachefs_setattr_common(vp, vap, 0, 3983 cr, NULL); 3984 } 3985 } 3986 if (error) { 3987 VN_RELE(vp); 3988 } else 3989 *vpp = vp; 3990 return (error); 3991 } 3992 3993 rw_enter(&dcp->c_rwlock, RW_WRITER); 3994 mutex_enter(&dcp->c_statelock); 3995 3996 /* consistency check the directory */ 3997 error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr); 3998 if (error) { 3999 mutex_exit(&dcp->c_statelock); 4000 goto out; 4001 } 4002 4003 /* get the backvp if necessary */ 4004 if (dcp->c_backvp == NULL) { 4005 error = cachefs_getbackvp(fscp, dcp); 4006 if (error) { 4007 mutex_exit(&dcp->c_statelock); 4008 goto out; 4009 } 4010 } 4011 4012 /* create the file on the back fs */ 4013 CFS_DPRINT_BACKFS_NFSV4(fscp, 4014 ("cachefs_create (nfsv4): dcp %p, dbackvp %p," 4015 "name %s\n", dcp, dcp->c_backvp, nm)); 4016 error = VOP_CREATE(dcp->c_backvp, nm, vap, exclusive, mode, 4017 &devvp, cr, 0, NULL, NULL); 4018 mutex_exit(&dcp->c_statelock); 4019 if (error) 4020 goto out; 4021 if (VOP_REALVP(devvp, &tvp, NULL) == 0) { 4022 VN_HOLD(tvp); 4023 VN_RELE(devvp); 4024 } else { 4025 tvp = devvp; 4026 } 4027 4028 /* get the fid and attrs from the back fs */ 4029 valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? 
FALSE : TRUE); 4030 error = cachefs_getcookie(tvp, &cookie, &va, cr, valid_fid); 4031 if (error) 4032 goto out; 4033 4034 /* make the cnode */ 4035 cid.cid_fileno = va.va_nodeid; 4036 cid.cid_flags = 0; 4037 error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL), 4038 &va, tvp, cr, 0, &ncp); 4039 if (error) 4040 goto out; 4041 4042 *vpp = CTOV(ncp); 4043 4044 /* enter it in the parent directory */ 4045 mutex_enter(&dcp->c_statelock); 4046 if (CFS_ISFS_NONSHARED(fscp) && 4047 (dcp->c_metadata.md_flags & MD_POPULATED)) { 4048 /* see if entry already exists */ 4049 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0); 4050 error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL); 4051 if (error == ENOENT) { 4052 /* entry, does not exist, add the new file */ 4053 error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie, 4054 &ncp->c_id, SM_ASYNC); 4055 if (error) { 4056 cachefs_nocache(dcp); 4057 error = 0; 4058 } 4059 /* XXX should this be done elsewhere, too? */ 4060 dnlc_enter(dvp, nm, *vpp); 4061 } else { 4062 /* entry exists or some other problem */ 4063 cachefs_nocache(dcp); 4064 error = 0; 4065 } 4066 } 4067 CFSOP_MODIFY_COBJECT(fscp, dcp, cr); 4068 mutex_exit(&dcp->c_statelock); 4069 4070 out: 4071 rw_exit(&dcp->c_rwlock); 4072 if (tvp) 4073 VN_RELE(tvp); 4074 4075 return (error); 4076 } 4077 4078 static int 4079 cachefs_create_disconnected(vnode_t *dvp, char *nm, vattr_t *vap, 4080 enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr) 4081 { 4082 cnode_t *dcp = VTOC(dvp); 4083 cnode_t *cp; 4084 cnode_t *ncp = NULL; 4085 vnode_t *vp; 4086 fscache_t *fscp = C_TO_FSCACHE(dcp); 4087 int error = 0; 4088 struct vattr va; 4089 timestruc_t current_time; 4090 off_t commit = 0; 4091 fid_t cookie; 4092 cfs_cid_t cid; 4093 4094 rw_enter(&dcp->c_rwlock, RW_WRITER); 4095 mutex_enter(&dcp->c_statelock); 4096 4097 /* give up if the directory is not populated */ 4098 if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) { 4099 mutex_exit(&dcp->c_statelock); 4100 
rw_exit(&dcp->c_rwlock);
		return (ETIMEDOUT);
	}

	/* special case if file already exists */
	error = cachefs_dir_look(dcp, nm, &cookie, NULL, NULL, &cid);
	if (error == EINVAL) {
		/* cached entry lacks a fid; must ask the server */
		mutex_exit(&dcp->c_statelock);
		rw_exit(&dcp->c_rwlock);
		return (ETIMEDOUT);
	}
	if (error == 0) {
		mutex_exit(&dcp->c_statelock);
		rw_exit(&dcp->c_rwlock);
		error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
		    cr, 0, &cp);
		if (error) {
			return (error);
		}
		vp = CTOV(cp);

		if (cp->c_metadata.md_flags & MD_NEEDATTRS)
			error = ETIMEDOUT;
		else if (exclusive == EXCL)
			error = EEXIST;
		else if (vp->v_type == VDIR && (mode & VWRITE))
			error = EISDIR;
		else {
			mutex_enter(&cp->c_statelock);
			error = cachefs_access_local(cp, mode, cr);
			mutex_exit(&cp->c_statelock);
			if (!error) {
				/* non-exclusive create: truncate in place */
				if ((vap->va_mask & AT_SIZE) &&
				    (vp->v_type == VREG)) {
					vap->va_mask = AT_SIZE;
					error = cachefs_setattr_common(vp,
					    vap, 0, cr, NULL);
				}
			}
		}
		if (error) {
			VN_RELE(vp);
		} else
			*vpp = vp;
		return (error);
	}

	/* give up if cannot modify the cache */
	if (CFS_ISFS_WRITE_AROUND(fscp)) {
		mutex_exit(&dcp->c_statelock);
		error = ETIMEDOUT;
		goto out;
	}

	/* check access */
	if (error = cachefs_access_local(dcp, VWRITE, cr)) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* mark dir as modified */
	cachefs_modified(dcp);
	mutex_exit(&dcp->c_statelock);

	/* must be privileged to set sticky bit */
	if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
		vap->va_mode &= ~VSVTX;

	/* make up a reasonable set of attributes */
	cachefs_attr_setup(vap, &va, dcp, cr);

	/* create the cnode */
	error = cachefs_cnode_create(fscp, &va, 0, &ncp);
	if (error)
		goto out;

	mutex_enter(&ncp->c_statelock);

	/* get the front file now instead of later */
	if (vap->va_type == VREG) {
		error = cachefs_getfrontfile(ncp);
		if (error) {
			mutex_exit(&ncp->c_statelock);
			goto out;
		}
		ASSERT(ncp->c_frontvp != NULL);
		ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
		ncp->c_metadata.md_flags |= MD_POPULATED;
	} else {
		/* non-regular file: just allocate the metadata slot */
		ASSERT(ncp->c_flags & CN_ALLOC_PENDING);
		if (ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
			(void) filegrp_allocattr(ncp->c_filegrp);
		}
		error = filegrp_create_metadata(ncp->c_filegrp,
		    &ncp->c_metadata, &ncp->c_id);
		if (error) {
			mutex_exit(&ncp->c_statelock);
			goto out;
		}
		ncp->c_flags &= ~CN_ALLOC_PENDING;
	}
	mutex_enter(&dcp->c_statelock);
	cachefs_creategid(dcp, ncp, vap, cr);
	cachefs_createacl(dcp, ncp);
	mutex_exit(&dcp->c_statelock);

	/* set times on the file */
	/* NOTE(review): "&curren" was mis-encoded as a currency glyph */
	gethrestime(&current_time);
	ncp->c_metadata.md_vattr.va_atime = current_time;
	ncp->c_metadata.md_localctime = current_time;
	ncp->c_metadata.md_localmtime = current_time;
	ncp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME;

	/* reserve space for the daemon cid mapping */
	error = cachefs_dlog_cidmap(fscp);
	if (error) {
		mutex_exit(&ncp->c_statelock);
		goto out;
	}
	ncp->c_metadata.md_flags |= MD_MAPPING;

	/* mark the new file as modified */
	if (cachefs_modified_alloc(ncp)) {
		mutex_exit(&ncp->c_statelock);
		error = ENOSPC;
		goto out;
	}
	ncp->c_flags |= CN_UPDATED;

	/*
	 * write the metadata now rather than waiting until
	 * inactive so that if there's no space we can let
	 * the caller know.
	 */
	ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
	ASSERT((ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
	error = filegrp_write_metadata(ncp->c_filegrp,
	    &ncp->c_id, &ncp->c_metadata);
	if (error) {
		mutex_exit(&ncp->c_statelock);
		goto out;
	}

	/* log the operation */
	commit = cachefs_dlog_create(fscp, dcp, nm, vap, exclusive,
	    mode, ncp, 0, cr);
	if (commit == 0) {
		mutex_exit(&ncp->c_statelock);
		error = ENOSPC;
		goto out;
	}

	mutex_exit(&ncp->c_statelock);

	mutex_enter(&dcp->c_statelock);

	/* update parent dir times */
	dcp->c_metadata.md_localmtime = current_time;
	dcp->c_metadata.md_flags |= MD_LOCALMTIME;
	dcp->c_flags |= CN_UPDATED;

	/* enter new file name in the parent directory */
	if (dcp->c_metadata.md_flags & MD_POPULATED) {
		error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie,
		    &ncp->c_id, 0);
		if (error) {
			cachefs_nocache(dcp);
			mutex_exit(&dcp->c_statelock);
			error = ETIMEDOUT;
			goto out;
		}
		dnlc_enter(dvp, nm, CTOV(ncp));
	} else {
		mutex_exit(&dcp->c_statelock);
		error = ETIMEDOUT;
		goto out;
	}
	mutex_exit(&dcp->c_statelock);

out:
	rw_exit(&dcp->c_rwlock);

	/* seal the dlog record with the final error status */
	if (commit) {
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
	}
	if (error) {
		/* destroy the cnode we created */
		if (ncp) {
			mutex_enter(&ncp->c_statelock);
			ncp->c_flags |= CN_DESTROY;
			mutex_exit(&ncp->c_statelock);
			VN_RELE(CTOV(ncp));
		}
	} else {
		*vpp = CTOV(ncp);
	}
	return (error);
}

/*
 * Remove entry point.  Loops renewing connected/disconnected access,
 * looking the target up and dispatching to
 * cachefs_remove_connected()/_disconnected().
 */
/*ARGSUSED*/
static int
cachefs_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
    int flags)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	cachefscache_t *cachep = fscp->fs_cache;
	int error
= 0;
	int held = 0;		/* have fscache access */
	int connected = 0;	/* require connected access on next pass */
	size_t namlen;
	vnode_t *vp = NULL;
	int vfslock = 0;	/* hold vn_vfswlock on the target */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_remove: ENTER dvp %p name %s\n",
		    (void *)dvp, nm);
#endif
	/* cachefs is not supported in non-global zones */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
		ASSERT(dcp->c_flags & CN_NOCACHE);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the remove operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	for (;;) {
		/* drop state carried over from a previous iteration */
		if (vfslock) {
			vn_vfsunlock(vp);
			vfslock = 0;
		}
		if (vp) {
			VN_RELE(vp);
			vp = NULL;
		}

		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;

		/* if disconnected, do some extra error checking */
		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
			/* check permissions */
			mutex_enter(&dcp->c_statelock);
			error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
			mutex_exit(&dcp->c_statelock);
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			}
			if (error)
				break;

			namlen = strlen(nm);
			if (namlen == 0) {
				error = EINVAL;
				break;
			}

			/* cannot remove . and .. */
			if (nm[0] == '.') {
				if (namlen == 1) {
					error = EINVAL;
					break;
				} else if (namlen == 2 && nm[1] == '.') {
					error = EEXIST;
					break;
				}
			}

		}

		/* get the cnode of the file to delete */
		error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
		if (error) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				if (CFS_TIMEOUT(fscp, error)) {
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					connected = 0;
					continue;
				}
			} else {
				if (CFS_TIMEOUT(fscp, error)) {
					connected = 1;
					continue;
				}
			}
			/* log the failed lookup with an empty fid */
			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE)) {
				struct fid foo;

				bzero(&foo, sizeof (foo));
				cachefs_log_remove(cachep, error,
				    fscp->fs_cfsvfsp, &foo, 0, crgetuid(cr));
			}
			break;
		}

		if (vp->v_type == VDIR) {
			/* must be privileged to remove dirs with unlink() */
			if ((error = secpolicy_fs_linkdir(cr, vp->v_vfsp)) != 0)
				break;

			/* see ufs_dirremove for why this is done, mount race */
			if (vn_vfswlock(vp)) {
				error = EBUSY;
				break;
			}
			vfslock = 1;
			if (vn_mountedvfs(vp) != NULL) {
				error = EBUSY;
				break;
			}
		}

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_remove_connected(dvp, nm, cr, vp);
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_remove_disconnected(dvp, nm, cr,
			    vp);
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			}
		}
		break;
	}

#if 0
	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE))
		cachefs_log_remove(cachep, error, fscp->fs_cfsvfsp,
		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
		    crgetuid(cr));
#endif

	if (held)
		cachefs_cd_release(fscp);
if (vfslock)
		vn_vfsunlock(vp);

	if (vp)
		VN_RELE(vp);

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_remove: EXIT dvp %p\n", (void *)dvp);
#endif

	return (error);
}

/*
 * Connected-mode remove: performs the remove on the back fs and
 * updates the cached directory and the removed file's cnode.
 * Active files are first relinked (NFS-style "silly rename" avoidance)
 * so operations on open file handles continue to work.
 */
int
cachefs_remove_connected(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
{
	cnode_t *dcp = VTOC(dvp);
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error = 0;

	/*
	 * Acquire the rwlock (WRITER) on the directory to prevent other
	 * activity on the directory.
	 */
	rw_enter(&dcp->c_rwlock, RW_WRITER);

	/* purge dnlc of this entry so can get accurate vnode count */
	dnlc_purge_vp(vp);

	/*
	 * If the cnode is active, make a link to the file
	 * so operations on the file will continue.
	 */
	if ((vp->v_type != VDIR) &&
	    !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) {
		error = cachefs_remove_dolink(dvp, vp, nm, cr);
		if (error)
			goto out;
	}

	/* else call backfs NFSv4 handler if NFSv4 */
	else if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_remove_backfs_nfsv4(dvp, nm, cr, vp);
		goto out;
	}

	/* else drop the backvp so nfs does not do rename */
	else if (cp->c_backvp) {
		mutex_enter(&cp->c_statelock);
		/* re-check under the lock */
		if (cp->c_backvp) {
			VN_RELE(cp->c_backvp);
			cp->c_backvp = NULL;
		}
		mutex_exit(&cp->c_statelock);
	}

	mutex_enter(&dcp->c_statelock);

	/* get the backvp */
	if (dcp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, dcp);
		if (error) {
			mutex_exit(&dcp->c_statelock);
			goto out;
		}
	}

	/* check directory consistency */
	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* perform the remove on the back fs */
	error = VOP_REMOVE(dcp->c_backvp, nm, cr, NULL, 0);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* the dir has been modified */
	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);

	/* remove the entry from the populated directory */
	if (CFS_ISFS_NONSHARED(fscp) &&
	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
		error = cachefs_dir_rmentry(dcp, nm);
		if (error) {
			/* cache update failed; go nocache, not fatal */
			cachefs_nocache(dcp);
			error = 0;
		}
	}
	mutex_exit(&dcp->c_statelock);

	/* fix up the file we deleted */
	mutex_enter(&cp->c_statelock);
	if (cp->c_attr.va_nlink == 1)
		cp->c_flags |= CN_DESTROY;
	else
		cp->c_flags |= CN_UPDATED;

	cp->c_attr.va_nlink--;
	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
	mutex_exit(&cp->c_statelock);

out:
	rw_exit(&dcp->c_rwlock);
	return (error);
}

/*
 * cachefs_remove_backfs_nfsv4
 *
 * Call NFSv4 back filesystem to handle the remove (cachefs
 * pass-through support for NFSv4).
 */
int
cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
{
	cnode_t *dcp = VTOC(dvp);
	cnode_t *cp = VTOC(vp);
	vnode_t *dbackvp;
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error = 0;

	/*
	 * For NFSv4 pass-through to work, only connected operation
	 * is supported, the cnode backvp must exist, and cachefs
	 * optional (eg., disconnectable) flags are turned off. Assert
	 * these conditions for the getattr operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	/* Should hold the directory readwrite lock to update directory */
	ASSERT(RW_WRITE_HELD(&dcp->c_rwlock));

	/*
	 * Update attributes for directory. Note that
	 * CFSOP_CHECK_COBJECT asserts for c_statelock being
	 * held, so grab it before calling the routine.
4618 */ 4619 mutex_enter(&dcp->c_statelock); 4620 error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr); 4621 mutex_exit(&dcp->c_statelock); 4622 if (error) 4623 goto out; 4624 4625 /* 4626 * Update attributes for cp. Note that CFSOP_CHECK_COBJECT 4627 * asserts for c_statelock being held, so grab it before 4628 * calling the routine. 4629 */ 4630 mutex_enter(&cp->c_statelock); 4631 error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr); 4632 if (error) { 4633 mutex_exit(&cp->c_statelock); 4634 goto out; 4635 } 4636 4637 /* 4638 * Drop the backvp so nfs if the link count is 1 so that 4639 * nfs does not do rename. Ensure that we will destroy the cnode 4640 * since this cnode no longer contains the backvp. Note that we 4641 * maintain lock on this cnode to prevent change till the remove 4642 * completes, otherwise other operations will encounter an ESTALE 4643 * if they try to use the cnode with CN_DESTROY set (see 4644 * cachefs_get_backvp()), or change the state of the cnode 4645 * while we're removing it. 4646 */ 4647 if (cp->c_attr.va_nlink == 1) { 4648 /* 4649 * The unldvp information is created for the case 4650 * when there is more than one reference on the 4651 * vnode when a remove operation is called. If the 4652 * remove itself was holding a reference to the 4653 * vnode, then a subsequent remove will remove the 4654 * backvp, so we need to get rid of the unldvp 4655 * before removing the backvp. An alternate would 4656 * be to simply ignore the remove and let the 4657 * inactivation routine do the deletion of the 4658 * unldvp. 
4659 */ 4660 if (cp->c_unldvp) { 4661 VN_RELE(cp->c_unldvp); 4662 cachefs_kmem_free(cp->c_unlname, MAXNAMELEN); 4663 crfree(cp->c_unlcred); 4664 cp->c_unldvp = NULL; 4665 cp->c_unlcred = NULL; 4666 } 4667 cp->c_flags |= CN_DESTROY; 4668 cp->c_attr.va_nlink = 0; 4669 VN_RELE(cp->c_backvp); 4670 cp->c_backvp = NULL; 4671 } 4672 4673 /* perform the remove on back fs after extracting directory backvp */ 4674 mutex_enter(&dcp->c_statelock); 4675 dbackvp = dcp->c_backvp; 4676 mutex_exit(&dcp->c_statelock); 4677 4678 CFS_DPRINT_BACKFS_NFSV4(fscp, 4679 ("cachefs_remove (nfsv4): dcp %p, dbackvp %p, name %s\n", 4680 dcp, dbackvp, nm)); 4681 error = VOP_REMOVE(dbackvp, nm, cr, NULL, 0); 4682 if (error) { 4683 mutex_exit(&cp->c_statelock); 4684 goto out; 4685 } 4686 4687 /* fix up the file we deleted, if not destroying the cnode */ 4688 if ((cp->c_flags & CN_DESTROY) == 0) { 4689 cp->c_attr.va_nlink--; 4690 cp->c_flags |= CN_UPDATED; 4691 } 4692 4693 mutex_exit(&cp->c_statelock); 4694 4695 out: 4696 return (error); 4697 } 4698 4699 int 4700 cachefs_remove_disconnected(vnode_t *dvp, char *nm, cred_t *cr, 4701 vnode_t *vp) 4702 { 4703 cnode_t *dcp = VTOC(dvp); 4704 cnode_t *cp = VTOC(vp); 4705 fscache_t *fscp = C_TO_FSCACHE(dcp); 4706 int error = 0; 4707 off_t commit = 0; 4708 timestruc_t current_time; 4709 4710 if (CFS_ISFS_WRITE_AROUND(fscp)) 4711 return (ETIMEDOUT); 4712 4713 if (cp->c_metadata.md_flags & MD_NEEDATTRS) 4714 return (ETIMEDOUT); 4715 4716 /* 4717 * Acquire the rwlock (WRITER) on the directory to prevent other 4718 * activity on the directory. 
4719 */ 4720 rw_enter(&dcp->c_rwlock, RW_WRITER); 4721 4722 /* dir must be populated */ 4723 if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) { 4724 error = ETIMEDOUT; 4725 goto out; 4726 } 4727 4728 mutex_enter(&dcp->c_statelock); 4729 mutex_enter(&cp->c_statelock); 4730 4731 error = cachefs_stickyrmchk(dcp, cp, cr); 4732 4733 mutex_exit(&cp->c_statelock); 4734 mutex_exit(&dcp->c_statelock); 4735 if (error) 4736 goto out; 4737 4738 /* purge dnlc of this entry so can get accurate vnode count */ 4739 dnlc_purge_vp(vp); 4740 4741 /* 4742 * If the cnode is active, make a link to the file 4743 * so operations on the file will continue. 4744 */ 4745 if ((vp->v_type != VDIR) && 4746 !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) { 4747 error = cachefs_remove_dolink(dvp, vp, nm, cr); 4748 if (error) 4749 goto out; 4750 } 4751 4752 if (cp->c_attr.va_nlink > 1) { 4753 mutex_enter(&cp->c_statelock); 4754 if (cachefs_modified_alloc(cp)) { 4755 mutex_exit(&cp->c_statelock); 4756 error = ENOSPC; 4757 goto out; 4758 } 4759 if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) { 4760 error = cachefs_dlog_cidmap(fscp); 4761 if (error) { 4762 mutex_exit(&cp->c_statelock); 4763 error = ENOSPC; 4764 goto out; 4765 } 4766 cp->c_metadata.md_flags |= MD_MAPPING; 4767 cp->c_flags |= CN_UPDATED; 4768 } 4769 mutex_exit(&cp->c_statelock); 4770 } 4771 4772 /* log the remove */ 4773 commit = cachefs_dlog_remove(fscp, dcp, nm, cp, cr); 4774 if (commit == 0) { 4775 error = ENOSPC; 4776 goto out; 4777 } 4778 4779 /* remove the file from the dir */ 4780 mutex_enter(&dcp->c_statelock); 4781 if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) { 4782 mutex_exit(&dcp->c_statelock); 4783 error = ETIMEDOUT; 4784 goto out; 4785 4786 } 4787 cachefs_modified(dcp); 4788 error = cachefs_dir_rmentry(dcp, nm); 4789 if (error) { 4790 mutex_exit(&dcp->c_statelock); 4791 if (error == ENOTDIR) 4792 error = ETIMEDOUT; 4793 goto out; 4794 } 4795 4796 /* update parent dir times */ 4797 
gethrestime(¤t_time); 4798 dcp->c_metadata.md_localctime = current_time; 4799 dcp->c_metadata.md_localmtime = current_time; 4800 dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME; 4801 dcp->c_flags |= CN_UPDATED; 4802 mutex_exit(&dcp->c_statelock); 4803 4804 /* adjust file we are deleting */ 4805 mutex_enter(&cp->c_statelock); 4806 cp->c_attr.va_nlink--; 4807 cp->c_metadata.md_localctime = current_time; 4808 cp->c_metadata.md_flags |= MD_LOCALCTIME; 4809 if (cp->c_attr.va_nlink == 0) { 4810 cp->c_flags |= CN_DESTROY; 4811 } else { 4812 cp->c_flags |= CN_UPDATED; 4813 } 4814 mutex_exit(&cp->c_statelock); 4815 4816 out: 4817 if (commit) { 4818 /* commit the log entry */ 4819 if (cachefs_dlog_commit(fscp, commit, error)) { 4820 /*EMPTY*/ 4821 /* XXX bob: fix on panic */ 4822 } 4823 } 4824 4825 rw_exit(&dcp->c_rwlock); 4826 return (error); 4827 } 4828 4829 /*ARGSUSED*/ 4830 static int 4831 cachefs_link(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr, 4832 caller_context_t *ct, int flags) 4833 { 4834 fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp); 4835 cnode_t *tdcp = VTOC(tdvp); 4836 struct vnode *realvp; 4837 int error = 0; 4838 int held = 0; 4839 int connected = 0; 4840 4841 #ifdef CFSDEBUG 4842 CFS_DEBUG(CFSDEBUG_VOPS) 4843 printf("cachefs_link: ENTER fvp %p tdvp %p tnm %s\n", 4844 (void *)fvp, (void *)tdvp, tnm); 4845 #endif 4846 4847 if (getzoneid() != GLOBAL_ZONEID) { 4848 error = EPERM; 4849 goto out; 4850 } 4851 4852 if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE)) 4853 ASSERT(tdcp->c_flags & CN_NOCACHE); 4854 4855 if (VOP_REALVP(fvp, &realvp, ct) == 0) { 4856 fvp = realvp; 4857 } 4858 4859 /* 4860 * Cachefs only provides pass-through support for NFSv4, 4861 * and all vnode operations are passed through to the 4862 * back file system. For NFSv4 pass-through to work, only 4863 * connected operation is supported, the cnode backvp must 4864 * exist, and cachefs optional (eg., disconnectable) flags 4865 * are turned off. 
Assert these conditions to ensure that 4866 * the backfilesystem is called for the link operation. 4867 */ 4868 4869 CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp); 4870 CFS_BACKFS_NFSV4_ASSERT_CNODE(tdcp); 4871 4872 for (;;) { 4873 /* get (or renew) access to the file system */ 4874 if (held) { 4875 /* Won't loop with NFSv4 connected behavior */ 4876 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0); 4877 rw_exit(&tdcp->c_rwlock); 4878 cachefs_cd_release(fscp); 4879 held = 0; 4880 } 4881 error = cachefs_cd_access(fscp, connected, 1); 4882 if (error) 4883 break; 4884 rw_enter(&tdcp->c_rwlock, RW_WRITER); 4885 held = 1; 4886 4887 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) { 4888 error = cachefs_link_connected(tdvp, fvp, tnm, cr); 4889 if (CFS_TIMEOUT(fscp, error)) { 4890 rw_exit(&tdcp->c_rwlock); 4891 cachefs_cd_release(fscp); 4892 held = 0; 4893 cachefs_cd_timedout(fscp); 4894 connected = 0; 4895 continue; 4896 } 4897 } else { 4898 error = cachefs_link_disconnected(tdvp, fvp, tnm, 4899 cr); 4900 if (CFS_TIMEOUT(fscp, error)) { 4901 connected = 1; 4902 continue; 4903 } 4904 } 4905 break; 4906 } 4907 4908 if (held) { 4909 rw_exit(&tdcp->c_rwlock); 4910 cachefs_cd_release(fscp); 4911 } 4912 4913 #ifdef CFS_CD_DEBUG 4914 ASSERT((curthread->t_flag & T_CD_HELD) == 0); 4915 #endif 4916 out: 4917 #ifdef CFSDEBUG 4918 CFS_DEBUG(CFSDEBUG_VOPS) 4919 printf("cachefs_link: EXIT fvp %p tdvp %p tnm %s\n", 4920 (void *)fvp, (void *)tdvp, tnm); 4921 #endif 4922 return (error); 4923 } 4924 4925 static int 4926 cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr) 4927 { 4928 cnode_t *tdcp = VTOC(tdvp); 4929 cnode_t *fcp = VTOC(fvp); 4930 fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp); 4931 int error = 0; 4932 vnode_t *backvp = NULL; 4933 4934 if (tdcp != fcp) { 4935 mutex_enter(&fcp->c_statelock); 4936 4937 if (fcp->c_backvp == NULL) { 4938 error = cachefs_getbackvp(fscp, fcp); 4939 if (error) { 4940 mutex_exit(&fcp->c_statelock); 4941 goto out; 4942 } 4943 } 4944 4945 error = 
CFSOP_CHECK_COBJECT(fscp, fcp, 0, cr); 4946 if (error) { 4947 mutex_exit(&fcp->c_statelock); 4948 goto out; 4949 } 4950 backvp = fcp->c_backvp; 4951 VN_HOLD(backvp); 4952 mutex_exit(&fcp->c_statelock); 4953 } 4954 4955 mutex_enter(&tdcp->c_statelock); 4956 4957 /* get backvp of target directory */ 4958 if (tdcp->c_backvp == NULL) { 4959 error = cachefs_getbackvp(fscp, tdcp); 4960 if (error) { 4961 mutex_exit(&tdcp->c_statelock); 4962 goto out; 4963 } 4964 } 4965 4966 /* consistency check target directory */ 4967 error = CFSOP_CHECK_COBJECT(fscp, tdcp, 0, cr); 4968 if (error) { 4969 mutex_exit(&tdcp->c_statelock); 4970 goto out; 4971 } 4972 if (backvp == NULL) { 4973 backvp = tdcp->c_backvp; 4974 VN_HOLD(backvp); 4975 } 4976 4977 /* perform the link on the back fs */ 4978 CFS_DPRINT_BACKFS_NFSV4(fscp, 4979 ("cachefs_link (nfsv4): tdcp %p, tdbackvp %p, " 4980 "name %s\n", tdcp, tdcp->c_backvp, tnm)); 4981 error = VOP_LINK(tdcp->c_backvp, backvp, tnm, cr, NULL, 0); 4982 if (error) { 4983 mutex_exit(&tdcp->c_statelock); 4984 goto out; 4985 } 4986 4987 CFSOP_MODIFY_COBJECT(fscp, tdcp, cr); 4988 4989 /* if the dir is populated, add the new link */ 4990 if (CFS_ISFS_NONSHARED(fscp) && 4991 (tdcp->c_metadata.md_flags & MD_POPULATED)) { 4992 error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie, 4993 &fcp->c_id, SM_ASYNC); 4994 if (error) { 4995 cachefs_nocache(tdcp); 4996 error = 0; 4997 } 4998 } 4999 mutex_exit(&tdcp->c_statelock); 5000 5001 /* get the new link count on the file */ 5002 mutex_enter(&fcp->c_statelock); 5003 fcp->c_flags |= CN_UPDATED; 5004 CFSOP_MODIFY_COBJECT(fscp, fcp, cr); 5005 if (fcp->c_backvp == NULL) { 5006 error = cachefs_getbackvp(fscp, fcp); 5007 if (error) { 5008 mutex_exit(&fcp->c_statelock); 5009 goto out; 5010 } 5011 } 5012 5013 /* XXX bob: given what modify_cobject does this seems unnecessary */ 5014 fcp->c_attr.va_mask = AT_ALL; 5015 error = VOP_GETATTR(fcp->c_backvp, &fcp->c_attr, 0, cr, NULL); 5016 mutex_exit(&fcp->c_statelock); 5017 out: 
5018 if (backvp) 5019 VN_RELE(backvp); 5020 5021 return (error); 5022 } 5023 5024 static int 5025 cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp, char *tnm, 5026 cred_t *cr) 5027 { 5028 cnode_t *tdcp = VTOC(tdvp); 5029 cnode_t *fcp = VTOC(fvp); 5030 fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp); 5031 int error = 0; 5032 timestruc_t current_time; 5033 off_t commit = 0; 5034 5035 if (fvp->v_type == VDIR && secpolicy_fs_linkdir(cr, fvp->v_vfsp) != 0 || 5036 fcp->c_attr.va_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0) 5037 return (EPERM); 5038 5039 if (CFS_ISFS_WRITE_AROUND(fscp)) 5040 return (ETIMEDOUT); 5041 5042 if (fcp->c_metadata.md_flags & MD_NEEDATTRS) 5043 return (ETIMEDOUT); 5044 5045 mutex_enter(&tdcp->c_statelock); 5046 5047 /* check permissions */ 5048 if (error = cachefs_access_local(tdcp, (VEXEC|VWRITE), cr)) { 5049 mutex_exit(&tdcp->c_statelock); 5050 goto out; 5051 } 5052 5053 /* the directory front file must be populated */ 5054 if ((tdcp->c_metadata.md_flags & MD_POPULATED) == 0) { 5055 error = ETIMEDOUT; 5056 mutex_exit(&tdcp->c_statelock); 5057 goto out; 5058 } 5059 5060 /* make sure tnm does not already exist in the directory */ 5061 error = cachefs_dir_look(tdcp, tnm, NULL, NULL, NULL, NULL); 5062 if (error == ENOTDIR) { 5063 error = ETIMEDOUT; 5064 mutex_exit(&tdcp->c_statelock); 5065 goto out; 5066 } 5067 if (error != ENOENT) { 5068 error = EEXIST; 5069 mutex_exit(&tdcp->c_statelock); 5070 goto out; 5071 } 5072 5073 mutex_enter(&fcp->c_statelock); 5074 5075 /* create a mapping for the file if necessary */ 5076 if ((fcp->c_metadata.md_flags & MD_MAPPING) == 0) { 5077 error = cachefs_dlog_cidmap(fscp); 5078 if (error) { 5079 mutex_exit(&fcp->c_statelock); 5080 mutex_exit(&tdcp->c_statelock); 5081 error = ENOSPC; 5082 goto out; 5083 } 5084 fcp->c_metadata.md_flags |= MD_MAPPING; 5085 fcp->c_flags |= CN_UPDATED; 5086 } 5087 5088 /* mark file as modified */ 5089 if (cachefs_modified_alloc(fcp)) { 5090 mutex_exit(&fcp->c_statelock); 
5091 mutex_exit(&tdcp->c_statelock); 5092 error = ENOSPC; 5093 goto out; 5094 } 5095 mutex_exit(&fcp->c_statelock); 5096 5097 /* log the operation */ 5098 commit = cachefs_dlog_link(fscp, tdcp, tnm, fcp, cr); 5099 if (commit == 0) { 5100 mutex_exit(&tdcp->c_statelock); 5101 error = ENOSPC; 5102 goto out; 5103 } 5104 5105 gethrestime(¤t_time); 5106 5107 /* make the new link */ 5108 cachefs_modified(tdcp); 5109 error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie, 5110 &fcp->c_id, SM_ASYNC); 5111 if (error) { 5112 error = 0; 5113 mutex_exit(&tdcp->c_statelock); 5114 goto out; 5115 } 5116 5117 /* Update mtime/ctime of parent dir */ 5118 tdcp->c_metadata.md_localmtime = current_time; 5119 tdcp->c_metadata.md_localctime = current_time; 5120 tdcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME; 5121 tdcp->c_flags |= CN_UPDATED; 5122 mutex_exit(&tdcp->c_statelock); 5123 5124 /* update the file we linked to */ 5125 mutex_enter(&fcp->c_statelock); 5126 fcp->c_attr.va_nlink++; 5127 fcp->c_metadata.md_localctime = current_time; 5128 fcp->c_metadata.md_flags |= MD_LOCALCTIME; 5129 fcp->c_flags |= CN_UPDATED; 5130 mutex_exit(&fcp->c_statelock); 5131 5132 out: 5133 if (commit) { 5134 /* commit the log entry */ 5135 if (cachefs_dlog_commit(fscp, commit, error)) { 5136 /*EMPTY*/ 5137 /* XXX bob: fix on panic */ 5138 } 5139 } 5140 5141 return (error); 5142 } 5143 5144 /* 5145 * Serialize all renames in CFS, to avoid deadlocks - We have to hold two 5146 * cnodes atomically. 
 */
kmutex_t cachefs_rename_lock;

/*
 * cachefs_rename - VOP_RENAME entry point.  Renames `onm' in `odvp'
 * to `nnm' in `ndvp', retrying across connected/disconnected cache
 * states.  Handles the "target exists" case (delvp) including the
 * ufs-style vfs lock against a concurrent mount on a target dir.
 */
/*ARGSUSED*/
static int
cachefs_rename(vnode_t *odvp, char *onm, vnode_t *ndvp,
	char *nnm, cred_t *cr, caller_context_t *ct, int flags)
{
	fscache_t *fscp = C_TO_FSCACHE(VTOC(odvp));
	cachefscache_t *cachep = fscp->fs_cache;
	int error = 0;
	int held = 0;
	int connected = 0;
	vnode_t *delvp = NULL;	/* existing target entry, if any */
	vnode_t *tvp = NULL;	/* source entry, when odvp != ndvp */
	int vfslock = 0;	/* 1 if vn_vfswlock() held on delvp */
	struct vnode *realvp;

	if (getzoneid() != GLOBAL_ZONEID)
		return (EPERM);

	if (VOP_REALVP(ndvp, &realvp, ct) == 0)
		ndvp = realvp;

	/*
	 * if the fs NOFILL or NOCACHE flags are on, then the old and new
	 * directory cnodes better indicate NOCACHE mode as well.
	 */
	ASSERT(
	    (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE)) == 0 ||
	    ((VTOC(odvp)->c_flags & CN_NOCACHE) &&
	    (VTOC(ndvp)->c_flags & CN_NOCACHE)));

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the rename operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(odvp));
	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(ndvp));

	for (;;) {
		/* drop state from the previous iteration, if any */
		if (vfslock) {
			vn_vfsunlock(delvp);
			vfslock = 0;
		}
		if (delvp) {
			VN_RELE(delvp);
			delvp = NULL;
		}

		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop for NFSv4 connected support */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;

		/* sanity check */
		if ((odvp->v_type != VDIR) || (ndvp->v_type != VDIR)) {
			error = EINVAL;
			break;
		}

		/* cannot rename from or to . or .. */
		if (strcmp(onm, ".") == 0 || strcmp(onm, "..") == 0 ||
		    strcmp(nnm, ".") == 0 || strcmp(nnm, "..") == 0) {
			error = EINVAL;
			break;
		}

		if (odvp != ndvp) {
			/*
			 * if moving a directory, its notion
			 * of ".." will change
			 */
			error = cachefs_lookup_common(odvp, onm, &tvp,
			    NULL, 0, NULL, cr);
			if (error == 0) {
				ASSERT(tvp != NULL);
				if (tvp->v_type == VDIR) {
					cnode_t *cp = VTOC(tvp);

					dnlc_remove(tvp, "..");

					mutex_enter(&cp->c_statelock);
					CFSOP_MODIFY_COBJECT(fscp, cp, cr);
					mutex_exit(&cp->c_statelock);
				}
			} else {
				tvp = NULL;
				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
					if (CFS_TIMEOUT(fscp, error)) {
						cachefs_cd_release(fscp);
						held = 0;
						cachefs_cd_timedout(fscp);
						connected = 0;
						continue;
					}
				} else {
					if (CFS_TIMEOUT(fscp, error)) {
						connected = 1;
						continue;
					}
				}
				break;
			}
		}

		/* get the cnode if file being deleted */
		error = cachefs_lookup_common(ndvp, nnm, &delvp, NULL, 0,
		    NULL, cr);
		if (error) {
			delvp = NULL;
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				if (CFS_TIMEOUT(fscp, error)) {
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					connected = 0;
					continue;
				}
			} else {
				if (CFS_TIMEOUT(fscp, error)) {
					connected = 1;
					continue;
				}
			}
			/* ENOENT (no existing target) is fine; carry on */
			if (error != ENOENT)
				break;
		}

		if (delvp && delvp->v_type == VDIR) {
			/* see ufs_dirremove for why this is done, mount race */
			if (vn_vfswlock(delvp)) {
				error = EBUSY;
				break;
			}
			vfslock = 1;
			if (vn_mountedvfs(delvp) != NULL) {
				error = EBUSY;
				break;
			}
		}

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_rename_connected(odvp, onm,
			    ndvp, nnm, cr, delvp);
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_rename_disconnected(odvp, onm,
			    ndvp, nnm, cr, delvp);
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			}
		}
		break;
	}

	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RENAME)) {
		struct fid gone;

		bzero(&gone, sizeof (gone));
		gone.fid_len = MAXFIDSZ;
		if (delvp != NULL)
			(void) VOP_FID(delvp, &gone, ct);

		cachefs_log_rename(cachep, error, fscp->fs_cfsvfsp,
		    &gone, 0, (delvp != NULL), crgetuid(cr));
	}

	if (held)
		cachefs_cd_release(fscp);

	if (vfslock)
		vn_vfsunlock(delvp);

	if (delvp)
		VN_RELE(delvp);
	if (tvp)
		VN_RELE(tvp);

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
	return (error);
}

/*
 * cachefs_rename_connected
 *
 * Perform the rename on the back fs via VOP_RENAME(), then update the
 * cached source/target directories and the affected cnodes.  `delvp',
 * if non-NULL, is the existing target entry being replaced.
 */
static int
cachefs_rename_connected(vnode_t *odvp, char *onm, vnode_t *ndvp,
    char *nnm, cred_t *cr, vnode_t *delvp)
{
	cnode_t *odcp = VTOC(odvp);
	cnode_t *ndcp = VTOC(ndvp);
	vnode_t *revp = NULL;
	cnode_t *recp;
	cnode_t *delcp;
	fscache_t *fscp = C_TO_FSCACHE(odcp);
	int error = 0;
	struct fid cookie;
	struct fid *cookiep;
	cfs_cid_t cid;
	int gotdirent;

	/* find the file we are renaming */
	error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr);
	if (error)
		return (error);
	recp = VTOC(revp);

	/*
	 * To avoid deadlock, we acquire this global rename lock before
	 * we try to get the locks for the source and target directories.
	 */
	mutex_enter(&cachefs_rename_lock);
	rw_enter(&odcp->c_rwlock, RW_WRITER);
	if (odcp != ndcp) {
		rw_enter(&ndcp->c_rwlock, RW_WRITER);
	}
	mutex_exit(&cachefs_rename_lock);

	ASSERT((odcp->c_flags & CN_ASYNC_POP_WORKING) == 0);
	ASSERT((ndcp->c_flags & CN_ASYNC_POP_WORKING) == 0);

	mutex_enter(&odcp->c_statelock);
	if (odcp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, odcp);
		if (error) {
			mutex_exit(&odcp->c_statelock);
			goto out;
		}
	}

	error = CFSOP_CHECK_COBJECT(fscp, odcp, 0, cr);
	if (error) {
		mutex_exit(&odcp->c_statelock);
		goto out;
	}
	mutex_exit(&odcp->c_statelock);

	if (odcp != ndcp) {
		mutex_enter(&ndcp->c_statelock);
		if (ndcp->c_backvp == NULL) {
			error = cachefs_getbackvp(fscp, ndcp);
			if (error) {
				mutex_exit(&ndcp->c_statelock);
				goto out;
			}
		}

		error = CFSOP_CHECK_COBJECT(fscp, ndcp, 0, cr);
		if (error) {
			mutex_exit(&ndcp->c_statelock);
			goto out;
		}
		mutex_exit(&ndcp->c_statelock);
	}

	/* if a file is being deleted because of this rename */
	if (delvp) {
		/* if src and dest file are same */
		if (delvp == revp) {
			error = 0;
			goto out;
		}

		/*
		 * If the cnode is active, make a link to the file
		 * so operations on the file will continue.
		 */
		dnlc_purge_vp(delvp);
		delcp = VTOC(delvp);
		if ((delvp->v_type != VDIR) &&
		    !((delvp->v_count == 1) ||
		    ((delvp->v_count == 2) && delcp->c_ipending))) {
			error = cachefs_remove_dolink(ndvp, delvp, nnm, cr);
			if (error)
				goto out;
		}
	}

	/* do the rename on the back fs */
	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_rename (nfsv4): odcp %p, odbackvp %p, "
	    " ndcp %p, ndbackvp %p, onm %s, nnm %s\n",
	    odcp, odcp->c_backvp, ndcp, ndcp->c_backvp, onm, nnm));
	error = VOP_RENAME(odcp->c_backvp, onm, ndcp->c_backvp, nnm, cr, NULL,
	    0);
	if (error)
		goto out;

	/* purge mappings to file in the old directory */
	dnlc_purge_vp(odvp);

	/* purge mappings in the new dir if we deleted a file */
	if (delvp && (odvp != ndvp))
		dnlc_purge_vp(ndvp);

	/* update the file we just deleted */
	if (delvp) {
		mutex_enter(&delcp->c_statelock);
		if (delcp->c_attr.va_nlink == 1) {
			delcp->c_flags |= CN_DESTROY;
		} else {
			delcp->c_flags |= CN_UPDATED;
		}
		delcp->c_attr.va_nlink--;
		CFSOP_MODIFY_COBJECT(fscp, delcp, cr);
		mutex_exit(&delcp->c_statelock);
	}

	/* find the entry in the old directory */
	mutex_enter(&odcp->c_statelock);
	gotdirent = 0;
	cookiep = NULL;
	if (CFS_ISFS_NONSHARED(fscp) &&
	    (odcp->c_metadata.md_flags & MD_POPULATED)) {
		error = cachefs_dir_look(odcp, onm, &cookie,
		    NULL, NULL, &cid);
		if (error == 0 || error == EINVAL) {
			gotdirent = 1;
			if (error == 0)
				cookiep = &cookie;
		} else {
			cachefs_inval_object(odcp);
		}
	}
	error = 0;

	/* remove the directory entry from the old directory */
	if (gotdirent) {
		error = cachefs_dir_rmentry(odcp, onm);
		if (error) {
			cachefs_nocache(odcp);
			error = 0;
		}
	}
	CFSOP_MODIFY_COBJECT(fscp, odcp, cr);
	mutex_exit(&odcp->c_statelock);

	/*
install the directory entry in the new directory */ 5505 mutex_enter(&ndcp->c_statelock); 5506 if (CFS_ISFS_NONSHARED(fscp) && 5507 (ndcp->c_metadata.md_flags & MD_POPULATED)) { 5508 error = 1; 5509 if (gotdirent) { 5510 ASSERT(cid.cid_fileno != 0); 5511 error = 0; 5512 if (delvp) { 5513 error = cachefs_dir_rmentry(ndcp, nnm); 5514 } 5515 if (error == 0) { 5516 error = cachefs_dir_enter(ndcp, nnm, cookiep, 5517 &cid, SM_ASYNC); 5518 } 5519 } 5520 if (error) { 5521 cachefs_nocache(ndcp); 5522 error = 0; 5523 } 5524 } 5525 if (odcp != ndcp) 5526 CFSOP_MODIFY_COBJECT(fscp, ndcp, cr); 5527 mutex_exit(&ndcp->c_statelock); 5528 5529 /* ctime of renamed file has changed */ 5530 mutex_enter(&recp->c_statelock); 5531 CFSOP_MODIFY_COBJECT(fscp, recp, cr); 5532 mutex_exit(&recp->c_statelock); 5533 5534 out: 5535 if (odcp != ndcp) 5536 rw_exit(&ndcp->c_rwlock); 5537 rw_exit(&odcp->c_rwlock); 5538 5539 VN_RELE(revp); 5540 5541 return (error); 5542 } 5543 5544 static int 5545 cachefs_rename_disconnected(vnode_t *odvp, char *onm, vnode_t *ndvp, 5546 char *nnm, cred_t *cr, vnode_t *delvp) 5547 { 5548 cnode_t *odcp = VTOC(odvp); 5549 cnode_t *ndcp = VTOC(ndvp); 5550 cnode_t *delcp = NULL; 5551 vnode_t *revp = NULL; 5552 cnode_t *recp; 5553 fscache_t *fscp = C_TO_FSCACHE(odcp); 5554 int error = 0; 5555 struct fid cookie; 5556 struct fid *cookiep; 5557 cfs_cid_t cid; 5558 off_t commit = 0; 5559 timestruc_t current_time; 5560 5561 if (CFS_ISFS_WRITE_AROUND(fscp)) 5562 return (ETIMEDOUT); 5563 5564 /* find the file we are renaming */ 5565 error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr); 5566 if (error) 5567 return (error); 5568 recp = VTOC(revp); 5569 5570 /* 5571 * To avoid deadlock, we acquire this global rename lock before 5572 * we try to get the locks for the source and target directories. 
5573 */ 5574 mutex_enter(&cachefs_rename_lock); 5575 rw_enter(&odcp->c_rwlock, RW_WRITER); 5576 if (odcp != ndcp) { 5577 rw_enter(&ndcp->c_rwlock, RW_WRITER); 5578 } 5579 mutex_exit(&cachefs_rename_lock); 5580 5581 if (recp->c_metadata.md_flags & MD_NEEDATTRS) { 5582 error = ETIMEDOUT; 5583 goto out; 5584 } 5585 5586 if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) { 5587 mutex_enter(&recp->c_statelock); 5588 if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) { 5589 error = cachefs_dlog_cidmap(fscp); 5590 if (error) { 5591 mutex_exit(&recp->c_statelock); 5592 error = ENOSPC; 5593 goto out; 5594 } 5595 recp->c_metadata.md_flags |= MD_MAPPING; 5596 recp->c_flags |= CN_UPDATED; 5597 } 5598 mutex_exit(&recp->c_statelock); 5599 } 5600 5601 /* check permissions */ 5602 /* XXX clean up this mutex junk sometime */ 5603 mutex_enter(&odcp->c_statelock); 5604 error = cachefs_access_local(odcp, (VEXEC|VWRITE), cr); 5605 mutex_exit(&odcp->c_statelock); 5606 if (error != 0) 5607 goto out; 5608 mutex_enter(&ndcp->c_statelock); 5609 error = cachefs_access_local(ndcp, (VEXEC|VWRITE), cr); 5610 mutex_exit(&ndcp->c_statelock); 5611 if (error != 0) 5612 goto out; 5613 mutex_enter(&odcp->c_statelock); 5614 error = cachefs_stickyrmchk(odcp, recp, cr); 5615 mutex_exit(&odcp->c_statelock); 5616 if (error != 0) 5617 goto out; 5618 5619 /* dirs must be populated */ 5620 if (((odcp->c_metadata.md_flags & MD_POPULATED) == 0) || 5621 ((ndcp->c_metadata.md_flags & MD_POPULATED) == 0)) { 5622 error = ETIMEDOUT; 5623 goto out; 5624 } 5625 5626 /* for now do not allow moving dirs because could cause cycles */ 5627 if ((((revp->v_type == VDIR) && (odvp != ndvp))) || 5628 (revp == odvp)) { 5629 error = ETIMEDOUT; 5630 goto out; 5631 } 5632 5633 /* if a file is being deleted because of this rename */ 5634 if (delvp) { 5635 delcp = VTOC(delvp); 5636 5637 /* if src and dest file are the same */ 5638 if (delvp == revp) { 5639 error = 0; 5640 goto out; 5641 } 5642 5643 if (delcp->c_metadata.md_flags & 
MD_NEEDATTRS) { 5644 error = ETIMEDOUT; 5645 goto out; 5646 } 5647 5648 /* if there are hard links to this file */ 5649 if (delcp->c_attr.va_nlink > 1) { 5650 mutex_enter(&delcp->c_statelock); 5651 if (cachefs_modified_alloc(delcp)) { 5652 mutex_exit(&delcp->c_statelock); 5653 error = ENOSPC; 5654 goto out; 5655 } 5656 5657 if ((delcp->c_metadata.md_flags & MD_MAPPING) == 0) { 5658 error = cachefs_dlog_cidmap(fscp); 5659 if (error) { 5660 mutex_exit(&delcp->c_statelock); 5661 error = ENOSPC; 5662 goto out; 5663 } 5664 delcp->c_metadata.md_flags |= MD_MAPPING; 5665 delcp->c_flags |= CN_UPDATED; 5666 } 5667 mutex_exit(&delcp->c_statelock); 5668 } 5669 5670 /* make sure we can delete file */ 5671 mutex_enter(&ndcp->c_statelock); 5672 error = cachefs_stickyrmchk(ndcp, delcp, cr); 5673 mutex_exit(&ndcp->c_statelock); 5674 if (error != 0) 5675 goto out; 5676 5677 /* 5678 * If the cnode is active, make a link to the file 5679 * so operations on the file will continue. 5680 */ 5681 dnlc_purge_vp(delvp); 5682 if ((delvp->v_type != VDIR) && 5683 !((delvp->v_count == 1) || 5684 ((delvp->v_count == 2) && delcp->c_ipending))) { 5685 error = cachefs_remove_dolink(ndvp, delvp, nnm, cr); 5686 if (error) 5687 goto out; 5688 } 5689 } 5690 5691 /* purge mappings to file in the old directory */ 5692 dnlc_purge_vp(odvp); 5693 5694 /* purge mappings in the new dir if we deleted a file */ 5695 if (delvp && (odvp != ndvp)) 5696 dnlc_purge_vp(ndvp); 5697 5698 /* find the entry in the old directory */ 5699 mutex_enter(&odcp->c_statelock); 5700 if ((odcp->c_metadata.md_flags & MD_POPULATED) == 0) { 5701 mutex_exit(&odcp->c_statelock); 5702 error = ETIMEDOUT; 5703 goto out; 5704 } 5705 cookiep = NULL; 5706 error = cachefs_dir_look(odcp, onm, &cookie, NULL, NULL, &cid); 5707 if (error == 0 || error == EINVAL) { 5708 if (error == 0) 5709 cookiep = &cookie; 5710 } else { 5711 mutex_exit(&odcp->c_statelock); 5712 if (error == ENOTDIR) 5713 error = ETIMEDOUT; 5714 goto out; 5715 } 5716 error = 0; 
5717 5718 /* write the log entry */ 5719 commit = cachefs_dlog_rename(fscp, odcp, onm, ndcp, nnm, cr, 5720 recp, delcp); 5721 if (commit == 0) { 5722 mutex_exit(&odcp->c_statelock); 5723 error = ENOSPC; 5724 goto out; 5725 } 5726 5727 /* remove the directory entry from the old directory */ 5728 cachefs_modified(odcp); 5729 error = cachefs_dir_rmentry(odcp, onm); 5730 if (error) { 5731 mutex_exit(&odcp->c_statelock); 5732 if (error == ENOTDIR) 5733 error = ETIMEDOUT; 5734 goto out; 5735 } 5736 mutex_exit(&odcp->c_statelock); 5737 5738 /* install the directory entry in the new directory */ 5739 mutex_enter(&ndcp->c_statelock); 5740 error = ENOTDIR; 5741 if (ndcp->c_metadata.md_flags & MD_POPULATED) { 5742 ASSERT(cid.cid_fileno != 0); 5743 cachefs_modified(ndcp); 5744 error = 0; 5745 if (delvp) { 5746 error = cachefs_dir_rmentry(ndcp, nnm); 5747 } 5748 if (error == 0) { 5749 error = cachefs_dir_enter(ndcp, nnm, cookiep, 5750 &cid, SM_ASYNC); 5751 } 5752 } 5753 if (error) { 5754 cachefs_nocache(ndcp); 5755 mutex_exit(&ndcp->c_statelock); 5756 mutex_enter(&odcp->c_statelock); 5757 cachefs_nocache(odcp); 5758 mutex_exit(&odcp->c_statelock); 5759 if (error == ENOTDIR) 5760 error = ETIMEDOUT; 5761 goto out; 5762 } 5763 mutex_exit(&ndcp->c_statelock); 5764 5765 gethrestime(¤t_time); 5766 5767 /* update the file we just deleted */ 5768 if (delvp) { 5769 mutex_enter(&delcp->c_statelock); 5770 delcp->c_attr.va_nlink--; 5771 delcp->c_metadata.md_localctime = current_time; 5772 delcp->c_metadata.md_flags |= MD_LOCALCTIME; 5773 if (delcp->c_attr.va_nlink == 0) { 5774 delcp->c_flags |= CN_DESTROY; 5775 } else { 5776 delcp->c_flags |= CN_UPDATED; 5777 } 5778 mutex_exit(&delcp->c_statelock); 5779 } 5780 5781 /* update the file we renamed */ 5782 mutex_enter(&recp->c_statelock); 5783 recp->c_metadata.md_localctime = current_time; 5784 recp->c_metadata.md_flags |= MD_LOCALCTIME; 5785 recp->c_flags |= CN_UPDATED; 5786 mutex_exit(&recp->c_statelock); 5787 5788 /* update the source 
directory */
	mutex_enter(&odcp->c_statelock);
	odcp->c_metadata.md_localctime = current_time;
	odcp->c_metadata.md_localmtime = current_time;
	odcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
	odcp->c_flags |= CN_UPDATED;
	mutex_exit(&odcp->c_statelock);

	/* update the destination directory */
	if (odcp != ndcp) {
		mutex_enter(&ndcp->c_statelock);
		ndcp->c_metadata.md_localctime = current_time;
		ndcp->c_metadata.md_localmtime = current_time;
		ndcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
		ndcp->c_flags |= CN_UPDATED;
		mutex_exit(&ndcp->c_statelock);
	}

out:
	if (commit) {
		/* commit the log entry */
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
	}

	if (odcp != ndcp)
		rw_exit(&ndcp->c_rwlock);
	rw_exit(&odcp->c_rwlock);

	VN_RELE(revp);

	return (error);
}

/*
 * cachefs_mkdir -- VOP_MKDIR entry for cachefs.  Retries the operation
 * in connected or disconnected mode until it completes, times out, or
 * fails hard; logs the result when mkdir logging is enabled.
 */
/*ARGSUSED*/
static int
cachefs_mkdir(vnode_t *dvp, char *nm, vattr_t *vap, vnode_t **vpp,
	cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	cachefscache_t *cachep = fscp->fs_cache;
	int error = 0;
	int held = 0;		/* non-zero while cd access + rwlock held */
	int connected = 0;	/* force connected mode on retry */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_mkdir: ENTER dvp %p\n", (void *)dvp);
#endif

	/* cachefs is only supported in the global zone */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
		ASSERT(dcp->c_flags & CN_NOCACHE);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system.
For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (e.g., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the mkdir operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			rw_exit(&dcp->c_rwlock);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		rw_enter(&dcp->c_rwlock, RW_WRITER);
		held = 1;

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_mkdir_connected(dvp, nm, vap,
			    vpp, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/* back fs timed out; drop and retry */
				rw_exit(&dcp->c_rwlock);
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_mkdir_disconnected(dvp, nm, vap,
			    vpp, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/* retry, requiring connected mode */
				connected = 1;
				continue;
			}
		}
		break;
	}

	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_MKDIR)) {
		fid_t *fidp = NULL;
		ino64_t fileno = 0;
		cnode_t *cp = NULL;
		if (error == 0)
			cp = VTOC(*vpp);

		if (cp != NULL) {
			fidp = &cp->c_metadata.md_cookie;
			fileno = cp->c_id.cid_fileno;
		}

		cachefs_log_mkdir(cachep, error, fscp->fs_cfsvfsp,
		    fidp, fileno, crgetuid(cr));
	}

	if (held) {
		rw_exit(&dcp->c_rwlock);
		cachefs_cd_release(fscp);
	}
	if (error == 0 && CFS_ISFS_NONSHARED(fscp))
		(void) cachefs_pack(dvp, nm, cr);

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_mkdir: EXIT error = %d\n", error);
#endif
	return (error);
}

/*
 * cachefs_mkdir_connected -- performs mkdir against the back file
 * system, then builds a cnode for the new directory and (when the
 * parent front file is populated) adds the new entry to it.
 */
static int
cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap,
	vnode_t **vpp, cred_t *cr)
{
	cnode_t *newcp = NULL, *dcp = VTOC(dvp);
	struct vnode *vp = NULL;
	int error = 0;
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	struct fid cookie;
	struct vattr attr;
	cfs_cid_t cid, dircid;
	uint32_t valid_fid;

	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
		ASSERT(dcp->c_flags & CN_NOCACHE);

	mutex_enter(&dcp->c_statelock);

	/* get backvp of dir */
	if (dcp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, dcp);
		if (error) {
			mutex_exit(&dcp->c_statelock);
			goto out;
		}
	}

	/* consistency check the directory */
	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	dircid = dcp->c_id;

	/* make the dir on the back fs */
	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_mkdir (nfsv4): dcp %p, dbackvp %p, "
	    "name %s\n", dcp, dcp->c_backvp, nm));
	error = VOP_MKDIR(dcp->c_backvp, nm, vap, &vp, cr, NULL, 0, NULL);
	mutex_exit(&dcp->c_statelock);
	if (error) {
		goto out;
	}

	/* get the cookie and make the cnode */
	attr.va_mask = AT_ALL;
	/* NFSv4 pass-through does not use back-fs fids */
	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
	error = cachefs_getcookie(vp, &cookie, &attr, cr, valid_fid);
	if (error) {
		goto out;
	}
	cid.cid_flags = 0;
	cid.cid_fileno = attr.va_nodeid;
	error = cachefs_cnode_make(&cid, fscp, (valid_fid ?
&cookie : NULL),
	    &attr, vp, cr, 0, &newcp);
	if (error) {
		goto out;
	}
	ASSERT(CTOV(newcp)->v_type == VDIR);
	*vpp = CTOV(newcp);

	/* if the dir is populated, add the new entry */
	mutex_enter(&dcp->c_statelock);
	if (CFS_ISFS_NONSHARED(fscp) &&
	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
		error = cachefs_dir_enter(dcp, nm, &cookie, &newcp->c_id,
		    SM_ASYNC);
		if (error) {
			/* front file update failed; stop caching the dir */
			cachefs_nocache(dcp);
			error = 0;
		}
	}
	dcp->c_attr.va_nlink++;
	dcp->c_flags |= CN_UPDATED;
	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
	mutex_exit(&dcp->c_statelock);

	/* XXX bob: should we do a filldir here? or just add . and .. */
	/* maybe should kick off an async filldir so caller does not wait */

	/* put the entry in the dnlc */
	if (cachefs_dnlc)
		dnlc_enter(dvp, nm, *vpp);

	/* save the fileno of the parent so can find the name */
	if (bcmp(&newcp->c_metadata.md_parent, &dircid,
	    sizeof (cfs_cid_t)) != 0) {
		mutex_enter(&newcp->c_statelock);
		newcp->c_metadata.md_parent = dircid;
		newcp->c_flags |= CN_UPDATED;
		mutex_exit(&newcp->c_statelock);
	}
out:
	if (vp)
		VN_RELE(vp);

	return (error);
}

/*
 * cachefs_mkdir_disconnected -- creates the directory locally while
 * disconnected: builds a cnode and front file, and logs the mkdir so
 * it can be replayed against the back fs at reconnect time.
 */
static int
cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
	vnode_t **vpp, cred_t *cr)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error;
	cnode_t *newcp = NULL;
	struct vattr va;
	timestruc_t current_time;
	off_t commit = 0;	/* dlog offset; non-zero once logged */
	char *s;
	int namlen;

	/* don't allow '/' characters in pathname component */
	for (s = nm, namlen = 0; *s; s++, namlen++)
		if (*s == '/')
			return (EACCES);
	if (namlen == 0)
		return (EINVAL);

	/* write-around file systems cannot operate disconnected */
	if (CFS_ISFS_WRITE_AROUND(fscp))
		return (ETIMEDOUT);

	mutex_enter(&dcp->c_statelock);

	/* check permissions */
	if (error =
cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* the directory front file must be populated */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		error = ETIMEDOUT;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* make sure nm does not already exist in the directory */
	error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL);
	if (error == ENOTDIR) {
		error = ETIMEDOUT;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	if (error != ENOENT) {
		/* entry found (or lookup failed oddly) -- treat as extant */
		error = EEXIST;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* make up a reasonable set of attributes */
	cachefs_attr_setup(vap, &va, dcp, cr);
	va.va_type = VDIR;
	va.va_mode |= S_IFDIR;
	va.va_nlink = 2;	/* "." plus the parent's entry */

	mutex_exit(&dcp->c_statelock);

	/* create the cnode */
	error = cachefs_cnode_create(fscp, &va, 0, &newcp);
	if (error)
		goto out;

	mutex_enter(&newcp->c_statelock);

	error = cachefs_dlog_cidmap(fscp);
	if (error) {
		mutex_exit(&newcp->c_statelock);
		goto out;
	}

	cachefs_creategid(dcp, newcp, vap, cr);
	mutex_enter(&dcp->c_statelock);
	cachefs_createacl(dcp, newcp);
	mutex_exit(&dcp->c_statelock);
	gethrestime(&current_time);
	newcp->c_metadata.md_vattr.va_atime = current_time;
	newcp->c_metadata.md_localctime = current_time;
	newcp->c_metadata.md_localmtime = current_time;
	newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME |
	    MD_LOCALCTIME;
	newcp->c_flags |= CN_UPDATED;

	/* make a front file for the new directory, add . and .. */
	error = cachefs_dir_new(dcp, newcp);
	if (error) {
		mutex_exit(&newcp->c_statelock);
		goto out;
	}
	cachefs_modified(newcp);

	/*
	 * write the metadata now rather than waiting until
	 * inactive so that if there's no space we can let
	 * the caller know.
 */
	ASSERT(newcp->c_frontvp);
	ASSERT((newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
	ASSERT((newcp->c_flags & CN_ALLOC_PENDING) == 0);
	error = filegrp_write_metadata(newcp->c_filegrp,
	    &newcp->c_id, &newcp->c_metadata);
	if (error) {
		mutex_exit(&newcp->c_statelock);
		goto out;
	}
	mutex_exit(&newcp->c_statelock);

	/* log the operation */
	commit = cachefs_dlog_mkdir(fscp, dcp, newcp, nm, &va, cr);
	if (commit == 0) {
		error = ENOSPC;
		goto out;
	}

	mutex_enter(&dcp->c_statelock);

	/* make sure directory is still populated */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		mutex_exit(&dcp->c_statelock);
		error = ETIMEDOUT;
		goto out;
	}
	cachefs_modified(dcp);

	/* enter the new file in the directory */
	error = cachefs_dir_enter(dcp, nm, &newcp->c_metadata.md_cookie,
	    &newcp->c_id, SM_ASYNC);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* update parent dir times */
	dcp->c_metadata.md_localctime = current_time;
	dcp->c_metadata.md_localmtime = current_time;
	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
	dcp->c_attr.va_nlink++;
	dcp->c_flags |= CN_UPDATED;
	mutex_exit(&dcp->c_statelock);

out:
	if (commit) {
		/* commit the log entry */
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
	}
	if (error) {
		/* undo the local create on failure */
		if (newcp) {
			mutex_enter(&newcp->c_statelock);
			newcp->c_flags |= CN_DESTROY;
			mutex_exit(&newcp->c_statelock);
			VN_RELE(CTOV(newcp));
		}
	} else {
		*vpp = CTOV(newcp);
	}
	return (error);
}

/*
 * cachefs_rmdir -- VOP_RMDIR entry for cachefs.  Retries the removal
 * in connected or disconnected mode, guarding against mount races on
 * the target directory.
 */
/*ARGSUSED*/
static int
cachefs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
	caller_context_t *ct, int flags)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
cachefscache_t *cachep = fscp->fs_cache;
	int error = 0;
	int held = 0;		/* non-zero while cd access is held */
	int connected = 0;	/* force connected mode on retry */
	size_t namlen;
	vnode_t *vp = NULL;	/* target directory, from lookup */
	int vfslock = 0;	/* non-zero while vn_vfswlock(vp) held */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_rmdir: ENTER vp %p\n", (void *)dvp);
#endif

	/* cachefs is only supported in the global zone */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
		ASSERT(dcp->c_flags & CN_NOCACHE);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (e.g., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the rmdir operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	for (;;) {
		/* release state carried over from a prior iteration */
		if (vfslock) {
			vn_vfsunlock(vp);
			vfslock = 0;
		}
		if (vp) {
			VN_RELE(vp);
			vp = NULL;
		}

		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;

		/* if disconnected, do some extra error checking */
		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
			/* check permissions */
			mutex_enter(&dcp->c_statelock);
			error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
			mutex_exit(&dcp->c_statelock);
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			}
			if (error)
				break;

			namlen = strlen(nm);
			if (namlen == 0) {
				error = EINVAL;
				break;
			}

			/*
 cannot remove . and .. */
			if (nm[0] == '.') {
				if (namlen == 1) {
					error = EINVAL;
					break;
				} else if (namlen == 2 && nm[1] == '.') {
					error = EEXIST;
					break;
				}
			}

		}

		/* get the cnode of the dir to remove */
		error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
		if (error) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				if (CFS_TIMEOUT(fscp, error)) {
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					connected = 0;
					continue;
				}
			} else {
				if (CFS_TIMEOUT(fscp, error)) {
					connected = 1;
					continue;
				}
			}
			break;
		}

		/* must be a dir */
		if (vp->v_type != VDIR) {
			error = ENOTDIR;
			break;
		}

		/* must not be current dir */
		if (VOP_CMP(vp, cdir, ct)) {
			error = EINVAL;
			break;
		}

		/* see ufs_dirremove for why this is done, mount race */
		if (vn_vfswlock(vp)) {
			error = EBUSY;
			break;
		}
		vfslock = 1;
		if (vn_mountedvfs(vp) != NULL) {
			/* something is mounted on the target */
			error = EBUSY;
			break;
		}

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_rmdir_connected(dvp, nm, cdir,
			    cr, vp);
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_rmdir_disconnected(dvp, nm, cdir,
			    cr, vp);
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			}
		}
		break;
	}

	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RMDIR)) {
		ino64_t fileno = 0;
		fid_t *fidp = NULL;
		cnode_t *cp = NULL;
		if (vp)
			cp = VTOC(vp);

		if (cp != NULL) {
			fidp = &cp->c_metadata.md_cookie;
			fileno = cp->c_id.cid_fileno;
		}

		cachefs_log_rmdir(cachep, error, fscp->fs_cfsvfsp,
		    fidp, fileno, crgetuid(cr));
	}

	if (held) {
cachefs_cd_release(fscp);
	}

	if (vfslock)
		vn_vfsunlock(vp);

	if (vp)
		VN_RELE(vp);

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_rmdir: EXIT error = %d\n", error);
#endif

	return (error);
}

/*
 * cachefs_rmdir_connected -- removes the directory on the back fs and
 * keeps the cached parent directory (front file, link counts) in sync.
 */
static int
cachefs_rmdir_connected(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
	vnode_t *vp)
{
	cnode_t *dcp = VTOC(dvp);
	cnode_t *cp = VTOC(vp);
	int error = 0;
	fscache_t *fscp = C_TO_FSCACHE(dcp);

	rw_enter(&dcp->c_rwlock, RW_WRITER);
	mutex_enter(&dcp->c_statelock);
	mutex_enter(&cp->c_statelock);

	if (dcp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, dcp);
		if (error) {
			goto out;
		}
	}

	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
	if (error)
		goto out;

	/* rmdir on the back fs */
	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_rmdir (nfsv4): dcp %p, dbackvp %p, "
	    "name %s\n", dcp, dcp->c_backvp, nm));
	error = VOP_RMDIR(dcp->c_backvp, nm, cdir, cr, NULL, 0);
	if (error)
		goto out;

	/* if the dir is populated, remove the entry from it */
	if (CFS_ISFS_NONSHARED(fscp) &&
	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
		error = cachefs_dir_rmentry(dcp, nm);
		if (error) {
			/* front file update failed; stop caching the dir */
			cachefs_nocache(dcp);
			error = 0;
		}
	}

	/*
	 * *if* the (hard) link count goes to 0, then we set the CDESTROY
	 * flag on the cnode. The cached object will then be destroyed
	 * at inactive time where the chickens come home to roost :-)
	 * The link cnt for directories is bumped down by 2 'cause the "."
	 * entry has to be elided too ! The link cnt for the parent goes down
	 * by 1 (because of "..").
 */
	cp->c_attr.va_nlink -= 2;
	dcp->c_attr.va_nlink--;
	if (cp->c_attr.va_nlink == 0) {
		cp->c_flags |= CN_DESTROY;
	} else {
		cp->c_flags |= CN_UPDATED;
	}
	dcp->c_flags |= CN_UPDATED;

	dnlc_purge_vp(vp);
	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);

out:
	mutex_exit(&cp->c_statelock);
	mutex_exit(&dcp->c_statelock);
	rw_exit(&dcp->c_rwlock);

	return (error);
}

/*
 * cachefs_rmdir_disconnected -- removes the directory from the local
 * cache only, logging the rmdir for later rollback against the back fs.
 * Requires both front files to be populated; otherwise ETIMEDOUT forces
 * a connected retry.
 */
static int
/*ARGSUSED*/
cachefs_rmdir_disconnected(vnode_t *dvp, char *nm, vnode_t *cdir,
	cred_t *cr, vnode_t *vp)
{
	cnode_t *dcp = VTOC(dvp);
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error = 0;
	off_t commit = 0;	/* dlog offset; non-zero once logged */
	timestruc_t current_time;

	if (CFS_ISFS_WRITE_AROUND(fscp))
		return (ETIMEDOUT);

	rw_enter(&dcp->c_rwlock, RW_WRITER);
	mutex_enter(&dcp->c_statelock);
	mutex_enter(&cp->c_statelock);

	/* both directories must be populated */
	if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) ||
	    ((cp->c_metadata.md_flags & MD_POPULATED) == 0)) {
		error = ETIMEDOUT;
		goto out;
	}

	/* if sticky bit set on the dir, more access checks to perform */
	if (error = cachefs_stickyrmchk(dcp, cp, cr)) {
		goto out;
	}

	/* make sure dir is empty */
	if (cp->c_attr.va_nlink > 2) {
		error = cachefs_dir_empty(cp);
		if (error) {
			if (error == ENOTDIR)
				error = ETIMEDOUT;
			goto out;
		}
		cachefs_modified(cp);
	}
	cachefs_modified(dcp);

	/* log the operation */
	commit = cachefs_dlog_rmdir(fscp, dcp, nm, cp, cr);
	if (commit == 0) {
		error = ENOSPC;
		goto out;
	}

	/* remove name from parent dir */
	error = cachefs_dir_rmentry(dcp, nm);
	if (error == ENOTDIR) {
		error = ETIMEDOUT;
		goto out;
	}
	if (error)
		goto out;

	gethrestime(&current_time);

	/* update deleted dir
 values */
	cp->c_attr.va_nlink -= 2;
	if (cp->c_attr.va_nlink == 0)
		cp->c_flags |= CN_DESTROY;
	else {
		cp->c_metadata.md_localctime = current_time;
		cp->c_metadata.md_flags |= MD_LOCALCTIME;
		cp->c_flags |= CN_UPDATED;
	}

	/* update parent values */
	dcp->c_metadata.md_localctime = current_time;
	dcp->c_metadata.md_localmtime = current_time;
	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
	dcp->c_attr.va_nlink--;
	dcp->c_flags |= CN_UPDATED;

out:
	mutex_exit(&cp->c_statelock);
	mutex_exit(&dcp->c_statelock);
	rw_exit(&dcp->c_rwlock);
	if (commit) {
		/* commit the log entry */
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
		dnlc_purge_vp(vp);
	}
	return (error);
}

/*
 * cachefs_symlink -- VOP_SYMLINK entry for cachefs.  Retries the
 * operation in connected or disconnected mode and logs the result
 * when symlink logging is enabled.
 */
/*ARGSUSED*/
static int
cachefs_symlink(vnode_t *dvp, char *lnm, vattr_t *tva,
	char *tnm, cred_t *cr, caller_context_t *ct, int flags)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	cachefscache_t *cachep = fscp->fs_cache;
	int error = 0;
	int held = 0;		/* non-zero while cd access + rwlock held */
	int connected = 0;	/* force connected mode on retry */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_symlink: ENTER dvp %p lnm %s tnm %s\n",
		    (void *)dvp, lnm, tnm);
#endif

	/* cachefs is only supported in the global zone */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (fscp->fs_cache->c_flags & CACHE_NOCACHE)
		ASSERT(dcp->c_flags & CN_NOCACHE);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (e.g., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the symlink operation.
 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			rw_exit(&dcp->c_rwlock);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		rw_enter(&dcp->c_rwlock, RW_WRITER);
		held = 1;

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_symlink_connected(dvp, lnm, tva,
			    tnm, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/* back fs timed out; drop and retry */
				rw_exit(&dcp->c_rwlock);
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_symlink_disconnected(dvp, lnm, tva,
			    tnm, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/* retry, requiring connected mode */
				connected = 1;
				continue;
			}
		}
		break;
	}

	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_SYMLINK))
		cachefs_log_symlink(cachep, error, fscp->fs_cfsvfsp,
		    &dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno,
		    crgetuid(cr), (uint_t)strlen(tnm));

	if (held) {
		rw_exit(&dcp->c_rwlock);
		cachefs_cd_release(fscp);
	}

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_symlink: EXIT error = %d\n", error);
#endif
	return (error);
}

/*
 * cachefs_symlink_connected -- creates the symlink on the back fs,
 * then looks it up to build a cnode and, where possible, caches the
 * link contents and directory entry locally.
 */
static int
cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva,
	char *tnm, cred_t *cr)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error = 0;
	vnode_t *backvp = NULL;
	cnode_t *newcp = NULL;
	struct vattr va;
	struct fid cookie;
	cfs_cid_t cid;
	uint32_t valid_fid;

	mutex_enter(&dcp->c_statelock);

	if
(dcp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, dcp);
		if (error) {
			cachefs_nocache(dcp);
			mutex_exit(&dcp->c_statelock);
			goto out;
		}
	}

	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_symlink (nfsv4): dcp %p, dbackvp %p, "
	    "lnm %s, tnm %s\n", dcp, dcp->c_backvp, lnm, tnm));
	error = VOP_SYMLINK(dcp->c_backvp, lnm, tva, tnm, cr, NULL, 0);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	if ((dcp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
		/* cannot update the front file group; stop caching */
		cachefs_nocache(dcp);
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);

	/* lookup the symlink we just created and get its fid and attrs */
	(void) VOP_LOOKUP(dcp->c_backvp, lnm, &backvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL);
	if (backvp == NULL) {
		if (CFS_ISFS_BACKFS_NFSV4(fscp) == 0)
			cachefs_nocache(dcp);
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
	error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
	if (error) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		/* the back-fs symlink exists, so report success anyway */
		error = 0;
		cachefs_nocache(dcp);
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	cid.cid_fileno = va.va_nodeid;
	cid.cid_flags = 0;

	/* if the dir is cached, add the symlink to it */
	if (CFS_ISFS_NONSHARED(fscp) &&
	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
		error = cachefs_dir_enter(dcp, lnm, &cookie, &cid, SM_ASYNC);
		if (error) {
			cachefs_nocache(dcp);
			error = 0;
		}
	}
	mutex_exit(&dcp->c_statelock);

	/* make the cnode for the sym link */
	error = cachefs_cnode_make(&cid, fscp, (valid_fid ?
&cookie : NULL),
	    &va, backvp, cr, 0, &newcp);
	if (error) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		/* back-fs symlink exists; degrade to uncached and succeed */
		cachefs_nocache(dcp);
		error = 0;
		goto out;
	}

	/* try to cache the symlink contents */
	rw_enter(&newcp->c_rwlock, RW_WRITER);
	mutex_enter(&newcp->c_statelock);

	/*
	 * try to cache the sym link, note that its a noop if NOCACHE
	 * or NFSv4 is set
	 */
	error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size);
	if (error) {
		cachefs_nocache(newcp);
		error = 0;
	}
	mutex_exit(&newcp->c_statelock);
	rw_exit(&newcp->c_rwlock);

out:
	if (backvp)
		VN_RELE(backvp);
	if (newcp)
		VN_RELE(CTOV(newcp));
	return (error);
}

/*
 * cachefs_symlink_disconnected -- creates the symlink locally while
 * disconnected: builds a cnode, stores the link contents in the front
 * file, and logs the operation for replay at reconnect time.
 */
static int
cachefs_symlink_disconnected(vnode_t *dvp, char *lnm, vattr_t *tva,
	char *tnm, cred_t *cr)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error;
	cnode_t *newcp = NULL;
	struct vattr va;
	timestruc_t current_time;
	off_t commit = 0;	/* dlog offset; non-zero once logged */

	if (CFS_ISFS_WRITE_AROUND(fscp))
		return (ETIMEDOUT);

	mutex_enter(&dcp->c_statelock);

	/* check permissions */
	if (error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* the directory front file must be populated */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		error = ETIMEDOUT;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* make sure lnm does not already exist in the directory */
	error = cachefs_dir_look(dcp, lnm, NULL, NULL, NULL, NULL);
	if (error == ENOTDIR) {
		error = ETIMEDOUT;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	if (error != ENOENT) {
		error = EEXIST;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* make up a reasonable set of attributes */
	cachefs_attr_setup(tva, &va, dcp, cr);
va.va_type = VLNK;
	va.va_mode |= S_IFLNK;
	va.va_size = strlen(tnm);

	mutex_exit(&dcp->c_statelock);

	/* create the cnode */
	error = cachefs_cnode_create(fscp, &va, 0, &newcp);
	if (error)
		goto out;

	rw_enter(&newcp->c_rwlock, RW_WRITER);
	mutex_enter(&newcp->c_statelock);

	error = cachefs_dlog_cidmap(fscp);
	if (error) {
		mutex_exit(&newcp->c_statelock);
		rw_exit(&newcp->c_rwlock);
		error = ENOSPC;
		goto out;
	}

	cachefs_creategid(dcp, newcp, tva, cr);
	mutex_enter(&dcp->c_statelock);
	cachefs_createacl(dcp, newcp);
	mutex_exit(&dcp->c_statelock);
	gethrestime(&current_time);
	newcp->c_metadata.md_vattr.va_atime = current_time;
	newcp->c_metadata.md_localctime = current_time;
	newcp->c_metadata.md_localmtime = current_time;
	newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME |
	    MD_LOCALCTIME;
	newcp->c_flags |= CN_UPDATED;

	/* log the operation */
	commit = cachefs_dlog_symlink(fscp, dcp, newcp, lnm, tva, tnm, cr);
	if (commit == 0) {
		mutex_exit(&newcp->c_statelock);
		rw_exit(&newcp->c_rwlock);
		error = ENOSPC;
		goto out;
	}

	/* store the symlink contents */
	error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size);
	if (error) {
		mutex_exit(&newcp->c_statelock);
		rw_exit(&newcp->c_rwlock);
		goto out;
	}
	if (cachefs_modified_alloc(newcp)) {
		mutex_exit(&newcp->c_statelock);
		rw_exit(&newcp->c_rwlock);
		error = ENOSPC;
		goto out;
	}

	/*
	 * write the metadata now rather than waiting until
	 * inactive so that if there's no space we can let
	 * the caller know.
 */
	if (newcp->c_flags & CN_ALLOC_PENDING) {
		if (newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
			(void) filegrp_allocattr(newcp->c_filegrp);
		}
		error = filegrp_create_metadata(newcp->c_filegrp,
		    &newcp->c_metadata, &newcp->c_id);
		if (error) {
			mutex_exit(&newcp->c_statelock);
			rw_exit(&newcp->c_rwlock);
			goto out;
		}
		newcp->c_flags &= ~CN_ALLOC_PENDING;
	}
	error = filegrp_write_metadata(newcp->c_filegrp,
	    &newcp->c_id, &newcp->c_metadata);
	if (error) {
		mutex_exit(&newcp->c_statelock);
		rw_exit(&newcp->c_rwlock);
		goto out;
	}
	mutex_exit(&newcp->c_statelock);
	rw_exit(&newcp->c_rwlock);

	mutex_enter(&dcp->c_statelock);

	/* enter the new file in the directory */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		error = ETIMEDOUT;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	cachefs_modified(dcp);
	error = cachefs_dir_enter(dcp, lnm, &newcp->c_metadata.md_cookie,
	    &newcp->c_id, SM_ASYNC);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* update parent dir times */
	dcp->c_metadata.md_localctime = current_time;
	dcp->c_metadata.md_localmtime = current_time;
	dcp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME;
	dcp->c_flags |= CN_UPDATED;
	mutex_exit(&dcp->c_statelock);

out:
	if (commit) {
		/* commit the log entry */
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
	}

	if (error) {
		/* undo the local create on failure */
		if (newcp) {
			mutex_enter(&newcp->c_statelock);
			newcp->c_flags |= CN_DESTROY;
			mutex_exit(&newcp->c_statelock);
		}
	}
	if (newcp) {
		VN_RELE(CTOV(newcp));
	}

	return (error);
}

/*
 * cachefs_readdir -- VOP_READDIR entry for cachefs.  Retries the read
 * in connected or disconnected mode; on a disconnected miss it may try
 * a direct connected read before forcing connected mode.
 */
/*ARGSUSED*/
static int
cachefs_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
	caller_context_t *ct, int flags)
{
cnode_t *dcp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	cachefscache_t *cachep = fscp->fs_cache;
	int error = 0;
	int held = 0;		/* non-zero while cd access + rwlock held */
	int connected = 0;	/* force connected mode on retry */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_readdir: ENTER vp %p\n", (void *)vp);
#endif
	/* cachefs is only supported in the global zone */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (e.g., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the readdir operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			rw_exit(&dcp->c_rwlock);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			break;
		rw_enter(&dcp->c_rwlock, RW_READER);
		held = 1;

		/*
		 * quit if link count of zero (posix)
		 * NOTE(review): this guard shows eofp may legitimately be
		 * NULL; the logging call after the loop dereferences *eofp
		 * unconditionally -- verify against callers.
		 */
		if (dcp->c_attr.va_nlink == 0) {
			if (eofp)
				*eofp = 1;
			error = 0;
			break;
		}

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_readdir_connected(vp, uiop, cr,
			    eofp);
			if (CFS_TIMEOUT(fscp, error)) {
				rw_exit(&dcp->c_rwlock);
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_readdir_disconnected(vp, uiop, cr,
			    eofp);
			if (CFS_TIMEOUT(fscp, error)) {
				if (cachefs_cd_access_miss(fscp)) {
					error =
cachefs_readdir_connected(vp, 7013 uiop, cr, eofp); 7014 if (!CFS_TIMEOUT(fscp, error)) 7015 break; 7016 delay(5*hz); 7017 connected = 0; 7018 continue; 7019 } 7020 connected = 1; 7021 continue; 7022 } 7023 } 7024 break; 7025 } 7026 7027 if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READDIR)) 7028 cachefs_log_readdir(cachep, error, fscp->fs_cfsvfsp, 7029 &dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno, 7030 crgetuid(cr), uiop->uio_loffset, *eofp); 7031 7032 if (held) { 7033 rw_exit(&dcp->c_rwlock); 7034 cachefs_cd_release(fscp); 7035 } 7036 7037 #ifdef CFS_CD_DEBUG 7038 ASSERT((curthread->t_flag & T_CD_HELD) == 0); 7039 #endif 7040 out: 7041 #ifdef CFSDEBUG 7042 CFS_DEBUG(CFSDEBUG_VOPS) 7043 printf("cachefs_readdir: EXIT error = %d\n", error); 7044 #endif 7045 7046 return (error); 7047 } 7048 7049 static int 7050 cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp) 7051 { 7052 cnode_t *dcp = VTOC(vp); 7053 int error; 7054 fscache_t *fscp = C_TO_FSCACHE(dcp); 7055 struct cachefs_req *rp; 7056 7057 mutex_enter(&dcp->c_statelock); 7058 7059 /* check directory consistency */ 7060 error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr); 7061 if (error) 7062 goto out; 7063 dcp->c_usage++; 7064 7065 /* if dir was modified, toss old contents */ 7066 if (dcp->c_metadata.md_flags & MD_INVALREADDIR) { 7067 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0); 7068 cachefs_inval_object(dcp); 7069 } 7070 7071 error = 0; 7072 if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) && 7073 ((dcp->c_flags & (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0) && 7074 !CFS_ISFS_BACKFS_NFSV4(fscp) && 7075 (fscp->fs_cdconnected == CFS_CD_CONNECTED)) { 7076 7077 if (cachefs_async_okay()) { 7078 7079 /* 7080 * Set up asynchronous request to fill this 7081 * directory. 
			 */

			dcp->c_flags |= CN_ASYNC_POPULATE;

			rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
			rp->cfs_cmd = CFS_POPULATE;
			rp->cfs_req_u.cu_populate.cpop_vp = vp;
			rp->cfs_cr = cr;

			/* the async worker owns these references */
			crhold(cr);
			VN_HOLD(vp);

			cachefs_addqueue(rp, &fscp->fs_workq);
		} else {
			/* too busy for async work; populate synchronously */
			error = cachefs_dir_fill(dcp, cr);
			if (error != 0)
				cachefs_nocache(dcp);
		}
	}

	/* if front file is populated */
	if (((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) == 0) &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp) &&
	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		error = cachefs_dir_read(dcp, uiop, eofp);
		if (error == 0)
			fscp->fs_stats.st_hits++;
	}

	/* if front file could not be used */
	if ((error != 0) ||
	    CFS_ISFS_BACKFS_NFSV4(fscp) ||
	    (dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
	    ((dcp->c_metadata.md_flags & MD_POPULATED) == 0)) {

		/* front file read failed; stop caching this directory */
		if (error && !(dcp->c_flags & CN_NOCACHE) &&
		    !CFS_ISFS_BACKFS_NFSV4(fscp))
			cachefs_nocache(dcp);

		/* get the back vp */
		if (dcp->c_backvp == NULL) {
			error = cachefs_getbackvp(fscp, dcp);
			if (error)
				goto out;
		}

		if (fscp->fs_inum_size > 0) {
			/* inode numbers must be translated; use helper */
			error = cachefs_readback_translate(dcp, uiop, cr, eofp);
		} else {
			/* do the dir read from the back fs */
			(void) VOP_RWLOCK(dcp->c_backvp,
			    V_WRITELOCK_FALSE, NULL);
			CFS_DPRINT_BACKFS_NFSV4(fscp,
			    ("cachefs_readdir (nfsv4): "
			    "dcp %p, dbackvp %p\n", dcp, dcp->c_backvp));
			error = VOP_READDIR(dcp->c_backvp, uiop, cr, eofp,
			    NULL, 0);
			VOP_RWUNLOCK(dcp->c_backvp, V_WRITELOCK_FALSE, NULL);
		}

		if (error == 0)
			fscp->fs_stats.st_misses++;
	}

out:
	mutex_exit(&dcp->c_statelock);

	return (error);
}

/*
 * Read the directory from the back file system into a kernel buffer,
 * rewrite each entry's d_ino from the back fs inode number to the
 * cachefs (fake) fileno, then copy the result to the caller's uio.
 * Called with cp->c_statelock held; it is temporarily dropped while
 * fs_fslock is held for the fileno translation, then re-taken.
 */
static int
cachefs_readback_translate(cnode_t *cp, uio_t *uiop, cred_t *cr, int *eofp)
{
	int error = 0;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	caddr_t buffy = NULL;
	int buffysize = MAXBSIZE;	/* capped below by caller's resid */
	caddr_t chrp, end;
	ino64_t newinum;
	struct dirent64 *de;
	uio_t uioin;			/* kernel-space uio for the back fs */
	iovec_t iov;

	ASSERT(cp->c_backvp != NULL);
	ASSERT(fscp->fs_inum_size > 0);

	if (uiop->uio_resid < buffysize)
		buffysize = (int)uiop->uio_resid;
	buffy = cachefs_kmem_alloc(buffysize, KM_SLEEP);

	iov.iov_base = buffy;
	iov.iov_len = buffysize;
	uioin.uio_iov = &iov;
	uioin.uio_iovcnt = 1;
	uioin.uio_segflg = UIO_SYSSPACE;
	uioin.uio_fmode = 0;
	uioin.uio_extflg = UIO_COPY_CACHED;
	uioin.uio_loffset = uiop->uio_loffset;
	uioin.uio_resid = buffysize;

	(void) VOP_RWLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);
	error = VOP_READDIR(cp->c_backvp, &uioin, cr, eofp, NULL, 0);
	VOP_RWUNLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);

	if (error != 0)
		goto out;

	/* end of the valid dirent data actually read */
	end = buffy + buffysize - uioin.uio_resid;

	/* swap c_statelock for fs_fslock to walk the fileno tables */
	mutex_exit(&cp->c_statelock);
	mutex_enter(&fscp->fs_fslock);


	for (chrp = buffy; chrp < end; chrp += de->d_reclen) {
		de = (dirent64_t *)chrp;
		newinum = cachefs_inum_real2fake(fscp, de->d_ino);
		if (newinum == 0)
			newinum = cachefs_fileno_conflict(fscp, de->d_ino);
		de->d_ino = newinum;
	}
	mutex_exit(&fscp->fs_fslock);
	mutex_enter(&cp->c_statelock);

	error = uiomove(buffy, end - buffy, UIO_READ, uiop);
	/* propagate the back fs directory offset to the caller */
	uiop->uio_loffset = uioin.uio_loffset;

out:

	if (buffy != NULL)
		cachefs_kmem_free(buffy, buffysize);

	return (error);
}

/*
 * Disconnected-mode readdir: can only be satisfied from a populated
 * front file; otherwise report ETIMEDOUT so the caller retries
 * connected.  ENOTDIR from the front file is mapped to ETIMEDOUT for
 * the same reason.
 */
static int
/*ARGSUSED*/
cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop, cred_t *cr,
	int *eofp)
{
	cnode_t *dcp = VTOC(vp);
	int error;

	mutex_enter(&dcp->c_statelock);
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		error = ETIMEDOUT;
	} else {
		error = cachefs_dir_read(dcp, uiop, eofp);
		if (error == ENOTDIR)
			error = ETIMEDOUT;
	}
	mutex_exit(&dcp->c_statelock);

	return (error);
}

/*
 * VOP_FID: copy the cached back fs file identifier out of the cnode
 * metadata.  Follows the standard VOP_FID convention: if the caller's
 * fid is too small, return ENOSPC with fid_len set to the needed size.
 */
/*ARGSUSED*/
static int
cachefs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
{
	int error = 0;
	struct cnode *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions, then bail
	 * as NFSv4 doesn't support VOP_FID.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
		return (ENOTSUP);
	}

	mutex_enter(&cp->c_statelock);
	if (fidp->fid_len < cp->c_metadata.md_cookie.fid_len) {
		fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
		error = ENOSPC;
	} else {
		bcopy(cp->c_metadata.md_cookie.fid_data, fidp->fid_data,
		    cp->c_metadata.md_cookie.fid_len);
		fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
	}
	mutex_exit(&cp->c_statelock);
	return (error);
}

/*
 * VOP_RWLOCK: take c_rwlock for regular files only (see the XXX note
 * below for why directories are excluded).
 */
/* ARGSUSED2 */
static int
cachefs_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
{
	cnode_t *cp = VTOC(vp);

	/*
	 * XXX - This is ifdef'ed out for now. The problem -
	 * getdents() acquires the read version of rwlock, then we come
	 * into cachefs_readdir() and that wants to acquire the write version
	 * of this lock (if its going to populate the directory). This is
	 * a problem, this can be solved by introducing another lock in the
	 * cnode.
	 */
	/* XXX */
	if (vp->v_type != VREG)
		return (-1);
	if (write_lock)
		rw_enter(&cp->c_rwlock, RW_WRITER);
	else
		rw_enter(&cp->c_rwlock, RW_READER);
	return (write_lock);
}

/*
 * VOP_RWUNLOCK: drop c_rwlock; a no-op for non-regular files to match
 * cachefs_rwlock() above.
 */
/* ARGSUSED */
static void
cachefs_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
{
	cnode_t *cp = VTOC(vp);
	if (vp->v_type != VREG)
		return;
	rw_exit(&cp->c_rwlock);
}

/* VOP_SEEK: any offset is acceptable */
/* ARGSUSED */
static int
cachefs_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
    caller_context_t *ct)
{
	return (0);
}

/* count of pages that vanished between page_exists() and page_lookup() */
static int cachefs_lostpage = 0;
/*
 * Return all the pages from [off..off+len] in file
 */
/*ARGSUSED*/
static int
cachefs_getpage(struct vnode *vp, offset_t off, size_t len,
	uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
	caddr_t addr, enum seg_rw rw, cred_t *cr, caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	int error;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	cachefscache_t *cachep = fscp->fs_cache;
	int held = 0;		/* 1 while we hold cd access */
	int connected = 0;	/* access mode requested from cd_access */

#ifdef CFSDEBUG
	u_offset_t offx = (u_offset_t)off;

	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_getpage: ENTER vp %p off %lld len %lu rw %d\n",
		    (void *)vp, offx, len, rw);
#endif
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (vp->v_flag & VNOMAP) {
		error = ENOSYS;
		goto out;
	}

	/* Call backfilesystem if NFSv4 */
	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_getpage_backfs_nfsv4(vp, off, len, protp, pl,
		    plsz, seg, addr, rw, cr);
		goto out;
	}

	/* XXX sam: make this do an async populate?
	 */
	if (pl == NULL) {
		error = 0;
		goto out;
	}
	if (protp != NULL)
		*protp = PROT_ALL;

	/*
	 * Retry loop: renew fscache access and dispatch to
	 * cachefs_getapage()/pvn_getpages(); CFS_TIMEOUT errors flip the
	 * connected/disconnected mode and retry.
	 */
	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			break;
		held = 1;

		/*
		 * If we are getting called as a side effect of a
		 * cachefs_write()
		 * operation the local file size might not be extended yet.
		 * In this case we want to be able to return pages of zeroes.
		 */
		if ((u_offset_t)off + len >
		    ((cp->c_size + PAGEOFFSET) & (offset_t)PAGEMASK)) {
			if (seg != segkmap) {
				error = EFAULT;
				break;
			}
		}
		if (len <= PAGESIZE)
			error = cachefs_getapage(vp, (u_offset_t)off, len,
			    protp, pl, plsz, seg, addr, rw, cr);
		else
			error = pvn_getpages(cachefs_getapage, vp,
			    (u_offset_t)off, len, protp, pl, plsz, seg, addr,
			    rw, cr);
		if (error == 0)
			break;

		/* cache filled up or transient failure: retry */
		if (((cp->c_flags & CN_NOCACHE) && (error == ENOSPC)) ||
		    error == EAGAIN) {
			connected = 0;
			continue;
		}
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			if (CFS_TIMEOUT(fscp, error)) {
				if (cachefs_cd_access_miss(fscp)) {
					/* try once straight from the back fs */
					if (len <= PAGESIZE)
						error = cachefs_getapage_back(
						    vp, (u_offset_t)off,
						    len, protp, pl,
						    plsz, seg, addr, rw, cr);
					else
						error = pvn_getpages(
						    cachefs_getapage_back, vp,
						    (u_offset_t)off, len,
						    protp, pl,
						    plsz, seg, addr, rw, cr);
					if (!CFS_TIMEOUT(fscp, error) &&
					    (error != EAGAIN))
						break;
					delay(5*hz);
					connected = 0;
					continue;
				}
				connected = 1;
				continue;
			}
		}
		break;
	}

	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GETPAGE))
		cachefs_log_getpage(cachep, error, vp->v_vfsp,
		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
		    crgetuid(cr), off, len);

	if (held) {
		cachefs_cd_release(fscp);
	}

out:
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_getpage: EXIT vp %p error %d\n",
		    (void *)vp, error);
#endif
	return (error);
}

/*
 * cachefs_getpage_backfs_nfsv4
 *
 * Call NFSv4 back filesystem to handle the getpage (cachefs
 * pass-through support for NFSv4).
 */
static int
cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off, size_t len,
	uint_t *protp, struct page *pl[], size_t plsz,
	struct seg *seg, caddr_t addr, enum seg_rw rw,
	cred_t *cr)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	vnode_t *backvp;
	int error;

	/*
	 * For NFSv4 pass-through to work, only connected operation is
	 * supported, the cnode backvp must exist, and cachefs optional
	 * (eg., disconnectable) flags are turned off. Assert these
	 * conditions for the getpage operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	/* Call backfs vnode op after extracting backvp */
	mutex_enter(&cp->c_statelock);
	backvp = cp->c_backvp;
	mutex_exit(&cp->c_statelock);

	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_getpage_backfs_nfsv4: cnode %p, backvp %p\n",
	    cp, backvp));
	error = VOP_GETPAGE(backvp, off, len, protp, pl, plsz, seg,
	    addr, rw, cr, NULL);

	return (error);
}

/*
 * Called from pvn_getpages or cachefs_getpage to get a particular page.
 */
/*ARGSUSED*/
static int
cachefs_getapage(struct vnode *vp, u_offset_t off, size_t len, uint_t *protp,
	struct page *pl[], size_t plsz, struct seg *seg, caddr_t addr,
	enum seg_rw rw, cred_t *cr)
{
	cnode_t *cp = VTOC(vp);
	page_t **ppp, *pp = NULL;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	cachefscache_t *cachep = fscp->fs_cache;
	int error = 0;
	struct page **ourpl;		/* page list from front/back vp */
	struct page *ourstackpl[17]; /* see ASSERT() below for 17 */
	int index = 0;		/* first entry in ourpl at offset >= off */
	int downgrade;
	int have_statelock = 0;	/* 1 while cp->c_statelock is held */
	u_offset_t popoff;
	size_t popsize = 0;

	/*LINTED*/
	ASSERT(((DEF_POP_SIZE / PAGESIZE) + 1) <= 17);

	/* stack array only fits the default population size */
	if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
		ourpl = cachefs_kmem_alloc(sizeof (struct page *) *
		    ((fscp->fs_info.fi_popsize / PAGESIZE) + 1), KM_SLEEP);
	else
		ourpl = ourstackpl;

	ourpl[0] = NULL;
	off = off & (offset_t)PAGEMASK;
again:
	/*
	 * Look for the page
	 */
	if (page_exists(vp, off) == 0) {
		/*
		 * Need to do work to get the page.
		 * Grab our lock because we are going to
		 * modify the state of the cnode.
		 */
		if (! have_statelock) {
			mutex_enter(&cp->c_statelock);
			have_statelock = 1;
		}
		/*
		 * If we're in NOCACHE mode, we will need a backvp
		 */
		if (cp->c_flags & CN_NOCACHE) {
			if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
				error = ETIMEDOUT;
				goto out;
			}
			if (cp->c_backvp == NULL) {
				error = cachefs_getbackvp(fscp, cp);
				if (error)
					goto out;
			}
			error = VOP_GETPAGE(cp->c_backvp, off,
			    PAGESIZE, protp, ourpl, PAGESIZE, seg,
			    addr, S_READ, cr, NULL);
			/*
			 * backfs returns EFAULT when we are trying for a
			 * page beyond EOF but cachefs has the knowledge that
			 * it is not beyond EOF be cause cp->c_size is
			 * greater then the offset requested.
			 */
			if (error == EFAULT) {
				/* manufacture a zero-filled page instead */
				error = 0;
				pp = page_create_va(vp, off, PAGESIZE,
				    PG_EXCL | PG_WAIT, seg, addr);
				if (pp == NULL)
					goto again;
				pagezero(pp, 0, PAGESIZE);
				pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
				goto out;
			}
			if (error)
				goto out;
			goto getpages;
		}
		/*
		 * We need a front file. If we can't get it,
		 * put the cnode in NOCACHE mode and try again.
		 */
		if (cp->c_frontvp == NULL) {
			error = cachefs_getfrontfile(cp);
			if (error) {
				cachefs_nocache(cp);
				error = EAGAIN;
				goto out;
			}
		}
		/*
		 * Check if the front file needs population.
		 * If population is necessary, make sure we have a
		 * backvp as well. We will get the page from the backvp.
		 * bug 4152459-
		 * But if the file system is in disconnected mode
		 * and the file is a local file then do not check the
		 * allocmap.
		 */
		if (((fscp->fs_cdconnected == CFS_CD_CONNECTED) ||
		    ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) &&
		    (cachefs_check_allocmap(cp, off) == 0)) {
			if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
				error = ETIMEDOUT;
				goto out;
			}
			if (cp->c_backvp == NULL) {
				error = cachefs_getbackvp(fscp, cp);
				if (error)
					goto out;
			}
			if (cp->c_filegrp->fg_flags & CFS_FG_WRITE) {
				/* populate a cluster around this offset */
				cachefs_cluster_allocmap(off, &popoff,
				    &popsize,
				    fscp->fs_info.fi_popsize, cp);
				if (popsize != 0) {
					error = cachefs_populate(cp,
					    popoff, popsize,
					    cp->c_frontvp, cp->c_backvp,
					    cp->c_size, cr);
					if (error) {
						cachefs_nocache(cp);
						error = EAGAIN;
						goto out;
					} else {
						cp->c_flags |=
						    CN_UPDATED |
						    CN_NEED_FRONT_SYNC |
						    CN_POPULATION_PENDING;
					}
					/* remaining size past our offset */
					popsize = popsize - (off - popoff);
				} else {
					popsize = PAGESIZE;
				}
			}
			/* else XXX assert CN_NOCACHE?
			 */
			error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
			    PAGESIZE, protp, ourpl, popsize,
			    seg, addr, S_READ, cr, NULL);
			if (error)
				goto out;
			fscp->fs_stats.st_misses++;
		} else {
			/* flush pending population before reading front */
			if (cp->c_flags & CN_POPULATION_PENDING) {
				error = VOP_FSYNC(cp->c_frontvp, FSYNC, cr,
				    NULL);
				cp->c_flags &= ~CN_POPULATION_PENDING;
				if (error) {
					cachefs_nocache(cp);
					error = EAGAIN;
					goto out;
				}
			}
			/*
			 * File was populated so we get the page from the
			 * frontvp
			 */
			error = VOP_GETPAGE(cp->c_frontvp, (offset_t)off,
			    PAGESIZE, protp, ourpl, PAGESIZE, seg, addr,
			    rw, cr, NULL);
			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GPFRONT))
				cachefs_log_gpfront(cachep, error,
				    fscp->fs_cfsvfsp,
				    &cp->c_metadata.md_cookie, cp->c_fileno,
				    crgetuid(cr), off, PAGESIZE);
			if (error) {
				cachefs_nocache(cp);
				error = EAGAIN;
				goto out;
			}
			fscp->fs_stats.st_hits++;
		}
getpages:
		ASSERT(have_statelock);
		if (have_statelock) {
			mutex_exit(&cp->c_statelock);
			have_statelock = 0;
		}
		/*
		 * The pages came in named by the front/back vp; rename
		 * them to this cachefs vnode.  Renaming requires the
		 * page's exclusive lock, so shared pages are upgraded
		 * (and remembered for downgrade afterwards).
		 */
		downgrade = 0;
		for (ppp = ourpl; *ppp; ppp++) {
			if ((*ppp)->p_offset < off) {
				index++;
				page_unlock(*ppp);
				continue;
			}
			if (PAGE_SHARED(*ppp)) {
				if (page_tryupgrade(*ppp) == 0) {
					for (ppp = &ourpl[index]; *ppp; ppp++)
						page_unlock(*ppp);
					error = EAGAIN;
					goto out;
				}
				downgrade = 1;
			}
			ASSERT(PAGE_EXCL(*ppp));
			(void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
			page_rename(*ppp, vp, (*ppp)->p_offset);
		}
		pl[0] = ourpl[index];
		pl[1] = NULL;
		if (downgrade) {
			page_downgrade(ourpl[index]);
		}
		/* Unlock the rest of the pages from the cluster */
		for (ppp = &ourpl[index+1]; *ppp; ppp++)
			page_unlock(*ppp);
	} else {
		ASSERT(!
have_statelock);
		if (have_statelock) {
			mutex_exit(&cp->c_statelock);
			have_statelock = 0;
		}
		/* XXX SE_SHARED probably isn't what we *always* want */
		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
			/* page vanished between page_exists and here; retry */
			cachefs_lostpage++;
			goto again;
		}
		pl[0] = pp;
		pl[1] = NULL;
		/* XXX increment st_hits? i don't think so, but... */
	}

out:
	if (have_statelock) {
		mutex_exit(&cp->c_statelock);
		have_statelock = 0;
	}
	/* free the page list only if it was kmem allocated above */
	if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
		cachefs_kmem_free(ourpl, sizeof (struct page *) *
		    ((fscp->fs_info.fi_popsize / PAGESIZE) + 1));
	return (error);
}

/* gets a page but only from the back fs */
/*ARGSUSED*/
static int
cachefs_getapage_back(struct vnode *vp, u_offset_t off, size_t len,
	uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
	caddr_t addr, enum seg_rw rw, cred_t *cr)
{
	cnode_t *cp = VTOC(vp);
	page_t **ppp, *pp = NULL;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error = 0;
	struct page *ourpl[17];		/* page list from the back vp */
	int index = 0;		/* first entry in ourpl at offset >= off */
	int have_statelock = 0;	/* 1 while cp->c_statelock is held */
	int downgrade;

	/*
	 * Grab the cnode statelock so the cnode state won't change
	 * while we're in here.
	 */
	ourpl[0] = NULL;
	off = off & (offset_t)PAGEMASK;
again:
	if (page_exists(vp, off) == 0) {
		if (! have_statelock) {
			mutex_enter(&cp->c_statelock);
			have_statelock = 1;
		}

		if (cp->c_backvp == NULL) {
			error = cachefs_getbackvp(fscp, cp);
			if (error)
				goto out;
		}
		error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
		    PAGESIZE, protp, ourpl, PAGESIZE, seg,
		    addr, S_READ, cr, NULL);
		if (error)
			goto out;

		if (have_statelock) {
			mutex_exit(&cp->c_statelock);
			have_statelock = 0;
		}
		/*
		 * Rename the pages from the back vp to this cachefs
		 * vnode; requires the exclusive page lock, so shared
		 * pages are upgraded (and downgraded again below).
		 */
		downgrade = 0;
		for (ppp = ourpl; *ppp; ppp++) {
			if ((*ppp)->p_offset < off) {
				index++;
				page_unlock(*ppp);
				continue;
			}
			if (PAGE_SHARED(*ppp)) {
				if (page_tryupgrade(*ppp) == 0) {
					for (ppp = &ourpl[index]; *ppp; ppp++)
						page_unlock(*ppp);
					error = EAGAIN;
					goto out;
				}
				downgrade = 1;
			}
			ASSERT(PAGE_EXCL(*ppp));
			(void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
			page_rename(*ppp, vp, (*ppp)->p_offset);
		}
		pl[0] = ourpl[index];
		pl[1] = NULL;
		if (downgrade) {
			page_downgrade(ourpl[index]);
		}
		/* Unlock the rest of the pages from the cluster */
		for (ppp = &ourpl[index+1]; *ppp; ppp++)
			page_unlock(*ppp);
	} else {
		ASSERT(! have_statelock);
		if (have_statelock) {
			mutex_exit(&cp->c_statelock);
			have_statelock = 0;
		}
		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
			/* page vanished between page_exists and here; retry */
			cachefs_lostpage++;
			goto again;
		}
		pl[0] = pp;
		pl[1] = NULL;
	}

out:
	if (have_statelock) {
		mutex_exit(&cp->c_statelock);
		have_statelock = 0;
	}
	return (error);
}

/*
 * VOP_PUTPAGE entry point: renew fscache access and invoke
 * cachefs_putpage_common(), switching connected/disconnected mode and
 * retrying on CFS_TIMEOUT errors.
 */
/*ARGSUSED*/
static int
cachefs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
	caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	int error = 0;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int held = 0;		/* 1 while we hold cd access */
	int connected = 0;	/* access mode requested from cd_access */

	if (getzoneid() != GLOBAL_ZONEID)
		return (EPERM);

	/* Call backfilesytem if NFSv4 */
	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_putpage_backfs_nfsv4(vp, off, len, flags, cr);
		goto out;
	}

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;

		error = cachefs_putpage_common(vp, off, len, flags, cr);
		if (error == 0)
			break;

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			/* don't block memory reclaim; drop the pages */
			if (NOMEMWAIT()) {
				error = 0;
				goto out;
			}
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			}
		}
		break;
	}

out:

	if (held) {
		cachefs_cd_release(fscp);
	}

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
	return (error);
}

/*
 * cachefs_putpage_backfs_nfsv4
 *
 * Call NFSv4 back filesystem to handle the putpage (cachefs
 * pass-through
support for NFSv4).
 */
static int
cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off, size_t len, int flags,
	cred_t *cr)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	vnode_t *backvp;
	int error;

	/*
	 * For NFSv4 pass-through to work, only connected operation is
	 * supported, the cnode backvp must exist, and cachefs optional
	 * (eg., disconnectable) flags are turned off. Assert these
	 * conditions for the putpage operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	/* Call backfs vnode op after extracting backvp */
	mutex_enter(&cp->c_statelock);
	backvp = cp->c_backvp;
	mutex_exit(&cp->c_statelock);

	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_putpage_backfs_nfsv4: cnode %p, backvp %p\n",
	    cp, backvp));
	error = VOP_PUTPAGE(backvp, off, len, flags, cr, NULL);

	return (error);
}

/*
 * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
 * If len == 0, do from off to EOF.
 *
 * The normal cases should be len == 0 & off == 0 (entire vp list),
 * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
 * (from pageout).
 */

/*ARGSUSED*/
int
cachefs_putpage_common(struct vnode *vp, offset_t off, size_t len,
	int flags, cred_t *cr)
{
	struct cnode *cp = VTOC(vp);
	struct page *pp;
	size_t io_len;
	u_offset_t eoff, io_off;
	int error = 0;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	cachefscache_t *cachep = fscp->fs_cache;

	/* nothing to push on a read-only fs unless invalidating */
	if (len == 0 && (flags & B_INVAL) == 0 && vn_is_readonly(vp)) {
		return (0);
	}
	if (!vn_has_cached_data(vp) || (off >= cp->c_size &&
	    (flags & B_INVAL) == 0))
		return (0);

	/*
	 * Should never have cached data for the cachefs vnode
	 * if NFSv4 is in use.
	 */
	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);

	/*
	 * If this is an async putpage let a thread handle it.
	 */
	if (flags & B_ASYNC) {
		struct cachefs_req *rp;
		int tflags = (flags & ~(B_ASYNC|B_DONTNEED));

		if (ttoproc(curthread) == proc_pageout) {
			/*
			 * If this is the page daemon we
			 * do the push synchronously (Dangerous!) and hope
			 * we can free enough to keep running...
			 */
			flags &= ~B_ASYNC;
			goto again;
		}

		if (! cachefs_async_okay()) {

			/*
			 * this is somewhat like NFS's behavior. keep
			 * the system from thrashing. we've seen
			 * cases where async queues get out of
			 * control, especially if
			 * madvise(MADV_SEQUENTIAL) is done on a large
			 * mmap()ed file that is read sequentially.
			 */

			flags &= ~B_ASYNC;
			goto again;
		}

		/*
		 * if no flags other than B_ASYNC were set,
		 * we coalesce putpage requests into a single one for the
		 * whole file (len = off = 0). If such a request is
		 * already queued, we're done.
		 *
		 * If there are other flags set (e.g., B_INVAL), we don't
		 * attempt to coalesce and we use the specified length and
		 * offset.
		 */
		rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
		mutex_enter(&cp->c_iomutex);
		if ((cp->c_ioflags & CIO_PUTPAGES) == 0 || tflags != 0) {
			rp->cfs_cmd = CFS_PUTPAGE;
			rp->cfs_req_u.cu_putpage.cp_vp = vp;
			if (tflags == 0) {
				/* coalesce: push the whole file */
				off = len = 0;
				cp->c_ioflags |= CIO_PUTPAGES;
			}
			rp->cfs_req_u.cu_putpage.cp_off = off;
			rp->cfs_req_u.cu_putpage.cp_len = (uint_t)len;
			rp->cfs_req_u.cu_putpage.cp_flags = flags & ~B_ASYNC;
			rp->cfs_cr = cr;
			crhold(rp->cfs_cr);
			VN_HOLD(vp);
			cp->c_nio++;
			cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));
		} else {
			/* whole-file push already queued; drop the request */
			kmem_cache_free(cachefs_req_cache, rp);
		}

		mutex_exit(&cp->c_iomutex);
		return (0);
	}


again:
	if (len == 0) {
		/*
		 * Search the entire vp list for pages >= off
		 */
		error = pvn_vplist_dirty(vp, off, cachefs_push, flags, cr);
	} else {
		/*
		 * Do a range from [off...off + len] looking for pages
		 * to deal with.
		 */
		eoff = (u_offset_t)off + len;
		for (io_off = off; io_off < eoff && io_off < cp->c_size;
		    io_off += io_len) {
			/*
			 * If we are not invalidating, synchronously
			 * freeing or writing pages use the routine
			 * page_lookup_nowait() to prevent reclaiming
			 * them from the free list.
			 */
			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
				pp = page_lookup(vp, io_off,
				    (flags & (B_INVAL | B_FREE)) ?
				    SE_EXCL : SE_SHARED);
			} else {
				/* XXX this looks like dead code */
				pp = page_lookup_nowait(vp, io_off,
				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
			}

			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
				io_len = PAGESIZE;
			else {
				error = cachefs_push(vp, pp, &io_off,
				    &io_len, flags, cr);
				if (error != 0)
					break;
				/*
				 * "io_off" and "io_len" are returned as
				 * the range of pages we actually wrote.
8081 * This allows us to skip ahead more quickly 8082 * since several pages may've been dealt 8083 * with by this iteration of the loop. 8084 */ 8085 } 8086 } 8087 } 8088 8089 if (error == 0 && off == 0 && (len == 0 || len >= cp->c_size)) { 8090 cp->c_flags &= ~CDIRTY; 8091 } 8092 8093 if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_PUTPAGE)) 8094 cachefs_log_putpage(cachep, error, fscp->fs_cfsvfsp, 8095 &cp->c_metadata.md_cookie, cp->c_id.cid_fileno, 8096 crgetuid(cr), off, len); 8097 8098 return (error); 8099 8100 } 8101 8102 /*ARGSUSED*/ 8103 static int 8104 cachefs_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp, 8105 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr, 8106 caller_context_t *ct) 8107 { 8108 cnode_t *cp = VTOC(vp); 8109 fscache_t *fscp = C_TO_FSCACHE(cp); 8110 struct segvn_crargs vn_a; 8111 int error; 8112 int held = 0; 8113 int writing; 8114 int connected = 0; 8115 8116 #ifdef CFSDEBUG 8117 u_offset_t offx = (u_offset_t)off; 8118 8119 CFS_DEBUG(CFSDEBUG_VOPS) 8120 printf("cachefs_map: ENTER vp %p off %lld len %lu flags %d\n", 8121 (void *)vp, offx, len, flags); 8122 #endif 8123 if (getzoneid() != GLOBAL_ZONEID) { 8124 error = EPERM; 8125 goto out; 8126 } 8127 8128 if (vp->v_flag & VNOMAP) { 8129 error = ENOSYS; 8130 goto out; 8131 } 8132 if (off < 0 || (offset_t)(off + len) < 0) { 8133 error = ENXIO; 8134 goto out; 8135 } 8136 if (vp->v_type != VREG) { 8137 error = ENODEV; 8138 goto out; 8139 } 8140 8141 /* 8142 * Check to see if the vnode is currently marked as not cachable. 8143 * If so, we have to refuse the map request as this violates the 8144 * don't cache attribute. 8145 */ 8146 if (vp->v_flag & VNOCACHE) 8147 return (EAGAIN); 8148 8149 #ifdef OBSOLETE 8150 /* 8151 * If file is being locked, disallow mapping. 
8152 */ 8153 if (vn_has_flocks(vp)) { 8154 error = EAGAIN; 8155 goto out; 8156 } 8157 #endif 8158 8159 /* call backfilesystem if NFSv4 */ 8160 if (CFS_ISFS_BACKFS_NFSV4(fscp)) { 8161 error = cachefs_map_backfs_nfsv4(vp, off, as, addrp, len, prot, 8162 maxprot, flags, cr); 8163 goto out; 8164 } 8165 8166 writing = (prot & PROT_WRITE && ((flags & MAP_PRIVATE) == 0)); 8167 8168 for (;;) { 8169 /* get (or renew) access to the file system */ 8170 if (held) { 8171 cachefs_cd_release(fscp); 8172 held = 0; 8173 } 8174 error = cachefs_cd_access(fscp, connected, writing); 8175 if (error) 8176 break; 8177 held = 1; 8178 8179 if (writing) { 8180 mutex_enter(&cp->c_statelock); 8181 if (CFS_ISFS_WRITE_AROUND(fscp)) { 8182 if (fscp->fs_cdconnected != CFS_CD_CONNECTED) { 8183 connected = 1; 8184 continue; 8185 } else { 8186 cachefs_nocache(cp); 8187 } 8188 } 8189 8190 /* 8191 * CN_MAPWRITE is for an optimization in cachefs_delmap. 8192 * If CN_MAPWRITE is not set then cachefs_delmap does 8193 * not need to try to push out any pages. 8194 * This bit gets cleared when the cnode goes inactive. 8195 */ 8196 cp->c_flags |= CN_MAPWRITE; 8197 8198 mutex_exit(&cp->c_statelock); 8199 } 8200 break; 8201 } 8202 8203 if (held) { 8204 cachefs_cd_release(fscp); 8205 } 8206 8207 as_rangelock(as); 8208 error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 8209 if (error != 0) { 8210 as_rangeunlock(as); 8211 goto out; 8212 } 8213 8214 /* 8215 * package up all the data passed in into a segvn_args struct and 8216 * call as_map with segvn_create function to create a new segment 8217 * in the address space. 
8218 */ 8219 vn_a.vp = vp; 8220 vn_a.offset = off; 8221 vn_a.type = flags & MAP_TYPE; 8222 vn_a.prot = (uchar_t)prot; 8223 vn_a.maxprot = (uchar_t)maxprot; 8224 vn_a.cred = cr; 8225 vn_a.amp = NULL; 8226 vn_a.flags = flags & ~MAP_TYPE; 8227 vn_a.szc = 0; 8228 vn_a.lgrp_mem_policy_flags = 0; 8229 error = as_map(as, *addrp, len, segvn_create, &vn_a); 8230 as_rangeunlock(as); 8231 out: 8232 8233 #ifdef CFS_CD_DEBUG 8234 ASSERT((curthread->t_flag & T_CD_HELD) == 0); 8235 #endif 8236 #ifdef CFSDEBUG 8237 CFS_DEBUG(CFSDEBUG_VOPS) 8238 printf("cachefs_map: EXIT vp %p error %d\n", (void *)vp, error); 8239 #endif 8240 return (error); 8241 } 8242 8243 /* 8244 * cachefs_map_backfs_nfsv4 8245 * 8246 * Call NFSv4 back filesystem to handle the map (cachefs 8247 * pass-through support for NFSv4). 8248 */ 8249 static int 8250 cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off, struct as *as, 8251 caddr_t *addrp, size_t len, uchar_t prot, 8252 uchar_t maxprot, uint_t flags, cred_t *cr) 8253 { 8254 cnode_t *cp = VTOC(vp); 8255 fscache_t *fscp = C_TO_FSCACHE(cp); 8256 vnode_t *backvp; 8257 int error; 8258 8259 /* 8260 * For NFSv4 pass-through to work, only connected operation is 8261 * supported, the cnode backvp must exist, and cachefs optional 8262 * (eg., disconnectable) flags are turned off. Assert these 8263 * conditions for the map operation. 
8264 */ 8265 CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp); 8266 CFS_BACKFS_NFSV4_ASSERT_CNODE(cp); 8267 8268 /* Call backfs vnode op after extracting backvp */ 8269 mutex_enter(&cp->c_statelock); 8270 backvp = cp->c_backvp; 8271 mutex_exit(&cp->c_statelock); 8272 8273 CFS_DPRINT_BACKFS_NFSV4(fscp, 8274 ("cachefs_map_backfs_nfsv4: cnode %p, backvp %p\n", 8275 cp, backvp)); 8276 error = VOP_MAP(backvp, off, as, addrp, len, prot, maxprot, flags, cr, 8277 NULL); 8278 8279 return (error); 8280 } 8281 8282 /*ARGSUSED*/ 8283 static int 8284 cachefs_addmap(struct vnode *vp, offset_t off, struct as *as, 8285 caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, 8286 cred_t *cr, caller_context_t *ct) 8287 { 8288 cnode_t *cp = VTOC(vp); 8289 fscache_t *fscp = C_TO_FSCACHE(cp); 8290 8291 if (getzoneid() != GLOBAL_ZONEID) 8292 return (EPERM); 8293 8294 if (vp->v_flag & VNOMAP) 8295 return (ENOSYS); 8296 8297 /* 8298 * Check this is not an NFSv4 filesystem, as the mapping 8299 * is not done on the cachefs filesystem if NFSv4 is in 8300 * use. 8301 */ 8302 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0); 8303 8304 mutex_enter(&cp->c_statelock); 8305 cp->c_mapcnt += btopr(len); 8306 mutex_exit(&cp->c_statelock); 8307 return (0); 8308 } 8309 8310 /*ARGSUSED*/ 8311 static int 8312 cachefs_delmap(struct vnode *vp, offset_t off, struct as *as, 8313 caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags, 8314 cred_t *cr, caller_context_t *ct) 8315 { 8316 cnode_t *cp = VTOC(vp); 8317 fscache_t *fscp = C_TO_FSCACHE(cp); 8318 int error; 8319 int connected = 0; 8320 int held = 0; 8321 8322 /* 8323 * The file may be passed in to (or inherited into) the zone, so we 8324 * need to let this operation go through since it happens as part of 8325 * exiting. 8326 */ 8327 if (vp->v_flag & VNOMAP) 8328 return (ENOSYS); 8329 8330 /* 8331 * Check this is not an NFSv4 filesystem, as the mapping 8332 * is not done on the cachefs filesystem if NFSv4 is in 8333 * use. 
8334 */ 8335 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0); 8336 8337 mutex_enter(&cp->c_statelock); 8338 cp->c_mapcnt -= btopr(len); 8339 ASSERT(cp->c_mapcnt >= 0); 8340 mutex_exit(&cp->c_statelock); 8341 8342 if (cp->c_mapcnt || !vn_has_cached_data(vp) || 8343 ((cp->c_flags & CN_MAPWRITE) == 0)) 8344 return (0); 8345 8346 for (;;) { 8347 /* get (or renew) access to the file system */ 8348 if (held) { 8349 cachefs_cd_release(fscp); 8350 held = 0; 8351 } 8352 error = cachefs_cd_access(fscp, connected, 1); 8353 if (error) 8354 break; 8355 held = 1; 8356 connected = 0; 8357 8358 error = cachefs_putpage_common(vp, (offset_t)0, 8359 (uint_t)0, 0, cr); 8360 if (CFS_TIMEOUT(fscp, error)) { 8361 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) { 8362 cachefs_cd_release(fscp); 8363 held = 0; 8364 cachefs_cd_timedout(fscp); 8365 continue; 8366 } else { 8367 connected = 1; 8368 continue; 8369 } 8370 } 8371 8372 /* if no space left in cache, wait until connected */ 8373 if ((error == ENOSPC) && 8374 (fscp->fs_cdconnected != CFS_CD_CONNECTED)) { 8375 connected = 1; 8376 continue; 8377 } 8378 8379 mutex_enter(&cp->c_statelock); 8380 if (!error) 8381 error = cp->c_error; 8382 cp->c_error = 0; 8383 mutex_exit(&cp->c_statelock); 8384 break; 8385 } 8386 8387 if (held) 8388 cachefs_cd_release(fscp); 8389 8390 #ifdef CFS_CD_DEBUG 8391 ASSERT((curthread->t_flag & T_CD_HELD) == 0); 8392 #endif 8393 return (error); 8394 } 8395 8396 /* ARGSUSED */ 8397 static int 8398 cachefs_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag, 8399 offset_t offset, struct flk_callback *flk_cbp, cred_t *cr, 8400 caller_context_t *ct) 8401 { 8402 struct cnode *cp = VTOC(vp); 8403 int error; 8404 struct fscache *fscp = C_TO_FSCACHE(cp); 8405 vnode_t *backvp; 8406 int held = 0; 8407 int connected = 0; 8408 8409 if (getzoneid() != GLOBAL_ZONEID) 8410 return (EPERM); 8411 8412 if ((cmd != F_GETLK) && (cmd != F_SETLK) && (cmd != F_SETLKW)) 8413 return (EINVAL); 8414 8415 /* Disallow locking of files that 
are currently mapped */ 8416 if (((cmd == F_SETLK) || (cmd == F_SETLKW)) && (cp->c_mapcnt > 0)) { 8417 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0); 8418 return (EAGAIN); 8419 } 8420 8421 /* 8422 * Cachefs only provides pass-through support for NFSv4, 8423 * and all vnode operations are passed through to the 8424 * back file system. For NFSv4 pass-through to work, only 8425 * connected operation is supported, the cnode backvp must 8426 * exist, and cachefs optional (eg., disconnectable) flags 8427 * are turned off. Assert these conditions to ensure that 8428 * the backfilesystem is called for the frlock operation. 8429 */ 8430 CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp); 8431 CFS_BACKFS_NFSV4_ASSERT_CNODE(cp); 8432 8433 /* XXX bob: nfs does a bunch more checks than we do */ 8434 if (CFS_ISFS_LLOCK(fscp)) { 8435 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0); 8436 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 8437 } 8438 8439 for (;;) { 8440 /* get (or renew) access to the file system */ 8441 if (held) { 8442 /* Won't loop with NFSv4 connected behavior */ 8443 ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0); 8444 cachefs_cd_release(fscp); 8445 held = 0; 8446 } 8447 error = cachefs_cd_access(fscp, connected, 0); 8448 if (error) 8449 break; 8450 held = 1; 8451 8452 /* if not connected, quit or wait */ 8453 if (fscp->fs_cdconnected != CFS_CD_CONNECTED) { 8454 connected = 1; 8455 continue; 8456 } 8457 8458 /* nocache the file */ 8459 if ((cp->c_flags & CN_NOCACHE) == 0 && 8460 !CFS_ISFS_BACKFS_NFSV4(fscp)) { 8461 mutex_enter(&cp->c_statelock); 8462 cachefs_nocache(cp); 8463 mutex_exit(&cp->c_statelock); 8464 } 8465 8466 /* 8467 * XXX bob: probably should do a consistency check 8468 * Pass arguments unchanged if NFSv4 is the backfs. 
8469 */ 8470 if (bfp->l_whence == 2 && CFS_ISFS_BACKFS_NFSV4(fscp) == 0) { 8471 bfp->l_start += cp->c_size; 8472 bfp->l_whence = 0; 8473 } 8474 8475 /* get the back vp */ 8476 mutex_enter(&cp->c_statelock); 8477 if (cp->c_backvp == NULL) { 8478 error = cachefs_getbackvp(fscp, cp); 8479 if (error) { 8480 mutex_exit(&cp->c_statelock); 8481 break; 8482 } 8483 } 8484 backvp = cp->c_backvp; 8485 VN_HOLD(backvp); 8486 mutex_exit(&cp->c_statelock); 8487 8488 /* 8489 * make sure we can flush currently dirty pages before 8490 * allowing the lock 8491 */ 8492 if (bfp->l_type != F_UNLCK && cmd != F_GETLK && 8493 !CFS_ISFS_BACKFS_NFSV4(fscp)) { 8494 error = cachefs_putpage( 8495 vp, (offset_t)0, 0, B_INVAL, cr, ct); 8496 if (error) { 8497 error = ENOLCK; 8498 VN_RELE(backvp); 8499 break; 8500 } 8501 } 8502 8503 /* do lock on the back file */ 8504 CFS_DPRINT_BACKFS_NFSV4(fscp, 8505 ("cachefs_frlock (nfsv4): cp %p, backvp %p\n", 8506 cp, backvp)); 8507 error = VOP_FRLOCK(backvp, cmd, bfp, flag, offset, NULL, cr, 8508 ct); 8509 VN_RELE(backvp); 8510 if (CFS_TIMEOUT(fscp, error)) { 8511 connected = 1; 8512 continue; 8513 } 8514 break; 8515 } 8516 8517 if (held) { 8518 cachefs_cd_release(fscp); 8519 } 8520 8521 /* 8522 * If we are setting a lock mark the vnode VNOCACHE so the page 8523 * cache does not give inconsistent results on locked files shared 8524 * between clients. The VNOCACHE flag is never turned off as long 8525 * as the vnode is active because it is hard to figure out when the 8526 * last lock is gone. 8527 * XXX - what if some already has the vnode mapped in? 8528 * XXX bob: see nfs3_frlock, do not allow locking if vnode mapped in. 8529 */ 8530 if ((error == 0) && (bfp->l_type != F_UNLCK) && (cmd != F_GETLK) && 8531 !CFS_ISFS_BACKFS_NFSV4(fscp)) 8532 vp->v_flag |= VNOCACHE; 8533 8534 #ifdef CFS_CD_DEBUG 8535 ASSERT((curthread->t_flag & T_CD_HELD) == 0); 8536 #endif 8537 return (error); 8538 } 8539 8540 /* 8541 * Free storage space associated with the specified vnode. 
The portion
 * to be freed is specified by bfp->l_start and bfp->l_len (already
 * normalized to a "whence" of 0).
 *
 * This is an experimental facility whose continued existence is not
 * guaranteed.  Currently, we only support the special case
 * of l_len == 0, meaning free to end of file.
 */
/* ARGSUSED */
static int
cachefs_space(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
    offset_t offset, cred_t *cr, caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error;

	ASSERT(vp->v_type == VREG);
	if (getzoneid() != GLOBAL_ZONEID)
		return (EPERM);
	if (cmd != F_FREESP)
		return (EINVAL);

	/* call backfilesystem if NFSv4 */
	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_space_backfs_nfsv4(vp, cmd, bfp, flag,
		    offset, cr, ct);
		goto out;
	}

	/* "free to end of file" is implemented as a truncate via setattr */
	if ((error = convoff(vp, bfp, 0, offset)) == 0) {
		ASSERT(bfp->l_start >= 0);
		if (bfp->l_len == 0) {
			struct vattr va;

			va.va_size = bfp->l_start;
			va.va_mask = AT_SIZE;
			error = cachefs_setattr(vp, &va, 0, cr, ct);
		} else
			error = EINVAL;
	}

out:
	return (error);
}

/*
 * cachefs_space_backfs_nfsv4
 *
 * Call NFSv4 back filesystem to handle the space (cachefs
 * pass-through support for NFSv4).
 */
static int
cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd, struct flock64 *bfp,
    int flag, offset_t offset, cred_t *cr, caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	vnode_t *backvp;
	int error;

	/*
	 * For NFSv4 pass-through to work, only connected operation is
	 * supported, the cnode backvp must exist, and cachefs optional
	 * (eg., disconnectable) flags are turned off. Assert these
	 * conditions for the space operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	/* Call backfs vnode op after extracting backvp */
	mutex_enter(&cp->c_statelock);
	backvp = cp->c_backvp;
	mutex_exit(&cp->c_statelock);

	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_space_backfs_nfsv4: cnode %p, backvp %p\n",
	    cp, backvp));
	error = VOP_SPACE(backvp, cmd, bfp, flag, offset, cr, ct);

	return (error);
}

/*
 * cachefs_realvp -- cachefs does not expose a single underlying
 * "real" vnode; always fails with EINVAL.
 */
/*ARGSUSED*/
static int
cachefs_realvp(struct vnode *vp, struct vnode **vpp, caller_context_t *ct)
{
	return (EINVAL);
}

/*
 * cachefs_pageio -- direct page I/O is not supported by cachefs.
 */
/*ARGSUSED*/
static int
cachefs_pageio(struct vnode *vp, page_t *pp, u_offset_t io_off, size_t io_len,
    int flags, cred_t *cr, caller_context_t *ct)
{
	return (ENOSYS);
}

/*
 * cachefs_setsecattr_connected -- apply an ACL to the back file while
 * connected, then reflect it into the cached metadata/front file.
 * Caller must hold c_rwlock as writer (asserted); c_statelock is taken
 * here.  On various cache failures the file is simply nocached and the
 * operation still succeeds.
 */
static int
cachefs_setsecattr_connected(cnode_t *cp,
    vsecattr_t *vsec, int flag, cred_t *cr)
{
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error = 0;

	ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);

	mutex_enter(&cp->c_statelock);

	if (cp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, cp);
		if (error) {
			cachefs_nocache(cp);
			goto out;
		}
	}

	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
	if (error)
		goto out;

	/*
	 * only owner can set acl
	 * NOTE(review): EINVAL is returned here; EPERM would be the more
	 * conventional errno for an ownership check -- confirm.
	 */
	if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
		error = EINVAL;
		goto out;
	}


	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_setsecattr (nfsv4): cp %p, backvp %p",
	    cp, cp->c_backvp));
	error = VOP_SETSECATTR(cp->c_backvp, vsec, flag, cr, NULL);
	if (error) {
		goto out;
	}

	/* cannot write the cache metadata -- stop caching this file */
	if ((cp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
		cachefs_nocache(cp);
		goto out;
	}

	CFSOP_MODIFY_COBJECT(fscp, cp, cr);

	/* acl may have changed permissions -- handle this. */
	if (!CFS_ISFS_BACKFS_NFSV4(fscp))
		cachefs_acl2perm(cp, vsec);

	if ((cp->c_flags & CN_NOCACHE) == 0 &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_cacheacl(cp, vsec);
		if (error != 0) {
#ifdef CFSDEBUG
			CFS_DEBUG(CFSDEBUG_VOPS)
				printf("cachefs_setacl: cacheacl: error %d\n",
				    error);
#endif /* CFSDEBUG */
			/* caching the ACL is best-effort; nocache and succeed */
			error = 0;
			cachefs_nocache(cp);
		}
	}

out:
	mutex_exit(&cp->c_statelock);

	return (error);
}

/*
 * cachefs_setsecattr_disconnected -- record an ACL change in the
 * disconnect log (dlog) for later rollback/replay, applying it to the
 * cached metadata and front file immediately.  On failure the cached
 * va_mode is restored to its value on entry.
 */
static int
cachefs_setsecattr_disconnected(cnode_t *cp,
    vsecattr_t *vsec, int flag, cred_t *cr)
{
	fscache_t *fscp = C_TO_FSCACHE(cp);
	mode_t failmode = cp->c_metadata.md_vattr.va_mode;
	off_t commit = 0;
	int error = 0;

	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);

	/* write-around file systems cannot operate disconnected */
	if (CFS_ISFS_WRITE_AROUND(fscp))
		return (ETIMEDOUT);

	mutex_enter(&cp->c_statelock);

	/* only owner can set acl */
	if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
		error = EINVAL;
		goto out;
	}

	/* attributes are stale; must reconnect before modifying */
	if (cp->c_metadata.md_flags & MD_NEEDATTRS) {
		error = ETIMEDOUT;
		goto out;
	}

	/* XXX do i need this?  is this right? */
	if (cp->c_flags & CN_ALLOC_PENDING) {
		if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
			(void) filegrp_allocattr(cp->c_filegrp);
		}
		error = filegrp_create_metadata(cp->c_filegrp,
		    &cp->c_metadata, &cp->c_id);
		if (error) {
			goto out;
		}
		cp->c_flags &= ~CN_ALLOC_PENDING;
	}

	/* XXX is this right? */
	if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
		error = cachefs_dlog_cidmap(fscp);
		if (error) {
			error = ENOSPC;
			goto out;
		}
		cp->c_metadata.md_flags |= MD_MAPPING;
		cp->c_flags |= CN_UPDATED;
	}

	/*
	 * NOTE(review): when cachefs_dlog_setsecattr() fails (commit == 0)
	 * this routine returns success with error still 0; other
	 * disconnected ops set error = ENOSPC on a failed dlog write --
	 * confirm this is intended.
	 */
	commit = cachefs_dlog_setsecattr(fscp, vsec, flag, cp, cr);
	if (commit == 0)
		goto out;

	/* fix modes in metadata */
	cachefs_acl2perm(cp, vsec);

	if ((cp->c_flags & CN_NOCACHE) == 0) {
		error = cachefs_cacheacl(cp, vsec);
		if (error != 0) {
			goto out;
		}
	}

	/* XXX is this right? */
	if (cachefs_modified_alloc(cp)) {
		error = ENOSPC;
		goto out;
	}

out:
	if (error != 0)
		cp->c_metadata.md_vattr.va_mode = failmode;

	mutex_exit(&cp->c_statelock);

	/* seal the dlog record with the final status of the operation */
	if (commit) {
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX fix on panic? */
		}
	}

	return (error);
}

/*
 * cachefs_setsecattr -- VOP_SETSECATTR entry point.  Dispatches to the
 * connected or disconnected variant, retrying across timeouts and
 * connected/disconnected transitions.
 */
/*ARGSUSED*/
static int
cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr,
    caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int connected = 0;
	int held = 0;
	int error = 0;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_setsecattr: ENTER vp %p\n", (void *)vp);
#endif
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
		error = ENOSYS;
		goto out;
	}

	if (! cachefs_vtype_aclok(vp)) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the setsecattr operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	for (;;) {
		/* drop hold on file system */
		if (held) {
			/* Won't loop with NFSv4 connected operation */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}

		/* acquire access to the file system */
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;

		/* perform the setattr */
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
			error = cachefs_setsecattr_connected(cp,
			    vsec, flag, cr);
		else
			error = cachefs_setsecattr_disconnected(cp,
			    vsec, flag, cr);
		if (error) {
			/* if connected */
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				if (CFS_TIMEOUT(fscp, error)) {
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					connected = 0;
					continue;
				}
			}

			/* else must be disconnected */
			else {
				if (CFS_TIMEOUT(fscp, error)) {
					connected = 1;
					continue;
				}
			}
		}
		break;
	}

	/*
	 * NOTE(review): this success/failure path returns directly and so
	 * skips the CFSDEBUG EXIT trace below, which only runs for the
	 * early-exit goto out paths -- confirm this asymmetry is intended.
	 */
	if (held) {
		cachefs_cd_release(fscp);
	}
	return (error);

out:
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_setsecattr: EXIT error = %d\n", error);
#endif
	return (error);
}

/*
 * call this BEFORE calling cachefs_cacheacl(), as the latter will
 * sanitize the acl.
 */

/*
 * cachefs_acl2perm -- fold the USER_OBJ/GROUP_OBJ/OTHER_OBJ entries of
 * the ACL back into the cached va_mode permission bits, record the
 * CLASS_OBJ mask in md_aclclass, and mark the cnode updated.
 */
static void
cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec)
{
	aclent_t *aclp;
	int i;

	for (i = 0; i < vsec->vsa_aclcnt; i++) {
		aclp = ((aclent_t *)vsec->vsa_aclentp) + i;
		switch (aclp->a_type) {
		case USER_OBJ:
			cp->c_metadata.md_vattr.va_mode &= (~0700);
			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 6);
			break;

		case GROUP_OBJ:
			cp->c_metadata.md_vattr.va_mode &= (~070);
			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 3);
			break;

		case OTHER_OBJ:
			cp->c_metadata.md_vattr.va_mode &= (~07);
			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm);
			break;

		case CLASS_OBJ:
			cp->c_metadata.md_aclclass = aclp->a_perm;
			break;
		}
	}

	cp->c_flags |= CN_UPDATED;
}

/*
 * cachefs_getsecattr -- VOP_GETSECATTR entry point.  Dispatches to the
 * connected or disconnected variant, retrying across timeouts; with
 * CFS_NOACL mounts a minimal ACL is fabricated from the mode bits.
 */
static int
cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr,
    caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int held = 0, connected = 0;
	int error = 0;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_getsecattr: ENTER vp %p\n", (void *)vp);
#endif

	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the getsecattr operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
		error = fs_fab_acl(vp, vsec, flag, cr, ct);
		goto out;
	}

	for (;;) {
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			break;
		held = 1;

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_getsecattr_connected(vp, vsec, flag,
			    cr);
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_getsecattr_disconnected(vp, vsec, flag,
			    cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/* not in cache; try the back fs directly */
				if (cachefs_cd_access_miss(fscp)) {
					error = cachefs_getsecattr_connected(vp,
					    vsec, flag, cr);
					if (!CFS_TIMEOUT(fscp, error))
						break;
					delay(5*hz);
					connected = 0;
					continue;
				}
				connected = 1;
				continue;
			}
		}
		break;
	}

out:
	if (held)
		cachefs_cd_release(fscp);

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_getsecattr: EXIT error = %d\n", error);
#endif
	return (error);
}

/*
 * cachefs_shrlock -- VOP_SHRLOCK entry point.  Share reservations are
 * always passed through to the back vnode (fetched on demand).
 */
static int
cachefs_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
    caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error = 0;
	vnode_t *backvp;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_shrlock: ENTER vp %p\n", (void *)vp);
#endif

	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the shrlock operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	mutex_enter(&cp->c_statelock);
	if (cp->c_backvp == NULL)
		error = cachefs_getbackvp(fscp, cp);
	backvp = cp->c_backvp;
	mutex_exit(&cp->c_statelock);
	ASSERT((error != 0) || (backvp != NULL));

	if (error == 0) {
		CFS_DPRINT_BACKFS_NFSV4(fscp,
		    ("cachefs_shrlock (nfsv4): cp %p, backvp %p",
		    cp, backvp));
		error = VOP_SHRLOCK(backvp, cmd, shr, flag, cr, ct);
	}

out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_shrlock: EXIT error = %d\n", error);
#endif
	return (error);
}

/*
 * cachefs_getsecattr_connected -- satisfy a getsecattr from the cached
 * ACL when possible (counted as a hit), otherwise fetch it from the
 * back file and opportunistically cache it.
 */
static int
cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
    cred_t *cr)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int hit = 0;
	int error = 0;


	mutex_enter(&cp->c_statelock);
	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
	if (error)
		goto out;

	/* read from the cache if we can */
	if ((cp->c_metadata.md_flags & MD_ACL) &&
	    ((cp->c_flags & CN_NOCACHE) == 0) &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
		ASSERT((cp->c_flags & CN_NOCACHE) == 0);
		error = cachefs_getaclfromcache(cp, vsec);
		if (error) {
			/* cached copy unusable; fall through to back fs */
			cachefs_nocache(cp);
			ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
			error = 0;
		} else {
			hit = 1;
			goto out;
		}
	}

	ASSERT(error == 0);
	if (cp->c_backvp == NULL)
		error = cachefs_getbackvp(fscp, cp);
	if (error)
		goto out;

	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_getsecattr (nfsv4): cp %p, backvp %p",
	    cp, cp->c_backvp));
	error = VOP_GETSECATTR(cp->c_backvp, vsec, flag, cr, NULL);
	if (error)
		goto out;

	/* cache the ACL we just fetched; failure here is non-fatal */
	if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
	    (cachefs_vtype_aclok(vp)) &&
	    ((cp->c_flags & CN_NOCACHE) == 0) &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_cacheacl(cp, vsec);
		if (error) {
			error = 0;
			cachefs_nocache(cp);
		}
	}

out:
	if (error == 0) {
		if (hit)
			fscp->fs_stats.st_hits++;
		else
			fscp->fs_stats.st_misses++;
	}
	mutex_exit(&cp->c_statelock);

	return (error);
}

/*
 * cachefs_getsecattr_disconnected -- satisfy a getsecattr from the
 * cached ACL only; with no usable cached copy the caller must wait for
 * a connection (ETIMEDOUT).
 */
static int
/*ARGSUSED*/
cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec, int flag,
    cred_t *cr)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int hit = 0;
	int error = 0;


	mutex_enter(&cp->c_statelock);

	/* read from the cache if we can */
	if (((cp->c_flags & CN_NOCACHE) == 0) &&
	    (cp->c_metadata.md_flags & MD_ACL)) {
		error = cachefs_getaclfromcache(cp, vsec);
		if (error) {
			cachefs_nocache(cp);
			ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
			error = 0;
		} else {
			hit = 1;
			goto out;
		}
	}
	error = ETIMEDOUT;

out:
	if (error == 0) {
		if (hit)
			fscp->fs_stats.st_hits++;
		else
			fscp->fs_stats.st_misses++;
	}
	mutex_exit(&cp->c_statelock);

	return (error);
}

/*
 * cachefs_cacheacl() -- cache an ACL, which we do by applying it to
 * the frontfile if possible; otherwise, the adjunct directory.
 *
 * inputs:
 * cp - the cnode, with its statelock already held
 * vsecp - a pointer to a vsecattr_t you'd like us to cache as-is,
 *	or NULL if you want us to do the VOP_GETSECATTR(backvp).
 *
 * returns:
 * 0 - all is well
 * nonzero - errno
 */

int
cachefs_cacheacl(cnode_t *cp, vsecattr_t *vsecp)
{
	fscache_t *fscp = C_TO_FSCACHE(cp);
	vsecattr_t vsec;
	aclent_t *aclp;
	int gotvsec = 0;	/* nonzero: vsec entries came from the back fs */
	int error = 0;
	vnode_t *vp = NULL;
	void *aclkeep = NULL;	/* caller's original vsa_aclentp, restored on exit */
	int i;

	ASSERT(MUTEX_HELD(&cp->c_statelock));
	ASSERT((cp->c_flags & CN_NOCACHE) == 0);
	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
	ASSERT(cachefs_vtype_aclok(CTOV(cp)));

	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
		error = ENOSYS;
		goto out;
	}

	if (vsecp == NULL) {
		/* no ACL supplied; fetch the full ACL from the back file */
		if (cp->c_backvp == NULL)
			error = cachefs_getbackvp(fscp, cp);
		if (error != 0)
			goto out;
		vsecp = &vsec;
		bzero(&vsec, sizeof (vsec));
		vsecp->vsa_mask =
		    VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
		error = VOP_GETSECATTR(cp->c_backvp, vsecp, 0, kcred, NULL);
		if (error != 0) {
			goto out;
		}
		gotvsec = 1;
	} else if (vsecp->vsa_mask & VSA_ACL) {
		/* work on a private copy so the caller's ACL isn't sanitized */
		aclkeep = vsecp->vsa_aclentp;
		vsecp->vsa_aclentp = cachefs_kmem_alloc(vsecp->vsa_aclcnt *
		    sizeof (aclent_t), KM_SLEEP);
		bcopy(aclkeep, vsecp->vsa_aclentp, vsecp->vsa_aclcnt *
		    sizeof (aclent_t));
	} else if ((vsecp->vsa_mask & (VSA_ACL | VSA_DFACL)) == 0) {
		/* unless there's real data, we can cache nothing. */
		return (0);
	}

	/*
	 * prevent the ACL from chmoding our frontfile, and
	 * snarf the class info
	 */

	if ((vsecp->vsa_mask & (VSA_ACL | VSA_ACLCNT)) ==
	    (VSA_ACL | VSA_ACLCNT)) {
		for (i = 0; i < vsecp->vsa_aclcnt; i++) {
			aclp = ((aclent_t *)vsecp->vsa_aclentp) + i;
			switch (aclp->a_type) {
			case CLASS_OBJ:
				cp->c_metadata.md_aclclass =
				    aclp->a_perm;
				/*FALLTHROUGH*/
			case USER_OBJ:
			case GROUP_OBJ:
			case OTHER_OBJ:
				aclp->a_perm = 06;
			}
		}
	}

	/*
	 * if the frontfile exists, then we always do the work.  but,
	 * if there's no frontfile, and the ACL isn't a `real' ACL,
	 * then we don't want to do the work.  otherwise, an `ls -l'
	 * will create tons of emtpy frontfiles.
	 */

	if (((cp->c_metadata.md_flags & MD_FILE) == 0) &&
	    ((vsecp->vsa_aclcnt + vsecp->vsa_dfaclcnt)
	    <= MIN_ACL_ENTRIES)) {
		cp->c_metadata.md_flags |= MD_ACL;
		cp->c_flags |= CN_UPDATED;
		goto out;
	}

	/*
	 * if we have a default ACL, then we need a
	 * real live directory in the frontfs that we
	 * can apply the ACL to.  if not, then we just
	 * use the frontfile.  we get the frontfile
	 * regardless -- that way, we know the
	 * directory for the frontfile exists.
	 */

	if (vsecp->vsa_dfaclcnt > 0) {
		if (cp->c_acldirvp == NULL)
			error = cachefs_getacldirvp(cp);
		if (error != 0)
			goto out;
		vp = cp->c_acldirvp;
	} else {
		if (cp->c_frontvp == NULL)
			error = cachefs_getfrontfile(cp);
		if (error != 0)
			goto out;
		vp = cp->c_frontvp;
	}
	ASSERT(vp != NULL);

	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
	error = VOP_SETSECATTR(vp, vsecp, 0, kcred, NULL);
	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
	if (error != 0) {
#ifdef CFSDEBUG
		CFS_DEBUG(CFSDEBUG_VOPS)
			printf("cachefs_cacheacl: setsecattr: error %d\n",
			    error);
#endif /* CFSDEBUG */
		/*
		 * If there was an error, we don't want to call
		 * cachefs_nocache(); so, set error to 0.
		 * We will call cachefs_purgeacl(), in order to
		 * clean such things as adjunct ACL directories.
		 */
		cachefs_purgeacl(cp);
		error = 0;
		goto out;
	}
	if (vp == cp->c_frontvp)
		cp->c_flags |= CN_NEED_FRONT_SYNC;

	cp->c_metadata.md_flags |= MD_ACL;
	cp->c_flags |= CN_UPDATED;

out:
	if ((error) && (fscp->fs_cdconnected == CFS_CD_CONNECTED))
		cachefs_nocache(cp);

	/* free the back-fs copy, or restore the caller's original entries */
	if (gotvsec) {
		if (vsec.vsa_aclcnt)
			kmem_free(vsec.vsa_aclentp,
			    vsec.vsa_aclcnt * sizeof (aclent_t));
		if (vsec.vsa_dfaclcnt)
			kmem_free(vsec.vsa_dfaclentp,
			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
	} else if (aclkeep != NULL) {
		cachefs_kmem_free(vsecp->vsa_aclentp,
		    vsecp->vsa_aclcnt * sizeof (aclent_t));
		vsecp->vsa_aclentp = aclkeep;
	}

	return (error);
}

/*
 * cachefs_purgeacl -- throw away the cached ACL state for cp: drop the
 * adjunct ACL directory vnode (and remove the "<frontfile>.d" adjunct
 * directory if one was created), then clear MD_ACL/MD_ACLDIR.
 */
void
cachefs_purgeacl(cnode_t *cp)
{
	ASSERT(MUTEX_HELD(&cp->c_statelock));

	ASSERT(!CFS_ISFS_BACKFS_NFSV4(C_TO_FSCACHE(cp)));

	if (cp->c_acldirvp != NULL) {
		VN_RELE(cp->c_acldirvp);
		cp->c_acldirvp = NULL;
	}

	if (cp->c_metadata.md_flags & MD_ACLDIR) {
		char
name[CFS_FRONTFILE_NAME_SIZE + 2];

		ASSERT(cp->c_filegrp->fg_dirvp != NULL);
		make_ascii_name(&cp->c_id, name);
		(void) strcat(name, ".d");

		(void) VOP_RMDIR(cp->c_filegrp->fg_dirvp, name,
		    cp->c_filegrp->fg_dirvp, kcred, NULL, 0);
	}

	cp->c_metadata.md_flags &= ~(MD_ACL | MD_ACLDIR);
	cp->c_flags |= CN_UPDATED;
}

/*
 * cachefs_getacldirvp -- look up (or create) the adjunct ACL directory
 * "<frontfile-name>.d" in the filegrp directory and hold it in
 * cp->c_acldirvp; used when a default ACL needs a real directory to be
 * applied to.  Sets MD_ACLDIR on success.  Caller holds c_statelock
 * (asserted).
 */
static int
cachefs_getacldirvp(cnode_t *cp)
{
	char name[CFS_FRONTFILE_NAME_SIZE + 2];
	int error = 0;

	ASSERT(MUTEX_HELD(&cp->c_statelock));
	ASSERT(cp->c_acldirvp == NULL);

	/* ensure the frontfile (and thus its directory) exists first */
	if (cp->c_frontvp == NULL)
		error = cachefs_getfrontfile(cp);
	if (error != 0)
		goto out;

	ASSERT(cp->c_filegrp->fg_dirvp != NULL);
	make_ascii_name(&cp->c_id, name);
	(void) strcat(name, ".d");
	error = VOP_LOOKUP(cp->c_filegrp->fg_dirvp,
	    name, &cp->c_acldirvp, NULL, 0, NULL, kcred, NULL, NULL, NULL);
	if ((error != 0) && (error != ENOENT))
		goto out;

	/* not there yet -- create the adjunct directory */
	if (error != 0) {
		vattr_t va;

		va.va_mode = S_IFDIR | 0777;
		va.va_uid = 0;
		va.va_gid = 0;
		va.va_type = VDIR;
		va.va_mask = AT_TYPE | AT_MODE |
		    AT_UID | AT_GID;
		error =
		    VOP_MKDIR(cp->c_filegrp->fg_dirvp,
		    name, &va, &cp->c_acldirvp, kcred, NULL, 0, NULL);
		if (error != 0)
			goto out;
	}

	ASSERT(cp->c_acldirvp != NULL);
	cp->c_metadata.md_flags |= MD_ACLDIR;
	cp->c_flags |= CN_UPDATED;

out:
	if (error != 0)
		cp->c_acldirvp = NULL;
	return (error);
}

/*
 * cachefs_getaclfromcache -- read the cached ACL for cp into vsec,
 * from the adjunct ACL directory or the frontfile; when neither holds
 * a `real' ACL, a minimal one is forged from the cached attributes.
 */
static int
cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec)
{
	aclent_t *aclp;
	int error = 0;
	vnode_t *vp = NULL;
	int i;

	ASSERT(cp->c_metadata.md_flags & MD_ACL);
	ASSERT(MUTEX_HELD(&cp->c_statelock));
	ASSERT(vsec->vsa_aclentp == NULL);

	if (cp->c_metadata.md_flags & MD_ACLDIR) {
		if (cp->c_acldirvp == NULL)
			error = cachefs_getacldirvp(cp);
		if
(error != 0) 9450 goto out; 9451 vp = cp->c_acldirvp; 9452 } else if (cp->c_metadata.md_flags & MD_FILE) { 9453 if (cp->c_frontvp == NULL) 9454 error = cachefs_getfrontfile(cp); 9455 if (error != 0) 9456 goto out; 9457 vp = cp->c_frontvp; 9458 } else { 9459 9460 /* 9461 * if we get here, then we know that MD_ACL is on, 9462 * meaning an ACL was successfully cached. we also 9463 * know that neither MD_ACLDIR nor MD_FILE are on, so 9464 * this has to be an entry without a `real' ACL. 9465 * thus, we forge whatever is necessary. 9466 */ 9467 9468 if (vsec->vsa_mask & VSA_ACLCNT) 9469 vsec->vsa_aclcnt = MIN_ACL_ENTRIES; 9470 9471 if (vsec->vsa_mask & VSA_ACL) { 9472 vsec->vsa_aclentp = 9473 kmem_zalloc(MIN_ACL_ENTRIES * 9474 sizeof (aclent_t), KM_SLEEP); 9475 aclp = (aclent_t *)vsec->vsa_aclentp; 9476 aclp->a_type = USER_OBJ; 9477 ++aclp; 9478 aclp->a_type = GROUP_OBJ; 9479 ++aclp; 9480 aclp->a_type = OTHER_OBJ; 9481 ++aclp; 9482 aclp->a_type = CLASS_OBJ; 9483 ksort((caddr_t)vsec->vsa_aclentp, MIN_ACL_ENTRIES, 9484 sizeof (aclent_t), cmp2acls); 9485 } 9486 9487 ASSERT(vp == NULL); 9488 } 9489 9490 if (vp != NULL) { 9491 if ((error = VOP_GETSECATTR(vp, vsec, 0, kcred, NULL)) != 0) { 9492 #ifdef CFSDEBUG 9493 CFS_DEBUG(CFSDEBUG_VOPS) 9494 printf("cachefs_getaclfromcache: error %d\n", 9495 error); 9496 #endif /* CFSDEBUG */ 9497 goto out; 9498 } 9499 } 9500 9501 if (vsec->vsa_aclentp != NULL) { 9502 for (i = 0; i < vsec->vsa_aclcnt; i++) { 9503 aclp = ((aclent_t *)vsec->vsa_aclentp) + i; 9504 switch (aclp->a_type) { 9505 case USER_OBJ: 9506 aclp->a_id = cp->c_metadata.md_vattr.va_uid; 9507 aclp->a_perm = 9508 cp->c_metadata.md_vattr.va_mode & 0700; 9509 aclp->a_perm >>= 6; 9510 break; 9511 9512 case GROUP_OBJ: 9513 aclp->a_id = cp->c_metadata.md_vattr.va_gid; 9514 aclp->a_perm = 9515 cp->c_metadata.md_vattr.va_mode & 070; 9516 aclp->a_perm >>= 3; 9517 break; 9518 9519 case OTHER_OBJ: 9520 aclp->a_perm = 9521 cp->c_metadata.md_vattr.va_mode & 07; 9522 break; 9523 9524 case 
CLASS_OBJ: 9525 aclp->a_perm = 9526 cp->c_metadata.md_aclclass; 9527 break; 9528 } 9529 } 9530 } 9531 9532 out: 9533 9534 if (error != 0) 9535 cachefs_nocache(cp); 9536 9537 return (error); 9538 } 9539 9540 /* 9541 * Fills in targp with attribute information from srcp, cp 9542 * and if necessary the system. 9543 */ 9544 static void 9545 cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp, cred_t *cr) 9546 { 9547 time_t now; 9548 9549 ASSERT((srcp->va_mask & (AT_TYPE | AT_MODE)) == (AT_TYPE | AT_MODE)); 9550 9551 /* 9552 * Add code to fill in the va struct. We use the fields from 9553 * the srcp struct if they are populated, otherwise we guess 9554 */ 9555 9556 targp->va_mask = 0; /* initialize all fields */ 9557 targp->va_mode = srcp->va_mode; 9558 targp->va_type = srcp->va_type; 9559 targp->va_nlink = 1; 9560 targp->va_nodeid = 0; 9561 9562 if (srcp->va_mask & AT_UID) 9563 targp->va_uid = srcp->va_uid; 9564 else 9565 targp->va_uid = crgetuid(cr); 9566 9567 if (srcp->va_mask & AT_GID) 9568 targp->va_gid = srcp->va_gid; 9569 else 9570 targp->va_gid = crgetgid(cr); 9571 9572 if (srcp->va_mask & AT_FSID) 9573 targp->va_fsid = srcp->va_fsid; 9574 else 9575 targp->va_fsid = 0; /* initialize all fields */ 9576 9577 now = gethrestime_sec(); 9578 if (srcp->va_mask & AT_ATIME) 9579 targp->va_atime = srcp->va_atime; 9580 else 9581 targp->va_atime.tv_sec = now; 9582 9583 if (srcp->va_mask & AT_MTIME) 9584 targp->va_mtime = srcp->va_mtime; 9585 else 9586 targp->va_mtime.tv_sec = now; 9587 9588 if (srcp->va_mask & AT_CTIME) 9589 targp->va_ctime = srcp->va_ctime; 9590 else 9591 targp->va_ctime.tv_sec = now; 9592 9593 9594 if (srcp->va_mask & AT_SIZE) 9595 targp->va_size = srcp->va_size; 9596 else 9597 targp->va_size = 0; 9598 9599 /* 9600 * the remaing fields are set by the fs and not changable. 9601 * we populate these entries useing the parent directory 9602 * values. It's a small hack, but should work. 
9603 */ 9604 targp->va_blksize = cp->c_metadata.md_vattr.va_blksize; 9605 targp->va_rdev = cp->c_metadata.md_vattr.va_rdev; 9606 targp->va_nblocks = cp->c_metadata.md_vattr.va_nblocks; 9607 targp->va_seq = 0; /* Never keep the sequence number */ 9608 } 9609 9610 /* 9611 * set the gid for a newly created file. The algorithm is as follows: 9612 * 9613 * 1) If the gid is set in the attribute list, then use it if 9614 * the caller is privileged, belongs to the target group, or 9615 * the group is the same as the parent directory. 9616 * 9617 * 2) If the parent directory's set-gid bit is clear, then use 9618 * the process gid 9619 * 9620 * 3) Otherwise, use the gid of the parent directory. 9621 * 9622 * Note: newcp->c_attr.va_{mode,type} must already be set before calling 9623 * this routine. 9624 */ 9625 static void 9626 cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap, cred_t *cr) 9627 { 9628 if ((vap->va_mask & AT_GID) && 9629 ((vap->va_gid == dcp->c_attr.va_gid) || 9630 groupmember(vap->va_gid, cr) || 9631 secpolicy_vnode_create_gid(cr) != 0)) { 9632 newcp->c_attr.va_gid = vap->va_gid; 9633 } else { 9634 if (dcp->c_attr.va_mode & S_ISGID) 9635 newcp->c_attr.va_gid = dcp->c_attr.va_gid; 9636 else 9637 newcp->c_attr.va_gid = crgetgid(cr); 9638 } 9639 9640 /* 9641 * if we're creating a directory, and the parent directory has the 9642 * set-GID bit set, set it on the new directory. 9643 * Otherwise, if the user is neither privileged nor a member of the 9644 * file's new group, clear the file's set-GID bit. 9645 */ 9646 if (dcp->c_attr.va_mode & S_ISGID && newcp->c_attr.va_type == VDIR) { 9647 newcp->c_attr.va_mode |= S_ISGID; 9648 } else if ((newcp->c_attr.va_mode & S_ISGID) && 9649 secpolicy_vnode_setids_setgids(cr, newcp->c_attr.va_gid) != 0) 9650 newcp->c_attr.va_mode &= ~S_ISGID; 9651 } 9652 9653 /* 9654 * create an acl for the newly created file. should be called right 9655 * after cachefs_creategid. 
 */

static void
cachefs_createacl(cnode_t *dcp, cnode_t *newcp)
{
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	vsecattr_t vsec;
	int gotvsec = 0;
	int error = 0;	/* placeholder */
	aclent_t *aclp;
	o_mode_t *classp = NULL;
	o_mode_t gunion = 0;
	int i;

	/* nothing to do if ACLs are disabled or the vtype cannot carry one */
	if ((fscp->fs_info.fi_mntflags & CFS_NOACL) ||
	    (! cachefs_vtype_aclok(CTOV(newcp))))
		return;

	ASSERT(dcp->c_metadata.md_flags & MD_ACL);
	ASSERT(MUTEX_HELD(&dcp->c_statelock));
	ASSERT(MUTEX_HELD(&newcp->c_statelock));

	/*
	 * XXX should probably not do VSA_ACL and VSA_ACLCNT, but that
	 * would hit code paths that isn't hit anywhere else.
	 */

	/* fetch the parent directory's ACL, including default entries */
	bzero(&vsec, sizeof (vsec));
	vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
	error = cachefs_getaclfromcache(dcp, &vsec);
	if (error != 0)
		goto out;
	gotvsec = 1;

	if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL)) {
		/*
		 * the parent's default ACL becomes the child's access
		 * ACL; a new directory additionally inherits a copy of
		 * it as its own default ACL.
		 */
		if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL))
			kmem_free(vsec.vsa_aclentp,
			    vsec.vsa_aclcnt * sizeof (aclent_t));

		vsec.vsa_aclcnt = vsec.vsa_dfaclcnt;
		vsec.vsa_aclentp = vsec.vsa_dfaclentp;
		vsec.vsa_dfaclcnt = 0;
		vsec.vsa_dfaclentp = NULL;

		if (newcp->c_attr.va_type == VDIR) {
			vsec.vsa_dfaclentp = kmem_alloc(vsec.vsa_aclcnt *
			    sizeof (aclent_t), KM_SLEEP);
			vsec.vsa_dfaclcnt = vsec.vsa_aclcnt;
			bcopy(vsec.vsa_aclentp, vsec.vsa_dfaclentp,
			    vsec.vsa_aclcnt * sizeof (aclent_t));
		}

		/*
		 * this function should be called pretty much after
		 * the rest of the file creation stuff is done.  so,
		 * uid, gid, etc. should be `right'.  we'll go with
		 * that, rather than trying to determine whether to
		 * get stuff from cr or va.
		 */

		/* rewrite each DEF_* entry as its non-default counterpart */
		for (i = 0; i < vsec.vsa_aclcnt; i++) {
			aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
			switch (aclp->a_type) {
			case DEF_USER_OBJ:
				aclp->a_type = USER_OBJ;
				aclp->a_id = newcp->c_metadata.md_vattr.va_uid;
				aclp->a_perm =
				    newcp->c_metadata.md_vattr.va_mode;
				aclp->a_perm &= 0700;
				aclp->a_perm >>= 6;
				break;

			case DEF_GROUP_OBJ:
				aclp->a_type = GROUP_OBJ;
				aclp->a_id = newcp->c_metadata.md_vattr.va_gid;
				aclp->a_perm =
				    newcp->c_metadata.md_vattr.va_mode;
				aclp->a_perm &= 070;
				aclp->a_perm >>= 3;
				gunion |= aclp->a_perm;
				break;

			case DEF_OTHER_OBJ:
				aclp->a_type = OTHER_OBJ;
				aclp->a_perm =
				    newcp->c_metadata.md_vattr.va_mode & 07;
				break;

			case DEF_CLASS_OBJ:
				aclp->a_type = CLASS_OBJ;
				/* remembered so it can be masked below */
				classp = &(aclp->a_perm);
				break;

			case DEF_USER:
				aclp->a_type = USER;
				gunion |= aclp->a_perm;
				break;

			case DEF_GROUP:
				aclp->a_type = GROUP;
				gunion |= aclp->a_perm;
				break;
			}
		}

		/* XXX is this the POSIX thing to do? */
		if (classp != NULL)
			*classp &= gunion;

		/*
		 * we don't need to log this; rather, we clear the
		 * MD_ACL bit when we reconnect.
		 */

		error = cachefs_cacheacl(newcp, &vsec);
		if (error != 0)
			goto out;
	}

	newcp->c_metadata.md_aclclass = 07;	/* XXX check posix */
	newcp->c_metadata.md_flags |= MD_ACL;
	newcp->c_flags |= CN_UPDATED;

out:

	if (gotvsec) {
		if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL))
			kmem_free(vsec.vsa_aclentp,
			    vsec.vsa_aclcnt * sizeof (aclent_t));
		if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL))
			kmem_free(vsec.vsa_dfaclentp,
			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
	}
}

/*
 * this is translated from the UFS code for access checking.
 */

static int
cachefs_access_local(void *vcp, int mode, cred_t *cr)
{
	cnode_t *cp = vcp;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int shift = 0;

	ASSERT(MUTEX_HELD(&cp->c_statelock));

	if (mode & VWRITE) {
		/*
		 * Disallow write attempts on read-only
		 * file systems, unless the file is special.
		 */
		struct vnode *vp = CTOV(cp);
		if (vn_is_readonly(vp)) {
			if (!IS_DEVVP(vp)) {
				return (EROFS);
			}
		}
	}

	/*
	 * if we need to do ACLs, do it.  this works whether anyone
	 * has explicitly made an ACL or not.
	 */

	if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
	    (cachefs_vtype_aclok(CTOV(cp))))
		return (cachefs_acl_access(cp, mode, cr));

	/* pick the owner, group, or other bits of the mode */
	if (crgetuid(cr) != cp->c_attr.va_uid) {
		shift += 3;
		if (!groupmember(cp->c_attr.va_gid, cr))
			shift += 3;
	}

	/* compute missing mode bits */
	mode &= ~(cp->c_attr.va_mode << shift);

	if (mode == 0)
		return (0);

	/* let privilege make up for whatever the mode bits denied */
	return (secpolicy_vnode_access(cr, CTOV(cp), cp->c_attr.va_uid, mode));
}

/*
 * This is transcribed from ufs_acl_access().  If that changes, then
 * this should, too.
 *
 * Check the cnode's ACL's to see if this mode of access is
 * allowed; return 0 if allowed, EACCES if not.
 *
 * We follow the procedure defined in Sec. 3.3.5, ACL Access
 * Check Algorithm, of the POSIX 1003.6 Draft Standard.
 *
 * Note: ACL_MODE_CHECK grants when every requested bit is in PERM,
 * else falls back to privilege; it references `owner' from the
 * enclosing scope.
 */

#define	ACL_MODE_CHECK(M, PERM, C, I) ((((M) & (PERM)) == (M)) ? 0 : \
	secpolicy_vnode_access(C, CTOV(I), owner, (M) & ~(PERM)))

static int
cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr)
{
	int error = 0;

	fscache_t *fscp = C_TO_FSCACHE(cp);

	int mask = ~0;
	int ismask = 0;

	int gperm = 0;
	int ngroup = 0;

	vsecattr_t vsec;
	int gotvsec = 0;
	aclent_t *aclp;

	uid_t owner = cp->c_attr.va_uid;

	int i;

	ASSERT(MUTEX_HELD(&cp->c_statelock));
	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);

	/*
	 * strictly speaking, we shouldn't set VSA_DFACL and DFACLCNT,
	 * but then i believe we'd be the only thing exercising those
	 * code paths -- probably a bad thing.
	 */

	bzero(&vsec, sizeof (vsec));
	vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;

	/* XXX KLUDGE! correct insidious 0-class problem */
	if (cp->c_metadata.md_aclclass == 0 &&
	    fscp->fs_cdconnected == CFS_CD_CONNECTED)
		cachefs_purgeacl(cp);
again:
	if (cp->c_metadata.md_flags & MD_ACL) {
		error = cachefs_getaclfromcache(cp, &vsec);
		if (error != 0) {
#ifdef CFSDEBUG
			if (error != ETIMEDOUT)
				CFS_DEBUG(CFSDEBUG_VOPS)
					printf("cachefs_acl_access():"
					    "error %d from getaclfromcache()\n",
					    error);
#endif /* CFSDEBUG */
			/*
			 * getaclfromcache() may have dropped MD_ACL
			 * (nocache); if so retry via the back fs path.
			 */
			if ((cp->c_metadata.md_flags & MD_ACL) == 0) {
				goto again;
			} else {
				goto out;
			}
		}
	} else {
		/* no cached ACL; fetch it from the back filesystem */
		if (cp->c_backvp == NULL) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
				error = cachefs_getbackvp(fscp, cp);
			else
				error = ETIMEDOUT;
		}
		if (error == 0)
			error = VOP_GETSECATTR(cp->c_backvp, &vsec, 0, cr,
			    NULL);
		if (error != 0) {
#ifdef CFSDEBUG
			CFS_DEBUG(CFSDEBUG_VOPS)
				printf("cachefs_acl_access():"
				    "error %d from getsecattr(backvp)\n",
				    error);
#endif /* CFSDEBUG */
			goto out;
		}
		/* best effort: remember the ACL for next time */
		if ((cp->c_flags & CN_NOCACHE) == 0 &&
		    !CFS_ISFS_BACKFS_NFSV4(fscp))
			(void) cachefs_cacheacl(cp, &vsec);
	}
	gotvsec = 1;

	/* first pass: note the owner and the ACL mask (CLASS_OBJ) */
	ASSERT(error == 0);
	for (i = 0; i < vsec.vsa_aclcnt; i++) {
		aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
		switch (aclp->a_type) {
		case USER_OBJ:
			/*
			 * this might look cleaner in the 2nd loop
			 * below, but we do it here as an
			 * optimization.
			 */

			owner = aclp->a_id;
			if (crgetuid(cr) == owner) {
				error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
				    cr, cp);
				goto out;
			}
			break;

		case CLASS_OBJ:
			mask = aclp->a_perm;
			ismask = 1;
			break;
		}
	}

	/* second pass: named users, then group entries, then other */
	ASSERT(error == 0);
	for (i = 0; i < vsec.vsa_aclcnt; i++) {
		aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
		switch (aclp->a_type) {
		case USER:
			if (crgetuid(cr) == aclp->a_id) {
				error = ACL_MODE_CHECK(mode,
				    (aclp->a_perm & mask) << 6, cr, cp);
				goto out;
			}
			break;

		case GROUP_OBJ:
			if (groupmember(aclp->a_id, cr)) {
				++ngroup;
				gperm |= aclp->a_perm;
				if (! ismask) {
					error = ACL_MODE_CHECK(mode,
					    aclp->a_perm << 6,
					    cr, cp);
					goto out;
				}
			}
			break;

		case GROUP:
			if (groupmember(aclp->a_id, cr)) {
				++ngroup;
				gperm |= aclp->a_perm;
			}
			break;

		case OTHER_OBJ:
			if (ngroup == 0) {
				error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
				    cr, cp);
				goto out;
			}
			break;

		default:
			break;
		}
	}

	/* union of matching group permissions, restricted by the mask */
	ASSERT(ngroup > 0);
	error = ACL_MODE_CHECK(mode, (gperm & mask) << 6, cr, cp);

out:
	if (gotvsec) {
		if (vsec.vsa_aclcnt && vsec.vsa_aclentp)
			kmem_free(vsec.vsa_aclentp,
			    vsec.vsa_aclcnt * sizeof (aclent_t));
		if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp)
			kmem_free(vsec.vsa_dfaclentp,
			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
	}

	return (error);
}

/*
 * see if permissions allow for removal of the given file from
 * the given directory.
 */
static int
cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr)
{
	uid_t uid;
	/*
	 * If the containing directory is sticky, the user must:
	 *  - own the directory, or
	 *  - own the file, or
	 *  - be able to write the file (if it's a plain file), or
	 *  - be sufficiently privileged.
	 */
	if ((dcp->c_attr.va_mode & S_ISVTX) &&
	    ((uid = crgetuid(cr)) != dcp->c_attr.va_uid) &&
	    (uid != cp->c_attr.va_uid) &&
	    (cp->c_attr.va_type != VREG ||
	    cachefs_access_local(cp, VWRITE, cr) != 0))
		return (secpolicy_vnode_remove(cr));

	return (0);
}

/*
 * Returns a new name, may even be unique.
 * Stolen from nfs code.
 * Since now we will use renaming to .cfs* in place of .nfs*
 * for CacheFS.  Both NFS and CacheFS will rename opened files.
10051 */ 10052 static char cachefs_prefix[] = ".cfs"; 10053 kmutex_t cachefs_newnum_lock; 10054 10055 static char * 10056 cachefs_newname(void) 10057 { 10058 static uint_t newnum = 0; 10059 char *news; 10060 char *s, *p; 10061 uint_t id; 10062 10063 mutex_enter(&cachefs_newnum_lock); 10064 if (newnum == 0) { 10065 newnum = gethrestime_sec() & 0xfffff; 10066 newnum |= 0x10000; 10067 } 10068 id = newnum++; 10069 mutex_exit(&cachefs_newnum_lock); 10070 10071 news = cachefs_kmem_alloc(MAXNAMELEN, KM_SLEEP); 10072 s = news; 10073 p = cachefs_prefix; 10074 while (*p != '\0') 10075 *s++ = *p++; 10076 while (id != 0) { 10077 *s++ = "0123456789ABCDEF"[id & 0x0f]; 10078 id >>= 4; 10079 } 10080 *s = '\0'; 10081 return (news); 10082 } 10083 10084 /* 10085 * Called to rename the specified file to a temporary file so 10086 * operations to the file after remove work. 10087 * Must call this routine with the dir c_rwlock held as a writer. 10088 */ 10089 static int 10090 /*ARGSUSED*/ 10091 cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm, cred_t *cr) 10092 { 10093 cnode_t *cp = VTOC(vp); 10094 char *tmpname; 10095 fscache_t *fscp = C_TO_FSCACHE(cp); 10096 int error; 10097 10098 ASSERT(RW_WRITE_HELD(&(VTOC(dvp)->c_rwlock))); 10099 10100 /* get the new name for the file */ 10101 tmpname = cachefs_newname(); 10102 10103 /* do the link */ 10104 if (fscp->fs_cdconnected == CFS_CD_CONNECTED) 10105 error = cachefs_link_connected(dvp, vp, tmpname, cr); 10106 else 10107 error = cachefs_link_disconnected(dvp, vp, tmpname, cr); 10108 if (error) { 10109 cachefs_kmem_free(tmpname, MAXNAMELEN); 10110 return (error); 10111 } 10112 10113 mutex_enter(&cp->c_statelock); 10114 if (cp->c_unldvp) { 10115 VN_RELE(cp->c_unldvp); 10116 cachefs_kmem_free(cp->c_unlname, MAXNAMELEN); 10117 crfree(cp->c_unlcred); 10118 } 10119 10120 VN_HOLD(dvp); 10121 cp->c_unldvp = dvp; 10122 crhold(cr); 10123 cp->c_unlcred = cr; 10124 cp->c_unlname = tmpname; 10125 10126 /* drop the backvp so NFS does not also do a 
rename */ 10127 mutex_exit(&cp->c_statelock); 10128 10129 return (0); 10130 } 10131 10132 /* 10133 * Marks the cnode as modified. 10134 */ 10135 static void 10136 cachefs_modified(cnode_t *cp) 10137 { 10138 fscache_t *fscp = C_TO_FSCACHE(cp); 10139 struct vattr va; 10140 int error; 10141 10142 ASSERT(MUTEX_HELD(&cp->c_statelock)); 10143 ASSERT(cp->c_metadata.md_rlno); 10144 10145 /* if not on the modify list */ 10146 if (cp->c_metadata.md_rltype != CACHEFS_RL_MODIFIED) { 10147 /* put on modified list, also marks the file as modified */ 10148 cachefs_rlent_moveto(fscp->fs_cache, CACHEFS_RL_MODIFIED, 10149 cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks); 10150 cp->c_metadata.md_rltype = CACHEFS_RL_MODIFIED; 10151 cp->c_flags |= CN_UPDATED; 10152 10153 /* if a modified regular file that is not local */ 10154 if (((cp->c_id.cid_flags & CFS_CID_LOCAL) == 0) && 10155 (cp->c_metadata.md_flags & MD_FILE) && 10156 (cp->c_attr.va_type == VREG)) { 10157 10158 if (cp->c_frontvp == NULL) 10159 (void) cachefs_getfrontfile(cp); 10160 if (cp->c_frontvp) { 10161 /* identify file so fsck knows it is modified */ 10162 va.va_mode = 0766; 10163 va.va_mask = AT_MODE; 10164 error = VOP_SETATTR(cp->c_frontvp, 10165 &va, 0, kcred, NULL); 10166 if (error) { 10167 cmn_err(CE_WARN, 10168 "Cannot change ff mode.\n"); 10169 } 10170 } 10171 } 10172 } 10173 } 10174 10175 /* 10176 * Marks the cnode as modified. 10177 * Allocates a rl slot for the cnode if necessary. 10178 * Returns 0 for success, !0 if cannot get an rl slot. 
 */
static int
cachefs_modified_alloc(cnode_t *cp)
{
	fscache_t *fscp = C_TO_FSCACHE(cp);
	filegrp_t *fgp = cp->c_filegrp;
	int error;
	rl_entry_t rl_ent;

	ASSERT(MUTEX_HELD(&cp->c_statelock));

	/* get the rl slot if needed */
	if (cp->c_metadata.md_rlno == 0) {
		/* get a metadata slot if we do not have one yet */
		if (cp->c_flags & CN_ALLOC_PENDING) {
			if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
				(void) filegrp_allocattr(cp->c_filegrp);
			}
			error = filegrp_create_metadata(cp->c_filegrp,
			    &cp->c_metadata, &cp->c_id);
			if (error)
				return (error);
			cp->c_flags &= ~CN_ALLOC_PENDING;
		}

		/* get a free rl entry */
		rl_ent.rl_fileno = cp->c_id.cid_fileno;
		rl_ent.rl_local = (cp->c_id.cid_flags & CFS_CID_LOCAL) ? 1 : 0;
		rl_ent.rl_fsid = fscp->fs_cfsid;
		rl_ent.rl_attrc = 0;
		error = cachefs_rl_alloc(fscp->fs_cache, &rl_ent,
		    &cp->c_metadata.md_rlno);
		if (error)
			return (error);
		cp->c_metadata.md_rltype = CACHEFS_RL_NONE;

		/* hold the filegrp so the attrcache file is not gc */
		error = filegrp_ffhold(fgp);
		if (error) {
			/* roll back the rl allocation before failing */
			cachefs_rlent_moveto(fscp->fs_cache,
			    CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
			cp->c_metadata.md_rlno = 0;
			return (error);
		}
	}
	cachefs_modified(cp);
	return (0);
}

/*
 * Returns non-zero if the vnode type is one that can carry an ACL
 * (regular file, directory, or fifo); VNON never qualifies.
 */
int
cachefs_vtype_aclok(vnode_t *vp)
{
	vtype_t *vtp, oktypes[] = {VREG, VDIR, VFIFO, VNON};

	if (vp->v_type == VNON)
		return (0);

	/* VNON terminates the table, so the scan always stops */
	for (vtp = oktypes; *vtp != VNON; vtp++)
		if (vp->v_type == *vtp)
			break;

	return (*vtp != VNON);
}

/*
 * VOP_PATHCONF for cachefs.  _PC_FILESIZEBITS is answered from the
 * fscache's maximum file offset; all other queries are punted to the
 * generic fs_pathconf().
 */
static int
cachefs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
	caller_context_t *ct)
{
	int error = 0;
	fscache_t *fscp = C_TO_FSCACHE(VTOC(vp));

	/* Assert cachefs compatibility if NFSv4 is in use */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(vp));

	if (cmd == _PC_FILESIZEBITS) {
		u_offset_t maxsize = fscp->fs_offmax;
		(*valp) = 0;
		/* count the bits needed to represent fs_offmax ... */
		while (maxsize != 0) {
			maxsize >>= 1;
			(*valp)++;
		}
		/* ... plus one, since file sizes are reported as signed */
		(*valp)++;
	} else
		error = fs_pathconf(vp, cmd, valp, cr, ct);

	return (error);
}