/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
 * All rights reserved.
 */

/*
 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
33*06e6833aSJosef 'Jeff' Sipek * Copyright 2015 Nexenta Systems, Inc. All rights reserved. 3472102e74SBryan Cantrill */ 3572102e74SBryan Cantrill 367c478bd9Sstevel@tonic-gate #include <sys/param.h> 377c478bd9Sstevel@tonic-gate #include <sys/types.h> 387c478bd9Sstevel@tonic-gate #include <sys/systm.h> 397c478bd9Sstevel@tonic-gate #include <sys/cred.h> 407c478bd9Sstevel@tonic-gate #include <sys/time.h> 417c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 427c478bd9Sstevel@tonic-gate #include <sys/vfs.h> 43aa59c4cbSrsb #include <sys/vfs_opreg.h> 447c478bd9Sstevel@tonic-gate #include <sys/file.h> 457c478bd9Sstevel@tonic-gate #include <sys/filio.h> 467c478bd9Sstevel@tonic-gate #include <sys/uio.h> 477c478bd9Sstevel@tonic-gate #include <sys/buf.h> 487c478bd9Sstevel@tonic-gate #include <sys/mman.h> 497c478bd9Sstevel@tonic-gate #include <sys/pathname.h> 507c478bd9Sstevel@tonic-gate #include <sys/dirent.h> 517c478bd9Sstevel@tonic-gate #include <sys/debug.h> 527c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h> 537c478bd9Sstevel@tonic-gate #include <sys/fcntl.h> 547c478bd9Sstevel@tonic-gate #include <sys/flock.h> 557c478bd9Sstevel@tonic-gate #include <sys/swap.h> 567c478bd9Sstevel@tonic-gate #include <sys/errno.h> 577c478bd9Sstevel@tonic-gate #include <sys/strsubr.h> 587c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 597c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 607c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 617c478bd9Sstevel@tonic-gate #include <sys/pathconf.h> 627c478bd9Sstevel@tonic-gate #include <sys/utsname.h> 637c478bd9Sstevel@tonic-gate #include <sys/dnlc.h> 647c478bd9Sstevel@tonic-gate #include <sys/acl.h> 657c478bd9Sstevel@tonic-gate #include <sys/systeminfo.h> 667c478bd9Sstevel@tonic-gate #include <sys/atomic.h> 677c478bd9Sstevel@tonic-gate #include <sys/policy.h> 687c478bd9Sstevel@tonic-gate #include <sys/sdt.h> 695679c89fSjv227347 #include <sys/zone.h> 707c478bd9Sstevel@tonic-gate 717c478bd9Sstevel@tonic-gate #include <rpc/types.h> 
727c478bd9Sstevel@tonic-gate #include <rpc/auth.h> 737c478bd9Sstevel@tonic-gate #include <rpc/clnt.h> 740a701b1eSRobert Gordon #include <rpc/rpc_rdma.h> 757c478bd9Sstevel@tonic-gate 767c478bd9Sstevel@tonic-gate #include <nfs/nfs.h> 777c478bd9Sstevel@tonic-gate #include <nfs/nfs_clnt.h> 787c478bd9Sstevel@tonic-gate #include <nfs/rnode.h> 797c478bd9Sstevel@tonic-gate #include <nfs/nfs_acl.h> 807c478bd9Sstevel@tonic-gate #include <nfs/lm.h> 817c478bd9Sstevel@tonic-gate 827c478bd9Sstevel@tonic-gate #include <vm/hat.h> 837c478bd9Sstevel@tonic-gate #include <vm/as.h> 847c478bd9Sstevel@tonic-gate #include <vm/page.h> 857c478bd9Sstevel@tonic-gate #include <vm/pvn.h> 867c478bd9Sstevel@tonic-gate #include <vm/seg.h> 877c478bd9Sstevel@tonic-gate #include <vm/seg_map.h> 887c478bd9Sstevel@tonic-gate #include <vm/seg_kpm.h> 897c478bd9Sstevel@tonic-gate #include <vm/seg_vn.h> 907c478bd9Sstevel@tonic-gate 917c478bd9Sstevel@tonic-gate #include <fs/fs_subr.h> 927c478bd9Sstevel@tonic-gate 937c478bd9Sstevel@tonic-gate #include <sys/ddi.h> 947c478bd9Sstevel@tonic-gate 957c478bd9Sstevel@tonic-gate static int nfs3_rdwrlbn(vnode_t *, page_t *, u_offset_t, size_t, int, 967c478bd9Sstevel@tonic-gate cred_t *); 977c478bd9Sstevel@tonic-gate static int nfs3write(vnode_t *, caddr_t, u_offset_t, int, cred_t *, 987c478bd9Sstevel@tonic-gate stable_how *); 997c478bd9Sstevel@tonic-gate static int nfs3read(vnode_t *, caddr_t, offset_t, int, size_t *, cred_t *); 1007c478bd9Sstevel@tonic-gate static int nfs3setattr(vnode_t *, struct vattr *, int, cred_t *); 1017c478bd9Sstevel@tonic-gate static int nfs3_accessx(void *, int, cred_t *); 1027c478bd9Sstevel@tonic-gate static int nfs3lookup_dnlc(vnode_t *, char *, vnode_t **, cred_t *); 1037c478bd9Sstevel@tonic-gate static int nfs3lookup_otw(vnode_t *, char *, vnode_t **, cred_t *, int); 1047c478bd9Sstevel@tonic-gate static int nfs3create(vnode_t *, char *, struct vattr *, enum vcexcl, 1057c478bd9Sstevel@tonic-gate int, vnode_t **, cred_t *, int); 
1067c478bd9Sstevel@tonic-gate static int nfs3excl_create_settimes(vnode_t *, struct vattr *, cred_t *); 1077c478bd9Sstevel@tonic-gate static int nfs3mknod(vnode_t *, char *, struct vattr *, enum vcexcl, 1087c478bd9Sstevel@tonic-gate int, vnode_t **, cred_t *); 109da6c28aaSamw static int nfs3rename(vnode_t *, char *, vnode_t *, char *, cred_t *, 110da6c28aaSamw caller_context_t *); 1117c478bd9Sstevel@tonic-gate static int do_nfs3readdir(vnode_t *, rddir_cache *, cred_t *); 1127c478bd9Sstevel@tonic-gate static void nfs3readdir(vnode_t *, rddir_cache *, cred_t *); 1137c478bd9Sstevel@tonic-gate static void nfs3readdirplus(vnode_t *, rddir_cache *, cred_t *); 1147c478bd9Sstevel@tonic-gate static int nfs3_bio(struct buf *, stable_how *, cred_t *); 1157c478bd9Sstevel@tonic-gate static int nfs3_getapage(vnode_t *, u_offset_t, size_t, uint_t *, 1167c478bd9Sstevel@tonic-gate page_t *[], size_t, struct seg *, caddr_t, 1177c478bd9Sstevel@tonic-gate enum seg_rw, cred_t *); 1187c478bd9Sstevel@tonic-gate static void nfs3_readahead(vnode_t *, u_offset_t, caddr_t, struct seg *, 1197c478bd9Sstevel@tonic-gate cred_t *); 1207c478bd9Sstevel@tonic-gate static int nfs3_sync_putapage(vnode_t *, page_t *, u_offset_t, size_t, 1217c478bd9Sstevel@tonic-gate int, cred_t *); 1227c478bd9Sstevel@tonic-gate static int nfs3_sync_pageio(vnode_t *, page_t *, u_offset_t, size_t, 1237c478bd9Sstevel@tonic-gate int, cred_t *); 1247c478bd9Sstevel@tonic-gate static int nfs3_commit(vnode_t *, offset3, count3, cred_t *); 1257c478bd9Sstevel@tonic-gate static void nfs3_set_mod(vnode_t *); 1267c478bd9Sstevel@tonic-gate static void nfs3_get_commit(vnode_t *); 1277c478bd9Sstevel@tonic-gate static void nfs3_get_commit_range(vnode_t *, u_offset_t, size_t); 1287c478bd9Sstevel@tonic-gate static int nfs3_putpage_commit(vnode_t *, offset_t, size_t, cred_t *); 1297c478bd9Sstevel@tonic-gate static int nfs3_commit_vp(vnode_t *, u_offset_t, size_t, cred_t *); 1307c478bd9Sstevel@tonic-gate static int 
nfs3_sync_commit(vnode_t *, page_t *, offset3, count3, 1317c478bd9Sstevel@tonic-gate cred_t *); 1327c478bd9Sstevel@tonic-gate static void nfs3_async_commit(vnode_t *, page_t *, offset3, count3, 1337c478bd9Sstevel@tonic-gate cred_t *); 1347c478bd9Sstevel@tonic-gate static void nfs3_delmap_callback(struct as *, void *, uint_t); 1357c478bd9Sstevel@tonic-gate 1367c478bd9Sstevel@tonic-gate /* 1377c478bd9Sstevel@tonic-gate * Error flags used to pass information about certain special errors 1387c478bd9Sstevel@tonic-gate * which need to be handled specially. 1397c478bd9Sstevel@tonic-gate */ 1407c478bd9Sstevel@tonic-gate #define NFS_EOF -98 1417c478bd9Sstevel@tonic-gate #define NFS_VERF_MISMATCH -97 1427c478bd9Sstevel@tonic-gate 1437c478bd9Sstevel@tonic-gate /* ALIGN64 aligns the given buffer and adjust buffer size to 64 bit */ 1447c478bd9Sstevel@tonic-gate #define ALIGN64(x, ptr, sz) \ 1457c478bd9Sstevel@tonic-gate x = ((uintptr_t)(ptr)) & (sizeof (uint64_t) - 1); \ 1467c478bd9Sstevel@tonic-gate if (x) { \ 1477c478bd9Sstevel@tonic-gate x = sizeof (uint64_t) - (x); \ 1487c478bd9Sstevel@tonic-gate sz -= (x); \ 1497c478bd9Sstevel@tonic-gate ptr += (x); \ 1507c478bd9Sstevel@tonic-gate } 1517c478bd9Sstevel@tonic-gate 1527c478bd9Sstevel@tonic-gate /* 1537c478bd9Sstevel@tonic-gate * These are the vnode ops routines which implement the vnode interface to 1547c478bd9Sstevel@tonic-gate * the networked file system. These routines just take their parameters, 1557c478bd9Sstevel@tonic-gate * make them look networkish by putting the right info into interface structs, 1567c478bd9Sstevel@tonic-gate * and then calling the appropriate remote routine(s) to do the work. 1577c478bd9Sstevel@tonic-gate * 1587c478bd9Sstevel@tonic-gate * Note on directory name lookup cacheing: If we detect a stale fhandle, 1597c478bd9Sstevel@tonic-gate * we purge the directory cache relative to that vnode. This way, the 1607c478bd9Sstevel@tonic-gate * user won't get burned by the cache repeatedly. 
See <nfs/rnode.h> for 1617c478bd9Sstevel@tonic-gate * more details on rnode locking. 1627c478bd9Sstevel@tonic-gate */ 1637c478bd9Sstevel@tonic-gate 164da6c28aaSamw static int nfs3_open(vnode_t **, int, cred_t *, caller_context_t *); 165da6c28aaSamw static int nfs3_close(vnode_t *, int, int, offset_t, cred_t *, 166da6c28aaSamw caller_context_t *); 1677c478bd9Sstevel@tonic-gate static int nfs3_read(vnode_t *, struct uio *, int, cred_t *, 1687c478bd9Sstevel@tonic-gate caller_context_t *); 1697c478bd9Sstevel@tonic-gate static int nfs3_write(vnode_t *, struct uio *, int, cred_t *, 1707c478bd9Sstevel@tonic-gate caller_context_t *); 171da6c28aaSamw static int nfs3_ioctl(vnode_t *, int, intptr_t, int, cred_t *, int *, 172da6c28aaSamw caller_context_t *); 173da6c28aaSamw static int nfs3_getattr(vnode_t *, struct vattr *, int, cred_t *, 174da6c28aaSamw caller_context_t *); 1757c478bd9Sstevel@tonic-gate static int nfs3_setattr(vnode_t *, struct vattr *, int, cred_t *, 1767c478bd9Sstevel@tonic-gate caller_context_t *); 177da6c28aaSamw static int nfs3_access(vnode_t *, int, int, cred_t *, caller_context_t *); 178da6c28aaSamw static int nfs3_readlink(vnode_t *, struct uio *, cred_t *, 179da6c28aaSamw caller_context_t *); 180da6c28aaSamw static int nfs3_fsync(vnode_t *, int, cred_t *, caller_context_t *); 181da6c28aaSamw static void nfs3_inactive(vnode_t *, cred_t *, caller_context_t *); 1827c478bd9Sstevel@tonic-gate static int nfs3_lookup(vnode_t *, char *, vnode_t **, 183da6c28aaSamw struct pathname *, int, vnode_t *, cred_t *, 184da6c28aaSamw caller_context_t *, int *, pathname_t *); 1857c478bd9Sstevel@tonic-gate static int nfs3_create(vnode_t *, char *, struct vattr *, enum vcexcl, 186da6c28aaSamw int, vnode_t **, cred_t *, int, caller_context_t *, 187da6c28aaSamw vsecattr_t *); 188da6c28aaSamw static int nfs3_remove(vnode_t *, char *, cred_t *, caller_context_t *, 189da6c28aaSamw int); 190da6c28aaSamw static int nfs3_link(vnode_t *, vnode_t *, char *, cred_t *, 
191da6c28aaSamw caller_context_t *, int); 192da6c28aaSamw static int nfs3_rename(vnode_t *, char *, vnode_t *, char *, cred_t *, 193da6c28aaSamw caller_context_t *, int); 194da6c28aaSamw static int nfs3_mkdir(vnode_t *, char *, struct vattr *, vnode_t **, 195da6c28aaSamw cred_t *, caller_context_t *, int, vsecattr_t *); 196da6c28aaSamw static int nfs3_rmdir(vnode_t *, char *, vnode_t *, cred_t *, 197da6c28aaSamw caller_context_t *, int); 1987c478bd9Sstevel@tonic-gate static int nfs3_symlink(vnode_t *, char *, struct vattr *, char *, 199da6c28aaSamw cred_t *, caller_context_t *, int); 200da6c28aaSamw static int nfs3_readdir(vnode_t *, struct uio *, cred_t *, int *, 201da6c28aaSamw caller_context_t *, int); 202da6c28aaSamw static int nfs3_fid(vnode_t *, fid_t *, caller_context_t *); 2037c478bd9Sstevel@tonic-gate static int nfs3_rwlock(vnode_t *, int, caller_context_t *); 2047c478bd9Sstevel@tonic-gate static void nfs3_rwunlock(vnode_t *, int, caller_context_t *); 205da6c28aaSamw static int nfs3_seek(vnode_t *, offset_t, offset_t *, caller_context_t *); 2067c478bd9Sstevel@tonic-gate static int nfs3_getpage(vnode_t *, offset_t, size_t, uint_t *, 2077c478bd9Sstevel@tonic-gate page_t *[], size_t, struct seg *, caddr_t, 208da6c28aaSamw enum seg_rw, cred_t *, caller_context_t *); 209da6c28aaSamw static int nfs3_putpage(vnode_t *, offset_t, size_t, int, cred_t *, 210da6c28aaSamw caller_context_t *); 211da6c28aaSamw static int nfs3_map(vnode_t *, offset_t, struct as *, caddr_t *, size_t, 212da6c28aaSamw uchar_t, uchar_t, uint_t, cred_t *, caller_context_t *); 213da6c28aaSamw static int nfs3_addmap(vnode_t *, offset_t, struct as *, caddr_t, size_t, 214da6c28aaSamw uchar_t, uchar_t, uint_t, cred_t *, caller_context_t *); 2157c478bd9Sstevel@tonic-gate static int nfs3_frlock(vnode_t *, int, struct flock64 *, int, offset_t, 216da6c28aaSamw struct flk_callback *, cred_t *, caller_context_t *); 2177c478bd9Sstevel@tonic-gate static int nfs3_space(vnode_t *, int, struct flock64 *, 
int, offset_t, 2187c478bd9Sstevel@tonic-gate cred_t *, caller_context_t *); 219da6c28aaSamw static int nfs3_realvp(vnode_t *, vnode_t **, caller_context_t *); 220da6c28aaSamw static int nfs3_delmap(vnode_t *, offset_t, struct as *, caddr_t, size_t, 221da6c28aaSamw uint_t, uint_t, uint_t, cred_t *, caller_context_t *); 222da6c28aaSamw static int nfs3_pathconf(vnode_t *, int, ulong_t *, cred_t *, 223da6c28aaSamw caller_context_t *); 2247c478bd9Sstevel@tonic-gate static int nfs3_pageio(vnode_t *, page_t *, u_offset_t, size_t, int, 225da6c28aaSamw cred_t *, caller_context_t *); 226da6c28aaSamw static void nfs3_dispose(vnode_t *, page_t *, int, int, cred_t *, 227da6c28aaSamw caller_context_t *); 228da6c28aaSamw static int nfs3_setsecattr(vnode_t *, vsecattr_t *, int, cred_t *, 229da6c28aaSamw caller_context_t *); 230da6c28aaSamw static int nfs3_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *, 231da6c28aaSamw caller_context_t *); 232da6c28aaSamw static int nfs3_shrlock(vnode_t *, int, struct shrlock *, int, cred_t *, 233da6c28aaSamw caller_context_t *); 2347c478bd9Sstevel@tonic-gate 2357c478bd9Sstevel@tonic-gate struct vnodeops *nfs3_vnodeops; 2367c478bd9Sstevel@tonic-gate 2377c478bd9Sstevel@tonic-gate const fs_operation_def_t nfs3_vnodeops_template[] = { 238aa59c4cbSrsb VOPNAME_OPEN, { .vop_open = nfs3_open }, 239aa59c4cbSrsb VOPNAME_CLOSE, { .vop_close = nfs3_close }, 240aa59c4cbSrsb VOPNAME_READ, { .vop_read = nfs3_read }, 241aa59c4cbSrsb VOPNAME_WRITE, { .vop_write = nfs3_write }, 242aa59c4cbSrsb VOPNAME_IOCTL, { .vop_ioctl = nfs3_ioctl }, 243aa59c4cbSrsb VOPNAME_GETATTR, { .vop_getattr = nfs3_getattr }, 244aa59c4cbSrsb VOPNAME_SETATTR, { .vop_setattr = nfs3_setattr }, 245aa59c4cbSrsb VOPNAME_ACCESS, { .vop_access = nfs3_access }, 246aa59c4cbSrsb VOPNAME_LOOKUP, { .vop_lookup = nfs3_lookup }, 247aa59c4cbSrsb VOPNAME_CREATE, { .vop_create = nfs3_create }, 248aa59c4cbSrsb VOPNAME_REMOVE, { .vop_remove = nfs3_remove }, 249aa59c4cbSrsb VOPNAME_LINK, { .vop_link = 
nfs3_link }, 250aa59c4cbSrsb VOPNAME_RENAME, { .vop_rename = nfs3_rename }, 251aa59c4cbSrsb VOPNAME_MKDIR, { .vop_mkdir = nfs3_mkdir }, 252aa59c4cbSrsb VOPNAME_RMDIR, { .vop_rmdir = nfs3_rmdir }, 253aa59c4cbSrsb VOPNAME_READDIR, { .vop_readdir = nfs3_readdir }, 254aa59c4cbSrsb VOPNAME_SYMLINK, { .vop_symlink = nfs3_symlink }, 255aa59c4cbSrsb VOPNAME_READLINK, { .vop_readlink = nfs3_readlink }, 256aa59c4cbSrsb VOPNAME_FSYNC, { .vop_fsync = nfs3_fsync }, 257aa59c4cbSrsb VOPNAME_INACTIVE, { .vop_inactive = nfs3_inactive }, 258aa59c4cbSrsb VOPNAME_FID, { .vop_fid = nfs3_fid }, 259aa59c4cbSrsb VOPNAME_RWLOCK, { .vop_rwlock = nfs3_rwlock }, 260aa59c4cbSrsb VOPNAME_RWUNLOCK, { .vop_rwunlock = nfs3_rwunlock }, 261aa59c4cbSrsb VOPNAME_SEEK, { .vop_seek = nfs3_seek }, 262aa59c4cbSrsb VOPNAME_FRLOCK, { .vop_frlock = nfs3_frlock }, 263aa59c4cbSrsb VOPNAME_SPACE, { .vop_space = nfs3_space }, 264aa59c4cbSrsb VOPNAME_REALVP, { .vop_realvp = nfs3_realvp }, 265aa59c4cbSrsb VOPNAME_GETPAGE, { .vop_getpage = nfs3_getpage }, 266aa59c4cbSrsb VOPNAME_PUTPAGE, { .vop_putpage = nfs3_putpage }, 267aa59c4cbSrsb VOPNAME_MAP, { .vop_map = nfs3_map }, 268aa59c4cbSrsb VOPNAME_ADDMAP, { .vop_addmap = nfs3_addmap }, 269aa59c4cbSrsb VOPNAME_DELMAP, { .vop_delmap = nfs3_delmap }, 270aa59c4cbSrsb /* no separate nfs3_dump */ 271aa59c4cbSrsb VOPNAME_DUMP, { .vop_dump = nfs_dump }, 272aa59c4cbSrsb VOPNAME_PATHCONF, { .vop_pathconf = nfs3_pathconf }, 273aa59c4cbSrsb VOPNAME_PAGEIO, { .vop_pageio = nfs3_pageio }, 274aa59c4cbSrsb VOPNAME_DISPOSE, { .vop_dispose = nfs3_dispose }, 275aa59c4cbSrsb VOPNAME_SETSECATTR, { .vop_setsecattr = nfs3_setsecattr }, 276aa59c4cbSrsb VOPNAME_GETSECATTR, { .vop_getsecattr = nfs3_getsecattr }, 277aa59c4cbSrsb VOPNAME_SHRLOCK, { .vop_shrlock = nfs3_shrlock }, 278df2381bfSpraks VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 2797c478bd9Sstevel@tonic-gate NULL, NULL 2807c478bd9Sstevel@tonic-gate }; 2817c478bd9Sstevel@tonic-gate 2827c478bd9Sstevel@tonic-gate /* 
2837c478bd9Sstevel@tonic-gate * XXX: This is referenced in modstubs.s 2847c478bd9Sstevel@tonic-gate */ 2857c478bd9Sstevel@tonic-gate struct vnodeops * 2867c478bd9Sstevel@tonic-gate nfs3_getvnodeops(void) 2877c478bd9Sstevel@tonic-gate { 2887c478bd9Sstevel@tonic-gate return (nfs3_vnodeops); 2897c478bd9Sstevel@tonic-gate } 2907c478bd9Sstevel@tonic-gate 2917c478bd9Sstevel@tonic-gate /* ARGSUSED */ 2927c478bd9Sstevel@tonic-gate static int 293da6c28aaSamw nfs3_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 2947c478bd9Sstevel@tonic-gate { 2957c478bd9Sstevel@tonic-gate int error; 2967c478bd9Sstevel@tonic-gate struct vattr va; 2977c478bd9Sstevel@tonic-gate rnode_t *rp; 2987c478bd9Sstevel@tonic-gate vnode_t *vp; 2997c478bd9Sstevel@tonic-gate 3007c478bd9Sstevel@tonic-gate vp = *vpp; 301108322fbScarlsonj if (nfs_zone() != VTOMI(vp)->mi_zone) 3027c478bd9Sstevel@tonic-gate return (EIO); 3037c478bd9Sstevel@tonic-gate rp = VTOR(vp); 3047c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 3057c478bd9Sstevel@tonic-gate if (rp->r_cred == NULL) { 3067c478bd9Sstevel@tonic-gate crhold(cr); 3077c478bd9Sstevel@tonic-gate rp->r_cred = cr; 3087c478bd9Sstevel@tonic-gate } 3097c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 3107c478bd9Sstevel@tonic-gate 3117c478bd9Sstevel@tonic-gate /* 3127c478bd9Sstevel@tonic-gate * If there is no cached data or if close-to-open 3137c478bd9Sstevel@tonic-gate * consistency checking is turned off, we can avoid 3147c478bd9Sstevel@tonic-gate * the over the wire getattr. Otherwise, if the 3157c478bd9Sstevel@tonic-gate * file system is mounted readonly, then just verify 3167c478bd9Sstevel@tonic-gate * the caches are up to date using the normal mechanism. 3177c478bd9Sstevel@tonic-gate * Else, if the file is not mmap'd, then just mark 3187c478bd9Sstevel@tonic-gate * the attributes as timed out. They will be refreshed 3197c478bd9Sstevel@tonic-gate * and the caches validated prior to being used. 
3207c478bd9Sstevel@tonic-gate * Else, the file system is mounted writeable so 3217c478bd9Sstevel@tonic-gate * force an over the wire GETATTR in order to ensure 3227c478bd9Sstevel@tonic-gate * that all cached data is valid. 3237c478bd9Sstevel@tonic-gate */ 3247c478bd9Sstevel@tonic-gate if (vp->v_count > 1 || 3257c478bd9Sstevel@tonic-gate ((vn_has_cached_data(vp) || HAVE_RDDIR_CACHE(rp)) && 3267c478bd9Sstevel@tonic-gate !(VTOMI(vp)->mi_flags & MI_NOCTO))) { 3277c478bd9Sstevel@tonic-gate if (vn_is_readonly(vp)) 3287c478bd9Sstevel@tonic-gate error = nfs3_validate_caches(vp, cr); 3297c478bd9Sstevel@tonic-gate else if (rp->r_mapcnt == 0 && vp->v_count == 1) { 3307c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(vp); 3317c478bd9Sstevel@tonic-gate error = 0; 3327c478bd9Sstevel@tonic-gate } else { 3337c478bd9Sstevel@tonic-gate va.va_mask = AT_ALL; 3347c478bd9Sstevel@tonic-gate error = nfs3_getattr_otw(vp, &va, cr); 3357c478bd9Sstevel@tonic-gate } 3367c478bd9Sstevel@tonic-gate } else 3377c478bd9Sstevel@tonic-gate error = 0; 3387c478bd9Sstevel@tonic-gate 3397c478bd9Sstevel@tonic-gate return (error); 3407c478bd9Sstevel@tonic-gate } 3417c478bd9Sstevel@tonic-gate 342da6c28aaSamw /* ARGSUSED */ 3437c478bd9Sstevel@tonic-gate static int 344da6c28aaSamw nfs3_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 345da6c28aaSamw caller_context_t *ct) 3467c478bd9Sstevel@tonic-gate { 3477c478bd9Sstevel@tonic-gate rnode_t *rp; 3487c478bd9Sstevel@tonic-gate int error; 3497c478bd9Sstevel@tonic-gate struct vattr va; 3507c478bd9Sstevel@tonic-gate 3517c478bd9Sstevel@tonic-gate /* 3527c478bd9Sstevel@tonic-gate * zone_enter(2) prevents processes from changing zones with NFS files 3537c478bd9Sstevel@tonic-gate * open; if we happen to get here from the wrong zone we can't do 3547c478bd9Sstevel@tonic-gate * anything over the wire. 
3557c478bd9Sstevel@tonic-gate */ 356108322fbScarlsonj if (VTOMI(vp)->mi_zone != nfs_zone()) { 3577c478bd9Sstevel@tonic-gate /* 3587c478bd9Sstevel@tonic-gate * We could attempt to clean up locks, except we're sure 3597c478bd9Sstevel@tonic-gate * that the current process didn't acquire any locks on 3607c478bd9Sstevel@tonic-gate * the file: any attempt to lock a file belong to another zone 3617c478bd9Sstevel@tonic-gate * will fail, and one can't lock an NFS file and then change 3627c478bd9Sstevel@tonic-gate * zones, as that fails too. 3637c478bd9Sstevel@tonic-gate * 3647c478bd9Sstevel@tonic-gate * Returning an error here is the sane thing to do. A 3657c478bd9Sstevel@tonic-gate * subsequent call to VN_RELE() which translates to a 3667c478bd9Sstevel@tonic-gate * nfs3_inactive() will clean up state: if the zone of the 3677c478bd9Sstevel@tonic-gate * vnode's origin is still alive and kicking, an async worker 3687c478bd9Sstevel@tonic-gate * thread will handle the request (from the correct zone), and 3697c478bd9Sstevel@tonic-gate * everything (minus the commit and final nfs3_getattr_otw() 3707c478bd9Sstevel@tonic-gate * call) should be OK. If the zone is going away 3717c478bd9Sstevel@tonic-gate * nfs_async_inactive() will throw away cached pages inline. 3727c478bd9Sstevel@tonic-gate */ 3737c478bd9Sstevel@tonic-gate return (EIO); 3747c478bd9Sstevel@tonic-gate } 3757c478bd9Sstevel@tonic-gate 3767c478bd9Sstevel@tonic-gate /* 3777c478bd9Sstevel@tonic-gate * If we are using local locking for this filesystem, then 3787c478bd9Sstevel@tonic-gate * release all of the SYSV style record locks. Otherwise, 3797c478bd9Sstevel@tonic-gate * we are doing network locking and we need to release all 3807c478bd9Sstevel@tonic-gate * of the network locks. All of the locks held by this 3817c478bd9Sstevel@tonic-gate * process on this file are released no matter what the 3827c478bd9Sstevel@tonic-gate * incoming reference count is. 
3837c478bd9Sstevel@tonic-gate */ 3847c478bd9Sstevel@tonic-gate if (VTOMI(vp)->mi_flags & MI_LLOCK) { 3857c478bd9Sstevel@tonic-gate cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 3867c478bd9Sstevel@tonic-gate cleanshares(vp, ttoproc(curthread)->p_pid); 3877c478bd9Sstevel@tonic-gate } else 3887c478bd9Sstevel@tonic-gate nfs_lockrelease(vp, flag, offset, cr); 3897c478bd9Sstevel@tonic-gate 3907c478bd9Sstevel@tonic-gate if (count > 1) 3917c478bd9Sstevel@tonic-gate return (0); 3927c478bd9Sstevel@tonic-gate 3937c478bd9Sstevel@tonic-gate /* 3947c478bd9Sstevel@tonic-gate * If the file has been `unlinked', then purge the 3957c478bd9Sstevel@tonic-gate * DNLC so that this vnode will get reycled quicker 3967c478bd9Sstevel@tonic-gate * and the .nfs* file on the server will get removed. 3977c478bd9Sstevel@tonic-gate */ 3987c478bd9Sstevel@tonic-gate rp = VTOR(vp); 3997c478bd9Sstevel@tonic-gate if (rp->r_unldvp != NULL) 4007c478bd9Sstevel@tonic-gate dnlc_purge_vp(vp); 4017c478bd9Sstevel@tonic-gate 4027c478bd9Sstevel@tonic-gate /* 4037c478bd9Sstevel@tonic-gate * If the file was open for write and there are pages, 4047c478bd9Sstevel@tonic-gate * then if the file system was mounted using the "no-close- 4057c478bd9Sstevel@tonic-gate * to-open" semantics, then start an asynchronous flush 4067c478bd9Sstevel@tonic-gate * of the all of the pages in the file. 4077c478bd9Sstevel@tonic-gate * else the file system was not mounted using the "no-close- 4087c478bd9Sstevel@tonic-gate * to-open" semantics, then do a synchronous flush and 4097c478bd9Sstevel@tonic-gate * commit of all of the dirty and uncommitted pages. 4107c478bd9Sstevel@tonic-gate * 4117c478bd9Sstevel@tonic-gate * The asynchronous flush of the pages in the "nocto" path 4127c478bd9Sstevel@tonic-gate * mostly just associates a cred pointer with the rnode so 4137c478bd9Sstevel@tonic-gate * writes which happen later will have a better chance of 4147c478bd9Sstevel@tonic-gate * working. 
It also starts the data being written to the 4157c478bd9Sstevel@tonic-gate * server, but without unnecessarily delaying the application. 4167c478bd9Sstevel@tonic-gate */ 4177c478bd9Sstevel@tonic-gate if ((flag & FWRITE) && vn_has_cached_data(vp)) { 4187c478bd9Sstevel@tonic-gate if (VTOMI(vp)->mi_flags & MI_NOCTO) { 419da6c28aaSamw error = nfs3_putpage(vp, (offset_t)0, 0, B_ASYNC, 420da6c28aaSamw cr, ct); 4217c478bd9Sstevel@tonic-gate if (error == EAGAIN) 4227c478bd9Sstevel@tonic-gate error = 0; 4237c478bd9Sstevel@tonic-gate } else 4247c478bd9Sstevel@tonic-gate error = nfs3_putpage_commit(vp, (offset_t)0, 0, cr); 4257c478bd9Sstevel@tonic-gate if (!error) { 4267c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 4277c478bd9Sstevel@tonic-gate error = rp->r_error; 4287c478bd9Sstevel@tonic-gate rp->r_error = 0; 4297c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 4307c478bd9Sstevel@tonic-gate } 4317c478bd9Sstevel@tonic-gate } else { 4327c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 4337c478bd9Sstevel@tonic-gate error = rp->r_error; 4347c478bd9Sstevel@tonic-gate rp->r_error = 0; 4357c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 4367c478bd9Sstevel@tonic-gate } 4377c478bd9Sstevel@tonic-gate 4387c478bd9Sstevel@tonic-gate /* 4397c478bd9Sstevel@tonic-gate * If RWRITEATTR is set, then issue an over the wire GETATTR to 4407c478bd9Sstevel@tonic-gate * refresh the attribute cache with a set of attributes which 4417c478bd9Sstevel@tonic-gate * weren't returned from a WRITE. This will enable the close- 4427c478bd9Sstevel@tonic-gate * to-open processing to work. 
4437c478bd9Sstevel@tonic-gate */ 4447c478bd9Sstevel@tonic-gate if (rp->r_flags & RWRITEATTR) 4457c478bd9Sstevel@tonic-gate (void) nfs3_getattr_otw(vp, &va, cr); 4467c478bd9Sstevel@tonic-gate 4477c478bd9Sstevel@tonic-gate return (error); 4487c478bd9Sstevel@tonic-gate } 4497c478bd9Sstevel@tonic-gate 4507c478bd9Sstevel@tonic-gate /* ARGSUSED */ 4517c478bd9Sstevel@tonic-gate static int 4527c478bd9Sstevel@tonic-gate nfs3_directio_read(vnode_t *vp, struct uio *uiop, cred_t *cr) 4537c478bd9Sstevel@tonic-gate { 4547c478bd9Sstevel@tonic-gate mntinfo_t *mi; 4557c478bd9Sstevel@tonic-gate READ3args args; 4567c478bd9Sstevel@tonic-gate READ3uiores res; 4577c478bd9Sstevel@tonic-gate int tsize; 4587c478bd9Sstevel@tonic-gate offset_t offset; 4597c478bd9Sstevel@tonic-gate ssize_t count; 4607c478bd9Sstevel@tonic-gate int error; 4617c478bd9Sstevel@tonic-gate int douprintf; 4627c478bd9Sstevel@tonic-gate failinfo_t fi; 4637c478bd9Sstevel@tonic-gate char *sv_hostname; 4647c478bd9Sstevel@tonic-gate 4657c478bd9Sstevel@tonic-gate mi = VTOMI(vp); 466108322fbScarlsonj ASSERT(nfs_zone() == VTOMI(vp)->mi_zone); 4677c478bd9Sstevel@tonic-gate sv_hostname = VTOR(vp)->r_server->sv_hostname; 4687c478bd9Sstevel@tonic-gate 4697c478bd9Sstevel@tonic-gate douprintf = 1; 4707c478bd9Sstevel@tonic-gate args.file = *VTOFH3(vp); 4717c478bd9Sstevel@tonic-gate fi.vp = vp; 4727c478bd9Sstevel@tonic-gate fi.fhp = (caddr_t)&args.file; 4737c478bd9Sstevel@tonic-gate fi.copyproc = nfs3copyfh; 4747c478bd9Sstevel@tonic-gate fi.lookupproc = nfs3lookup; 4757c478bd9Sstevel@tonic-gate fi.xattrdirproc = acl_getxattrdir3; 4767c478bd9Sstevel@tonic-gate 4777c478bd9Sstevel@tonic-gate res.uiop = uiop; 4787c478bd9Sstevel@tonic-gate 4790a701b1eSRobert Gordon res.wlist = NULL; 4800a701b1eSRobert Gordon 4817c478bd9Sstevel@tonic-gate offset = uiop->uio_loffset; 4827c478bd9Sstevel@tonic-gate count = uiop->uio_resid; 4837c478bd9Sstevel@tonic-gate 4847c478bd9Sstevel@tonic-gate do { 4857c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) { 
4867c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 4877c478bd9Sstevel@tonic-gate kstat_runq_enter(KSTAT_IO_PTR(mi->mi_io_kstats)); 4887c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 4897c478bd9Sstevel@tonic-gate } 4907c478bd9Sstevel@tonic-gate 4917c478bd9Sstevel@tonic-gate do { 4927c478bd9Sstevel@tonic-gate tsize = MIN(mi->mi_tsize, count); 4937c478bd9Sstevel@tonic-gate args.offset = (offset3)offset; 4947c478bd9Sstevel@tonic-gate args.count = (count3)tsize; 4957c478bd9Sstevel@tonic-gate res.size = (uint_t)tsize; 4960a701b1eSRobert Gordon args.res_uiop = uiop; 4970a701b1eSRobert Gordon args.res_data_val_alt = NULL; 4980a701b1eSRobert Gordon 4997c478bd9Sstevel@tonic-gate error = rfs3call(mi, NFSPROC3_READ, 5007c478bd9Sstevel@tonic-gate xdr_READ3args, (caddr_t)&args, 5017c478bd9Sstevel@tonic-gate xdr_READ3uiores, (caddr_t)&res, cr, 5027c478bd9Sstevel@tonic-gate &douprintf, &res.status, 0, &fi); 5037c478bd9Sstevel@tonic-gate } while (error == ENFS_TRYAGAIN); 5047c478bd9Sstevel@tonic-gate 5057c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) { 5067c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 5077c478bd9Sstevel@tonic-gate kstat_runq_exit(KSTAT_IO_PTR(mi->mi_io_kstats)); 5087c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 5097c478bd9Sstevel@tonic-gate } 5107c478bd9Sstevel@tonic-gate 5117c478bd9Sstevel@tonic-gate if (error) 5127c478bd9Sstevel@tonic-gate return (error); 5137c478bd9Sstevel@tonic-gate 5147c478bd9Sstevel@tonic-gate error = geterrno3(res.status); 5157c478bd9Sstevel@tonic-gate if (error) 5167c478bd9Sstevel@tonic-gate return (error); 5177c478bd9Sstevel@tonic-gate 5187c478bd9Sstevel@tonic-gate if (res.count != res.size) { 5197c478bd9Sstevel@tonic-gate zcmn_err(getzoneid(), CE_WARN, 5207c478bd9Sstevel@tonic-gate "nfs3_directio_read: server %s returned incorrect amount", 5217c478bd9Sstevel@tonic-gate sv_hostname); 5227c478bd9Sstevel@tonic-gate return (EIO); 5237c478bd9Sstevel@tonic-gate } 5247c478bd9Sstevel@tonic-gate count -= res.count; 
5257c478bd9Sstevel@tonic-gate offset += res.count; 5267c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) { 5277c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 5287c478bd9Sstevel@tonic-gate KSTAT_IO_PTR(mi->mi_io_kstats)->reads++; 5297c478bd9Sstevel@tonic-gate KSTAT_IO_PTR(mi->mi_io_kstats)->nread += res.count; 5307c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 5317c478bd9Sstevel@tonic-gate } 5327c478bd9Sstevel@tonic-gate lwp_stat_update(LWP_STAT_INBLK, 1); 5337c478bd9Sstevel@tonic-gate } while (count && !res.eof); 5347c478bd9Sstevel@tonic-gate 5357c478bd9Sstevel@tonic-gate return (0); 5367c478bd9Sstevel@tonic-gate } 5377c478bd9Sstevel@tonic-gate 5387c478bd9Sstevel@tonic-gate /* ARGSUSED */ 5397c478bd9Sstevel@tonic-gate static int 5407c478bd9Sstevel@tonic-gate nfs3_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr, 5417c478bd9Sstevel@tonic-gate caller_context_t *ct) 5427c478bd9Sstevel@tonic-gate { 5437c478bd9Sstevel@tonic-gate rnode_t *rp; 5447c478bd9Sstevel@tonic-gate u_offset_t off; 5457c478bd9Sstevel@tonic-gate offset_t diff; 5467c478bd9Sstevel@tonic-gate int on; 5477c478bd9Sstevel@tonic-gate size_t n; 5487c478bd9Sstevel@tonic-gate caddr_t base; 5497c478bd9Sstevel@tonic-gate uint_t flags; 5507c478bd9Sstevel@tonic-gate int error = 0; 5517c478bd9Sstevel@tonic-gate mntinfo_t *mi; 5527c478bd9Sstevel@tonic-gate 5537c478bd9Sstevel@tonic-gate rp = VTOR(vp); 5547c478bd9Sstevel@tonic-gate mi = VTOMI(vp); 5557c478bd9Sstevel@tonic-gate 5567c478bd9Sstevel@tonic-gate ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_READER)); 5577c478bd9Sstevel@tonic-gate 558108322fbScarlsonj if (nfs_zone() != mi->mi_zone) 5597c478bd9Sstevel@tonic-gate return (EIO); 5607c478bd9Sstevel@tonic-gate 5617c478bd9Sstevel@tonic-gate if (vp->v_type != VREG) 5627c478bd9Sstevel@tonic-gate return (EISDIR); 5637c478bd9Sstevel@tonic-gate 5647c478bd9Sstevel@tonic-gate if (uiop->uio_resid == 0) 5657c478bd9Sstevel@tonic-gate return (0); 5667c478bd9Sstevel@tonic-gate 5677c478bd9Sstevel@tonic-gate if 
(uiop->uio_loffset < 0 || uiop->uio_loffset + uiop->uio_resid < 0) 5687c478bd9Sstevel@tonic-gate return (EINVAL); 5697c478bd9Sstevel@tonic-gate 5707c478bd9Sstevel@tonic-gate /* 5717c478bd9Sstevel@tonic-gate * Bypass VM if caching has been disabled (e.g., locking) or if 5727c478bd9Sstevel@tonic-gate * using client-side direct I/O and the file is not mmap'd and 5737c478bd9Sstevel@tonic-gate * there are no cached pages. 5747c478bd9Sstevel@tonic-gate */ 5757c478bd9Sstevel@tonic-gate if ((vp->v_flag & VNOCACHE) || 5767c478bd9Sstevel@tonic-gate (((rp->r_flags & RDIRECTIO) || (mi->mi_flags & MI_DIRECTIO)) && 5771384c586SDeepak Honnalli rp->r_mapcnt == 0 && rp->r_inmap == 0 && 5781384c586SDeepak Honnalli !vn_has_cached_data(vp))) { 5797c478bd9Sstevel@tonic-gate return (nfs3_directio_read(vp, uiop, cr)); 5807c478bd9Sstevel@tonic-gate } 5817c478bd9Sstevel@tonic-gate 5827c478bd9Sstevel@tonic-gate do { 5837c478bd9Sstevel@tonic-gate off = uiop->uio_loffset & MAXBMASK; /* mapping offset */ 5847c478bd9Sstevel@tonic-gate on = uiop->uio_loffset & MAXBOFFSET; /* Relative offset */ 5857c478bd9Sstevel@tonic-gate n = MIN(MAXBSIZE - on, uiop->uio_resid); 5867c478bd9Sstevel@tonic-gate 5877c478bd9Sstevel@tonic-gate error = nfs3_validate_caches(vp, cr); 5887c478bd9Sstevel@tonic-gate if (error) 5897c478bd9Sstevel@tonic-gate break; 5907c478bd9Sstevel@tonic-gate 5917c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 5925e4df02aSvv149972 while (rp->r_flags & RINCACHEPURGE) { 5935e4df02aSvv149972 if (!cv_wait_sig(&rp->r_cv, &rp->r_statelock)) { 5945e4df02aSvv149972 mutex_exit(&rp->r_statelock); 5955e4df02aSvv149972 return (EINTR); 5965e4df02aSvv149972 } 5975e4df02aSvv149972 } 5987c478bd9Sstevel@tonic-gate diff = rp->r_size - uiop->uio_loffset; 5997c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 6007c478bd9Sstevel@tonic-gate if (diff <= 0) 6017c478bd9Sstevel@tonic-gate break; 6027c478bd9Sstevel@tonic-gate if (diff < n) 6037c478bd9Sstevel@tonic-gate n = (size_t)diff; 
6047c478bd9Sstevel@tonic-gate 605a5652762Spraks if (vpm_enable) { 606a5652762Spraks /* 607a5652762Spraks * Copy data. 608a5652762Spraks */ 609a5652762Spraks error = vpm_data_copy(vp, off + on, n, uiop, 610a5652762Spraks 1, NULL, 0, S_READ); 611a5652762Spraks } else { 612a5652762Spraks base = segmap_getmapflt(segkmap, vp, off + on, n, 1, 613a5652762Spraks S_READ); 6147c478bd9Sstevel@tonic-gate 6157c478bd9Sstevel@tonic-gate error = uiomove(base + on, n, UIO_READ, uiop); 616a5652762Spraks } 6177c478bd9Sstevel@tonic-gate 6187c478bd9Sstevel@tonic-gate if (!error) { 6197c478bd9Sstevel@tonic-gate /* 6207c478bd9Sstevel@tonic-gate * If read a whole block or read to eof, 6217c478bd9Sstevel@tonic-gate * won't need this buffer again soon. 6227c478bd9Sstevel@tonic-gate */ 6237c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 6247c478bd9Sstevel@tonic-gate if (n + on == MAXBSIZE || 6257c478bd9Sstevel@tonic-gate uiop->uio_loffset == rp->r_size) 6267c478bd9Sstevel@tonic-gate flags = SM_DONTNEED; 6277c478bd9Sstevel@tonic-gate else 6287c478bd9Sstevel@tonic-gate flags = 0; 6297c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 630a5652762Spraks if (vpm_enable) { 631a5652762Spraks error = vpm_sync_pages(vp, off, n, flags); 632a5652762Spraks } else { 6337c478bd9Sstevel@tonic-gate error = segmap_release(segkmap, base, flags); 634a5652762Spraks } 635a5652762Spraks } else { 636a5652762Spraks if (vpm_enable) { 637a5652762Spraks (void) vpm_sync_pages(vp, off, n, 0); 638a5652762Spraks } else { 6397c478bd9Sstevel@tonic-gate (void) segmap_release(segkmap, base, 0); 640a5652762Spraks } 641a5652762Spraks } 6427c478bd9Sstevel@tonic-gate } while (!error && uiop->uio_resid > 0); 6437c478bd9Sstevel@tonic-gate 6447c478bd9Sstevel@tonic-gate return (error); 6457c478bd9Sstevel@tonic-gate } 6467c478bd9Sstevel@tonic-gate 6477c478bd9Sstevel@tonic-gate /* ARGSUSED */ 6487c478bd9Sstevel@tonic-gate static int 6497c478bd9Sstevel@tonic-gate nfs3_write(vnode_t *vp, struct uio *uiop, int ioflag, cred_t 
*cr, 6507c478bd9Sstevel@tonic-gate caller_context_t *ct) 6517c478bd9Sstevel@tonic-gate { 6527c478bd9Sstevel@tonic-gate rlim64_t limit = uiop->uio_llimit; 6537c478bd9Sstevel@tonic-gate rnode_t *rp; 6547c478bd9Sstevel@tonic-gate u_offset_t off; 6557c478bd9Sstevel@tonic-gate caddr_t base; 6567c478bd9Sstevel@tonic-gate uint_t flags; 6577c478bd9Sstevel@tonic-gate int remainder; 6587c478bd9Sstevel@tonic-gate size_t n; 6597c478bd9Sstevel@tonic-gate int on; 6607c478bd9Sstevel@tonic-gate int error; 6617c478bd9Sstevel@tonic-gate int resid; 6627c478bd9Sstevel@tonic-gate offset_t offset; 6637c478bd9Sstevel@tonic-gate mntinfo_t *mi; 6647c478bd9Sstevel@tonic-gate uint_t bsize; 6657c478bd9Sstevel@tonic-gate 6667c478bd9Sstevel@tonic-gate rp = VTOR(vp); 6677c478bd9Sstevel@tonic-gate 6687c478bd9Sstevel@tonic-gate if (vp->v_type != VREG) 6697c478bd9Sstevel@tonic-gate return (EISDIR); 6707c478bd9Sstevel@tonic-gate 6717c478bd9Sstevel@tonic-gate mi = VTOMI(vp); 672108322fbScarlsonj if (nfs_zone() != mi->mi_zone) 6737c478bd9Sstevel@tonic-gate return (EIO); 6747c478bd9Sstevel@tonic-gate if (uiop->uio_resid == 0) 6757c478bd9Sstevel@tonic-gate return (0); 6767c478bd9Sstevel@tonic-gate 6777c478bd9Sstevel@tonic-gate if (ioflag & FAPPEND) { 6787c478bd9Sstevel@tonic-gate struct vattr va; 6797c478bd9Sstevel@tonic-gate 6807c478bd9Sstevel@tonic-gate /* 6817c478bd9Sstevel@tonic-gate * Must serialize if appending. 
6827c478bd9Sstevel@tonic-gate */ 6837c478bd9Sstevel@tonic-gate if (nfs_rw_lock_held(&rp->r_rwlock, RW_READER)) { 6847c478bd9Sstevel@tonic-gate nfs_rw_exit(&rp->r_rwlock); 6857c478bd9Sstevel@tonic-gate if (nfs_rw_enter_sig(&rp->r_rwlock, RW_WRITER, 6867c478bd9Sstevel@tonic-gate INTR(vp))) 6877c478bd9Sstevel@tonic-gate return (EINTR); 6887c478bd9Sstevel@tonic-gate } 6897c478bd9Sstevel@tonic-gate 6907c478bd9Sstevel@tonic-gate va.va_mask = AT_SIZE; 6917c478bd9Sstevel@tonic-gate error = nfs3getattr(vp, &va, cr); 6927c478bd9Sstevel@tonic-gate if (error) 6937c478bd9Sstevel@tonic-gate return (error); 6947c478bd9Sstevel@tonic-gate uiop->uio_loffset = va.va_size; 6957c478bd9Sstevel@tonic-gate } 6967c478bd9Sstevel@tonic-gate 6977c478bd9Sstevel@tonic-gate offset = uiop->uio_loffset + uiop->uio_resid; 6987c478bd9Sstevel@tonic-gate 6997c478bd9Sstevel@tonic-gate if (uiop->uio_loffset < 0 || offset < 0) 7007c478bd9Sstevel@tonic-gate return (EINVAL); 7017c478bd9Sstevel@tonic-gate 7027c478bd9Sstevel@tonic-gate if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 7037c478bd9Sstevel@tonic-gate limit = MAXOFFSET_T; 7047c478bd9Sstevel@tonic-gate 7057c478bd9Sstevel@tonic-gate /* 7067c478bd9Sstevel@tonic-gate * Check to make sure that the process will not exceed 7077c478bd9Sstevel@tonic-gate * its limit on file size. It is okay to write up to 7087c478bd9Sstevel@tonic-gate * the limit, but not beyond. Thus, the write which 7097c478bd9Sstevel@tonic-gate * reaches the limit will be short and the next write 7107c478bd9Sstevel@tonic-gate * will return an error. 
7117c478bd9Sstevel@tonic-gate */ 7127c478bd9Sstevel@tonic-gate remainder = 0; 7137c478bd9Sstevel@tonic-gate if (offset > limit) { 7147c478bd9Sstevel@tonic-gate remainder = offset - limit; 7157c478bd9Sstevel@tonic-gate uiop->uio_resid = limit - uiop->uio_loffset; 7167c478bd9Sstevel@tonic-gate if (uiop->uio_resid <= 0) { 7177c478bd9Sstevel@tonic-gate proc_t *p = ttoproc(curthread); 7187c478bd9Sstevel@tonic-gate 7197c478bd9Sstevel@tonic-gate uiop->uio_resid += remainder; 7207c478bd9Sstevel@tonic-gate mutex_enter(&p->p_lock); 7217c478bd9Sstevel@tonic-gate (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 7227c478bd9Sstevel@tonic-gate p->p_rctls, p, RCA_UNSAFE_SIGINFO); 7237c478bd9Sstevel@tonic-gate mutex_exit(&p->p_lock); 7247c478bd9Sstevel@tonic-gate return (EFBIG); 7257c478bd9Sstevel@tonic-gate } 7267c478bd9Sstevel@tonic-gate } 7277c478bd9Sstevel@tonic-gate 7287c478bd9Sstevel@tonic-gate if (nfs_rw_enter_sig(&rp->r_lkserlock, RW_READER, INTR(vp))) 7297c478bd9Sstevel@tonic-gate return (EINTR); 7307c478bd9Sstevel@tonic-gate 7317c478bd9Sstevel@tonic-gate /* 7327c478bd9Sstevel@tonic-gate * Bypass VM if caching has been disabled (e.g., locking) or if 7337c478bd9Sstevel@tonic-gate * using client-side direct I/O and the file is not mmap'd and 7347c478bd9Sstevel@tonic-gate * there are no cached pages. 
7357c478bd9Sstevel@tonic-gate */ 7367c478bd9Sstevel@tonic-gate if ((vp->v_flag & VNOCACHE) || 7377c478bd9Sstevel@tonic-gate (((rp->r_flags & RDIRECTIO) || (mi->mi_flags & MI_DIRECTIO)) && 7381384c586SDeepak Honnalli rp->r_mapcnt == 0 && rp->r_inmap == 0 && 7391384c586SDeepak Honnalli !vn_has_cached_data(vp))) { 7407c478bd9Sstevel@tonic-gate size_t bufsize; 7417c478bd9Sstevel@tonic-gate int count; 7427c478bd9Sstevel@tonic-gate u_offset_t org_offset; 7437c478bd9Sstevel@tonic-gate stable_how stab_comm; 7447c478bd9Sstevel@tonic-gate 7457c478bd9Sstevel@tonic-gate nfs3_fwrite: 7467c478bd9Sstevel@tonic-gate if (rp->r_flags & RSTALE) { 7477c478bd9Sstevel@tonic-gate resid = uiop->uio_resid; 7487c478bd9Sstevel@tonic-gate offset = uiop->uio_loffset; 7497c478bd9Sstevel@tonic-gate error = rp->r_error; 7508afffe5eSbatschul /* 7518afffe5eSbatschul * A close may have cleared r_error, if so, 7528afffe5eSbatschul * propagate ESTALE error return properly 7538afffe5eSbatschul */ 7548afffe5eSbatschul if (error == 0) 7558afffe5eSbatschul error = ESTALE; 7567c478bd9Sstevel@tonic-gate goto bottom; 7577c478bd9Sstevel@tonic-gate } 7587c478bd9Sstevel@tonic-gate bufsize = MIN(uiop->uio_resid, mi->mi_stsize); 7597c478bd9Sstevel@tonic-gate base = kmem_alloc(bufsize, KM_SLEEP); 7607c478bd9Sstevel@tonic-gate do { 7617c478bd9Sstevel@tonic-gate if (ioflag & FDSYNC) 7627c478bd9Sstevel@tonic-gate stab_comm = DATA_SYNC; 7637c478bd9Sstevel@tonic-gate else 7647c478bd9Sstevel@tonic-gate stab_comm = FILE_SYNC; 7657c478bd9Sstevel@tonic-gate resid = uiop->uio_resid; 7667c478bd9Sstevel@tonic-gate offset = uiop->uio_loffset; 7677c478bd9Sstevel@tonic-gate count = MIN(uiop->uio_resid, bufsize); 7687c478bd9Sstevel@tonic-gate org_offset = uiop->uio_loffset; 7697c478bd9Sstevel@tonic-gate error = uiomove(base, count, UIO_WRITE, uiop); 7707c478bd9Sstevel@tonic-gate if (!error) { 7717c478bd9Sstevel@tonic-gate error = nfs3write(vp, base, org_offset, 7727c478bd9Sstevel@tonic-gate count, cr, &stab_comm); 
7737c478bd9Sstevel@tonic-gate } 7747c478bd9Sstevel@tonic-gate } while (!error && uiop->uio_resid > 0); 7757c478bd9Sstevel@tonic-gate kmem_free(base, bufsize); 7767c478bd9Sstevel@tonic-gate goto bottom; 7777c478bd9Sstevel@tonic-gate } 7787c478bd9Sstevel@tonic-gate 7797c478bd9Sstevel@tonic-gate 7807c478bd9Sstevel@tonic-gate bsize = vp->v_vfsp->vfs_bsize; 7817c478bd9Sstevel@tonic-gate 7827c478bd9Sstevel@tonic-gate do { 7837c478bd9Sstevel@tonic-gate off = uiop->uio_loffset & MAXBMASK; /* mapping offset */ 7847c478bd9Sstevel@tonic-gate on = uiop->uio_loffset & MAXBOFFSET; /* Relative offset */ 7857c478bd9Sstevel@tonic-gate n = MIN(MAXBSIZE - on, uiop->uio_resid); 7867c478bd9Sstevel@tonic-gate 7877c478bd9Sstevel@tonic-gate resid = uiop->uio_resid; 7887c478bd9Sstevel@tonic-gate offset = uiop->uio_loffset; 7897c478bd9Sstevel@tonic-gate 7907c478bd9Sstevel@tonic-gate if (rp->r_flags & RSTALE) { 7917c478bd9Sstevel@tonic-gate error = rp->r_error; 7928afffe5eSbatschul /* 7938afffe5eSbatschul * A close may have cleared r_error, if so, 7948afffe5eSbatschul * propagate ESTALE error return properly 7958afffe5eSbatschul */ 7968afffe5eSbatschul if (error == 0) 7978afffe5eSbatschul error = ESTALE; 7987c478bd9Sstevel@tonic-gate break; 7997c478bd9Sstevel@tonic-gate } 8007c478bd9Sstevel@tonic-gate 8017c478bd9Sstevel@tonic-gate /* 8027c478bd9Sstevel@tonic-gate * Don't create dirty pages faster than they 8037c478bd9Sstevel@tonic-gate * can be cleaned so that the system doesn't 8047c478bd9Sstevel@tonic-gate * get imbalanced. If the async queue is 8057c478bd9Sstevel@tonic-gate * maxed out, then wait for it to drain before 8067c478bd9Sstevel@tonic-gate * creating more dirty pages. Also, wait for 8077c478bd9Sstevel@tonic-gate * any threads doing pagewalks in the vop_getattr 8087c478bd9Sstevel@tonic-gate * entry points so that they don't block for 8097c478bd9Sstevel@tonic-gate * long periods. 
8107c478bd9Sstevel@tonic-gate */ 8117c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 8127c478bd9Sstevel@tonic-gate while ((mi->mi_max_threads != 0 && 8137c478bd9Sstevel@tonic-gate rp->r_awcount > 2 * mi->mi_max_threads) || 8146edb4230SMarcel Telka rp->r_gcount > 0) { 8156edb4230SMarcel Telka if (INTR(vp)) { 8166edb4230SMarcel Telka klwp_t *lwp = ttolwp(curthread); 8176edb4230SMarcel Telka 8186edb4230SMarcel Telka if (lwp != NULL) 8196edb4230SMarcel Telka lwp->lwp_nostop++; 8206edb4230SMarcel Telka if (!cv_wait_sig(&rp->r_cv, &rp->r_statelock)) { 8216edb4230SMarcel Telka mutex_exit(&rp->r_statelock); 8226edb4230SMarcel Telka if (lwp != NULL) 8236edb4230SMarcel Telka lwp->lwp_nostop--; 8246edb4230SMarcel Telka error = EINTR; 8256edb4230SMarcel Telka goto bottom; 8266edb4230SMarcel Telka } 8276edb4230SMarcel Telka if (lwp != NULL) 8286edb4230SMarcel Telka lwp->lwp_nostop--; 8296edb4230SMarcel Telka } else 8307c478bd9Sstevel@tonic-gate cv_wait(&rp->r_cv, &rp->r_statelock); 8316edb4230SMarcel Telka } 8327c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 8337c478bd9Sstevel@tonic-gate 8346f5f1c63SDonghai Qiao /* 8356f5f1c63SDonghai Qiao * Touch the page and fault it in if it is not in core 8366f5f1c63SDonghai Qiao * before segmap_getmapflt or vpm_data_copy can lock it. 8376f5f1c63SDonghai Qiao * This is to avoid the deadlock if the buffer is mapped 8386f5f1c63SDonghai Qiao * to the same file through mmap which we want to write. 8396f5f1c63SDonghai Qiao */ 8406f5f1c63SDonghai Qiao uio_prefaultpages((long)n, uiop); 8416f5f1c63SDonghai Qiao 842a5652762Spraks if (vpm_enable) { 843a5652762Spraks /* 844a5652762Spraks * It will use kpm mappings, so no need to 845a5652762Spraks * pass an address. 
846a5652762Spraks */ 847a5652762Spraks error = writerp(rp, NULL, n, uiop, 0); 848a5652762Spraks } else { 8497c478bd9Sstevel@tonic-gate if (segmap_kpm) { 8507c478bd9Sstevel@tonic-gate int pon = uiop->uio_loffset & PAGEOFFSET; 851a5652762Spraks size_t pn = MIN(PAGESIZE - pon, 852a5652762Spraks uiop->uio_resid); 8537c478bd9Sstevel@tonic-gate int pagecreate; 8547c478bd9Sstevel@tonic-gate 8557c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 8567c478bd9Sstevel@tonic-gate pagecreate = (pon == 0) && (pn == PAGESIZE || 8577c478bd9Sstevel@tonic-gate uiop->uio_loffset + pn >= rp->r_size); 8587c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 8597c478bd9Sstevel@tonic-gate 8607c478bd9Sstevel@tonic-gate base = segmap_getmapflt(segkmap, vp, off + on, 8617c478bd9Sstevel@tonic-gate pn, !pagecreate, S_WRITE); 8627c478bd9Sstevel@tonic-gate 863a5652762Spraks error = writerp(rp, base + pon, n, uiop, 864a5652762Spraks pagecreate); 8657c478bd9Sstevel@tonic-gate 8667c478bd9Sstevel@tonic-gate } else { 8677c478bd9Sstevel@tonic-gate base = segmap_getmapflt(segkmap, vp, off + on, 8687c478bd9Sstevel@tonic-gate n, 0, S_READ); 8697c478bd9Sstevel@tonic-gate error = writerp(rp, base + on, n, uiop, 0); 8707c478bd9Sstevel@tonic-gate } 871a5652762Spraks } 8727c478bd9Sstevel@tonic-gate 8737c478bd9Sstevel@tonic-gate if (!error) { 8747c478bd9Sstevel@tonic-gate if (mi->mi_flags & MI_NOAC) 8757c478bd9Sstevel@tonic-gate flags = SM_WRITE; 8767c478bd9Sstevel@tonic-gate else if ((uiop->uio_loffset % bsize) == 0 || 8777c478bd9Sstevel@tonic-gate IS_SWAPVP(vp)) { 8787c478bd9Sstevel@tonic-gate /* 8797c478bd9Sstevel@tonic-gate * Have written a whole block. 8807c478bd9Sstevel@tonic-gate * Start an asynchronous write 8817c478bd9Sstevel@tonic-gate * and mark the buffer to 8827c478bd9Sstevel@tonic-gate * indicate that it won't be 8837c478bd9Sstevel@tonic-gate * needed again soon. 
8847c478bd9Sstevel@tonic-gate */ 8857c478bd9Sstevel@tonic-gate flags = SM_WRITE | SM_ASYNC | SM_DONTNEED; 8867c478bd9Sstevel@tonic-gate } else 8877c478bd9Sstevel@tonic-gate flags = 0; 8887c478bd9Sstevel@tonic-gate if ((ioflag & (FSYNC|FDSYNC)) || 8897c478bd9Sstevel@tonic-gate (rp->r_flags & ROUTOFSPACE)) { 8907c478bd9Sstevel@tonic-gate flags &= ~SM_ASYNC; 8917c478bd9Sstevel@tonic-gate flags |= SM_WRITE; 8927c478bd9Sstevel@tonic-gate } 893a5652762Spraks if (vpm_enable) { 894a5652762Spraks error = vpm_sync_pages(vp, off, n, flags); 895a5652762Spraks } else { 8967c478bd9Sstevel@tonic-gate error = segmap_release(segkmap, base, flags); 897a5652762Spraks } 898a5652762Spraks } else { 899a5652762Spraks if (vpm_enable) { 900a5652762Spraks (void) vpm_sync_pages(vp, off, n, 0); 9017c478bd9Sstevel@tonic-gate } else { 9027c478bd9Sstevel@tonic-gate (void) segmap_release(segkmap, base, 0); 903a5652762Spraks } 9047c478bd9Sstevel@tonic-gate /* 9057c478bd9Sstevel@tonic-gate * In the event that we got an access error while 9067c478bd9Sstevel@tonic-gate * faulting in a page for a write-only file just 9077c478bd9Sstevel@tonic-gate * force a write. 
9087c478bd9Sstevel@tonic-gate */ 9097c478bd9Sstevel@tonic-gate if (error == EACCES) 9107c478bd9Sstevel@tonic-gate goto nfs3_fwrite; 9117c478bd9Sstevel@tonic-gate } 9127c478bd9Sstevel@tonic-gate } while (!error && uiop->uio_resid > 0); 9137c478bd9Sstevel@tonic-gate 9147c478bd9Sstevel@tonic-gate bottom: 9157c478bd9Sstevel@tonic-gate if (error) { 9167c478bd9Sstevel@tonic-gate uiop->uio_resid = resid + remainder; 9177c478bd9Sstevel@tonic-gate uiop->uio_loffset = offset; 9187c478bd9Sstevel@tonic-gate } else 9197c478bd9Sstevel@tonic-gate uiop->uio_resid += remainder; 9207c478bd9Sstevel@tonic-gate 9217c478bd9Sstevel@tonic-gate nfs_rw_exit(&rp->r_lkserlock); 9227c478bd9Sstevel@tonic-gate 9237c478bd9Sstevel@tonic-gate return (error); 9247c478bd9Sstevel@tonic-gate } 9257c478bd9Sstevel@tonic-gate 9267c478bd9Sstevel@tonic-gate /* 9277c478bd9Sstevel@tonic-gate * Flags are composed of {B_ASYNC, B_INVAL, B_FREE, B_DONTNEED} 9287c478bd9Sstevel@tonic-gate */ 9297c478bd9Sstevel@tonic-gate static int 9307c478bd9Sstevel@tonic-gate nfs3_rdwrlbn(vnode_t *vp, page_t *pp, u_offset_t off, size_t len, 9317c478bd9Sstevel@tonic-gate int flags, cred_t *cr) 9327c478bd9Sstevel@tonic-gate { 9337c478bd9Sstevel@tonic-gate struct buf *bp; 9347c478bd9Sstevel@tonic-gate int error; 9357c478bd9Sstevel@tonic-gate page_t *savepp; 9367c478bd9Sstevel@tonic-gate uchar_t fsdata; 9377c478bd9Sstevel@tonic-gate stable_how stab_comm; 9387c478bd9Sstevel@tonic-gate 939108322fbScarlsonj ASSERT(nfs_zone() == VTOMI(vp)->mi_zone); 9407c478bd9Sstevel@tonic-gate bp = pageio_setup(pp, len, vp, flags); 9417c478bd9Sstevel@tonic-gate ASSERT(bp != NULL); 9427c478bd9Sstevel@tonic-gate 9437c478bd9Sstevel@tonic-gate /* 9447c478bd9Sstevel@tonic-gate * pageio_setup should have set b_addr to 0. This 9457c478bd9Sstevel@tonic-gate * is correct since we want to do I/O on a page 9467c478bd9Sstevel@tonic-gate * boundary. 
bp_mapin will use this addr to calculate 9477c478bd9Sstevel@tonic-gate * an offset, and then set b_addr to the kernel virtual 9487c478bd9Sstevel@tonic-gate * address it allocated for us. 9497c478bd9Sstevel@tonic-gate */ 9507c478bd9Sstevel@tonic-gate ASSERT(bp->b_un.b_addr == 0); 9517c478bd9Sstevel@tonic-gate 9527c478bd9Sstevel@tonic-gate bp->b_edev = 0; 9537c478bd9Sstevel@tonic-gate bp->b_dev = 0; 9547c478bd9Sstevel@tonic-gate bp->b_lblkno = lbtodb(off); 9557c478bd9Sstevel@tonic-gate bp->b_file = vp; 9567c478bd9Sstevel@tonic-gate bp->b_offset = (offset_t)off; 9577c478bd9Sstevel@tonic-gate bp_mapin(bp); 9587c478bd9Sstevel@tonic-gate 9597c478bd9Sstevel@tonic-gate /* 9607c478bd9Sstevel@tonic-gate * Calculate the desired level of stability to write data 9617c478bd9Sstevel@tonic-gate * on the server and then mark all of the pages to reflect 9627c478bd9Sstevel@tonic-gate * this. 9637c478bd9Sstevel@tonic-gate */ 9647c478bd9Sstevel@tonic-gate if ((flags & (B_WRITE|B_ASYNC)) == (B_WRITE|B_ASYNC) && 9657c478bd9Sstevel@tonic-gate freemem > desfree) { 9667c478bd9Sstevel@tonic-gate stab_comm = UNSTABLE; 9677c478bd9Sstevel@tonic-gate fsdata = C_DELAYCOMMIT; 9687c478bd9Sstevel@tonic-gate } else { 9697c478bd9Sstevel@tonic-gate stab_comm = FILE_SYNC; 9707c478bd9Sstevel@tonic-gate fsdata = C_NOCOMMIT; 9717c478bd9Sstevel@tonic-gate } 9727c478bd9Sstevel@tonic-gate 9737c478bd9Sstevel@tonic-gate savepp = pp; 9747c478bd9Sstevel@tonic-gate do { 9757c478bd9Sstevel@tonic-gate pp->p_fsdata = fsdata; 9767c478bd9Sstevel@tonic-gate } while ((pp = pp->p_next) != savepp); 9777c478bd9Sstevel@tonic-gate 9787c478bd9Sstevel@tonic-gate error = nfs3_bio(bp, &stab_comm, cr); 9797c478bd9Sstevel@tonic-gate 9807c478bd9Sstevel@tonic-gate bp_mapout(bp); 9817c478bd9Sstevel@tonic-gate pageio_done(bp); 9827c478bd9Sstevel@tonic-gate 9837c478bd9Sstevel@tonic-gate /* 9847c478bd9Sstevel@tonic-gate * If the server wrote pages in a more stable fashion than 9857c478bd9Sstevel@tonic-gate * was requested, then clear all 
of the marks in the pages 9867c478bd9Sstevel@tonic-gate * indicating that COMMIT operations were required. 9877c478bd9Sstevel@tonic-gate */ 9887c478bd9Sstevel@tonic-gate if (stab_comm != UNSTABLE && fsdata == C_DELAYCOMMIT) { 9897c478bd9Sstevel@tonic-gate do { 9907c478bd9Sstevel@tonic-gate pp->p_fsdata = C_NOCOMMIT; 9917c478bd9Sstevel@tonic-gate } while ((pp = pp->p_next) != savepp); 9927c478bd9Sstevel@tonic-gate } 9937c478bd9Sstevel@tonic-gate 9947c478bd9Sstevel@tonic-gate return (error); 9957c478bd9Sstevel@tonic-gate } 9967c478bd9Sstevel@tonic-gate 9977c478bd9Sstevel@tonic-gate /* 9987c478bd9Sstevel@tonic-gate * Write to file. Writes to remote server in largest size 9997c478bd9Sstevel@tonic-gate * chunks that the server can handle. Write is synchronous. 10007c478bd9Sstevel@tonic-gate */ 10017c478bd9Sstevel@tonic-gate static int 10027c478bd9Sstevel@tonic-gate nfs3write(vnode_t *vp, caddr_t base, u_offset_t offset, int count, cred_t *cr, 10037c478bd9Sstevel@tonic-gate stable_how *stab_comm) 10047c478bd9Sstevel@tonic-gate { 10057c478bd9Sstevel@tonic-gate mntinfo_t *mi; 10067c478bd9Sstevel@tonic-gate WRITE3args args; 10077c478bd9Sstevel@tonic-gate WRITE3res res; 10087c478bd9Sstevel@tonic-gate int error; 10097c478bd9Sstevel@tonic-gate int tsize; 10107c478bd9Sstevel@tonic-gate rnode_t *rp; 10117c478bd9Sstevel@tonic-gate int douprintf; 10127c478bd9Sstevel@tonic-gate 10137c478bd9Sstevel@tonic-gate rp = VTOR(vp); 10147c478bd9Sstevel@tonic-gate mi = VTOMI(vp); 10157c478bd9Sstevel@tonic-gate 1016108322fbScarlsonj ASSERT(nfs_zone() == mi->mi_zone); 10177c478bd9Sstevel@tonic-gate 10187c478bd9Sstevel@tonic-gate args.file = *VTOFH3(vp); 10197c478bd9Sstevel@tonic-gate args.stable = *stab_comm; 10207c478bd9Sstevel@tonic-gate 10217c478bd9Sstevel@tonic-gate *stab_comm = FILE_SYNC; 10227c478bd9Sstevel@tonic-gate 10237c478bd9Sstevel@tonic-gate douprintf = 1; 10247c478bd9Sstevel@tonic-gate 10257c478bd9Sstevel@tonic-gate do { 10267c478bd9Sstevel@tonic-gate if ((vp->v_flag & VNOCACHE) 
|| 10277c478bd9Sstevel@tonic-gate (rp->r_flags & RDIRECTIO) || 10287c478bd9Sstevel@tonic-gate (mi->mi_flags & MI_DIRECTIO)) 10297c478bd9Sstevel@tonic-gate tsize = MIN(mi->mi_stsize, count); 10307c478bd9Sstevel@tonic-gate else 10317c478bd9Sstevel@tonic-gate tsize = MIN(mi->mi_curwrite, count); 10327c478bd9Sstevel@tonic-gate args.offset = (offset3)offset; 10337c478bd9Sstevel@tonic-gate args.count = (count3)tsize; 10347c478bd9Sstevel@tonic-gate args.data.data_len = (uint_t)tsize; 10357c478bd9Sstevel@tonic-gate args.data.data_val = base; 10367c478bd9Sstevel@tonic-gate 10377c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) { 10387c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 10397c478bd9Sstevel@tonic-gate kstat_runq_enter(KSTAT_IO_PTR(mi->mi_io_kstats)); 10407c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 10417c478bd9Sstevel@tonic-gate } 10427c478bd9Sstevel@tonic-gate args.mblk = NULL; 10437c478bd9Sstevel@tonic-gate do { 10447c478bd9Sstevel@tonic-gate error = rfs3call(mi, NFSPROC3_WRITE, 10457c478bd9Sstevel@tonic-gate xdr_WRITE3args, (caddr_t)&args, 10467c478bd9Sstevel@tonic-gate xdr_WRITE3res, (caddr_t)&res, cr, 10477c478bd9Sstevel@tonic-gate &douprintf, &res.status, 0, NULL); 10487c478bd9Sstevel@tonic-gate } while (error == ENFS_TRYAGAIN); 10497c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) { 10507c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 10517c478bd9Sstevel@tonic-gate kstat_runq_exit(KSTAT_IO_PTR(mi->mi_io_kstats)); 10527c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 10537c478bd9Sstevel@tonic-gate } 10547c478bd9Sstevel@tonic-gate 10557c478bd9Sstevel@tonic-gate if (error) 10567c478bd9Sstevel@tonic-gate return (error); 10577c478bd9Sstevel@tonic-gate error = geterrno3(res.status); 10587c478bd9Sstevel@tonic-gate if (!error) { 10597c478bd9Sstevel@tonic-gate if (res.resok.count > args.count) { 10607c478bd9Sstevel@tonic-gate zcmn_err(getzoneid(), CE_WARN, 10617c478bd9Sstevel@tonic-gate "nfs3write: server %s wrote %u, " 
10627c478bd9Sstevel@tonic-gate "requested was %u", 10637c478bd9Sstevel@tonic-gate rp->r_server->sv_hostname, 10647c478bd9Sstevel@tonic-gate res.resok.count, args.count); 10657c478bd9Sstevel@tonic-gate return (EIO); 10667c478bd9Sstevel@tonic-gate } 10677c478bd9Sstevel@tonic-gate if (res.resok.committed == UNSTABLE) { 10687c478bd9Sstevel@tonic-gate *stab_comm = UNSTABLE; 10697c478bd9Sstevel@tonic-gate if (args.stable == DATA_SYNC || 10707c478bd9Sstevel@tonic-gate args.stable == FILE_SYNC) { 10717c478bd9Sstevel@tonic-gate zcmn_err(getzoneid(), CE_WARN, 10727c478bd9Sstevel@tonic-gate "nfs3write: server %s did not commit to stable storage", 10737c478bd9Sstevel@tonic-gate rp->r_server->sv_hostname); 10747c478bd9Sstevel@tonic-gate return (EIO); 10757c478bd9Sstevel@tonic-gate } 10767c478bd9Sstevel@tonic-gate } 10777c478bd9Sstevel@tonic-gate tsize = (int)res.resok.count; 10787c478bd9Sstevel@tonic-gate count -= tsize; 10797c478bd9Sstevel@tonic-gate base += tsize; 10807c478bd9Sstevel@tonic-gate offset += tsize; 10817c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) { 10827c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 10837c478bd9Sstevel@tonic-gate KSTAT_IO_PTR(mi->mi_io_kstats)->writes++; 10847c478bd9Sstevel@tonic-gate KSTAT_IO_PTR(mi->mi_io_kstats)->nwritten += 10857c478bd9Sstevel@tonic-gate tsize; 10867c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 10877c478bd9Sstevel@tonic-gate } 10887c478bd9Sstevel@tonic-gate lwp_stat_update(LWP_STAT_OUBLK, 1); 10897c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 10907c478bd9Sstevel@tonic-gate if (rp->r_flags & RHAVEVERF) { 10917c478bd9Sstevel@tonic-gate if (rp->r_verf != res.resok.verf) { 10927c478bd9Sstevel@tonic-gate nfs3_set_mod(vp); 10937c478bd9Sstevel@tonic-gate rp->r_verf = res.resok.verf; 10947c478bd9Sstevel@tonic-gate /* 10957c478bd9Sstevel@tonic-gate * If the data was written UNSTABLE, 10967c478bd9Sstevel@tonic-gate * then might as well stop because 10977c478bd9Sstevel@tonic-gate * the whole block will have to 
get 10987c478bd9Sstevel@tonic-gate * rewritten anyway. 10997c478bd9Sstevel@tonic-gate */ 11007c478bd9Sstevel@tonic-gate if (*stab_comm == UNSTABLE) { 11017c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 11027c478bd9Sstevel@tonic-gate break; 11037c478bd9Sstevel@tonic-gate } 11047c478bd9Sstevel@tonic-gate } 11057c478bd9Sstevel@tonic-gate } else { 11067c478bd9Sstevel@tonic-gate rp->r_verf = res.resok.verf; 11077c478bd9Sstevel@tonic-gate rp->r_flags |= RHAVEVERF; 11087c478bd9Sstevel@tonic-gate } 11097c478bd9Sstevel@tonic-gate /* 11107c478bd9Sstevel@tonic-gate * Mark the attribute cache as timed out and 11117c478bd9Sstevel@tonic-gate * set RWRITEATTR to indicate that the file 11127c478bd9Sstevel@tonic-gate * was modified with a WRITE operation and 11137c478bd9Sstevel@tonic-gate * that the attributes can not be trusted. 11147c478bd9Sstevel@tonic-gate */ 11157c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE_LOCKED(rp); 11167c478bd9Sstevel@tonic-gate rp->r_flags |= RWRITEATTR; 11177c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 11187c478bd9Sstevel@tonic-gate } 11197c478bd9Sstevel@tonic-gate } while (!error && count); 11207c478bd9Sstevel@tonic-gate 11217c478bd9Sstevel@tonic-gate return (error); 11227c478bd9Sstevel@tonic-gate } 11237c478bd9Sstevel@tonic-gate 11247c478bd9Sstevel@tonic-gate /* 11257c478bd9Sstevel@tonic-gate * Read from a file. Reads data in largest chunks our interface can handle. 
11267c478bd9Sstevel@tonic-gate */ 11277c478bd9Sstevel@tonic-gate static int 11287c478bd9Sstevel@tonic-gate nfs3read(vnode_t *vp, caddr_t base, offset_t offset, int count, 11297c478bd9Sstevel@tonic-gate size_t *residp, cred_t *cr) 11307c478bd9Sstevel@tonic-gate { 11317c478bd9Sstevel@tonic-gate mntinfo_t *mi; 11327c478bd9Sstevel@tonic-gate READ3args args; 11337c478bd9Sstevel@tonic-gate READ3vres res; 11347c478bd9Sstevel@tonic-gate int tsize; 11357c478bd9Sstevel@tonic-gate int error; 11367c478bd9Sstevel@tonic-gate int douprintf; 11377c478bd9Sstevel@tonic-gate failinfo_t fi; 11387c478bd9Sstevel@tonic-gate rnode_t *rp; 11397c478bd9Sstevel@tonic-gate struct vattr va; 11407c478bd9Sstevel@tonic-gate hrtime_t t; 11417c478bd9Sstevel@tonic-gate 11427c478bd9Sstevel@tonic-gate rp = VTOR(vp); 11437c478bd9Sstevel@tonic-gate mi = VTOMI(vp); 1144108322fbScarlsonj ASSERT(nfs_zone() == mi->mi_zone); 11457c478bd9Sstevel@tonic-gate douprintf = 1; 11467c478bd9Sstevel@tonic-gate 11477c478bd9Sstevel@tonic-gate args.file = *VTOFH3(vp); 11487c478bd9Sstevel@tonic-gate fi.vp = vp; 11497c478bd9Sstevel@tonic-gate fi.fhp = (caddr_t)&args.file; 11507c478bd9Sstevel@tonic-gate fi.copyproc = nfs3copyfh; 11517c478bd9Sstevel@tonic-gate fi.lookupproc = nfs3lookup; 11527c478bd9Sstevel@tonic-gate fi.xattrdirproc = acl_getxattrdir3; 11537c478bd9Sstevel@tonic-gate 11547c478bd9Sstevel@tonic-gate res.pov.fres.vp = vp; 11557c478bd9Sstevel@tonic-gate res.pov.fres.vap = &va; 11567c478bd9Sstevel@tonic-gate 11570a701b1eSRobert Gordon res.wlist = NULL; 11587c478bd9Sstevel@tonic-gate *residp = count; 11597c478bd9Sstevel@tonic-gate do { 11607c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) { 11617c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 11627c478bd9Sstevel@tonic-gate kstat_runq_enter(KSTAT_IO_PTR(mi->mi_io_kstats)); 11637c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 11647c478bd9Sstevel@tonic-gate } 11657c478bd9Sstevel@tonic-gate 11667c478bd9Sstevel@tonic-gate do { 11677c478bd9Sstevel@tonic-gate if 
((vp->v_flag & VNOCACHE) || 11687c478bd9Sstevel@tonic-gate (rp->r_flags & RDIRECTIO) || 11697c478bd9Sstevel@tonic-gate (mi->mi_flags & MI_DIRECTIO)) 11707c478bd9Sstevel@tonic-gate tsize = MIN(mi->mi_tsize, count); 11717c478bd9Sstevel@tonic-gate else 11727c478bd9Sstevel@tonic-gate tsize = MIN(mi->mi_curread, count); 11737c478bd9Sstevel@tonic-gate res.data.data_val = base; 11747c478bd9Sstevel@tonic-gate res.data.data_len = tsize; 11757c478bd9Sstevel@tonic-gate args.offset = (offset3)offset; 11767c478bd9Sstevel@tonic-gate args.count = (count3)tsize; 11770a701b1eSRobert Gordon args.res_uiop = NULL; 11780a701b1eSRobert Gordon args.res_data_val_alt = base; 11790a701b1eSRobert Gordon 11807c478bd9Sstevel@tonic-gate t = gethrtime(); 11817c478bd9Sstevel@tonic-gate error = rfs3call(mi, NFSPROC3_READ, 11827c478bd9Sstevel@tonic-gate xdr_READ3args, (caddr_t)&args, 11837c478bd9Sstevel@tonic-gate xdr_READ3vres, (caddr_t)&res, cr, 11847c478bd9Sstevel@tonic-gate &douprintf, &res.status, 0, &fi); 11857c478bd9Sstevel@tonic-gate } while (error == ENFS_TRYAGAIN); 11867c478bd9Sstevel@tonic-gate 11877c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) { 11887c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 11897c478bd9Sstevel@tonic-gate kstat_runq_exit(KSTAT_IO_PTR(mi->mi_io_kstats)); 11907c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 11917c478bd9Sstevel@tonic-gate } 11927c478bd9Sstevel@tonic-gate 11937c478bd9Sstevel@tonic-gate if (error) 11947c478bd9Sstevel@tonic-gate return (error); 11957c478bd9Sstevel@tonic-gate 11967c478bd9Sstevel@tonic-gate error = geterrno3(res.status); 11977c478bd9Sstevel@tonic-gate if (error) 11987c478bd9Sstevel@tonic-gate return (error); 11997c478bd9Sstevel@tonic-gate 12007c478bd9Sstevel@tonic-gate if (res.count != res.data.data_len) { 12017c478bd9Sstevel@tonic-gate zcmn_err(getzoneid(), CE_WARN, 12027c478bd9Sstevel@tonic-gate "nfs3read: server %s returned incorrect amount", 12037c478bd9Sstevel@tonic-gate rp->r_server->sv_hostname); 
12047c478bd9Sstevel@tonic-gate return (EIO); 12057c478bd9Sstevel@tonic-gate } 12067c478bd9Sstevel@tonic-gate 12077c478bd9Sstevel@tonic-gate count -= res.count; 12087c478bd9Sstevel@tonic-gate *residp = count; 12097c478bd9Sstevel@tonic-gate base += res.count; 12107c478bd9Sstevel@tonic-gate offset += res.count; 12117c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) { 12127c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 12137c478bd9Sstevel@tonic-gate KSTAT_IO_PTR(mi->mi_io_kstats)->reads++; 12147c478bd9Sstevel@tonic-gate KSTAT_IO_PTR(mi->mi_io_kstats)->nread += res.count; 12157c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 12167c478bd9Sstevel@tonic-gate } 12177c478bd9Sstevel@tonic-gate lwp_stat_update(LWP_STAT_INBLK, 1); 12187c478bd9Sstevel@tonic-gate } while (count && !res.eof); 12197c478bd9Sstevel@tonic-gate 12207c478bd9Sstevel@tonic-gate if (res.pov.attributes) { 12217c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 12227c478bd9Sstevel@tonic-gate if (!CACHE_VALID(rp, va.va_mtime, va.va_size)) { 12237c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 12247c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(vp); 12257c478bd9Sstevel@tonic-gate } else { 12267c478bd9Sstevel@tonic-gate if (rp->r_mtime <= t) 12277c478bd9Sstevel@tonic-gate nfs_attrcache_va(vp, &va); 12287c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 12297c478bd9Sstevel@tonic-gate } 12307c478bd9Sstevel@tonic-gate } 12317c478bd9Sstevel@tonic-gate 12327c478bd9Sstevel@tonic-gate return (0); 12337c478bd9Sstevel@tonic-gate } 12347c478bd9Sstevel@tonic-gate 12357c478bd9Sstevel@tonic-gate /* ARGSUSED */ 12367c478bd9Sstevel@tonic-gate static int 1237da6c28aaSamw nfs3_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp, 1238da6c28aaSamw caller_context_t *ct) 12397c478bd9Sstevel@tonic-gate { 12407c478bd9Sstevel@tonic-gate 1241108322fbScarlsonj if (nfs_zone() != VTOMI(vp)->mi_zone) 12427c478bd9Sstevel@tonic-gate return (EIO); 12437c478bd9Sstevel@tonic-gate switch (cmd) { 
12447c478bd9Sstevel@tonic-gate case _FIODIRECTIO: 12457c478bd9Sstevel@tonic-gate return (nfs_directio(vp, (int)arg, cr)); 12467c478bd9Sstevel@tonic-gate default: 12477c478bd9Sstevel@tonic-gate return (ENOTTY); 12487c478bd9Sstevel@tonic-gate } 12497c478bd9Sstevel@tonic-gate } 12507c478bd9Sstevel@tonic-gate 1251da6c28aaSamw /* ARGSUSED */ 12527c478bd9Sstevel@tonic-gate static int 1253da6c28aaSamw nfs3_getattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr, 1254da6c28aaSamw caller_context_t *ct) 12557c478bd9Sstevel@tonic-gate { 12567c478bd9Sstevel@tonic-gate int error; 12577c478bd9Sstevel@tonic-gate rnode_t *rp; 12587c478bd9Sstevel@tonic-gate 1259108322fbScarlsonj if (nfs_zone() != VTOMI(vp)->mi_zone) 12607c478bd9Sstevel@tonic-gate return (EIO); 12617c478bd9Sstevel@tonic-gate /* 12627c478bd9Sstevel@tonic-gate * If it has been specified that the return value will 12637c478bd9Sstevel@tonic-gate * just be used as a hint, and we are only being asked 12647c478bd9Sstevel@tonic-gate * for size, fsid or rdevid, then return the client's 12657c478bd9Sstevel@tonic-gate * notion of these values without checking to make sure 12667c478bd9Sstevel@tonic-gate * that the attribute cache is up to date. 12677c478bd9Sstevel@tonic-gate * The whole point is to avoid an over the wire GETATTR 12687c478bd9Sstevel@tonic-gate * call. 
12697c478bd9Sstevel@tonic-gate */ 12707c478bd9Sstevel@tonic-gate rp = VTOR(vp); 12717c478bd9Sstevel@tonic-gate if (flags & ATTR_HINT) { 12727c478bd9Sstevel@tonic-gate if (vap->va_mask == 12737c478bd9Sstevel@tonic-gate (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) { 12747c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 12757c478bd9Sstevel@tonic-gate if (vap->va_mask | AT_SIZE) 12767c478bd9Sstevel@tonic-gate vap->va_size = rp->r_size; 12777c478bd9Sstevel@tonic-gate if (vap->va_mask | AT_FSID) 12787c478bd9Sstevel@tonic-gate vap->va_fsid = rp->r_attr.va_fsid; 12797c478bd9Sstevel@tonic-gate if (vap->va_mask | AT_RDEV) 12807c478bd9Sstevel@tonic-gate vap->va_rdev = rp->r_attr.va_rdev; 12817c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 12827c478bd9Sstevel@tonic-gate return (0); 12837c478bd9Sstevel@tonic-gate } 12847c478bd9Sstevel@tonic-gate } 12857c478bd9Sstevel@tonic-gate 12867c478bd9Sstevel@tonic-gate /* 12877c478bd9Sstevel@tonic-gate * Only need to flush pages if asking for the mtime 12887c478bd9Sstevel@tonic-gate * and if there any dirty pages or any outstanding 12897c478bd9Sstevel@tonic-gate * asynchronous (write) requests for this file. 
12907c478bd9Sstevel@tonic-gate */ 12917c478bd9Sstevel@tonic-gate if (vap->va_mask & AT_MTIME) { 12927c478bd9Sstevel@tonic-gate if (vn_has_cached_data(vp) && 12937c478bd9Sstevel@tonic-gate ((rp->r_flags & RDIRTY) || rp->r_awcount > 0)) { 12947c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 12957c478bd9Sstevel@tonic-gate rp->r_gcount++; 12967c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 1297da6c28aaSamw error = nfs3_putpage(vp, (offset_t)0, 0, 0, cr, ct); 12987c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 12997c478bd9Sstevel@tonic-gate if (error && (error == ENOSPC || error == EDQUOT)) { 13007c478bd9Sstevel@tonic-gate if (!rp->r_error) 13017c478bd9Sstevel@tonic-gate rp->r_error = error; 13027c478bd9Sstevel@tonic-gate } 13037c478bd9Sstevel@tonic-gate if (--rp->r_gcount == 0) 13047c478bd9Sstevel@tonic-gate cv_broadcast(&rp->r_cv); 13057c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 13067c478bd9Sstevel@tonic-gate } 13077c478bd9Sstevel@tonic-gate } 13087c478bd9Sstevel@tonic-gate 13097c478bd9Sstevel@tonic-gate return (nfs3getattr(vp, vap, cr)); 13107c478bd9Sstevel@tonic-gate } 13117c478bd9Sstevel@tonic-gate 13127c478bd9Sstevel@tonic-gate /*ARGSUSED4*/ 13137c478bd9Sstevel@tonic-gate static int 13147c478bd9Sstevel@tonic-gate nfs3_setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr, 13157c478bd9Sstevel@tonic-gate caller_context_t *ct) 13167c478bd9Sstevel@tonic-gate { 13177c478bd9Sstevel@tonic-gate int error; 13187c478bd9Sstevel@tonic-gate struct vattr va; 13197c478bd9Sstevel@tonic-gate 13207c478bd9Sstevel@tonic-gate if (vap->va_mask & AT_NOSET) 13217c478bd9Sstevel@tonic-gate return (EINVAL); 1322108322fbScarlsonj if (nfs_zone() != VTOMI(vp)->mi_zone) 13237c478bd9Sstevel@tonic-gate return (EIO); 13247c478bd9Sstevel@tonic-gate 13257c478bd9Sstevel@tonic-gate va.va_mask = AT_UID | AT_MODE; 13267c478bd9Sstevel@tonic-gate error = nfs3getattr(vp, &va, cr); 13277c478bd9Sstevel@tonic-gate if (error) 13287c478bd9Sstevel@tonic-gate 
return (error); 13297c478bd9Sstevel@tonic-gate 13307c478bd9Sstevel@tonic-gate error = secpolicy_vnode_setattr(cr, vp, vap, &va, flags, nfs3_accessx, 13317c478bd9Sstevel@tonic-gate vp); 13327c478bd9Sstevel@tonic-gate if (error) 13337c478bd9Sstevel@tonic-gate return (error); 13347c478bd9Sstevel@tonic-gate 133572102e74SBryan Cantrill error = nfs3setattr(vp, vap, flags, cr); 133672102e74SBryan Cantrill 133772102e74SBryan Cantrill if (error == 0 && (vap->va_mask & AT_SIZE) && vap->va_size == 0) 133872102e74SBryan Cantrill vnevent_truncate(vp, ct); 133972102e74SBryan Cantrill 134072102e74SBryan Cantrill return (error); 13417c478bd9Sstevel@tonic-gate } 13427c478bd9Sstevel@tonic-gate 13437c478bd9Sstevel@tonic-gate static int 13447c478bd9Sstevel@tonic-gate nfs3setattr(vnode_t *vp, struct vattr *vap, int flags, cred_t *cr) 13457c478bd9Sstevel@tonic-gate { 13467c478bd9Sstevel@tonic-gate int error; 13477c478bd9Sstevel@tonic-gate uint_t mask; 13487c478bd9Sstevel@tonic-gate SETATTR3args args; 13497c478bd9Sstevel@tonic-gate SETATTR3res res; 13507c478bd9Sstevel@tonic-gate int douprintf; 13517c478bd9Sstevel@tonic-gate rnode_t *rp; 13527c478bd9Sstevel@tonic-gate struct vattr va; 13537c478bd9Sstevel@tonic-gate mode_t omode; 13547c478bd9Sstevel@tonic-gate vsecattr_t *vsp; 13557c478bd9Sstevel@tonic-gate hrtime_t t; 13567c478bd9Sstevel@tonic-gate 1357108322fbScarlsonj ASSERT(nfs_zone() == VTOMI(vp)->mi_zone); 13587c478bd9Sstevel@tonic-gate mask = vap->va_mask; 13597c478bd9Sstevel@tonic-gate 13607c478bd9Sstevel@tonic-gate rp = VTOR(vp); 13617c478bd9Sstevel@tonic-gate 13627c478bd9Sstevel@tonic-gate /* 13637c478bd9Sstevel@tonic-gate * Only need to flush pages if there are any pages and 13647c478bd9Sstevel@tonic-gate * if the file is marked as dirty in some fashion. The 13657c478bd9Sstevel@tonic-gate * file must be flushed so that we can accurately 13667c478bd9Sstevel@tonic-gate * determine the size of the file and the cached data 13677c478bd9Sstevel@tonic-gate * after the SETATTR returns. 
A file is considered to 13687c478bd9Sstevel@tonic-gate * be dirty if it is either marked with RDIRTY, has 13697c478bd9Sstevel@tonic-gate * outstanding i/o's active, or is mmap'd. In this 13707c478bd9Sstevel@tonic-gate * last case, we can't tell whether there are dirty 13717c478bd9Sstevel@tonic-gate * pages, so we flush just to be sure. 13727c478bd9Sstevel@tonic-gate */ 13737c478bd9Sstevel@tonic-gate if (vn_has_cached_data(vp) && 13747c478bd9Sstevel@tonic-gate ((rp->r_flags & RDIRTY) || 13757c478bd9Sstevel@tonic-gate rp->r_count > 0 || 13767c478bd9Sstevel@tonic-gate rp->r_mapcnt > 0)) { 13777c478bd9Sstevel@tonic-gate ASSERT(vp->v_type != VCHR); 1378da6c28aaSamw error = nfs3_putpage(vp, (offset_t)0, 0, 0, cr, NULL); 13797c478bd9Sstevel@tonic-gate if (error && (error == ENOSPC || error == EDQUOT)) { 13807c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 13817c478bd9Sstevel@tonic-gate if (!rp->r_error) 13827c478bd9Sstevel@tonic-gate rp->r_error = error; 13837c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 13847c478bd9Sstevel@tonic-gate } 13857c478bd9Sstevel@tonic-gate } 13867c478bd9Sstevel@tonic-gate 13877c478bd9Sstevel@tonic-gate args.object = *RTOFH3(rp); 13887c478bd9Sstevel@tonic-gate /* 13897c478bd9Sstevel@tonic-gate * If the intent is for the server to set the times, 13907c478bd9Sstevel@tonic-gate * there is no point in have the mask indicating set mtime or 13917c478bd9Sstevel@tonic-gate * atime, because the vap values may be junk, and so result 13927c478bd9Sstevel@tonic-gate * in an overflow error. Remove these flags from the vap mask 13937c478bd9Sstevel@tonic-gate * before calling in this case, and restore them afterwards. 
13947c478bd9Sstevel@tonic-gate */ 13957c478bd9Sstevel@tonic-gate if ((mask & (AT_ATIME | AT_MTIME)) && !(flags & ATTR_UTIME)) { 13967c478bd9Sstevel@tonic-gate /* Use server times, so don't set the args time fields */ 13977c478bd9Sstevel@tonic-gate vap->va_mask &= ~(AT_ATIME | AT_MTIME); 13987c478bd9Sstevel@tonic-gate error = vattr_to_sattr3(vap, &args.new_attributes); 13997c478bd9Sstevel@tonic-gate vap->va_mask |= (mask & (AT_ATIME | AT_MTIME)); 14007c478bd9Sstevel@tonic-gate if (mask & AT_ATIME) { 14017c478bd9Sstevel@tonic-gate args.new_attributes.atime.set_it = SET_TO_SERVER_TIME; 14027c478bd9Sstevel@tonic-gate } 14037c478bd9Sstevel@tonic-gate if (mask & AT_MTIME) { 14047c478bd9Sstevel@tonic-gate args.new_attributes.mtime.set_it = SET_TO_SERVER_TIME; 14057c478bd9Sstevel@tonic-gate } 14067c478bd9Sstevel@tonic-gate } else { 14077c478bd9Sstevel@tonic-gate /* Either do not set times or use the client specified times */ 14087c478bd9Sstevel@tonic-gate error = vattr_to_sattr3(vap, &args.new_attributes); 14097c478bd9Sstevel@tonic-gate } 14107c478bd9Sstevel@tonic-gate 14117c478bd9Sstevel@tonic-gate if (error) { 14127c478bd9Sstevel@tonic-gate /* req time field(s) overflow - return immediately */ 14137c478bd9Sstevel@tonic-gate return (error); 14147c478bd9Sstevel@tonic-gate } 14157c478bd9Sstevel@tonic-gate 14167c478bd9Sstevel@tonic-gate va.va_mask = AT_MODE | AT_CTIME; 14177c478bd9Sstevel@tonic-gate error = nfs3getattr(vp, &va, cr); 14187c478bd9Sstevel@tonic-gate if (error) 14197c478bd9Sstevel@tonic-gate return (error); 14207c478bd9Sstevel@tonic-gate omode = va.va_mode; 14217c478bd9Sstevel@tonic-gate 14227c478bd9Sstevel@tonic-gate tryagain: 14237c478bd9Sstevel@tonic-gate if (mask & AT_SIZE) { 14247c478bd9Sstevel@tonic-gate args.guard.check = TRUE; 14257c478bd9Sstevel@tonic-gate args.guard.obj_ctime.seconds = va.va_ctime.tv_sec; 14267c478bd9Sstevel@tonic-gate args.guard.obj_ctime.nseconds = va.va_ctime.tv_nsec; 14277c478bd9Sstevel@tonic-gate } else 
14287c478bd9Sstevel@tonic-gate args.guard.check = FALSE; 14297c478bd9Sstevel@tonic-gate 14307c478bd9Sstevel@tonic-gate douprintf = 1; 14317c478bd9Sstevel@tonic-gate 14327c478bd9Sstevel@tonic-gate t = gethrtime(); 14337c478bd9Sstevel@tonic-gate 14347c478bd9Sstevel@tonic-gate error = rfs3call(VTOMI(vp), NFSPROC3_SETATTR, 14357c478bd9Sstevel@tonic-gate xdr_SETATTR3args, (caddr_t)&args, 14367c478bd9Sstevel@tonic-gate xdr_SETATTR3res, (caddr_t)&res, cr, 14377c478bd9Sstevel@tonic-gate &douprintf, &res.status, 0, NULL); 14387c478bd9Sstevel@tonic-gate 14397c478bd9Sstevel@tonic-gate /* 14407c478bd9Sstevel@tonic-gate * Purge the access cache and ACL cache if changing either the 14417c478bd9Sstevel@tonic-gate * owner of the file, the group owner, or the mode. These may 14427c478bd9Sstevel@tonic-gate * change the access permissions of the file, so purge old 14437c478bd9Sstevel@tonic-gate * information and start over again. 14447c478bd9Sstevel@tonic-gate */ 14457c478bd9Sstevel@tonic-gate if (mask & (AT_UID | AT_GID | AT_MODE)) { 14467c478bd9Sstevel@tonic-gate (void) nfs_access_purge_rp(rp); 14477c478bd9Sstevel@tonic-gate if (rp->r_secattr != NULL) { 14487c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 14497c478bd9Sstevel@tonic-gate vsp = rp->r_secattr; 14507c478bd9Sstevel@tonic-gate rp->r_secattr = NULL; 14517c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 14527c478bd9Sstevel@tonic-gate if (vsp != NULL) 14537c478bd9Sstevel@tonic-gate nfs_acl_free(vsp); 14547c478bd9Sstevel@tonic-gate } 14557c478bd9Sstevel@tonic-gate } 14567c478bd9Sstevel@tonic-gate 14577c478bd9Sstevel@tonic-gate if (error) { 14587c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(vp); 14597c478bd9Sstevel@tonic-gate return (error); 14607c478bd9Sstevel@tonic-gate } 14617c478bd9Sstevel@tonic-gate 14627c478bd9Sstevel@tonic-gate error = geterrno3(res.status); 14637c478bd9Sstevel@tonic-gate if (!error) { 14647c478bd9Sstevel@tonic-gate /* 14657c478bd9Sstevel@tonic-gate * If changing the size of the file, 
invalidate 14667c478bd9Sstevel@tonic-gate * any local cached data which is no longer part 14677c478bd9Sstevel@tonic-gate * of the file. We also possibly invalidate the 14687c478bd9Sstevel@tonic-gate * last page in the file. We could use 14697c478bd9Sstevel@tonic-gate * pvn_vpzero(), but this would mark the page as 14707c478bd9Sstevel@tonic-gate * modified and require it to be written back to 14717c478bd9Sstevel@tonic-gate * the server for no particularly good reason. 14727c478bd9Sstevel@tonic-gate * This way, if we access it, then we bring it 14737c478bd9Sstevel@tonic-gate * back in. A read should be cheaper than a 14747c478bd9Sstevel@tonic-gate * write. 14757c478bd9Sstevel@tonic-gate */ 14767c478bd9Sstevel@tonic-gate if (mask & AT_SIZE) { 14777c478bd9Sstevel@tonic-gate nfs_invalidate_pages(vp, 14787c478bd9Sstevel@tonic-gate (vap->va_size & PAGEMASK), cr); 14797c478bd9Sstevel@tonic-gate } 14807c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(vp, &res.resok.obj_wcc, t, cr); 14817c478bd9Sstevel@tonic-gate /* 14827c478bd9Sstevel@tonic-gate * Some servers will change the mode to clear the setuid 14837c478bd9Sstevel@tonic-gate * and setgid bits when changing the uid or gid. The 14847c478bd9Sstevel@tonic-gate * client needs to compensate appropriately. 
14857c478bd9Sstevel@tonic-gate */ 14867c478bd9Sstevel@tonic-gate if (mask & (AT_UID | AT_GID)) { 14877c478bd9Sstevel@tonic-gate int terror; 14887c478bd9Sstevel@tonic-gate 14897c478bd9Sstevel@tonic-gate va.va_mask = AT_MODE; 14907c478bd9Sstevel@tonic-gate terror = nfs3getattr(vp, &va, cr); 14917c478bd9Sstevel@tonic-gate if (!terror && 14927c478bd9Sstevel@tonic-gate (((mask & AT_MODE) && va.va_mode != vap->va_mode) || 14937c478bd9Sstevel@tonic-gate (!(mask & AT_MODE) && va.va_mode != omode))) { 14947c478bd9Sstevel@tonic-gate va.va_mask = AT_MODE; 14957c478bd9Sstevel@tonic-gate if (mask & AT_MODE) 14967c478bd9Sstevel@tonic-gate va.va_mode = vap->va_mode; 14977c478bd9Sstevel@tonic-gate else 14987c478bd9Sstevel@tonic-gate va.va_mode = omode; 14997c478bd9Sstevel@tonic-gate (void) nfs3setattr(vp, &va, 0, cr); 15007c478bd9Sstevel@tonic-gate } 15017c478bd9Sstevel@tonic-gate } 15027c478bd9Sstevel@tonic-gate } else { 15037c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(vp, &res.resfail.obj_wcc, t, cr); 15047c478bd9Sstevel@tonic-gate /* 15057c478bd9Sstevel@tonic-gate * If we got back a "not synchronized" error, then 15067c478bd9Sstevel@tonic-gate * we need to retry with a new guard value. The 15077c478bd9Sstevel@tonic-gate * guard value used is the change time. If the 15087c478bd9Sstevel@tonic-gate * server returned post_op_attr, then we can just 15097c478bd9Sstevel@tonic-gate * retry because we have the latest attributes. 15107c478bd9Sstevel@tonic-gate * Otherwise, we issue a GETATTR to get the latest 15117c478bd9Sstevel@tonic-gate * attributes and then retry. If we couldn't get 15127c478bd9Sstevel@tonic-gate * the attributes this way either, then we give 15137c478bd9Sstevel@tonic-gate * up because we can't complete the operation as 15147c478bd9Sstevel@tonic-gate * required. 
15157c478bd9Sstevel@tonic-gate */ 15167c478bd9Sstevel@tonic-gate if (res.status == NFS3ERR_NOT_SYNC) { 15177c478bd9Sstevel@tonic-gate va.va_mask = AT_CTIME; 15187c478bd9Sstevel@tonic-gate if (nfs3getattr(vp, &va, cr) == 0) 15197c478bd9Sstevel@tonic-gate goto tryagain; 15207c478bd9Sstevel@tonic-gate } 15217c478bd9Sstevel@tonic-gate PURGE_STALE_FH(error, vp, cr); 15227c478bd9Sstevel@tonic-gate } 15237c478bd9Sstevel@tonic-gate 15247c478bd9Sstevel@tonic-gate return (error); 15257c478bd9Sstevel@tonic-gate } 15267c478bd9Sstevel@tonic-gate 15277c478bd9Sstevel@tonic-gate static int 15287c478bd9Sstevel@tonic-gate nfs3_accessx(void *vp, int mode, cred_t *cr) 15297c478bd9Sstevel@tonic-gate { 1530108322fbScarlsonj ASSERT(nfs_zone() == VTOMI((vnode_t *)vp)->mi_zone); 1531da6c28aaSamw return (nfs3_access(vp, mode, 0, cr, NULL)); 15327c478bd9Sstevel@tonic-gate } 15337c478bd9Sstevel@tonic-gate 15347c478bd9Sstevel@tonic-gate /* ARGSUSED */ 15357c478bd9Sstevel@tonic-gate static int 1536da6c28aaSamw nfs3_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct) 15377c478bd9Sstevel@tonic-gate { 15387c478bd9Sstevel@tonic-gate int error; 15397c478bd9Sstevel@tonic-gate ACCESS3args args; 15407c478bd9Sstevel@tonic-gate ACCESS3res res; 15417c478bd9Sstevel@tonic-gate int douprintf; 15427c478bd9Sstevel@tonic-gate uint32 acc; 15437c478bd9Sstevel@tonic-gate rnode_t *rp; 1544858ba0dfSsdussud cred_t *cred, *ncr, *ncrfree = NULL; 15457c478bd9Sstevel@tonic-gate failinfo_t fi; 15467c478bd9Sstevel@tonic-gate nfs_access_type_t cacc; 15477c478bd9Sstevel@tonic-gate hrtime_t t; 15487c478bd9Sstevel@tonic-gate 15497c478bd9Sstevel@tonic-gate acc = 0; 1550108322fbScarlsonj if (nfs_zone() != VTOMI(vp)->mi_zone) 15517c478bd9Sstevel@tonic-gate return (EIO); 15527c478bd9Sstevel@tonic-gate if (mode & VREAD) 15537c478bd9Sstevel@tonic-gate acc |= ACCESS3_READ; 15547c478bd9Sstevel@tonic-gate if (mode & VWRITE) { 15557c478bd9Sstevel@tonic-gate if (vn_is_readonly(vp) && !IS_DEVVP(vp)) 
15567c478bd9Sstevel@tonic-gate return (EROFS); 15577c478bd9Sstevel@tonic-gate if (vp->v_type == VDIR) 15587c478bd9Sstevel@tonic-gate acc |= ACCESS3_DELETE; 15597c478bd9Sstevel@tonic-gate acc |= ACCESS3_MODIFY | ACCESS3_EXTEND; 15607c478bd9Sstevel@tonic-gate } 15617c478bd9Sstevel@tonic-gate if (mode & VEXEC) { 15627c478bd9Sstevel@tonic-gate if (vp->v_type == VDIR) 15637c478bd9Sstevel@tonic-gate acc |= ACCESS3_LOOKUP; 15647c478bd9Sstevel@tonic-gate else 15657c478bd9Sstevel@tonic-gate acc |= ACCESS3_EXECUTE; 15667c478bd9Sstevel@tonic-gate } 15677c478bd9Sstevel@tonic-gate 15687c478bd9Sstevel@tonic-gate rp = VTOR(vp); 15697c478bd9Sstevel@tonic-gate args.object = *VTOFH3(vp); 15707c478bd9Sstevel@tonic-gate if (vp->v_type == VDIR) { 15717c478bd9Sstevel@tonic-gate args.access = ACCESS3_READ | ACCESS3_DELETE | ACCESS3_MODIFY | 15727c478bd9Sstevel@tonic-gate ACCESS3_EXTEND | ACCESS3_LOOKUP; 15737c478bd9Sstevel@tonic-gate } else { 15747c478bd9Sstevel@tonic-gate args.access = ACCESS3_READ | ACCESS3_MODIFY | ACCESS3_EXTEND | 15757c478bd9Sstevel@tonic-gate ACCESS3_EXECUTE; 15767c478bd9Sstevel@tonic-gate } 15777c478bd9Sstevel@tonic-gate fi.vp = vp; 15787c478bd9Sstevel@tonic-gate fi.fhp = (caddr_t)&args.object; 15797c478bd9Sstevel@tonic-gate fi.copyproc = nfs3copyfh; 15807c478bd9Sstevel@tonic-gate fi.lookupproc = nfs3lookup; 15817c478bd9Sstevel@tonic-gate fi.xattrdirproc = acl_getxattrdir3; 15827c478bd9Sstevel@tonic-gate 15837c478bd9Sstevel@tonic-gate cred = cr; 1584858ba0dfSsdussud /* 1585858ba0dfSsdussud * ncr and ncrfree both initially 1586858ba0dfSsdussud * point to the memory area returned 1587858ba0dfSsdussud * by crnetadjust(); 1588858ba0dfSsdussud * ncrfree not NULL when exiting means 1589858ba0dfSsdussud * that we need to release it 1590858ba0dfSsdussud */ 1591d76e2727Ssdussud ncr = crnetadjust(cred); 1592858ba0dfSsdussud ncrfree = ncr; 15937c478bd9Sstevel@tonic-gate tryagain: 1594d76e2727Ssdussud if (rp->r_acache != NULL) { 1595d76e2727Ssdussud cacc = 
nfs_access_check(rp, acc, cred); 1596858ba0dfSsdussud if (cacc == NFS_ACCESS_ALLOWED) { 1597858ba0dfSsdussud if (ncrfree != NULL) 1598858ba0dfSsdussud crfree(ncrfree); 1599d76e2727Ssdussud return (0); 1600858ba0dfSsdussud } 1601d76e2727Ssdussud if (cacc == NFS_ACCESS_DENIED) { 1602d76e2727Ssdussud /* 1603d76e2727Ssdussud * If the cred can be adjusted, try again 1604d76e2727Ssdussud * with the new cred. 1605d76e2727Ssdussud */ 1606d76e2727Ssdussud if (ncr != NULL) { 1607d76e2727Ssdussud cred = ncr; 1608d76e2727Ssdussud ncr = NULL; 1609d76e2727Ssdussud goto tryagain; 1610d76e2727Ssdussud } 1611858ba0dfSsdussud if (ncrfree != NULL) 1612858ba0dfSsdussud crfree(ncrfree); 1613d76e2727Ssdussud return (EACCES); 1614d76e2727Ssdussud } 1615d76e2727Ssdussud } 1616d76e2727Ssdussud 16177c478bd9Sstevel@tonic-gate douprintf = 1; 16187c478bd9Sstevel@tonic-gate 16197c478bd9Sstevel@tonic-gate t = gethrtime(); 16207c478bd9Sstevel@tonic-gate 16217c478bd9Sstevel@tonic-gate error = rfs3call(VTOMI(vp), NFSPROC3_ACCESS, 16227c478bd9Sstevel@tonic-gate xdr_ACCESS3args, (caddr_t)&args, 16237c478bd9Sstevel@tonic-gate xdr_ACCESS3res, (caddr_t)&res, cred, 16247c478bd9Sstevel@tonic-gate &douprintf, &res.status, 0, &fi); 16257c478bd9Sstevel@tonic-gate 16267c478bd9Sstevel@tonic-gate if (error) { 1627858ba0dfSsdussud if (ncrfree != NULL) 1628858ba0dfSsdussud crfree(ncrfree); 16297c478bd9Sstevel@tonic-gate return (error); 16307c478bd9Sstevel@tonic-gate } 16317c478bd9Sstevel@tonic-gate 16327c478bd9Sstevel@tonic-gate error = geterrno3(res.status); 16337c478bd9Sstevel@tonic-gate if (!error) { 16347c478bd9Sstevel@tonic-gate nfs3_cache_post_op_attr(vp, &res.resok.obj_attributes, t, cr); 1635d76e2727Ssdussud nfs_access_cache(rp, args.access, res.resok.access, cred); 1636858ba0dfSsdussud /* 1637858ba0dfSsdussud * we just cached results with cred; if cred is the 1638858ba0dfSsdussud * adjusted credentials from crnetadjust, we do not want 1639858ba0dfSsdussud * to release them before exiting: hence setting 
ncrfree 1640858ba0dfSsdussud * to NULL 1641858ba0dfSsdussud */ 1642858ba0dfSsdussud if (cred != cr) 1643858ba0dfSsdussud ncrfree = NULL; 16447c478bd9Sstevel@tonic-gate if ((acc & res.resok.access) != acc) { 1645d76e2727Ssdussud /* 1646d76e2727Ssdussud * If the cred can be adjusted, try again 1647d76e2727Ssdussud * with the new cred. 1648d76e2727Ssdussud */ 16497c478bd9Sstevel@tonic-gate if (ncr != NULL) { 16507c478bd9Sstevel@tonic-gate cred = ncr; 1651d76e2727Ssdussud ncr = NULL; 16527c478bd9Sstevel@tonic-gate goto tryagain; 16537c478bd9Sstevel@tonic-gate } 16547c478bd9Sstevel@tonic-gate error = EACCES; 16557c478bd9Sstevel@tonic-gate } 16567c478bd9Sstevel@tonic-gate } else { 16577c478bd9Sstevel@tonic-gate nfs3_cache_post_op_attr(vp, &res.resfail.obj_attributes, t, cr); 16587c478bd9Sstevel@tonic-gate PURGE_STALE_FH(error, vp, cr); 16597c478bd9Sstevel@tonic-gate } 16607c478bd9Sstevel@tonic-gate 1661858ba0dfSsdussud if (ncrfree != NULL) 1662858ba0dfSsdussud crfree(ncrfree); 16637c478bd9Sstevel@tonic-gate 16647c478bd9Sstevel@tonic-gate return (error); 16657c478bd9Sstevel@tonic-gate } 16667c478bd9Sstevel@tonic-gate 16677c478bd9Sstevel@tonic-gate static int nfs3_do_symlink_cache = 1; 16687c478bd9Sstevel@tonic-gate 1669da6c28aaSamw /* ARGSUSED */ 16707c478bd9Sstevel@tonic-gate static int 1671da6c28aaSamw nfs3_readlink(vnode_t *vp, struct uio *uiop, cred_t *cr, caller_context_t *ct) 16727c478bd9Sstevel@tonic-gate { 16737c478bd9Sstevel@tonic-gate int error; 16747c478bd9Sstevel@tonic-gate READLINK3args args; 16757c478bd9Sstevel@tonic-gate READLINK3res res; 16767c478bd9Sstevel@tonic-gate nfspath3 resdata_backup; 16777c478bd9Sstevel@tonic-gate rnode_t *rp; 16787c478bd9Sstevel@tonic-gate int douprintf; 16797c478bd9Sstevel@tonic-gate int len; 16807c478bd9Sstevel@tonic-gate failinfo_t fi; 16817c478bd9Sstevel@tonic-gate hrtime_t t; 16827c478bd9Sstevel@tonic-gate 16837c478bd9Sstevel@tonic-gate /* 16847c478bd9Sstevel@tonic-gate * Can't readlink anything other than a symbolic link. 
16857c478bd9Sstevel@tonic-gate */ 16867c478bd9Sstevel@tonic-gate if (vp->v_type != VLNK) 16877c478bd9Sstevel@tonic-gate return (EINVAL); 1688108322fbScarlsonj if (nfs_zone() != VTOMI(vp)->mi_zone) 16897c478bd9Sstevel@tonic-gate return (EIO); 16907c478bd9Sstevel@tonic-gate 16917c478bd9Sstevel@tonic-gate rp = VTOR(vp); 16927c478bd9Sstevel@tonic-gate if (nfs3_do_symlink_cache && rp->r_symlink.contents != NULL) { 16937c478bd9Sstevel@tonic-gate error = nfs3_validate_caches(vp, cr); 16947c478bd9Sstevel@tonic-gate if (error) 16957c478bd9Sstevel@tonic-gate return (error); 16967c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 16977c478bd9Sstevel@tonic-gate if (rp->r_symlink.contents != NULL) { 16987c478bd9Sstevel@tonic-gate error = uiomove(rp->r_symlink.contents, 16997c478bd9Sstevel@tonic-gate rp->r_symlink.len, UIO_READ, uiop); 17007c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 17017c478bd9Sstevel@tonic-gate return (error); 17027c478bd9Sstevel@tonic-gate } 17037c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 17047c478bd9Sstevel@tonic-gate } 17057c478bd9Sstevel@tonic-gate 17067c478bd9Sstevel@tonic-gate args.symlink = *VTOFH3(vp); 17077c478bd9Sstevel@tonic-gate fi.vp = vp; 17087c478bd9Sstevel@tonic-gate fi.fhp = (caddr_t)&args.symlink; 17097c478bd9Sstevel@tonic-gate fi.copyproc = nfs3copyfh; 17107c478bd9Sstevel@tonic-gate fi.lookupproc = nfs3lookup; 17117c478bd9Sstevel@tonic-gate fi.xattrdirproc = acl_getxattrdir3; 17127c478bd9Sstevel@tonic-gate 17137c478bd9Sstevel@tonic-gate res.resok.data = kmem_alloc(MAXPATHLEN, KM_SLEEP); 17147c478bd9Sstevel@tonic-gate 17157c478bd9Sstevel@tonic-gate resdata_backup = res.resok.data; 17167c478bd9Sstevel@tonic-gate 17177c478bd9Sstevel@tonic-gate douprintf = 1; 17187c478bd9Sstevel@tonic-gate 17197c478bd9Sstevel@tonic-gate t = gethrtime(); 17207c478bd9Sstevel@tonic-gate 17217c478bd9Sstevel@tonic-gate error = rfs3call(VTOMI(vp), NFSPROC3_READLINK, 17220a701b1eSRobert Gordon xdr_READLINK3args, (caddr_t)&args, 
17237c478bd9Sstevel@tonic-gate xdr_READLINK3res, (caddr_t)&res, cr, 17247c478bd9Sstevel@tonic-gate &douprintf, &res.status, 0, &fi); 17257c478bd9Sstevel@tonic-gate 17267c478bd9Sstevel@tonic-gate if (res.resok.data == nfs3nametoolong) 17277c478bd9Sstevel@tonic-gate error = EINVAL; 17287c478bd9Sstevel@tonic-gate 17297c478bd9Sstevel@tonic-gate if (error) { 17307c478bd9Sstevel@tonic-gate kmem_free(resdata_backup, MAXPATHLEN); 17317c478bd9Sstevel@tonic-gate return (error); 17327c478bd9Sstevel@tonic-gate } 17337c478bd9Sstevel@tonic-gate 17347c478bd9Sstevel@tonic-gate error = geterrno3(res.status); 17357c478bd9Sstevel@tonic-gate if (!error) { 17367c478bd9Sstevel@tonic-gate nfs3_cache_post_op_attr(vp, &res.resok.symlink_attributes, t, 17377c478bd9Sstevel@tonic-gate cr); 17387c478bd9Sstevel@tonic-gate len = strlen(res.resok.data); 17397c478bd9Sstevel@tonic-gate error = uiomove(res.resok.data, len, UIO_READ, uiop); 17407c478bd9Sstevel@tonic-gate if (nfs3_do_symlink_cache && rp->r_symlink.contents == NULL) { 17417c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 17427c478bd9Sstevel@tonic-gate if (rp->r_symlink.contents == NULL) { 17437c478bd9Sstevel@tonic-gate rp->r_symlink.contents = res.resok.data; 17447c478bd9Sstevel@tonic-gate rp->r_symlink.len = len; 17457c478bd9Sstevel@tonic-gate rp->r_symlink.size = MAXPATHLEN; 17467c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 17477c478bd9Sstevel@tonic-gate } else { 17487c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 17497c478bd9Sstevel@tonic-gate 17507c478bd9Sstevel@tonic-gate kmem_free((void *)res.resok.data, MAXPATHLEN); 17517c478bd9Sstevel@tonic-gate } 17527c478bd9Sstevel@tonic-gate } else { 17537c478bd9Sstevel@tonic-gate kmem_free((void *)res.resok.data, MAXPATHLEN); 17547c478bd9Sstevel@tonic-gate } 17557c478bd9Sstevel@tonic-gate } else { 17567c478bd9Sstevel@tonic-gate nfs3_cache_post_op_attr(vp, 17577c478bd9Sstevel@tonic-gate &res.resfail.symlink_attributes, t, cr); 17587c478bd9Sstevel@tonic-gate 
PURGE_STALE_FH(error, vp, cr); 17597c478bd9Sstevel@tonic-gate 17607c478bd9Sstevel@tonic-gate kmem_free((void *)res.resok.data, MAXPATHLEN); 17617c478bd9Sstevel@tonic-gate 17627c478bd9Sstevel@tonic-gate } 17637c478bd9Sstevel@tonic-gate 17647c478bd9Sstevel@tonic-gate /* 17657c478bd9Sstevel@tonic-gate * The over the wire error for attempting to readlink something 17667c478bd9Sstevel@tonic-gate * other than a symbolic link is ENXIO. However, we need to 17677c478bd9Sstevel@tonic-gate * return EINVAL instead of ENXIO, so we map it here. 17687c478bd9Sstevel@tonic-gate */ 17697c478bd9Sstevel@tonic-gate return (error == ENXIO ? EINVAL : error); 17707c478bd9Sstevel@tonic-gate } 17717c478bd9Sstevel@tonic-gate 17727c478bd9Sstevel@tonic-gate /* 17737c478bd9Sstevel@tonic-gate * Flush local dirty pages to stable storage on the server. 17747c478bd9Sstevel@tonic-gate * 17757c478bd9Sstevel@tonic-gate * If FNODSYNC is specified, then there is nothing to do because 17767c478bd9Sstevel@tonic-gate * metadata changes are not cached on the client before being 17777c478bd9Sstevel@tonic-gate * sent to the server. 
17787c478bd9Sstevel@tonic-gate */ 1779da6c28aaSamw /* ARGSUSED */ 17807c478bd9Sstevel@tonic-gate static int 1781da6c28aaSamw nfs3_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 17827c478bd9Sstevel@tonic-gate { 17837c478bd9Sstevel@tonic-gate int error; 17847c478bd9Sstevel@tonic-gate 17857c478bd9Sstevel@tonic-gate if ((syncflag & FNODSYNC) || IS_SWAPVP(vp)) 17867c478bd9Sstevel@tonic-gate return (0); 1787108322fbScarlsonj if (nfs_zone() != VTOMI(vp)->mi_zone) 17887c478bd9Sstevel@tonic-gate return (EIO); 17897c478bd9Sstevel@tonic-gate 17907c478bd9Sstevel@tonic-gate error = nfs3_putpage_commit(vp, (offset_t)0, 0, cr); 17917c478bd9Sstevel@tonic-gate if (!error) 17927c478bd9Sstevel@tonic-gate error = VTOR(vp)->r_error; 17937c478bd9Sstevel@tonic-gate return (error); 17947c478bd9Sstevel@tonic-gate } 17957c478bd9Sstevel@tonic-gate 17967c478bd9Sstevel@tonic-gate /* 17977c478bd9Sstevel@tonic-gate * Weirdness: if the file was removed or the target of a rename 17987c478bd9Sstevel@tonic-gate * operation while it was open, it got renamed instead. Here we 17997c478bd9Sstevel@tonic-gate * remove the renamed file. 18007c478bd9Sstevel@tonic-gate */ 1801da6c28aaSamw /* ARGSUSED */ 18027c478bd9Sstevel@tonic-gate static void 1803da6c28aaSamw nfs3_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 18047c478bd9Sstevel@tonic-gate { 18057c478bd9Sstevel@tonic-gate rnode_t *rp; 18067c478bd9Sstevel@tonic-gate 18077c478bd9Sstevel@tonic-gate ASSERT(vp != DNLC_NO_VNODE); 18087c478bd9Sstevel@tonic-gate 18097c478bd9Sstevel@tonic-gate /* 18107c478bd9Sstevel@tonic-gate * If this is coming from the wrong zone, we let someone in the right 18117c478bd9Sstevel@tonic-gate * zone take care of it asynchronously. We can get here due to 18127c478bd9Sstevel@tonic-gate * VN_RELE() being called from pageout() or fsflush(). 
This call may 18137c478bd9Sstevel@tonic-gate * potentially turn into an expensive no-op if, for instance, v_count 18147c478bd9Sstevel@tonic-gate * gets incremented in the meantime, but it's still correct. 18157c478bd9Sstevel@tonic-gate */ 1816108322fbScarlsonj if (nfs_zone() != VTOMI(vp)->mi_zone) { 18177c478bd9Sstevel@tonic-gate nfs_async_inactive(vp, cr, nfs3_inactive); 18187c478bd9Sstevel@tonic-gate return; 18197c478bd9Sstevel@tonic-gate } 18207c478bd9Sstevel@tonic-gate 18217c478bd9Sstevel@tonic-gate rp = VTOR(vp); 18227c478bd9Sstevel@tonic-gate redo: 18237c478bd9Sstevel@tonic-gate if (rp->r_unldvp != NULL) { 18247c478bd9Sstevel@tonic-gate /* 18257c478bd9Sstevel@tonic-gate * Save the vnode pointer for the directory where the 18267c478bd9Sstevel@tonic-gate * unlinked-open file got renamed, then set it to NULL 18277c478bd9Sstevel@tonic-gate * to prevent another thread from getting here before 18287c478bd9Sstevel@tonic-gate * we're done with the remove. While we have the 18297c478bd9Sstevel@tonic-gate * statelock, make local copies of the pertinent rnode 18307c478bd9Sstevel@tonic-gate * fields. If we weren't to do this in an atomic way, the 18317c478bd9Sstevel@tonic-gate * the unl* fields could become inconsistent with respect 18327c478bd9Sstevel@tonic-gate * to each other due to a race condition between this 18337c478bd9Sstevel@tonic-gate * code and nfs_remove(). See bug report 1034328. 
18347c478bd9Sstevel@tonic-gate */ 18357c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 18367c478bd9Sstevel@tonic-gate if (rp->r_unldvp != NULL) { 18377c478bd9Sstevel@tonic-gate vnode_t *unldvp; 18387c478bd9Sstevel@tonic-gate char *unlname; 18397c478bd9Sstevel@tonic-gate cred_t *unlcred; 18407c478bd9Sstevel@tonic-gate REMOVE3args args; 18417c478bd9Sstevel@tonic-gate REMOVE3res res; 18427c478bd9Sstevel@tonic-gate int douprintf; 18437c478bd9Sstevel@tonic-gate int error; 18447c478bd9Sstevel@tonic-gate hrtime_t t; 18457c478bd9Sstevel@tonic-gate 18467c478bd9Sstevel@tonic-gate unldvp = rp->r_unldvp; 18477c478bd9Sstevel@tonic-gate rp->r_unldvp = NULL; 18487c478bd9Sstevel@tonic-gate unlname = rp->r_unlname; 18497c478bd9Sstevel@tonic-gate rp->r_unlname = NULL; 18507c478bd9Sstevel@tonic-gate unlcred = rp->r_unlcred; 18517c478bd9Sstevel@tonic-gate rp->r_unlcred = NULL; 18527c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 18537c478bd9Sstevel@tonic-gate 18547c478bd9Sstevel@tonic-gate /* 18557c478bd9Sstevel@tonic-gate * If there are any dirty pages left, then flush 18567c478bd9Sstevel@tonic-gate * them. This is unfortunate because they just 18577c478bd9Sstevel@tonic-gate * may get thrown away during the remove operation, 18587c478bd9Sstevel@tonic-gate * but we have to do this for correctness. 
18597c478bd9Sstevel@tonic-gate */ 18607c478bd9Sstevel@tonic-gate if (vn_has_cached_data(vp) && 18617c478bd9Sstevel@tonic-gate ((rp->r_flags & RDIRTY) || rp->r_count > 0)) { 18627c478bd9Sstevel@tonic-gate ASSERT(vp->v_type != VCHR); 1863da6c28aaSamw error = nfs3_putpage(vp, (offset_t)0, 0, 0, 1864da6c28aaSamw cr, ct); 18657c478bd9Sstevel@tonic-gate if (error) { 18667c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 18677c478bd9Sstevel@tonic-gate if (!rp->r_error) 18687c478bd9Sstevel@tonic-gate rp->r_error = error; 18697c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 18707c478bd9Sstevel@tonic-gate } 18717c478bd9Sstevel@tonic-gate } 18727c478bd9Sstevel@tonic-gate 18737c478bd9Sstevel@tonic-gate /* 18747c478bd9Sstevel@tonic-gate * Do the remove operation on the renamed file 18757c478bd9Sstevel@tonic-gate */ 18767c478bd9Sstevel@tonic-gate setdiropargs3(&args.object, unlname, unldvp); 18777c478bd9Sstevel@tonic-gate 18787c478bd9Sstevel@tonic-gate douprintf = 1; 18797c478bd9Sstevel@tonic-gate 18807c478bd9Sstevel@tonic-gate t = gethrtime(); 18817c478bd9Sstevel@tonic-gate 18827c478bd9Sstevel@tonic-gate error = rfs3call(VTOMI(unldvp), NFSPROC3_REMOVE, 18837c478bd9Sstevel@tonic-gate xdr_diropargs3, (caddr_t)&args, 18847c478bd9Sstevel@tonic-gate xdr_REMOVE3res, (caddr_t)&res, unlcred, 18857c478bd9Sstevel@tonic-gate &douprintf, &res.status, 0, NULL); 18867c478bd9Sstevel@tonic-gate 18877c478bd9Sstevel@tonic-gate if (error) { 18887c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(unldvp); 18897c478bd9Sstevel@tonic-gate } else { 18907c478bd9Sstevel@tonic-gate error = geterrno3(res.status); 18917c478bd9Sstevel@tonic-gate if (!error) { 18927c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(unldvp, 18937c478bd9Sstevel@tonic-gate &res.resok.dir_wcc, t, cr); 18947c478bd9Sstevel@tonic-gate if (HAVE_RDDIR_CACHE(VTOR(unldvp))) 18957c478bd9Sstevel@tonic-gate nfs_purge_rddir_cache(unldvp); 18967c478bd9Sstevel@tonic-gate } else { 18977c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(unldvp, 
18987c478bd9Sstevel@tonic-gate &res.resfail.dir_wcc, t, cr); 18997c478bd9Sstevel@tonic-gate PURGE_STALE_FH(error, unldvp, cr); 19007c478bd9Sstevel@tonic-gate } 19017c478bd9Sstevel@tonic-gate } 19027c478bd9Sstevel@tonic-gate 19037c478bd9Sstevel@tonic-gate /* 19047c478bd9Sstevel@tonic-gate * Release stuff held for the remove 19057c478bd9Sstevel@tonic-gate */ 19067c478bd9Sstevel@tonic-gate VN_RELE(unldvp); 19077c478bd9Sstevel@tonic-gate kmem_free(unlname, MAXNAMELEN); 19087c478bd9Sstevel@tonic-gate crfree(unlcred); 19097c478bd9Sstevel@tonic-gate goto redo; 19107c478bd9Sstevel@tonic-gate } 19117c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 19127c478bd9Sstevel@tonic-gate } 19137c478bd9Sstevel@tonic-gate 19147c478bd9Sstevel@tonic-gate rp_addfree(rp, cr); 19157c478bd9Sstevel@tonic-gate } 19167c478bd9Sstevel@tonic-gate 19177c478bd9Sstevel@tonic-gate /* 19187c478bd9Sstevel@tonic-gate * Remote file system operations having to do with directory manipulation. 19197c478bd9Sstevel@tonic-gate */ 19207c478bd9Sstevel@tonic-gate 1921da6c28aaSamw /* ARGSUSED */ 19227c478bd9Sstevel@tonic-gate static int 19237c478bd9Sstevel@tonic-gate nfs3_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, 1924da6c28aaSamw int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, 1925da6c28aaSamw int *direntflags, pathname_t *realpnp) 19267c478bd9Sstevel@tonic-gate { 19277c478bd9Sstevel@tonic-gate int error; 19287c478bd9Sstevel@tonic-gate vnode_t *vp; 19297c478bd9Sstevel@tonic-gate vnode_t *avp = NULL; 19307c478bd9Sstevel@tonic-gate rnode_t *drp; 19317c478bd9Sstevel@tonic-gate 1932108322fbScarlsonj if (nfs_zone() != VTOMI(dvp)->mi_zone) 19337c478bd9Sstevel@tonic-gate return (EPERM); 19347c478bd9Sstevel@tonic-gate 19357c478bd9Sstevel@tonic-gate drp = VTOR(dvp); 19367c478bd9Sstevel@tonic-gate 19377c478bd9Sstevel@tonic-gate /* 19387c478bd9Sstevel@tonic-gate * Are we looking up extended attributes? 
If so, "dvp" is 19397c478bd9Sstevel@tonic-gate * the file or directory for which we want attributes, and 19407c478bd9Sstevel@tonic-gate * we need a lookup of the hidden attribute directory 19417c478bd9Sstevel@tonic-gate * before we lookup the rest of the path. 19427c478bd9Sstevel@tonic-gate */ 19437c478bd9Sstevel@tonic-gate if (flags & LOOKUP_XATTR) { 19447c478bd9Sstevel@tonic-gate bool_t cflag = ((flags & CREATE_XATTR_DIR) != 0); 19457c478bd9Sstevel@tonic-gate mntinfo_t *mi; 19467c478bd9Sstevel@tonic-gate 19477c478bd9Sstevel@tonic-gate mi = VTOMI(dvp); 19487c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_EXTATTR)) 19497c478bd9Sstevel@tonic-gate return (EINVAL); 19507c478bd9Sstevel@tonic-gate 19517c478bd9Sstevel@tonic-gate if (nfs_rw_enter_sig(&drp->r_rwlock, RW_READER, INTR(dvp))) 19527c478bd9Sstevel@tonic-gate return (EINTR); 19537c478bd9Sstevel@tonic-gate 19547c478bd9Sstevel@tonic-gate (void) nfs3lookup_dnlc(dvp, XATTR_DIR_NAME, &avp, cr); 19557c478bd9Sstevel@tonic-gate if (avp == NULL) 19567c478bd9Sstevel@tonic-gate error = acl_getxattrdir3(dvp, &avp, cflag, cr, 0); 19577c478bd9Sstevel@tonic-gate else 19587c478bd9Sstevel@tonic-gate error = 0; 19597c478bd9Sstevel@tonic-gate 19607c478bd9Sstevel@tonic-gate nfs_rw_exit(&drp->r_rwlock); 19617c478bd9Sstevel@tonic-gate 19627c478bd9Sstevel@tonic-gate if (error) { 19637c478bd9Sstevel@tonic-gate if (mi->mi_flags & MI_EXTATTR) 19647c478bd9Sstevel@tonic-gate return (error); 19657c478bd9Sstevel@tonic-gate return (EINVAL); 19667c478bd9Sstevel@tonic-gate } 19677c478bd9Sstevel@tonic-gate dvp = avp; 19687c478bd9Sstevel@tonic-gate drp = VTOR(dvp); 19697c478bd9Sstevel@tonic-gate } 19707c478bd9Sstevel@tonic-gate 19717c478bd9Sstevel@tonic-gate if (nfs_rw_enter_sig(&drp->r_rwlock, RW_READER, INTR(dvp))) { 19727c478bd9Sstevel@tonic-gate error = EINTR; 19737c478bd9Sstevel@tonic-gate goto out; 19747c478bd9Sstevel@tonic-gate } 19757c478bd9Sstevel@tonic-gate 19767c478bd9Sstevel@tonic-gate error = nfs3lookup(dvp, nm, vpp, pnp, flags, 
rdir, cr, 0); 19777c478bd9Sstevel@tonic-gate 19787c478bd9Sstevel@tonic-gate nfs_rw_exit(&drp->r_rwlock); 19797c478bd9Sstevel@tonic-gate 19807c478bd9Sstevel@tonic-gate /* 19817c478bd9Sstevel@tonic-gate * If vnode is a device, create special vnode. 19827c478bd9Sstevel@tonic-gate */ 19837c478bd9Sstevel@tonic-gate if (!error && IS_DEVVP(*vpp)) { 19847c478bd9Sstevel@tonic-gate vp = *vpp; 19857c478bd9Sstevel@tonic-gate *vpp = specvp(vp, vp->v_rdev, vp->v_type, cr); 19867c478bd9Sstevel@tonic-gate VN_RELE(vp); 19877c478bd9Sstevel@tonic-gate } 19887c478bd9Sstevel@tonic-gate 19897c478bd9Sstevel@tonic-gate out: 19907c478bd9Sstevel@tonic-gate if (avp != NULL) 19917c478bd9Sstevel@tonic-gate VN_RELE(avp); 19927c478bd9Sstevel@tonic-gate 19937c478bd9Sstevel@tonic-gate return (error); 19947c478bd9Sstevel@tonic-gate } 19957c478bd9Sstevel@tonic-gate 19967c478bd9Sstevel@tonic-gate static int nfs3_lookup_neg_cache = 1; 19977c478bd9Sstevel@tonic-gate 19987c478bd9Sstevel@tonic-gate #ifdef DEBUG 19997c478bd9Sstevel@tonic-gate static int nfs3_lookup_dnlc_hits = 0; 20007c478bd9Sstevel@tonic-gate static int nfs3_lookup_dnlc_misses = 0; 20017c478bd9Sstevel@tonic-gate static int nfs3_lookup_dnlc_neg_hits = 0; 20027c478bd9Sstevel@tonic-gate static int nfs3_lookup_dnlc_disappears = 0; 20037c478bd9Sstevel@tonic-gate static int nfs3_lookup_dnlc_lookups = 0; 20047c478bd9Sstevel@tonic-gate #endif 20057c478bd9Sstevel@tonic-gate 20067c478bd9Sstevel@tonic-gate /* ARGSUSED */ 20077c478bd9Sstevel@tonic-gate int 20087c478bd9Sstevel@tonic-gate nfs3lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, 20097c478bd9Sstevel@tonic-gate int flags, vnode_t *rdir, cred_t *cr, int rfscall_flags) 20107c478bd9Sstevel@tonic-gate { 20117c478bd9Sstevel@tonic-gate int error; 20127c478bd9Sstevel@tonic-gate rnode_t *drp; 20137c478bd9Sstevel@tonic-gate 2014108322fbScarlsonj ASSERT(nfs_zone() == VTOMI(dvp)->mi_zone); 20157c478bd9Sstevel@tonic-gate /* 20167c478bd9Sstevel@tonic-gate * If lookup is for "", just 
return dvp. Don't need 20177c478bd9Sstevel@tonic-gate * to send it over the wire, look it up in the dnlc, 20187c478bd9Sstevel@tonic-gate * or perform any access checks. 20197c478bd9Sstevel@tonic-gate */ 20207c478bd9Sstevel@tonic-gate if (*nm == '\0') { 20217c478bd9Sstevel@tonic-gate VN_HOLD(dvp); 20227c478bd9Sstevel@tonic-gate *vpp = dvp; 20237c478bd9Sstevel@tonic-gate return (0); 20247c478bd9Sstevel@tonic-gate } 20257c478bd9Sstevel@tonic-gate 20267c478bd9Sstevel@tonic-gate /* 20277c478bd9Sstevel@tonic-gate * Can't do lookups in non-directories. 20287c478bd9Sstevel@tonic-gate */ 20297c478bd9Sstevel@tonic-gate if (dvp->v_type != VDIR) 20307c478bd9Sstevel@tonic-gate return (ENOTDIR); 20317c478bd9Sstevel@tonic-gate 20327c478bd9Sstevel@tonic-gate /* 20337c478bd9Sstevel@tonic-gate * If we're called with RFSCALL_SOFT, it's important that 20347c478bd9Sstevel@tonic-gate * the only rfscall is one we make directly; if we permit 20357c478bd9Sstevel@tonic-gate * an access call because we're looking up "." or validating 20367c478bd9Sstevel@tonic-gate * a dnlc hit, we'll deadlock because that rfscall will not 20377c478bd9Sstevel@tonic-gate * have the RFSCALL_SOFT set. 20387c478bd9Sstevel@tonic-gate */ 20397c478bd9Sstevel@tonic-gate if (rfscall_flags & RFSCALL_SOFT) 20407c478bd9Sstevel@tonic-gate goto callit; 20417c478bd9Sstevel@tonic-gate 20427c478bd9Sstevel@tonic-gate /* 20437c478bd9Sstevel@tonic-gate * If lookup is for ".", just return dvp. Don't need 20447c478bd9Sstevel@tonic-gate * to send it over the wire or look it up in the dnlc, 20457c478bd9Sstevel@tonic-gate * just need to check access. 
20467c478bd9Sstevel@tonic-gate */ 20477c478bd9Sstevel@tonic-gate if (strcmp(nm, ".") == 0) { 2048da6c28aaSamw error = nfs3_access(dvp, VEXEC, 0, cr, NULL); 20497c478bd9Sstevel@tonic-gate if (error) 20507c478bd9Sstevel@tonic-gate return (error); 20517c478bd9Sstevel@tonic-gate VN_HOLD(dvp); 20527c478bd9Sstevel@tonic-gate *vpp = dvp; 20537c478bd9Sstevel@tonic-gate return (0); 20547c478bd9Sstevel@tonic-gate } 20557c478bd9Sstevel@tonic-gate 20567c478bd9Sstevel@tonic-gate drp = VTOR(dvp); 20577c478bd9Sstevel@tonic-gate if (!(drp->r_flags & RLOOKUP)) { 20587c478bd9Sstevel@tonic-gate mutex_enter(&drp->r_statelock); 20597c478bd9Sstevel@tonic-gate drp->r_flags |= RLOOKUP; 20607c478bd9Sstevel@tonic-gate mutex_exit(&drp->r_statelock); 20617c478bd9Sstevel@tonic-gate } 20627c478bd9Sstevel@tonic-gate 20637c478bd9Sstevel@tonic-gate /* 20647c478bd9Sstevel@tonic-gate * Lookup this name in the DNLC. If there was a valid entry, 20657c478bd9Sstevel@tonic-gate * then return the results of the lookup. 20667c478bd9Sstevel@tonic-gate */ 20677c478bd9Sstevel@tonic-gate error = nfs3lookup_dnlc(dvp, nm, vpp, cr); 20687c478bd9Sstevel@tonic-gate if (error || *vpp != NULL) 20697c478bd9Sstevel@tonic-gate return (error); 20707c478bd9Sstevel@tonic-gate 20717c478bd9Sstevel@tonic-gate callit: 20727c478bd9Sstevel@tonic-gate error = nfs3lookup_otw(dvp, nm, vpp, cr, rfscall_flags); 20737c478bd9Sstevel@tonic-gate 20747c478bd9Sstevel@tonic-gate return (error); 20757c478bd9Sstevel@tonic-gate } 20767c478bd9Sstevel@tonic-gate 20777c478bd9Sstevel@tonic-gate static int 20787c478bd9Sstevel@tonic-gate nfs3lookup_dnlc(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr) 20797c478bd9Sstevel@tonic-gate { 20807c478bd9Sstevel@tonic-gate int error; 20817c478bd9Sstevel@tonic-gate vnode_t *vp; 20827c478bd9Sstevel@tonic-gate 20837c478bd9Sstevel@tonic-gate ASSERT(*nm != '\0'); 2084108322fbScarlsonj ASSERT(nfs_zone() == VTOMI(dvp)->mi_zone); 20857c478bd9Sstevel@tonic-gate /* 20867c478bd9Sstevel@tonic-gate * Lookup this name 
in the DNLC. If successful, then validate 20877c478bd9Sstevel@tonic-gate * the caches and then recheck the DNLC. The DNLC is rechecked 20887c478bd9Sstevel@tonic-gate * just in case this entry got invalidated during the call 20897c478bd9Sstevel@tonic-gate * to nfs3_validate_caches. 20907c478bd9Sstevel@tonic-gate * 20917c478bd9Sstevel@tonic-gate * An assumption is being made that it is safe to say that a 20927c478bd9Sstevel@tonic-gate * file exists which may not on the server. Any operations to 20937c478bd9Sstevel@tonic-gate * the server will fail with ESTALE. 20947c478bd9Sstevel@tonic-gate */ 20957c478bd9Sstevel@tonic-gate #ifdef DEBUG 20967c478bd9Sstevel@tonic-gate nfs3_lookup_dnlc_lookups++; 20977c478bd9Sstevel@tonic-gate #endif 20987c478bd9Sstevel@tonic-gate vp = dnlc_lookup(dvp, nm); 20997c478bd9Sstevel@tonic-gate if (vp != NULL) { 21007c478bd9Sstevel@tonic-gate VN_RELE(vp); 21017c478bd9Sstevel@tonic-gate if (vp == DNLC_NO_VNODE && !vn_is_readonly(dvp)) { 21027c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(dvp); 21037c478bd9Sstevel@tonic-gate } 21047c478bd9Sstevel@tonic-gate error = nfs3_validate_caches(dvp, cr); 21057c478bd9Sstevel@tonic-gate if (error) 21067c478bd9Sstevel@tonic-gate return (error); 21077c478bd9Sstevel@tonic-gate vp = dnlc_lookup(dvp, nm); 21087c478bd9Sstevel@tonic-gate if (vp != NULL) { 2109da6c28aaSamw error = nfs3_access(dvp, VEXEC, 0, cr, NULL); 21107c478bd9Sstevel@tonic-gate if (error) { 21117c478bd9Sstevel@tonic-gate VN_RELE(vp); 21127c478bd9Sstevel@tonic-gate return (error); 21137c478bd9Sstevel@tonic-gate } 21147c478bd9Sstevel@tonic-gate if (vp == DNLC_NO_VNODE) { 21157c478bd9Sstevel@tonic-gate VN_RELE(vp); 21167c478bd9Sstevel@tonic-gate #ifdef DEBUG 21177c478bd9Sstevel@tonic-gate nfs3_lookup_dnlc_neg_hits++; 21187c478bd9Sstevel@tonic-gate #endif 21197c478bd9Sstevel@tonic-gate return (ENOENT); 21207c478bd9Sstevel@tonic-gate } 21217c478bd9Sstevel@tonic-gate *vpp = vp; 21227c478bd9Sstevel@tonic-gate #ifdef DEBUG 21237c478bd9Sstevel@tonic-gate 
nfs3_lookup_dnlc_hits++; 21247c478bd9Sstevel@tonic-gate #endif 21257c478bd9Sstevel@tonic-gate return (0); 21267c478bd9Sstevel@tonic-gate } 21277c478bd9Sstevel@tonic-gate #ifdef DEBUG 21287c478bd9Sstevel@tonic-gate nfs3_lookup_dnlc_disappears++; 21297c478bd9Sstevel@tonic-gate #endif 21307c478bd9Sstevel@tonic-gate } 21317c478bd9Sstevel@tonic-gate #ifdef DEBUG 21327c478bd9Sstevel@tonic-gate else 21337c478bd9Sstevel@tonic-gate nfs3_lookup_dnlc_misses++; 21347c478bd9Sstevel@tonic-gate #endif 21357c478bd9Sstevel@tonic-gate 21367c478bd9Sstevel@tonic-gate *vpp = NULL; 21377c478bd9Sstevel@tonic-gate 21387c478bd9Sstevel@tonic-gate return (0); 21397c478bd9Sstevel@tonic-gate } 21407c478bd9Sstevel@tonic-gate 21417c478bd9Sstevel@tonic-gate static int 21427c478bd9Sstevel@tonic-gate nfs3lookup_otw(vnode_t *dvp, char *nm, vnode_t **vpp, cred_t *cr, 21437c478bd9Sstevel@tonic-gate int rfscall_flags) 21447c478bd9Sstevel@tonic-gate { 21457c478bd9Sstevel@tonic-gate int error; 21467c478bd9Sstevel@tonic-gate LOOKUP3args args; 21477c478bd9Sstevel@tonic-gate LOOKUP3vres res; 21487c478bd9Sstevel@tonic-gate int douprintf; 21497c478bd9Sstevel@tonic-gate struct vattr vattr; 21507c478bd9Sstevel@tonic-gate struct vattr dvattr; 21517c478bd9Sstevel@tonic-gate vnode_t *vp; 21527c478bd9Sstevel@tonic-gate failinfo_t fi; 21537c478bd9Sstevel@tonic-gate hrtime_t t; 21547c478bd9Sstevel@tonic-gate 21557c478bd9Sstevel@tonic-gate ASSERT(*nm != '\0'); 21567c478bd9Sstevel@tonic-gate ASSERT(dvp->v_type == VDIR); 2157108322fbScarlsonj ASSERT(nfs_zone() == VTOMI(dvp)->mi_zone); 21587c478bd9Sstevel@tonic-gate 21597c478bd9Sstevel@tonic-gate setdiropargs3(&args.what, nm, dvp); 21607c478bd9Sstevel@tonic-gate 21617c478bd9Sstevel@tonic-gate fi.vp = dvp; 21627c478bd9Sstevel@tonic-gate fi.fhp = (caddr_t)&args.what.dir; 21637c478bd9Sstevel@tonic-gate fi.copyproc = nfs3copyfh; 21647c478bd9Sstevel@tonic-gate fi.lookupproc = nfs3lookup; 21657c478bd9Sstevel@tonic-gate fi.xattrdirproc = acl_getxattrdir3; 
21667c478bd9Sstevel@tonic-gate res.obj_attributes.fres.vp = dvp; 21677c478bd9Sstevel@tonic-gate res.obj_attributes.fres.vap = &vattr; 21687c478bd9Sstevel@tonic-gate res.dir_attributes.fres.vp = dvp; 21697c478bd9Sstevel@tonic-gate res.dir_attributes.fres.vap = &dvattr; 21707c478bd9Sstevel@tonic-gate 21717c478bd9Sstevel@tonic-gate douprintf = 1; 21727c478bd9Sstevel@tonic-gate 21737c478bd9Sstevel@tonic-gate t = gethrtime(); 21747c478bd9Sstevel@tonic-gate 21757c478bd9Sstevel@tonic-gate error = rfs3call(VTOMI(dvp), NFSPROC3_LOOKUP, 21767c478bd9Sstevel@tonic-gate xdr_diropargs3, (caddr_t)&args, 21777c478bd9Sstevel@tonic-gate xdr_LOOKUP3vres, (caddr_t)&res, cr, 21787c478bd9Sstevel@tonic-gate &douprintf, &res.status, rfscall_flags, &fi); 21797c478bd9Sstevel@tonic-gate 21807c478bd9Sstevel@tonic-gate if (error) 21817c478bd9Sstevel@tonic-gate return (error); 21827c478bd9Sstevel@tonic-gate 21837c478bd9Sstevel@tonic-gate nfs3_cache_post_op_vattr(dvp, &res.dir_attributes, t, cr); 21847c478bd9Sstevel@tonic-gate 21857c478bd9Sstevel@tonic-gate error = geterrno3(res.status); 21867c478bd9Sstevel@tonic-gate if (error) { 21877c478bd9Sstevel@tonic-gate PURGE_STALE_FH(error, dvp, cr); 21887c478bd9Sstevel@tonic-gate if (error == ENOENT && nfs3_lookup_neg_cache) 21897c478bd9Sstevel@tonic-gate dnlc_enter(dvp, nm, DNLC_NO_VNODE); 21907c478bd9Sstevel@tonic-gate return (error); 21917c478bd9Sstevel@tonic-gate } 21927c478bd9Sstevel@tonic-gate 21937c478bd9Sstevel@tonic-gate if (res.obj_attributes.attributes) { 21947c478bd9Sstevel@tonic-gate vp = makenfs3node_va(&res.object, res.obj_attributes.fres.vap, 21957c478bd9Sstevel@tonic-gate dvp->v_vfsp, t, cr, VTOR(dvp)->r_path, nm); 21967c478bd9Sstevel@tonic-gate } else { 21977c478bd9Sstevel@tonic-gate vp = makenfs3node_va(&res.object, NULL, 21987c478bd9Sstevel@tonic-gate dvp->v_vfsp, t, cr, VTOR(dvp)->r_path, nm); 21997c478bd9Sstevel@tonic-gate if (vp->v_type == VNON) { 22007c478bd9Sstevel@tonic-gate vattr.va_mask = AT_TYPE; 
22017c478bd9Sstevel@tonic-gate error = nfs3getattr(vp, &vattr, cr); 22027c478bd9Sstevel@tonic-gate if (error) { 22037c478bd9Sstevel@tonic-gate VN_RELE(vp); 22047c478bd9Sstevel@tonic-gate return (error); 22057c478bd9Sstevel@tonic-gate } 22067c478bd9Sstevel@tonic-gate vp->v_type = vattr.va_type; 22077c478bd9Sstevel@tonic-gate } 22087c478bd9Sstevel@tonic-gate } 22097c478bd9Sstevel@tonic-gate 22107c478bd9Sstevel@tonic-gate if (!(rfscall_flags & RFSCALL_SOFT)) 22117c478bd9Sstevel@tonic-gate dnlc_update(dvp, nm, vp); 22127c478bd9Sstevel@tonic-gate 22137c478bd9Sstevel@tonic-gate *vpp = vp; 22147c478bd9Sstevel@tonic-gate 22157c478bd9Sstevel@tonic-gate return (error); 22167c478bd9Sstevel@tonic-gate } 22177c478bd9Sstevel@tonic-gate 22187c478bd9Sstevel@tonic-gate #ifdef DEBUG 22197c478bd9Sstevel@tonic-gate static int nfs3_create_misses = 0; 22207c478bd9Sstevel@tonic-gate #endif 22217c478bd9Sstevel@tonic-gate 22227c478bd9Sstevel@tonic-gate /* ARGSUSED */ 22237c478bd9Sstevel@tonic-gate static int 22247c478bd9Sstevel@tonic-gate nfs3_create(vnode_t *dvp, char *nm, struct vattr *va, enum vcexcl exclusive, 2225da6c28aaSamw int mode, vnode_t **vpp, cred_t *cr, int lfaware, caller_context_t *ct, 2226da6c28aaSamw vsecattr_t *vsecp) 22277c478bd9Sstevel@tonic-gate { 22287c478bd9Sstevel@tonic-gate int error; 22297c478bd9Sstevel@tonic-gate vnode_t *vp; 22307c478bd9Sstevel@tonic-gate rnode_t *rp; 22317c478bd9Sstevel@tonic-gate struct vattr vattr; 22327c478bd9Sstevel@tonic-gate rnode_t *drp; 22337c478bd9Sstevel@tonic-gate vnode_t *tempvp; 22347c478bd9Sstevel@tonic-gate 22357c478bd9Sstevel@tonic-gate drp = VTOR(dvp); 2236108322fbScarlsonj if (nfs_zone() != VTOMI(dvp)->mi_zone) 22377c478bd9Sstevel@tonic-gate return (EPERM); 22387c478bd9Sstevel@tonic-gate if (nfs_rw_enter_sig(&drp->r_rwlock, RW_WRITER, INTR(dvp))) 22397c478bd9Sstevel@tonic-gate return (EINTR); 22407c478bd9Sstevel@tonic-gate 22417c478bd9Sstevel@tonic-gate top: 22427c478bd9Sstevel@tonic-gate /* 22437c478bd9Sstevel@tonic-gate * 
We make a copy of the attributes because the caller does not 22447c478bd9Sstevel@tonic-gate * expect us to change what va points to. 22457c478bd9Sstevel@tonic-gate */ 22467c478bd9Sstevel@tonic-gate vattr = *va; 22477c478bd9Sstevel@tonic-gate 22487c478bd9Sstevel@tonic-gate /* 22497c478bd9Sstevel@tonic-gate * If the pathname is "", just use dvp. Don't need 22507c478bd9Sstevel@tonic-gate * to send it over the wire, look it up in the dnlc, 22517c478bd9Sstevel@tonic-gate * or perform any access checks. 22527c478bd9Sstevel@tonic-gate */ 22537c478bd9Sstevel@tonic-gate if (*nm == '\0') { 22547c478bd9Sstevel@tonic-gate error = 0; 22557c478bd9Sstevel@tonic-gate VN_HOLD(dvp); 22567c478bd9Sstevel@tonic-gate vp = dvp; 22577c478bd9Sstevel@tonic-gate /* 22587c478bd9Sstevel@tonic-gate * If the pathname is ".", just use dvp. Don't need 22597c478bd9Sstevel@tonic-gate * to send it over the wire or look it up in the dnlc, 22607c478bd9Sstevel@tonic-gate * just need to check access. 22617c478bd9Sstevel@tonic-gate */ 22627c478bd9Sstevel@tonic-gate } else if (strcmp(nm, ".") == 0) { 2263da6c28aaSamw error = nfs3_access(dvp, VEXEC, 0, cr, ct); 22647c478bd9Sstevel@tonic-gate if (error) { 22657c478bd9Sstevel@tonic-gate nfs_rw_exit(&drp->r_rwlock); 22667c478bd9Sstevel@tonic-gate return (error); 22677c478bd9Sstevel@tonic-gate } 22687c478bd9Sstevel@tonic-gate VN_HOLD(dvp); 22697c478bd9Sstevel@tonic-gate vp = dvp; 22707c478bd9Sstevel@tonic-gate /* 22717c478bd9Sstevel@tonic-gate * We need to go over the wire, just to be sure whether the 22727c478bd9Sstevel@tonic-gate * file exists or not. Using the DNLC can be dangerous in 22737c478bd9Sstevel@tonic-gate * this case when making a decision regarding existence. 
22747c478bd9Sstevel@tonic-gate */ 22757c478bd9Sstevel@tonic-gate } else { 22767c478bd9Sstevel@tonic-gate error = nfs3lookup_otw(dvp, nm, &vp, cr, 0); 22777c478bd9Sstevel@tonic-gate } 22787c478bd9Sstevel@tonic-gate if (!error) { 22797c478bd9Sstevel@tonic-gate if (exclusive == EXCL) 22807c478bd9Sstevel@tonic-gate error = EEXIST; 22817c478bd9Sstevel@tonic-gate else if (vp->v_type == VDIR && (mode & VWRITE)) 22827c478bd9Sstevel@tonic-gate error = EISDIR; 22837c478bd9Sstevel@tonic-gate else { 22847c478bd9Sstevel@tonic-gate /* 22857c478bd9Sstevel@tonic-gate * If vnode is a device, create special vnode. 22867c478bd9Sstevel@tonic-gate */ 22877c478bd9Sstevel@tonic-gate if (IS_DEVVP(vp)) { 22887c478bd9Sstevel@tonic-gate tempvp = vp; 22897c478bd9Sstevel@tonic-gate vp = specvp(vp, vp->v_rdev, vp->v_type, cr); 22907c478bd9Sstevel@tonic-gate VN_RELE(tempvp); 22917c478bd9Sstevel@tonic-gate } 2292da6c28aaSamw if (!(error = VOP_ACCESS(vp, mode, 0, cr, ct))) { 22937c478bd9Sstevel@tonic-gate if ((vattr.va_mask & AT_SIZE) && 22947c478bd9Sstevel@tonic-gate vp->v_type == VREG) { 22957c478bd9Sstevel@tonic-gate rp = VTOR(vp); 22967c478bd9Sstevel@tonic-gate /* 22977c478bd9Sstevel@tonic-gate * Check here for large file handled 22987c478bd9Sstevel@tonic-gate * by LF-unaware process (as 22997c478bd9Sstevel@tonic-gate * ufs_create() does) 23007c478bd9Sstevel@tonic-gate */ 23017c478bd9Sstevel@tonic-gate if (!(lfaware & FOFFMAX)) { 23027c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 23037c478bd9Sstevel@tonic-gate if (rp->r_size > MAXOFF32_T) 23047c478bd9Sstevel@tonic-gate error = EOVERFLOW; 23057c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 23067c478bd9Sstevel@tonic-gate } 23077c478bd9Sstevel@tonic-gate if (!error) { 23087c478bd9Sstevel@tonic-gate vattr.va_mask = AT_SIZE; 23097c478bd9Sstevel@tonic-gate error = nfs3setattr(vp, 23107c478bd9Sstevel@tonic-gate &vattr, 0, cr); 231172102e74SBryan Cantrill 231272102e74SBryan Cantrill /* 231372102e74SBryan Cantrill * Existing file 
was truncated; 231472102e74SBryan Cantrill * emit a create event. 231572102e74SBryan Cantrill */ 231672102e74SBryan Cantrill vnevent_create(vp, ct); 23177c478bd9Sstevel@tonic-gate } 23187c478bd9Sstevel@tonic-gate } 23197c478bd9Sstevel@tonic-gate } 23207c478bd9Sstevel@tonic-gate } 23217c478bd9Sstevel@tonic-gate nfs_rw_exit(&drp->r_rwlock); 23227c478bd9Sstevel@tonic-gate if (error) { 23237c478bd9Sstevel@tonic-gate VN_RELE(vp); 2324df2381bfSpraks } else { 23257c478bd9Sstevel@tonic-gate *vpp = vp; 2326df2381bfSpraks } 232772102e74SBryan Cantrill 23287c478bd9Sstevel@tonic-gate return (error); 23297c478bd9Sstevel@tonic-gate } 23307c478bd9Sstevel@tonic-gate 23317c478bd9Sstevel@tonic-gate dnlc_remove(dvp, nm); 23327c478bd9Sstevel@tonic-gate 23337c478bd9Sstevel@tonic-gate /* 23347c478bd9Sstevel@tonic-gate * Decide what the group-id of the created file should be. 23357c478bd9Sstevel@tonic-gate * Set it in attribute list as advisory... 23367c478bd9Sstevel@tonic-gate */ 23377c478bd9Sstevel@tonic-gate error = setdirgid(dvp, &vattr.va_gid, cr); 23387c478bd9Sstevel@tonic-gate if (error) { 23397c478bd9Sstevel@tonic-gate nfs_rw_exit(&drp->r_rwlock); 23407c478bd9Sstevel@tonic-gate return (error); 23417c478bd9Sstevel@tonic-gate } 23427c478bd9Sstevel@tonic-gate vattr.va_mask |= AT_GID; 23437c478bd9Sstevel@tonic-gate 23447c478bd9Sstevel@tonic-gate ASSERT(vattr.va_mask & AT_TYPE); 23457c478bd9Sstevel@tonic-gate if (vattr.va_type == VREG) { 23467c478bd9Sstevel@tonic-gate ASSERT(vattr.va_mask & AT_MODE); 23477c478bd9Sstevel@tonic-gate if (MANDMODE(vattr.va_mode)) { 23487c478bd9Sstevel@tonic-gate nfs_rw_exit(&drp->r_rwlock); 23497c478bd9Sstevel@tonic-gate return (EACCES); 23507c478bd9Sstevel@tonic-gate } 23517c478bd9Sstevel@tonic-gate error = nfs3create(dvp, nm, &vattr, exclusive, mode, vpp, cr, 23527c478bd9Sstevel@tonic-gate lfaware); 23537c478bd9Sstevel@tonic-gate /* 23547c478bd9Sstevel@tonic-gate * If this is not an exclusive create, then the CREATE 23557c478bd9Sstevel@tonic-gate * 
request will be made with the GUARDED mode set. This 23567c478bd9Sstevel@tonic-gate * means that the server will return EEXIST if the file 23577c478bd9Sstevel@tonic-gate * exists. The file could exist because of a retransmitted 23587c478bd9Sstevel@tonic-gate * request. In this case, we recover by starting over and 23597c478bd9Sstevel@tonic-gate * checking to see whether the file exists. This second 23607c478bd9Sstevel@tonic-gate * time through it should and a CREATE request will not be 23617c478bd9Sstevel@tonic-gate * sent. 23627c478bd9Sstevel@tonic-gate * 23637c478bd9Sstevel@tonic-gate * This handles the problem of a dangling CREATE request 23647c478bd9Sstevel@tonic-gate * which contains attributes which indicate that the file 23657c478bd9Sstevel@tonic-gate * should be truncated. This retransmitted request could 23667c478bd9Sstevel@tonic-gate * possibly truncate valid data in the file if not caught 23677c478bd9Sstevel@tonic-gate * by the duplicate request mechanism on the server or if 23687c478bd9Sstevel@tonic-gate * not caught by other means. The scenario is: 23697c478bd9Sstevel@tonic-gate * 23707c478bd9Sstevel@tonic-gate * Client transmits CREATE request with size = 0 23717c478bd9Sstevel@tonic-gate * Client times out, retransmits request. 23727c478bd9Sstevel@tonic-gate * Response to the first request arrives from the server 23737c478bd9Sstevel@tonic-gate * and the client proceeds on. 23747c478bd9Sstevel@tonic-gate * Client writes data to the file. 23757c478bd9Sstevel@tonic-gate * The server now processes retransmitted CREATE request 23767c478bd9Sstevel@tonic-gate * and truncates file. 23777c478bd9Sstevel@tonic-gate * 23787c478bd9Sstevel@tonic-gate * The use of the GUARDED CREATE request prevents this from 23797c478bd9Sstevel@tonic-gate * happening because the retransmitted CREATE would fail 23807c478bd9Sstevel@tonic-gate * with EEXIST and would not truncate the file. 
23817c478bd9Sstevel@tonic-gate */ 23827c478bd9Sstevel@tonic-gate if (error == EEXIST && exclusive == NONEXCL) { 23837c478bd9Sstevel@tonic-gate #ifdef DEBUG 23847c478bd9Sstevel@tonic-gate nfs3_create_misses++; 23857c478bd9Sstevel@tonic-gate #endif 23867c478bd9Sstevel@tonic-gate goto top; 23877c478bd9Sstevel@tonic-gate } 23887c478bd9Sstevel@tonic-gate nfs_rw_exit(&drp->r_rwlock); 23897c478bd9Sstevel@tonic-gate return (error); 23907c478bd9Sstevel@tonic-gate } 23917c478bd9Sstevel@tonic-gate error = nfs3mknod(dvp, nm, &vattr, exclusive, mode, vpp, cr); 23927c478bd9Sstevel@tonic-gate nfs_rw_exit(&drp->r_rwlock); 23937c478bd9Sstevel@tonic-gate return (error); 23947c478bd9Sstevel@tonic-gate } 23957c478bd9Sstevel@tonic-gate 23967c478bd9Sstevel@tonic-gate /* ARGSUSED */ 23977c478bd9Sstevel@tonic-gate static int 23987c478bd9Sstevel@tonic-gate nfs3create(vnode_t *dvp, char *nm, struct vattr *va, enum vcexcl exclusive, 23997c478bd9Sstevel@tonic-gate int mode, vnode_t **vpp, cred_t *cr, int lfaware) 24007c478bd9Sstevel@tonic-gate { 24017c478bd9Sstevel@tonic-gate int error; 24027c478bd9Sstevel@tonic-gate CREATE3args args; 24037c478bd9Sstevel@tonic-gate CREATE3res res; 24047c478bd9Sstevel@tonic-gate int douprintf; 24057c478bd9Sstevel@tonic-gate vnode_t *vp; 24067c478bd9Sstevel@tonic-gate struct vattr vattr; 24077c478bd9Sstevel@tonic-gate nfstime3 *verfp; 24087c478bd9Sstevel@tonic-gate rnode_t *rp; 24097c478bd9Sstevel@tonic-gate timestruc_t now; 24107c478bd9Sstevel@tonic-gate hrtime_t t; 24117c478bd9Sstevel@tonic-gate 2412108322fbScarlsonj ASSERT(nfs_zone() == VTOMI(dvp)->mi_zone); 24137c478bd9Sstevel@tonic-gate setdiropargs3(&args.where, nm, dvp); 24147c478bd9Sstevel@tonic-gate if (exclusive == EXCL) { 24157c478bd9Sstevel@tonic-gate args.how.mode = EXCLUSIVE; 24167c478bd9Sstevel@tonic-gate /* 24177c478bd9Sstevel@tonic-gate * Construct the create verifier. This verifier needs 24187c478bd9Sstevel@tonic-gate * to be unique between different clients. 
It also needs 24197c478bd9Sstevel@tonic-gate * to vary for each exclusive create request generated 24207c478bd9Sstevel@tonic-gate * from the client to the server. 24217c478bd9Sstevel@tonic-gate * 24227c478bd9Sstevel@tonic-gate * The first attempt is made to use the hostid and a 24237c478bd9Sstevel@tonic-gate * unique number on the client. If the hostid has not 24247c478bd9Sstevel@tonic-gate * been set, the high resolution time that the exclusive 24257c478bd9Sstevel@tonic-gate * create request is being made is used. This will work 24267c478bd9Sstevel@tonic-gate * unless two different clients, both with the hostid 24277c478bd9Sstevel@tonic-gate * not set, attempt an exclusive create request on the 24287c478bd9Sstevel@tonic-gate * same file, at exactly the same clock time. The 24297c478bd9Sstevel@tonic-gate * chances of this happening seem small enough to be 24307c478bd9Sstevel@tonic-gate * reasonable. 24317c478bd9Sstevel@tonic-gate */ 24327c478bd9Sstevel@tonic-gate verfp = (nfstime3 *)&args.how.createhow3_u.verf; 24335679c89fSjv227347 verfp->seconds = zone_get_hostid(NULL); 24347c478bd9Sstevel@tonic-gate if (verfp->seconds != 0) 24357c478bd9Sstevel@tonic-gate verfp->nseconds = newnum(); 24367c478bd9Sstevel@tonic-gate else { 24377c478bd9Sstevel@tonic-gate gethrestime(&now); 24387c478bd9Sstevel@tonic-gate verfp->seconds = now.tv_sec; 24397c478bd9Sstevel@tonic-gate verfp->nseconds = now.tv_nsec; 24407c478bd9Sstevel@tonic-gate } 24417c478bd9Sstevel@tonic-gate /* 24427c478bd9Sstevel@tonic-gate * Since the server will use this value for the mtime, 24437c478bd9Sstevel@tonic-gate * make sure that it can't overflow. Zero out the MSB. 24447c478bd9Sstevel@tonic-gate * The actual value does not matter here, only its uniqeness. 24457c478bd9Sstevel@tonic-gate */ 24467c478bd9Sstevel@tonic-gate verfp->seconds %= INT32_MAX; 24477c478bd9Sstevel@tonic-gate } else { 24487c478bd9Sstevel@tonic-gate /* 24497c478bd9Sstevel@tonic-gate * Issue the non-exclusive create in guarded mode. 
This 24507c478bd9Sstevel@tonic-gate * may result in some false EEXIST responses for 24517c478bd9Sstevel@tonic-gate * retransmitted requests, but these will be handled at 24527c478bd9Sstevel@tonic-gate * a higher level. By using GUARDED, duplicate requests 24537c478bd9Sstevel@tonic-gate * to do file truncation and possible access problems 24547c478bd9Sstevel@tonic-gate * can be avoided. 24557c478bd9Sstevel@tonic-gate */ 24567c478bd9Sstevel@tonic-gate args.how.mode = GUARDED; 24577c478bd9Sstevel@tonic-gate error = vattr_to_sattr3(va, 24587c478bd9Sstevel@tonic-gate &args.how.createhow3_u.obj_attributes); 24597c478bd9Sstevel@tonic-gate if (error) { 24607c478bd9Sstevel@tonic-gate /* req time field(s) overflow - return immediately */ 24617c478bd9Sstevel@tonic-gate return (error); 24627c478bd9Sstevel@tonic-gate } 24637c478bd9Sstevel@tonic-gate } 24647c478bd9Sstevel@tonic-gate 24657c478bd9Sstevel@tonic-gate douprintf = 1; 24667c478bd9Sstevel@tonic-gate 24677c478bd9Sstevel@tonic-gate t = gethrtime(); 24687c478bd9Sstevel@tonic-gate 24697c478bd9Sstevel@tonic-gate error = rfs3call(VTOMI(dvp), NFSPROC3_CREATE, 24707c478bd9Sstevel@tonic-gate xdr_CREATE3args, (caddr_t)&args, 24717c478bd9Sstevel@tonic-gate xdr_CREATE3res, (caddr_t)&res, cr, 24727c478bd9Sstevel@tonic-gate &douprintf, &res.status, 0, NULL); 24737c478bd9Sstevel@tonic-gate 24747c478bd9Sstevel@tonic-gate if (error) { 24757c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(dvp); 24767c478bd9Sstevel@tonic-gate return (error); 24777c478bd9Sstevel@tonic-gate } 24787c478bd9Sstevel@tonic-gate 24797c478bd9Sstevel@tonic-gate error = geterrno3(res.status); 24807c478bd9Sstevel@tonic-gate if (!error) { 24817c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(dvp, &res.resok.dir_wcc, t, cr); 24827c478bd9Sstevel@tonic-gate if (HAVE_RDDIR_CACHE(VTOR(dvp))) 24837c478bd9Sstevel@tonic-gate nfs_purge_rddir_cache(dvp); 24847c478bd9Sstevel@tonic-gate 24857c478bd9Sstevel@tonic-gate /* 24867c478bd9Sstevel@tonic-gate * On exclusive create the times need to 
be explicitly 24877c478bd9Sstevel@tonic-gate * set to clear any potential verifier that may be stored 24887c478bd9Sstevel@tonic-gate * in one of these fields (see comment below). This 24897c478bd9Sstevel@tonic-gate * is done here to cover the case where no post op attrs 24907c478bd9Sstevel@tonic-gate * were returned or an 'invalid' time was returned in 24917c478bd9Sstevel@tonic-gate * the attributes. 24927c478bd9Sstevel@tonic-gate */ 24937c478bd9Sstevel@tonic-gate if (exclusive == EXCL) 24947c478bd9Sstevel@tonic-gate va->va_mask |= (AT_MTIME | AT_ATIME); 24957c478bd9Sstevel@tonic-gate 24967c478bd9Sstevel@tonic-gate if (!res.resok.obj.handle_follows) { 24977c478bd9Sstevel@tonic-gate error = nfs3lookup(dvp, nm, &vp, NULL, 0, NULL, cr, 0); 24987c478bd9Sstevel@tonic-gate if (error) 24997c478bd9Sstevel@tonic-gate return (error); 25007c478bd9Sstevel@tonic-gate } else { 25017c478bd9Sstevel@tonic-gate if (res.resok.obj_attributes.attributes) { 25027c478bd9Sstevel@tonic-gate vp = makenfs3node(&res.resok.obj.handle, 25037c478bd9Sstevel@tonic-gate &res.resok.obj_attributes.attr, 25047c478bd9Sstevel@tonic-gate dvp->v_vfsp, t, cr, NULL, NULL); 25057c478bd9Sstevel@tonic-gate } else { 25067c478bd9Sstevel@tonic-gate vp = makenfs3node(&res.resok.obj.handle, NULL, 25077c478bd9Sstevel@tonic-gate dvp->v_vfsp, t, cr, NULL, NULL); 25087c478bd9Sstevel@tonic-gate 25097c478bd9Sstevel@tonic-gate /* 25107c478bd9Sstevel@tonic-gate * On an exclusive create, it is possible 25117c478bd9Sstevel@tonic-gate * that attributes were returned but those 25127c478bd9Sstevel@tonic-gate * postop attributes failed to decode 25137c478bd9Sstevel@tonic-gate * properly.
If this is the case, 25147c478bd9Sstevel@tonic-gate * then most likely the atime or mtime 25157c478bd9Sstevel@tonic-gate * were invalid for our client; this 25167c478bd9Sstevel@tonic-gate * is caused by the server storing the 25177c478bd9Sstevel@tonic-gate * create verifier in one of the time 25187c478bd9Sstevel@tonic-gate * fields(most likely mtime). 25197c478bd9Sstevel@tonic-gate * So... we are going to setattr just the 25207c478bd9Sstevel@tonic-gate * atime/mtime to clear things up. 25217c478bd9Sstevel@tonic-gate */ 25227c478bd9Sstevel@tonic-gate if (exclusive == EXCL) { 25237c478bd9Sstevel@tonic-gate if (error = 25247c478bd9Sstevel@tonic-gate nfs3excl_create_settimes(vp, 25257c478bd9Sstevel@tonic-gate va, cr)) { 25267c478bd9Sstevel@tonic-gate /* 25277c478bd9Sstevel@tonic-gate * Setting the times failed. 25287c478bd9Sstevel@tonic-gate * Remove the file and return 25297c478bd9Sstevel@tonic-gate * the error. 25307c478bd9Sstevel@tonic-gate */ 25317c478bd9Sstevel@tonic-gate VN_RELE(vp); 25327c478bd9Sstevel@tonic-gate (void) nfs3_remove(dvp, 2533da6c28aaSamw nm, cr, NULL, 0); 25347c478bd9Sstevel@tonic-gate return (error); 25357c478bd9Sstevel@tonic-gate } 25367c478bd9Sstevel@tonic-gate } 25377c478bd9Sstevel@tonic-gate 25387c478bd9Sstevel@tonic-gate /* 25397c478bd9Sstevel@tonic-gate * This handles the non-exclusive case 25407c478bd9Sstevel@tonic-gate * and the exclusive case where no post op 25417c478bd9Sstevel@tonic-gate * attrs were returned. 
25427c478bd9Sstevel@tonic-gate */ 25437c478bd9Sstevel@tonic-gate if (vp->v_type == VNON) { 25447c478bd9Sstevel@tonic-gate vattr.va_mask = AT_TYPE; 25457c478bd9Sstevel@tonic-gate error = nfs3getattr(vp, &vattr, cr); 25467c478bd9Sstevel@tonic-gate if (error) { 25477c478bd9Sstevel@tonic-gate VN_RELE(vp); 25487c478bd9Sstevel@tonic-gate return (error); 25497c478bd9Sstevel@tonic-gate } 25507c478bd9Sstevel@tonic-gate vp->v_type = vattr.va_type; 25517c478bd9Sstevel@tonic-gate } 25527c478bd9Sstevel@tonic-gate } 25537c478bd9Sstevel@tonic-gate dnlc_update(dvp, nm, vp); 25547c478bd9Sstevel@tonic-gate } 25557c478bd9Sstevel@tonic-gate 25567c478bd9Sstevel@tonic-gate rp = VTOR(vp); 25577c478bd9Sstevel@tonic-gate 25587c478bd9Sstevel@tonic-gate /* 25597c478bd9Sstevel@tonic-gate * Check here for large file handled by 25607c478bd9Sstevel@tonic-gate * LF-unaware process (as ufs_create() does) 25617c478bd9Sstevel@tonic-gate */ 25627c478bd9Sstevel@tonic-gate if ((va->va_mask & AT_SIZE) && vp->v_type == VREG && 25637c478bd9Sstevel@tonic-gate !(lfaware & FOFFMAX)) { 25647c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 25657c478bd9Sstevel@tonic-gate if (rp->r_size > MAXOFF32_T) { 25667c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 25677c478bd9Sstevel@tonic-gate VN_RELE(vp); 25687c478bd9Sstevel@tonic-gate return (EOVERFLOW); 25697c478bd9Sstevel@tonic-gate } 25707c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 25717c478bd9Sstevel@tonic-gate } 25727c478bd9Sstevel@tonic-gate 25737c478bd9Sstevel@tonic-gate if (exclusive == EXCL && 25747c478bd9Sstevel@tonic-gate (va->va_mask & ~(AT_GID | AT_SIZE))) { 25757c478bd9Sstevel@tonic-gate /* 25767c478bd9Sstevel@tonic-gate * If doing an exclusive create, then generate 25777c478bd9Sstevel@tonic-gate * a SETATTR to set the initial attributes. 25787c478bd9Sstevel@tonic-gate * Try to set the mtime and the atime to the 25797c478bd9Sstevel@tonic-gate * server's current time. 
It is somewhat 25807c478bd9Sstevel@tonic-gate * expected that these fields will be used to 25817c478bd9Sstevel@tonic-gate * store the exclusive create cookie. If not, 25827c478bd9Sstevel@tonic-gate * server implementors will need to know that 25837c478bd9Sstevel@tonic-gate * a SETATTR will follow an exclusive create 25847c478bd9Sstevel@tonic-gate * and the cookie should be destroyed if 25857c478bd9Sstevel@tonic-gate * appropriate. This work may have been done 25867c478bd9Sstevel@tonic-gate * earlier in this function if post op attrs 25877c478bd9Sstevel@tonic-gate * were not available. 25887c478bd9Sstevel@tonic-gate * 25897c478bd9Sstevel@tonic-gate * The AT_GID and AT_SIZE bits are turned off 25907c478bd9Sstevel@tonic-gate * so that the SETATTR request will not attempt 25917c478bd9Sstevel@tonic-gate * to process these. The gid will be set 25927c478bd9Sstevel@tonic-gate * separately if appropriate. The size is turned 25937c478bd9Sstevel@tonic-gate * off because it is assumed that a new file will 25947c478bd9Sstevel@tonic-gate * be created empty and if the file wasn't empty, 25957c478bd9Sstevel@tonic-gate * then the exclusive create will have failed 25967c478bd9Sstevel@tonic-gate * because the file must have existed already. 25977c478bd9Sstevel@tonic-gate * Therefore, no truncate operation is needed. 25987c478bd9Sstevel@tonic-gate */ 25997c478bd9Sstevel@tonic-gate va->va_mask &= ~(AT_GID | AT_SIZE); 26007c478bd9Sstevel@tonic-gate error = nfs3setattr(vp, va, 0, cr); 26017c478bd9Sstevel@tonic-gate if (error) { 26027c478bd9Sstevel@tonic-gate /* 26037c478bd9Sstevel@tonic-gate * Couldn't correct the attributes of 26047c478bd9Sstevel@tonic-gate * the newly created file and the 26057c478bd9Sstevel@tonic-gate * attributes are wrong. Remove the 26067c478bd9Sstevel@tonic-gate * file and return an error to the 26077c478bd9Sstevel@tonic-gate * application. 
26087c478bd9Sstevel@tonic-gate */ 26097c478bd9Sstevel@tonic-gate VN_RELE(vp); 2610da6c28aaSamw (void) nfs3_remove(dvp, nm, cr, NULL, 0); 26117c478bd9Sstevel@tonic-gate return (error); 26127c478bd9Sstevel@tonic-gate } 26137c478bd9Sstevel@tonic-gate } 26147c478bd9Sstevel@tonic-gate 26157c478bd9Sstevel@tonic-gate if (va->va_gid != rp->r_attr.va_gid) { 26167c478bd9Sstevel@tonic-gate /* 26177c478bd9Sstevel@tonic-gate * If the gid on the file isn't right, then 26187c478bd9Sstevel@tonic-gate * generate a SETATTR to attempt to change 26197c478bd9Sstevel@tonic-gate * it. This may or may not work, depending 26207c478bd9Sstevel@tonic-gate * upon the server's semantics for allowing 26217c478bd9Sstevel@tonic-gate * file ownership changes. 26227c478bd9Sstevel@tonic-gate */ 26237c478bd9Sstevel@tonic-gate va->va_mask = AT_GID; 26247c478bd9Sstevel@tonic-gate (void) nfs3setattr(vp, va, 0, cr); 26257c478bd9Sstevel@tonic-gate } 26267c478bd9Sstevel@tonic-gate 26277c478bd9Sstevel@tonic-gate /* 26287c478bd9Sstevel@tonic-gate * If vnode is a device create special vnode 26297c478bd9Sstevel@tonic-gate */ 26307c478bd9Sstevel@tonic-gate if (IS_DEVVP(vp)) { 26317c478bd9Sstevel@tonic-gate *vpp = specvp(vp, vp->v_rdev, vp->v_type, cr); 26327c478bd9Sstevel@tonic-gate VN_RELE(vp); 26337c478bd9Sstevel@tonic-gate } else 26347c478bd9Sstevel@tonic-gate *vpp = vp; 26357c478bd9Sstevel@tonic-gate } else { 26367c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(dvp, &res.resfail.dir_wcc, t, cr); 26377c478bd9Sstevel@tonic-gate PURGE_STALE_FH(error, dvp, cr); 26387c478bd9Sstevel@tonic-gate } 26397c478bd9Sstevel@tonic-gate 26407c478bd9Sstevel@tonic-gate return (error); 26417c478bd9Sstevel@tonic-gate } 26427c478bd9Sstevel@tonic-gate 26437c478bd9Sstevel@tonic-gate /* 26447c478bd9Sstevel@tonic-gate * Special setattr function to take care of rest of atime/mtime 26457c478bd9Sstevel@tonic-gate * after successful exclusive create. 
This function exists to avoid 26467c478bd9Sstevel@tonic-gate * handling attributes from the server; after an exclusive create the atime/mtime fields 26477c478bd9Sstevel@tonic-gate * may be 'invalid' in the client's view and therefore cannot be trusted. 26487c478bd9Sstevel@tonic-gate */ 26497c478bd9Sstevel@tonic-gate static int 26507c478bd9Sstevel@tonic-gate nfs3excl_create_settimes(vnode_t *vp, struct vattr *vap, cred_t *cr) 26517c478bd9Sstevel@tonic-gate { 26527c478bd9Sstevel@tonic-gate int error; 26537c478bd9Sstevel@tonic-gate uint_t mask; 26547c478bd9Sstevel@tonic-gate SETATTR3args args; 26557c478bd9Sstevel@tonic-gate SETATTR3res res; 26567c478bd9Sstevel@tonic-gate int douprintf; 26577c478bd9Sstevel@tonic-gate rnode_t *rp; 26587c478bd9Sstevel@tonic-gate hrtime_t t; 26597c478bd9Sstevel@tonic-gate 2660108322fbScarlsonj ASSERT(nfs_zone() == VTOMI(vp)->mi_zone); 26617c478bd9Sstevel@tonic-gate /* save the caller's mask so that it can be reset later */ 26627c478bd9Sstevel@tonic-gate mask = vap->va_mask; 26637c478bd9Sstevel@tonic-gate 26647c478bd9Sstevel@tonic-gate rp = VTOR(vp); 26657c478bd9Sstevel@tonic-gate 26667c478bd9Sstevel@tonic-gate args.object = *RTOFH3(rp); 26677c478bd9Sstevel@tonic-gate args.guard.check = FALSE; 26687c478bd9Sstevel@tonic-gate 26697c478bd9Sstevel@tonic-gate /* Use the mask to initialize the arguments */ 26707c478bd9Sstevel@tonic-gate vap->va_mask = 0; 26717c478bd9Sstevel@tonic-gate error = vattr_to_sattr3(vap, &args.new_attributes); 26727c478bd9Sstevel@tonic-gate 26737c478bd9Sstevel@tonic-gate /* We want to set just atime/mtime on this request */ 26747c478bd9Sstevel@tonic-gate args.new_attributes.atime.set_it = SET_TO_SERVER_TIME; 26757c478bd9Sstevel@tonic-gate args.new_attributes.mtime.set_it = SET_TO_SERVER_TIME; 26767c478bd9Sstevel@tonic-gate 26777c478bd9Sstevel@tonic-gate douprintf = 1; 26787c478bd9Sstevel@tonic-gate 26797c478bd9Sstevel@tonic-gate t = gethrtime(); 26807c478bd9Sstevel@tonic-gate 26817c478bd9Sstevel@tonic-gate error = rfs3call(VTOMI(vp),
NFSPROC3_SETATTR, 26827c478bd9Sstevel@tonic-gate xdr_SETATTR3args, (caddr_t)&args, 26837c478bd9Sstevel@tonic-gate xdr_SETATTR3res, (caddr_t)&res, cr, 26847c478bd9Sstevel@tonic-gate &douprintf, &res.status, 0, NULL); 26857c478bd9Sstevel@tonic-gate 26867c478bd9Sstevel@tonic-gate if (error) { 26877c478bd9Sstevel@tonic-gate vap->va_mask = mask; 26887c478bd9Sstevel@tonic-gate return (error); 26897c478bd9Sstevel@tonic-gate } 26907c478bd9Sstevel@tonic-gate 26917c478bd9Sstevel@tonic-gate error = geterrno3(res.status); 26927c478bd9Sstevel@tonic-gate if (!error) { 26937c478bd9Sstevel@tonic-gate /* 26947c478bd9Sstevel@tonic-gate * It is important to pick up the attributes. 26957c478bd9Sstevel@tonic-gate * Since this is the exclusive create path, the 26967c478bd9Sstevel@tonic-gate * attributes on the initial create were ignored 26977c478bd9Sstevel@tonic-gate * and we need these to have the correct info. 26987c478bd9Sstevel@tonic-gate */ 26997c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(vp, &res.resok.obj_wcc, t, cr); 27007c478bd9Sstevel@tonic-gate /* 27017c478bd9Sstevel@tonic-gate * No need to do the atime/mtime work again so clear 27027c478bd9Sstevel@tonic-gate * the bits. 
27037c478bd9Sstevel@tonic-gate */ 27047c478bd9Sstevel@tonic-gate mask &= ~(AT_ATIME | AT_MTIME); 27057c478bd9Sstevel@tonic-gate } else { 27067c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(vp, &res.resfail.obj_wcc, t, cr); 27077c478bd9Sstevel@tonic-gate } 27087c478bd9Sstevel@tonic-gate 27097c478bd9Sstevel@tonic-gate vap->va_mask = mask; 27107c478bd9Sstevel@tonic-gate 27117c478bd9Sstevel@tonic-gate return (error); 27127c478bd9Sstevel@tonic-gate } 27137c478bd9Sstevel@tonic-gate 27147c478bd9Sstevel@tonic-gate /* ARGSUSED */ 27157c478bd9Sstevel@tonic-gate static int 27167c478bd9Sstevel@tonic-gate nfs3mknod(vnode_t *dvp, char *nm, struct vattr *va, enum vcexcl exclusive, 27177c478bd9Sstevel@tonic-gate int mode, vnode_t **vpp, cred_t *cr) 27187c478bd9Sstevel@tonic-gate { 27197c478bd9Sstevel@tonic-gate int error; 27207c478bd9Sstevel@tonic-gate MKNOD3args args; 27217c478bd9Sstevel@tonic-gate MKNOD3res res; 27227c478bd9Sstevel@tonic-gate int douprintf; 27237c478bd9Sstevel@tonic-gate vnode_t *vp; 27247c478bd9Sstevel@tonic-gate struct vattr vattr; 27257c478bd9Sstevel@tonic-gate hrtime_t t; 27267c478bd9Sstevel@tonic-gate 2727108322fbScarlsonj ASSERT(nfs_zone() == VTOMI(dvp)->mi_zone); 27287c478bd9Sstevel@tonic-gate switch (va->va_type) { 27297c478bd9Sstevel@tonic-gate case VCHR: 27307c478bd9Sstevel@tonic-gate case VBLK: 27317c478bd9Sstevel@tonic-gate setdiropargs3(&args.where, nm, dvp); 27327c478bd9Sstevel@tonic-gate args.what.type = (va->va_type == VCHR) ? 
NF3CHR : NF3BLK; 27337c478bd9Sstevel@tonic-gate error = vattr_to_sattr3(va, 27347c478bd9Sstevel@tonic-gate &args.what.mknoddata3_u.device.dev_attributes); 27357c478bd9Sstevel@tonic-gate if (error) { 27367c478bd9Sstevel@tonic-gate /* req time field(s) overflow - return immediately */ 27377c478bd9Sstevel@tonic-gate return (error); 27387c478bd9Sstevel@tonic-gate } 27397c478bd9Sstevel@tonic-gate args.what.mknoddata3_u.device.spec.specdata1 = 27407c478bd9Sstevel@tonic-gate getmajor(va->va_rdev); 27417c478bd9Sstevel@tonic-gate args.what.mknoddata3_u.device.spec.specdata2 = 27427c478bd9Sstevel@tonic-gate getminor(va->va_rdev); 27437c478bd9Sstevel@tonic-gate break; 27447c478bd9Sstevel@tonic-gate 27457c478bd9Sstevel@tonic-gate case VFIFO: 27467c478bd9Sstevel@tonic-gate case VSOCK: 27477c478bd9Sstevel@tonic-gate setdiropargs3(&args.where, nm, dvp); 27487c478bd9Sstevel@tonic-gate args.what.type = (va->va_type == VFIFO) ? NF3FIFO : NF3SOCK; 27497c478bd9Sstevel@tonic-gate error = vattr_to_sattr3(va, 27507c478bd9Sstevel@tonic-gate &args.what.mknoddata3_u.pipe_attributes); 27517c478bd9Sstevel@tonic-gate if (error) { 27527c478bd9Sstevel@tonic-gate /* req time field(s) overflow - return immediately */ 27537c478bd9Sstevel@tonic-gate return (error); 27547c478bd9Sstevel@tonic-gate } 27557c478bd9Sstevel@tonic-gate break; 27567c478bd9Sstevel@tonic-gate 27577c478bd9Sstevel@tonic-gate default: 27587c478bd9Sstevel@tonic-gate return (EINVAL); 27597c478bd9Sstevel@tonic-gate } 27607c478bd9Sstevel@tonic-gate 27617c478bd9Sstevel@tonic-gate douprintf = 1; 27627c478bd9Sstevel@tonic-gate 27637c478bd9Sstevel@tonic-gate t = gethrtime(); 27647c478bd9Sstevel@tonic-gate 27657c478bd9Sstevel@tonic-gate error = rfs3call(VTOMI(dvp), NFSPROC3_MKNOD, 27667c478bd9Sstevel@tonic-gate xdr_MKNOD3args, (caddr_t)&args, 27677c478bd9Sstevel@tonic-gate xdr_MKNOD3res, (caddr_t)&res, cr, 27687c478bd9Sstevel@tonic-gate &douprintf, &res.status, 0, NULL); 27697c478bd9Sstevel@tonic-gate 27707c478bd9Sstevel@tonic-gate if 
(error) { 27717c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(dvp); 27727c478bd9Sstevel@tonic-gate return (error); 27737c478bd9Sstevel@tonic-gate } 27747c478bd9Sstevel@tonic-gate 27757c478bd9Sstevel@tonic-gate error = geterrno3(res.status); 27767c478bd9Sstevel@tonic-gate if (!error) { 27777c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(dvp, &res.resok.dir_wcc, t, cr); 27787c478bd9Sstevel@tonic-gate if (HAVE_RDDIR_CACHE(VTOR(dvp))) 27797c478bd9Sstevel@tonic-gate nfs_purge_rddir_cache(dvp); 27807c478bd9Sstevel@tonic-gate 27817c478bd9Sstevel@tonic-gate if (!res.resok.obj.handle_follows) { 27827c478bd9Sstevel@tonic-gate error = nfs3lookup(dvp, nm, &vp, NULL, 0, NULL, cr, 0); 27837c478bd9Sstevel@tonic-gate if (error) 27847c478bd9Sstevel@tonic-gate return (error); 27857c478bd9Sstevel@tonic-gate } else { 27867c478bd9Sstevel@tonic-gate if (res.resok.obj_attributes.attributes) { 27877c478bd9Sstevel@tonic-gate vp = makenfs3node(&res.resok.obj.handle, 27887c478bd9Sstevel@tonic-gate &res.resok.obj_attributes.attr, 27897c478bd9Sstevel@tonic-gate dvp->v_vfsp, t, cr, NULL, NULL); 27907c478bd9Sstevel@tonic-gate } else { 27917c478bd9Sstevel@tonic-gate vp = makenfs3node(&res.resok.obj.handle, NULL, 27927c478bd9Sstevel@tonic-gate dvp->v_vfsp, t, cr, NULL, NULL); 27937c478bd9Sstevel@tonic-gate if (vp->v_type == VNON) { 27947c478bd9Sstevel@tonic-gate vattr.va_mask = AT_TYPE; 27957c478bd9Sstevel@tonic-gate error = nfs3getattr(vp, &vattr, cr); 27967c478bd9Sstevel@tonic-gate if (error) { 27977c478bd9Sstevel@tonic-gate VN_RELE(vp); 27987c478bd9Sstevel@tonic-gate return (error); 27997c478bd9Sstevel@tonic-gate } 28007c478bd9Sstevel@tonic-gate vp->v_type = vattr.va_type; 28017c478bd9Sstevel@tonic-gate } 28027c478bd9Sstevel@tonic-gate 28037c478bd9Sstevel@tonic-gate } 28047c478bd9Sstevel@tonic-gate dnlc_update(dvp, nm, vp); 28057c478bd9Sstevel@tonic-gate } 28067c478bd9Sstevel@tonic-gate 28077c478bd9Sstevel@tonic-gate if (va->va_gid != VTOR(vp)->r_attr.va_gid) { 28087c478bd9Sstevel@tonic-gate 
va->va_mask = AT_GID; 28097c478bd9Sstevel@tonic-gate (void) nfs3setattr(vp, va, 0, cr); 28107c478bd9Sstevel@tonic-gate } 28117c478bd9Sstevel@tonic-gate 28127c478bd9Sstevel@tonic-gate /* 28137c478bd9Sstevel@tonic-gate * If vnode is a device create special vnode 28147c478bd9Sstevel@tonic-gate */ 28157c478bd9Sstevel@tonic-gate if (IS_DEVVP(vp)) { 28167c478bd9Sstevel@tonic-gate *vpp = specvp(vp, vp->v_rdev, vp->v_type, cr); 28177c478bd9Sstevel@tonic-gate VN_RELE(vp); 28187c478bd9Sstevel@tonic-gate } else 28197c478bd9Sstevel@tonic-gate *vpp = vp; 28207c478bd9Sstevel@tonic-gate } else { 28217c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(dvp, &res.resfail.dir_wcc, t, cr); 28227c478bd9Sstevel@tonic-gate PURGE_STALE_FH(error, dvp, cr); 28237c478bd9Sstevel@tonic-gate } 28247c478bd9Sstevel@tonic-gate return (error); 28257c478bd9Sstevel@tonic-gate } 28267c478bd9Sstevel@tonic-gate 28277c478bd9Sstevel@tonic-gate /* 28287c478bd9Sstevel@tonic-gate * Weirdness: if the vnode to be removed is open 28297c478bd9Sstevel@tonic-gate * we rename it instead of removing it and nfs_inactive 28307c478bd9Sstevel@tonic-gate * will remove the new name. 
28317c478bd9Sstevel@tonic-gate */ 2832da6c28aaSamw /* ARGSUSED */ 28337c478bd9Sstevel@tonic-gate static int 2834da6c28aaSamw nfs3_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct, int flags) 28357c478bd9Sstevel@tonic-gate { 28367c478bd9Sstevel@tonic-gate int error; 28377c478bd9Sstevel@tonic-gate REMOVE3args args; 28387c478bd9Sstevel@tonic-gate REMOVE3res res; 28397c478bd9Sstevel@tonic-gate vnode_t *vp; 28407c478bd9Sstevel@tonic-gate char *tmpname; 28417c478bd9Sstevel@tonic-gate int douprintf; 28427c478bd9Sstevel@tonic-gate rnode_t *rp; 28437c478bd9Sstevel@tonic-gate rnode_t *drp; 28447c478bd9Sstevel@tonic-gate hrtime_t t; 28457c478bd9Sstevel@tonic-gate 2846108322fbScarlsonj if (nfs_zone() != VTOMI(dvp)->mi_zone) 28477c478bd9Sstevel@tonic-gate return (EPERM); 28487c478bd9Sstevel@tonic-gate drp = VTOR(dvp); 28497c478bd9Sstevel@tonic-gate if (nfs_rw_enter_sig(&drp->r_rwlock, RW_WRITER, INTR(dvp))) 28507c478bd9Sstevel@tonic-gate return (EINTR); 28517c478bd9Sstevel@tonic-gate 28527c478bd9Sstevel@tonic-gate error = nfs3lookup(dvp, nm, &vp, NULL, 0, NULL, cr, 0); 28537c478bd9Sstevel@tonic-gate if (error) { 28547c478bd9Sstevel@tonic-gate nfs_rw_exit(&drp->r_rwlock); 28557c478bd9Sstevel@tonic-gate return (error); 28567c478bd9Sstevel@tonic-gate } 28577c478bd9Sstevel@tonic-gate 28587c478bd9Sstevel@tonic-gate if (vp->v_type == VDIR && secpolicy_fs_linkdir(cr, dvp->v_vfsp)) { 28597c478bd9Sstevel@tonic-gate VN_RELE(vp); 28607c478bd9Sstevel@tonic-gate nfs_rw_exit(&drp->r_rwlock); 28617c478bd9Sstevel@tonic-gate return (EPERM); 28627c478bd9Sstevel@tonic-gate } 28637c478bd9Sstevel@tonic-gate 28647c478bd9Sstevel@tonic-gate /* 28657c478bd9Sstevel@tonic-gate * First just remove the entry from the name cache, as it 28667c478bd9Sstevel@tonic-gate * is most likely the only entry for this vp. 
28677c478bd9Sstevel@tonic-gate */ 28687c478bd9Sstevel@tonic-gate dnlc_remove(dvp, nm); 28697c478bd9Sstevel@tonic-gate 28707c478bd9Sstevel@tonic-gate /* 28717c478bd9Sstevel@tonic-gate * If the file has a v_count > 1 then there may be more than one 28727c478bd9Sstevel@tonic-gate * entry in the name cache due to multiple links or an open file, 28737c478bd9Sstevel@tonic-gate * but we don't have the real reference count so flush all 28747c478bd9Sstevel@tonic-gate * possible entries. 28757c478bd9Sstevel@tonic-gate */ 28767c478bd9Sstevel@tonic-gate if (vp->v_count > 1) 28777c478bd9Sstevel@tonic-gate dnlc_purge_vp(vp); 28787c478bd9Sstevel@tonic-gate 28797c478bd9Sstevel@tonic-gate /* 28807c478bd9Sstevel@tonic-gate * Now we have the real reference count on the vnode 28817c478bd9Sstevel@tonic-gate */ 28827c478bd9Sstevel@tonic-gate rp = VTOR(vp); 28837c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 28847c478bd9Sstevel@tonic-gate if (vp->v_count > 1 && 28857c478bd9Sstevel@tonic-gate (rp->r_unldvp == NULL || strcmp(nm, rp->r_unlname) == 0)) { 28867c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 28877c478bd9Sstevel@tonic-gate tmpname = newname(); 2888da6c28aaSamw error = nfs3rename(dvp, nm, dvp, tmpname, cr, ct); 28897c478bd9Sstevel@tonic-gate if (error) 28907c478bd9Sstevel@tonic-gate kmem_free(tmpname, MAXNAMELEN); 28917c478bd9Sstevel@tonic-gate else { 28927c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 28937c478bd9Sstevel@tonic-gate if (rp->r_unldvp == NULL) { 28947c478bd9Sstevel@tonic-gate VN_HOLD(dvp); 28957c478bd9Sstevel@tonic-gate rp->r_unldvp = dvp; 28967c478bd9Sstevel@tonic-gate if (rp->r_unlcred != NULL) 28977c478bd9Sstevel@tonic-gate crfree(rp->r_unlcred); 28987c478bd9Sstevel@tonic-gate crhold(cr); 28997c478bd9Sstevel@tonic-gate rp->r_unlcred = cr; 29007c478bd9Sstevel@tonic-gate rp->r_unlname = tmpname; 29017c478bd9Sstevel@tonic-gate } else { 29027c478bd9Sstevel@tonic-gate kmem_free(rp->r_unlname, MAXNAMELEN); 29037c478bd9Sstevel@tonic-gate
rp->r_unlname = tmpname; 29047c478bd9Sstevel@tonic-gate } 29057c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 29067c478bd9Sstevel@tonic-gate } 29077c478bd9Sstevel@tonic-gate } else { 29087c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 29097c478bd9Sstevel@tonic-gate /* 29107c478bd9Sstevel@tonic-gate * We need to flush any dirty pages which happen to 29117c478bd9Sstevel@tonic-gate * be hanging around before removing the file. This 29127c478bd9Sstevel@tonic-gate * shouldn't happen very often and mostly on file 29137c478bd9Sstevel@tonic-gate * systems mounted "nocto". 29147c478bd9Sstevel@tonic-gate */ 29157c478bd9Sstevel@tonic-gate if (vn_has_cached_data(vp) && 29167c478bd9Sstevel@tonic-gate ((rp->r_flags & RDIRTY) || rp->r_count > 0)) { 2917da6c28aaSamw error = nfs3_putpage(vp, (offset_t)0, 0, 0, cr, ct); 29187c478bd9Sstevel@tonic-gate if (error && (error == ENOSPC || error == EDQUOT)) { 29197c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 29207c478bd9Sstevel@tonic-gate if (!rp->r_error) 29217c478bd9Sstevel@tonic-gate rp->r_error = error; 29227c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 29237c478bd9Sstevel@tonic-gate } 29247c478bd9Sstevel@tonic-gate } 29257c478bd9Sstevel@tonic-gate 29267c478bd9Sstevel@tonic-gate setdiropargs3(&args.object, nm, dvp); 29277c478bd9Sstevel@tonic-gate 29287c478bd9Sstevel@tonic-gate douprintf = 1; 29297c478bd9Sstevel@tonic-gate 29307c478bd9Sstevel@tonic-gate t = gethrtime(); 29317c478bd9Sstevel@tonic-gate 29327c478bd9Sstevel@tonic-gate error = rfs3call(VTOMI(dvp), NFSPROC3_REMOVE, 29337c478bd9Sstevel@tonic-gate xdr_diropargs3, (caddr_t)&args, 29347c478bd9Sstevel@tonic-gate xdr_REMOVE3res, (caddr_t)&res, cr, 29357c478bd9Sstevel@tonic-gate &douprintf, &res.status, 0, NULL); 29367c478bd9Sstevel@tonic-gate 29377c478bd9Sstevel@tonic-gate /* 29387c478bd9Sstevel@tonic-gate * The xattr dir may be gone after last attr is removed, 29397c478bd9Sstevel@tonic-gate * so flush it from dnlc. 
29407c478bd9Sstevel@tonic-gate */ 29417c478bd9Sstevel@tonic-gate if (dvp->v_flag & V_XATTRDIR) 29427c478bd9Sstevel@tonic-gate dnlc_purge_vp(dvp); 29437c478bd9Sstevel@tonic-gate 29447c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(vp); 29457c478bd9Sstevel@tonic-gate 29467c478bd9Sstevel@tonic-gate if (error) { 29477c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(dvp); 29487c478bd9Sstevel@tonic-gate } else { 29497c478bd9Sstevel@tonic-gate error = geterrno3(res.status); 29507c478bd9Sstevel@tonic-gate if (!error) { 29517c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(dvp, &res.resok.dir_wcc, t, 29527c478bd9Sstevel@tonic-gate cr); 29537c478bd9Sstevel@tonic-gate if (HAVE_RDDIR_CACHE(drp)) 29547c478bd9Sstevel@tonic-gate nfs_purge_rddir_cache(dvp); 29557c478bd9Sstevel@tonic-gate } else { 29567c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(dvp, &res.resfail.dir_wcc, 29577c478bd9Sstevel@tonic-gate t, cr); 29587c478bd9Sstevel@tonic-gate PURGE_STALE_FH(error, dvp, cr); 29597c478bd9Sstevel@tonic-gate } 29607c478bd9Sstevel@tonic-gate } 29617c478bd9Sstevel@tonic-gate } 29627c478bd9Sstevel@tonic-gate 2963df2381bfSpraks if (error == 0) { 2964da6c28aaSamw vnevent_remove(vp, dvp, nm, ct); 2965df2381bfSpraks } 29667c478bd9Sstevel@tonic-gate VN_RELE(vp); 29677c478bd9Sstevel@tonic-gate 29687c478bd9Sstevel@tonic-gate nfs_rw_exit(&drp->r_rwlock); 29697c478bd9Sstevel@tonic-gate 29707c478bd9Sstevel@tonic-gate return (error); 29717c478bd9Sstevel@tonic-gate } 29727c478bd9Sstevel@tonic-gate 2973da6c28aaSamw /* ARGSUSED */ 29747c478bd9Sstevel@tonic-gate static int 2975da6c28aaSamw nfs3_link(vnode_t *tdvp, vnode_t *svp, char *tnm, cred_t *cr, 2976da6c28aaSamw caller_context_t *ct, int flags) 29777c478bd9Sstevel@tonic-gate { 29787c478bd9Sstevel@tonic-gate int error; 29797c478bd9Sstevel@tonic-gate LINK3args args; 29807c478bd9Sstevel@tonic-gate LINK3res res; 29817c478bd9Sstevel@tonic-gate vnode_t *realvp; 29827c478bd9Sstevel@tonic-gate int douprintf; 29837c478bd9Sstevel@tonic-gate mntinfo_t *mi; 
29847c478bd9Sstevel@tonic-gate rnode_t *tdrp; 29857c478bd9Sstevel@tonic-gate hrtime_t t; 29867c478bd9Sstevel@tonic-gate 2987108322fbScarlsonj if (nfs_zone() != VTOMI(tdvp)->mi_zone) 29887c478bd9Sstevel@tonic-gate return (EPERM); 2989da6c28aaSamw if (VOP_REALVP(svp, &realvp, ct) == 0) 29907c478bd9Sstevel@tonic-gate svp = realvp; 29917c478bd9Sstevel@tonic-gate 29927c478bd9Sstevel@tonic-gate mi = VTOMI(svp); 29937c478bd9Sstevel@tonic-gate 29947c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_LINK)) 29957c478bd9Sstevel@tonic-gate return (EOPNOTSUPP); 29967c478bd9Sstevel@tonic-gate 29977c478bd9Sstevel@tonic-gate args.file = *VTOFH3(svp); 29987c478bd9Sstevel@tonic-gate setdiropargs3(&args.link, tnm, tdvp); 29997c478bd9Sstevel@tonic-gate 30007c478bd9Sstevel@tonic-gate tdrp = VTOR(tdvp); 30017c478bd9Sstevel@tonic-gate if (nfs_rw_enter_sig(&tdrp->r_rwlock, RW_WRITER, INTR(tdvp))) 30027c478bd9Sstevel@tonic-gate return (EINTR); 30037c478bd9Sstevel@tonic-gate 30047c478bd9Sstevel@tonic-gate dnlc_remove(tdvp, tnm); 30057c478bd9Sstevel@tonic-gate 30067c478bd9Sstevel@tonic-gate douprintf = 1; 30077c478bd9Sstevel@tonic-gate 30087c478bd9Sstevel@tonic-gate t = gethrtime(); 30097c478bd9Sstevel@tonic-gate 30107c478bd9Sstevel@tonic-gate error = rfs3call(mi, NFSPROC3_LINK, 30117c478bd9Sstevel@tonic-gate xdr_LINK3args, (caddr_t)&args, 30127c478bd9Sstevel@tonic-gate xdr_LINK3res, (caddr_t)&res, cr, 30137c478bd9Sstevel@tonic-gate &douprintf, &res.status, 0, NULL); 30147c478bd9Sstevel@tonic-gate 30157c478bd9Sstevel@tonic-gate if (error) { 30167c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(tdvp); 30177c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(svp); 30187c478bd9Sstevel@tonic-gate nfs_rw_exit(&tdrp->r_rwlock); 30197c478bd9Sstevel@tonic-gate return (error); 30207c478bd9Sstevel@tonic-gate } 30217c478bd9Sstevel@tonic-gate 30227c478bd9Sstevel@tonic-gate error = geterrno3(res.status); 30237c478bd9Sstevel@tonic-gate 30247c478bd9Sstevel@tonic-gate if (!error) { 30257c478bd9Sstevel@tonic-gate 
nfs3_cache_post_op_attr(svp, &res.resok.file_attributes, t, cr); 30267c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(tdvp, &res.resok.linkdir_wcc, t, cr); 30277c478bd9Sstevel@tonic-gate if (HAVE_RDDIR_CACHE(tdrp)) 30287c478bd9Sstevel@tonic-gate nfs_purge_rddir_cache(tdvp); 30297c478bd9Sstevel@tonic-gate dnlc_update(tdvp, tnm, svp); 30307c478bd9Sstevel@tonic-gate } else { 30317c478bd9Sstevel@tonic-gate nfs3_cache_post_op_attr(svp, &res.resfail.file_attributes, t, 30327c478bd9Sstevel@tonic-gate cr); 30337c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(tdvp, &res.resfail.linkdir_wcc, t, cr); 30347c478bd9Sstevel@tonic-gate if (error == EOPNOTSUPP) { 30357c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 30367c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI_LINK; 30377c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 30387c478bd9Sstevel@tonic-gate } 30397c478bd9Sstevel@tonic-gate } 30407c478bd9Sstevel@tonic-gate 30417c478bd9Sstevel@tonic-gate nfs_rw_exit(&tdrp->r_rwlock); 30427c478bd9Sstevel@tonic-gate 3043df2381bfSpraks if (!error) { 3044df2381bfSpraks /* 3045df2381bfSpraks * Notify the source file of this link operation. 
3046df2381bfSpraks */ 3047da6c28aaSamw vnevent_link(svp, ct); 3048df2381bfSpraks } 30497c478bd9Sstevel@tonic-gate return (error); 30507c478bd9Sstevel@tonic-gate } 30517c478bd9Sstevel@tonic-gate 3052da6c28aaSamw /* ARGSUSED */ 30537c478bd9Sstevel@tonic-gate static int 3054da6c28aaSamw nfs3_rename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr, 3055da6c28aaSamw caller_context_t *ct, int flags) 30567c478bd9Sstevel@tonic-gate { 30577c478bd9Sstevel@tonic-gate vnode_t *realvp; 30587c478bd9Sstevel@tonic-gate 3059108322fbScarlsonj if (nfs_zone() != VTOMI(odvp)->mi_zone) 30607c478bd9Sstevel@tonic-gate return (EPERM); 3061da6c28aaSamw if (VOP_REALVP(ndvp, &realvp, ct) == 0) 30627c478bd9Sstevel@tonic-gate ndvp = realvp; 30637c478bd9Sstevel@tonic-gate 3064da6c28aaSamw return (nfs3rename(odvp, onm, ndvp, nnm, cr, ct)); 30657c478bd9Sstevel@tonic-gate } 30667c478bd9Sstevel@tonic-gate 30677c478bd9Sstevel@tonic-gate /* 30687c478bd9Sstevel@tonic-gate * nfs3rename does the real work of renaming in NFS Version 3. 
30697c478bd9Sstevel@tonic-gate */ 30707c478bd9Sstevel@tonic-gate static int 3071da6c28aaSamw nfs3rename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr, 3072da6c28aaSamw caller_context_t *ct) 30737c478bd9Sstevel@tonic-gate { 30747c478bd9Sstevel@tonic-gate int error; 30757c478bd9Sstevel@tonic-gate RENAME3args args; 30767c478bd9Sstevel@tonic-gate RENAME3res res; 30777c478bd9Sstevel@tonic-gate int douprintf; 3078df2381bfSpraks vnode_t *nvp = NULL; 30797c478bd9Sstevel@tonic-gate vnode_t *ovp = NULL; 30807c478bd9Sstevel@tonic-gate char *tmpname; 30817c478bd9Sstevel@tonic-gate rnode_t *rp; 30827c478bd9Sstevel@tonic-gate rnode_t *odrp; 30837c478bd9Sstevel@tonic-gate rnode_t *ndrp; 30847c478bd9Sstevel@tonic-gate hrtime_t t; 30857c478bd9Sstevel@tonic-gate 3086108322fbScarlsonj ASSERT(nfs_zone() == VTOMI(odvp)->mi_zone); 30877c478bd9Sstevel@tonic-gate 30887c478bd9Sstevel@tonic-gate if (strcmp(onm, ".") == 0 || strcmp(onm, "..") == 0 || 30897c478bd9Sstevel@tonic-gate strcmp(nnm, ".") == 0 || strcmp(nnm, "..") == 0) 30907c478bd9Sstevel@tonic-gate return (EINVAL); 30917c478bd9Sstevel@tonic-gate 30927c478bd9Sstevel@tonic-gate odrp = VTOR(odvp); 30937c478bd9Sstevel@tonic-gate ndrp = VTOR(ndvp); 30947c478bd9Sstevel@tonic-gate if ((intptr_t)odrp < (intptr_t)ndrp) { 30957c478bd9Sstevel@tonic-gate if (nfs_rw_enter_sig(&odrp->r_rwlock, RW_WRITER, INTR(odvp))) 30967c478bd9Sstevel@tonic-gate return (EINTR); 30977c478bd9Sstevel@tonic-gate if (nfs_rw_enter_sig(&ndrp->r_rwlock, RW_WRITER, INTR(ndvp))) { 30987c478bd9Sstevel@tonic-gate nfs_rw_exit(&odrp->r_rwlock); 30997c478bd9Sstevel@tonic-gate return (EINTR); 31007c478bd9Sstevel@tonic-gate } 31017c478bd9Sstevel@tonic-gate } else { 31027c478bd9Sstevel@tonic-gate if (nfs_rw_enter_sig(&ndrp->r_rwlock, RW_WRITER, INTR(ndvp))) 31037c478bd9Sstevel@tonic-gate return (EINTR); 31047c478bd9Sstevel@tonic-gate if (nfs_rw_enter_sig(&odrp->r_rwlock, RW_WRITER, INTR(odvp))) { 31057c478bd9Sstevel@tonic-gate nfs_rw_exit(&ndrp->r_rwlock); 
31067c478bd9Sstevel@tonic-gate return (EINTR); 31077c478bd9Sstevel@tonic-gate } 31087c478bd9Sstevel@tonic-gate } 31097c478bd9Sstevel@tonic-gate 31107c478bd9Sstevel@tonic-gate /* 31117c478bd9Sstevel@tonic-gate * Lookup the target file. If it exists, it needs to be 31127c478bd9Sstevel@tonic-gate * checked to see whether it is a mount point and whether 31137c478bd9Sstevel@tonic-gate * it is active (open). 31147c478bd9Sstevel@tonic-gate */ 31157c478bd9Sstevel@tonic-gate error = nfs3lookup(ndvp, nnm, &nvp, NULL, 0, NULL, cr, 0); 31167c478bd9Sstevel@tonic-gate if (!error) { 31177c478bd9Sstevel@tonic-gate /* 31187c478bd9Sstevel@tonic-gate * If this file has been mounted on, then just 31197c478bd9Sstevel@tonic-gate * return busy because renaming to it would remove 31207c478bd9Sstevel@tonic-gate * the mounted file system from the name space. 31217c478bd9Sstevel@tonic-gate */ 31227c478bd9Sstevel@tonic-gate if (vn_mountedvfs(nvp) != NULL) { 31237c478bd9Sstevel@tonic-gate VN_RELE(nvp); 31247c478bd9Sstevel@tonic-gate nfs_rw_exit(&odrp->r_rwlock); 31257c478bd9Sstevel@tonic-gate nfs_rw_exit(&ndrp->r_rwlock); 31267c478bd9Sstevel@tonic-gate return (EBUSY); 31277c478bd9Sstevel@tonic-gate } 31287c478bd9Sstevel@tonic-gate 31297c478bd9Sstevel@tonic-gate /* 31307c478bd9Sstevel@tonic-gate * Purge the name cache of all references to this vnode 31317c478bd9Sstevel@tonic-gate * so that we can check the reference count to infer 31327c478bd9Sstevel@tonic-gate * whether it is active or not. 31337c478bd9Sstevel@tonic-gate */ 31347c478bd9Sstevel@tonic-gate /* 31357c478bd9Sstevel@tonic-gate * First just remove the entry from the name cache, as it 31367c478bd9Sstevel@tonic-gate * is most likely the only entry for this vp. 
31377c478bd9Sstevel@tonic-gate */ 31387c478bd9Sstevel@tonic-gate dnlc_remove(ndvp, nnm); 31397c478bd9Sstevel@tonic-gate /* 31407c478bd9Sstevel@tonic-gate * If the file has a v_count > 1 then there may be more 31417c478bd9Sstevel@tonic-gate * than one entry in the name cache due multiple links 31427c478bd9Sstevel@tonic-gate * or an open file, but we don't have the real reference 31437c478bd9Sstevel@tonic-gate * count so flush all possible entries. 31447c478bd9Sstevel@tonic-gate */ 31457c478bd9Sstevel@tonic-gate if (nvp->v_count > 1) 31467c478bd9Sstevel@tonic-gate dnlc_purge_vp(nvp); 31477c478bd9Sstevel@tonic-gate 31487c478bd9Sstevel@tonic-gate /* 31497c478bd9Sstevel@tonic-gate * If the vnode is active and is not a directory, 31507c478bd9Sstevel@tonic-gate * arrange to rename it to a 31517c478bd9Sstevel@tonic-gate * temporary file so that it will continue to be 31527c478bd9Sstevel@tonic-gate * accessible. This implements the "unlink-open-file" 31537c478bd9Sstevel@tonic-gate * semantics for the target of a rename operation. 31547c478bd9Sstevel@tonic-gate * Before doing this though, make sure that the 31557c478bd9Sstevel@tonic-gate * source and target files are not already the same. 31567c478bd9Sstevel@tonic-gate */ 31577c478bd9Sstevel@tonic-gate if (nvp->v_count > 1 && nvp->v_type != VDIR) { 31587c478bd9Sstevel@tonic-gate /* 31597c478bd9Sstevel@tonic-gate * Lookup the source name. 31607c478bd9Sstevel@tonic-gate */ 31617c478bd9Sstevel@tonic-gate error = nfs3lookup(odvp, onm, &ovp, NULL, 0, NULL, 31627c478bd9Sstevel@tonic-gate cr, 0); 31637c478bd9Sstevel@tonic-gate 31647c478bd9Sstevel@tonic-gate /* 31657c478bd9Sstevel@tonic-gate * The source name *should* already exist. 
31667c478bd9Sstevel@tonic-gate */ 31677c478bd9Sstevel@tonic-gate if (error) { 31687c478bd9Sstevel@tonic-gate VN_RELE(nvp); 31697c478bd9Sstevel@tonic-gate nfs_rw_exit(&odrp->r_rwlock); 31707c478bd9Sstevel@tonic-gate nfs_rw_exit(&ndrp->r_rwlock); 31717c478bd9Sstevel@tonic-gate return (error); 31727c478bd9Sstevel@tonic-gate } 31737c478bd9Sstevel@tonic-gate 31747c478bd9Sstevel@tonic-gate /* 31757c478bd9Sstevel@tonic-gate * Compare the two vnodes. If they are the same, 31767c478bd9Sstevel@tonic-gate * just release all held vnodes and return success. 31777c478bd9Sstevel@tonic-gate */ 31787c478bd9Sstevel@tonic-gate if (ovp == nvp) { 31797c478bd9Sstevel@tonic-gate VN_RELE(ovp); 31807c478bd9Sstevel@tonic-gate VN_RELE(nvp); 31817c478bd9Sstevel@tonic-gate nfs_rw_exit(&odrp->r_rwlock); 31827c478bd9Sstevel@tonic-gate nfs_rw_exit(&ndrp->r_rwlock); 31837c478bd9Sstevel@tonic-gate return (0); 31847c478bd9Sstevel@tonic-gate } 31857c478bd9Sstevel@tonic-gate 31867c478bd9Sstevel@tonic-gate /* 31877c478bd9Sstevel@tonic-gate * Can't mix and match directories and non- 31887c478bd9Sstevel@tonic-gate * directories in rename operations. We already 31897c478bd9Sstevel@tonic-gate * know that the target is not a directory. If 31907c478bd9Sstevel@tonic-gate * the source is a directory, return an error. 31917c478bd9Sstevel@tonic-gate */ 31927c478bd9Sstevel@tonic-gate if (ovp->v_type == VDIR) { 31937c478bd9Sstevel@tonic-gate VN_RELE(ovp); 31947c478bd9Sstevel@tonic-gate VN_RELE(nvp); 31957c478bd9Sstevel@tonic-gate nfs_rw_exit(&odrp->r_rwlock); 31967c478bd9Sstevel@tonic-gate nfs_rw_exit(&ndrp->r_rwlock); 31977c478bd9Sstevel@tonic-gate return (ENOTDIR); 31987c478bd9Sstevel@tonic-gate } 31997c478bd9Sstevel@tonic-gate 32007c478bd9Sstevel@tonic-gate /* 32017c478bd9Sstevel@tonic-gate * The target file exists, is not the same as 32027c478bd9Sstevel@tonic-gate * the source file, and is active. 
Link it 32037c478bd9Sstevel@tonic-gate * to a temporary filename to avoid having 32047c478bd9Sstevel@tonic-gate * the server removing the file completely. 32057c478bd9Sstevel@tonic-gate */ 32067c478bd9Sstevel@tonic-gate tmpname = newname(); 3207da6c28aaSamw error = nfs3_link(ndvp, nvp, tmpname, cr, NULL, 0); 32087c478bd9Sstevel@tonic-gate if (error == EOPNOTSUPP) { 32097c478bd9Sstevel@tonic-gate error = nfs3_rename(ndvp, nnm, ndvp, tmpname, 3210da6c28aaSamw cr, NULL, 0); 32117c478bd9Sstevel@tonic-gate } 32127c478bd9Sstevel@tonic-gate if (error) { 32137c478bd9Sstevel@tonic-gate kmem_free(tmpname, MAXNAMELEN); 32147c478bd9Sstevel@tonic-gate VN_RELE(ovp); 32157c478bd9Sstevel@tonic-gate VN_RELE(nvp); 32167c478bd9Sstevel@tonic-gate nfs_rw_exit(&odrp->r_rwlock); 32177c478bd9Sstevel@tonic-gate nfs_rw_exit(&ndrp->r_rwlock); 32187c478bd9Sstevel@tonic-gate return (error); 32197c478bd9Sstevel@tonic-gate } 32207c478bd9Sstevel@tonic-gate rp = VTOR(nvp); 32217c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 32227c478bd9Sstevel@tonic-gate if (rp->r_unldvp == NULL) { 32237c478bd9Sstevel@tonic-gate VN_HOLD(ndvp); 32247c478bd9Sstevel@tonic-gate rp->r_unldvp = ndvp; 32257c478bd9Sstevel@tonic-gate if (rp->r_unlcred != NULL) 32267c478bd9Sstevel@tonic-gate crfree(rp->r_unlcred); 32277c478bd9Sstevel@tonic-gate crhold(cr); 32287c478bd9Sstevel@tonic-gate rp->r_unlcred = cr; 32297c478bd9Sstevel@tonic-gate rp->r_unlname = tmpname; 32307c478bd9Sstevel@tonic-gate } else { 32317c478bd9Sstevel@tonic-gate kmem_free(rp->r_unlname, MAXNAMELEN); 32327c478bd9Sstevel@tonic-gate rp->r_unlname = tmpname; 32337c478bd9Sstevel@tonic-gate } 32347c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 32357c478bd9Sstevel@tonic-gate } 32367c478bd9Sstevel@tonic-gate } 32377c478bd9Sstevel@tonic-gate 32387c478bd9Sstevel@tonic-gate if (ovp == NULL) { 32397c478bd9Sstevel@tonic-gate /* 32407c478bd9Sstevel@tonic-gate * When renaming directories to be a subdirectory of a 32417c478bd9Sstevel@tonic-gate * 
different parent, the dnlc entry for ".." will no 32427c478bd9Sstevel@tonic-gate * longer be valid, so it must be removed. 32437c478bd9Sstevel@tonic-gate * 32447c478bd9Sstevel@tonic-gate * We do a lookup here to determine whether we are renaming 32457c478bd9Sstevel@tonic-gate * a directory and we need to check if we are renaming 32467c478bd9Sstevel@tonic-gate * an unlinked file. This might have already been done 32477c478bd9Sstevel@tonic-gate * in previous code, so we check ovp == NULL to avoid 32487c478bd9Sstevel@tonic-gate * doing it twice. 32497c478bd9Sstevel@tonic-gate */ 32507c478bd9Sstevel@tonic-gate 32517c478bd9Sstevel@tonic-gate error = nfs3lookup(odvp, onm, &ovp, NULL, 0, NULL, cr, 0); 32527c478bd9Sstevel@tonic-gate /* 32537c478bd9Sstevel@tonic-gate * The source name *should* already exist. 32547c478bd9Sstevel@tonic-gate */ 32557c478bd9Sstevel@tonic-gate if (error) { 32567c478bd9Sstevel@tonic-gate nfs_rw_exit(&odrp->r_rwlock); 32577c478bd9Sstevel@tonic-gate nfs_rw_exit(&ndrp->r_rwlock); 3258df2381bfSpraks if (nvp) { 3259df2381bfSpraks VN_RELE(nvp); 3260df2381bfSpraks } 32617c478bd9Sstevel@tonic-gate return (error); 32627c478bd9Sstevel@tonic-gate } 32637c478bd9Sstevel@tonic-gate ASSERT(ovp != NULL); 32647c478bd9Sstevel@tonic-gate } 32657c478bd9Sstevel@tonic-gate 32667c478bd9Sstevel@tonic-gate dnlc_remove(odvp, onm); 32677c478bd9Sstevel@tonic-gate dnlc_remove(ndvp, nnm); 32687c478bd9Sstevel@tonic-gate 32697c478bd9Sstevel@tonic-gate setdiropargs3(&args.from, onm, odvp); 32707c478bd9Sstevel@tonic-gate setdiropargs3(&args.to, nnm, ndvp); 32717c478bd9Sstevel@tonic-gate 32727c478bd9Sstevel@tonic-gate douprintf = 1; 32737c478bd9Sstevel@tonic-gate 32747c478bd9Sstevel@tonic-gate t = gethrtime(); 32757c478bd9Sstevel@tonic-gate 32767c478bd9Sstevel@tonic-gate error = rfs3call(VTOMI(odvp), NFSPROC3_RENAME, 32777c478bd9Sstevel@tonic-gate xdr_RENAME3args, (caddr_t)&args, 32787c478bd9Sstevel@tonic-gate xdr_RENAME3res, (caddr_t)&res, cr, 32797c478bd9Sstevel@tonic-gate 
&douprintf, &res.status, 0, NULL); 32807c478bd9Sstevel@tonic-gate 32817c478bd9Sstevel@tonic-gate if (error) { 32827c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(odvp); 32837c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(ndvp); 32847c478bd9Sstevel@tonic-gate VN_RELE(ovp); 32857c478bd9Sstevel@tonic-gate nfs_rw_exit(&odrp->r_rwlock); 32867c478bd9Sstevel@tonic-gate nfs_rw_exit(&ndrp->r_rwlock); 3287df2381bfSpraks if (nvp) { 3288df2381bfSpraks VN_RELE(nvp); 3289df2381bfSpraks } 32907c478bd9Sstevel@tonic-gate return (error); 32917c478bd9Sstevel@tonic-gate } 32927c478bd9Sstevel@tonic-gate 32937c478bd9Sstevel@tonic-gate error = geterrno3(res.status); 32947c478bd9Sstevel@tonic-gate 32957c478bd9Sstevel@tonic-gate if (!error) { 32967c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(odvp, &res.resok.fromdir_wcc, t, cr); 32977c478bd9Sstevel@tonic-gate if (HAVE_RDDIR_CACHE(odrp)) 32987c478bd9Sstevel@tonic-gate nfs_purge_rddir_cache(odvp); 32997c478bd9Sstevel@tonic-gate if (ndvp != odvp) { 33007c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(ndvp, &res.resok.todir_wcc, t, cr); 33017c478bd9Sstevel@tonic-gate if (HAVE_RDDIR_CACHE(ndrp)) 33027c478bd9Sstevel@tonic-gate nfs_purge_rddir_cache(ndvp); 33037c478bd9Sstevel@tonic-gate } 33047c478bd9Sstevel@tonic-gate /* 33057c478bd9Sstevel@tonic-gate * when renaming directories to be a subdirectory of a 33067c478bd9Sstevel@tonic-gate * different parent, the dnlc entry for ".." 
will no 33077c478bd9Sstevel@tonic-gate * longer be valid, so it must be removed 33087c478bd9Sstevel@tonic-gate */ 33097c478bd9Sstevel@tonic-gate rp = VTOR(ovp); 33107c478bd9Sstevel@tonic-gate if (ndvp != odvp) { 33117c478bd9Sstevel@tonic-gate if (ovp->v_type == VDIR) { 33127c478bd9Sstevel@tonic-gate dnlc_remove(ovp, ".."); 33137c478bd9Sstevel@tonic-gate if (HAVE_RDDIR_CACHE(rp)) 33147c478bd9Sstevel@tonic-gate nfs_purge_rddir_cache(ovp); 33157c478bd9Sstevel@tonic-gate } 33167c478bd9Sstevel@tonic-gate } 33177c478bd9Sstevel@tonic-gate 33187c478bd9Sstevel@tonic-gate /* 33197c478bd9Sstevel@tonic-gate * If we are renaming the unlinked file, update the 33207c478bd9Sstevel@tonic-gate * r_unldvp and r_unlname as needed. 33217c478bd9Sstevel@tonic-gate */ 33227c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 33237c478bd9Sstevel@tonic-gate if (rp->r_unldvp != NULL) { 33247c478bd9Sstevel@tonic-gate if (strcmp(rp->r_unlname, onm) == 0) { 33257c478bd9Sstevel@tonic-gate (void) strncpy(rp->r_unlname, nnm, MAXNAMELEN); 33267c478bd9Sstevel@tonic-gate rp->r_unlname[MAXNAMELEN - 1] = '\0'; 33277c478bd9Sstevel@tonic-gate 33287c478bd9Sstevel@tonic-gate if (ndvp != rp->r_unldvp) { 33297c478bd9Sstevel@tonic-gate VN_RELE(rp->r_unldvp); 33307c478bd9Sstevel@tonic-gate rp->r_unldvp = ndvp; 33317c478bd9Sstevel@tonic-gate VN_HOLD(ndvp); 33327c478bd9Sstevel@tonic-gate } 33337c478bd9Sstevel@tonic-gate } 33347c478bd9Sstevel@tonic-gate } 33357c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 33367c478bd9Sstevel@tonic-gate } else { 33377c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(odvp, &res.resfail.fromdir_wcc, t, cr); 33387c478bd9Sstevel@tonic-gate if (ndvp != odvp) { 33397c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(ndvp, &res.resfail.todir_wcc, t, 33407c478bd9Sstevel@tonic-gate cr); 33417c478bd9Sstevel@tonic-gate } 33427c478bd9Sstevel@tonic-gate /* 33437c478bd9Sstevel@tonic-gate * System V defines rename to return EEXIST, not 33447c478bd9Sstevel@tonic-gate * ENOTEMPTY if the 
target directory is not empty. 33457c478bd9Sstevel@tonic-gate * Over the wire, the error is NFSERR_ENOTEMPTY 33467c478bd9Sstevel@tonic-gate * which geterrno maps to ENOTEMPTY. 33477c478bd9Sstevel@tonic-gate */ 33487c478bd9Sstevel@tonic-gate if (error == ENOTEMPTY) 33497c478bd9Sstevel@tonic-gate error = EEXIST; 33507c478bd9Sstevel@tonic-gate } 33517c478bd9Sstevel@tonic-gate 3352df2381bfSpraks if (error == 0) { 3353df2381bfSpraks if (nvp) 3354da6c28aaSamw vnevent_rename_dest(nvp, ndvp, nnm, ct); 3355df2381bfSpraks 3356df2381bfSpraks if (odvp != ndvp) 3357da6c28aaSamw vnevent_rename_dest_dir(ndvp, ct); 3358df2381bfSpraks ASSERT(ovp != NULL); 3359da6c28aaSamw vnevent_rename_src(ovp, odvp, onm, ct); 3360df2381bfSpraks } 3361df2381bfSpraks 3362df2381bfSpraks if (nvp) { 3363df2381bfSpraks VN_RELE(nvp); 3364df2381bfSpraks } 33657c478bd9Sstevel@tonic-gate VN_RELE(ovp); 33667c478bd9Sstevel@tonic-gate 33677c478bd9Sstevel@tonic-gate nfs_rw_exit(&odrp->r_rwlock); 33687c478bd9Sstevel@tonic-gate nfs_rw_exit(&ndrp->r_rwlock); 33697c478bd9Sstevel@tonic-gate 33707c478bd9Sstevel@tonic-gate return (error); 33717c478bd9Sstevel@tonic-gate } 33727c478bd9Sstevel@tonic-gate 3373da6c28aaSamw /* ARGSUSED */ 33747c478bd9Sstevel@tonic-gate static int 3375da6c28aaSamw nfs3_mkdir(vnode_t *dvp, char *nm, struct vattr *va, vnode_t **vpp, cred_t *cr, 3376da6c28aaSamw caller_context_t *ct, int flags, vsecattr_t *vsecp) 33777c478bd9Sstevel@tonic-gate { 33787c478bd9Sstevel@tonic-gate int error; 33797c478bd9Sstevel@tonic-gate MKDIR3args args; 33807c478bd9Sstevel@tonic-gate MKDIR3res res; 33817c478bd9Sstevel@tonic-gate int douprintf; 33827c478bd9Sstevel@tonic-gate struct vattr vattr; 33837c478bd9Sstevel@tonic-gate vnode_t *vp; 33847c478bd9Sstevel@tonic-gate rnode_t *drp; 33857c478bd9Sstevel@tonic-gate hrtime_t t; 33867c478bd9Sstevel@tonic-gate 3387108322fbScarlsonj if (nfs_zone() != VTOMI(dvp)->mi_zone) 33887c478bd9Sstevel@tonic-gate return (EPERM); 33897c478bd9Sstevel@tonic-gate 
setdiropargs3(&args.where, nm, dvp); 33907c478bd9Sstevel@tonic-gate 33917c478bd9Sstevel@tonic-gate /* 33927c478bd9Sstevel@tonic-gate * Decide what the group-id and set-gid bit of the created directory 33937c478bd9Sstevel@tonic-gate * should be. May have to do a setattr to get the gid right. 33947c478bd9Sstevel@tonic-gate */ 33957c478bd9Sstevel@tonic-gate error = setdirgid(dvp, &va->va_gid, cr); 33967c478bd9Sstevel@tonic-gate if (error) 33977c478bd9Sstevel@tonic-gate return (error); 33987c478bd9Sstevel@tonic-gate error = setdirmode(dvp, &va->va_mode, cr); 33997c478bd9Sstevel@tonic-gate if (error) 34007c478bd9Sstevel@tonic-gate return (error); 34017c478bd9Sstevel@tonic-gate va->va_mask |= AT_MODE|AT_GID; 34027c478bd9Sstevel@tonic-gate 34037c478bd9Sstevel@tonic-gate error = vattr_to_sattr3(va, &args.attributes); 34047c478bd9Sstevel@tonic-gate if (error) { 34057c478bd9Sstevel@tonic-gate /* req time field(s) overflow - return immediately */ 34067c478bd9Sstevel@tonic-gate return (error); 34077c478bd9Sstevel@tonic-gate } 34087c478bd9Sstevel@tonic-gate 34097c478bd9Sstevel@tonic-gate drp = VTOR(dvp); 34107c478bd9Sstevel@tonic-gate if (nfs_rw_enter_sig(&drp->r_rwlock, RW_WRITER, INTR(dvp))) 34117c478bd9Sstevel@tonic-gate return (EINTR); 34127c478bd9Sstevel@tonic-gate 34137c478bd9Sstevel@tonic-gate dnlc_remove(dvp, nm); 34147c478bd9Sstevel@tonic-gate 34157c478bd9Sstevel@tonic-gate douprintf = 1; 34167c478bd9Sstevel@tonic-gate 34177c478bd9Sstevel@tonic-gate t = gethrtime(); 34187c478bd9Sstevel@tonic-gate 34197c478bd9Sstevel@tonic-gate error = rfs3call(VTOMI(dvp), NFSPROC3_MKDIR, 34207c478bd9Sstevel@tonic-gate xdr_MKDIR3args, (caddr_t)&args, 34217c478bd9Sstevel@tonic-gate xdr_MKDIR3res, (caddr_t)&res, cr, 34227c478bd9Sstevel@tonic-gate &douprintf, &res.status, 0, NULL); 34237c478bd9Sstevel@tonic-gate 34247c478bd9Sstevel@tonic-gate if (error) { 34257c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(dvp); 34267c478bd9Sstevel@tonic-gate nfs_rw_exit(&drp->r_rwlock); 
34277c478bd9Sstevel@tonic-gate return (error); 34287c478bd9Sstevel@tonic-gate } 34297c478bd9Sstevel@tonic-gate 34307c478bd9Sstevel@tonic-gate error = geterrno3(res.status); 34317c478bd9Sstevel@tonic-gate if (!error) { 34327c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(dvp, &res.resok.dir_wcc, t, cr); 34337c478bd9Sstevel@tonic-gate if (HAVE_RDDIR_CACHE(drp)) 34347c478bd9Sstevel@tonic-gate nfs_purge_rddir_cache(dvp); 34357c478bd9Sstevel@tonic-gate 34367c478bd9Sstevel@tonic-gate if (!res.resok.obj.handle_follows) { 34377c478bd9Sstevel@tonic-gate error = nfs3lookup(dvp, nm, &vp, NULL, 0, NULL, cr, 0); 34387c478bd9Sstevel@tonic-gate if (error) { 34397c478bd9Sstevel@tonic-gate nfs_rw_exit(&drp->r_rwlock); 34407c478bd9Sstevel@tonic-gate return (error); 34417c478bd9Sstevel@tonic-gate } 34427c478bd9Sstevel@tonic-gate } else { 34437c478bd9Sstevel@tonic-gate if (res.resok.obj_attributes.attributes) { 34447c478bd9Sstevel@tonic-gate vp = makenfs3node(&res.resok.obj.handle, 34457c478bd9Sstevel@tonic-gate &res.resok.obj_attributes.attr, 34467c478bd9Sstevel@tonic-gate dvp->v_vfsp, t, cr, NULL, NULL); 34477c478bd9Sstevel@tonic-gate } else { 34487c478bd9Sstevel@tonic-gate vp = makenfs3node(&res.resok.obj.handle, NULL, 34497c478bd9Sstevel@tonic-gate dvp->v_vfsp, t, cr, NULL, NULL); 34507c478bd9Sstevel@tonic-gate if (vp->v_type == VNON) { 34517c478bd9Sstevel@tonic-gate vattr.va_mask = AT_TYPE; 34527c478bd9Sstevel@tonic-gate error = nfs3getattr(vp, &vattr, cr); 34537c478bd9Sstevel@tonic-gate if (error) { 34547c478bd9Sstevel@tonic-gate VN_RELE(vp); 34557c478bd9Sstevel@tonic-gate nfs_rw_exit(&drp->r_rwlock); 34567c478bd9Sstevel@tonic-gate return (error); 34577c478bd9Sstevel@tonic-gate } 34587c478bd9Sstevel@tonic-gate vp->v_type = vattr.va_type; 34597c478bd9Sstevel@tonic-gate } 34607c478bd9Sstevel@tonic-gate } 34617c478bd9Sstevel@tonic-gate dnlc_update(dvp, nm, vp); 34627c478bd9Sstevel@tonic-gate } 34637c478bd9Sstevel@tonic-gate if (va->va_gid != VTOR(vp)->r_attr.va_gid) { 
34647c478bd9Sstevel@tonic-gate va->va_mask = AT_GID; 34657c478bd9Sstevel@tonic-gate (void) nfs3setattr(vp, va, 0, cr); 34667c478bd9Sstevel@tonic-gate } 34677c478bd9Sstevel@tonic-gate *vpp = vp; 34687c478bd9Sstevel@tonic-gate } else { 34697c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(dvp, &res.resfail.dir_wcc, t, cr); 34707c478bd9Sstevel@tonic-gate PURGE_STALE_FH(error, dvp, cr); 34717c478bd9Sstevel@tonic-gate } 34727c478bd9Sstevel@tonic-gate 34737c478bd9Sstevel@tonic-gate nfs_rw_exit(&drp->r_rwlock); 34747c478bd9Sstevel@tonic-gate 34757c478bd9Sstevel@tonic-gate return (error); 34767c478bd9Sstevel@tonic-gate } 34777c478bd9Sstevel@tonic-gate 3478da6c28aaSamw /* ARGSUSED */ 34797c478bd9Sstevel@tonic-gate static int 3480da6c28aaSamw nfs3_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr, 3481da6c28aaSamw caller_context_t *ct, int flags) 34827c478bd9Sstevel@tonic-gate { 34837c478bd9Sstevel@tonic-gate int error; 34847c478bd9Sstevel@tonic-gate RMDIR3args args; 34857c478bd9Sstevel@tonic-gate RMDIR3res res; 34867c478bd9Sstevel@tonic-gate vnode_t *vp; 34877c478bd9Sstevel@tonic-gate int douprintf; 34887c478bd9Sstevel@tonic-gate rnode_t *drp; 34897c478bd9Sstevel@tonic-gate hrtime_t t; 34907c478bd9Sstevel@tonic-gate 3491108322fbScarlsonj if (nfs_zone() != VTOMI(dvp)->mi_zone) 34927c478bd9Sstevel@tonic-gate return (EPERM); 34937c478bd9Sstevel@tonic-gate drp = VTOR(dvp); 34947c478bd9Sstevel@tonic-gate if (nfs_rw_enter_sig(&drp->r_rwlock, RW_WRITER, INTR(dvp))) 34957c478bd9Sstevel@tonic-gate return (EINTR); 34967c478bd9Sstevel@tonic-gate 34977c478bd9Sstevel@tonic-gate /* 34987c478bd9Sstevel@tonic-gate * Attempt to prevent a rmdir(".") from succeeding. 
34997c478bd9Sstevel@tonic-gate */ 35007c478bd9Sstevel@tonic-gate error = nfs3lookup(dvp, nm, &vp, NULL, 0, NULL, cr, 0); 35017c478bd9Sstevel@tonic-gate if (error) { 35027c478bd9Sstevel@tonic-gate nfs_rw_exit(&drp->r_rwlock); 35037c478bd9Sstevel@tonic-gate return (error); 35047c478bd9Sstevel@tonic-gate } 35057c478bd9Sstevel@tonic-gate 35067c478bd9Sstevel@tonic-gate if (vp == cdir) { 35077c478bd9Sstevel@tonic-gate VN_RELE(vp); 35087c478bd9Sstevel@tonic-gate nfs_rw_exit(&drp->r_rwlock); 35097c478bd9Sstevel@tonic-gate return (EINVAL); 35107c478bd9Sstevel@tonic-gate } 35117c478bd9Sstevel@tonic-gate 35127c478bd9Sstevel@tonic-gate setdiropargs3(&args.object, nm, dvp); 35137c478bd9Sstevel@tonic-gate 35147c478bd9Sstevel@tonic-gate /* 35157c478bd9Sstevel@tonic-gate * First just remove the entry from the name cache, as it 35167c478bd9Sstevel@tonic-gate * is most likely an entry for this vp. 35177c478bd9Sstevel@tonic-gate */ 35187c478bd9Sstevel@tonic-gate dnlc_remove(dvp, nm); 35197c478bd9Sstevel@tonic-gate 35207c478bd9Sstevel@tonic-gate /* 35217c478bd9Sstevel@tonic-gate * If there vnode reference count is greater than one, then 35227c478bd9Sstevel@tonic-gate * there may be additional references in the DNLC which will 35237c478bd9Sstevel@tonic-gate * need to be purged. First, trying removing the entry for 35247c478bd9Sstevel@tonic-gate * the parent directory and see if that removes the additional 35257c478bd9Sstevel@tonic-gate * reference(s). If that doesn't do it, then use dnlc_purge_vp 35267c478bd9Sstevel@tonic-gate * to completely remove any references to the directory which 35277c478bd9Sstevel@tonic-gate * might still exist in the DNLC. 
35287c478bd9Sstevel@tonic-gate */ 35297c478bd9Sstevel@tonic-gate if (vp->v_count > 1) { 35307c478bd9Sstevel@tonic-gate dnlc_remove(vp, ".."); 35317c478bd9Sstevel@tonic-gate if (vp->v_count > 1) 35327c478bd9Sstevel@tonic-gate dnlc_purge_vp(vp); 35337c478bd9Sstevel@tonic-gate } 35347c478bd9Sstevel@tonic-gate 35357c478bd9Sstevel@tonic-gate douprintf = 1; 35367c478bd9Sstevel@tonic-gate 35377c478bd9Sstevel@tonic-gate t = gethrtime(); 35387c478bd9Sstevel@tonic-gate 35397c478bd9Sstevel@tonic-gate error = rfs3call(VTOMI(dvp), NFSPROC3_RMDIR, 35407c478bd9Sstevel@tonic-gate xdr_diropargs3, (caddr_t)&args, 35417c478bd9Sstevel@tonic-gate xdr_RMDIR3res, (caddr_t)&res, cr, 35427c478bd9Sstevel@tonic-gate &douprintf, &res.status, 0, NULL); 35437c478bd9Sstevel@tonic-gate 35447c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(vp); 35457c478bd9Sstevel@tonic-gate 35467c478bd9Sstevel@tonic-gate if (error) { 35477c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(dvp); 35487c478bd9Sstevel@tonic-gate VN_RELE(vp); 35497c478bd9Sstevel@tonic-gate nfs_rw_exit(&drp->r_rwlock); 35507c478bd9Sstevel@tonic-gate return (error); 35517c478bd9Sstevel@tonic-gate } 35527c478bd9Sstevel@tonic-gate 35537c478bd9Sstevel@tonic-gate error = geterrno3(res.status); 35547c478bd9Sstevel@tonic-gate if (!error) { 35557c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(dvp, &res.resok.dir_wcc, t, cr); 35567c478bd9Sstevel@tonic-gate if (HAVE_RDDIR_CACHE(drp)) 35577c478bd9Sstevel@tonic-gate nfs_purge_rddir_cache(dvp); 35587c478bd9Sstevel@tonic-gate if (HAVE_RDDIR_CACHE(VTOR(vp))) 35597c478bd9Sstevel@tonic-gate nfs_purge_rddir_cache(vp); 35607c478bd9Sstevel@tonic-gate } else { 35617c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(dvp, &res.resfail.dir_wcc, t, cr); 35627c478bd9Sstevel@tonic-gate PURGE_STALE_FH(error, dvp, cr); 35637c478bd9Sstevel@tonic-gate /* 35647c478bd9Sstevel@tonic-gate * System V defines rmdir to return EEXIST, not 35657c478bd9Sstevel@tonic-gate * ENOTEMPTY if the directory is not empty. 
Over 35667c478bd9Sstevel@tonic-gate * the wire, the error is NFSERR_ENOTEMPTY which 35677c478bd9Sstevel@tonic-gate * geterrno maps to ENOTEMPTY. 35687c478bd9Sstevel@tonic-gate */ 35697c478bd9Sstevel@tonic-gate if (error == ENOTEMPTY) 35707c478bd9Sstevel@tonic-gate error = EEXIST; 35717c478bd9Sstevel@tonic-gate } 35727c478bd9Sstevel@tonic-gate 3573df2381bfSpraks if (error == 0) { 3574da6c28aaSamw vnevent_rmdir(vp, dvp, nm, ct); 3575df2381bfSpraks } 35767c478bd9Sstevel@tonic-gate VN_RELE(vp); 35777c478bd9Sstevel@tonic-gate 35787c478bd9Sstevel@tonic-gate nfs_rw_exit(&drp->r_rwlock); 35797c478bd9Sstevel@tonic-gate 35807c478bd9Sstevel@tonic-gate return (error); 35817c478bd9Sstevel@tonic-gate } 35827c478bd9Sstevel@tonic-gate 3583da6c28aaSamw /* ARGSUSED */ 35847c478bd9Sstevel@tonic-gate static int 3585da6c28aaSamw nfs3_symlink(vnode_t *dvp, char *lnm, struct vattr *tva, char *tnm, cred_t *cr, 3586da6c28aaSamw caller_context_t *ct, int flags) 35877c478bd9Sstevel@tonic-gate { 35887c478bd9Sstevel@tonic-gate int error; 35897c478bd9Sstevel@tonic-gate SYMLINK3args args; 35907c478bd9Sstevel@tonic-gate SYMLINK3res res; 35917c478bd9Sstevel@tonic-gate int douprintf; 35927c478bd9Sstevel@tonic-gate mntinfo_t *mi; 35937c478bd9Sstevel@tonic-gate vnode_t *vp; 35947c478bd9Sstevel@tonic-gate rnode_t *rp; 35957c478bd9Sstevel@tonic-gate char *contents; 35967c478bd9Sstevel@tonic-gate rnode_t *drp; 35977c478bd9Sstevel@tonic-gate hrtime_t t; 35987c478bd9Sstevel@tonic-gate 35997c478bd9Sstevel@tonic-gate mi = VTOMI(dvp); 36007c478bd9Sstevel@tonic-gate 3601108322fbScarlsonj if (nfs_zone() != mi->mi_zone) 36027c478bd9Sstevel@tonic-gate return (EPERM); 36037c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_SYMLINK)) 36047c478bd9Sstevel@tonic-gate return (EOPNOTSUPP); 36057c478bd9Sstevel@tonic-gate 36067c478bd9Sstevel@tonic-gate setdiropargs3(&args.where, lnm, dvp); 36077c478bd9Sstevel@tonic-gate error = vattr_to_sattr3(tva, &args.symlink.symlink_attributes); 36087c478bd9Sstevel@tonic-gate if 
(error) { 36097c478bd9Sstevel@tonic-gate /* req time field(s) overflow - return immediately */ 36107c478bd9Sstevel@tonic-gate return (error); 36117c478bd9Sstevel@tonic-gate } 36127c478bd9Sstevel@tonic-gate args.symlink.symlink_data = tnm; 36137c478bd9Sstevel@tonic-gate 36147c478bd9Sstevel@tonic-gate drp = VTOR(dvp); 36157c478bd9Sstevel@tonic-gate if (nfs_rw_enter_sig(&drp->r_rwlock, RW_WRITER, INTR(dvp))) 36167c478bd9Sstevel@tonic-gate return (EINTR); 36177c478bd9Sstevel@tonic-gate 36187c478bd9Sstevel@tonic-gate dnlc_remove(dvp, lnm); 36197c478bd9Sstevel@tonic-gate 36207c478bd9Sstevel@tonic-gate douprintf = 1; 36217c478bd9Sstevel@tonic-gate 36227c478bd9Sstevel@tonic-gate t = gethrtime(); 36237c478bd9Sstevel@tonic-gate 36247c478bd9Sstevel@tonic-gate error = rfs3call(mi, NFSPROC3_SYMLINK, 36257c478bd9Sstevel@tonic-gate xdr_SYMLINK3args, (caddr_t)&args, 36267c478bd9Sstevel@tonic-gate xdr_SYMLINK3res, (caddr_t)&res, cr, 36277c478bd9Sstevel@tonic-gate &douprintf, &res.status, 0, NULL); 36287c478bd9Sstevel@tonic-gate 36297c478bd9Sstevel@tonic-gate if (error) { 36307c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(dvp); 36317c478bd9Sstevel@tonic-gate nfs_rw_exit(&drp->r_rwlock); 36327c478bd9Sstevel@tonic-gate return (error); 36337c478bd9Sstevel@tonic-gate } 36347c478bd9Sstevel@tonic-gate 36357c478bd9Sstevel@tonic-gate error = geterrno3(res.status); 36367c478bd9Sstevel@tonic-gate if (!error) { 36377c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(dvp, &res.resok.dir_wcc, t, cr); 36387c478bd9Sstevel@tonic-gate if (HAVE_RDDIR_CACHE(drp)) 36397c478bd9Sstevel@tonic-gate nfs_purge_rddir_cache(dvp); 36407c478bd9Sstevel@tonic-gate 36417c478bd9Sstevel@tonic-gate if (res.resok.obj.handle_follows) { 36427c478bd9Sstevel@tonic-gate if (res.resok.obj_attributes.attributes) { 36437c478bd9Sstevel@tonic-gate vp = makenfs3node(&res.resok.obj.handle, 36447c478bd9Sstevel@tonic-gate &res.resok.obj_attributes.attr, 36457c478bd9Sstevel@tonic-gate dvp->v_vfsp, t, cr, NULL, NULL); 
36467c478bd9Sstevel@tonic-gate } else { 36477c478bd9Sstevel@tonic-gate vp = makenfs3node(&res.resok.obj.handle, NULL, 36487c478bd9Sstevel@tonic-gate dvp->v_vfsp, t, cr, NULL, NULL); 36497c478bd9Sstevel@tonic-gate vp->v_type = VLNK; 36507c478bd9Sstevel@tonic-gate vp->v_rdev = 0; 36517c478bd9Sstevel@tonic-gate } 36527c478bd9Sstevel@tonic-gate dnlc_update(dvp, lnm, vp); 36537c478bd9Sstevel@tonic-gate rp = VTOR(vp); 36547c478bd9Sstevel@tonic-gate if (nfs3_do_symlink_cache && 36557c478bd9Sstevel@tonic-gate rp->r_symlink.contents == NULL) { 36567c478bd9Sstevel@tonic-gate 36577c478bd9Sstevel@tonic-gate contents = kmem_alloc(MAXPATHLEN, 36587c478bd9Sstevel@tonic-gate KM_NOSLEEP); 36597c478bd9Sstevel@tonic-gate 36607c478bd9Sstevel@tonic-gate if (contents != NULL) { 36617c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 36627c478bd9Sstevel@tonic-gate if (rp->r_symlink.contents == NULL) { 36637c478bd9Sstevel@tonic-gate rp->r_symlink.len = strlen(tnm); 36647c478bd9Sstevel@tonic-gate bcopy(tnm, contents, 36657c478bd9Sstevel@tonic-gate rp->r_symlink.len); 36667c478bd9Sstevel@tonic-gate rp->r_symlink.contents = 36677c478bd9Sstevel@tonic-gate contents; 36687c478bd9Sstevel@tonic-gate rp->r_symlink.size = MAXPATHLEN; 36697c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 36707c478bd9Sstevel@tonic-gate } else { 36717c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 36727c478bd9Sstevel@tonic-gate kmem_free((void *)contents, 36737c478bd9Sstevel@tonic-gate MAXPATHLEN); 36747c478bd9Sstevel@tonic-gate } 36757c478bd9Sstevel@tonic-gate } 36767c478bd9Sstevel@tonic-gate } 36777c478bd9Sstevel@tonic-gate VN_RELE(vp); 36787c478bd9Sstevel@tonic-gate } 36797c478bd9Sstevel@tonic-gate } else { 36807c478bd9Sstevel@tonic-gate nfs3_cache_wcc_data(dvp, &res.resfail.dir_wcc, t, cr); 36817c478bd9Sstevel@tonic-gate PURGE_STALE_FH(error, dvp, cr); 36827c478bd9Sstevel@tonic-gate if (error == EOPNOTSUPP) { 36837c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 
36847c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI_SYMLINK; 36857c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 36867c478bd9Sstevel@tonic-gate } 36877c478bd9Sstevel@tonic-gate } 36887c478bd9Sstevel@tonic-gate 36897c478bd9Sstevel@tonic-gate nfs_rw_exit(&drp->r_rwlock); 36907c478bd9Sstevel@tonic-gate 36917c478bd9Sstevel@tonic-gate return (error); 36927c478bd9Sstevel@tonic-gate } 36937c478bd9Sstevel@tonic-gate 36947c478bd9Sstevel@tonic-gate #ifdef DEBUG 36957c478bd9Sstevel@tonic-gate static int nfs3_readdir_cache_hits = 0; 36967c478bd9Sstevel@tonic-gate static int nfs3_readdir_cache_shorts = 0; 36977c478bd9Sstevel@tonic-gate static int nfs3_readdir_cache_waits = 0; 36987c478bd9Sstevel@tonic-gate static int nfs3_readdir_cache_misses = 0; 36997c478bd9Sstevel@tonic-gate static int nfs3_readdir_readahead = 0; 37007c478bd9Sstevel@tonic-gate #endif 37017c478bd9Sstevel@tonic-gate 37027c478bd9Sstevel@tonic-gate static int nfs3_shrinkreaddir = 0; 37037c478bd9Sstevel@tonic-gate 37047c478bd9Sstevel@tonic-gate /* 37057c478bd9Sstevel@tonic-gate * Read directory entries. 37067c478bd9Sstevel@tonic-gate * There are some weird things to look out for here. The uio_loffset 37077c478bd9Sstevel@tonic-gate * field is either 0 or it is the offset returned from a previous 37087c478bd9Sstevel@tonic-gate * readdir. It is an opaque value used by the server to find the 37097c478bd9Sstevel@tonic-gate * correct directory block to read. The count field is the number 37107c478bd9Sstevel@tonic-gate * of blocks to read on the server. This is advisory only, the server 37117c478bd9Sstevel@tonic-gate * may return only one block's worth of entries. Entries may be compressed 37127c478bd9Sstevel@tonic-gate * on the server. 
37137c478bd9Sstevel@tonic-gate */ 3714da6c28aaSamw /* ARGSUSED */ 37157c478bd9Sstevel@tonic-gate static int 3716da6c28aaSamw nfs3_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp, 3717da6c28aaSamw caller_context_t *ct, int flags) 37187c478bd9Sstevel@tonic-gate { 37197c478bd9Sstevel@tonic-gate int error; 37207c478bd9Sstevel@tonic-gate size_t count; 37217c478bd9Sstevel@tonic-gate rnode_t *rp; 37227c478bd9Sstevel@tonic-gate rddir_cache *rdc; 37237c478bd9Sstevel@tonic-gate rddir_cache *nrdc; 37247c478bd9Sstevel@tonic-gate rddir_cache *rrdc; 37257c478bd9Sstevel@tonic-gate #ifdef DEBUG 37267c478bd9Sstevel@tonic-gate int missed; 37277c478bd9Sstevel@tonic-gate #endif 37287c478bd9Sstevel@tonic-gate int doreadahead; 37297c478bd9Sstevel@tonic-gate rddir_cache srdc; 37307c478bd9Sstevel@tonic-gate avl_index_t where; 37317c478bd9Sstevel@tonic-gate 3732108322fbScarlsonj if (nfs_zone() != VTOMI(vp)->mi_zone) 37337c478bd9Sstevel@tonic-gate return (EIO); 37347c478bd9Sstevel@tonic-gate rp = VTOR(vp); 37357c478bd9Sstevel@tonic-gate 37367c478bd9Sstevel@tonic-gate ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_READER)); 37377c478bd9Sstevel@tonic-gate 37387c478bd9Sstevel@tonic-gate /* 37397c478bd9Sstevel@tonic-gate * Make sure that the directory cache is valid. 37407c478bd9Sstevel@tonic-gate */ 37417c478bd9Sstevel@tonic-gate if (HAVE_RDDIR_CACHE(rp)) { 37427c478bd9Sstevel@tonic-gate if (nfs_disable_rddir_cache) { 37437c478bd9Sstevel@tonic-gate /* 37447c478bd9Sstevel@tonic-gate * Setting nfs_disable_rddir_cache in /etc/system 37457c478bd9Sstevel@tonic-gate * allows interoperability with servers that do not 37467c478bd9Sstevel@tonic-gate * properly update the attributes of directories. 37477c478bd9Sstevel@tonic-gate * Any cached information gets purged before an 37487c478bd9Sstevel@tonic-gate * access is made to it. 
37497c478bd9Sstevel@tonic-gate */ 37507c478bd9Sstevel@tonic-gate nfs_purge_rddir_cache(vp); 37517c478bd9Sstevel@tonic-gate } else { 37527c478bd9Sstevel@tonic-gate error = nfs3_validate_caches(vp, cr); 37537c478bd9Sstevel@tonic-gate if (error) 37547c478bd9Sstevel@tonic-gate return (error); 37557c478bd9Sstevel@tonic-gate } 37567c478bd9Sstevel@tonic-gate } 37577c478bd9Sstevel@tonic-gate 37587c478bd9Sstevel@tonic-gate /* 37597c478bd9Sstevel@tonic-gate * It is possible that some servers may not be able to correctly 37607c478bd9Sstevel@tonic-gate * handle a large READDIR or READDIRPLUS request due to bugs in 37617c478bd9Sstevel@tonic-gate * their implementation. In order to continue to interoperate 37627c478bd9Sstevel@tonic-gate * with them, this workaround is provided to limit the maximum 37637c478bd9Sstevel@tonic-gate * size of a READDIRPLUS request to 1024. In any case, the request 37647c478bd9Sstevel@tonic-gate * size is limited to MAXBSIZE. 37657c478bd9Sstevel@tonic-gate */ 37667c478bd9Sstevel@tonic-gate count = MIN(uiop->uio_iov->iov_len, 37677c478bd9Sstevel@tonic-gate nfs3_shrinkreaddir ? 1024 : MAXBSIZE); 37687c478bd9Sstevel@tonic-gate 37697c478bd9Sstevel@tonic-gate nrdc = NULL; 37707c478bd9Sstevel@tonic-gate #ifdef DEBUG 37717c478bd9Sstevel@tonic-gate missed = 0; 37727c478bd9Sstevel@tonic-gate #endif 37737c478bd9Sstevel@tonic-gate top: 37747c478bd9Sstevel@tonic-gate /* 37757c478bd9Sstevel@tonic-gate * Short circuit last readdir which always returns 0 bytes. 37767c478bd9Sstevel@tonic-gate * This can be done after the directory has been read through 37777c478bd9Sstevel@tonic-gate * completely at least once. This will set r_direof which 37787c478bd9Sstevel@tonic-gate * can be used to find the value of the last cookie. 
37797c478bd9Sstevel@tonic-gate */ 37807c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 37817c478bd9Sstevel@tonic-gate if (rp->r_direof != NULL && 37827c478bd9Sstevel@tonic-gate uiop->uio_loffset == rp->r_direof->nfs3_ncookie) { 37837c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 37847c478bd9Sstevel@tonic-gate #ifdef DEBUG 37857c478bd9Sstevel@tonic-gate nfs3_readdir_cache_shorts++; 37867c478bd9Sstevel@tonic-gate #endif 37877c478bd9Sstevel@tonic-gate if (eofp) 37887c478bd9Sstevel@tonic-gate *eofp = 1; 37897c478bd9Sstevel@tonic-gate if (nrdc != NULL) 37907c478bd9Sstevel@tonic-gate rddir_cache_rele(nrdc); 37917c478bd9Sstevel@tonic-gate return (0); 37927c478bd9Sstevel@tonic-gate } 37937c478bd9Sstevel@tonic-gate /* 37947c478bd9Sstevel@tonic-gate * Look for a cache entry. Cache entries are identified 37957c478bd9Sstevel@tonic-gate * by the NFS cookie value and the byte count requested. 37967c478bd9Sstevel@tonic-gate */ 37977c478bd9Sstevel@tonic-gate srdc.nfs3_cookie = uiop->uio_loffset; 37987c478bd9Sstevel@tonic-gate srdc.buflen = count; 37997c478bd9Sstevel@tonic-gate rdc = avl_find(&rp->r_dir, &srdc, &where); 38007c478bd9Sstevel@tonic-gate if (rdc != NULL) { 38017c478bd9Sstevel@tonic-gate rddir_cache_hold(rdc); 38027c478bd9Sstevel@tonic-gate /* 38037c478bd9Sstevel@tonic-gate * If the cache entry is in the process of being 38047c478bd9Sstevel@tonic-gate * filled in, wait until this completes. The 38057c478bd9Sstevel@tonic-gate * RDDIRWAIT bit is set to indicate that someone 38067c478bd9Sstevel@tonic-gate * is waiting and then the thread currently 38077c478bd9Sstevel@tonic-gate * filling the entry is done, it should do a 38087c478bd9Sstevel@tonic-gate * cv_broadcast to wakeup all of the threads 38097c478bd9Sstevel@tonic-gate * waiting for it to finish. 
38107c478bd9Sstevel@tonic-gate */ 38117c478bd9Sstevel@tonic-gate if (rdc->flags & RDDIR) { 38127c478bd9Sstevel@tonic-gate nfs_rw_exit(&rp->r_rwlock); 38137c478bd9Sstevel@tonic-gate rdc->flags |= RDDIRWAIT; 38147c478bd9Sstevel@tonic-gate #ifdef DEBUG 38157c478bd9Sstevel@tonic-gate nfs3_readdir_cache_waits++; 38167c478bd9Sstevel@tonic-gate #endif 38177c478bd9Sstevel@tonic-gate if (!cv_wait_sig(&rdc->cv, &rp->r_statelock)) { 38187c478bd9Sstevel@tonic-gate /* 38197c478bd9Sstevel@tonic-gate * We got interrupted, probably 38207c478bd9Sstevel@tonic-gate * the user typed ^C or an alarm 38217c478bd9Sstevel@tonic-gate * fired. We free the new entry 38227c478bd9Sstevel@tonic-gate * if we allocated one. 38237c478bd9Sstevel@tonic-gate */ 38247c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 38257c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&rp->r_rwlock, 38267c478bd9Sstevel@tonic-gate RW_READER, FALSE); 38277c478bd9Sstevel@tonic-gate rddir_cache_rele(rdc); 38287c478bd9Sstevel@tonic-gate if (nrdc != NULL) 38297c478bd9Sstevel@tonic-gate rddir_cache_rele(nrdc); 38307c478bd9Sstevel@tonic-gate return (EINTR); 38317c478bd9Sstevel@tonic-gate } 38327c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 38337c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&rp->r_rwlock, 38347c478bd9Sstevel@tonic-gate RW_READER, FALSE); 38357c478bd9Sstevel@tonic-gate rddir_cache_rele(rdc); 38367c478bd9Sstevel@tonic-gate goto top; 38377c478bd9Sstevel@tonic-gate } 38387c478bd9Sstevel@tonic-gate /* 38397c478bd9Sstevel@tonic-gate * Check to see if a readdir is required to 38407c478bd9Sstevel@tonic-gate * fill the entry. If so, mark this entry 38417c478bd9Sstevel@tonic-gate * as being filled, remove our reference, 38427c478bd9Sstevel@tonic-gate * and branch to the code to fill the entry. 
38437c478bd9Sstevel@tonic-gate */ 38447c478bd9Sstevel@tonic-gate if (rdc->flags & RDDIRREQ) { 38457c478bd9Sstevel@tonic-gate rdc->flags &= ~RDDIRREQ; 38467c478bd9Sstevel@tonic-gate rdc->flags |= RDDIR; 38477c478bd9Sstevel@tonic-gate if (nrdc != NULL) 38487c478bd9Sstevel@tonic-gate rddir_cache_rele(nrdc); 38497c478bd9Sstevel@tonic-gate nrdc = rdc; 38507c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 38517c478bd9Sstevel@tonic-gate goto bottom; 38527c478bd9Sstevel@tonic-gate } 38537c478bd9Sstevel@tonic-gate #ifdef DEBUG 38547c478bd9Sstevel@tonic-gate if (!missed) 38557c478bd9Sstevel@tonic-gate nfs3_readdir_cache_hits++; 38567c478bd9Sstevel@tonic-gate #endif 38577c478bd9Sstevel@tonic-gate /* 38587c478bd9Sstevel@tonic-gate * If an error occurred while attempting 38597c478bd9Sstevel@tonic-gate * to fill the cache entry, just return it. 38607c478bd9Sstevel@tonic-gate */ 38617c478bd9Sstevel@tonic-gate if (rdc->error) { 38627c478bd9Sstevel@tonic-gate error = rdc->error; 38637c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 38647c478bd9Sstevel@tonic-gate rddir_cache_rele(rdc); 38657c478bd9Sstevel@tonic-gate if (nrdc != NULL) 38667c478bd9Sstevel@tonic-gate rddir_cache_rele(nrdc); 38677c478bd9Sstevel@tonic-gate return (error); 38687c478bd9Sstevel@tonic-gate } 38697c478bd9Sstevel@tonic-gate 38707c478bd9Sstevel@tonic-gate /* 38717c478bd9Sstevel@tonic-gate * The cache entry is complete and good, 38727c478bd9Sstevel@tonic-gate * copyout the dirent structs to the calling 38737c478bd9Sstevel@tonic-gate * thread. 
38747c478bd9Sstevel@tonic-gate */ 38757c478bd9Sstevel@tonic-gate error = uiomove(rdc->entries, rdc->entlen, UIO_READ, uiop); 38767c478bd9Sstevel@tonic-gate 38777c478bd9Sstevel@tonic-gate /* 38787c478bd9Sstevel@tonic-gate * If no error occurred during the copyout, 38797c478bd9Sstevel@tonic-gate * update the offset in the uio struct to 38807c478bd9Sstevel@tonic-gate * contain the value of the next cookie 38817c478bd9Sstevel@tonic-gate * and set the eof value appropriately. 38827c478bd9Sstevel@tonic-gate */ 38837c478bd9Sstevel@tonic-gate if (!error) { 38847c478bd9Sstevel@tonic-gate uiop->uio_loffset = rdc->nfs3_ncookie; 38857c478bd9Sstevel@tonic-gate if (eofp) 38867c478bd9Sstevel@tonic-gate *eofp = rdc->eof; 38877c478bd9Sstevel@tonic-gate } 38887c478bd9Sstevel@tonic-gate 38897c478bd9Sstevel@tonic-gate /* 38907c478bd9Sstevel@tonic-gate * Decide whether to do readahead. 38917c478bd9Sstevel@tonic-gate * 38927c478bd9Sstevel@tonic-gate * Don't if have already read to the end of 38937c478bd9Sstevel@tonic-gate * directory. There is nothing more to read. 38947c478bd9Sstevel@tonic-gate * 38957c478bd9Sstevel@tonic-gate * Don't if the application is not doing 38967c478bd9Sstevel@tonic-gate * lookups in the directory. The readahead 38977c478bd9Sstevel@tonic-gate * is only effective if the application can 38987c478bd9Sstevel@tonic-gate * be doing work while an async thread is 38997c478bd9Sstevel@tonic-gate * handling the over the wire request. 
39007c478bd9Sstevel@tonic-gate */ 39017c478bd9Sstevel@tonic-gate if (rdc->eof) { 39027c478bd9Sstevel@tonic-gate rp->r_direof = rdc; 39037c478bd9Sstevel@tonic-gate doreadahead = FALSE; 39047c478bd9Sstevel@tonic-gate } else if (!(rp->r_flags & RLOOKUP)) 39057c478bd9Sstevel@tonic-gate doreadahead = FALSE; 39067c478bd9Sstevel@tonic-gate else 39077c478bd9Sstevel@tonic-gate doreadahead = TRUE; 39087c478bd9Sstevel@tonic-gate 39097c478bd9Sstevel@tonic-gate if (!doreadahead) { 39107c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 39117c478bd9Sstevel@tonic-gate rddir_cache_rele(rdc); 39127c478bd9Sstevel@tonic-gate if (nrdc != NULL) 39137c478bd9Sstevel@tonic-gate rddir_cache_rele(nrdc); 39147c478bd9Sstevel@tonic-gate return (error); 39157c478bd9Sstevel@tonic-gate } 39167c478bd9Sstevel@tonic-gate 39177c478bd9Sstevel@tonic-gate /* 39187c478bd9Sstevel@tonic-gate * Check to see whether we found an entry 39197c478bd9Sstevel@tonic-gate * for the readahead. If so, we don't need 39207c478bd9Sstevel@tonic-gate * to do anything further, so free the new 39217c478bd9Sstevel@tonic-gate * entry if one was allocated. Otherwise, 39227c478bd9Sstevel@tonic-gate * allocate a new entry, add it to the cache, 39237c478bd9Sstevel@tonic-gate * and then initiate an asynchronous readdir 39247c478bd9Sstevel@tonic-gate * operation to fill it. 
39257c478bd9Sstevel@tonic-gate */ 39267c478bd9Sstevel@tonic-gate srdc.nfs3_cookie = rdc->nfs3_ncookie; 39277c478bd9Sstevel@tonic-gate srdc.buflen = count; 39287c478bd9Sstevel@tonic-gate rrdc = avl_find(&rp->r_dir, &srdc, &where); 39297c478bd9Sstevel@tonic-gate if (rrdc != NULL) { 39307c478bd9Sstevel@tonic-gate if (nrdc != NULL) 39317c478bd9Sstevel@tonic-gate rddir_cache_rele(nrdc); 39327c478bd9Sstevel@tonic-gate } else { 39337c478bd9Sstevel@tonic-gate if (nrdc != NULL) 39347c478bd9Sstevel@tonic-gate rrdc = nrdc; 39357c478bd9Sstevel@tonic-gate else { 39367c478bd9Sstevel@tonic-gate rrdc = rddir_cache_alloc(KM_NOSLEEP); 39377c478bd9Sstevel@tonic-gate } 39387c478bd9Sstevel@tonic-gate if (rrdc != NULL) { 39397c478bd9Sstevel@tonic-gate rrdc->nfs3_cookie = rdc->nfs3_ncookie; 39407c478bd9Sstevel@tonic-gate rrdc->buflen = count; 39417c478bd9Sstevel@tonic-gate avl_insert(&rp->r_dir, rrdc, where); 39427c478bd9Sstevel@tonic-gate rddir_cache_hold(rrdc); 39437c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 39447c478bd9Sstevel@tonic-gate rddir_cache_rele(rdc); 39457c478bd9Sstevel@tonic-gate #ifdef DEBUG 39467c478bd9Sstevel@tonic-gate nfs3_readdir_readahead++; 39477c478bd9Sstevel@tonic-gate #endif 39487c478bd9Sstevel@tonic-gate nfs_async_readdir(vp, rrdc, cr, do_nfs3readdir); 39497c478bd9Sstevel@tonic-gate return (error); 39507c478bd9Sstevel@tonic-gate } 39517c478bd9Sstevel@tonic-gate } 39527c478bd9Sstevel@tonic-gate 39537c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 39547c478bd9Sstevel@tonic-gate rddir_cache_rele(rdc); 39557c478bd9Sstevel@tonic-gate return (error); 39567c478bd9Sstevel@tonic-gate } 39577c478bd9Sstevel@tonic-gate 39587c478bd9Sstevel@tonic-gate /* 39597c478bd9Sstevel@tonic-gate * Didn't find an entry in the cache. Construct a new empty 39607c478bd9Sstevel@tonic-gate * entry and link it into the cache. Other processes attempting 39617c478bd9Sstevel@tonic-gate * to access this entry will need to wait until it is filled in. 
39627c478bd9Sstevel@tonic-gate * 39637c478bd9Sstevel@tonic-gate * Since kmem_alloc may block, another pass through the cache 39647c478bd9Sstevel@tonic-gate * will need to be taken to make sure that another process 39657c478bd9Sstevel@tonic-gate * hasn't already added an entry to the cache for this request. 39667c478bd9Sstevel@tonic-gate */ 39677c478bd9Sstevel@tonic-gate if (nrdc == NULL) { 39687c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 39697c478bd9Sstevel@tonic-gate nrdc = rddir_cache_alloc(KM_SLEEP); 39707c478bd9Sstevel@tonic-gate nrdc->nfs3_cookie = uiop->uio_loffset; 39717c478bd9Sstevel@tonic-gate nrdc->buflen = count; 39727c478bd9Sstevel@tonic-gate goto top; 39737c478bd9Sstevel@tonic-gate } 39747c478bd9Sstevel@tonic-gate 39757c478bd9Sstevel@tonic-gate /* 39767c478bd9Sstevel@tonic-gate * Add this entry to the cache. 39777c478bd9Sstevel@tonic-gate */ 39787c478bd9Sstevel@tonic-gate avl_insert(&rp->r_dir, nrdc, where); 39797c478bd9Sstevel@tonic-gate rddir_cache_hold(nrdc); 39807c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 39817c478bd9Sstevel@tonic-gate 39827c478bd9Sstevel@tonic-gate bottom: 39837c478bd9Sstevel@tonic-gate #ifdef DEBUG 39847c478bd9Sstevel@tonic-gate missed = 1; 39857c478bd9Sstevel@tonic-gate nfs3_readdir_cache_misses++; 39867c478bd9Sstevel@tonic-gate #endif 39877c478bd9Sstevel@tonic-gate /* 39887c478bd9Sstevel@tonic-gate * Do the readdir. This routine decides whether to use 39897c478bd9Sstevel@tonic-gate * READDIR or READDIRPLUS. 39907c478bd9Sstevel@tonic-gate */ 39917c478bd9Sstevel@tonic-gate error = do_nfs3readdir(vp, nrdc, cr); 39927c478bd9Sstevel@tonic-gate 39937c478bd9Sstevel@tonic-gate /* 39947c478bd9Sstevel@tonic-gate * If this operation failed, just return the error which occurred. 
39957c478bd9Sstevel@tonic-gate */ 39967c478bd9Sstevel@tonic-gate if (error != 0) 39977c478bd9Sstevel@tonic-gate return (error); 39987c478bd9Sstevel@tonic-gate 39997c478bd9Sstevel@tonic-gate /* 40007c478bd9Sstevel@tonic-gate * Since the RPC operation will have taken sometime and blocked 40017c478bd9Sstevel@tonic-gate * this process, another pass through the cache will need to be 40027c478bd9Sstevel@tonic-gate * taken to find the correct cache entry. It is possible that 40037c478bd9Sstevel@tonic-gate * the correct cache entry will not be there (although one was 40047c478bd9Sstevel@tonic-gate * added) because the directory changed during the RPC operation 40057c478bd9Sstevel@tonic-gate * and the readdir cache was flushed. In this case, just start 40067c478bd9Sstevel@tonic-gate * over. It is hoped that this will not happen too often... :-) 40077c478bd9Sstevel@tonic-gate */ 40087c478bd9Sstevel@tonic-gate nrdc = NULL; 40097c478bd9Sstevel@tonic-gate goto top; 40107c478bd9Sstevel@tonic-gate /* NOTREACHED */ 40117c478bd9Sstevel@tonic-gate } 40127c478bd9Sstevel@tonic-gate 40137c478bd9Sstevel@tonic-gate static int 40147c478bd9Sstevel@tonic-gate do_nfs3readdir(vnode_t *vp, rddir_cache *rdc, cred_t *cr) 40157c478bd9Sstevel@tonic-gate { 40167c478bd9Sstevel@tonic-gate int error; 40177c478bd9Sstevel@tonic-gate rnode_t *rp; 40187c478bd9Sstevel@tonic-gate mntinfo_t *mi; 40197c478bd9Sstevel@tonic-gate 40207c478bd9Sstevel@tonic-gate rp = VTOR(vp); 40217c478bd9Sstevel@tonic-gate mi = VTOMI(vp); 4022108322fbScarlsonj ASSERT(nfs_zone() == mi->mi_zone); 40237c478bd9Sstevel@tonic-gate /* 40247c478bd9Sstevel@tonic-gate * Issue the proper request. 40257c478bd9Sstevel@tonic-gate * 40267c478bd9Sstevel@tonic-gate * If the server does not support READDIRPLUS, then use READDIR. 
40277c478bd9Sstevel@tonic-gate * 40287c478bd9Sstevel@tonic-gate * Otherwise -- 40297c478bd9Sstevel@tonic-gate * Issue a READDIRPLUS if reading to fill an empty cache or if 40307c478bd9Sstevel@tonic-gate * an application has performed a lookup in the directory which 40317c478bd9Sstevel@tonic-gate * required an over the wire lookup. The use of READDIRPLUS 40327c478bd9Sstevel@tonic-gate * will help to (re)populate the DNLC. 40337c478bd9Sstevel@tonic-gate */ 40347c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_READDIRONLY) && 40357c478bd9Sstevel@tonic-gate (rp->r_flags & (RLOOKUP | RREADDIRPLUS))) { 40367c478bd9Sstevel@tonic-gate if (rp->r_flags & RREADDIRPLUS) { 40377c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 40387c478bd9Sstevel@tonic-gate rp->r_flags &= ~RREADDIRPLUS; 40397c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 40407c478bd9Sstevel@tonic-gate } 40417c478bd9Sstevel@tonic-gate nfs3readdirplus(vp, rdc, cr); 40427c478bd9Sstevel@tonic-gate if (rdc->error == EOPNOTSUPP) 40437c478bd9Sstevel@tonic-gate nfs3readdir(vp, rdc, cr); 40447c478bd9Sstevel@tonic-gate } else 40457c478bd9Sstevel@tonic-gate nfs3readdir(vp, rdc, cr); 40467c478bd9Sstevel@tonic-gate 40477c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 40487c478bd9Sstevel@tonic-gate rdc->flags &= ~RDDIR; 40497c478bd9Sstevel@tonic-gate if (rdc->flags & RDDIRWAIT) { 40507c478bd9Sstevel@tonic-gate rdc->flags &= ~RDDIRWAIT; 40517c478bd9Sstevel@tonic-gate cv_broadcast(&rdc->cv); 40527c478bd9Sstevel@tonic-gate } 40537c478bd9Sstevel@tonic-gate error = rdc->error; 40547c478bd9Sstevel@tonic-gate if (error) 40557c478bd9Sstevel@tonic-gate rdc->flags |= RDDIRREQ; 40567c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 40577c478bd9Sstevel@tonic-gate 40587c478bd9Sstevel@tonic-gate rddir_cache_rele(rdc); 40597c478bd9Sstevel@tonic-gate 40607c478bd9Sstevel@tonic-gate return (error); 40617c478bd9Sstevel@tonic-gate } 40627c478bd9Sstevel@tonic-gate 40637c478bd9Sstevel@tonic-gate static void 
40647c478bd9Sstevel@tonic-gate nfs3readdir(vnode_t *vp, rddir_cache *rdc, cred_t *cr) 40657c478bd9Sstevel@tonic-gate { 40667c478bd9Sstevel@tonic-gate int error; 40677c478bd9Sstevel@tonic-gate READDIR3args args; 40687c478bd9Sstevel@tonic-gate READDIR3vres res; 40697c478bd9Sstevel@tonic-gate vattr_t dva; 40707c478bd9Sstevel@tonic-gate rnode_t *rp; 40717c478bd9Sstevel@tonic-gate int douprintf; 40727c478bd9Sstevel@tonic-gate failinfo_t fi, *fip = NULL; 40737c478bd9Sstevel@tonic-gate mntinfo_t *mi; 40747c478bd9Sstevel@tonic-gate hrtime_t t; 40757c478bd9Sstevel@tonic-gate 40767c478bd9Sstevel@tonic-gate rp = VTOR(vp); 40777c478bd9Sstevel@tonic-gate mi = VTOMI(vp); 4078108322fbScarlsonj ASSERT(nfs_zone() == mi->mi_zone); 40797c478bd9Sstevel@tonic-gate 40807c478bd9Sstevel@tonic-gate args.dir = *RTOFH3(rp); 40817c478bd9Sstevel@tonic-gate args.cookie = (cookie3)rdc->nfs3_cookie; 40827c478bd9Sstevel@tonic-gate args.cookieverf = rp->r_cookieverf; 40837c478bd9Sstevel@tonic-gate args.count = rdc->buflen; 40847c478bd9Sstevel@tonic-gate 40857c478bd9Sstevel@tonic-gate /* 40867c478bd9Sstevel@tonic-gate * NFS client failover support 40877c478bd9Sstevel@tonic-gate * suppress failover unless we have a zero cookie 40887c478bd9Sstevel@tonic-gate */ 40897c478bd9Sstevel@tonic-gate if (args.cookie == (cookie3) 0) { 40907c478bd9Sstevel@tonic-gate fi.vp = vp; 40917c478bd9Sstevel@tonic-gate fi.fhp = (caddr_t)&args.dir; 40927c478bd9Sstevel@tonic-gate fi.copyproc = nfs3copyfh; 40937c478bd9Sstevel@tonic-gate fi.lookupproc = nfs3lookup; 40947c478bd9Sstevel@tonic-gate fi.xattrdirproc = acl_getxattrdir3; 40957c478bd9Sstevel@tonic-gate fip = &fi; 40967c478bd9Sstevel@tonic-gate } 40977c478bd9Sstevel@tonic-gate 40987c478bd9Sstevel@tonic-gate #ifdef DEBUG 40997c478bd9Sstevel@tonic-gate rdc->entries = rddir_cache_buf_alloc(rdc->buflen, KM_SLEEP); 41007c478bd9Sstevel@tonic-gate #else 41017c478bd9Sstevel@tonic-gate rdc->entries = kmem_alloc(rdc->buflen, KM_SLEEP); 41027c478bd9Sstevel@tonic-gate #endif 
41037c478bd9Sstevel@tonic-gate 41047c478bd9Sstevel@tonic-gate res.entries = (dirent64_t *)rdc->entries; 41057c478bd9Sstevel@tonic-gate res.entries_size = rdc->buflen; 41067c478bd9Sstevel@tonic-gate res.dir_attributes.fres.vap = &dva; 41077c478bd9Sstevel@tonic-gate res.dir_attributes.fres.vp = vp; 41087c478bd9Sstevel@tonic-gate res.loff = rdc->nfs3_cookie; 41097c478bd9Sstevel@tonic-gate 41107c478bd9Sstevel@tonic-gate douprintf = 1; 41117c478bd9Sstevel@tonic-gate 41127c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) { 41137c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 41147c478bd9Sstevel@tonic-gate kstat_runq_enter(KSTAT_IO_PTR(mi->mi_io_kstats)); 41157c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 41167c478bd9Sstevel@tonic-gate } 41177c478bd9Sstevel@tonic-gate 41187c478bd9Sstevel@tonic-gate t = gethrtime(); 41197c478bd9Sstevel@tonic-gate 41207c478bd9Sstevel@tonic-gate error = rfs3call(VTOMI(vp), NFSPROC3_READDIR, 41217c478bd9Sstevel@tonic-gate xdr_READDIR3args, (caddr_t)&args, 41227c478bd9Sstevel@tonic-gate xdr_READDIR3vres, (caddr_t)&res, cr, 41237c478bd9Sstevel@tonic-gate &douprintf, &res.status, 0, fip); 41247c478bd9Sstevel@tonic-gate 41257c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) { 41267c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 41277c478bd9Sstevel@tonic-gate kstat_runq_exit(KSTAT_IO_PTR(mi->mi_io_kstats)); 41287c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 41297c478bd9Sstevel@tonic-gate } 41307c478bd9Sstevel@tonic-gate 41317c478bd9Sstevel@tonic-gate if (error) 41327c478bd9Sstevel@tonic-gate goto err; 41337c478bd9Sstevel@tonic-gate 41347c478bd9Sstevel@tonic-gate nfs3_cache_post_op_vattr(vp, &res.dir_attributes, t, cr); 41357c478bd9Sstevel@tonic-gate 41367c478bd9Sstevel@tonic-gate error = geterrno3(res.status); 41377c478bd9Sstevel@tonic-gate if (error) { 41387c478bd9Sstevel@tonic-gate PURGE_STALE_FH(error, vp, cr); 41397c478bd9Sstevel@tonic-gate goto err; 41407c478bd9Sstevel@tonic-gate } 41417c478bd9Sstevel@tonic-gate 
41427c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) { 41437c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 41447c478bd9Sstevel@tonic-gate KSTAT_IO_PTR(mi->mi_io_kstats)->reads++; 41457c478bd9Sstevel@tonic-gate KSTAT_IO_PTR(mi->mi_io_kstats)->nread += res.size; 41467c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 41477c478bd9Sstevel@tonic-gate } 41487c478bd9Sstevel@tonic-gate 41497c478bd9Sstevel@tonic-gate rdc->nfs3_ncookie = res.loff; 41507c478bd9Sstevel@tonic-gate rp->r_cookieverf = res.cookieverf; 41517c478bd9Sstevel@tonic-gate rdc->eof = res.eof ? 1 : 0; 41527c478bd9Sstevel@tonic-gate rdc->entlen = res.size; 41537c478bd9Sstevel@tonic-gate ASSERT(rdc->entlen <= rdc->buflen); 41547c478bd9Sstevel@tonic-gate rdc->error = 0; 41557c478bd9Sstevel@tonic-gate return; 41567c478bd9Sstevel@tonic-gate 41577c478bd9Sstevel@tonic-gate err: 41587c478bd9Sstevel@tonic-gate kmem_free(rdc->entries, rdc->buflen); 41597c478bd9Sstevel@tonic-gate rdc->entries = NULL; 41607c478bd9Sstevel@tonic-gate rdc->error = error; 41617c478bd9Sstevel@tonic-gate } 41627c478bd9Sstevel@tonic-gate 41637c478bd9Sstevel@tonic-gate /* 41647c478bd9Sstevel@tonic-gate * Read directory entries. 41657c478bd9Sstevel@tonic-gate * There are some weird things to look out for here. The uio_loffset 41667c478bd9Sstevel@tonic-gate * field is either 0 or it is the offset returned from a previous 41677c478bd9Sstevel@tonic-gate * readdir. It is an opaque value used by the server to find the 41687c478bd9Sstevel@tonic-gate * correct directory block to read. The count field is the number 41697c478bd9Sstevel@tonic-gate * of blocks to read on the server. This is advisory only, the server 41707c478bd9Sstevel@tonic-gate * may return only one block's worth of entries. Entries may be compressed 41717c478bd9Sstevel@tonic-gate * on the server. 
41727c478bd9Sstevel@tonic-gate */ 41737c478bd9Sstevel@tonic-gate static void 41747c478bd9Sstevel@tonic-gate nfs3readdirplus(vnode_t *vp, rddir_cache *rdc, cred_t *cr) 41757c478bd9Sstevel@tonic-gate { 41767c478bd9Sstevel@tonic-gate int error; 41777c478bd9Sstevel@tonic-gate READDIRPLUS3args args; 41787c478bd9Sstevel@tonic-gate READDIRPLUS3vres res; 41797c478bd9Sstevel@tonic-gate vattr_t dva; 41807c478bd9Sstevel@tonic-gate rnode_t *rp; 41817c478bd9Sstevel@tonic-gate mntinfo_t *mi; 41827c478bd9Sstevel@tonic-gate int douprintf; 41837c478bd9Sstevel@tonic-gate failinfo_t fi, *fip = NULL; 41847c478bd9Sstevel@tonic-gate 41857c478bd9Sstevel@tonic-gate rp = VTOR(vp); 41867c478bd9Sstevel@tonic-gate mi = VTOMI(vp); 4187108322fbScarlsonj ASSERT(nfs_zone() == mi->mi_zone); 41887c478bd9Sstevel@tonic-gate 41897c478bd9Sstevel@tonic-gate args.dir = *RTOFH3(rp); 41907c478bd9Sstevel@tonic-gate args.cookie = (cookie3)rdc->nfs3_cookie; 41917c478bd9Sstevel@tonic-gate args.cookieverf = rp->r_cookieverf; 41927c478bd9Sstevel@tonic-gate args.dircount = rdc->buflen; 41937c478bd9Sstevel@tonic-gate args.maxcount = mi->mi_tsize; 41947c478bd9Sstevel@tonic-gate 41957c478bd9Sstevel@tonic-gate /* 41967c478bd9Sstevel@tonic-gate * NFS client failover support 41977c478bd9Sstevel@tonic-gate * suppress failover unless we have a zero cookie 41987c478bd9Sstevel@tonic-gate */ 41997c478bd9Sstevel@tonic-gate if (args.cookie == (cookie3)0) { 42007c478bd9Sstevel@tonic-gate fi.vp = vp; 42017c478bd9Sstevel@tonic-gate fi.fhp = (caddr_t)&args.dir; 42027c478bd9Sstevel@tonic-gate fi.copyproc = nfs3copyfh; 42037c478bd9Sstevel@tonic-gate fi.lookupproc = nfs3lookup; 42047c478bd9Sstevel@tonic-gate fi.xattrdirproc = acl_getxattrdir3; 42057c478bd9Sstevel@tonic-gate fip = &fi; 42067c478bd9Sstevel@tonic-gate } 42077c478bd9Sstevel@tonic-gate 42087c478bd9Sstevel@tonic-gate #ifdef DEBUG 42097c478bd9Sstevel@tonic-gate rdc->entries = rddir_cache_buf_alloc(rdc->buflen, KM_SLEEP); 42107c478bd9Sstevel@tonic-gate #else 
42117c478bd9Sstevel@tonic-gate rdc->entries = kmem_alloc(rdc->buflen, KM_SLEEP); 42127c478bd9Sstevel@tonic-gate #endif 42137c478bd9Sstevel@tonic-gate 42147c478bd9Sstevel@tonic-gate res.entries = (dirent64_t *)rdc->entries; 42157c478bd9Sstevel@tonic-gate res.entries_size = rdc->buflen; 42167c478bd9Sstevel@tonic-gate res.dir_attributes.fres.vap = &dva; 42177c478bd9Sstevel@tonic-gate res.dir_attributes.fres.vp = vp; 42187c478bd9Sstevel@tonic-gate res.loff = rdc->nfs3_cookie; 42197c478bd9Sstevel@tonic-gate res.credentials = cr; 42207c478bd9Sstevel@tonic-gate 42217c478bd9Sstevel@tonic-gate douprintf = 1; 42227c478bd9Sstevel@tonic-gate 42237c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) { 42247c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 42257c478bd9Sstevel@tonic-gate kstat_runq_enter(KSTAT_IO_PTR(mi->mi_io_kstats)); 42267c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 42277c478bd9Sstevel@tonic-gate } 42287c478bd9Sstevel@tonic-gate 42297c478bd9Sstevel@tonic-gate res.time = gethrtime(); 42307c478bd9Sstevel@tonic-gate 42317c478bd9Sstevel@tonic-gate error = rfs3call(mi, NFSPROC3_READDIRPLUS, 42327c478bd9Sstevel@tonic-gate xdr_READDIRPLUS3args, (caddr_t)&args, 42337c478bd9Sstevel@tonic-gate xdr_READDIRPLUS3vres, (caddr_t)&res, cr, 42347c478bd9Sstevel@tonic-gate &douprintf, &res.status, 0, fip); 42357c478bd9Sstevel@tonic-gate 42367c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) { 42377c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 42387c478bd9Sstevel@tonic-gate kstat_runq_exit(KSTAT_IO_PTR(mi->mi_io_kstats)); 42397c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 42407c478bd9Sstevel@tonic-gate } 42417c478bd9Sstevel@tonic-gate 42427c478bd9Sstevel@tonic-gate if (error) { 42437c478bd9Sstevel@tonic-gate goto err; 42447c478bd9Sstevel@tonic-gate } 42457c478bd9Sstevel@tonic-gate 42467c478bd9Sstevel@tonic-gate nfs3_cache_post_op_vattr(vp, &res.dir_attributes, res.time, cr); 42477c478bd9Sstevel@tonic-gate 42487c478bd9Sstevel@tonic-gate error = 
geterrno3(res.status); 42497c478bd9Sstevel@tonic-gate if (error) { 42507c478bd9Sstevel@tonic-gate PURGE_STALE_FH(error, vp, cr); 42517c478bd9Sstevel@tonic-gate if (error == EOPNOTSUPP) { 42527c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 42537c478bd9Sstevel@tonic-gate mi->mi_flags |= MI_READDIRONLY; 42547c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 42557c478bd9Sstevel@tonic-gate } 42567c478bd9Sstevel@tonic-gate goto err; 42577c478bd9Sstevel@tonic-gate } 42587c478bd9Sstevel@tonic-gate 42597c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) { 42607c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 42617c478bd9Sstevel@tonic-gate KSTAT_IO_PTR(mi->mi_io_kstats)->reads++; 42627c478bd9Sstevel@tonic-gate KSTAT_IO_PTR(mi->mi_io_kstats)->nread += res.size; 42637c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 42647c478bd9Sstevel@tonic-gate } 42657c478bd9Sstevel@tonic-gate 42667c478bd9Sstevel@tonic-gate rdc->nfs3_ncookie = res.loff; 42677c478bd9Sstevel@tonic-gate rp->r_cookieverf = res.cookieverf; 42687c478bd9Sstevel@tonic-gate rdc->eof = res.eof ? 
1 : 0; 42697c478bd9Sstevel@tonic-gate rdc->entlen = res.size; 42707c478bd9Sstevel@tonic-gate ASSERT(rdc->entlen <= rdc->buflen); 42717c478bd9Sstevel@tonic-gate rdc->error = 0; 42727c478bd9Sstevel@tonic-gate 42737c478bd9Sstevel@tonic-gate return; 42747c478bd9Sstevel@tonic-gate 42757c478bd9Sstevel@tonic-gate err: 42767c478bd9Sstevel@tonic-gate kmem_free(rdc->entries, rdc->buflen); 42777c478bd9Sstevel@tonic-gate rdc->entries = NULL; 42787c478bd9Sstevel@tonic-gate rdc->error = error; 42797c478bd9Sstevel@tonic-gate } 42807c478bd9Sstevel@tonic-gate 42817c478bd9Sstevel@tonic-gate #ifdef DEBUG 42827c478bd9Sstevel@tonic-gate static int nfs3_bio_do_stop = 0; 42837c478bd9Sstevel@tonic-gate #endif 42847c478bd9Sstevel@tonic-gate 42857c478bd9Sstevel@tonic-gate static int 42867c478bd9Sstevel@tonic-gate nfs3_bio(struct buf *bp, stable_how *stab_comm, cred_t *cr) 42877c478bd9Sstevel@tonic-gate { 42887c478bd9Sstevel@tonic-gate rnode_t *rp = VTOR(bp->b_vp); 42897c478bd9Sstevel@tonic-gate int count; 42907c478bd9Sstevel@tonic-gate int error; 42917c478bd9Sstevel@tonic-gate cred_t *cred; 42927c478bd9Sstevel@tonic-gate offset_t offset; 42937c478bd9Sstevel@tonic-gate 4294108322fbScarlsonj ASSERT(nfs_zone() == VTOMI(bp->b_vp)->mi_zone); 42957c478bd9Sstevel@tonic-gate offset = ldbtob(bp->b_lblkno); 42967c478bd9Sstevel@tonic-gate 42977c478bd9Sstevel@tonic-gate DTRACE_IO1(start, struct buf *, bp); 42987c478bd9Sstevel@tonic-gate 42997c478bd9Sstevel@tonic-gate if (bp->b_flags & B_READ) { 43007c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 43017c478bd9Sstevel@tonic-gate if (rp->r_cred != NULL) { 43027c478bd9Sstevel@tonic-gate cred = rp->r_cred; 43037c478bd9Sstevel@tonic-gate crhold(cred); 43047c478bd9Sstevel@tonic-gate } else { 43057c478bd9Sstevel@tonic-gate rp->r_cred = cr; 43067c478bd9Sstevel@tonic-gate crhold(cr); 43077c478bd9Sstevel@tonic-gate cred = cr; 43087c478bd9Sstevel@tonic-gate crhold(cred); 43097c478bd9Sstevel@tonic-gate } 43107c478bd9Sstevel@tonic-gate 
mutex_exit(&rp->r_statelock); 43117c478bd9Sstevel@tonic-gate read_again: 43127c478bd9Sstevel@tonic-gate error = bp->b_error = nfs3read(bp->b_vp, bp->b_un.b_addr, 43137c478bd9Sstevel@tonic-gate offset, bp->b_bcount, &bp->b_resid, cred); 43147c478bd9Sstevel@tonic-gate crfree(cred); 43157c478bd9Sstevel@tonic-gate if (!error) { 43167c478bd9Sstevel@tonic-gate if (bp->b_resid) { 43177c478bd9Sstevel@tonic-gate /* 43187c478bd9Sstevel@tonic-gate * Didn't get it all because we hit EOF, 43197c478bd9Sstevel@tonic-gate * zero all the memory beyond the EOF. 43207c478bd9Sstevel@tonic-gate */ 43217c478bd9Sstevel@tonic-gate /* bzero(rdaddr + */ 43227c478bd9Sstevel@tonic-gate bzero(bp->b_un.b_addr + 43237c478bd9Sstevel@tonic-gate bp->b_bcount - bp->b_resid, bp->b_resid); 43247c478bd9Sstevel@tonic-gate } 43257c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 43267c478bd9Sstevel@tonic-gate if (bp->b_resid == bp->b_bcount && 43277c478bd9Sstevel@tonic-gate offset >= rp->r_size) { 43287c478bd9Sstevel@tonic-gate /* 43297c478bd9Sstevel@tonic-gate * We didn't read anything at all as we are 43307c478bd9Sstevel@tonic-gate * past EOF. Return an error indicator back 43317c478bd9Sstevel@tonic-gate * but don't destroy the pages (yet). 
43327c478bd9Sstevel@tonic-gate */ 43337c478bd9Sstevel@tonic-gate error = NFS_EOF; 43347c478bd9Sstevel@tonic-gate } 43357c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 43367c478bd9Sstevel@tonic-gate } else if (error == EACCES) { 43377c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 43387c478bd9Sstevel@tonic-gate if (cred != cr) { 43397c478bd9Sstevel@tonic-gate if (rp->r_cred != NULL) 43407c478bd9Sstevel@tonic-gate crfree(rp->r_cred); 43417c478bd9Sstevel@tonic-gate rp->r_cred = cr; 43427c478bd9Sstevel@tonic-gate crhold(cr); 43437c478bd9Sstevel@tonic-gate cred = cr; 43447c478bd9Sstevel@tonic-gate crhold(cred); 43457c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 43467c478bd9Sstevel@tonic-gate goto read_again; 43477c478bd9Sstevel@tonic-gate } 43487c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 43497c478bd9Sstevel@tonic-gate } 43507c478bd9Sstevel@tonic-gate } else { 43517c478bd9Sstevel@tonic-gate if (!(rp->r_flags & RSTALE)) { 43527c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 43537c478bd9Sstevel@tonic-gate if (rp->r_cred != NULL) { 43547c478bd9Sstevel@tonic-gate cred = rp->r_cred; 43557c478bd9Sstevel@tonic-gate crhold(cred); 43567c478bd9Sstevel@tonic-gate } else { 43577c478bd9Sstevel@tonic-gate rp->r_cred = cr; 43587c478bd9Sstevel@tonic-gate crhold(cr); 43597c478bd9Sstevel@tonic-gate cred = cr; 43607c478bd9Sstevel@tonic-gate crhold(cred); 43617c478bd9Sstevel@tonic-gate } 43627c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 43637c478bd9Sstevel@tonic-gate write_again: 43647c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 43657c478bd9Sstevel@tonic-gate count = MIN(bp->b_bcount, rp->r_size - offset); 43667c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 43677c478bd9Sstevel@tonic-gate if (count < 0) 43687c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, "nfs3_bio: write count < 0"); 43697c478bd9Sstevel@tonic-gate #ifdef DEBUG 43707c478bd9Sstevel@tonic-gate if (count == 0) { 43717c478bd9Sstevel@tonic-gate 
zcmn_err(getzoneid(), CE_WARN, 43727c478bd9Sstevel@tonic-gate "nfs3_bio: zero length write at %lld", 43737c478bd9Sstevel@tonic-gate offset); 43747c478bd9Sstevel@tonic-gate nfs_printfhandle(&rp->r_fh); 43757c478bd9Sstevel@tonic-gate if (nfs3_bio_do_stop) 43767c478bd9Sstevel@tonic-gate debug_enter("nfs3_bio"); 43777c478bd9Sstevel@tonic-gate } 43787c478bd9Sstevel@tonic-gate #endif 43797c478bd9Sstevel@tonic-gate error = nfs3write(bp->b_vp, bp->b_un.b_addr, offset, 43807c478bd9Sstevel@tonic-gate count, cred, stab_comm); 43817c478bd9Sstevel@tonic-gate if (error == EACCES) { 43827c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 43837c478bd9Sstevel@tonic-gate if (cred != cr) { 43847c478bd9Sstevel@tonic-gate if (rp->r_cred != NULL) 43857c478bd9Sstevel@tonic-gate crfree(rp->r_cred); 43867c478bd9Sstevel@tonic-gate rp->r_cred = cr; 43877c478bd9Sstevel@tonic-gate crhold(cr); 43887c478bd9Sstevel@tonic-gate crfree(cred); 43897c478bd9Sstevel@tonic-gate cred = cr; 43907c478bd9Sstevel@tonic-gate crhold(cred); 43917c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 43927c478bd9Sstevel@tonic-gate goto write_again; 43937c478bd9Sstevel@tonic-gate } 43947c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 43957c478bd9Sstevel@tonic-gate } 43967c478bd9Sstevel@tonic-gate bp->b_error = error; 43977c478bd9Sstevel@tonic-gate if (error && error != EINTR) { 43987c478bd9Sstevel@tonic-gate /* 43997c478bd9Sstevel@tonic-gate * Don't print EDQUOT errors on the console. 44007c478bd9Sstevel@tonic-gate * Don't print asynchronous EACCES errors. 44017c478bd9Sstevel@tonic-gate * Don't print EFBIG errors. 44027c478bd9Sstevel@tonic-gate * Print all other write errors. 
44037c478bd9Sstevel@tonic-gate */ 44047c478bd9Sstevel@tonic-gate if (error != EDQUOT && error != EFBIG && 44057c478bd9Sstevel@tonic-gate (error != EACCES || 44067c478bd9Sstevel@tonic-gate !(bp->b_flags & B_ASYNC))) 44077c478bd9Sstevel@tonic-gate nfs_write_error(bp->b_vp, error, cred); 44087c478bd9Sstevel@tonic-gate /* 44097c478bd9Sstevel@tonic-gate * Update r_error and r_flags as appropriate. 44107c478bd9Sstevel@tonic-gate * If the error was ESTALE, then mark the 44117c478bd9Sstevel@tonic-gate * rnode as not being writeable and save 44127c478bd9Sstevel@tonic-gate * the error status. Otherwise, save any 44137c478bd9Sstevel@tonic-gate * errors which occur from asynchronous 44147c478bd9Sstevel@tonic-gate * page invalidations. Any errors occurring 44157c478bd9Sstevel@tonic-gate * from other operations should be saved 44167c478bd9Sstevel@tonic-gate * by the caller. 44177c478bd9Sstevel@tonic-gate */ 44187c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 44197c478bd9Sstevel@tonic-gate if (error == ESTALE) { 44207c478bd9Sstevel@tonic-gate rp->r_flags |= RSTALE; 44217c478bd9Sstevel@tonic-gate if (!rp->r_error) 44227c478bd9Sstevel@tonic-gate rp->r_error = error; 44237c478bd9Sstevel@tonic-gate } else if (!rp->r_error && 44247c478bd9Sstevel@tonic-gate (bp->b_flags & 44257c478bd9Sstevel@tonic-gate (B_INVAL|B_FORCE|B_ASYNC)) == 44267c478bd9Sstevel@tonic-gate (B_INVAL|B_FORCE|B_ASYNC)) { 44277c478bd9Sstevel@tonic-gate rp->r_error = error; 44287c478bd9Sstevel@tonic-gate } 44297c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 44307c478bd9Sstevel@tonic-gate } 44317c478bd9Sstevel@tonic-gate crfree(cred); 44328afffe5eSbatschul } else { 44337c478bd9Sstevel@tonic-gate error = rp->r_error; 44348afffe5eSbatschul /* 44358afffe5eSbatschul * A close may have cleared r_error, if so, 44368afffe5eSbatschul * propagate ESTALE error return properly 44378afffe5eSbatschul */ 44388afffe5eSbatschul if (error == 0) 44398afffe5eSbatschul error = ESTALE; 44408afffe5eSbatschul } 
44417c478bd9Sstevel@tonic-gate } 44427c478bd9Sstevel@tonic-gate 44437c478bd9Sstevel@tonic-gate if (error != 0 && error != NFS_EOF) 44447c478bd9Sstevel@tonic-gate bp->b_flags |= B_ERROR; 44457c478bd9Sstevel@tonic-gate 44467c478bd9Sstevel@tonic-gate DTRACE_IO1(done, struct buf *, bp); 44477c478bd9Sstevel@tonic-gate 44487c478bd9Sstevel@tonic-gate return (error); 44497c478bd9Sstevel@tonic-gate } 44507c478bd9Sstevel@tonic-gate 4451da6c28aaSamw /* ARGSUSED */ 44527c478bd9Sstevel@tonic-gate static int 4453da6c28aaSamw nfs3_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) 44547c478bd9Sstevel@tonic-gate { 44557c478bd9Sstevel@tonic-gate rnode_t *rp; 44567c478bd9Sstevel@tonic-gate 4457108322fbScarlsonj if (nfs_zone() != VTOMI(vp)->mi_zone) 44587c478bd9Sstevel@tonic-gate return (EIO); 44597c478bd9Sstevel@tonic-gate rp = VTOR(vp); 44607c478bd9Sstevel@tonic-gate 44617c478bd9Sstevel@tonic-gate if (fidp->fid_len < (ushort_t)rp->r_fh.fh_len) { 44627c478bd9Sstevel@tonic-gate fidp->fid_len = rp->r_fh.fh_len; 44637c478bd9Sstevel@tonic-gate return (ENOSPC); 44647c478bd9Sstevel@tonic-gate } 44657c478bd9Sstevel@tonic-gate fidp->fid_len = rp->r_fh.fh_len; 44667c478bd9Sstevel@tonic-gate bcopy(rp->r_fh.fh_buf, fidp->fid_data, fidp->fid_len); 44677c478bd9Sstevel@tonic-gate return (0); 44687c478bd9Sstevel@tonic-gate } 44697c478bd9Sstevel@tonic-gate 44707c478bd9Sstevel@tonic-gate /* ARGSUSED2 */ 44717c478bd9Sstevel@tonic-gate static int 44727c478bd9Sstevel@tonic-gate nfs3_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp) 44737c478bd9Sstevel@tonic-gate { 44747c478bd9Sstevel@tonic-gate rnode_t *rp = VTOR(vp); 44757c478bd9Sstevel@tonic-gate 44767c478bd9Sstevel@tonic-gate if (!write_lock) { 44777c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&rp->r_rwlock, RW_READER, FALSE); 44787c478bd9Sstevel@tonic-gate return (V_WRITELOCK_FALSE); 44797c478bd9Sstevel@tonic-gate } 44807c478bd9Sstevel@tonic-gate 44817c478bd9Sstevel@tonic-gate if ((rp->r_flags & RDIRECTIO) || (VTOMI(vp)->mi_flags & 
MI_DIRECTIO)) { 44827c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&rp->r_rwlock, RW_READER, FALSE); 44837c478bd9Sstevel@tonic-gate if (rp->r_mapcnt == 0 && !vn_has_cached_data(vp)) 44847c478bd9Sstevel@tonic-gate return (V_WRITELOCK_FALSE); 44857c478bd9Sstevel@tonic-gate nfs_rw_exit(&rp->r_rwlock); 44867c478bd9Sstevel@tonic-gate } 44877c478bd9Sstevel@tonic-gate 44887c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&rp->r_rwlock, RW_WRITER, FALSE); 44897c478bd9Sstevel@tonic-gate return (V_WRITELOCK_TRUE); 44907c478bd9Sstevel@tonic-gate } 44917c478bd9Sstevel@tonic-gate 44927c478bd9Sstevel@tonic-gate /* ARGSUSED */ 44937c478bd9Sstevel@tonic-gate static void 44947c478bd9Sstevel@tonic-gate nfs3_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp) 44957c478bd9Sstevel@tonic-gate { 44967c478bd9Sstevel@tonic-gate rnode_t *rp = VTOR(vp); 44977c478bd9Sstevel@tonic-gate 44987c478bd9Sstevel@tonic-gate nfs_rw_exit(&rp->r_rwlock); 44997c478bd9Sstevel@tonic-gate } 45007c478bd9Sstevel@tonic-gate 45017c478bd9Sstevel@tonic-gate /* ARGSUSED */ 45027c478bd9Sstevel@tonic-gate static int 4503da6c28aaSamw nfs3_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct) 45047c478bd9Sstevel@tonic-gate { 45057c478bd9Sstevel@tonic-gate 45067c478bd9Sstevel@tonic-gate /* 45077c478bd9Sstevel@tonic-gate * Because we stuff the readdir cookie into the offset field 45087c478bd9Sstevel@tonic-gate * someone may attempt to do an lseek with the cookie which 45097c478bd9Sstevel@tonic-gate * we want to succeed. 45107c478bd9Sstevel@tonic-gate */ 45117c478bd9Sstevel@tonic-gate if (vp->v_type == VDIR) 45127c478bd9Sstevel@tonic-gate return (0); 45137c478bd9Sstevel@tonic-gate if (*noffp < 0) 45147c478bd9Sstevel@tonic-gate return (EINVAL); 45157c478bd9Sstevel@tonic-gate return (0); 45167c478bd9Sstevel@tonic-gate } 45177c478bd9Sstevel@tonic-gate 45187c478bd9Sstevel@tonic-gate /* 45197c478bd9Sstevel@tonic-gate * number of nfs3_bsize blocks to read ahead. 
45207c478bd9Sstevel@tonic-gate */ 45217c478bd9Sstevel@tonic-gate static int nfs3_nra = 4; 45227c478bd9Sstevel@tonic-gate 45237c478bd9Sstevel@tonic-gate #ifdef DEBUG 45247c478bd9Sstevel@tonic-gate static int nfs3_lostpage = 0; /* number of times we lost original page */ 45257c478bd9Sstevel@tonic-gate #endif 45267c478bd9Sstevel@tonic-gate 45277c478bd9Sstevel@tonic-gate /* 45287c478bd9Sstevel@tonic-gate * Return all the pages from [off..off+len) in file 45297c478bd9Sstevel@tonic-gate */ 4530da6c28aaSamw /* ARGSUSED */ 45317c478bd9Sstevel@tonic-gate static int 45327c478bd9Sstevel@tonic-gate nfs3_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, 45337c478bd9Sstevel@tonic-gate page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, 4534da6c28aaSamw enum seg_rw rw, cred_t *cr, caller_context_t *ct) 45357c478bd9Sstevel@tonic-gate { 45367c478bd9Sstevel@tonic-gate rnode_t *rp; 45377c478bd9Sstevel@tonic-gate int error; 45387c478bd9Sstevel@tonic-gate mntinfo_t *mi; 45397c478bd9Sstevel@tonic-gate 45407c478bd9Sstevel@tonic-gate if (vp->v_flag & VNOMAP) 45417c478bd9Sstevel@tonic-gate return (ENOSYS); 45427c478bd9Sstevel@tonic-gate 4543108322fbScarlsonj if (nfs_zone() != VTOMI(vp)->mi_zone) 45447c478bd9Sstevel@tonic-gate return (EIO); 45457c478bd9Sstevel@tonic-gate if (protp != NULL) 45467c478bd9Sstevel@tonic-gate *protp = PROT_ALL; 45477c478bd9Sstevel@tonic-gate 45487c478bd9Sstevel@tonic-gate /* 45497c478bd9Sstevel@tonic-gate * Now valididate that the caches are up to date. 
45507c478bd9Sstevel@tonic-gate */ 45517c478bd9Sstevel@tonic-gate error = nfs3_validate_caches(vp, cr); 45527c478bd9Sstevel@tonic-gate if (error) 45537c478bd9Sstevel@tonic-gate return (error); 45547c478bd9Sstevel@tonic-gate 45557c478bd9Sstevel@tonic-gate rp = VTOR(vp); 45567c478bd9Sstevel@tonic-gate mi = VTOMI(vp); 45577c478bd9Sstevel@tonic-gate retry: 45587c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 45597c478bd9Sstevel@tonic-gate 45607c478bd9Sstevel@tonic-gate /* 45617c478bd9Sstevel@tonic-gate * Don't create dirty pages faster than they 45627c478bd9Sstevel@tonic-gate * can be cleaned so that the system doesn't 45637c478bd9Sstevel@tonic-gate * get imbalanced. If the async queue is 45647c478bd9Sstevel@tonic-gate * maxed out, then wait for it to drain before 45657c478bd9Sstevel@tonic-gate * creating more dirty pages. Also, wait for 45667c478bd9Sstevel@tonic-gate * any threads doing pagewalks in the vop_getattr 45677c478bd9Sstevel@tonic-gate * entry points so that they don't block for 45687c478bd9Sstevel@tonic-gate * long periods. 45697c478bd9Sstevel@tonic-gate */ 45707c478bd9Sstevel@tonic-gate if (rw == S_CREATE) { 45717c478bd9Sstevel@tonic-gate while ((mi->mi_max_threads != 0 && 45727c478bd9Sstevel@tonic-gate rp->r_awcount > 2 * mi->mi_max_threads) || 45737c478bd9Sstevel@tonic-gate rp->r_gcount > 0) 45747c478bd9Sstevel@tonic-gate cv_wait(&rp->r_cv, &rp->r_statelock); 45757c478bd9Sstevel@tonic-gate } 45767c478bd9Sstevel@tonic-gate 45777c478bd9Sstevel@tonic-gate /* 45787c478bd9Sstevel@tonic-gate * If we are getting called as a side effect of an nfs_write() 45797c478bd9Sstevel@tonic-gate * operation the local file size might not be extended yet. 45807c478bd9Sstevel@tonic-gate * In this case we want to be able to return pages of zeroes. 
45817c478bd9Sstevel@tonic-gate */ 45827c478bd9Sstevel@tonic-gate if (off + len > rp->r_size + PAGEOFFSET && seg != segkmap) { 45837c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 45847c478bd9Sstevel@tonic-gate return (EFAULT); /* beyond EOF */ 45857c478bd9Sstevel@tonic-gate } 45867c478bd9Sstevel@tonic-gate 45877c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 45887c478bd9Sstevel@tonic-gate 45897c478bd9Sstevel@tonic-gate error = pvn_getpages(nfs3_getapage, vp, off, len, protp, 45907c478bd9Sstevel@tonic-gate pl, plsz, seg, addr, rw, cr); 45917c478bd9Sstevel@tonic-gate 45927c478bd9Sstevel@tonic-gate switch (error) { 45937c478bd9Sstevel@tonic-gate case NFS_EOF: 45947c478bd9Sstevel@tonic-gate nfs_purge_caches(vp, NFS_NOPURGE_DNLC, cr); 45957c478bd9Sstevel@tonic-gate goto retry; 45967c478bd9Sstevel@tonic-gate case ESTALE: 45977c478bd9Sstevel@tonic-gate PURGE_STALE_FH(error, vp, cr); 45987c478bd9Sstevel@tonic-gate } 45997c478bd9Sstevel@tonic-gate 46007c478bd9Sstevel@tonic-gate return (error); 46017c478bd9Sstevel@tonic-gate } 46027c478bd9Sstevel@tonic-gate 46037c478bd9Sstevel@tonic-gate /* 4604*06e6833aSJosef 'Jeff' Sipek * Called from pvn_getpages to get a particular page. 
46057c478bd9Sstevel@tonic-gate */ 46067c478bd9Sstevel@tonic-gate /* ARGSUSED */ 46077c478bd9Sstevel@tonic-gate static int 46087c478bd9Sstevel@tonic-gate nfs3_getapage(vnode_t *vp, u_offset_t off, size_t len, uint_t *protp, 46097c478bd9Sstevel@tonic-gate page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, 46107c478bd9Sstevel@tonic-gate enum seg_rw rw, cred_t *cr) 46117c478bd9Sstevel@tonic-gate { 46127c478bd9Sstevel@tonic-gate rnode_t *rp; 46137c478bd9Sstevel@tonic-gate uint_t bsize; 46147c478bd9Sstevel@tonic-gate struct buf *bp; 46157c478bd9Sstevel@tonic-gate page_t *pp; 46167c478bd9Sstevel@tonic-gate u_offset_t lbn; 46177c478bd9Sstevel@tonic-gate u_offset_t io_off; 46187c478bd9Sstevel@tonic-gate u_offset_t blkoff; 46197c478bd9Sstevel@tonic-gate u_offset_t rablkoff; 46207c478bd9Sstevel@tonic-gate size_t io_len; 46217c478bd9Sstevel@tonic-gate uint_t blksize; 46227c478bd9Sstevel@tonic-gate int error; 46237c478bd9Sstevel@tonic-gate int readahead; 46247c478bd9Sstevel@tonic-gate int readahead_issued = 0; 46257c478bd9Sstevel@tonic-gate int ra_window; /* readahead window */ 46267c478bd9Sstevel@tonic-gate page_t *pagefound; 46277c478bd9Sstevel@tonic-gate page_t *savepp; 46287c478bd9Sstevel@tonic-gate 4629108322fbScarlsonj if (nfs_zone() != VTOMI(vp)->mi_zone) 46307c478bd9Sstevel@tonic-gate return (EIO); 46317c478bd9Sstevel@tonic-gate rp = VTOR(vp); 46327c478bd9Sstevel@tonic-gate bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE); 46337c478bd9Sstevel@tonic-gate 46347c478bd9Sstevel@tonic-gate reread: 46357c478bd9Sstevel@tonic-gate bp = NULL; 46367c478bd9Sstevel@tonic-gate pp = NULL; 46377c478bd9Sstevel@tonic-gate pagefound = NULL; 46387c478bd9Sstevel@tonic-gate 46397c478bd9Sstevel@tonic-gate if (pl != NULL) 46407c478bd9Sstevel@tonic-gate pl[0] = NULL; 46417c478bd9Sstevel@tonic-gate 46427c478bd9Sstevel@tonic-gate error = 0; 46437c478bd9Sstevel@tonic-gate lbn = off / bsize; 46447c478bd9Sstevel@tonic-gate blkoff = lbn * bsize; 46457c478bd9Sstevel@tonic-gate 
46467c478bd9Sstevel@tonic-gate /* 46477c478bd9Sstevel@tonic-gate * Queueing up the readahead before doing the synchronous read 46487c478bd9Sstevel@tonic-gate * results in a significant increase in read throughput because 46497c478bd9Sstevel@tonic-gate * of the increased parallelism between the async threads and 46507c478bd9Sstevel@tonic-gate * the process context. 46517c478bd9Sstevel@tonic-gate */ 46527c478bd9Sstevel@tonic-gate if ((off & ((vp->v_vfsp->vfs_bsize) - 1)) == 0 && 46537c478bd9Sstevel@tonic-gate rw != S_CREATE && 46547c478bd9Sstevel@tonic-gate !(vp->v_flag & VNOCACHE)) { 46557c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 46567c478bd9Sstevel@tonic-gate 46577c478bd9Sstevel@tonic-gate /* 46587c478bd9Sstevel@tonic-gate * Calculate the number of readaheads to do. 46597c478bd9Sstevel@tonic-gate * a) No readaheads at offset = 0. 46607c478bd9Sstevel@tonic-gate * b) Do maximum(nfs3_nra) readaheads when the readahead 46617c478bd9Sstevel@tonic-gate * window is closed. 46627c478bd9Sstevel@tonic-gate * c) Do readaheads between 1 to (nfs3_nra - 1) depending 46637c478bd9Sstevel@tonic-gate * upon how far the readahead window is open or close. 46647c478bd9Sstevel@tonic-gate * d) No readaheads if rp->r_nextr is not within the scope 46657c478bd9Sstevel@tonic-gate * of the readahead window (random i/o). 
46667c478bd9Sstevel@tonic-gate */ 46677c478bd9Sstevel@tonic-gate 46687c478bd9Sstevel@tonic-gate if (off == 0) 46697c478bd9Sstevel@tonic-gate readahead = 0; 46707c478bd9Sstevel@tonic-gate else if (blkoff == rp->r_nextr) 46717c478bd9Sstevel@tonic-gate readahead = nfs3_nra; 46727c478bd9Sstevel@tonic-gate else if (rp->r_nextr > blkoff && 46737c478bd9Sstevel@tonic-gate ((ra_window = (rp->r_nextr - blkoff) / bsize) 46747c478bd9Sstevel@tonic-gate <= (nfs3_nra - 1))) 46757c478bd9Sstevel@tonic-gate readahead = nfs3_nra - ra_window; 46767c478bd9Sstevel@tonic-gate else 46777c478bd9Sstevel@tonic-gate readahead = 0; 46787c478bd9Sstevel@tonic-gate 46797c478bd9Sstevel@tonic-gate rablkoff = rp->r_nextr; 46807c478bd9Sstevel@tonic-gate while (readahead > 0 && rablkoff + bsize < rp->r_size) { 46817c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 46827c478bd9Sstevel@tonic-gate if (nfs_async_readahead(vp, rablkoff + bsize, 46837c478bd9Sstevel@tonic-gate addr + (rablkoff + bsize - off), seg, cr, 46847c478bd9Sstevel@tonic-gate nfs3_readahead) < 0) { 46857c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 46867c478bd9Sstevel@tonic-gate break; 46877c478bd9Sstevel@tonic-gate } 46887c478bd9Sstevel@tonic-gate readahead--; 46897c478bd9Sstevel@tonic-gate rablkoff += bsize; 46907c478bd9Sstevel@tonic-gate /* 46917c478bd9Sstevel@tonic-gate * Indicate that we did a readahead so 46927c478bd9Sstevel@tonic-gate * readahead offset is not updated 46937c478bd9Sstevel@tonic-gate * by the synchronous read below. 46947c478bd9Sstevel@tonic-gate */ 46957c478bd9Sstevel@tonic-gate readahead_issued = 1; 46967c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 46977c478bd9Sstevel@tonic-gate /* 46987c478bd9Sstevel@tonic-gate * set readahead offset to 46997c478bd9Sstevel@tonic-gate * offset of last async readahead 47007c478bd9Sstevel@tonic-gate * request. 
47017c478bd9Sstevel@tonic-gate */ 47027c478bd9Sstevel@tonic-gate rp->r_nextr = rablkoff; 47037c478bd9Sstevel@tonic-gate } 47047c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 47057c478bd9Sstevel@tonic-gate } 47067c478bd9Sstevel@tonic-gate 47077c478bd9Sstevel@tonic-gate again: 47087c478bd9Sstevel@tonic-gate if ((pagefound = page_exists(vp, off)) == NULL) { 47097c478bd9Sstevel@tonic-gate if (pl == NULL) { 47107c478bd9Sstevel@tonic-gate (void) nfs_async_readahead(vp, blkoff, addr, seg, cr, 47117c478bd9Sstevel@tonic-gate nfs3_readahead); 47127c478bd9Sstevel@tonic-gate } else if (rw == S_CREATE) { 47137c478bd9Sstevel@tonic-gate /* 47147c478bd9Sstevel@tonic-gate * Block for this page is not allocated, or the offset 47157c478bd9Sstevel@tonic-gate * is beyond the current allocation size, or we're 47167c478bd9Sstevel@tonic-gate * allocating a swap slot and the page was not found, 47177c478bd9Sstevel@tonic-gate * so allocate it and return a zero page. 47187c478bd9Sstevel@tonic-gate */ 47197c478bd9Sstevel@tonic-gate if ((pp = page_create_va(vp, off, 47207c478bd9Sstevel@tonic-gate PAGESIZE, PG_WAIT, seg, addr)) == NULL) 47217c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, "nfs3_getapage: page_create"); 47227c478bd9Sstevel@tonic-gate io_len = PAGESIZE; 47237c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 47247c478bd9Sstevel@tonic-gate rp->r_nextr = off + PAGESIZE; 47257c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 47267c478bd9Sstevel@tonic-gate } else { 47277c478bd9Sstevel@tonic-gate /* 47287c478bd9Sstevel@tonic-gate * Need to go to server to get a BLOCK, exception to 47297c478bd9Sstevel@tonic-gate * that being while reading at offset = 0 or doing 47307c478bd9Sstevel@tonic-gate * random i/o, in that case read only a PAGE. 
47317c478bd9Sstevel@tonic-gate */ 47327c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 47337c478bd9Sstevel@tonic-gate if (blkoff < rp->r_size && 47347c478bd9Sstevel@tonic-gate blkoff + bsize >= rp->r_size) { 47357c478bd9Sstevel@tonic-gate /* 47367c478bd9Sstevel@tonic-gate * If only a block or less is left in 47377c478bd9Sstevel@tonic-gate * the file, read all that is remaining. 47387c478bd9Sstevel@tonic-gate */ 47397c478bd9Sstevel@tonic-gate if (rp->r_size <= off) { 47407c478bd9Sstevel@tonic-gate /* 47417c478bd9Sstevel@tonic-gate * Trying to access beyond EOF, 47427c478bd9Sstevel@tonic-gate * set up to get at least one page. 47437c478bd9Sstevel@tonic-gate */ 47447c478bd9Sstevel@tonic-gate blksize = off + PAGESIZE - blkoff; 47457c478bd9Sstevel@tonic-gate } else 47467c478bd9Sstevel@tonic-gate blksize = rp->r_size - blkoff; 47477c478bd9Sstevel@tonic-gate } else if ((off == 0) || 47487c478bd9Sstevel@tonic-gate (off != rp->r_nextr && !readahead_issued)) { 47497c478bd9Sstevel@tonic-gate blksize = PAGESIZE; 47507c478bd9Sstevel@tonic-gate blkoff = off; /* block = page here */ 47517c478bd9Sstevel@tonic-gate } else 47527c478bd9Sstevel@tonic-gate blksize = bsize; 47537c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 47547c478bd9Sstevel@tonic-gate 47557c478bd9Sstevel@tonic-gate pp = pvn_read_kluster(vp, off, seg, addr, &io_off, 47567c478bd9Sstevel@tonic-gate &io_len, blkoff, blksize, 0); 47577c478bd9Sstevel@tonic-gate 47587c478bd9Sstevel@tonic-gate /* 47597c478bd9Sstevel@tonic-gate * Some other thread has entered the page, 47607c478bd9Sstevel@tonic-gate * so just use it. 47617c478bd9Sstevel@tonic-gate */ 47627c478bd9Sstevel@tonic-gate if (pp == NULL) 47637c478bd9Sstevel@tonic-gate goto again; 47647c478bd9Sstevel@tonic-gate 47657c478bd9Sstevel@tonic-gate /* 47667c478bd9Sstevel@tonic-gate * Now round the request size up to page boundaries. 
47677c478bd9Sstevel@tonic-gate * This ensures that the entire page will be 47687c478bd9Sstevel@tonic-gate * initialized to zeroes if EOF is encountered. 47697c478bd9Sstevel@tonic-gate */ 47707c478bd9Sstevel@tonic-gate io_len = ptob(btopr(io_len)); 47717c478bd9Sstevel@tonic-gate 47727c478bd9Sstevel@tonic-gate bp = pageio_setup(pp, io_len, vp, B_READ); 47737c478bd9Sstevel@tonic-gate ASSERT(bp != NULL); 47747c478bd9Sstevel@tonic-gate 47757c478bd9Sstevel@tonic-gate /* 47767c478bd9Sstevel@tonic-gate * pageio_setup should have set b_addr to 0. This 47777c478bd9Sstevel@tonic-gate * is correct since we want to do I/O on a page 47787c478bd9Sstevel@tonic-gate * boundary. bp_mapin will use this addr to calculate 47797c478bd9Sstevel@tonic-gate * an offset, and then set b_addr to the kernel virtual 47807c478bd9Sstevel@tonic-gate * address it allocated for us. 47817c478bd9Sstevel@tonic-gate */ 47827c478bd9Sstevel@tonic-gate ASSERT(bp->b_un.b_addr == 0); 47837c478bd9Sstevel@tonic-gate 47847c478bd9Sstevel@tonic-gate bp->b_edev = 0; 47857c478bd9Sstevel@tonic-gate bp->b_dev = 0; 47867c478bd9Sstevel@tonic-gate bp->b_lblkno = lbtodb(io_off); 47877c478bd9Sstevel@tonic-gate bp->b_file = vp; 47887c478bd9Sstevel@tonic-gate bp->b_offset = (offset_t)off; 47897c478bd9Sstevel@tonic-gate bp_mapin(bp); 47907c478bd9Sstevel@tonic-gate 47917c478bd9Sstevel@tonic-gate /* 47927c478bd9Sstevel@tonic-gate * If doing a write beyond what we believe is EOF, 47937c478bd9Sstevel@tonic-gate * don't bother trying to read the pages from the 47947c478bd9Sstevel@tonic-gate * server, we'll just zero the pages here. We 47957c478bd9Sstevel@tonic-gate * don't check that the rw flag is S_WRITE here 47967c478bd9Sstevel@tonic-gate * because some implementations may attempt a 47977c478bd9Sstevel@tonic-gate * read access to the buffer before copying data. 
47987c478bd9Sstevel@tonic-gate */ 47997c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 48007c478bd9Sstevel@tonic-gate if (io_off >= rp->r_size && seg == segkmap) { 48017c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 48027c478bd9Sstevel@tonic-gate bzero(bp->b_un.b_addr, io_len); 48037c478bd9Sstevel@tonic-gate } else { 48047c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 48057c478bd9Sstevel@tonic-gate error = nfs3_bio(bp, NULL, cr); 48067c478bd9Sstevel@tonic-gate } 48077c478bd9Sstevel@tonic-gate 48087c478bd9Sstevel@tonic-gate /* 48097c478bd9Sstevel@tonic-gate * Unmap the buffer before freeing it. 48107c478bd9Sstevel@tonic-gate */ 48117c478bd9Sstevel@tonic-gate bp_mapout(bp); 48127c478bd9Sstevel@tonic-gate pageio_done(bp); 48137c478bd9Sstevel@tonic-gate 48147c478bd9Sstevel@tonic-gate savepp = pp; 48157c478bd9Sstevel@tonic-gate do { 48167c478bd9Sstevel@tonic-gate pp->p_fsdata = C_NOCOMMIT; 48177c478bd9Sstevel@tonic-gate } while ((pp = pp->p_next) != savepp); 48187c478bd9Sstevel@tonic-gate 48197c478bd9Sstevel@tonic-gate if (error == NFS_EOF) { 48207c478bd9Sstevel@tonic-gate /* 48217c478bd9Sstevel@tonic-gate * If doing a write system call just return 48227c478bd9Sstevel@tonic-gate * zeroed pages, else user tried to get pages 48237c478bd9Sstevel@tonic-gate * beyond EOF, return error. We don't check 48247c478bd9Sstevel@tonic-gate * that the rw flag is S_WRITE here because 48257c478bd9Sstevel@tonic-gate * some implementations may attempt a read 48267c478bd9Sstevel@tonic-gate * access to the buffer before copying data. 
48277c478bd9Sstevel@tonic-gate */ 48287c478bd9Sstevel@tonic-gate if (seg == segkmap) 48297c478bd9Sstevel@tonic-gate error = 0; 48307c478bd9Sstevel@tonic-gate else 48317c478bd9Sstevel@tonic-gate error = EFAULT; 48327c478bd9Sstevel@tonic-gate } 48337c478bd9Sstevel@tonic-gate 48347c478bd9Sstevel@tonic-gate if (!readahead_issued && !error) { 48357c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 48367c478bd9Sstevel@tonic-gate rp->r_nextr = io_off + io_len; 48377c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 48387c478bd9Sstevel@tonic-gate } 48397c478bd9Sstevel@tonic-gate } 48407c478bd9Sstevel@tonic-gate } 48417c478bd9Sstevel@tonic-gate 48427c478bd9Sstevel@tonic-gate out: 48437c478bd9Sstevel@tonic-gate if (pl == NULL) 48447c478bd9Sstevel@tonic-gate return (error); 48457c478bd9Sstevel@tonic-gate 48467c478bd9Sstevel@tonic-gate if (error) { 48477c478bd9Sstevel@tonic-gate if (pp != NULL) 48487c478bd9Sstevel@tonic-gate pvn_read_done(pp, B_ERROR); 48497c478bd9Sstevel@tonic-gate return (error); 48507c478bd9Sstevel@tonic-gate } 48517c478bd9Sstevel@tonic-gate 48527c478bd9Sstevel@tonic-gate if (pagefound) { 48537c478bd9Sstevel@tonic-gate se_t se = (rw == S_CREATE ? SE_EXCL : SE_SHARED); 48547c478bd9Sstevel@tonic-gate 48557c478bd9Sstevel@tonic-gate /* 48567c478bd9Sstevel@tonic-gate * Page exists in the cache, acquire the appropriate lock. 48577c478bd9Sstevel@tonic-gate * If this fails, start all over again. 
48587c478bd9Sstevel@tonic-gate */ 48597c478bd9Sstevel@tonic-gate if ((pp = page_lookup(vp, off, se)) == NULL) { 48607c478bd9Sstevel@tonic-gate #ifdef DEBUG 48617c478bd9Sstevel@tonic-gate nfs3_lostpage++; 48627c478bd9Sstevel@tonic-gate #endif 48637c478bd9Sstevel@tonic-gate goto reread; 48647c478bd9Sstevel@tonic-gate } 48657c478bd9Sstevel@tonic-gate pl[0] = pp; 48667c478bd9Sstevel@tonic-gate pl[1] = NULL; 48677c478bd9Sstevel@tonic-gate return (0); 48687c478bd9Sstevel@tonic-gate } 48697c478bd9Sstevel@tonic-gate 48707c478bd9Sstevel@tonic-gate if (pp != NULL) 48717c478bd9Sstevel@tonic-gate pvn_plist_init(pp, pl, plsz, off, io_len, rw); 48727c478bd9Sstevel@tonic-gate 48737c478bd9Sstevel@tonic-gate return (error); 48747c478bd9Sstevel@tonic-gate } 48757c478bd9Sstevel@tonic-gate 48767c478bd9Sstevel@tonic-gate static void 48777c478bd9Sstevel@tonic-gate nfs3_readahead(vnode_t *vp, u_offset_t blkoff, caddr_t addr, struct seg *seg, 48787c478bd9Sstevel@tonic-gate cred_t *cr) 48797c478bd9Sstevel@tonic-gate { 48807c478bd9Sstevel@tonic-gate int error; 48817c478bd9Sstevel@tonic-gate page_t *pp; 48827c478bd9Sstevel@tonic-gate u_offset_t io_off; 48837c478bd9Sstevel@tonic-gate size_t io_len; 48847c478bd9Sstevel@tonic-gate struct buf *bp; 48857c478bd9Sstevel@tonic-gate uint_t bsize, blksize; 48867c478bd9Sstevel@tonic-gate rnode_t *rp = VTOR(vp); 48877c478bd9Sstevel@tonic-gate page_t *savepp; 48887c478bd9Sstevel@tonic-gate 4889108322fbScarlsonj ASSERT(nfs_zone() == VTOMI(vp)->mi_zone); 48907c478bd9Sstevel@tonic-gate bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE); 48917c478bd9Sstevel@tonic-gate 48927c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 48937c478bd9Sstevel@tonic-gate if (blkoff < rp->r_size && blkoff + bsize > rp->r_size) { 48947c478bd9Sstevel@tonic-gate /* 48957c478bd9Sstevel@tonic-gate * If less than a block left in file read less 48967c478bd9Sstevel@tonic-gate * than a block. 
48977c478bd9Sstevel@tonic-gate */ 48987c478bd9Sstevel@tonic-gate blksize = rp->r_size - blkoff; 48997c478bd9Sstevel@tonic-gate } else 49007c478bd9Sstevel@tonic-gate blksize = bsize; 49017c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 49027c478bd9Sstevel@tonic-gate 49037c478bd9Sstevel@tonic-gate pp = pvn_read_kluster(vp, blkoff, segkmap, addr, 49047c478bd9Sstevel@tonic-gate &io_off, &io_len, blkoff, blksize, 1); 49057c478bd9Sstevel@tonic-gate /* 49067c478bd9Sstevel@tonic-gate * The isra flag passed to the kluster function is 1, we may have 49077c478bd9Sstevel@tonic-gate * gotten a return value of NULL for a variety of reasons (# of free 49087c478bd9Sstevel@tonic-gate * pages < minfree, someone entered the page on the vnode etc). In all 49097c478bd9Sstevel@tonic-gate * cases, we want to punt on the readahead. 49107c478bd9Sstevel@tonic-gate */ 49117c478bd9Sstevel@tonic-gate if (pp == NULL) 49127c478bd9Sstevel@tonic-gate return; 49137c478bd9Sstevel@tonic-gate 49147c478bd9Sstevel@tonic-gate /* 49157c478bd9Sstevel@tonic-gate * Now round the request size up to page boundaries. 49167c478bd9Sstevel@tonic-gate * This ensures that the entire page will be 49177c478bd9Sstevel@tonic-gate * initialized to zeroes if EOF is encountered. 49187c478bd9Sstevel@tonic-gate */ 49197c478bd9Sstevel@tonic-gate io_len = ptob(btopr(io_len)); 49207c478bd9Sstevel@tonic-gate 49217c478bd9Sstevel@tonic-gate bp = pageio_setup(pp, io_len, vp, B_READ); 49227c478bd9Sstevel@tonic-gate ASSERT(bp != NULL); 49237c478bd9Sstevel@tonic-gate 49247c478bd9Sstevel@tonic-gate /* 49257c478bd9Sstevel@tonic-gate * pageio_setup should have set b_addr to 0. This is correct since 49267c478bd9Sstevel@tonic-gate * we want to do I/O on a page boundary. bp_mapin() will use this addr 49277c478bd9Sstevel@tonic-gate * to calculate an offset, and then set b_addr to the kernel virtual 49287c478bd9Sstevel@tonic-gate * address it allocated for us. 
49297c478bd9Sstevel@tonic-gate */ 49307c478bd9Sstevel@tonic-gate ASSERT(bp->b_un.b_addr == 0); 49317c478bd9Sstevel@tonic-gate 49327c478bd9Sstevel@tonic-gate bp->b_edev = 0; 49337c478bd9Sstevel@tonic-gate bp->b_dev = 0; 49347c478bd9Sstevel@tonic-gate bp->b_lblkno = lbtodb(io_off); 49357c478bd9Sstevel@tonic-gate bp->b_file = vp; 49367c478bd9Sstevel@tonic-gate bp->b_offset = (offset_t)blkoff; 49377c478bd9Sstevel@tonic-gate bp_mapin(bp); 49387c478bd9Sstevel@tonic-gate 49397c478bd9Sstevel@tonic-gate /* 49407c478bd9Sstevel@tonic-gate * If doing a write beyond what we believe is EOF, don't bother trying 49417c478bd9Sstevel@tonic-gate * to read the pages from the server, we'll just zero the pages here. 49427c478bd9Sstevel@tonic-gate * We don't check that the rw flag is S_WRITE here because some 49437c478bd9Sstevel@tonic-gate * implementations may attempt a read access to the buffer before 49447c478bd9Sstevel@tonic-gate * copying data. 49457c478bd9Sstevel@tonic-gate */ 49467c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 49477c478bd9Sstevel@tonic-gate if (io_off >= rp->r_size && seg == segkmap) { 49487c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 49497c478bd9Sstevel@tonic-gate bzero(bp->b_un.b_addr, io_len); 49507c478bd9Sstevel@tonic-gate error = 0; 49517c478bd9Sstevel@tonic-gate } else { 49527c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 49537c478bd9Sstevel@tonic-gate error = nfs3_bio(bp, NULL, cr); 49547c478bd9Sstevel@tonic-gate if (error == NFS_EOF) 49557c478bd9Sstevel@tonic-gate error = 0; 49567c478bd9Sstevel@tonic-gate } 49577c478bd9Sstevel@tonic-gate 49587c478bd9Sstevel@tonic-gate /* 49597c478bd9Sstevel@tonic-gate * Unmap the buffer before freeing it. 
49607c478bd9Sstevel@tonic-gate */ 49617c478bd9Sstevel@tonic-gate bp_mapout(bp); 49627c478bd9Sstevel@tonic-gate pageio_done(bp); 49637c478bd9Sstevel@tonic-gate 49647c478bd9Sstevel@tonic-gate savepp = pp; 49657c478bd9Sstevel@tonic-gate do { 49667c478bd9Sstevel@tonic-gate pp->p_fsdata = C_NOCOMMIT; 49677c478bd9Sstevel@tonic-gate } while ((pp = pp->p_next) != savepp); 49687c478bd9Sstevel@tonic-gate 49697c478bd9Sstevel@tonic-gate pvn_read_done(pp, error ? B_READ | B_ERROR : B_READ); 49707c478bd9Sstevel@tonic-gate 49717c478bd9Sstevel@tonic-gate /* 49727c478bd9Sstevel@tonic-gate * In case of error set readahead offset 49737c478bd9Sstevel@tonic-gate * to the lowest offset. 49747c478bd9Sstevel@tonic-gate * pvn_read_done() calls VN_DISPOSE to destroy the pages 49757c478bd9Sstevel@tonic-gate */ 49767c478bd9Sstevel@tonic-gate if (error && rp->r_nextr > io_off) { 49777c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 49787c478bd9Sstevel@tonic-gate if (rp->r_nextr > io_off) 49797c478bd9Sstevel@tonic-gate rp->r_nextr = io_off; 49807c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 49817c478bd9Sstevel@tonic-gate } 49827c478bd9Sstevel@tonic-gate } 49837c478bd9Sstevel@tonic-gate 49847c478bd9Sstevel@tonic-gate /* 49857c478bd9Sstevel@tonic-gate * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE} 49867c478bd9Sstevel@tonic-gate * If len == 0, do from off to EOF. 49877c478bd9Sstevel@tonic-gate * 49887c478bd9Sstevel@tonic-gate * The normal cases should be len == 0 && off == 0 (entire vp list), 49897c478bd9Sstevel@tonic-gate * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE 49907c478bd9Sstevel@tonic-gate * (from pageout). 
49917c478bd9Sstevel@tonic-gate */ 4992da6c28aaSamw /* ARGSUSED */ 49937c478bd9Sstevel@tonic-gate static int 4994da6c28aaSamw nfs3_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr, 4995da6c28aaSamw caller_context_t *ct) 49967c478bd9Sstevel@tonic-gate { 49977c478bd9Sstevel@tonic-gate int error; 49987c478bd9Sstevel@tonic-gate rnode_t *rp; 49997c478bd9Sstevel@tonic-gate 50007c478bd9Sstevel@tonic-gate ASSERT(cr != NULL); 50017c478bd9Sstevel@tonic-gate 50027c478bd9Sstevel@tonic-gate /* 50037c478bd9Sstevel@tonic-gate * XXX - Why should this check be made here? 50047c478bd9Sstevel@tonic-gate */ 50057c478bd9Sstevel@tonic-gate if (vp->v_flag & VNOMAP) 50067c478bd9Sstevel@tonic-gate return (ENOSYS); 50077c478bd9Sstevel@tonic-gate if (len == 0 && !(flags & B_INVAL) && vn_is_readonly(vp)) 50087c478bd9Sstevel@tonic-gate return (0); 5009108322fbScarlsonj if (!(flags & B_ASYNC) && nfs_zone() != VTOMI(vp)->mi_zone) 50107c478bd9Sstevel@tonic-gate return (EIO); 50117c478bd9Sstevel@tonic-gate 50127c478bd9Sstevel@tonic-gate rp = VTOR(vp); 50137c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 50147c478bd9Sstevel@tonic-gate rp->r_count++; 50157c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 50167c478bd9Sstevel@tonic-gate error = nfs_putpages(vp, off, len, flags, cr); 50177c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 50187c478bd9Sstevel@tonic-gate rp->r_count--; 50197c478bd9Sstevel@tonic-gate cv_broadcast(&rp->r_cv); 50207c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 50217c478bd9Sstevel@tonic-gate 50227c478bd9Sstevel@tonic-gate return (error); 50237c478bd9Sstevel@tonic-gate } 50247c478bd9Sstevel@tonic-gate 50257c478bd9Sstevel@tonic-gate /* 50267c478bd9Sstevel@tonic-gate * Write out a single page, possibly klustering adjacent dirty pages. 
50277c478bd9Sstevel@tonic-gate */ 50287c478bd9Sstevel@tonic-gate int 50297c478bd9Sstevel@tonic-gate nfs3_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp, 50307c478bd9Sstevel@tonic-gate int flags, cred_t *cr) 50317c478bd9Sstevel@tonic-gate { 50327c478bd9Sstevel@tonic-gate u_offset_t io_off; 50337c478bd9Sstevel@tonic-gate u_offset_t lbn_off; 50347c478bd9Sstevel@tonic-gate u_offset_t lbn; 50357c478bd9Sstevel@tonic-gate size_t io_len; 50367c478bd9Sstevel@tonic-gate uint_t bsize; 50377c478bd9Sstevel@tonic-gate int error; 50387c478bd9Sstevel@tonic-gate rnode_t *rp; 50397c478bd9Sstevel@tonic-gate 50407c478bd9Sstevel@tonic-gate ASSERT(!vn_is_readonly(vp)); 50417c478bd9Sstevel@tonic-gate ASSERT(pp != NULL); 50427c478bd9Sstevel@tonic-gate ASSERT(cr != NULL); 5043108322fbScarlsonj ASSERT((flags & B_ASYNC) || nfs_zone() == VTOMI(vp)->mi_zone); 50447c478bd9Sstevel@tonic-gate 50457c478bd9Sstevel@tonic-gate rp = VTOR(vp); 50467c478bd9Sstevel@tonic-gate ASSERT(rp->r_count > 0); 50477c478bd9Sstevel@tonic-gate 50487c478bd9Sstevel@tonic-gate bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE); 50497c478bd9Sstevel@tonic-gate lbn = pp->p_offset / bsize; 50507c478bd9Sstevel@tonic-gate lbn_off = lbn * bsize; 50517c478bd9Sstevel@tonic-gate 50527c478bd9Sstevel@tonic-gate /* 50537c478bd9Sstevel@tonic-gate * Find a kluster that fits in one block, or in 50547c478bd9Sstevel@tonic-gate * one page if pages are bigger than blocks. If 50557c478bd9Sstevel@tonic-gate * there is less file space allocated than a whole 50567c478bd9Sstevel@tonic-gate * page, we'll shorten the i/o request below. 
50577c478bd9Sstevel@tonic-gate */ 50587c478bd9Sstevel@tonic-gate pp = pvn_write_kluster(vp, pp, &io_off, &io_len, lbn_off, 50597c478bd9Sstevel@tonic-gate roundup(bsize, PAGESIZE), flags); 50607c478bd9Sstevel@tonic-gate 50617c478bd9Sstevel@tonic-gate /* 50627c478bd9Sstevel@tonic-gate * pvn_write_kluster shouldn't have returned a page with offset 50637c478bd9Sstevel@tonic-gate * behind the original page we were given. Verify that. 50647c478bd9Sstevel@tonic-gate */ 50657c478bd9Sstevel@tonic-gate ASSERT((pp->p_offset / bsize) >= lbn); 50667c478bd9Sstevel@tonic-gate 50677c478bd9Sstevel@tonic-gate /* 50687c478bd9Sstevel@tonic-gate * Now pp will have the list of kept dirty pages marked for 50697c478bd9Sstevel@tonic-gate * write back. It will also handle invalidation and freeing 50707c478bd9Sstevel@tonic-gate * of pages that are not dirty. Check for page length rounding 50717c478bd9Sstevel@tonic-gate * problems. 50727c478bd9Sstevel@tonic-gate */ 50737c478bd9Sstevel@tonic-gate if (io_off + io_len > lbn_off + bsize) { 50747c478bd9Sstevel@tonic-gate ASSERT((io_off + io_len) - (lbn_off + bsize) < PAGESIZE); 50757c478bd9Sstevel@tonic-gate io_len = lbn_off + bsize - io_off; 50767c478bd9Sstevel@tonic-gate } 50777c478bd9Sstevel@tonic-gate /* 50787c478bd9Sstevel@tonic-gate * The RMODINPROGRESS flag makes sure that nfs(3)_bio() sees a 50797c478bd9Sstevel@tonic-gate * consistent value of r_size. RMODINPROGRESS is set in writerp(). 50807c478bd9Sstevel@tonic-gate * When RMODINPROGRESS is set it indicates that a uiomove() is in 50817c478bd9Sstevel@tonic-gate * progress and the r_size has not been made consistent with the 50827c478bd9Sstevel@tonic-gate * new size of the file. When the uiomove() completes the r_size is 50837c478bd9Sstevel@tonic-gate * updated and the RMODINPROGRESS flag is cleared. 50847c478bd9Sstevel@tonic-gate * 50857c478bd9Sstevel@tonic-gate * The RMODINPROGRESS flag makes sure that nfs(3)_bio() sees a 50867c478bd9Sstevel@tonic-gate * consistent value of r_size. 
Without this handshaking, it is 50877c478bd9Sstevel@tonic-gate * possible that nfs(3)_bio() picks up the old value of r_size 50887c478bd9Sstevel@tonic-gate * before the uiomove() in writerp() completes. This will result 50897c478bd9Sstevel@tonic-gate * in the write through nfs(3)_bio() being dropped. 50907c478bd9Sstevel@tonic-gate * 50917c478bd9Sstevel@tonic-gate * More precisely, there is a window between the time the uiomove() 50927c478bd9Sstevel@tonic-gate * completes and the time the r_size is updated. If a VOP_PUTPAGE() 50937c478bd9Sstevel@tonic-gate * operation intervenes in this window, the page will be picked up, 50947c478bd9Sstevel@tonic-gate * because it is dirty (it will be unlocked, unless it was 50957c478bd9Sstevel@tonic-gate * pagecreate'd). When the page is picked up as dirty, the dirty 50967c478bd9Sstevel@tonic-gate * bit is reset (pvn_getdirty()). In nfs(3)write(), r_size is 50977c478bd9Sstevel@tonic-gate * checked. This will still be the old size. Therefore the page will 50987c478bd9Sstevel@tonic-gate * not be written out. When segmap_release() calls VOP_PUTPAGE(), 50997c478bd9Sstevel@tonic-gate * the page will be found to be clean and the write will be dropped. 51007c478bd9Sstevel@tonic-gate */ 51017c478bd9Sstevel@tonic-gate if (rp->r_flags & RMODINPROGRESS) { 51027c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 51037c478bd9Sstevel@tonic-gate if ((rp->r_flags & RMODINPROGRESS) && 51047c478bd9Sstevel@tonic-gate rp->r_modaddr + MAXBSIZE > io_off && 51057c478bd9Sstevel@tonic-gate rp->r_modaddr < io_off + io_len) { 51067c478bd9Sstevel@tonic-gate page_t *plist; 51077c478bd9Sstevel@tonic-gate /* 51087c478bd9Sstevel@tonic-gate * A write is in progress for this region of the file. 
51097c478bd9Sstevel@tonic-gate * If we did not detect RMODINPROGRESS here then this 51107c478bd9Sstevel@tonic-gate * path through nfs_putapage() would eventually go to 51117c478bd9Sstevel@tonic-gate * nfs(3)_bio() and may not write out all of the data 51127c478bd9Sstevel@tonic-gate * in the pages. We end up losing data. So we decide 51137c478bd9Sstevel@tonic-gate * to set the modified bit on each page in the page 51147c478bd9Sstevel@tonic-gate * list and mark the rnode with RDIRTY. This write 51157c478bd9Sstevel@tonic-gate * will be restarted at some later time. 51167c478bd9Sstevel@tonic-gate */ 51177c478bd9Sstevel@tonic-gate plist = pp; 51187c478bd9Sstevel@tonic-gate while (plist != NULL) { 51197c478bd9Sstevel@tonic-gate pp = plist; 51207c478bd9Sstevel@tonic-gate page_sub(&plist, pp); 51217c478bd9Sstevel@tonic-gate hat_setmod(pp); 51227c478bd9Sstevel@tonic-gate page_io_unlock(pp); 51237c478bd9Sstevel@tonic-gate page_unlock(pp); 51247c478bd9Sstevel@tonic-gate } 51257c478bd9Sstevel@tonic-gate rp->r_flags |= RDIRTY; 51267c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 51277c478bd9Sstevel@tonic-gate if (offp) 51287c478bd9Sstevel@tonic-gate *offp = io_off; 51297c478bd9Sstevel@tonic-gate if (lenp) 51307c478bd9Sstevel@tonic-gate *lenp = io_len; 51317c478bd9Sstevel@tonic-gate return (0); 51327c478bd9Sstevel@tonic-gate } 51337c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 51347c478bd9Sstevel@tonic-gate } 51357c478bd9Sstevel@tonic-gate 51367c478bd9Sstevel@tonic-gate if (flags & B_ASYNC) { 51377c478bd9Sstevel@tonic-gate error = nfs_async_putapage(vp, pp, io_off, io_len, flags, cr, 51387c478bd9Sstevel@tonic-gate nfs3_sync_putapage); 51397c478bd9Sstevel@tonic-gate } else 51407c478bd9Sstevel@tonic-gate error = nfs3_sync_putapage(vp, pp, io_off, io_len, flags, cr); 51417c478bd9Sstevel@tonic-gate 51427c478bd9Sstevel@tonic-gate if (offp) 51437c478bd9Sstevel@tonic-gate *offp = io_off; 51447c478bd9Sstevel@tonic-gate if (lenp) 51457c478bd9Sstevel@tonic-gate *lenp = 
io_len; 51467c478bd9Sstevel@tonic-gate return (error); 51477c478bd9Sstevel@tonic-gate } 51487c478bd9Sstevel@tonic-gate 51497c478bd9Sstevel@tonic-gate static int 51507c478bd9Sstevel@tonic-gate nfs3_sync_putapage(vnode_t *vp, page_t *pp, u_offset_t io_off, size_t io_len, 51517c478bd9Sstevel@tonic-gate int flags, cred_t *cr) 51527c478bd9Sstevel@tonic-gate { 51537c478bd9Sstevel@tonic-gate int error; 51547c478bd9Sstevel@tonic-gate rnode_t *rp; 51557c478bd9Sstevel@tonic-gate 5156108322fbScarlsonj ASSERT(nfs_zone() == VTOMI(vp)->mi_zone); 51577c478bd9Sstevel@tonic-gate 51587c478bd9Sstevel@tonic-gate flags |= B_WRITE; 51597c478bd9Sstevel@tonic-gate 51607c478bd9Sstevel@tonic-gate error = nfs3_rdwrlbn(vp, pp, io_off, io_len, flags, cr); 51617c478bd9Sstevel@tonic-gate 51627c478bd9Sstevel@tonic-gate rp = VTOR(vp); 51637c478bd9Sstevel@tonic-gate 51647c478bd9Sstevel@tonic-gate if ((error == ENOSPC || error == EDQUOT || error == EFBIG || 51657c478bd9Sstevel@tonic-gate error == EACCES) && 51667c478bd9Sstevel@tonic-gate (flags & (B_INVAL|B_FORCE)) != (B_INVAL|B_FORCE)) { 51677c478bd9Sstevel@tonic-gate if (!(rp->r_flags & ROUTOFSPACE)) { 51687c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 51697c478bd9Sstevel@tonic-gate rp->r_flags |= ROUTOFSPACE; 51707c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 51717c478bd9Sstevel@tonic-gate } 51727c478bd9Sstevel@tonic-gate flags |= B_ERROR; 51737c478bd9Sstevel@tonic-gate pvn_write_done(pp, flags); 51747c478bd9Sstevel@tonic-gate /* 51757c478bd9Sstevel@tonic-gate * If this was not an async thread, then try again to 51767c478bd9Sstevel@tonic-gate * write out the pages, but this time, also destroy 51777c478bd9Sstevel@tonic-gate * them whether or not the write is successful. This 51787c478bd9Sstevel@tonic-gate * will prevent memory from filling up with these 51797c478bd9Sstevel@tonic-gate * pages and destroying them is the only alternative 51807c478bd9Sstevel@tonic-gate * if they can't be written out. 
51817c478bd9Sstevel@tonic-gate * 51827c478bd9Sstevel@tonic-gate * Don't do this if this is an async thread because 51837c478bd9Sstevel@tonic-gate * when the pages are unlocked in pvn_write_done, 51847c478bd9Sstevel@tonic-gate * some other thread could have come along, locked 51857c478bd9Sstevel@tonic-gate * them, and queued for an async thread. It would be 51867c478bd9Sstevel@tonic-gate * possible for all of the async threads to be tied 51877c478bd9Sstevel@tonic-gate * up waiting to lock the pages again and they would 51887c478bd9Sstevel@tonic-gate * all already be locked and waiting for an async 51897c478bd9Sstevel@tonic-gate * thread to handle them. Deadlock. 51907c478bd9Sstevel@tonic-gate */ 51917c478bd9Sstevel@tonic-gate if (!(flags & B_ASYNC)) { 51927c478bd9Sstevel@tonic-gate error = nfs3_putpage(vp, io_off, io_len, 5193da6c28aaSamw B_INVAL | B_FORCE, cr, NULL); 51947c478bd9Sstevel@tonic-gate } 51957c478bd9Sstevel@tonic-gate } else { 51967c478bd9Sstevel@tonic-gate if (error) 51977c478bd9Sstevel@tonic-gate flags |= B_ERROR; 51987c478bd9Sstevel@tonic-gate else if (rp->r_flags & ROUTOFSPACE) { 51997c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 52007c478bd9Sstevel@tonic-gate rp->r_flags &= ~ROUTOFSPACE; 52017c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 52027c478bd9Sstevel@tonic-gate } 52037c478bd9Sstevel@tonic-gate pvn_write_done(pp, flags); 52047c478bd9Sstevel@tonic-gate if (freemem < desfree) 52057c478bd9Sstevel@tonic-gate (void) nfs3_commit_vp(vp, (u_offset_t)0, 0, cr); 52067c478bd9Sstevel@tonic-gate } 52077c478bd9Sstevel@tonic-gate 52087c478bd9Sstevel@tonic-gate return (error); 52097c478bd9Sstevel@tonic-gate } 52107c478bd9Sstevel@tonic-gate 5211da6c28aaSamw /* ARGSUSED */ 52127c478bd9Sstevel@tonic-gate static int 52137c478bd9Sstevel@tonic-gate nfs3_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, 5214da6c28aaSamw size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, 5215da6c28aaSamw cred_t *cr, caller_context_t *ct) 
52167c478bd9Sstevel@tonic-gate { 52177c478bd9Sstevel@tonic-gate struct segvn_crargs vn_a; 52187c478bd9Sstevel@tonic-gate int error; 52197c478bd9Sstevel@tonic-gate rnode_t *rp; 52207c478bd9Sstevel@tonic-gate struct vattr va; 52217c478bd9Sstevel@tonic-gate 5222108322fbScarlsonj if (nfs_zone() != VTOMI(vp)->mi_zone) 52237c478bd9Sstevel@tonic-gate return (EIO); 52247c478bd9Sstevel@tonic-gate 52257c478bd9Sstevel@tonic-gate if (vp->v_flag & VNOMAP) 52267c478bd9Sstevel@tonic-gate return (ENOSYS); 52277c478bd9Sstevel@tonic-gate 52287c478bd9Sstevel@tonic-gate if (off < 0 || off + len < 0) 52297c478bd9Sstevel@tonic-gate return (ENXIO); 52307c478bd9Sstevel@tonic-gate 52317c478bd9Sstevel@tonic-gate if (vp->v_type != VREG) 52327c478bd9Sstevel@tonic-gate return (ENODEV); 52337c478bd9Sstevel@tonic-gate 52347c478bd9Sstevel@tonic-gate /* 52357c478bd9Sstevel@tonic-gate * If there is cached data and if close-to-open consistency 52367c478bd9Sstevel@tonic-gate * checking is not turned off and if the file system is not 52377c478bd9Sstevel@tonic-gate * mounted readonly, then force an over the wire getattr. 52387c478bd9Sstevel@tonic-gate * Otherwise, just invoke nfs3getattr to get a copy of the 52397c478bd9Sstevel@tonic-gate * attributes. The attribute cache will be used unless it 52407c478bd9Sstevel@tonic-gate * is timed out and if it is, then an over the wire getattr 52417c478bd9Sstevel@tonic-gate * will be issued. 
52427c478bd9Sstevel@tonic-gate */ 52437c478bd9Sstevel@tonic-gate va.va_mask = AT_ALL; 52447c478bd9Sstevel@tonic-gate if (vn_has_cached_data(vp) && 52457c478bd9Sstevel@tonic-gate !(VTOMI(vp)->mi_flags & MI_NOCTO) && !vn_is_readonly(vp)) 52467c478bd9Sstevel@tonic-gate error = nfs3_getattr_otw(vp, &va, cr); 52477c478bd9Sstevel@tonic-gate else 52487c478bd9Sstevel@tonic-gate error = nfs3getattr(vp, &va, cr); 52497c478bd9Sstevel@tonic-gate if (error) 52507c478bd9Sstevel@tonic-gate return (error); 52517c478bd9Sstevel@tonic-gate 52527c478bd9Sstevel@tonic-gate /* 52537c478bd9Sstevel@tonic-gate * Check to see if the vnode is currently marked as not cachable. 52547c478bd9Sstevel@tonic-gate * This means portions of the file are locked (through VOP_FRLOCK). 52557c478bd9Sstevel@tonic-gate * In this case the map request must be refused. We use 52567c478bd9Sstevel@tonic-gate * rp->r_lkserlock to avoid a race with concurrent lock requests. 52577c478bd9Sstevel@tonic-gate */ 52587c478bd9Sstevel@tonic-gate rp = VTOR(vp); 52591384c586SDeepak Honnalli 52601384c586SDeepak Honnalli /* 52611384c586SDeepak Honnalli * Atomically increment r_inmap after acquiring r_rwlock. The 52621384c586SDeepak Honnalli * idea here is to acquire r_rwlock to block read/write and 52631384c586SDeepak Honnalli * not to protect r_inmap. r_inmap will inform nfs3_read/write() 52641384c586SDeepak Honnalli * that we are in nfs3_map(). Now, r_rwlock is acquired in order 52651384c586SDeepak Honnalli * and we can prevent the deadlock that would have occurred 52661384c586SDeepak Honnalli * when nfs3_addmap() would have acquired it out of order. 52671384c586SDeepak Honnalli * 52681384c586SDeepak Honnalli * Since we are not protecting r_inmap by any lock, we do not 52691384c586SDeepak Honnalli * hold any lock when we decrement it. We atomically decrement 52701384c586SDeepak Honnalli * r_inmap after we release r_lkserlock. 
52711384c586SDeepak Honnalli */ 52721384c586SDeepak Honnalli 52731384c586SDeepak Honnalli if (nfs_rw_enter_sig(&rp->r_rwlock, RW_WRITER, INTR(vp))) 52747c478bd9Sstevel@tonic-gate return (EINTR); 52751a5e258fSJosef 'Jeff' Sipek atomic_inc_uint(&rp->r_inmap); 52761384c586SDeepak Honnalli nfs_rw_exit(&rp->r_rwlock); 52771384c586SDeepak Honnalli 52781384c586SDeepak Honnalli if (nfs_rw_enter_sig(&rp->r_lkserlock, RW_READER, INTR(vp))) { 52791a5e258fSJosef 'Jeff' Sipek atomic_dec_uint(&rp->r_inmap); 52801384c586SDeepak Honnalli return (EINTR); 52811384c586SDeepak Honnalli } 52827c478bd9Sstevel@tonic-gate 52837c478bd9Sstevel@tonic-gate if (vp->v_flag & VNOCACHE) { 52847c478bd9Sstevel@tonic-gate error = EAGAIN; 52857c478bd9Sstevel@tonic-gate goto done; 52867c478bd9Sstevel@tonic-gate } 52877c478bd9Sstevel@tonic-gate 52887c478bd9Sstevel@tonic-gate /* 52897c478bd9Sstevel@tonic-gate * Don't allow concurrent locks and mapping if mandatory locking is 52907c478bd9Sstevel@tonic-gate * enabled. 52917c478bd9Sstevel@tonic-gate */ 52927c478bd9Sstevel@tonic-gate if ((flk_has_remote_locks(vp) || lm_has_sleep(vp)) && 52937c478bd9Sstevel@tonic-gate MANDLOCK(vp, va.va_mode)) { 52947c478bd9Sstevel@tonic-gate error = EAGAIN; 52957c478bd9Sstevel@tonic-gate goto done; 52967c478bd9Sstevel@tonic-gate } 52977c478bd9Sstevel@tonic-gate 52987c478bd9Sstevel@tonic-gate as_rangelock(as); 529960946fe0Smec error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 530060946fe0Smec if (error != 0) { 53017c478bd9Sstevel@tonic-gate as_rangeunlock(as); 53027c478bd9Sstevel@tonic-gate goto done; 53037c478bd9Sstevel@tonic-gate } 53047c478bd9Sstevel@tonic-gate 53057c478bd9Sstevel@tonic-gate vn_a.vp = vp; 53067c478bd9Sstevel@tonic-gate vn_a.offset = off; 53077c478bd9Sstevel@tonic-gate vn_a.type = (flags & MAP_TYPE); 53087c478bd9Sstevel@tonic-gate vn_a.prot = (uchar_t)prot; 53097c478bd9Sstevel@tonic-gate vn_a.maxprot = (uchar_t)maxprot; 53107c478bd9Sstevel@tonic-gate vn_a.flags = (flags & ~MAP_TYPE); 
53117c478bd9Sstevel@tonic-gate vn_a.cred = cr; 53127c478bd9Sstevel@tonic-gate vn_a.amp = NULL; 53137c478bd9Sstevel@tonic-gate vn_a.szc = 0; 53147c478bd9Sstevel@tonic-gate vn_a.lgrp_mem_policy_flags = 0; 53157c478bd9Sstevel@tonic-gate 53167c478bd9Sstevel@tonic-gate error = as_map(as, *addrp, len, segvn_create, &vn_a); 53177c478bd9Sstevel@tonic-gate as_rangeunlock(as); 53187c478bd9Sstevel@tonic-gate 53197c478bd9Sstevel@tonic-gate done: 53207c478bd9Sstevel@tonic-gate nfs_rw_exit(&rp->r_lkserlock); 53211a5e258fSJosef 'Jeff' Sipek atomic_dec_uint(&rp->r_inmap); 53227c478bd9Sstevel@tonic-gate return (error); 53237c478bd9Sstevel@tonic-gate } 53247c478bd9Sstevel@tonic-gate 53257c478bd9Sstevel@tonic-gate /* ARGSUSED */ 53267c478bd9Sstevel@tonic-gate static int 53277c478bd9Sstevel@tonic-gate nfs3_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 5328da6c28aaSamw size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, 5329da6c28aaSamw cred_t *cr, caller_context_t *ct) 53307c478bd9Sstevel@tonic-gate { 53317c478bd9Sstevel@tonic-gate rnode_t *rp; 53327c478bd9Sstevel@tonic-gate 53337c478bd9Sstevel@tonic-gate if (vp->v_flag & VNOMAP) 53347c478bd9Sstevel@tonic-gate return (ENOSYS); 5335108322fbScarlsonj if (nfs_zone() != VTOMI(vp)->mi_zone) 53367c478bd9Sstevel@tonic-gate return (EIO); 53377c478bd9Sstevel@tonic-gate 53387c478bd9Sstevel@tonic-gate rp = VTOR(vp); 53397c478bd9Sstevel@tonic-gate atomic_add_long((ulong_t *)&rp->r_mapcnt, btopr(len)); 53407c478bd9Sstevel@tonic-gate 53417c478bd9Sstevel@tonic-gate return (0); 53427c478bd9Sstevel@tonic-gate } 53437c478bd9Sstevel@tonic-gate 5344da6c28aaSamw /* ARGSUSED */ 53457c478bd9Sstevel@tonic-gate static int 53467c478bd9Sstevel@tonic-gate nfs3_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag, 5347da6c28aaSamw offset_t offset, struct flk_callback *flk_cbp, cred_t *cr, 5348da6c28aaSamw caller_context_t *ct) 53497c478bd9Sstevel@tonic-gate { 53507c478bd9Sstevel@tonic-gate netobj lm_fh3; 53517c478bd9Sstevel@tonic-gate 
int rc; 53527c478bd9Sstevel@tonic-gate u_offset_t start, end; 53537c478bd9Sstevel@tonic-gate rnode_t *rp; 53547c478bd9Sstevel@tonic-gate int error = 0, intr = INTR(vp); 53557c478bd9Sstevel@tonic-gate 5356108322fbScarlsonj if (nfs_zone() != VTOMI(vp)->mi_zone) 53577c478bd9Sstevel@tonic-gate return (EIO); 53587c478bd9Sstevel@tonic-gate /* check for valid cmd parameter */ 53597c478bd9Sstevel@tonic-gate if (cmd != F_GETLK && cmd != F_SETLK && cmd != F_SETLKW) 53607c478bd9Sstevel@tonic-gate return (EINVAL); 53617c478bd9Sstevel@tonic-gate 53627c478bd9Sstevel@tonic-gate /* Verify l_type. */ 53637c478bd9Sstevel@tonic-gate switch (bfp->l_type) { 53647c478bd9Sstevel@tonic-gate case F_RDLCK: 53657c478bd9Sstevel@tonic-gate if (cmd != F_GETLK && !(flag & FREAD)) 53667c478bd9Sstevel@tonic-gate return (EBADF); 53677c478bd9Sstevel@tonic-gate break; 53687c478bd9Sstevel@tonic-gate case F_WRLCK: 53697c478bd9Sstevel@tonic-gate if (cmd != F_GETLK && !(flag & FWRITE)) 53707c478bd9Sstevel@tonic-gate return (EBADF); 53717c478bd9Sstevel@tonic-gate break; 53727c478bd9Sstevel@tonic-gate case F_UNLCK: 53737c478bd9Sstevel@tonic-gate intr = 0; 53747c478bd9Sstevel@tonic-gate break; 53757c478bd9Sstevel@tonic-gate 53767c478bd9Sstevel@tonic-gate default: 53777c478bd9Sstevel@tonic-gate return (EINVAL); 53787c478bd9Sstevel@tonic-gate } 53797c478bd9Sstevel@tonic-gate 53807c478bd9Sstevel@tonic-gate /* check the validity of the lock range */ 53817c478bd9Sstevel@tonic-gate if (rc = flk_convert_lock_data(vp, bfp, &start, &end, offset)) 53827c478bd9Sstevel@tonic-gate return (rc); 53837c478bd9Sstevel@tonic-gate if (rc = flk_check_lock_data(start, end, MAXEND)) 53847c478bd9Sstevel@tonic-gate return (rc); 53857c478bd9Sstevel@tonic-gate 53867c478bd9Sstevel@tonic-gate /* 53877c478bd9Sstevel@tonic-gate * If the filesystem is mounted using local locking, pass the 53887c478bd9Sstevel@tonic-gate * request off to the local locking code. 
53897c478bd9Sstevel@tonic-gate */ 53907c478bd9Sstevel@tonic-gate if (VTOMI(vp)->mi_flags & MI_LLOCK) { 53917c478bd9Sstevel@tonic-gate if (cmd == F_SETLK || cmd == F_SETLKW) { 53927c478bd9Sstevel@tonic-gate /* 53937c478bd9Sstevel@tonic-gate * For complete safety, we should be holding 53947c478bd9Sstevel@tonic-gate * r_lkserlock. However, we can't call 53957c478bd9Sstevel@tonic-gate * lm_safelock and then fs_frlock while 53967c478bd9Sstevel@tonic-gate * holding r_lkserlock, so just invoke 53977c478bd9Sstevel@tonic-gate * lm_safelock and expect that this will 53987c478bd9Sstevel@tonic-gate * catch enough of the cases. 53997c478bd9Sstevel@tonic-gate */ 54007c478bd9Sstevel@tonic-gate if (!lm_safelock(vp, bfp, cr)) 54017c478bd9Sstevel@tonic-gate return (EAGAIN); 54027c478bd9Sstevel@tonic-gate } 5403da6c28aaSamw return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); 54047c478bd9Sstevel@tonic-gate } 54057c478bd9Sstevel@tonic-gate 54067c478bd9Sstevel@tonic-gate rp = VTOR(vp); 54077c478bd9Sstevel@tonic-gate 54087c478bd9Sstevel@tonic-gate /* 54097c478bd9Sstevel@tonic-gate * Check whether the given lock request can proceed, given the 54107c478bd9Sstevel@tonic-gate * current file mappings. 54117c478bd9Sstevel@tonic-gate */ 54127c478bd9Sstevel@tonic-gate if (nfs_rw_enter_sig(&rp->r_lkserlock, RW_WRITER, intr)) 54137c478bd9Sstevel@tonic-gate return (EINTR); 54147c478bd9Sstevel@tonic-gate if (cmd == F_SETLK || cmd == F_SETLKW) { 54157c478bd9Sstevel@tonic-gate if (!lm_safelock(vp, bfp, cr)) { 54167c478bd9Sstevel@tonic-gate rc = EAGAIN; 54177c478bd9Sstevel@tonic-gate goto done; 54187c478bd9Sstevel@tonic-gate } 54197c478bd9Sstevel@tonic-gate } 54207c478bd9Sstevel@tonic-gate 54217c478bd9Sstevel@tonic-gate /* 54227c478bd9Sstevel@tonic-gate * Flush the cache after waiting for async I/O to finish. For new 54237c478bd9Sstevel@tonic-gate * locks, this is so that the process gets the latest bits from the 54247c478bd9Sstevel@tonic-gate * server. 
For unlocks, this is so that other clients see the 54257c478bd9Sstevel@tonic-gate * latest bits once the file has been unlocked. If currently dirty 54267c478bd9Sstevel@tonic-gate * pages can't be flushed, then don't allow a lock to be set. But 54277c478bd9Sstevel@tonic-gate * allow unlocks to succeed, to avoid having orphan locks on the 54287c478bd9Sstevel@tonic-gate * server. 54297c478bd9Sstevel@tonic-gate */ 54307c478bd9Sstevel@tonic-gate if (cmd != F_GETLK) { 54317c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 54327c478bd9Sstevel@tonic-gate while (rp->r_count > 0) { 54337c478bd9Sstevel@tonic-gate if (intr) { 54347c478bd9Sstevel@tonic-gate klwp_t *lwp = ttolwp(curthread); 54357c478bd9Sstevel@tonic-gate 54367c478bd9Sstevel@tonic-gate if (lwp != NULL) 54377c478bd9Sstevel@tonic-gate lwp->lwp_nostop++; 543893aeed83Smarks if (cv_wait_sig(&rp->r_cv, 543993aeed83Smarks &rp->r_statelock) == 0) { 54407c478bd9Sstevel@tonic-gate if (lwp != NULL) 54417c478bd9Sstevel@tonic-gate lwp->lwp_nostop--; 54427c478bd9Sstevel@tonic-gate rc = EINTR; 54437c478bd9Sstevel@tonic-gate break; 54447c478bd9Sstevel@tonic-gate } 54457c478bd9Sstevel@tonic-gate if (lwp != NULL) 54467c478bd9Sstevel@tonic-gate lwp->lwp_nostop--; 54477c478bd9Sstevel@tonic-gate } else 54487c478bd9Sstevel@tonic-gate cv_wait(&rp->r_cv, &rp->r_statelock); 54497c478bd9Sstevel@tonic-gate } 54507c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 54517c478bd9Sstevel@tonic-gate if (rc != 0) 54527c478bd9Sstevel@tonic-gate goto done; 5453da6c28aaSamw error = nfs3_putpage(vp, (offset_t)0, 0, B_INVAL, cr, ct); 54547c478bd9Sstevel@tonic-gate if (error) { 54557c478bd9Sstevel@tonic-gate if (error == ENOSPC || error == EDQUOT) { 54567c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 54577c478bd9Sstevel@tonic-gate if (!rp->r_error) 54587c478bd9Sstevel@tonic-gate rp->r_error = error; 54597c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 54607c478bd9Sstevel@tonic-gate } 54617c478bd9Sstevel@tonic-gate if 
(bfp->l_type != F_UNLCK) { 54627c478bd9Sstevel@tonic-gate rc = ENOLCK; 54637c478bd9Sstevel@tonic-gate goto done; 54647c478bd9Sstevel@tonic-gate } 54657c478bd9Sstevel@tonic-gate } 54667c478bd9Sstevel@tonic-gate } 54677c478bd9Sstevel@tonic-gate 54687c478bd9Sstevel@tonic-gate lm_fh3.n_len = VTOFH3(vp)->fh3_length; 54697c478bd9Sstevel@tonic-gate lm_fh3.n_bytes = (char *)&(VTOFH3(vp)->fh3_u.data); 54707c478bd9Sstevel@tonic-gate 54717c478bd9Sstevel@tonic-gate /* 54727c478bd9Sstevel@tonic-gate * Call the lock manager to do the real work of contacting 54737c478bd9Sstevel@tonic-gate * the server and obtaining the lock. 54747c478bd9Sstevel@tonic-gate */ 54757c478bd9Sstevel@tonic-gate rc = lm4_frlock(vp, cmd, bfp, flag, offset, cr, &lm_fh3, flk_cbp); 54767c478bd9Sstevel@tonic-gate 54777c478bd9Sstevel@tonic-gate if (rc == 0) 54787c478bd9Sstevel@tonic-gate nfs_lockcompletion(vp, cmd); 54797c478bd9Sstevel@tonic-gate 54807c478bd9Sstevel@tonic-gate done: 54817c478bd9Sstevel@tonic-gate nfs_rw_exit(&rp->r_lkserlock); 54827c478bd9Sstevel@tonic-gate return (rc); 54837c478bd9Sstevel@tonic-gate } 54847c478bd9Sstevel@tonic-gate 54857c478bd9Sstevel@tonic-gate /* 54867c478bd9Sstevel@tonic-gate * Free storage space associated with the specified vnode. The portion 54877c478bd9Sstevel@tonic-gate * to be freed is specified by bfp->l_start and bfp->l_len (already 54887c478bd9Sstevel@tonic-gate * normalized to a "whence" of 0). 54897c478bd9Sstevel@tonic-gate * 54907c478bd9Sstevel@tonic-gate * This is an experimental facility whose continued existence is not 54917c478bd9Sstevel@tonic-gate * guaranteed. Currently, we only support the special case 54927c478bd9Sstevel@tonic-gate * of l_len == 0, meaning free to end of file. 
54937c478bd9Sstevel@tonic-gate */ 54947c478bd9Sstevel@tonic-gate /* ARGSUSED */ 54957c478bd9Sstevel@tonic-gate static int 54967c478bd9Sstevel@tonic-gate nfs3_space(vnode_t *vp, int cmd, struct flock64 *bfp, int flag, 54977c478bd9Sstevel@tonic-gate offset_t offset, cred_t *cr, caller_context_t *ct) 54987c478bd9Sstevel@tonic-gate { 54997c478bd9Sstevel@tonic-gate int error; 55007c478bd9Sstevel@tonic-gate 55017c478bd9Sstevel@tonic-gate ASSERT(vp->v_type == VREG); 55027c478bd9Sstevel@tonic-gate if (cmd != F_FREESP) 55037c478bd9Sstevel@tonic-gate return (EINVAL); 5504108322fbScarlsonj if (nfs_zone() != VTOMI(vp)->mi_zone) 55057c478bd9Sstevel@tonic-gate return (EIO); 55067c478bd9Sstevel@tonic-gate 55077c478bd9Sstevel@tonic-gate error = convoff(vp, bfp, 0, offset); 55087c478bd9Sstevel@tonic-gate if (!error) { 55097c478bd9Sstevel@tonic-gate ASSERT(bfp->l_start >= 0); 55107c478bd9Sstevel@tonic-gate if (bfp->l_len == 0) { 55117c478bd9Sstevel@tonic-gate struct vattr va; 55127c478bd9Sstevel@tonic-gate 55137c478bd9Sstevel@tonic-gate /* 55147c478bd9Sstevel@tonic-gate * ftruncate should not change the ctime and 55157c478bd9Sstevel@tonic-gate * mtime if we truncate the file to its 55167c478bd9Sstevel@tonic-gate * previous size. 
55177c478bd9Sstevel@tonic-gate */ 55187c478bd9Sstevel@tonic-gate va.va_mask = AT_SIZE; 55197c478bd9Sstevel@tonic-gate error = nfs3getattr(vp, &va, cr); 55207c478bd9Sstevel@tonic-gate if (error || va.va_size == bfp->l_start) 55217c478bd9Sstevel@tonic-gate return (error); 55227c478bd9Sstevel@tonic-gate va.va_mask = AT_SIZE; 55237c478bd9Sstevel@tonic-gate va.va_size = bfp->l_start; 55247c478bd9Sstevel@tonic-gate error = nfs3setattr(vp, &va, 0, cr); 552572102e74SBryan Cantrill 552672102e74SBryan Cantrill if (error == 0 && bfp->l_start == 0) 552772102e74SBryan Cantrill vnevent_truncate(vp, ct); 55287c478bd9Sstevel@tonic-gate } else 55297c478bd9Sstevel@tonic-gate error = EINVAL; 55307c478bd9Sstevel@tonic-gate } 55317c478bd9Sstevel@tonic-gate 55327c478bd9Sstevel@tonic-gate return (error); 55337c478bd9Sstevel@tonic-gate } 55347c478bd9Sstevel@tonic-gate 55357c478bd9Sstevel@tonic-gate /* ARGSUSED */ 55367c478bd9Sstevel@tonic-gate static int 5537da6c28aaSamw nfs3_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct) 55387c478bd9Sstevel@tonic-gate { 55397c478bd9Sstevel@tonic-gate 55407c478bd9Sstevel@tonic-gate return (EINVAL); 55417c478bd9Sstevel@tonic-gate } 55427c478bd9Sstevel@tonic-gate 55437c478bd9Sstevel@tonic-gate /* 55447c478bd9Sstevel@tonic-gate * Setup and add an address space callback to do the work of the delmap call. 55457c478bd9Sstevel@tonic-gate * The callback will (and must be) deleted in the actual callback function. 55467c478bd9Sstevel@tonic-gate * 55477c478bd9Sstevel@tonic-gate * This is done in order to take care of the problem that we have with holding 55487c478bd9Sstevel@tonic-gate * the address space's a_lock for a long period of time (e.g. if the NFS server 55497c478bd9Sstevel@tonic-gate * is down). Callbacks will be executed in the address space code while the 55507c478bd9Sstevel@tonic-gate * a_lock is not held. 
Holding the address space's a_lock causes things such 55517c478bd9Sstevel@tonic-gate * as ps and fork to hang because they are trying to acquire this lock as well. 55527c478bd9Sstevel@tonic-gate */ 55537c478bd9Sstevel@tonic-gate /* ARGSUSED */ 55547c478bd9Sstevel@tonic-gate static int 55557c478bd9Sstevel@tonic-gate nfs3_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, 5556da6c28aaSamw size_t len, uint_t prot, uint_t maxprot, uint_t flags, 5557da6c28aaSamw cred_t *cr, caller_context_t *ct) 55587c478bd9Sstevel@tonic-gate { 55597c478bd9Sstevel@tonic-gate int caller_found; 55607c478bd9Sstevel@tonic-gate int error; 55617c478bd9Sstevel@tonic-gate rnode_t *rp; 55627c478bd9Sstevel@tonic-gate nfs_delmap_args_t *dmapp; 55637c478bd9Sstevel@tonic-gate nfs_delmapcall_t *delmap_call; 55647c478bd9Sstevel@tonic-gate 55657c478bd9Sstevel@tonic-gate if (vp->v_flag & VNOMAP) 55667c478bd9Sstevel@tonic-gate return (ENOSYS); 55677c478bd9Sstevel@tonic-gate /* 55687c478bd9Sstevel@tonic-gate * A process may not change zones if it has NFS pages mmap'ed 55697c478bd9Sstevel@tonic-gate * in, so we can't legitimately get here from the wrong zone. 
55707c478bd9Sstevel@tonic-gate */ 5571108322fbScarlsonj ASSERT(nfs_zone() == VTOMI(vp)->mi_zone); 55727c478bd9Sstevel@tonic-gate 55737c478bd9Sstevel@tonic-gate rp = VTOR(vp); 55747c478bd9Sstevel@tonic-gate 55757c478bd9Sstevel@tonic-gate /* 55767c478bd9Sstevel@tonic-gate * The way that the address space of this process deletes its mapping 55777c478bd9Sstevel@tonic-gate * of this file is via the following call chains: 55787c478bd9Sstevel@tonic-gate * - as_free()->SEGOP_UNMAP()/segvn_unmap()->VOP_DELMAP()/nfs3_delmap() 55797c478bd9Sstevel@tonic-gate * - as_unmap()->SEGOP_UNMAP()/segvn_unmap()->VOP_DELMAP()/nfs3_delmap() 55807c478bd9Sstevel@tonic-gate * 55817c478bd9Sstevel@tonic-gate * With the use of address space callbacks we are allowed to drop the 55827c478bd9Sstevel@tonic-gate * address space lock, a_lock, while executing the NFS operations that 55837c478bd9Sstevel@tonic-gate * need to go over the wire. Returning EAGAIN to the caller of this 55847c478bd9Sstevel@tonic-gate * function is what drives the execution of the callback that we add 55857c478bd9Sstevel@tonic-gate * below. The callback will be executed by the address space code 55867c478bd9Sstevel@tonic-gate * after dropping the a_lock. When the callback is finished, since 55877c478bd9Sstevel@tonic-gate * we dropped the a_lock, it must be re-acquired and segvn_unmap() 55887c478bd9Sstevel@tonic-gate * is called again on the same segment to finish the rest of the work 55897c478bd9Sstevel@tonic-gate * that needs to happen during unmapping. 55907c478bd9Sstevel@tonic-gate * 55917c478bd9Sstevel@tonic-gate * This action of calling back into the segment driver causes 55927c478bd9Sstevel@tonic-gate * nfs3_delmap() to get called again, but since the callback was 55937c478bd9Sstevel@tonic-gate * already executed at this point, it already did the work and there 55947c478bd9Sstevel@tonic-gate * is nothing left for us to do. 
55957c478bd9Sstevel@tonic-gate * 55967c478bd9Sstevel@tonic-gate * To Summarize: 55977c478bd9Sstevel@tonic-gate * - The first time nfs3_delmap is called by the current thread is when 55987c478bd9Sstevel@tonic-gate * we add the caller associated with this delmap to the delmap caller 55997c478bd9Sstevel@tonic-gate * list, add the callback, and return EAGAIN. 56007c478bd9Sstevel@tonic-gate * - The second time in this call chain when nfs3_delmap is called we 56017c478bd9Sstevel@tonic-gate * will find this caller in the delmap caller list and realize there 56027c478bd9Sstevel@tonic-gate * is no more work to do thus removing this caller from the list and 56037c478bd9Sstevel@tonic-gate * returning the error that was set in the callback execution. 56047c478bd9Sstevel@tonic-gate */ 56057c478bd9Sstevel@tonic-gate caller_found = nfs_find_and_delete_delmapcall(rp, &error); 56067c478bd9Sstevel@tonic-gate if (caller_found) { 56077c478bd9Sstevel@tonic-gate /* 56087c478bd9Sstevel@tonic-gate * 'error' is from the actual delmap operations. To avoid 56097c478bd9Sstevel@tonic-gate * hangs, we need to handle the return of EAGAIN differently 56107c478bd9Sstevel@tonic-gate * since this is what drives the callback execution. 56117c478bd9Sstevel@tonic-gate * In this case, we don't want to return EAGAIN and do the 56127c478bd9Sstevel@tonic-gate * callback execution because there are none to execute. 
56137c478bd9Sstevel@tonic-gate */ 56147c478bd9Sstevel@tonic-gate if (error == EAGAIN) 56157c478bd9Sstevel@tonic-gate return (0); 56167c478bd9Sstevel@tonic-gate else 56177c478bd9Sstevel@tonic-gate return (error); 56187c478bd9Sstevel@tonic-gate } 56197c478bd9Sstevel@tonic-gate 56207c478bd9Sstevel@tonic-gate /* current caller was not in the list */ 56217c478bd9Sstevel@tonic-gate delmap_call = nfs_init_delmapcall(); 56227c478bd9Sstevel@tonic-gate 56237c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 56247c478bd9Sstevel@tonic-gate list_insert_tail(&rp->r_indelmap, delmap_call); 56257c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 56267c478bd9Sstevel@tonic-gate 56277c478bd9Sstevel@tonic-gate dmapp = kmem_alloc(sizeof (nfs_delmap_args_t), KM_SLEEP); 56287c478bd9Sstevel@tonic-gate 56297c478bd9Sstevel@tonic-gate dmapp->vp = vp; 56307c478bd9Sstevel@tonic-gate dmapp->off = off; 56317c478bd9Sstevel@tonic-gate dmapp->addr = addr; 56327c478bd9Sstevel@tonic-gate dmapp->len = len; 56337c478bd9Sstevel@tonic-gate dmapp->prot = prot; 56347c478bd9Sstevel@tonic-gate dmapp->maxprot = maxprot; 56357c478bd9Sstevel@tonic-gate dmapp->flags = flags; 56367c478bd9Sstevel@tonic-gate dmapp->cr = cr; 56377c478bd9Sstevel@tonic-gate dmapp->caller = delmap_call; 56387c478bd9Sstevel@tonic-gate 56397c478bd9Sstevel@tonic-gate error = as_add_callback(as, nfs3_delmap_callback, dmapp, 56407c478bd9Sstevel@tonic-gate AS_UNMAP_EVENT, addr, len, KM_SLEEP); 56417c478bd9Sstevel@tonic-gate 56427c478bd9Sstevel@tonic-gate return (error ? error : EAGAIN); 56437c478bd9Sstevel@tonic-gate } 56447c478bd9Sstevel@tonic-gate 56457c478bd9Sstevel@tonic-gate /* 56467c478bd9Sstevel@tonic-gate * Remove some pages from an mmap'd vnode. Just update the 56477c478bd9Sstevel@tonic-gate * count of pages. If doing close-to-open, then flush and 56487c478bd9Sstevel@tonic-gate * commit all of the pages associated with this file. 
56497c478bd9Sstevel@tonic-gate * Otherwise, start an asynchronous page flush to write out 56507c478bd9Sstevel@tonic-gate * any dirty pages. This will also associate a credential 56517c478bd9Sstevel@tonic-gate * with the rnode which can be used to write the pages. 56527c478bd9Sstevel@tonic-gate */ 56537c478bd9Sstevel@tonic-gate /* ARGSUSED */ 56547c478bd9Sstevel@tonic-gate static void 56557c478bd9Sstevel@tonic-gate nfs3_delmap_callback(struct as *as, void *arg, uint_t event) 56567c478bd9Sstevel@tonic-gate { 56577c478bd9Sstevel@tonic-gate int error; 56587c478bd9Sstevel@tonic-gate rnode_t *rp; 56597c478bd9Sstevel@tonic-gate mntinfo_t *mi; 56607c478bd9Sstevel@tonic-gate nfs_delmap_args_t *dmapp = (nfs_delmap_args_t *)arg; 56617c478bd9Sstevel@tonic-gate 56627c478bd9Sstevel@tonic-gate rp = VTOR(dmapp->vp); 56637c478bd9Sstevel@tonic-gate mi = VTOMI(dmapp->vp); 56647c478bd9Sstevel@tonic-gate 56657c478bd9Sstevel@tonic-gate atomic_add_long((ulong_t *)&rp->r_mapcnt, -btopr(dmapp->len)); 56667c478bd9Sstevel@tonic-gate ASSERT(rp->r_mapcnt >= 0); 56677c478bd9Sstevel@tonic-gate 56687c478bd9Sstevel@tonic-gate /* 56697c478bd9Sstevel@tonic-gate * Initiate a page flush and potential commit if there are 56707c478bd9Sstevel@tonic-gate * pages, the file system was not mounted readonly, the segment 56717c478bd9Sstevel@tonic-gate * was mapped shared, and the pages themselves were writeable. 56727c478bd9Sstevel@tonic-gate */ 56737c478bd9Sstevel@tonic-gate if (vn_has_cached_data(dmapp->vp) && !vn_is_readonly(dmapp->vp) && 56747c478bd9Sstevel@tonic-gate dmapp->flags == MAP_SHARED && (dmapp->maxprot & PROT_WRITE)) { 56757c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 56767c478bd9Sstevel@tonic-gate rp->r_flags |= RDIRTY; 56777c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 56787c478bd9Sstevel@tonic-gate /* 56797c478bd9Sstevel@tonic-gate * If this is a cross-zone access a sync putpage won't work, so 56807c478bd9Sstevel@tonic-gate * the best we can do is try an async putpage. 
That seems 56817c478bd9Sstevel@tonic-gate * better than something more draconian such as discarding the 56827c478bd9Sstevel@tonic-gate * dirty pages. 56837c478bd9Sstevel@tonic-gate */ 56847c478bd9Sstevel@tonic-gate if ((mi->mi_flags & MI_NOCTO) || 5685108322fbScarlsonj nfs_zone() != mi->mi_zone) 56867c478bd9Sstevel@tonic-gate error = nfs3_putpage(dmapp->vp, dmapp->off, dmapp->len, 5687da6c28aaSamw B_ASYNC, dmapp->cr, NULL); 56887c478bd9Sstevel@tonic-gate else 56897c478bd9Sstevel@tonic-gate error = nfs3_putpage_commit(dmapp->vp, dmapp->off, 56907c478bd9Sstevel@tonic-gate dmapp->len, dmapp->cr); 56917c478bd9Sstevel@tonic-gate if (!error) { 56927c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 56937c478bd9Sstevel@tonic-gate error = rp->r_error; 56947c478bd9Sstevel@tonic-gate rp->r_error = 0; 56957c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 56967c478bd9Sstevel@tonic-gate } 56977c478bd9Sstevel@tonic-gate } else 56987c478bd9Sstevel@tonic-gate error = 0; 56997c478bd9Sstevel@tonic-gate 57007c478bd9Sstevel@tonic-gate if ((rp->r_flags & RDIRECTIO) || (mi->mi_flags & MI_DIRECTIO)) 57017c478bd9Sstevel@tonic-gate (void) nfs3_putpage(dmapp->vp, dmapp->off, dmapp->len, 5702da6c28aaSamw B_INVAL, dmapp->cr, NULL); 57037c478bd9Sstevel@tonic-gate 57047c478bd9Sstevel@tonic-gate dmapp->caller->error = error; 57057c478bd9Sstevel@tonic-gate (void) as_delete_callback(as, arg); 57067c478bd9Sstevel@tonic-gate kmem_free(dmapp, sizeof (nfs_delmap_args_t)); 57077c478bd9Sstevel@tonic-gate } 57087c478bd9Sstevel@tonic-gate 57097c478bd9Sstevel@tonic-gate static int nfs3_pathconf_disable_cache = 0; 57107c478bd9Sstevel@tonic-gate 57117c478bd9Sstevel@tonic-gate #ifdef DEBUG 57127c478bd9Sstevel@tonic-gate static int nfs3_pathconf_cache_hits = 0; 57137c478bd9Sstevel@tonic-gate static int nfs3_pathconf_cache_misses = 0; 57147c478bd9Sstevel@tonic-gate #endif 57157c478bd9Sstevel@tonic-gate 5716da6c28aaSamw /* ARGSUSED */ 57177c478bd9Sstevel@tonic-gate static int 5718da6c28aaSamw 
nfs3_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 5719da6c28aaSamw caller_context_t *ct) 57207c478bd9Sstevel@tonic-gate { 57217c478bd9Sstevel@tonic-gate int error; 57227c478bd9Sstevel@tonic-gate PATHCONF3args args; 57237c478bd9Sstevel@tonic-gate PATHCONF3res res; 57247c478bd9Sstevel@tonic-gate int douprintf; 57257c478bd9Sstevel@tonic-gate failinfo_t fi; 57267c478bd9Sstevel@tonic-gate rnode_t *rp; 57277c478bd9Sstevel@tonic-gate hrtime_t t; 57287c478bd9Sstevel@tonic-gate 5729108322fbScarlsonj if (nfs_zone() != VTOMI(vp)->mi_zone) 57307c478bd9Sstevel@tonic-gate return (EIO); 57317c478bd9Sstevel@tonic-gate /* 57327c478bd9Sstevel@tonic-gate * Large file spec - need to base answer on info stored 57337c478bd9Sstevel@tonic-gate * on original FSINFO response. 57347c478bd9Sstevel@tonic-gate */ 57357c478bd9Sstevel@tonic-gate if (cmd == _PC_FILESIZEBITS) { 57367c478bd9Sstevel@tonic-gate unsigned long long ll; 57377c478bd9Sstevel@tonic-gate long l = 1; 57387c478bd9Sstevel@tonic-gate 57397c478bd9Sstevel@tonic-gate ll = VTOMI(vp)->mi_maxfilesize; 57407c478bd9Sstevel@tonic-gate 57417c478bd9Sstevel@tonic-gate if (ll == 0) { 57427c478bd9Sstevel@tonic-gate *valp = 0; 57437c478bd9Sstevel@tonic-gate return (0); 57447c478bd9Sstevel@tonic-gate } 57457c478bd9Sstevel@tonic-gate 57467c478bd9Sstevel@tonic-gate if (ll & 0xffffffff00000000) { 57477c478bd9Sstevel@tonic-gate l += 32; ll >>= 32; 57487c478bd9Sstevel@tonic-gate } 57497c478bd9Sstevel@tonic-gate if (ll & 0xffff0000) { 57507c478bd9Sstevel@tonic-gate l += 16; ll >>= 16; 57517c478bd9Sstevel@tonic-gate } 57527c478bd9Sstevel@tonic-gate if (ll & 0xff00) { 57537c478bd9Sstevel@tonic-gate l += 8; ll >>= 8; 57547c478bd9Sstevel@tonic-gate } 57557c478bd9Sstevel@tonic-gate if (ll & 0xf0) { 57567c478bd9Sstevel@tonic-gate l += 4; ll >>= 4; 57577c478bd9Sstevel@tonic-gate } 57587c478bd9Sstevel@tonic-gate if (ll & 0xc) { 57597c478bd9Sstevel@tonic-gate l += 2; ll >>= 2; 57607c478bd9Sstevel@tonic-gate } 57617c478bd9Sstevel@tonic-gate if (ll 
& 0x2) 57627c478bd9Sstevel@tonic-gate l += 2; 57637c478bd9Sstevel@tonic-gate else if (ll & 0x1) 57647c478bd9Sstevel@tonic-gate l += 1; 57657c478bd9Sstevel@tonic-gate *valp = l; 57667c478bd9Sstevel@tonic-gate return (0); 57677c478bd9Sstevel@tonic-gate } 57687c478bd9Sstevel@tonic-gate 57697c478bd9Sstevel@tonic-gate if (cmd == _PC_ACL_ENABLED) { 57707c478bd9Sstevel@tonic-gate *valp = _ACL_ACLENT_ENABLED; 57717c478bd9Sstevel@tonic-gate return (0); 57727c478bd9Sstevel@tonic-gate } 57737c478bd9Sstevel@tonic-gate 57747c478bd9Sstevel@tonic-gate if (cmd == _PC_XATTR_EXISTS) { 57757c478bd9Sstevel@tonic-gate error = 0; 57767c478bd9Sstevel@tonic-gate *valp = 0; 57777c478bd9Sstevel@tonic-gate if (vp->v_vfsp->vfs_flag & VFS_XATTR) { 57787c478bd9Sstevel@tonic-gate vnode_t *avp; 57797c478bd9Sstevel@tonic-gate rnode_t *rp; 57807c478bd9Sstevel@tonic-gate int error = 0; 57817c478bd9Sstevel@tonic-gate mntinfo_t *mi = VTOMI(vp); 57827c478bd9Sstevel@tonic-gate 57837c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_EXTATTR)) 57847c478bd9Sstevel@tonic-gate return (0); 57857c478bd9Sstevel@tonic-gate 57867c478bd9Sstevel@tonic-gate rp = VTOR(vp); 57877c478bd9Sstevel@tonic-gate if (nfs_rw_enter_sig(&rp->r_rwlock, RW_READER, 57887c478bd9Sstevel@tonic-gate INTR(vp))) 57897c478bd9Sstevel@tonic-gate return (EINTR); 57907c478bd9Sstevel@tonic-gate 57917c478bd9Sstevel@tonic-gate error = nfs3lookup_dnlc(vp, XATTR_DIR_NAME, &avp, cr); 57927c478bd9Sstevel@tonic-gate if (error || avp == NULL) 57937c478bd9Sstevel@tonic-gate error = acl_getxattrdir3(vp, &avp, 0, cr, 0); 57947c478bd9Sstevel@tonic-gate 57957c478bd9Sstevel@tonic-gate nfs_rw_exit(&rp->r_rwlock); 57967c478bd9Sstevel@tonic-gate 57977c478bd9Sstevel@tonic-gate if (error == 0 && avp != NULL) { 579893aeed83Smarks error = do_xattr_exists_check(avp, valp, cr); 57997c478bd9Sstevel@tonic-gate VN_RELE(avp); 580093aeed83Smarks } else if (error == ENOENT) { 58017c478bd9Sstevel@tonic-gate error = 0; 580293aeed83Smarks *valp = 0; 580393aeed83Smarks } 
58047c478bd9Sstevel@tonic-gate } 58057c478bd9Sstevel@tonic-gate return (error); 58067c478bd9Sstevel@tonic-gate } 58077c478bd9Sstevel@tonic-gate 58087c478bd9Sstevel@tonic-gate rp = VTOR(vp); 58097c478bd9Sstevel@tonic-gate if (rp->r_pathconf != NULL) { 58107c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 58117c478bd9Sstevel@tonic-gate if (rp->r_pathconf != NULL && nfs3_pathconf_disable_cache) { 58127c478bd9Sstevel@tonic-gate kmem_free(rp->r_pathconf, sizeof (*rp->r_pathconf)); 58137c478bd9Sstevel@tonic-gate rp->r_pathconf = NULL; 58147c478bd9Sstevel@tonic-gate } 58157c478bd9Sstevel@tonic-gate if (rp->r_pathconf != NULL) { 58167c478bd9Sstevel@tonic-gate error = 0; 58177c478bd9Sstevel@tonic-gate switch (cmd) { 58187c478bd9Sstevel@tonic-gate case _PC_LINK_MAX: 58197c478bd9Sstevel@tonic-gate *valp = rp->r_pathconf->link_max; 58207c478bd9Sstevel@tonic-gate break; 58217c478bd9Sstevel@tonic-gate case _PC_NAME_MAX: 58227c478bd9Sstevel@tonic-gate *valp = rp->r_pathconf->name_max; 58237c478bd9Sstevel@tonic-gate break; 58247c478bd9Sstevel@tonic-gate case _PC_PATH_MAX: 58257c478bd9Sstevel@tonic-gate case _PC_SYMLINK_MAX: 58267c478bd9Sstevel@tonic-gate *valp = MAXPATHLEN; 58277c478bd9Sstevel@tonic-gate break; 58287c478bd9Sstevel@tonic-gate case _PC_CHOWN_RESTRICTED: 58297c478bd9Sstevel@tonic-gate *valp = rp->r_pathconf->chown_restricted; 58307c478bd9Sstevel@tonic-gate break; 58317c478bd9Sstevel@tonic-gate case _PC_NO_TRUNC: 58327c478bd9Sstevel@tonic-gate *valp = rp->r_pathconf->no_trunc; 58337c478bd9Sstevel@tonic-gate break; 58347c478bd9Sstevel@tonic-gate default: 58357c478bd9Sstevel@tonic-gate error = EINVAL; 58367c478bd9Sstevel@tonic-gate break; 58377c478bd9Sstevel@tonic-gate } 58387c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 58397c478bd9Sstevel@tonic-gate #ifdef DEBUG 58407c478bd9Sstevel@tonic-gate nfs3_pathconf_cache_hits++; 58417c478bd9Sstevel@tonic-gate #endif 58427c478bd9Sstevel@tonic-gate return (error); 58437c478bd9Sstevel@tonic-gate } 
58447c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 58457c478bd9Sstevel@tonic-gate } 58467c478bd9Sstevel@tonic-gate #ifdef DEBUG 58477c478bd9Sstevel@tonic-gate nfs3_pathconf_cache_misses++; 58487c478bd9Sstevel@tonic-gate #endif 58497c478bd9Sstevel@tonic-gate 58507c478bd9Sstevel@tonic-gate args.object = *VTOFH3(vp); 58517c478bd9Sstevel@tonic-gate fi.vp = vp; 58527c478bd9Sstevel@tonic-gate fi.fhp = (caddr_t)&args.object; 58537c478bd9Sstevel@tonic-gate fi.copyproc = nfs3copyfh; 58547c478bd9Sstevel@tonic-gate fi.lookupproc = nfs3lookup; 58557c478bd9Sstevel@tonic-gate fi.xattrdirproc = acl_getxattrdir3; 58567c478bd9Sstevel@tonic-gate 58577c478bd9Sstevel@tonic-gate douprintf = 1; 58587c478bd9Sstevel@tonic-gate 58597c478bd9Sstevel@tonic-gate t = gethrtime(); 58607c478bd9Sstevel@tonic-gate 58617c478bd9Sstevel@tonic-gate error = rfs3call(VTOMI(vp), NFSPROC3_PATHCONF, 58627c478bd9Sstevel@tonic-gate xdr_nfs_fh3, (caddr_t)&args, 58637c478bd9Sstevel@tonic-gate xdr_PATHCONF3res, (caddr_t)&res, cr, 58647c478bd9Sstevel@tonic-gate &douprintf, &res.status, 0, &fi); 58657c478bd9Sstevel@tonic-gate 58667c478bd9Sstevel@tonic-gate if (error) 58677c478bd9Sstevel@tonic-gate return (error); 58687c478bd9Sstevel@tonic-gate 58697c478bd9Sstevel@tonic-gate error = geterrno3(res.status); 58707c478bd9Sstevel@tonic-gate 58717c478bd9Sstevel@tonic-gate if (!error) { 58727c478bd9Sstevel@tonic-gate nfs3_cache_post_op_attr(vp, &res.resok.obj_attributes, t, cr); 58737c478bd9Sstevel@tonic-gate if (!nfs3_pathconf_disable_cache) { 58747c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 58757c478bd9Sstevel@tonic-gate if (rp->r_pathconf == NULL) { 58767c478bd9Sstevel@tonic-gate rp->r_pathconf = kmem_alloc( 58777c478bd9Sstevel@tonic-gate sizeof (*rp->r_pathconf), KM_NOSLEEP); 58787c478bd9Sstevel@tonic-gate if (rp->r_pathconf != NULL) 58797c478bd9Sstevel@tonic-gate *rp->r_pathconf = res.resok.info; 58807c478bd9Sstevel@tonic-gate } 58817c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 
58827c478bd9Sstevel@tonic-gate } 58837c478bd9Sstevel@tonic-gate switch (cmd) { 58847c478bd9Sstevel@tonic-gate case _PC_LINK_MAX: 58857c478bd9Sstevel@tonic-gate *valp = res.resok.info.link_max; 58867c478bd9Sstevel@tonic-gate break; 58877c478bd9Sstevel@tonic-gate case _PC_NAME_MAX: 58887c478bd9Sstevel@tonic-gate *valp = res.resok.info.name_max; 58897c478bd9Sstevel@tonic-gate break; 58907c478bd9Sstevel@tonic-gate case _PC_PATH_MAX: 58917c478bd9Sstevel@tonic-gate case _PC_SYMLINK_MAX: 58927c478bd9Sstevel@tonic-gate *valp = MAXPATHLEN; 58937c478bd9Sstevel@tonic-gate break; 58947c478bd9Sstevel@tonic-gate case _PC_CHOWN_RESTRICTED: 58957c478bd9Sstevel@tonic-gate *valp = res.resok.info.chown_restricted; 58967c478bd9Sstevel@tonic-gate break; 58977c478bd9Sstevel@tonic-gate case _PC_NO_TRUNC: 58987c478bd9Sstevel@tonic-gate *valp = res.resok.info.no_trunc; 58997c478bd9Sstevel@tonic-gate break; 59007c478bd9Sstevel@tonic-gate default: 59017c478bd9Sstevel@tonic-gate return (EINVAL); 59027c478bd9Sstevel@tonic-gate } 59037c478bd9Sstevel@tonic-gate } else { 59047c478bd9Sstevel@tonic-gate nfs3_cache_post_op_attr(vp, &res.resfail.obj_attributes, t, cr); 59057c478bd9Sstevel@tonic-gate PURGE_STALE_FH(error, vp, cr); 59067c478bd9Sstevel@tonic-gate } 59077c478bd9Sstevel@tonic-gate 59087c478bd9Sstevel@tonic-gate return (error); 59097c478bd9Sstevel@tonic-gate } 59107c478bd9Sstevel@tonic-gate 59117c478bd9Sstevel@tonic-gate /* 59127c478bd9Sstevel@tonic-gate * Called by async thread to do synchronous pageio. Do the i/o, wait 59137c478bd9Sstevel@tonic-gate * for it to complete, and cleanup the page list when done. 
59147c478bd9Sstevel@tonic-gate */ 59157c478bd9Sstevel@tonic-gate static int 59167c478bd9Sstevel@tonic-gate nfs3_sync_pageio(vnode_t *vp, page_t *pp, u_offset_t io_off, size_t io_len, 59177c478bd9Sstevel@tonic-gate int flags, cred_t *cr) 59187c478bd9Sstevel@tonic-gate { 59197c478bd9Sstevel@tonic-gate int error; 59207c478bd9Sstevel@tonic-gate 5921108322fbScarlsonj ASSERT(nfs_zone() == VTOMI(vp)->mi_zone); 59227c478bd9Sstevel@tonic-gate error = nfs3_rdwrlbn(vp, pp, io_off, io_len, flags, cr); 59237c478bd9Sstevel@tonic-gate if (flags & B_READ) 59247c478bd9Sstevel@tonic-gate pvn_read_done(pp, (error ? B_ERROR : 0) | flags); 59257c478bd9Sstevel@tonic-gate else 59267c478bd9Sstevel@tonic-gate pvn_write_done(pp, (error ? B_ERROR : 0) | flags); 59277c478bd9Sstevel@tonic-gate return (error); 59287c478bd9Sstevel@tonic-gate } 59297c478bd9Sstevel@tonic-gate 5930da6c28aaSamw /* ARGSUSED */ 59317c478bd9Sstevel@tonic-gate static int 59327c478bd9Sstevel@tonic-gate nfs3_pageio(vnode_t *vp, page_t *pp, u_offset_t io_off, size_t io_len, 5933da6c28aaSamw int flags, cred_t *cr, caller_context_t *ct) 59347c478bd9Sstevel@tonic-gate { 59357c478bd9Sstevel@tonic-gate int error; 59367c478bd9Sstevel@tonic-gate rnode_t *rp; 59377c478bd9Sstevel@tonic-gate 59387c478bd9Sstevel@tonic-gate if (pp == NULL) 59397c478bd9Sstevel@tonic-gate return (EINVAL); 5940108322fbScarlsonj if (!(flags & B_ASYNC) && nfs_zone() != VTOMI(vp)->mi_zone) 59417c478bd9Sstevel@tonic-gate return (EIO); 59427c478bd9Sstevel@tonic-gate 59437c478bd9Sstevel@tonic-gate rp = VTOR(vp); 59447c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 59457c478bd9Sstevel@tonic-gate rp->r_count++; 59467c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 59477c478bd9Sstevel@tonic-gate 59487c478bd9Sstevel@tonic-gate if (flags & B_ASYNC) { 59497c478bd9Sstevel@tonic-gate error = nfs_async_pageio(vp, pp, io_off, io_len, flags, cr, 59507c478bd9Sstevel@tonic-gate nfs3_sync_pageio); 59517c478bd9Sstevel@tonic-gate } else 
59527c478bd9Sstevel@tonic-gate error = nfs3_rdwrlbn(vp, pp, io_off, io_len, flags, cr); 59537c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 59547c478bd9Sstevel@tonic-gate rp->r_count--; 59557c478bd9Sstevel@tonic-gate cv_broadcast(&rp->r_cv); 59567c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 59577c478bd9Sstevel@tonic-gate return (error); 59587c478bd9Sstevel@tonic-gate } 59597c478bd9Sstevel@tonic-gate 5960da6c28aaSamw /* ARGSUSED */ 59617c478bd9Sstevel@tonic-gate static void 5962da6c28aaSamw nfs3_dispose(vnode_t *vp, page_t *pp, int fl, int dn, cred_t *cr, 5963da6c28aaSamw caller_context_t *ct) 59647c478bd9Sstevel@tonic-gate { 59657c478bd9Sstevel@tonic-gate int error; 59667c478bd9Sstevel@tonic-gate rnode_t *rp; 59677c478bd9Sstevel@tonic-gate page_t *plist; 59687c478bd9Sstevel@tonic-gate page_t *pptr; 59697c478bd9Sstevel@tonic-gate offset3 offset; 59707c478bd9Sstevel@tonic-gate count3 len; 59717c478bd9Sstevel@tonic-gate k_sigset_t smask; 59727c478bd9Sstevel@tonic-gate 59737c478bd9Sstevel@tonic-gate /* 59747c478bd9Sstevel@tonic-gate * We should get called with fl equal to either B_FREE or 59757c478bd9Sstevel@tonic-gate * B_INVAL. Any other value is illegal. 59767c478bd9Sstevel@tonic-gate * 59777c478bd9Sstevel@tonic-gate * The page that we are either supposed to free or destroy 59787c478bd9Sstevel@tonic-gate * should be exclusive locked and its io lock should not 59797c478bd9Sstevel@tonic-gate * be held. 
59807c478bd9Sstevel@tonic-gate */ 59817c478bd9Sstevel@tonic-gate ASSERT(fl == B_FREE || fl == B_INVAL); 59827c478bd9Sstevel@tonic-gate ASSERT((PAGE_EXCL(pp) && !page_iolock_assert(pp)) || panicstr); 59837c478bd9Sstevel@tonic-gate rp = VTOR(vp); 59847c478bd9Sstevel@tonic-gate 59857c478bd9Sstevel@tonic-gate /* 59867c478bd9Sstevel@tonic-gate * If the page doesn't need to be committed or we shouldn't 59877c478bd9Sstevel@tonic-gate * even bother attempting to commit it, then just make sure 59887c478bd9Sstevel@tonic-gate * that the p_fsdata byte is clear and then either free or 59897c478bd9Sstevel@tonic-gate * destroy the page as appropriate. 59907c478bd9Sstevel@tonic-gate */ 59917c478bd9Sstevel@tonic-gate if (pp->p_fsdata == C_NOCOMMIT || (rp->r_flags & RSTALE)) { 59927c478bd9Sstevel@tonic-gate pp->p_fsdata = C_NOCOMMIT; 59937c478bd9Sstevel@tonic-gate if (fl == B_FREE) 59947c478bd9Sstevel@tonic-gate page_free(pp, dn); 59957c478bd9Sstevel@tonic-gate else 59967c478bd9Sstevel@tonic-gate page_destroy(pp, dn); 59977c478bd9Sstevel@tonic-gate return; 59987c478bd9Sstevel@tonic-gate } 59997c478bd9Sstevel@tonic-gate 60007c478bd9Sstevel@tonic-gate /* 60017c478bd9Sstevel@tonic-gate * If there is a page invalidation operation going on, then 60027c478bd9Sstevel@tonic-gate * if this is one of the pages being destroyed, then just 60037c478bd9Sstevel@tonic-gate * clear the p_fsdata byte and then either free or destroy 60047c478bd9Sstevel@tonic-gate * the page as appropriate. 
60057c478bd9Sstevel@tonic-gate */ 60067c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 60077c478bd9Sstevel@tonic-gate if ((rp->r_flags & RTRUNCATE) && pp->p_offset >= rp->r_truncaddr) { 60087c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 60097c478bd9Sstevel@tonic-gate pp->p_fsdata = C_NOCOMMIT; 60107c478bd9Sstevel@tonic-gate if (fl == B_FREE) 60117c478bd9Sstevel@tonic-gate page_free(pp, dn); 60127c478bd9Sstevel@tonic-gate else 60137c478bd9Sstevel@tonic-gate page_destroy(pp, dn); 60147c478bd9Sstevel@tonic-gate return; 60157c478bd9Sstevel@tonic-gate } 60167c478bd9Sstevel@tonic-gate 60177c478bd9Sstevel@tonic-gate /* 60187c478bd9Sstevel@tonic-gate * If we are freeing this page and someone else is already 60197c478bd9Sstevel@tonic-gate * waiting to do a commit, then just unlock the page and 60207c478bd9Sstevel@tonic-gate * return. That other thread will take care of commiting 60217c478bd9Sstevel@tonic-gate * this page. The page can be freed sometime after the 60227c478bd9Sstevel@tonic-gate * commit has finished. Otherwise, if the page is marked 60237c478bd9Sstevel@tonic-gate * as delay commit, then we may be getting called from 60247c478bd9Sstevel@tonic-gate * pvn_write_done, one page at a time. This could result 60257c478bd9Sstevel@tonic-gate * in one commit per page, so we end up doing lots of small 60267c478bd9Sstevel@tonic-gate * commits instead of fewer larger commits. This is bad, 60277c478bd9Sstevel@tonic-gate * we want do as few commits as possible. 
60287c478bd9Sstevel@tonic-gate */ 60297c478bd9Sstevel@tonic-gate if (fl == B_FREE) { 60307c478bd9Sstevel@tonic-gate if (rp->r_flags & RCOMMITWAIT) { 60317c478bd9Sstevel@tonic-gate page_unlock(pp); 60327c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 60337c478bd9Sstevel@tonic-gate return; 60347c478bd9Sstevel@tonic-gate } 60357c478bd9Sstevel@tonic-gate if (pp->p_fsdata == C_DELAYCOMMIT) { 60367c478bd9Sstevel@tonic-gate pp->p_fsdata = C_COMMIT; 60377c478bd9Sstevel@tonic-gate page_unlock(pp); 60387c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 60397c478bd9Sstevel@tonic-gate return; 60407c478bd9Sstevel@tonic-gate } 60417c478bd9Sstevel@tonic-gate } 60427c478bd9Sstevel@tonic-gate 60437c478bd9Sstevel@tonic-gate /* 60447c478bd9Sstevel@tonic-gate * Check to see if there is a signal which would prevent an 60457c478bd9Sstevel@tonic-gate * attempt to commit the pages from being successful. If so, 60467c478bd9Sstevel@tonic-gate * then don't bother with all of the work to gather pages and 60477c478bd9Sstevel@tonic-gate * generate the unsuccessful RPC. Just return from here and 60487c478bd9Sstevel@tonic-gate * let the page be committed at some later time. 60497c478bd9Sstevel@tonic-gate */ 60507c478bd9Sstevel@tonic-gate sigintr(&smask, VTOMI(vp)->mi_flags & MI_INT); 60517c478bd9Sstevel@tonic-gate if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING)) { 60527c478bd9Sstevel@tonic-gate sigunintr(&smask); 60537c478bd9Sstevel@tonic-gate page_unlock(pp); 60547c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 60557c478bd9Sstevel@tonic-gate return; 60567c478bd9Sstevel@tonic-gate } 60577c478bd9Sstevel@tonic-gate sigunintr(&smask); 60587c478bd9Sstevel@tonic-gate 60597c478bd9Sstevel@tonic-gate /* 60607c478bd9Sstevel@tonic-gate * We are starting to need to commit pages, so let's try 60617c478bd9Sstevel@tonic-gate * to commit as many as possible at once to reduce the 60627c478bd9Sstevel@tonic-gate * overhead. 
60637c478bd9Sstevel@tonic-gate * 60647c478bd9Sstevel@tonic-gate * Set the `commit inprogress' state bit. We must 60657c478bd9Sstevel@tonic-gate * first wait until any current one finishes. Then 60667c478bd9Sstevel@tonic-gate * we initialize the c_pages list with this page. 60677c478bd9Sstevel@tonic-gate */ 60687c478bd9Sstevel@tonic-gate while (rp->r_flags & RCOMMIT) { 60697c478bd9Sstevel@tonic-gate rp->r_flags |= RCOMMITWAIT; 60707c478bd9Sstevel@tonic-gate cv_wait(&rp->r_commit.c_cv, &rp->r_statelock); 60717c478bd9Sstevel@tonic-gate rp->r_flags &= ~RCOMMITWAIT; 60727c478bd9Sstevel@tonic-gate } 60737c478bd9Sstevel@tonic-gate rp->r_flags |= RCOMMIT; 60747c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 60757c478bd9Sstevel@tonic-gate ASSERT(rp->r_commit.c_pages == NULL); 60767c478bd9Sstevel@tonic-gate rp->r_commit.c_pages = pp; 60777c478bd9Sstevel@tonic-gate rp->r_commit.c_commbase = (offset3)pp->p_offset; 60787c478bd9Sstevel@tonic-gate rp->r_commit.c_commlen = PAGESIZE; 60797c478bd9Sstevel@tonic-gate 60807c478bd9Sstevel@tonic-gate /* 60817c478bd9Sstevel@tonic-gate * Gather together all other pages which can be committed. 60827c478bd9Sstevel@tonic-gate * They will all be chained off r_commit.c_pages. 60837c478bd9Sstevel@tonic-gate */ 60847c478bd9Sstevel@tonic-gate nfs3_get_commit(vp); 60857c478bd9Sstevel@tonic-gate 60867c478bd9Sstevel@tonic-gate /* 60877c478bd9Sstevel@tonic-gate * Clear the `commit inprogress' status and disconnect 60887c478bd9Sstevel@tonic-gate * the list of pages to be committed from the rnode. 60897c478bd9Sstevel@tonic-gate * At this same time, we also save the starting offset 60907c478bd9Sstevel@tonic-gate * and length of data to be committed on the server. 
60917c478bd9Sstevel@tonic-gate */ 60927c478bd9Sstevel@tonic-gate plist = rp->r_commit.c_pages; 60937c478bd9Sstevel@tonic-gate rp->r_commit.c_pages = NULL; 60947c478bd9Sstevel@tonic-gate offset = rp->r_commit.c_commbase; 60957c478bd9Sstevel@tonic-gate len = rp->r_commit.c_commlen; 60967c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 60977c478bd9Sstevel@tonic-gate rp->r_flags &= ~RCOMMIT; 60987c478bd9Sstevel@tonic-gate cv_broadcast(&rp->r_commit.c_cv); 60997c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 61007c478bd9Sstevel@tonic-gate 61017c478bd9Sstevel@tonic-gate if (curproc == proc_pageout || curproc == proc_fsflush || 6102108322fbScarlsonj nfs_zone() != VTOMI(vp)->mi_zone) { 61037c478bd9Sstevel@tonic-gate nfs_async_commit(vp, plist, offset, len, cr, nfs3_async_commit); 61047c478bd9Sstevel@tonic-gate return; 61057c478bd9Sstevel@tonic-gate } 61067c478bd9Sstevel@tonic-gate 61077c478bd9Sstevel@tonic-gate /* 61087c478bd9Sstevel@tonic-gate * Actually generate the COMMIT3 over the wire operation. 61097c478bd9Sstevel@tonic-gate */ 61107c478bd9Sstevel@tonic-gate error = nfs3_commit(vp, offset, len, cr); 61117c478bd9Sstevel@tonic-gate 61127c478bd9Sstevel@tonic-gate /* 61137c478bd9Sstevel@tonic-gate * If we got an error during the commit, just unlock all 61147c478bd9Sstevel@tonic-gate * of the pages. The pages will get retransmitted to the 61157c478bd9Sstevel@tonic-gate * server during a putpage operation. 
61167c478bd9Sstevel@tonic-gate */ 61177c478bd9Sstevel@tonic-gate if (error) { 61187c478bd9Sstevel@tonic-gate while (plist != NULL) { 61197c478bd9Sstevel@tonic-gate pptr = plist; 61207c478bd9Sstevel@tonic-gate page_sub(&plist, pptr); 61217c478bd9Sstevel@tonic-gate page_unlock(pptr); 61227c478bd9Sstevel@tonic-gate } 61237c478bd9Sstevel@tonic-gate return; 61247c478bd9Sstevel@tonic-gate } 61257c478bd9Sstevel@tonic-gate 61267c478bd9Sstevel@tonic-gate /* 61277c478bd9Sstevel@tonic-gate * We've tried as hard as we can to commit the data to stable 61287c478bd9Sstevel@tonic-gate * storage on the server. We release the rest of the pages 61297c478bd9Sstevel@tonic-gate * and clear the commit required state. They will be put 61307c478bd9Sstevel@tonic-gate * onto the tail of the cachelist if they are nolonger 61317c478bd9Sstevel@tonic-gate * mapped. 61327c478bd9Sstevel@tonic-gate */ 61337c478bd9Sstevel@tonic-gate while (plist != pp) { 61347c478bd9Sstevel@tonic-gate pptr = plist; 61357c478bd9Sstevel@tonic-gate page_sub(&plist, pptr); 61367c478bd9Sstevel@tonic-gate pptr->p_fsdata = C_NOCOMMIT; 61377c478bd9Sstevel@tonic-gate (void) page_release(pptr, 1); 61387c478bd9Sstevel@tonic-gate } 61397c478bd9Sstevel@tonic-gate 61407c478bd9Sstevel@tonic-gate /* 61417c478bd9Sstevel@tonic-gate * It is possible that nfs3_commit didn't return error but 61427c478bd9Sstevel@tonic-gate * some other thread has modified the page we are going 61437c478bd9Sstevel@tonic-gate * to free/destroy. 61447c478bd9Sstevel@tonic-gate * In this case we need to rewrite the page. Do an explicit check 61457c478bd9Sstevel@tonic-gate * before attempting to free/destroy the page. If modified, needs to 61467c478bd9Sstevel@tonic-gate * be rewritten so unlock the page and return. 
61477c478bd9Sstevel@tonic-gate */ 61487c478bd9Sstevel@tonic-gate if (hat_ismod(pp)) { 61497c478bd9Sstevel@tonic-gate pp->p_fsdata = C_NOCOMMIT; 61507c478bd9Sstevel@tonic-gate page_unlock(pp); 61517c478bd9Sstevel@tonic-gate return; 61527c478bd9Sstevel@tonic-gate } 61537c478bd9Sstevel@tonic-gate 61547c478bd9Sstevel@tonic-gate /* 61557c478bd9Sstevel@tonic-gate * Now, as appropriate, either free or destroy the page 61567c478bd9Sstevel@tonic-gate * that we were called with. 61577c478bd9Sstevel@tonic-gate */ 61587c478bd9Sstevel@tonic-gate pp->p_fsdata = C_NOCOMMIT; 61597c478bd9Sstevel@tonic-gate if (fl == B_FREE) 61607c478bd9Sstevel@tonic-gate page_free(pp, dn); 61617c478bd9Sstevel@tonic-gate else 61627c478bd9Sstevel@tonic-gate page_destroy(pp, dn); 61637c478bd9Sstevel@tonic-gate } 61647c478bd9Sstevel@tonic-gate 61657c478bd9Sstevel@tonic-gate static int 61667c478bd9Sstevel@tonic-gate nfs3_commit(vnode_t *vp, offset3 offset, count3 count, cred_t *cr) 61677c478bd9Sstevel@tonic-gate { 61687c478bd9Sstevel@tonic-gate int error; 61697c478bd9Sstevel@tonic-gate rnode_t *rp; 61707c478bd9Sstevel@tonic-gate COMMIT3args args; 61717c478bd9Sstevel@tonic-gate COMMIT3res res; 61727c478bd9Sstevel@tonic-gate int douprintf; 61737c478bd9Sstevel@tonic-gate cred_t *cred; 61747c478bd9Sstevel@tonic-gate 61757c478bd9Sstevel@tonic-gate rp = VTOR(vp); 6176108322fbScarlsonj ASSERT(nfs_zone() == VTOMI(vp)->mi_zone); 61777c478bd9Sstevel@tonic-gate 61787c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 61797c478bd9Sstevel@tonic-gate if (rp->r_cred != NULL) { 61807c478bd9Sstevel@tonic-gate cred = rp->r_cred; 61817c478bd9Sstevel@tonic-gate crhold(cred); 61827c478bd9Sstevel@tonic-gate } else { 61837c478bd9Sstevel@tonic-gate rp->r_cred = cr; 61847c478bd9Sstevel@tonic-gate crhold(cr); 61857c478bd9Sstevel@tonic-gate cred = cr; 61867c478bd9Sstevel@tonic-gate crhold(cred); 61877c478bd9Sstevel@tonic-gate } 61887c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 61897c478bd9Sstevel@tonic-gate 
61907c478bd9Sstevel@tonic-gate args.file = *VTOFH3(vp); 61917c478bd9Sstevel@tonic-gate args.offset = offset; 61927c478bd9Sstevel@tonic-gate args.count = count; 61937c478bd9Sstevel@tonic-gate 61947c478bd9Sstevel@tonic-gate doitagain: 61957c478bd9Sstevel@tonic-gate douprintf = 1; 61967c478bd9Sstevel@tonic-gate error = rfs3call(VTOMI(vp), NFSPROC3_COMMIT, 61977c478bd9Sstevel@tonic-gate xdr_COMMIT3args, (caddr_t)&args, 61987c478bd9Sstevel@tonic-gate xdr_COMMIT3res, (caddr_t)&res, cred, 61997c478bd9Sstevel@tonic-gate &douprintf, &res.status, 0, NULL); 62007c478bd9Sstevel@tonic-gate 62017c478bd9Sstevel@tonic-gate crfree(cred); 62027c478bd9Sstevel@tonic-gate 62037c478bd9Sstevel@tonic-gate if (error) 62047c478bd9Sstevel@tonic-gate return (error); 62057c478bd9Sstevel@tonic-gate 62067c478bd9Sstevel@tonic-gate error = geterrno3(res.status); 62077c478bd9Sstevel@tonic-gate if (!error) { 62087c478bd9Sstevel@tonic-gate ASSERT(rp->r_flags & RHAVEVERF); 62097c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 62107c478bd9Sstevel@tonic-gate if (rp->r_verf == res.resok.verf) { 62117c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 62127c478bd9Sstevel@tonic-gate return (0); 62137c478bd9Sstevel@tonic-gate } 62147c478bd9Sstevel@tonic-gate nfs3_set_mod(vp); 62157c478bd9Sstevel@tonic-gate rp->r_verf = res.resok.verf; 62167c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 62177c478bd9Sstevel@tonic-gate error = NFS_VERF_MISMATCH; 62187c478bd9Sstevel@tonic-gate } else { 62197c478bd9Sstevel@tonic-gate if (error == EACCES) { 62207c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 62217c478bd9Sstevel@tonic-gate if (cred != cr) { 62227c478bd9Sstevel@tonic-gate if (rp->r_cred != NULL) 62237c478bd9Sstevel@tonic-gate crfree(rp->r_cred); 62247c478bd9Sstevel@tonic-gate rp->r_cred = cr; 62257c478bd9Sstevel@tonic-gate crhold(cr); 62267c478bd9Sstevel@tonic-gate cred = cr; 62277c478bd9Sstevel@tonic-gate crhold(cred); 62287c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 
62297c478bd9Sstevel@tonic-gate goto doitagain; 62307c478bd9Sstevel@tonic-gate } 62317c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 62327c478bd9Sstevel@tonic-gate } 62337c478bd9Sstevel@tonic-gate /* 62347c478bd9Sstevel@tonic-gate * Can't do a PURGE_STALE_FH here because this 62357c478bd9Sstevel@tonic-gate * can cause a deadlock. nfs3_commit can 62367c478bd9Sstevel@tonic-gate * be called from nfs3_dispose which can be called 62377c478bd9Sstevel@tonic-gate * indirectly via pvn_vplist_dirty. PURGE_STALE_FH 62387c478bd9Sstevel@tonic-gate * can call back to pvn_vplist_dirty. 62397c478bd9Sstevel@tonic-gate */ 62407c478bd9Sstevel@tonic-gate if (error == ESTALE) { 62417c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 62427c478bd9Sstevel@tonic-gate rp->r_flags |= RSTALE; 62437c478bd9Sstevel@tonic-gate if (!rp->r_error) 62447c478bd9Sstevel@tonic-gate rp->r_error = error; 62457c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 62467c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(vp); 62477c478bd9Sstevel@tonic-gate } else { 62487c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 62497c478bd9Sstevel@tonic-gate if (!rp->r_error) 62507c478bd9Sstevel@tonic-gate rp->r_error = error; 62517c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 62527c478bd9Sstevel@tonic-gate } 62537c478bd9Sstevel@tonic-gate } 62547c478bd9Sstevel@tonic-gate 62557c478bd9Sstevel@tonic-gate return (error); 62567c478bd9Sstevel@tonic-gate } 62577c478bd9Sstevel@tonic-gate 62587c478bd9Sstevel@tonic-gate static void 62597c478bd9Sstevel@tonic-gate nfs3_set_mod(vnode_t *vp) 62607c478bd9Sstevel@tonic-gate { 6261108322fbScarlsonj ASSERT(nfs_zone() == VTOMI(vp)->mi_zone); 62627c478bd9Sstevel@tonic-gate 6263f8bbc571SPavel Filipensky pvn_vplist_setdirty(vp, nfs_setmod_check); 6264f8bbc571SPavel Filipensky } 62657c478bd9Sstevel@tonic-gate 62667c478bd9Sstevel@tonic-gate /* 62677c478bd9Sstevel@tonic-gate * This routine is used to gather together a page list of the pages 
62687c478bd9Sstevel@tonic-gate * which are to be committed on the server. This routine must not 62697c478bd9Sstevel@tonic-gate * be called if the calling thread holds any locked pages. 62707c478bd9Sstevel@tonic-gate * 62717c478bd9Sstevel@tonic-gate * The calling thread must have set RCOMMIT. This bit is used to 62727c478bd9Sstevel@tonic-gate * serialize access to the commit structure in the rnode. As long 62737c478bd9Sstevel@tonic-gate * as the thread has set RCOMMIT, then it can manipulate the commit 62747c478bd9Sstevel@tonic-gate * structure without requiring any other locks. 62757c478bd9Sstevel@tonic-gate */ 62767c478bd9Sstevel@tonic-gate static void 62777c478bd9Sstevel@tonic-gate nfs3_get_commit(vnode_t *vp) 62787c478bd9Sstevel@tonic-gate { 62797c478bd9Sstevel@tonic-gate rnode_t *rp; 62807c478bd9Sstevel@tonic-gate page_t *pp; 62817c478bd9Sstevel@tonic-gate kmutex_t *vphm; 62827c478bd9Sstevel@tonic-gate 62837c478bd9Sstevel@tonic-gate rp = VTOR(vp); 62847c478bd9Sstevel@tonic-gate 62857c478bd9Sstevel@tonic-gate ASSERT(rp->r_flags & RCOMMIT); 62867c478bd9Sstevel@tonic-gate 62877c478bd9Sstevel@tonic-gate vphm = page_vnode_mutex(vp); 62887c478bd9Sstevel@tonic-gate mutex_enter(vphm); 62897c478bd9Sstevel@tonic-gate 62907c478bd9Sstevel@tonic-gate /* 62917c478bd9Sstevel@tonic-gate * If there are no pages associated with this vnode, then 62927c478bd9Sstevel@tonic-gate * just return. 62937c478bd9Sstevel@tonic-gate */ 62947c478bd9Sstevel@tonic-gate if ((pp = vp->v_pages) == NULL) { 62957c478bd9Sstevel@tonic-gate mutex_exit(vphm); 62967c478bd9Sstevel@tonic-gate return; 62977c478bd9Sstevel@tonic-gate } 62987c478bd9Sstevel@tonic-gate 62997c478bd9Sstevel@tonic-gate /* 63007c478bd9Sstevel@tonic-gate * Step through all of the pages associated with this vnode 63017c478bd9Sstevel@tonic-gate * looking for pages which need to be committed. 63027c478bd9Sstevel@tonic-gate */ 63037c478bd9Sstevel@tonic-gate do { 6304f8bbc571SPavel Filipensky /* Skip marker pages. 
*/ 6305f8bbc571SPavel Filipensky if (pp->p_hash == PVN_VPLIST_HASH_TAG) 6306f8bbc571SPavel Filipensky continue; 6307f8bbc571SPavel Filipensky 63087c478bd9Sstevel@tonic-gate /* 63097c478bd9Sstevel@tonic-gate * If this page does not need to be committed or is 63107c478bd9Sstevel@tonic-gate * modified, then just skip it. 63117c478bd9Sstevel@tonic-gate */ 63127c478bd9Sstevel@tonic-gate if (pp->p_fsdata == C_NOCOMMIT || hat_ismod(pp)) 63137c478bd9Sstevel@tonic-gate continue; 63147c478bd9Sstevel@tonic-gate 63157c478bd9Sstevel@tonic-gate /* 63167c478bd9Sstevel@tonic-gate * Attempt to lock the page. If we can't, then 63177c478bd9Sstevel@tonic-gate * someone else is messing with it and we will 63187c478bd9Sstevel@tonic-gate * just skip it. 63197c478bd9Sstevel@tonic-gate */ 63207c478bd9Sstevel@tonic-gate if (!page_trylock(pp, SE_EXCL)) 63217c478bd9Sstevel@tonic-gate continue; 63227c478bd9Sstevel@tonic-gate 63237c478bd9Sstevel@tonic-gate /* 63247c478bd9Sstevel@tonic-gate * If this page does not need to be committed or is 63257c478bd9Sstevel@tonic-gate * modified, then just skip it. Recheck now that 63267c478bd9Sstevel@tonic-gate * the page is locked. 63277c478bd9Sstevel@tonic-gate */ 63287c478bd9Sstevel@tonic-gate if (pp->p_fsdata == C_NOCOMMIT || hat_ismod(pp)) { 63297c478bd9Sstevel@tonic-gate page_unlock(pp); 63307c478bd9Sstevel@tonic-gate continue; 63317c478bd9Sstevel@tonic-gate } 63327c478bd9Sstevel@tonic-gate 63337c478bd9Sstevel@tonic-gate if (PP_ISFREE(pp)) { 63347c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, "nfs3_get_commit: %p is free", 63357c478bd9Sstevel@tonic-gate (void *)pp); 63367c478bd9Sstevel@tonic-gate } 63377c478bd9Sstevel@tonic-gate 63387c478bd9Sstevel@tonic-gate /* 63397c478bd9Sstevel@tonic-gate * The page needs to be committed and we locked it. 63407c478bd9Sstevel@tonic-gate * Update the base and length parameters and add it 63417c478bd9Sstevel@tonic-gate * to r_pages. 
63427c478bd9Sstevel@tonic-gate */ 63437c478bd9Sstevel@tonic-gate if (rp->r_commit.c_pages == NULL) { 63447c478bd9Sstevel@tonic-gate rp->r_commit.c_commbase = (offset3)pp->p_offset; 63457c478bd9Sstevel@tonic-gate rp->r_commit.c_commlen = PAGESIZE; 63467c478bd9Sstevel@tonic-gate } else if (pp->p_offset < rp->r_commit.c_commbase) { 63477c478bd9Sstevel@tonic-gate rp->r_commit.c_commlen = rp->r_commit.c_commbase - 63487c478bd9Sstevel@tonic-gate (offset3)pp->p_offset + rp->r_commit.c_commlen; 63497c478bd9Sstevel@tonic-gate rp->r_commit.c_commbase = (offset3)pp->p_offset; 63507c478bd9Sstevel@tonic-gate } else if ((rp->r_commit.c_commbase + rp->r_commit.c_commlen) 63517c478bd9Sstevel@tonic-gate <= pp->p_offset) { 63527c478bd9Sstevel@tonic-gate rp->r_commit.c_commlen = (offset3)pp->p_offset - 63537c478bd9Sstevel@tonic-gate rp->r_commit.c_commbase + PAGESIZE; 63547c478bd9Sstevel@tonic-gate } 63557c478bd9Sstevel@tonic-gate page_add(&rp->r_commit.c_pages, pp); 63567c478bd9Sstevel@tonic-gate } while ((pp = pp->p_vpnext) != vp->v_pages); 63577c478bd9Sstevel@tonic-gate 63587c478bd9Sstevel@tonic-gate mutex_exit(vphm); 63597c478bd9Sstevel@tonic-gate } 63607c478bd9Sstevel@tonic-gate 63617c478bd9Sstevel@tonic-gate /* 63627c478bd9Sstevel@tonic-gate * This routine is used to gather together a page list of the pages 63637c478bd9Sstevel@tonic-gate * which are to be committed on the server. This routine must not 63647c478bd9Sstevel@tonic-gate * be called if the calling thread holds any locked pages. 63657c478bd9Sstevel@tonic-gate * 63667c478bd9Sstevel@tonic-gate * The calling thread must have set RCOMMIT. This bit is used to 63677c478bd9Sstevel@tonic-gate * serialize access to the commit structure in the rnode. As long 63687c478bd9Sstevel@tonic-gate * as the thread has set RCOMMIT, then it can manipulate the commit 63697c478bd9Sstevel@tonic-gate * structure without requiring any other locks. 
63707c478bd9Sstevel@tonic-gate */ 63717c478bd9Sstevel@tonic-gate static void 63727c478bd9Sstevel@tonic-gate nfs3_get_commit_range(vnode_t *vp, u_offset_t soff, size_t len) 63737c478bd9Sstevel@tonic-gate { 63747c478bd9Sstevel@tonic-gate 63757c478bd9Sstevel@tonic-gate rnode_t *rp; 63767c478bd9Sstevel@tonic-gate page_t *pp; 63777c478bd9Sstevel@tonic-gate u_offset_t end; 63787c478bd9Sstevel@tonic-gate u_offset_t off; 63797c478bd9Sstevel@tonic-gate 63807c478bd9Sstevel@tonic-gate ASSERT(len != 0); 63817c478bd9Sstevel@tonic-gate 63827c478bd9Sstevel@tonic-gate rp = VTOR(vp); 63837c478bd9Sstevel@tonic-gate 63847c478bd9Sstevel@tonic-gate ASSERT(rp->r_flags & RCOMMIT); 6385108322fbScarlsonj ASSERT(nfs_zone() == VTOMI(vp)->mi_zone); 63867c478bd9Sstevel@tonic-gate 63877c478bd9Sstevel@tonic-gate /* 63887c478bd9Sstevel@tonic-gate * If there are no pages associated with this vnode, then 63897c478bd9Sstevel@tonic-gate * just return. 63907c478bd9Sstevel@tonic-gate */ 63917c478bd9Sstevel@tonic-gate if ((pp = vp->v_pages) == NULL) 63927c478bd9Sstevel@tonic-gate return; 63937c478bd9Sstevel@tonic-gate 63947c478bd9Sstevel@tonic-gate /* 63957c478bd9Sstevel@tonic-gate * Calculate the ending offset. 63967c478bd9Sstevel@tonic-gate */ 63977c478bd9Sstevel@tonic-gate end = soff + len; 63987c478bd9Sstevel@tonic-gate 63997c478bd9Sstevel@tonic-gate for (off = soff; off < end; off += PAGESIZE) { 64007c478bd9Sstevel@tonic-gate /* 64017c478bd9Sstevel@tonic-gate * Lookup each page by vp, offset. 64027c478bd9Sstevel@tonic-gate */ 64037c478bd9Sstevel@tonic-gate if ((pp = page_lookup_nowait(vp, off, SE_EXCL)) == NULL) 64047c478bd9Sstevel@tonic-gate continue; 64057c478bd9Sstevel@tonic-gate 64067c478bd9Sstevel@tonic-gate /* 64077c478bd9Sstevel@tonic-gate * If this page does not need to be committed or is 64087c478bd9Sstevel@tonic-gate * modified, then just skip it. 
64097c478bd9Sstevel@tonic-gate */ 64107c478bd9Sstevel@tonic-gate if (pp->p_fsdata == C_NOCOMMIT || hat_ismod(pp)) { 64117c478bd9Sstevel@tonic-gate page_unlock(pp); 64127c478bd9Sstevel@tonic-gate continue; 64137c478bd9Sstevel@tonic-gate } 64147c478bd9Sstevel@tonic-gate 64157c478bd9Sstevel@tonic-gate ASSERT(PP_ISFREE(pp) == 0); 64167c478bd9Sstevel@tonic-gate 64177c478bd9Sstevel@tonic-gate /* 64187c478bd9Sstevel@tonic-gate * The page needs to be committed and we locked it. 64197c478bd9Sstevel@tonic-gate * Update the base and length parameters and add it 64207c478bd9Sstevel@tonic-gate * to r_pages. 64217c478bd9Sstevel@tonic-gate */ 64227c478bd9Sstevel@tonic-gate if (rp->r_commit.c_pages == NULL) { 64237c478bd9Sstevel@tonic-gate rp->r_commit.c_commbase = (offset3)pp->p_offset; 64247c478bd9Sstevel@tonic-gate rp->r_commit.c_commlen = PAGESIZE; 64257c478bd9Sstevel@tonic-gate } else { 64267c478bd9Sstevel@tonic-gate rp->r_commit.c_commlen = (offset3)pp->p_offset - 64277c478bd9Sstevel@tonic-gate rp->r_commit.c_commbase + PAGESIZE; 64287c478bd9Sstevel@tonic-gate } 64297c478bd9Sstevel@tonic-gate page_add(&rp->r_commit.c_pages, pp); 64307c478bd9Sstevel@tonic-gate } 64317c478bd9Sstevel@tonic-gate } 64327c478bd9Sstevel@tonic-gate 64337c478bd9Sstevel@tonic-gate static int 64347c478bd9Sstevel@tonic-gate nfs3_putpage_commit(vnode_t *vp, offset_t poff, size_t plen, cred_t *cr) 64357c478bd9Sstevel@tonic-gate { 64367c478bd9Sstevel@tonic-gate int error; 64377c478bd9Sstevel@tonic-gate writeverf3 write_verf; 64387c478bd9Sstevel@tonic-gate rnode_t *rp = VTOR(vp); 64397c478bd9Sstevel@tonic-gate 6440108322fbScarlsonj ASSERT(nfs_zone() == VTOMI(vp)->mi_zone); 64417c478bd9Sstevel@tonic-gate /* 64427c478bd9Sstevel@tonic-gate * Flush the data portion of the file and then commit any 64437c478bd9Sstevel@tonic-gate * portions which need to be committed. 
This may need to 64447c478bd9Sstevel@tonic-gate * be done twice if the server has changed state since 64457c478bd9Sstevel@tonic-gate * data was last written. The data will need to be 64467c478bd9Sstevel@tonic-gate * rewritten to the server and then a new commit done. 64477c478bd9Sstevel@tonic-gate * 64487c478bd9Sstevel@tonic-gate * In fact, this may need to be done several times if the 64497c478bd9Sstevel@tonic-gate * server is having problems and crashing while we are 64507c478bd9Sstevel@tonic-gate * attempting to do this. 64517c478bd9Sstevel@tonic-gate */ 64527c478bd9Sstevel@tonic-gate 64537c478bd9Sstevel@tonic-gate top: 64547c478bd9Sstevel@tonic-gate /* 64557c478bd9Sstevel@tonic-gate * Do a flush based on the poff and plen arguments. This 64567c478bd9Sstevel@tonic-gate * will asynchronously write out any modified pages in the 64577c478bd9Sstevel@tonic-gate * range specified by (poff, plen). This starts all of the 64587c478bd9Sstevel@tonic-gate * i/o operations which will be waited for in the next 64597c478bd9Sstevel@tonic-gate * call to nfs3_putpage 64607c478bd9Sstevel@tonic-gate */ 64617c478bd9Sstevel@tonic-gate 64627c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 64637c478bd9Sstevel@tonic-gate write_verf = rp->r_verf; 64647c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 64657c478bd9Sstevel@tonic-gate 6466da6c28aaSamw error = nfs3_putpage(vp, poff, plen, B_ASYNC, cr, NULL); 64677c478bd9Sstevel@tonic-gate if (error == EAGAIN) 64687c478bd9Sstevel@tonic-gate error = 0; 64697c478bd9Sstevel@tonic-gate 64707c478bd9Sstevel@tonic-gate /* 64717c478bd9Sstevel@tonic-gate * Do a flush based on the poff and plen arguments. This 64727c478bd9Sstevel@tonic-gate * will synchronously write out any modified pages in the 64737c478bd9Sstevel@tonic-gate * range specified by (poff, plen) and wait until all of 64747c478bd9Sstevel@tonic-gate * the asynchronous i/o's in that range are done as well. 
64757c478bd9Sstevel@tonic-gate */ 64767c478bd9Sstevel@tonic-gate if (!error) 6477da6c28aaSamw error = nfs3_putpage(vp, poff, plen, 0, cr, NULL); 64787c478bd9Sstevel@tonic-gate 64797c478bd9Sstevel@tonic-gate if (error) 64807c478bd9Sstevel@tonic-gate return (error); 64817c478bd9Sstevel@tonic-gate 64827c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 64837c478bd9Sstevel@tonic-gate if (rp->r_verf != write_verf) { 64847c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 64857c478bd9Sstevel@tonic-gate goto top; 64867c478bd9Sstevel@tonic-gate } 64877c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 64887c478bd9Sstevel@tonic-gate 64897c478bd9Sstevel@tonic-gate /* 64907c478bd9Sstevel@tonic-gate * Now commit any pages which might need to be committed. 64917c478bd9Sstevel@tonic-gate * If the error, NFS_VERF_MISMATCH, is returned, then 64927c478bd9Sstevel@tonic-gate * start over with the flush operation. 64937c478bd9Sstevel@tonic-gate */ 64947c478bd9Sstevel@tonic-gate 64957c478bd9Sstevel@tonic-gate error = nfs3_commit_vp(vp, poff, plen, cr); 64967c478bd9Sstevel@tonic-gate 64977c478bd9Sstevel@tonic-gate if (error == NFS_VERF_MISMATCH) 64987c478bd9Sstevel@tonic-gate goto top; 64997c478bd9Sstevel@tonic-gate 65007c478bd9Sstevel@tonic-gate return (error); 65017c478bd9Sstevel@tonic-gate } 65027c478bd9Sstevel@tonic-gate 65037c478bd9Sstevel@tonic-gate static int 65047c478bd9Sstevel@tonic-gate nfs3_commit_vp(vnode_t *vp, u_offset_t poff, size_t plen, cred_t *cr) 65057c478bd9Sstevel@tonic-gate { 65067c478bd9Sstevel@tonic-gate rnode_t *rp; 65077c478bd9Sstevel@tonic-gate page_t *plist; 65087c478bd9Sstevel@tonic-gate offset3 offset; 65097c478bd9Sstevel@tonic-gate count3 len; 65107c478bd9Sstevel@tonic-gate 65117c478bd9Sstevel@tonic-gate 65127c478bd9Sstevel@tonic-gate rp = VTOR(vp); 65137c478bd9Sstevel@tonic-gate 6514108322fbScarlsonj if (nfs_zone() != VTOMI(vp)->mi_zone) 65157c478bd9Sstevel@tonic-gate return (EIO); 65167c478bd9Sstevel@tonic-gate /* 
65177c478bd9Sstevel@tonic-gate * Set the `commit inprogress' state bit. We must 65187c478bd9Sstevel@tonic-gate * first wait until any current one finishes. 65197c478bd9Sstevel@tonic-gate */ 65207c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 65217c478bd9Sstevel@tonic-gate while (rp->r_flags & RCOMMIT) { 65227c478bd9Sstevel@tonic-gate rp->r_flags |= RCOMMITWAIT; 65237c478bd9Sstevel@tonic-gate cv_wait(&rp->r_commit.c_cv, &rp->r_statelock); 65247c478bd9Sstevel@tonic-gate rp->r_flags &= ~RCOMMITWAIT; 65257c478bd9Sstevel@tonic-gate } 65267c478bd9Sstevel@tonic-gate rp->r_flags |= RCOMMIT; 65277c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 65287c478bd9Sstevel@tonic-gate 65297c478bd9Sstevel@tonic-gate /* 65307c478bd9Sstevel@tonic-gate * Gather together all of the pages which need to be 65317c478bd9Sstevel@tonic-gate * committed. 65327c478bd9Sstevel@tonic-gate */ 65337c478bd9Sstevel@tonic-gate if (plen == 0) 65347c478bd9Sstevel@tonic-gate nfs3_get_commit(vp); 65357c478bd9Sstevel@tonic-gate else 65367c478bd9Sstevel@tonic-gate nfs3_get_commit_range(vp, poff, plen); 65377c478bd9Sstevel@tonic-gate 65387c478bd9Sstevel@tonic-gate /* 65397c478bd9Sstevel@tonic-gate * Clear the `commit inprogress' bit and disconnect the 65407c478bd9Sstevel@tonic-gate * page list which was gathered together in nfs3_get_commit. 
65417c478bd9Sstevel@tonic-gate */ 65427c478bd9Sstevel@tonic-gate plist = rp->r_commit.c_pages; 65437c478bd9Sstevel@tonic-gate rp->r_commit.c_pages = NULL; 65447c478bd9Sstevel@tonic-gate offset = rp->r_commit.c_commbase; 65457c478bd9Sstevel@tonic-gate len = rp->r_commit.c_commlen; 65467c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 65477c478bd9Sstevel@tonic-gate rp->r_flags &= ~RCOMMIT; 65487c478bd9Sstevel@tonic-gate cv_broadcast(&rp->r_commit.c_cv); 65497c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 65507c478bd9Sstevel@tonic-gate 65517c478bd9Sstevel@tonic-gate /* 65527c478bd9Sstevel@tonic-gate * If any pages need to be committed, commit them and 65537c478bd9Sstevel@tonic-gate * then unlock them so that they can be freed some 65547c478bd9Sstevel@tonic-gate * time later. 65557c478bd9Sstevel@tonic-gate */ 65567c478bd9Sstevel@tonic-gate if (plist != NULL) { 65577c478bd9Sstevel@tonic-gate /* 65587c478bd9Sstevel@tonic-gate * No error occurred during the flush portion 65597c478bd9Sstevel@tonic-gate * of this operation, so now attempt to commit 65607c478bd9Sstevel@tonic-gate * the data to stable storage on the server. 65617c478bd9Sstevel@tonic-gate * 65627c478bd9Sstevel@tonic-gate * This will unlock all of the pages on the list. 
65637c478bd9Sstevel@tonic-gate */ 65647c478bd9Sstevel@tonic-gate return (nfs3_sync_commit(vp, plist, offset, len, cr)); 65657c478bd9Sstevel@tonic-gate } 65667c478bd9Sstevel@tonic-gate return (0); 65677c478bd9Sstevel@tonic-gate } 65687c478bd9Sstevel@tonic-gate 65697c478bd9Sstevel@tonic-gate static int 65707c478bd9Sstevel@tonic-gate nfs3_sync_commit(vnode_t *vp, page_t *plist, offset3 offset, count3 count, 65717c478bd9Sstevel@tonic-gate cred_t *cr) 65727c478bd9Sstevel@tonic-gate { 65737c478bd9Sstevel@tonic-gate int error; 65747c478bd9Sstevel@tonic-gate page_t *pp; 65757c478bd9Sstevel@tonic-gate 6576108322fbScarlsonj ASSERT(nfs_zone() == VTOMI(vp)->mi_zone); 65777c478bd9Sstevel@tonic-gate error = nfs3_commit(vp, offset, count, cr); 65787c478bd9Sstevel@tonic-gate 65797c478bd9Sstevel@tonic-gate /* 65807c478bd9Sstevel@tonic-gate * If we got an error, then just unlock all of the pages 65817c478bd9Sstevel@tonic-gate * on the list. 65827c478bd9Sstevel@tonic-gate */ 65837c478bd9Sstevel@tonic-gate if (error) { 65847c478bd9Sstevel@tonic-gate while (plist != NULL) { 65857c478bd9Sstevel@tonic-gate pp = plist; 65867c478bd9Sstevel@tonic-gate page_sub(&plist, pp); 65877c478bd9Sstevel@tonic-gate page_unlock(pp); 65887c478bd9Sstevel@tonic-gate } 65897c478bd9Sstevel@tonic-gate return (error); 65907c478bd9Sstevel@tonic-gate } 65917c478bd9Sstevel@tonic-gate /* 65927c478bd9Sstevel@tonic-gate * We've tried as hard as we can to commit the data to stable 65937c478bd9Sstevel@tonic-gate * storage on the server. We just unlock the pages and clear 65947c478bd9Sstevel@tonic-gate * the commit required state. They will get freed later. 
65957c478bd9Sstevel@tonic-gate */ 65967c478bd9Sstevel@tonic-gate while (plist != NULL) { 65977c478bd9Sstevel@tonic-gate pp = plist; 65987c478bd9Sstevel@tonic-gate page_sub(&plist, pp); 65997c478bd9Sstevel@tonic-gate pp->p_fsdata = C_NOCOMMIT; 66007c478bd9Sstevel@tonic-gate page_unlock(pp); 66017c478bd9Sstevel@tonic-gate } 66027c478bd9Sstevel@tonic-gate 66037c478bd9Sstevel@tonic-gate return (error); 66047c478bd9Sstevel@tonic-gate } 66057c478bd9Sstevel@tonic-gate 66067c478bd9Sstevel@tonic-gate static void 66077c478bd9Sstevel@tonic-gate nfs3_async_commit(vnode_t *vp, page_t *plist, offset3 offset, count3 count, 66087c478bd9Sstevel@tonic-gate cred_t *cr) 66097c478bd9Sstevel@tonic-gate { 6610108322fbScarlsonj ASSERT(nfs_zone() == VTOMI(vp)->mi_zone); 66117c478bd9Sstevel@tonic-gate (void) nfs3_sync_commit(vp, plist, offset, count, cr); 66127c478bd9Sstevel@tonic-gate } 66137c478bd9Sstevel@tonic-gate 6614da6c28aaSamw /* ARGSUSED */ 66157c478bd9Sstevel@tonic-gate static int 6616da6c28aaSamw nfs3_setsecattr(vnode_t *vp, vsecattr_t *vsecattr, int flag, cred_t *cr, 6617da6c28aaSamw caller_context_t *ct) 66187c478bd9Sstevel@tonic-gate { 66197c478bd9Sstevel@tonic-gate int error; 66207c478bd9Sstevel@tonic-gate mntinfo_t *mi; 66217c478bd9Sstevel@tonic-gate 66227c478bd9Sstevel@tonic-gate mi = VTOMI(vp); 66237c478bd9Sstevel@tonic-gate 6624108322fbScarlsonj if (nfs_zone() != mi->mi_zone) 66257c478bd9Sstevel@tonic-gate return (EIO); 66267c478bd9Sstevel@tonic-gate 66277c478bd9Sstevel@tonic-gate if (mi->mi_flags & MI_ACL) { 66287c478bd9Sstevel@tonic-gate error = acl_setacl3(vp, vsecattr, flag, cr); 66297c478bd9Sstevel@tonic-gate if (mi->mi_flags & MI_ACL) 66307c478bd9Sstevel@tonic-gate return (error); 66317c478bd9Sstevel@tonic-gate } 66327c478bd9Sstevel@tonic-gate 66337c478bd9Sstevel@tonic-gate return (ENOSYS); 66347c478bd9Sstevel@tonic-gate } 66357c478bd9Sstevel@tonic-gate 6636da6c28aaSamw /* ARGSUSED */ 66377c478bd9Sstevel@tonic-gate static int 6638da6c28aaSamw nfs3_getsecattr(vnode_t 
*vp, vsecattr_t *vsecattr, int flag, cred_t *cr, 6639da6c28aaSamw caller_context_t *ct) 66407c478bd9Sstevel@tonic-gate { 66417c478bd9Sstevel@tonic-gate int error; 66427c478bd9Sstevel@tonic-gate mntinfo_t *mi; 66437c478bd9Sstevel@tonic-gate 66447c478bd9Sstevel@tonic-gate mi = VTOMI(vp); 66457c478bd9Sstevel@tonic-gate 6646108322fbScarlsonj if (nfs_zone() != mi->mi_zone) 66477c478bd9Sstevel@tonic-gate return (EIO); 66487c478bd9Sstevel@tonic-gate 66497c478bd9Sstevel@tonic-gate if (mi->mi_flags & MI_ACL) { 66507c478bd9Sstevel@tonic-gate error = acl_getacl3(vp, vsecattr, flag, cr); 66517c478bd9Sstevel@tonic-gate if (mi->mi_flags & MI_ACL) 66527c478bd9Sstevel@tonic-gate return (error); 66537c478bd9Sstevel@tonic-gate } 66547c478bd9Sstevel@tonic-gate 6655da6c28aaSamw return (fs_fab_acl(vp, vsecattr, flag, cr, ct)); 66567c478bd9Sstevel@tonic-gate } 66577c478bd9Sstevel@tonic-gate 6658da6c28aaSamw /* ARGSUSED */ 66597c478bd9Sstevel@tonic-gate static int 6660da6c28aaSamw nfs3_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr, 6661da6c28aaSamw caller_context_t *ct) 66627c478bd9Sstevel@tonic-gate { 66637c478bd9Sstevel@tonic-gate int error; 66647c478bd9Sstevel@tonic-gate struct shrlock nshr; 66657c478bd9Sstevel@tonic-gate struct nfs_owner nfs_owner; 66667c478bd9Sstevel@tonic-gate netobj lm_fh3; 66677c478bd9Sstevel@tonic-gate 6668108322fbScarlsonj if (nfs_zone() != VTOMI(vp)->mi_zone) 66697c478bd9Sstevel@tonic-gate return (EIO); 66707c478bd9Sstevel@tonic-gate 66717c478bd9Sstevel@tonic-gate /* 66727c478bd9Sstevel@tonic-gate * check for valid cmd parameter 66737c478bd9Sstevel@tonic-gate */ 66747c478bd9Sstevel@tonic-gate if (cmd != F_SHARE && cmd != F_UNSHARE && cmd != F_HASREMOTELOCKS) 66757c478bd9Sstevel@tonic-gate return (EINVAL); 66767c478bd9Sstevel@tonic-gate 66777c478bd9Sstevel@tonic-gate /* 66787c478bd9Sstevel@tonic-gate * Check access permissions 66797c478bd9Sstevel@tonic-gate */ 66807c478bd9Sstevel@tonic-gate if (cmd == F_SHARE && 
66817c478bd9Sstevel@tonic-gate (((shr->s_access & F_RDACC) && !(flag & FREAD)) || 66827c478bd9Sstevel@tonic-gate ((shr->s_access & F_WRACC) && !(flag & FWRITE)))) 66837c478bd9Sstevel@tonic-gate return (EBADF); 66847c478bd9Sstevel@tonic-gate 66857c478bd9Sstevel@tonic-gate /* 66867c478bd9Sstevel@tonic-gate * If the filesystem is mounted using local locking, pass the 66877c478bd9Sstevel@tonic-gate * request off to the local share code. 66887c478bd9Sstevel@tonic-gate */ 66897c478bd9Sstevel@tonic-gate if (VTOMI(vp)->mi_flags & MI_LLOCK) 6690da6c28aaSamw return (fs_shrlock(vp, cmd, shr, flag, cr, ct)); 66917c478bd9Sstevel@tonic-gate 66927c478bd9Sstevel@tonic-gate switch (cmd) { 66937c478bd9Sstevel@tonic-gate case F_SHARE: 66947c478bd9Sstevel@tonic-gate case F_UNSHARE: 66957c478bd9Sstevel@tonic-gate lm_fh3.n_len = VTOFH3(vp)->fh3_length; 66967c478bd9Sstevel@tonic-gate lm_fh3.n_bytes = (char *)&(VTOFH3(vp)->fh3_u.data); 66977c478bd9Sstevel@tonic-gate 66987c478bd9Sstevel@tonic-gate /* 66997c478bd9Sstevel@tonic-gate * If passed an owner that is too large to fit in an 67007c478bd9Sstevel@tonic-gate * nfs_owner it is likely a recursive call from the 67017c478bd9Sstevel@tonic-gate * lock manager client and pass it straight through. If 67027c478bd9Sstevel@tonic-gate * it is not a nfs_owner then simply return an error. 
67037c478bd9Sstevel@tonic-gate */ 67047c478bd9Sstevel@tonic-gate if (shr->s_own_len > sizeof (nfs_owner.lowner)) { 67057c478bd9Sstevel@tonic-gate if (((struct nfs_owner *)shr->s_owner)->magic != 67067c478bd9Sstevel@tonic-gate NFS_OWNER_MAGIC) 67077c478bd9Sstevel@tonic-gate return (EINVAL); 67087c478bd9Sstevel@tonic-gate 67097c478bd9Sstevel@tonic-gate if (error = lm4_shrlock(vp, cmd, shr, flag, &lm_fh3)) { 67107c478bd9Sstevel@tonic-gate error = set_errno(error); 67117c478bd9Sstevel@tonic-gate } 67127c478bd9Sstevel@tonic-gate return (error); 67137c478bd9Sstevel@tonic-gate } 67147c478bd9Sstevel@tonic-gate /* 67157c478bd9Sstevel@tonic-gate * Remote share reservations owner is a combination of 67167c478bd9Sstevel@tonic-gate * a magic number, hostname, and the local owner 67177c478bd9Sstevel@tonic-gate */ 67187c478bd9Sstevel@tonic-gate bzero(&nfs_owner, sizeof (nfs_owner)); 67197c478bd9Sstevel@tonic-gate nfs_owner.magic = NFS_OWNER_MAGIC; 67207c478bd9Sstevel@tonic-gate (void) strncpy(nfs_owner.hname, uts_nodename(), 67217c478bd9Sstevel@tonic-gate sizeof (nfs_owner.hname)); 67227c478bd9Sstevel@tonic-gate bcopy(shr->s_owner, nfs_owner.lowner, shr->s_own_len); 67237c478bd9Sstevel@tonic-gate nshr.s_access = shr->s_access; 67247c478bd9Sstevel@tonic-gate nshr.s_deny = shr->s_deny; 67257c478bd9Sstevel@tonic-gate nshr.s_sysid = 0; 67267c478bd9Sstevel@tonic-gate nshr.s_pid = ttoproc(curthread)->p_pid; 67277c478bd9Sstevel@tonic-gate nshr.s_own_len = sizeof (nfs_owner); 67287c478bd9Sstevel@tonic-gate nshr.s_owner = (caddr_t)&nfs_owner; 67297c478bd9Sstevel@tonic-gate 67307c478bd9Sstevel@tonic-gate if (error = lm4_shrlock(vp, cmd, &nshr, flag, &lm_fh3)) { 67317c478bd9Sstevel@tonic-gate error = set_errno(error); 67327c478bd9Sstevel@tonic-gate } 67337c478bd9Sstevel@tonic-gate 67347c478bd9Sstevel@tonic-gate break; 67357c478bd9Sstevel@tonic-gate 67367c478bd9Sstevel@tonic-gate case F_HASREMOTELOCKS: 67377c478bd9Sstevel@tonic-gate /* 67387c478bd9Sstevel@tonic-gate * NFS client can't store 
remote locks itself 67397c478bd9Sstevel@tonic-gate */ 67407c478bd9Sstevel@tonic-gate shr->s_access = 0; 67417c478bd9Sstevel@tonic-gate error = 0; 67427c478bd9Sstevel@tonic-gate break; 67437c478bd9Sstevel@tonic-gate 67447c478bd9Sstevel@tonic-gate default: 67457c478bd9Sstevel@tonic-gate error = EINVAL; 67467c478bd9Sstevel@tonic-gate break; 67477c478bd9Sstevel@tonic-gate } 67487c478bd9Sstevel@tonic-gate 67497c478bd9Sstevel@tonic-gate return (error); 67507c478bd9Sstevel@tonic-gate } 6751