1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 #ifndef _NFS_RNODE_H 30 #define _NFS_RNODE_H 31 32 #include <sys/avl.h> 33 #include <sys/list.h> 34 #include <nfs/nfs.h> 35 36 #ifdef __cplusplus 37 extern "C" { 38 #endif 39 40 typedef enum nfs_access_type { 41 NFS_ACCESS_UNKNOWN, 42 NFS_ACCESS_ALLOWED, 43 NFS_ACCESS_DENIED 44 } nfs_access_type_t; 45 46 typedef struct acache_hash { 47 struct acache *next; /* next and prev must be first */ 48 struct acache *prev; 49 krwlock_t lock; 50 } acache_hash_t; 51 52 typedef struct acache { 53 struct acache *next; /* next and prev must be first */ 54 struct acache *prev; 55 uint32_t known; 56 uint32_t allowed; 57 struct rnode *rnode; 58 cred_t *cred; 59 struct acache *list; 60 struct acache_hash *hashq; 61 } acache_t; 62 63 #define NFS_FHANDLE_LEN 72 64 65 typedef struct nfs_fhandle { 66 int fh_len; 67 char fh_buf[NFS_FHANDLE_LEN]; 68 } nfs_fhandle; 69 70 typedef struct rddir_cache { 71 lloff_t _cookie; /* cookie used to find this cache entry */ 72 lloff_t _ncookie; /* cookie used to find the next cache entry */ 73 char *entries; /* buffer containing dirent entries */ 74 int eof; /* EOF reached after this request */ 75 int entlen; /* size of dirent entries in buf */ 76 int buflen; /* size of the buffer used to store entries */ 77 int flags; /* control flags, see below */ 78 kcondvar_t cv; /* cv for blocking */ 79 int error; /* error from RPC operation */ 80 kmutex_t lock; 81 uint_t count; /* reference count */ 82 avl_node_t tree; /* AVL tree links */ 83 } rddir_cache; 84 85 #define nfs_cookie _cookie._p._l 86 #define nfs_ncookie _ncookie._p._l 87 #define nfs3_cookie _cookie._f 88 #define nfs3_ncookie _ncookie._f 89 90 #define RDDIR 0x1 /* readdir operation in progress */ 91 #define RDDIRWAIT 0x2 /* waiting on readdir in progress */ 92 #define RDDIRREQ 0x4 /* a new readdir is required */ 93 #define RDDIRCACHED 0x8 /* entry is in the cache */ 94 95 #define HAVE_RDDIR_CACHE(rp) (avl_numnodes(&(rp)->r_dir) > 0) 96 97 typedef struct symlink_cache { 98 char *contents; /* contents of the symbolic link */ 99 int len; /* length of the contents */ 100 int size; /* size of the allocated buffer */ 101 } symlink_cache; 102 103 typedef struct commit { 104 page_t *c_pages; /* list of pages to commit */ 105 offset3 c_commbase; /* base offset to do commit from */ 106 count3 c_commlen; /* len to commit */ 107 kcondvar_t c_cv; /* condvar for waiting for commit */ 108 } commit_t; 109 110 /* 111 * The various values for the commit states. These are stored in 112 * the p_fsdata byte in the page struct. 113 * NFSv3,4 can use asynchronous writes - the NFS server can send a response 114 * before storing the data to the stable store (disk). The response contains 115 * information if the data are on a disk or not. NFS client marks pages 116 * which are already on the stable store as C_NOCOMMIT. The pages which were 117 * sent but are not yet on the stable store are only partially 'safe' and are 118 * marked as C_DELAYCOMMIT, which can be later changed to C_COMMIT if the 119 * commit operation is in progress. If the NFS server is e.g. rebooted, the 120 * client needs to resend all the uncommitted data. The client walks all the 121 * vp->v_pages and if C_DELAYCOMMIT or C_COMMIT is set, the page is marked as 122 * dirty and thus will be written to the server again. 123 */ 124 #define C_NOCOMMIT 0 /* no commit is required */ 125 #define C_COMMIT 1 /* a commit is required so do it now */ 126 #define C_DELAYCOMMIT 2 /* a commit is required, but can be delayed */ 127 128 /* 129 * The lock manager holds state making it possible for the client 130 * and server to be out of sync. For example, if the response from 131 * the server granting a lock request is lost, the server will think 132 * the lock is granted and the client will think the lock is lost. 133 * To deal with this, a list of processes for which the client is 134 * not sure if the server holds a lock is attached to the rnode. 135 * When such a process closes the rnode, an unlock request is sent 136 * to the server to unlock the entire file. 137 * 138 * The list is kept as a singularly linked NULL terminated list. 139 * Because it is only added to under extreme error conditions, the 140 * list shouldn't get very big. DEBUG kernels print a console warning 141 * when the number of entries on a list go beyond nfs_lmpl_high_water 142 * an arbitrary number defined in nfs_add_locking_id() 143 */ 144 #define RLMPL_PID 1 145 #define RLMPL_OWNER 2 146 typedef struct lock_manager_pid_list { 147 int lmpl_type; 148 pid_t lmpl_pid; 149 union { 150 pid_t _pid; 151 struct { 152 int len; 153 char *owner; 154 } _own; 155 } un; 156 struct lock_manager_pid_list *lmpl_next; 157 } lmpl_t; 158 159 #define lmpl_opid un._pid 160 #define lmpl_own_len un._own.len 161 #define lmpl_owner un._own.owner 162 163 /* 164 * A homegrown reader/writer lock implementation. It addresses 165 * two requirements not addressed by the system primitives. They 166 * are that the `enter" operation is optionally interruptible and 167 * that they can be re`enter'ed by writers without deadlock. 168 */ 169 typedef struct nfs_rwlock { 170 int count; 171 int waiters; 172 kthread_t *owner; 173 kmutex_t lock; 174 kcondvar_t cv; 175 kcondvar_t cv_rd; 176 } nfs_rwlock_t; 177 178 /* 179 * The format of the hash bucket used to lookup rnodes from a file handle. 180 */ 181 typedef struct rhashq { 182 struct rnode *r_hashf; 183 struct rnode *r_hashb; 184 krwlock_t r_lock; 185 } rhashq_t; 186 187 /* 188 * Remote file information structure. 189 * 190 * The rnode is the "inode" for remote files. It contains all the 191 * information necessary to handle remote file on the client side. 192 * 193 * Note on file sizes: we keep two file sizes in the rnode: the size 194 * according to the client (r_size) and the size according to the server 195 * (r_attr.va_size). They can differ because we modify r_size during a 196 * write system call (nfs_rdwr), before the write request goes over the 197 * wire (before the file is actually modified on the server). If an OTW 198 * request occurs before the cached data is written to the server the file 199 * size returned from the server (r_attr.va_size) may not match r_size. 200 * r_size is the one we use, in general. r_attr.va_size is only used to 201 * determine whether or not our cached data is valid. 202 * 203 * Each rnode has 3 locks associated with it (not including the rnode 204 * hash table and free list locks): 205 * 206 * r_rwlock: Serializes nfs_write and nfs_setattr requests 207 * and allows nfs_read requests to proceed in parallel. 208 * Serializes reads/updates to directories. 209 * 210 * r_lkserlock: Serializes lock requests with map, write, and 211 * readahead operations. 212 * 213 * r_statelock: Protects all fields in the rnode except for 214 * those listed below. This lock is intented 215 * to be held for relatively short periods of 216 * time (not accross entire putpage operations, 217 * for example). 218 * 219 * The following members are protected by the mutex rpfreelist_lock: 220 * r_freef 221 * r_freeb 222 * 223 * The following members are protected by the hash bucket rwlock: 224 * r_hashf 225 * r_hashb 226 * 227 * Note: r_modaddr is only accessed when the r_statelock mutex is held. 228 * Its value is also controlled via r_rwlock. It is assumed that 229 * there will be only 1 writer active at a time, so it safe to 230 * set r_modaddr and release r_statelock as long as the r_rwlock 231 * writer lock is held. 232 * 233 * r_inmap informs nfsX_read()/write() that there is a call to nfsX_map() 234 * in progress. nfsX_read()/write() check r_inmap to decide whether 235 * to perform directio on the file or not. r_inmap is atomically 236 * incremented in nfsX_map() before the address space routines are 237 * called and atomically decremented just before nfsX_map() exits. 238 * r_inmap is not protected by any lock. 239 * 240 * r_mapcnt tells that the rnode has mapped pages. r_inmap can be 0 241 * while the rnode has mapped pages. 242 * 243 * 64-bit offsets: the code formerly assumed that atomic reads of 244 * r_size were safe and reliable; on 32-bit architectures, this is 245 * not true since an intervening bus cycle from another processor 246 * could update half of the size field. The r_statelock must now 247 * be held whenever any kind of access of r_size is made. 248 * 249 * Lock ordering: 250 * r_rwlock > r_lkserlock > r_statelock 251 */ 252 struct exportinfo; /* defined in nfs/export.h */ 253 struct servinfo; /* defined in nfs/nfs_clnt.h */ 254 struct failinfo; /* defined in nfs/nfs_clnt.h */ 255 struct mntinfo; /* defined in nfs/nfs_clnt.h */ 256 257 #ifdef _KERNEL 258 259 typedef struct rnode { 260 /* the hash fields must be first to match the rhashq_t */ 261 struct rnode *r_hashf; /* hash queue forward pointer */ 262 struct rnode *r_hashb; /* hash queue back pointer */ 263 struct rnode *r_freef; /* free list forward pointer */ 264 struct rnode *r_freeb; /* free list back pointer */ 265 rhashq_t *r_hashq; /* pointer to the hash bucket */ 266 vnode_t *r_vnode; /* vnode for remote file */ 267 nfs_rwlock_t r_rwlock; /* serializes write/setattr requests */ 268 nfs_rwlock_t r_lkserlock; /* serialize lock with other ops */ 269 kmutex_t r_statelock; /* protects (most of) rnode contents */ 270 nfs_fhandle r_fh; /* file handle */ 271 struct servinfo *r_server; /* current server */ 272 char *r_path; /* path to this rnode */ 273 u_offset_t r_nextr; /* next byte read offset (read-ahead) */ 274 cred_t *r_cred; /* current credentials */ 275 cred_t *r_unlcred; /* unlinked credentials */ 276 char *r_unlname; /* unlinked file name */ 277 vnode_t *r_unldvp; /* parent dir of unlinked file */ 278 len_t r_size; /* client's view of file size */ 279 struct vattr r_attr; /* cached vnode attributes */ 280 hrtime_t r_attrtime; /* time attributes become invalid */ 281 hrtime_t r_mtime; /* client time file last modified */ 282 long r_mapcnt; /* count of mmapped pages */ 283 uint_t r_count; /* # of refs not reflect in v_count */ 284 uint_t r_awcount; /* # of outstanding async write */ 285 uint_t r_gcount; /* getattrs waiting to flush pages */ 286 ushort_t r_flags; /* flags, see below */ 287 short r_error; /* async write error */ 288 kcondvar_t r_cv; /* condvar for blocked threads */ 289 int (*r_putapage) /* address of putapage routine */ 290 (vnode_t *, page_t *, u_offset_t *, size_t *, int, cred_t *); 291 avl_tree_t r_dir; /* cache of readdir responses */ 292 rddir_cache *r_direof; /* pointer to the EOF entry */ 293 symlink_cache r_symlink; /* cached readlink response */ 294 writeverf3 r_verf; /* version 3 write verifier */ 295 u_offset_t r_modaddr; /* address for page in writerp */ 296 commit_t r_commit; /* commit information */ 297 u_offset_t r_truncaddr; /* base for truncate operation */ 298 vsecattr_t *r_secattr; /* cached security attributes (acls) */ 299 cookieverf3 r_cookieverf; /* version 3 readdir cookie verifier */ 300 lmpl_t *r_lmpl; /* pids that may be holding locks */ 301 nfs3_pathconf_info *r_pathconf; /* cached pathconf information */ 302 acache_t *r_acache; /* list of access cache entries */ 303 kthread_t *r_serial; /* id of purging thread */ 304 list_t r_indelmap; /* list of delmap callers */ 305 uint_t r_inmap; /* to serialize read/write and mmap */ 306 } rnode_t; 307 #endif /* _KERNEL */ 308 309 /* 310 * Flags 311 */ 312 #define RREADDIRPLUS 0x1 /* issue a READDIRPLUS instead of READDIR */ 313 #define RDIRTY 0x2 /* dirty pages from write operation */ 314 #define RSTALE 0x4 /* file handle is stale */ 315 #define RMODINPROGRESS 0x8 /* page modification happening */ 316 #define RTRUNCATE 0x10 /* truncating, don't commit */ 317 #define RHAVEVERF 0x20 /* have a write verifier to compare against */ 318 #define RCOMMIT 0x40 /* commit in progress */ 319 #define RCOMMITWAIT 0x80 /* someone is waiting to do a commit */ 320 #define RHASHED 0x100 /* rnode is in hash queues */ 321 #define ROUTOFSPACE 0x200 /* an out of space error has happened */ 322 #define RDIRECTIO 0x400 /* bypass the buffer cache */ 323 #define RLOOKUP 0x800 /* a lookup has been performed */ 324 #define RWRITEATTR 0x1000 /* attributes came from WRITE */ 325 #define RINDNLCPURGE 0x2000 /* in the process of purging DNLC references */ 326 #define RDELMAPLIST 0x4000 /* delmap callers tracking for as callback */ 327 #define RINCACHEPURGE 0x8000 /* purging caches due to file size change */ 328 329 /* 330 * Convert between vnode and rnode 331 */ 332 #define RTOV(rp) ((rp)->r_vnode) 333 #define VTOR(vp) ((rnode_t *)((vp)->v_data)) 334 335 #define VTOFH(vp) (RTOFH(VTOR(vp))) 336 #define RTOFH(rp) ((fhandle_t *)(&(rp)->r_fh.fh_buf)) 337 #define VTOFH3(vp) (RTOFH3(VTOR(vp))) 338 #define RTOFH3(rp) ((nfs_fh3 *)(&(rp)->r_fh)) 339 340 #ifdef _KERNEL 341 extern int nfs_async_readahead(vnode_t *, u_offset_t, caddr_t, 342 struct seg *, cred_t *, 343 void (*)(vnode_t *, u_offset_t, 344 caddr_t, struct seg *, cred_t *)); 345 extern int nfs_async_putapage(vnode_t *, page_t *, u_offset_t, size_t, 346 int, cred_t *, int (*)(vnode_t *, page_t *, 347 u_offset_t, size_t, int, cred_t *)); 348 extern int nfs_async_pageio(vnode_t *, page_t *, u_offset_t, size_t, 349 int, cred_t *, int (*)(vnode_t *, page_t *, 350 u_offset_t, size_t, int, cred_t *)); 351 extern void nfs_async_readdir(vnode_t *, rddir_cache *, 352 cred_t *, int (*)(vnode_t *, 353 rddir_cache *, cred_t *)); 354 extern void nfs_async_commit(vnode_t *, page_t *, offset3, count3, 355 cred_t *, void (*)(vnode_t *, page_t *, 356 offset3, count3, cred_t *)); 357 extern void nfs_async_inactive(vnode_t *, cred_t *, void (*)(vnode_t *, 358 cred_t *, caller_context_t *)); 359 extern int writerp(rnode_t *, caddr_t, int, struct uio *, int); 360 extern int nfs_putpages(vnode_t *, u_offset_t, size_t, int, cred_t *); 361 extern void nfs_invalidate_pages(vnode_t *, u_offset_t, cred_t *); 362 extern int rfs2call(struct mntinfo *, rpcproc_t, xdrproc_t, caddr_t, 363 xdrproc_t, caddr_t, cred_t *, int *, enum nfsstat *, 364 int, struct failinfo *); 365 extern int rfs3call(struct mntinfo *, rpcproc_t, xdrproc_t, caddr_t, 366 xdrproc_t, caddr_t, cred_t *, int *, nfsstat3 *, 367 int, struct failinfo *); 368 extern void nfs_setswaplike(vnode_t *, vattr_t *); 369 extern vnode_t *makenfsnode(fhandle_t *, struct nfsfattr *, struct vfs *, 370 hrtime_t, cred_t *, char *, char *); 371 extern vnode_t *makenfs3node_va(nfs_fh3 *, vattr_t *, struct vfs *, hrtime_t, 372 cred_t *, char *, char *); 373 extern vnode_t *makenfs3node(nfs_fh3 *, fattr3 *, struct vfs *, hrtime_t, 374 cred_t *, char *, char *); 375 extern void rp_addfree(rnode_t *, cred_t *); 376 extern void rp_rmhash(rnode_t *); 377 extern int check_rtable(struct vfs *); 378 extern void destroy_rtable(struct vfs *, cred_t *); 379 extern void rflush(struct vfs *, cred_t *); 380 extern nfs_access_type_t nfs_access_check(rnode_t *, uint32_t, cred_t *); 381 extern void nfs_access_cache(rnode_t *rp, uint32_t, uint32_t, cred_t *); 382 extern int nfs_access_purge_rp(rnode_t *); 383 extern int nfs_putapage(vnode_t *, page_t *, u_offset_t *, size_t *, 384 int, cred_t *); 385 extern int nfs3_putapage(vnode_t *, page_t *, u_offset_t *, size_t *, 386 int, cred_t *); 387 extern void nfs_printfhandle(nfs_fhandle *); 388 extern void nfs_write_error(vnode_t *, int, cred_t *); 389 extern rddir_cache *rddir_cache_alloc(int); 390 extern void rddir_cache_hold(rddir_cache *); 391 extern void rddir_cache_rele(rddir_cache *); 392 #ifdef DEBUG 393 extern char *rddir_cache_buf_alloc(size_t, int); 394 extern void rddir_cache_buf_free(void *, size_t); 395 #endif 396 extern int nfs_rw_enter_sig(nfs_rwlock_t *, krw_t, int); 397 extern int nfs_rw_tryenter(nfs_rwlock_t *, krw_t); 398 extern void nfs_rw_exit(nfs_rwlock_t *); 399 extern int nfs_rw_lock_held(nfs_rwlock_t *, krw_t); 400 extern void nfs_rw_init(nfs_rwlock_t *, char *, krw_type_t, void *); 401 extern void nfs_rw_destroy(nfs_rwlock_t *); 402 extern int nfs_directio(vnode_t *, int, cred_t *); 403 extern int nfs3_rddir_compar(const void *, const void *); 404 extern int nfs_rddir_compar(const void *, const void *); 405 extern struct zone *nfs_zone(void); 406 extern zoneid_t nfs_zoneid(void); 407 408 #endif 409 410 #ifdef __cplusplus 411 } 412 #endif 413 414 #endif /* _NFS_RNODE_H */ 415