/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/uio.h>
#include <sys/file.h>
#include <sys/pathname.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/vnode.h>
#include <sys/rwstlock.h>
#include <sys/fem.h>
#include <sys/stat.h>
#include <sys/mode.h>
#include <sys/conf.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <c2/audit.h>
#include <sys/acl.h>
#include <sys/nbmlock.h>
#include <sys/fcntl.h>
#include <fs/fs_subr.h>
#include <sys/taskq.h>
#include <fs/fs_reparse.h>

/* Determine if this vnode is a file that is read-only */
#define	ISROFILE(vp)	\
	((vp)->v_type != VCHR && (vp)->v_type != VBLK && \
	    (vp)->v_type != VFIFO && vn_is_readonly(vp))

/* Tunable via /etc/system; used only by admin/install */
int nfs_global_client_only;

/*
 * Array of vopstats_t for per-FS-type vopstats.  This array has the same
 * number of entries as and parallel to the vfssw table.  (Arguably, it could
 * be part of the vfssw table.)  Once it's initialized, it's accessed using
 * the same fstype index that is used to index into the vfssw table.
 */
vopstats_t **vopstats_fstype;

/* vopstats initialization template used for fast initialization via bcopy() */
static vopstats_t *vs_templatep;

/* Kmem cache handle for vsk_anchor_t allocations */
kmem_cache_t *vsk_anchor_cache;

/* file events cleanup routine */
extern void free_fopdata(vnode_t *);

/*
 * Root of AVL tree for the kstats associated with vopstats.  Lock protects
 * updates to vskstat_tree.
 */
avl_tree_t	vskstat_tree;
kmutex_t	vskstat_tree_lock;

/* Global variable which enables/disables the vopstats collection */
int vopstats_enabled = 1;

/*
 * forward declarations for internal vnode specific data (vsd)
 */
static void *vsd_realloc(void *, size_t, size_t);

/*
 * forward declarations for reparse point functions
 */
static int fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr);

/*
 * VSD -- VNODE SPECIFIC DATA
 * The v_data pointer is typically used by a file system to store a
 * pointer to the file system's private node (e.g. ufs inode, nfs rnode).
 * However, there are times when additional project private data needs
 * to be stored separately from the data (node) pointed to by v_data.
 * This additional data could be stored by the file system itself or
 * by a completely different kernel entity.  VSD provides a way for
 * callers to obtain a key and store a pointer to private data associated
 * with a vnode.
 *
 * Callers are responsible for protecting the vsd by holding v_vsd_lock
 * for calls to vsd_set() and vsd_get().
 */

/*
 * vsd_lock protects:
 *   vsd_nkeys - creation and deletion of vsd keys
 *   vsd_list - insertion and deletion of vsd_node in the vsd_list
 *   vsd_destructor - adding and removing destructors to the list
 */
static kmutex_t		vsd_lock;
static uint_t		vsd_nkeys;	/* size of destructor array */
/* list of vsd_node's */
static list_t *vsd_list = NULL;
/* per-key destructor funcs */
static void		(**vsd_destructor)(void *);
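
/*
 * A minimal VSD usage sketch (illustrative; "my_key", "my_data" and
 * "my_destructor" are hypothetical), using the vsd_create()/vsd_set()/
 * vsd_get() interfaces declared in sys/vnode.h.  Per the comment above,
 * v_vsd_lock protects the vsd_set()/vsd_get() calls:
 *
 *	static uint_t my_key;
 *
 *	vsd_create(&my_key, my_destructor);	(once, at module init)
 *
 *	mutex_enter(&vp->v_vsd_lock);
 *	(void) vsd_set(vp, my_key, my_data);
 *	data = vsd_get(vp, my_key);
 *	mutex_exit(&vp->v_vsd_lock);
 */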

/*
 * The following is the common set of actions needed to update the
 * vopstats structure from a vnode op.  Both VOPSTATS_UPDATE() and
 * VOPSTATS_UPDATE_IO() do almost the same thing, except for the
 * recording of the bytes transferred.  Since the code is similar
 * but small, it is nearly a duplicate.  Consequently any changes
 * to one may need to be reflected in the other.
 * Rundown of the variables:
 * vp - Pointer to the vnode
 * counter - Partial name structure member to update in vopstats for counts
 * bytecounter - Partial name structure member to update in vopstats for bytes
 * bytesval - Value to update in vopstats for bytes
 * fstype - Index into vsanchor_fstype[], same as index into vfssw[]
 * vsp - Pointer to vopstats structure (either in vfs or vsanchor_fstype[i])
 */

#define	VOPSTATS_UPDATE(vp, counter) {					\
	vfs_t *vfsp = (vp)->v_vfsp;					\
	if (vfsp && vfsp->vfs_implp &&					\
	    (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) {	\
		vopstats_t *vsp = &vfsp->vfs_vopstats;			\
		uint64_t *stataddr = &(vsp->n##counter.value.ui64);	\
		extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
		    size_t, uint64_t *);				\
		__dtrace_probe___fsinfo_##counter(vp, 0, stataddr);	\
		(*stataddr)++;						\
		if ((vsp = vfsp->vfs_fstypevsp) != NULL) {		\
			vsp->n##counter.value.ui64++;			\
		}							\
	}								\
}

#define	VOPSTATS_UPDATE_IO(vp, counter, bytecounter, bytesval) {	\
	vfs_t *vfsp = (vp)->v_vfsp;					\
	if (vfsp && vfsp->vfs_implp &&					\
	    (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) {	\
		vopstats_t *vsp = &vfsp->vfs_vopstats;			\
		uint64_t *stataddr = &(vsp->n##counter.value.ui64);	\
		extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
		    size_t, uint64_t *);				\
		__dtrace_probe___fsinfo_##counter(vp, bytesval, stataddr); \
		(*stataddr)++;						\
		vsp->bytecounter.value.ui64 += bytesval;		\
		if ((vsp = vfsp->vfs_fstypevsp) != NULL) {		\
			vsp->n##counter.value.ui64++;			\
			vsp->bytecounter.value.ui64 += bytesval;	\
		}							\
	}								\
}

/*
 * If the filesystem does not support XIDs, map the credential.
 * If the vfsp is NULL, perhaps we should also map?
 */
#define	VOPXID_MAP_CR(vp, cr) {						\
	vfs_t *vfsp = (vp)->v_vfsp;					\
	if (vfsp != NULL && (vfsp->vfs_flag & VFS_XID) == 0)		\
		cr = crgetmapped(cr);					\
}

/*
 * Convert stat(2) formats to vnode types and vice versa.  (Knows about
 * numerical order of S_IFMT and vnode types.)
 */
enum vtype iftovt_tab[] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
};

ushort_t vttoif_tab[] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO,
	S_IFDOOR, 0, S_IFSOCK, S_IFPORT, 0
};
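
/*
 * A minimal usage sketch: these tables back the IFTOVT()/VTTOIF()
 * macros (in sys/mode.h), which rely on the numerical ordering noted
 * above, e.g.:
 *
 *	enum vtype vt = IFTOVT(mode);	(S_IFDIR yields VDIR)
 *	mode_t ifmt = VTTOIF(VREG);	(yields S_IFREG)
 */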

/*
 * The system vnode cache.
 */

kmem_cache_t *vn_cache;


/*
 * Vnode operations vector.
 */

static const fs_operation_trans_def_t vn_ops_table[] = {
	VOPNAME_OPEN, offsetof(struct vnodeops, vop_open),
	    fs_nosys, fs_nosys,

	VOPNAME_CLOSE, offsetof(struct vnodeops, vop_close),
	    fs_nosys, fs_nosys,

	VOPNAME_READ, offsetof(struct vnodeops, vop_read),
	    fs_nosys, fs_nosys,

	VOPNAME_WRITE, offsetof(struct vnodeops, vop_write),
	    fs_nosys, fs_nosys,

	VOPNAME_IOCTL, offsetof(struct vnodeops, vop_ioctl),
	    fs_nosys, fs_nosys,

	VOPNAME_SETFL, offsetof(struct vnodeops, vop_setfl),
	    fs_setfl, fs_nosys,

	VOPNAME_GETATTR, offsetof(struct vnodeops, vop_getattr),
	    fs_nosys, fs_nosys,

	VOPNAME_SETATTR, offsetof(struct vnodeops, vop_setattr),
	    fs_nosys, fs_nosys,

	VOPNAME_ACCESS, offsetof(struct vnodeops, vop_access),
	    fs_nosys, fs_nosys,

	VOPNAME_LOOKUP, offsetof(struct vnodeops, vop_lookup),
	    fs_nosys, fs_nosys,

	VOPNAME_CREATE, offsetof(struct vnodeops, vop_create),
	    fs_nosys, fs_nosys,

	VOPNAME_REMOVE, offsetof(struct vnodeops, vop_remove),
	    fs_nosys, fs_nosys,

	VOPNAME_LINK, offsetof(struct vnodeops, vop_link),
	    fs_nosys, fs_nosys,

	VOPNAME_RENAME, offsetof(struct vnodeops, vop_rename),
	    fs_nosys, fs_nosys,

	VOPNAME_MKDIR, offsetof(struct vnodeops, vop_mkdir),
	    fs_nosys, fs_nosys,

	VOPNAME_RMDIR, offsetof(struct vnodeops, vop_rmdir),
	    fs_nosys, fs_nosys,

	VOPNAME_READDIR, offsetof(struct vnodeops, vop_readdir),
	    fs_nosys, fs_nosys,

	VOPNAME_SYMLINK, offsetof(struct vnodeops, vop_symlink),
	    fs_nosys, fs_nosys,

	VOPNAME_READLINK, offsetof(struct vnodeops, vop_readlink),
	    fs_nosys, fs_nosys,

	VOPNAME_FSYNC, offsetof(struct vnodeops, vop_fsync),
	    fs_nosys, fs_nosys,

	VOPNAME_INACTIVE, offsetof(struct vnodeops, vop_inactive),
	    fs_nosys, fs_nosys,

	VOPNAME_FID, offsetof(struct vnodeops, vop_fid),
	    fs_nosys, fs_nosys,

	VOPNAME_RWLOCK, offsetof(struct vnodeops, vop_rwlock),
	    fs_rwlock, fs_rwlock,

	VOPNAME_RWUNLOCK, offsetof(struct vnodeops, vop_rwunlock),
	    (fs_generic_func_p) fs_rwunlock,
	    (fs_generic_func_p) fs_rwunlock,	/* no errors allowed */

	VOPNAME_SEEK, offsetof(struct vnodeops, vop_seek),
	    fs_nosys, fs_nosys,

	VOPNAME_CMP, offsetof(struct vnodeops, vop_cmp),
	    fs_cmp, fs_cmp,		/* no errors allowed */

	VOPNAME_FRLOCK, offsetof(struct vnodeops, vop_frlock),
	    fs_frlock, fs_nosys,

	VOPNAME_SPACE, offsetof(struct vnodeops, vop_space),
	    fs_nosys, fs_nosys,

	VOPNAME_REALVP, offsetof(struct vnodeops, vop_realvp),
	    fs_nosys, fs_nosys,

	VOPNAME_GETPAGE, offsetof(struct vnodeops, vop_getpage),
	    fs_nosys, fs_nosys,

	VOPNAME_PUTPAGE, offsetof(struct vnodeops, vop_putpage),
	    fs_nosys, fs_nosys,

	VOPNAME_MAP, offsetof(struct vnodeops, vop_map),
	    (fs_generic_func_p) fs_nosys_map,
	    (fs_generic_func_p) fs_nosys_map,

	VOPNAME_ADDMAP, offsetof(struct vnodeops, vop_addmap),
	    (fs_generic_func_p) fs_nosys_addmap,
	    (fs_generic_func_p) fs_nosys_addmap,

	VOPNAME_DELMAP, offsetof(struct vnodeops, vop_delmap),
	    fs_nosys, fs_nosys,

	VOPNAME_POLL, offsetof(struct vnodeops, vop_poll),
	    (fs_generic_func_p) fs_poll, (fs_generic_func_p) fs_nosys_poll,

	VOPNAME_DUMP, offsetof(struct vnodeops, vop_dump),
	    fs_nosys, fs_nosys,

	VOPNAME_PATHCONF, offsetof(struct vnodeops, vop_pathconf),
	    fs_pathconf, fs_nosys,

	VOPNAME_PAGEIO, offsetof(struct vnodeops, vop_pageio),
	    fs_nosys, fs_nosys,

	VOPNAME_DUMPCTL, offsetof(struct vnodeops, vop_dumpctl),
	    fs_nosys, fs_nosys,

	VOPNAME_DISPOSE, offsetof(struct vnodeops, vop_dispose),
	    (fs_generic_func_p) fs_dispose,
	    (fs_generic_func_p) fs_nodispose,

	VOPNAME_SETSECATTR, offsetof(struct vnodeops, vop_setsecattr),
	    fs_nosys, fs_nosys,

	VOPNAME_GETSECATTR, offsetof(struct vnodeops, vop_getsecattr),
	    fs_fab_acl, fs_nosys,

	VOPNAME_SHRLOCK, offsetof(struct vnodeops, vop_shrlock),
	    fs_shrlock, fs_nosys,

	VOPNAME_VNEVENT, offsetof(struct vnodeops, vop_vnevent),
	    (fs_generic_func_p) fs_vnevent_nosupport,
	    (fs_generic_func_p) fs_vnevent_nosupport,

	VOPNAME_REQZCBUF, offsetof(struct vnodeops, vop_reqzcbuf),
	    fs_nosys, fs_nosys,

	VOPNAME_RETZCBUF, offsetof(struct vnodeops, vop_retzcbuf),
	    fs_nosys, fs_nosys,

	NULL, 0, NULL, NULL
};

/* Extensible attribute (xva) routines. */

/*
 * Zero out the structure, set the size of the requested/returned bitmaps,
 * set AT_XVATTR in the embedded vattr_t's va_mask, and set up the pointer
 * to the returned attributes array.
 */
void
xva_init(xvattr_t *xvap)
{
	bzero(xvap, sizeof (xvattr_t));
	xvap->xva_mapsize = XVA_MAPSIZE;
	xvap->xva_magic = XVA_MAGIC;
	xvap->xva_vattr.va_mask = AT_XVATTR;
	xvap->xva_rtnattrmapp = &(xvap->xva_rtnattrmap)[0];
}

/*
 * If AT_XVATTR is set, returns a pointer to the embedded xoptattr_t
 * structure.  Otherwise, returns NULL.
 */
xoptattr_t *
xva_getxoptattr(xvattr_t *xvap)
{
	xoptattr_t *xoap = NULL;
	if (xvap->xva_vattr.va_mask & AT_XVATTR)
		xoap = &xvap->xva_xoptattrs;
	return (xoap);
}
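
/*
 * A minimal sketch of requesting one extensible attribute via the
 * XVA_SET_REQ()/XVA_ISSET_RTN() macros from sys/vnode.h (XAT_READONLY
 * is just an example attribute):
 *
 *	xvattr_t xva;
 *	xoptattr_t *xoap;
 *
 *	xva_init(&xva);
 *	XVA_SET_REQ(&xva, XAT_READONLY);
 *	if (VOP_GETATTR(vp, &xva.xva_vattr, 0, CRED(), NULL) == 0 &&
 *	    (xoap = xva_getxoptattr(&xva)) != NULL &&
 *	    XVA_ISSET_RTN(&xva, XAT_READONLY))
 *		readonly = xoap->xoa_readonly;
 */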

/*
 * Used by the AVL routines to compare two vsk_anchor_t structures in the tree.
 * We use the f_fsid reported by VFS_STATVFS() since we use that for the
 * kstat name.
 */
static int
vska_compar(const void *n1, const void *n2)
{
	int ret;
	ulong_t p1 = ((vsk_anchor_t *)n1)->vsk_fsid;
	ulong_t p2 = ((vsk_anchor_t *)n2)->vsk_fsid;

	if (p1 < p2) {
		ret = -1;
	} else if (p1 > p2) {
		ret = 1;
	} else {
		ret = 0;
	}

	return (ret);
}

/*
 * Used to create a single template which will be bcopy()ed to newly
 * initialized vopstats structures in initialize_vopstats(), below.
 */
static vopstats_t *
create_vopstats_template()
{
	vopstats_t *vsp;

	vsp = kmem_alloc(sizeof (vopstats_t), KM_SLEEP);
	bzero(vsp, sizeof (*vsp));	/* Start fresh */

	/* VOP_OPEN */
	kstat_named_init(&vsp->nopen, "nopen", KSTAT_DATA_UINT64);
	/* VOP_CLOSE */
	kstat_named_init(&vsp->nclose, "nclose", KSTAT_DATA_UINT64);
	/* VOP_READ I/O */
	kstat_named_init(&vsp->nread, "nread", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->read_bytes, "read_bytes", KSTAT_DATA_UINT64);
	/* VOP_WRITE I/O */
	kstat_named_init(&vsp->nwrite, "nwrite", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->write_bytes, "write_bytes", KSTAT_DATA_UINT64);
	/* VOP_IOCTL */
	kstat_named_init(&vsp->nioctl, "nioctl", KSTAT_DATA_UINT64);
	/* VOP_SETFL */
	kstat_named_init(&vsp->nsetfl, "nsetfl", KSTAT_DATA_UINT64);
	/* VOP_GETATTR */
	kstat_named_init(&vsp->ngetattr, "ngetattr", KSTAT_DATA_UINT64);
	/* VOP_SETATTR */
	kstat_named_init(&vsp->nsetattr, "nsetattr", KSTAT_DATA_UINT64);
	/* VOP_ACCESS */
	kstat_named_init(&vsp->naccess, "naccess", KSTAT_DATA_UINT64);
	/* VOP_LOOKUP */
	kstat_named_init(&vsp->nlookup, "nlookup", KSTAT_DATA_UINT64);
	/* VOP_CREATE */
	kstat_named_init(&vsp->ncreate, "ncreate", KSTAT_DATA_UINT64);
	/* VOP_REMOVE */
	kstat_named_init(&vsp->nremove, "nremove", KSTAT_DATA_UINT64);
	/* VOP_LINK */
	kstat_named_init(&vsp->nlink, "nlink", KSTAT_DATA_UINT64);
	/* VOP_RENAME */
	kstat_named_init(&vsp->nrename, "nrename", KSTAT_DATA_UINT64);
	/* VOP_MKDIR */
	kstat_named_init(&vsp->nmkdir, "nmkdir", KSTAT_DATA_UINT64);
	/* VOP_RMDIR */
	kstat_named_init(&vsp->nrmdir, "nrmdir", KSTAT_DATA_UINT64);
	/* VOP_READDIR I/O */
	kstat_named_init(&vsp->nreaddir, "nreaddir", KSTAT_DATA_UINT64);
	kstat_named_init(&vsp->readdir_bytes, "readdir_bytes",
	    KSTAT_DATA_UINT64);
	/* VOP_SYMLINK */
	kstat_named_init(&vsp->nsymlink, "nsymlink", KSTAT_DATA_UINT64);
	/* VOP_READLINK */
	kstat_named_init(&vsp->nreadlink, "nreadlink", KSTAT_DATA_UINT64);
	/* VOP_FSYNC */
	kstat_named_init(&vsp->nfsync, "nfsync", KSTAT_DATA_UINT64);
	/* VOP_INACTIVE */
	kstat_named_init(&vsp->ninactive, "ninactive", KSTAT_DATA_UINT64);
	/* VOP_FID */
	kstat_named_init(&vsp->nfid, "nfid", KSTAT_DATA_UINT64);
	/* VOP_RWLOCK */
	kstat_named_init(&vsp->nrwlock, "nrwlock", KSTAT_DATA_UINT64);
	/* VOP_RWUNLOCK */
	kstat_named_init(&vsp->nrwunlock, "nrwunlock", KSTAT_DATA_UINT64);
	/* VOP_SEEK */
	kstat_named_init(&vsp->nseek, "nseek", KSTAT_DATA_UINT64);
	/* VOP_CMP */
	kstat_named_init(&vsp->ncmp, "ncmp", KSTAT_DATA_UINT64);
	/* VOP_FRLOCK */
	kstat_named_init(&vsp->nfrlock, "nfrlock", KSTAT_DATA_UINT64);
	/* VOP_SPACE */
	kstat_named_init(&vsp->nspace, "nspace", KSTAT_DATA_UINT64);
	/* VOP_REALVP */
	kstat_named_init(&vsp->nrealvp, "nrealvp", KSTAT_DATA_UINT64);
	/* VOP_GETPAGE */
	kstat_named_init(&vsp->ngetpage, "ngetpage", KSTAT_DATA_UINT64);
	/* VOP_PUTPAGE */
	kstat_named_init(&vsp->nputpage, "nputpage", KSTAT_DATA_UINT64);
	/* VOP_MAP */
	kstat_named_init(&vsp->nmap, "nmap", KSTAT_DATA_UINT64);
	/* VOP_ADDMAP */
	kstat_named_init(&vsp->naddmap, "naddmap", KSTAT_DATA_UINT64);
	/* VOP_DELMAP */
	kstat_named_init(&vsp->ndelmap, "ndelmap", KSTAT_DATA_UINT64);
	/* VOP_POLL */
	kstat_named_init(&vsp->npoll, "npoll", KSTAT_DATA_UINT64);
	/* VOP_DUMP */
	kstat_named_init(&vsp->ndump, "ndump", KSTAT_DATA_UINT64);
	/* VOP_PATHCONF */
	kstat_named_init(&vsp->npathconf, "npathconf", KSTAT_DATA_UINT64);
	/* VOP_PAGEIO */
	kstat_named_init(&vsp->npageio, "npageio", KSTAT_DATA_UINT64);
	/* VOP_DUMPCTL */
	kstat_named_init(&vsp->ndumpctl, "ndumpctl", KSTAT_DATA_UINT64);
	/* VOP_DISPOSE */
	kstat_named_init(&vsp->ndispose, "ndispose", KSTAT_DATA_UINT64);
	/* VOP_SETSECATTR */
	kstat_named_init(&vsp->nsetsecattr, "nsetsecattr", KSTAT_DATA_UINT64);
	/* VOP_GETSECATTR */
	kstat_named_init(&vsp->ngetsecattr, "ngetsecattr", KSTAT_DATA_UINT64);
	/* VOP_SHRLOCK */
	kstat_named_init(&vsp->nshrlock, "nshrlock", KSTAT_DATA_UINT64);
	/* VOP_VNEVENT */
	kstat_named_init(&vsp->nvnevent, "nvnevent", KSTAT_DATA_UINT64);
	/* VOP_REQZCBUF */
	kstat_named_init(&vsp->nreqzcbuf, "nreqzcbuf", KSTAT_DATA_UINT64);
	/* VOP_RETZCBUF */
	kstat_named_init(&vsp->nretzcbuf, "nretzcbuf", KSTAT_DATA_UINT64);

	return (vsp);
}

/*
 * Creates a kstat structure associated with a vopstats structure.
 */
kstat_t *
new_vskstat(char *ksname, vopstats_t *vsp)
{
	kstat_t *ksp;

	if (!vopstats_enabled) {
		return (NULL);
	}

	ksp = kstat_create("unix", 0, ksname, "misc", KSTAT_TYPE_NAMED,
	    sizeof (vopstats_t) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE);
	if (ksp) {
		ksp->ks_data = vsp;
		kstat_install(ksp);
	}

	return (ksp);
}

/*
 * Called from vfsinit() to initialize the support mechanisms for vopstats
 */
void
vopstats_startup()
{
	if (!vopstats_enabled)
		return;

	/*
	 * Creates the AVL tree which holds per-vfs vopstat anchors.  This
	 * is necessary since we need to check if a kstat exists before we
	 * attempt to create it.  Also, initialize its lock.
	 */
	avl_create(&vskstat_tree, vska_compar, sizeof (vsk_anchor_t),
	    offsetof(vsk_anchor_t, vsk_node));
	mutex_init(&vskstat_tree_lock, NULL, MUTEX_DEFAULT, NULL);

	vsk_anchor_cache = kmem_cache_create("vsk_anchor_cache",
	    sizeof (vsk_anchor_t), sizeof (uintptr_t), NULL, NULL, NULL,
	    NULL, NULL, 0);

	/*
	 * Set up the array of pointers for the vopstats-by-FS-type.
	 * The entries will be allocated/initialized as each file system
	 * goes through modload/mod_installfs.
	 */
	vopstats_fstype = (vopstats_t **)kmem_zalloc(
	    (sizeof (vopstats_t *) * nfstype), KM_SLEEP);

	/* Set up the global vopstats initialization template */
	vs_templatep = create_vopstats_template();
}

/*
 * We need to have all of the counters zeroed.
 * The initialization of the vopstats_t includes on the order of
 * 50 calls to kstat_named_init().  Rather than do that on every call,
 * we do it once in a template (vs_templatep) then bcopy it over.
 */
void
initialize_vopstats(vopstats_t *vsp)
{
	if (vsp == NULL)
		return;

	bcopy(vs_templatep, vsp, sizeof (vopstats_t));
}
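
/*
 * A usage note (illustrative): assuming VOPSTATS_STR is the conventional
 * "vopstats_" prefix, each mount with VFS_STATS set ends up with a named
 * kstat in module "unix", class "misc" (see get_vskstat_anchor(), below),
 * so the counters can be read from userland with kstat(1M); the fsid
 * here is hypothetical:
 *
 *	$ kstat -m unix -c misc -n vopstats_2d90002
 */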

/*
 * If possible, determine which vopstats by fstype to use and
 * return a pointer to the caller.
 */
vopstats_t *
get_fstype_vopstats(vfs_t *vfsp, struct vfssw *vswp)
{
	int fstype = 0;	/* Index into vfssw[] */
	vopstats_t *vsp = NULL;

	if (vfsp == NULL || (vfsp->vfs_flag & VFS_STATS) == 0 ||
	    !vopstats_enabled)
		return (NULL);
	/*
	 * Set up the fstype.  We go to so much trouble because all versions
	 * of NFS use the same fstype in their vfs even though they have
	 * distinct entries in the vfssw[] table.
	 * NOTE: A special vfs (e.g., EIO_vfs) may not have an entry.
	 */
	if (vswp) {
		fstype = vswp - vfssw;	/* Gets us the index */
	} else {
		fstype = vfsp->vfs_fstype;
	}

	/*
	 * Point to the per-fstype vopstats.  The only valid values are
	 * non-zero positive values less than the number of vfssw[] table
	 * entries.
	 */
	if (fstype > 0 && fstype < nfstype) {
		vsp = vopstats_fstype[fstype];
	}

	return (vsp);
}

/*
 * Generate a kstat name, create the kstat structure, and allocate a
 * vsk_anchor_t to hold it together.  Return the pointer to the vsk_anchor_t
 * to the caller.  This must only be called from a mount.
 */
vsk_anchor_t *
get_vskstat_anchor(vfs_t *vfsp)
{
	char kstatstr[KSTAT_STRLEN];	/* kstat name for vopstats */
	statvfs64_t statvfsbuf;		/* Needed to find f_fsid */
	vsk_anchor_t *vskp = NULL;	/* vfs <--> kstat anchor */
	kstat_t *ksp;			/* Ptr to new kstat */
	avl_index_t where;		/* Location in the AVL tree */

	if (vfsp == NULL || vfsp->vfs_implp == NULL ||
	    (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
		return (NULL);

	/* Need to get the fsid to build a kstat name */
	if (VFS_STATVFS(vfsp, &statvfsbuf) == 0) {
		/* Create a name for our kstats based on fsid */
		(void) snprintf(kstatstr, KSTAT_STRLEN, "%s%lx",
		    VOPSTATS_STR, statvfsbuf.f_fsid);

		/* Allocate and initialize the vsk_anchor_t */
		vskp = kmem_cache_alloc(vsk_anchor_cache, KM_SLEEP);
		bzero(vskp, sizeof (*vskp));
		vskp->vsk_fsid = statvfsbuf.f_fsid;

		mutex_enter(&vskstat_tree_lock);
		if (avl_find(&vskstat_tree, vskp, &where) == NULL) {
			avl_insert(&vskstat_tree, vskp, where);
			mutex_exit(&vskstat_tree_lock);

			/*
			 * Now that we've got the anchor in the AVL
			 * tree, we can create the kstat.
			 */
			ksp = new_vskstat(kstatstr, &vfsp->vfs_vopstats);
			if (ksp) {
				vskp->vsk_ksp = ksp;
			}
		} else {
			/* Oops, found one!  Release memory and lock. */
			mutex_exit(&vskstat_tree_lock);
			kmem_cache_free(vsk_anchor_cache, vskp);
			vskp = NULL;
		}
	}
	return (vskp);
}

/*
 * We're in the process of tearing down the vfs and need to clean up
 * the data structures associated with the vopstats.  Must only be called
 * from dounmount().
 */
void
teardown_vopstats(vfs_t *vfsp)
{
	vsk_anchor_t *vskap;
	avl_index_t where;

	if (vfsp == NULL || vfsp->vfs_implp == NULL ||
	    (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
		return;

	/* This is a safe check since VFS_STATS must be set (see above) */
	if ((vskap = vfsp->vfs_vskap) == NULL)
		return;

	/* Whack the pointer right away */
	vfsp->vfs_vskap = NULL;

	/* Lock the tree, remove the node, and delete the kstat */
	mutex_enter(&vskstat_tree_lock);
	if (avl_find(&vskstat_tree, vskap, &where)) {
		avl_remove(&vskstat_tree, vskap);
	}

	if (vskap->vsk_ksp) {
		kstat_delete(vskap->vsk_ksp);
	}
	mutex_exit(&vskstat_tree_lock);

	kmem_cache_free(vsk_anchor_cache, vskap);
}

/*
 * Read or write a vnode.  Called from kernel code.
 */
int
vn_rdwr(
	enum uio_rw rw,
	struct vnode *vp,
	caddr_t base,
	ssize_t len,
	offset_t offset,
	enum uio_seg seg,
	int ioflag,
	rlim64_t ulimit,	/* meaningful only if rw is UIO_WRITE */
	cred_t *cr,
	ssize_t *residp)
{
	struct uio uio;
	struct iovec iov;
	int error;
	int in_crit = 0;

	if (rw == UIO_WRITE && ISROFILE(vp))
		return (EROFS);

	if (len < 0)
		return (EIO);

	VOPXID_MAP_CR(vp, cr);

	iov.iov_base = base;
	iov.iov_len = len;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_loffset = offset;
	uio.uio_segflg = (short)seg;
	uio.uio_resid = len;
	uio.uio_llimit = ulimit;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, cr, &svmand);
		if (error != 0)
			goto done;
		if (nbl_conflict(vp, rw == UIO_WRITE ? NBL_WRITE : NBL_READ,
		    uio.uio_offset, uio.uio_resid, svmand, NULL)) {
			error = EACCES;
			goto done;
		}
	}

	(void) VOP_RWLOCK(vp,
	    rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
	if (rw == UIO_WRITE) {
		uio.uio_fmode = FWRITE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		error = VOP_WRITE(vp, &uio, ioflag, cr, NULL);
	} else {
		uio.uio_fmode = FREAD;
		uio.uio_extflg = UIO_COPY_CACHED;
		error = VOP_READ(vp, &uio, ioflag, cr, NULL);
	}
	VOP_RWUNLOCK(vp,
	    rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
	if (residp)
		*residp = uio.uio_resid;
	else if (uio.uio_resid)
		error = EIO;

done:
	if (in_crit)
		nbl_end_crit(vp);
	return (error);
}
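
/*
 * A minimal usage sketch (illustrative; the buffer is hypothetical):
 * reading the first bytes of an already-held vnode from kernel code:
 *
 *	char buf[128];
 *	ssize_t resid;
 *	int err;
 *
 *	err = vn_rdwr(UIO_READ, vp, (caddr_t)buf, sizeof (buf),
 *	    (offset_t)0, UIO_SYSSPACE, 0, (rlim64_t)0, CRED(), &resid);
 *
 * On success, sizeof (buf) - resid bytes were read; passing a NULL
 * residp instead turns any short transfer into EIO, as above.
 */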

/*
 * Release a vnode.  Call VOP_INACTIVE on last reference or
 * decrement reference count.
 *
 * To avoid race conditions, the v_count is left at 1 for
 * the call to VOP_INACTIVE.  This prevents another thread
 * from reclaiming and releasing the vnode *before* the
 * VOP_INACTIVE routine has a chance to destroy the vnode.
 * We can't have more than 1 thread calling VOP_INACTIVE
 * on a vnode.
 */
void
vn_rele(vnode_t *vp)
{
	VERIFY(vp->v_count > 0);
	mutex_enter(&vp->v_lock);
	if (vp->v_count == 1) {
		mutex_exit(&vp->v_lock);
		VOP_INACTIVE(vp, CRED(), NULL);
		return;
	}
	vp->v_count--;
	mutex_exit(&vp->v_lock);
}

/*
 * Release a vnode referenced by the DNLC.  Multiple DNLC references are
 * treated as a single reference, so v_count is not decremented until the
 * last DNLC hold is released.  This makes it possible to distinguish
 * vnodes that are referenced only by the DNLC.
 */
void
vn_rele_dnlc(vnode_t *vp)
{
	VERIFY((vp->v_count > 0) && (vp->v_count_dnlc > 0));
	mutex_enter(&vp->v_lock);
	if (--vp->v_count_dnlc == 0) {
		if (vp->v_count == 1) {
			mutex_exit(&vp->v_lock);
			VOP_INACTIVE(vp, CRED(), NULL);
			return;
		}
		vp->v_count--;
	}
	mutex_exit(&vp->v_lock);
}

/*
 * Like vn_rele() except that it clears v_stream under v_lock.
 * This is used by sockfs when it dismantles the association between
 * the sockfs node and the vnode in the underlying file system.
 * v_lock has to be held to prevent a thread coming through the lookupname
 * path from accessing a stream head that is going away.
 */
void
vn_rele_stream(vnode_t *vp)
{
	VERIFY(vp->v_count > 0);
	mutex_enter(&vp->v_lock);
	vp->v_stream = NULL;
	if (vp->v_count == 1) {
		mutex_exit(&vp->v_lock);
		VOP_INACTIVE(vp, CRED(), NULL);
		return;
	}
	vp->v_count--;
	mutex_exit(&vp->v_lock);
}

static void
vn_rele_inactive(vnode_t *vp)
{
	VOP_INACTIVE(vp, CRED(), NULL);
}

/*
 * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
 * asynchronously using a taskq.  This can avoid deadlocks caused by
 * re-entering the file system as a result of releasing the vnode.  Note,
 * file systems already have to handle the race where the vnode is
 * incremented before the inactive routine is called and does its locking.
 *
 * Warning: Excessive use of this routine can lead to performance problems.
 * This is because taskqs throttle back allocation if too many are created.
 */
void
vn_rele_async(vnode_t *vp, taskq_t *taskq)
{
	VERIFY(vp->v_count > 0);
	mutex_enter(&vp->v_lock);
	if (vp->v_count == 1) {
		mutex_exit(&vp->v_lock);
		VERIFY(taskq_dispatch(taskq, (task_func_t *)vn_rele_inactive,
		    vp, TQ_SLEEP) != NULL);
		return;
	}
	vp->v_count--;
	mutex_exit(&vp->v_lock);
}
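
/*
 * A minimal usage sketch (illustrative; the taskq name and sizing are
 * hypothetical): a caller that must not re-enter the file system from
 * VOP_INACTIVE can defer the final release to a long-lived taskq:
 *
 *	taskq_t *tq = taskq_create("vn_rele_taskq", 1, minclsyspri,
 *	    1, INT_MAX, TASKQ_PREPOPULATE);
 *	...
 *	vn_rele_async(vp, tq);
 *
 * Per the warning above, the taskq should be created once and shared,
 * not created per release.
 */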

int
vn_open(
	char *pnamep,
	enum uio_seg seg,
	int filemode,
	int createmode,
	struct vnode **vpp,
	enum create crwhy,
	mode_t umask)
{
	return (vn_openat(pnamep, seg, filemode, createmode, vpp, crwhy,
	    umask, NULL, -1));
}


/*
 * Open/create a vnode.
 * This may be callable by the kernel, the only known use
 * of user context being that the current user credentials
 * are used for permissions.  crwhy is defined iff filemode & FCREAT.
 */
int
vn_openat(
	char *pnamep,
	enum uio_seg seg,
	int filemode,
	int createmode,
	struct vnode **vpp,
	enum create crwhy,
	mode_t umask,
	struct vnode *startvp,
	int fd)
{
	struct vnode *vp;
	int mode;
	int accessflags;
	int error;
	int in_crit = 0;
	int open_done = 0;
	int shrlock_done = 0;
	struct vattr vattr;
	enum symfollow follow;
	int estale_retry = 0;
	struct shrlock shr;
	struct shr_locowner shr_own;

	mode = 0;
	accessflags = 0;
	if (filemode & FREAD)
		mode |= VREAD;
	if (filemode & (FWRITE|FTRUNC))
		mode |= VWRITE;
	if (filemode & FXATTRDIROPEN)
		mode |= VEXEC;

	/* symlink interpretation */
	if (filemode & FNOFOLLOW)
		follow = NO_FOLLOW;
	else
		follow = FOLLOW;

	if (filemode & FAPPEND)
		accessflags |= V_APPEND;

top:
	if (filemode & FCREAT) {
		enum vcexcl excl;

		/*
		 * Wish to create a file.
		 */
		vattr.va_type = VREG;
		vattr.va_mode = createmode;
		vattr.va_mask = AT_TYPE|AT_MODE;
		if (filemode & FTRUNC) {
			vattr.va_size = 0;
			vattr.va_mask |= AT_SIZE;
		}
		if (filemode & FEXCL)
			excl = EXCL;
		else
			excl = NONEXCL;

		if (error =
		    vn_createat(pnamep, seg, &vattr, excl, mode, &vp, crwhy,
		    (filemode & ~(FTRUNC|FEXCL)), umask, startvp))
			return (error);
	} else {
		/*
		 * Wish to open a file.  Just look it up.
		 */
		if (error = lookupnameat(pnamep, seg, follow,
		    NULLVPP, &vp, startvp)) {
			if ((error == ESTALE) &&
			    fs_need_estale_retry(estale_retry++))
				goto top;
			return (error);
		}

		/*
		 * Get the attributes to check whether file is large.
		 * We do this only if the FOFFMAX flag is not set and
		 * only for regular files.
		 */

		if (!(filemode & FOFFMAX) && (vp->v_type == VREG)) {
			vattr.va_mask = AT_SIZE;
			if ((error = VOP_GETATTR(vp, &vattr, 0,
			    CRED(), NULL))) {
				goto out;
			}
			if (vattr.va_size > (u_offset_t)MAXOFF32_T) {
				/*
				 * Large File API - regular open fails
				 * if FOFFMAX flag is not set in file mode
				 */
				error = EOVERFLOW;
				goto out;
			}
		}
		/*
		 * Can't write directories, active texts, or
		 * read-only filesystems.  Can't truncate files
		 * on which mandatory locking is in effect.
		 */
		if (filemode & (FWRITE|FTRUNC)) {
			/*
			 * Allow writable directory if VDIROPEN flag is set.
			 */
			if (vp->v_type == VDIR && !(vp->v_flag & VDIROPEN)) {
				error = EISDIR;
				goto out;
			}
			if (ISROFILE(vp)) {
				error = EROFS;
				goto out;
			}
			/*
			 * Can't truncate files on which
			 * sysv mandatory locking is in effect.
			 */
			if (filemode & FTRUNC) {
				vnode_t *rvp;

				if (VOP_REALVP(vp, &rvp, NULL) != 0)
					rvp = vp;
				if (rvp->v_filocks != NULL) {
					vattr.va_mask = AT_MODE;
					if ((error = VOP_GETATTR(vp,
					    &vattr, 0, CRED(), NULL)) == 0 &&
					    MANDLOCK(vp, vattr.va_mode))
						error = EAGAIN;
				}
			}
			if (error)
				goto out;
		}
		/*
		 * Check permissions.
		 */
		if (error = VOP_ACCESS(vp, mode, accessflags, CRED(), NULL))
			goto out;
	}

	/*
	 * Do remaining checks for FNOFOLLOW and FNOLINKS.
	 */
	if ((filemode & FNOFOLLOW) && vp->v_type == VLNK) {
		error = ELOOP;
		goto out;
	}
	if (filemode & FNOLINKS) {
		vattr.va_mask = AT_NLINK;
		if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))) {
			goto out;
		}
		if (vattr.va_nlink != 1) {
			error = EMLINK;
			goto out;
		}
	}

	/*
	 * Opening a socket corresponding to the AF_UNIX pathname
	 * in the filesystem name space is not supported.
	 * However, VSOCK nodes in namefs are supported in order
	 * to make fattach work for sockets.
	 *
	 * XXX This uses VOP_REALVP to distinguish between
	 * an unopened namefs node (where VOP_REALVP returns a
	 * different VSOCK vnode) and a VSOCK created by vn_create
	 * in some file system (where VOP_REALVP would never return
	 * a different vnode).
	 */
	if (vp->v_type == VSOCK) {
		struct vnode *nvp;

		error = VOP_REALVP(vp, &nvp, NULL);
		if (error != 0 || nvp == NULL || nvp == vp ||
		    nvp->v_type != VSOCK) {
			error = EOPNOTSUPP;
			goto out;
		}
	}

	if ((vp->v_type == VREG) && nbl_need_check(vp)) {
		/* get share reservation */
		shr.s_access = 0;
		if (filemode & FWRITE)
			shr.s_access |= F_WRACC;
		if (filemode & FREAD)
			shr.s_access |= F_RDACC;
		shr.s_deny = 0;
		shr.s_sysid = 0;
		shr.s_pid = ttoproc(curthread)->p_pid;
		shr_own.sl_pid = shr.s_pid;
		shr_own.sl_id = fd;
		shr.s_own_len = sizeof (shr_own);
		shr.s_owner = (caddr_t)&shr_own;
		error = VOP_SHRLOCK(vp, F_SHARE_NBMAND, &shr, filemode, CRED(),
		    NULL);
		if (error)
			goto out;
		shrlock_done = 1;

		/* nbmand conflict check if truncating file */
		if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;

			vattr.va_mask = AT_SIZE;
			if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
				goto out;
			if (nbl_conflict(vp, NBL_WRITE, 0, vattr.va_size, 0,
			    NULL)) {
				error = EACCES;
				goto out;
			}
		}
	}

	/*
	 * Do opening protocol.
	 */
	error = VOP_OPEN(&vp, filemode, CRED(), NULL);
	if (error)
		goto out;
	open_done = 1;

	/*
	 * Truncate if required.
	 */
	if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
		vattr.va_size = 0;
		vattr.va_mask = AT_SIZE;
		if ((error = VOP_SETATTR(vp, &vattr, 0, CRED(), NULL)) != 0)
			goto out;
	}
out:
	ASSERT(vp->v_count > 0);

	if (in_crit) {
		nbl_end_crit(vp);
		in_crit = 0;
	}
	if (error) {
		if (open_done) {
			(void) VOP_CLOSE(vp, filemode, 1, (offset_t)0, CRED(),
			    NULL);
			open_done = 0;
			shrlock_done = 0;
		}
		if (shrlock_done) {
			(void) VOP_SHRLOCK(vp, F_UNSHARE, &shr, 0, CRED(),
			    NULL);
			shrlock_done = 0;
		}

		/*
		 * The following clause was added to handle a problem
		 * with NFS consistency.  It is possible that a lookup
		 * of the file to be opened succeeded, but the file
		 * itself doesn't actually exist on the server.  This
		 * is chiefly due to the DNLC containing an entry for
		 * the file which has been removed on the server.  In
		 * this case, we just start over.  If there was some
		 * other cause for the ESTALE error, then the lookup
		 * of the file will fail and the error will be returned
		 * above instead of looping around from here.
		 */
		VN_RELE(vp);
		if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
			goto top;
	} else
		*vpp = vp;
	return (error);
}
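
/*
 * A minimal usage sketch (illustrative; the path is hypothetical):
 * kernel consumers pair vn_open() with VOP_CLOSE() and VN_RELE():
 *
 *	vnode_t *vp;
 *	int err;
 *
 *	err = vn_open("/etc/motd", UIO_SYSSPACE, FREAD, 0, &vp, 0, 0);
 *	if (err == 0) {
 *		... vn_rdwr(UIO_READ, vp, ...) ...
 *		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
 *		VN_RELE(vp);
 *	}
 */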

/*
 * The following two accessor functions are for the NFSv4 server.  Since there
 * is no VOP_OPEN_UP/DOWNGRADE we need a way for the NFS server to keep the
 * vnode open counts correct when a client "upgrades" an open or does an
 * open_downgrade.  In NFS, an upgrade or downgrade can not only change the
 * open mode (add or subtract read or write), but also change the share/deny
 * modes.  However, share reservations are not integrated with OPEN, yet, so
 * we need to handle each separately.  These functions are cleaner than having
 * the NFS server manipulate the counts directly, however, nobody else should
 * use these functions.
 */
void
vn_open_upgrade(
	vnode_t *vp,
	int filemode)
{
	ASSERT(vp->v_type == VREG);

	if (filemode & FREAD)
		atomic_add_32(&(vp->v_rdcnt), 1);
	if (filemode & FWRITE)
		atomic_add_32(&(vp->v_wrcnt), 1);

}

void
vn_open_downgrade(
	vnode_t *vp,
	int filemode)
{
	ASSERT(vp->v_type == VREG);

	if (filemode & FREAD) {
		ASSERT(vp->v_rdcnt > 0);
		atomic_add_32(&(vp->v_rdcnt), -1);
	}
	if (filemode & FWRITE) {
		ASSERT(vp->v_wrcnt > 0);
		atomic_add_32(&(vp->v_wrcnt), -1);
	}

}

int
vn_create(
	char *pnamep,
	enum uio_seg seg,
	struct vattr *vap,
	enum vcexcl excl,
	int mode,
	struct vnode **vpp,
	enum create why,
	int flag,
	mode_t umask)
{
	return (vn_createat(pnamep, seg, vap, excl, mode, vpp, why, flag,
	    umask, NULL));
}

/*
 * Create a vnode (makenode).
 */
int
vn_createat(
	char *pnamep,
	enum uio_seg seg,
	struct vattr *vap,
	enum vcexcl excl,
	int mode,
	struct vnode **vpp,
	enum create why,
	int flag,
	mode_t umask,
	struct vnode *startvp)
{
	struct vnode *dvp;	/* ptr to parent dir vnode */
	struct vnode *vp = NULL;
	struct pathname pn;
	int error;
	int in_crit = 0;
	struct vattr vattr;
	enum symfollow follow;
	int estale_retry = 0;
	uint32_t auditing = AU_AUDITING();

	ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));

	/* symlink interpretation */
	if ((flag & FNOFOLLOW) || excl == EXCL)
		follow = NO_FOLLOW;
	else
		follow = FOLLOW;
	flag &= ~(FNOFOLLOW|FNOLINKS);

top:
	/*
	 * Lookup directory.
	 * If new object is a file, call lower level to create it.
	 * Note that it is up to the lower level to enforce exclusive
	 * creation, if the file is already there.
	 * This allows the lower level to do whatever
	 * locking or protocol that is needed to prevent races.
	 * If the new object is directory call lower level to make
	 * the new directory, with "." and "..".
	 */
	if (error = pn_get(pnamep, seg, &pn))
		return (error);
	if (auditing)
		audit_vncreate_start();
	dvp = NULL;
	*vpp = NULL;
	/*
	 * lookup will find the parent directory for the vnode.
	 * When it is done the pn holds the name of the entry
	 * in the directory.
	 * If this is a non-exclusive create we also find the node itself.
	 */
	error = lookuppnat(&pn, NULL, follow, &dvp,
	    (excl == EXCL) ? NULLVPP : vpp, startvp);
	if (error) {
		pn_free(&pn);
		if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
			goto top;
		if (why == CRMKDIR && error == EINVAL)
			error = EEXIST;		/* SVID */
		return (error);
	}

	if (why != CRMKNOD)
		vap->va_mode &= ~VSVTX;

	/*
	 * If default ACLs are defined for the directory don't apply the
	 * umask if umask is passed.
	 */

	if (umask) {

		vsecattr_t vsec;

		vsec.vsa_aclcnt = 0;
		vsec.vsa_aclentp = NULL;
		vsec.vsa_dfaclcnt = 0;
		vsec.vsa_dfaclentp = NULL;
		vsec.vsa_mask = VSA_DFACLCNT;
		error = VOP_GETSECATTR(dvp, &vsec, 0, CRED(), NULL);
		/*
		 * If error is ENOSYS then treat it as no error.
		 * Don't want to force all file systems to support
		 * aclent_t style of ACL's.
		 */
		if (error == ENOSYS)
			error = 0;
		if (error) {
			if (*vpp != NULL)
				VN_RELE(*vpp);
			goto out;
		} else {
			/*
			 * Apply the umask if no default ACLs.
			 */
			if (vsec.vsa_dfaclcnt == 0)
				vap->va_mode &= ~umask;

			/*
			 * VOP_GETSECATTR() may have allocated memory for
			 * ACLs we didn't request, so double-check and
			 * free it if necessary.
			 */
			if (vsec.vsa_aclcnt && vsec.vsa_aclentp != NULL)
				kmem_free((caddr_t)vsec.vsa_aclentp,
				    vsec.vsa_aclcnt * sizeof (aclent_t));
			if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp != NULL)
				kmem_free((caddr_t)vsec.vsa_dfaclentp,
				    vsec.vsa_dfaclcnt * sizeof (aclent_t));
		}
	}

	/*
	 * In general we want to generate EROFS if the file system is
	 * readonly.  However, POSIX (IEEE Std. 1003.1) section 5.3.1
	 * documents the open system call, and it says that O_CREAT has no
	 * effect if the file already exists.  Bug 1119649 states
	 * that open(path, O_CREAT, ...) fails when attempting to open an
	 * existing file on a read only file system.  Thus, the first part
	 * of the following if statement has 3 checks:
	 *	if the file exists &&
	 *	it is being open with write access &&
	 *	the file system is read only
	 * then generate EROFS
	 */
	if ((*vpp != NULL && (mode & VWRITE) && ISROFILE(*vpp)) ||
	    (*vpp == NULL && dvp->v_vfsp->vfs_flag & VFS_RDONLY)) {
		if (*vpp)
			VN_RELE(*vpp);
		error = EROFS;
	} else if (excl == NONEXCL && *vpp != NULL) {
		vnode_t *rvp;

		/*
		 * File already exists.  If a mandatory lock has been
		 * applied, return error.
		 */
		vp = *vpp;
		if (VOP_REALVP(vp, &rvp, NULL) != 0)
			rvp = vp;
		if ((vap->va_mask & AT_SIZE) && nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}
		if (rvp->v_filocks != NULL || rvp->v_shrlocks != NULL) {
			vattr.va_mask = AT_MODE|AT_SIZE;
			if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) {
				goto out;
			}
			if (MANDLOCK(vp, vattr.va_mode)) {
				error = EAGAIN;
				goto out;
			}
			/*
			 * File cannot be truncated if non-blocking mandatory
			 * locks are currently on the file.
			 */
			if ((vap->va_mask & AT_SIZE) && in_crit) {
				u_offset_t offset;
				ssize_t length;

				offset = vap->va_size > vattr.va_size ?
				    vattr.va_size : vap->va_size;
				length = vap->va_size > vattr.va_size ?
				    vap->va_size - vattr.va_size :
				    vattr.va_size - vap->va_size;
				if (nbl_conflict(vp, NBL_WRITE, offset,
				    length, 0, NULL)) {
					error = EACCES;
					goto out;
				}
			}
		}

		/*
		 * If the file is the root of a VFS, we've crossed a
		 * mount point and the "containing" directory that we
		 * acquired above (dvp) is irrelevant because it's in
		 * a different file system.  We apply VOP_CREATE to the
		 * target itself instead of to the containing directory
		 * and supply a null path name to indicate (conventionally)
		 * the node itself as the "component" of interest.
		 *
		 * The intercession of the file system is necessary to
		 * ensure that the appropriate permission checks are
		 * done.
		 */
		if (vp->v_flag & VROOT) {
			ASSERT(why != CRMKDIR);
			error = VOP_CREATE(vp, "", vap, excl, mode, vpp,
			    CRED(), flag, NULL, NULL);
			/*
			 * If the create succeeded, it will have created
			 * a new reference to the vnode.  Give up the
			 * original reference.  The assertion should not
			 * get triggered because NBMAND locks only apply to
			 * VREG files.  And if in_crit is non-zero for some
			 * reason, detect that here, rather than when we
			 * dereference a null vp.
			 */
			ASSERT(in_crit == 0);
			VN_RELE(vp);
			vp = NULL;
			goto out;
		}

		/*
		 * Large File API - non-large open (FOFFMAX flag not set)
		 * of regular file fails if the file size exceeds MAXOFF32_T.
		 */
		if (why != CRMKDIR &&
		    !(flag & FOFFMAX) &&
		    (vp->v_type == VREG)) {
			vattr.va_mask = AT_SIZE;
			if ((error = VOP_GETATTR(vp, &vattr, 0,
			    CRED(), NULL))) {
				goto out;
			}
			if ((vattr.va_size > (u_offset_t)MAXOFF32_T)) {
				error = EOVERFLOW;
				goto out;
			}
		}
	}

	if (error == 0) {
		/*
		 * Call mkdir() if specified, otherwise create().
		 */
		int must_be_dir = pn_fixslash(&pn);	/* trailing '/'? */

		if (why == CRMKDIR)
			/*
			 * N.B., if vn_createat() ever requests
			 * case-insensitive behavior then it will need
			 * to be passed to VOP_MKDIR().  VOP_CREATE()
			 * will already get it via "flag"
			 */
			error = VOP_MKDIR(dvp, pn.pn_path, vap, vpp, CRED(),
			    NULL, 0, NULL);
		else if (!must_be_dir)
			error = VOP_CREATE(dvp, pn.pn_path, vap,
			    excl, mode, vpp, CRED(), flag, NULL, NULL);
		else
			error = ENOTDIR;
	}

out:

	if (auditing)
		audit_vncreate_finish(*vpp, error);
	if (in_crit) {
		nbl_end_crit(vp);
		in_crit = 0;
	}
	if (vp != NULL) {
		VN_RELE(vp);
		vp = NULL;
	}
	pn_free(&pn);
	VN_RELE(dvp);
	/*
	 * The following clause was added to handle a problem
	 * with NFS consistency.  It is possible that a lookup
	 * of the file to be created succeeded, but the file
	 * itself doesn't actually exist on the server.  This
	 * is chiefly due to the DNLC containing an entry for
	 * the file which has been removed on the server.  In
	 * this case, we just start over.  If there was some
	 * other cause for the ESTALE error, then the lookup
	 * of the file will fail and the error will be returned
	 * above instead of looping around from here.
	 */
	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
		goto top;
	return (error);
}

int
vn_link(char *from, char *to, enum uio_seg seg)
{
	struct vnode *fvp;		/* from vnode ptr */
	struct vnode *tdvp;		/* to directory vnode ptr */
	struct pathname pn;
	int error;
	struct vattr vattr;
	dev_t fsid;
	int estale_retry = 0;

top:
	fvp = tdvp = NULL;
	if (error = pn_get(to, seg, &pn))
		return (error);
	if (error = lookupname(from, seg, NO_FOLLOW, NULLVPP, &fvp))
		goto out;
	if (error = lookuppn(&pn, NULL, NO_FOLLOW, &tdvp, NULLVPP))
		goto out;
	/*
	 * Make sure both source vnode and target directory vnode are
	 * in the same vfs and that it is writeable.
	 */
	vattr.va_mask = AT_FSID;
	if (error = VOP_GETATTR(fvp, &vattr, 0, CRED(), NULL))
		goto out;
	fsid = vattr.va_fsid;
	vattr.va_mask = AT_FSID;
	if (error = VOP_GETATTR(tdvp, &vattr, 0, CRED(), NULL))
		goto out;
	if (fsid != vattr.va_fsid) {
		error = EXDEV;
		goto out;
	}
	if (tdvp->v_vfsp->vfs_flag & VFS_RDONLY) {
		error = EROFS;
		goto out;
	}
	/*
	 * Do the link.
	 */
	(void) pn_fixslash(&pn);
	error = VOP_LINK(tdvp, fvp, pn.pn_path, CRED(), NULL, 0);
out:
	pn_free(&pn);
	if (fvp)
		VN_RELE(fvp);
	if (tdvp)
		VN_RELE(tdvp);
	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
		goto top;
	return (error);
}

int
vn_rename(char *from, char *to, enum uio_seg seg)
{
	return (vn_renameat(NULL, from, NULL, to, seg));
}

int
vn_renameat(vnode_t *fdvp, char *fname, vnode_t *tdvp,
    char *tname, enum uio_seg seg)
{
	int error;
	struct vattr vattr;
	struct pathname fpn;		/* from pathname */
	struct pathname tpn;		/* to pathname */
	dev_t fsid;
	int in_crit_src, in_crit_targ;
	vnode_t *fromvp, *fvp;
	vnode_t *tovp, *targvp;
	int estale_retry = 0;
	uint32_t auditing = AU_AUDITING();

top:
	fvp = fromvp = tovp = targvp = NULL;
	in_crit_src = in_crit_targ = 0;
	/*
	 * Get to and from pathnames.
	 */
	if (error = pn_get(fname, seg, &fpn))
		return (error);
	if (error = pn_get(tname, seg, &tpn)) {
		pn_free(&fpn);
		return (error);
	}

	/*
	 * First we need to resolve the correct directories.
	 * The passed-in directories may only be a starting point,
	 * but we need the real directories the file(s) live in.
	 * For example the fname may be something like usr/lib/sparc
	 * and we were passed in the / directory, but we need to
	 * use the lib directory for the rename.
	 */

	if (auditing)
		audit_setfsat_path(1);
	/*
	 * Lookup to and from directories.
	 */
	if (error = lookuppnat(&fpn, NULL, NO_FOLLOW, &fromvp, &fvp, fdvp)) {
		goto out;
	}

	/*
	 * Make sure there is an entry.
	 */
	if (fvp == NULL) {
		error = ENOENT;
		goto out;
	}

	if (auditing)
		audit_setfsat_path(3);
	if (error = lookuppnat(&tpn, NULL, NO_FOLLOW, &tovp, &targvp, tdvp)) {
		goto out;
	}

	/*
	 * Make sure both the from vnode directory and the to directory
	 * are in the same vfs and the to directory is writable.
	 * We check fsid's, not vfs pointers, so loopback fs works.
	 */
	if (fromvp != tovp) {
		vattr.va_mask = AT_FSID;
		if (error = VOP_GETATTR(fromvp, &vattr, 0, CRED(), NULL))
			goto out;
		fsid = vattr.va_fsid;
		vattr.va_mask = AT_FSID;
		if (error = VOP_GETATTR(tovp, &vattr, 0, CRED(), NULL))
			goto out;
		if (fsid != vattr.va_fsid) {
			error = EXDEV;
			goto out;
		}
	}

	if (tovp->v_vfsp->vfs_flag & VFS_RDONLY) {
		error = EROFS;
		goto out;
	}

	if (targvp && (fvp != targvp)) {
		nbl_start_crit(targvp, RW_READER);
		in_crit_targ = 1;
		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	if (nbl_need_check(fvp)) {
		nbl_start_crit(fvp, RW_READER);
		in_crit_src = 1;
		if (nbl_conflict(fvp, NBL_RENAME, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	}

	/*
	 * Do the rename.
	 */
	(void) pn_fixslash(&tpn);
	error = VOP_RENAME(fromvp, fpn.pn_path, tovp, tpn.pn_path, CRED(),
	    NULL, 0);

out:
	pn_free(&fpn);
	pn_free(&tpn);
	if (in_crit_src)
		nbl_end_crit(fvp);
	if (in_crit_targ)
		nbl_end_crit(targvp);
	if (fromvp)
		VN_RELE(fromvp);
	if (tovp)
		VN_RELE(tovp);
	if (targvp)
		VN_RELE(targvp);
	if (fvp)
		VN_RELE(fvp);
	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
		goto top;
	return (error);
}

/*
 * Remove a file or directory.
 */
int
vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag)
{
	return (vn_removeat(NULL, fnamep, seg, dirflag));
}
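
/*
 * A minimal usage sketch (illustrative; the paths are hypothetical):
 * these path-based wrappers do the lookups, nbmand checks and ESTALE
 * retries internally:
 *
 *	err = vn_rename("/var/tmp/old", "/var/tmp/new", UIO_SYSSPACE);
 *	err = vn_remove("/var/tmp/new", UIO_SYSSPACE, RMFILE);
 */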

int
vn_removeat(vnode_t *startvp, char *fnamep, enum uio_seg seg, enum rm dirflag)
{
	struct vnode *vp;		/* entry vnode */
	struct vnode *dvp;		/* ptr to parent dir vnode */
	struct vnode *coveredvp;
	struct pathname pn;		/* name of entry */
	enum vtype vtype;
	int error;
	struct vfs *vfsp;
	struct vfs *dvfsp;	/* ptr to parent dir vfs */
	int in_crit = 0;
	int estale_retry = 0;

top:
	if (error = pn_get(fnamep, seg, &pn))
		return (error);
	dvp = vp = NULL;
	if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &dvp, &vp, startvp)) {
		pn_free(&pn);
		if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
			goto top;
		return (error);
	}

	/*
	 * Make sure there is an entry.
	 */
	if (vp == NULL) {
		error = ENOENT;
		goto out;
	}

	vfsp = vp->v_vfsp;
	dvfsp = dvp->v_vfsp;

	/*
	 * If the named file is the root of a mounted filesystem, fail,
	 * unless it's marked unlinkable.  In that case, unmount the
	 * filesystem and proceed to unlink the covered vnode.  (If the
	 * covered vnode is a directory, use rmdir instead of unlink,
	 * to avoid file system corruption.)
	 */
	if (vp->v_flag & VROOT) {
		if ((vfsp->vfs_flag & VFS_UNLINKABLE) == 0) {
			error = EBUSY;
			goto out;
		}

		/*
		 * Namefs specific code starts here.
		 */

		if (dirflag == RMDIRECTORY) {
			/*
			 * User called rmdir(2) on a file that has
			 * been namefs mounted on top of.  Since
			 * namefs doesn't allow directories to
			 * be mounted on other files we know
			 * vp is not of type VDIR so fail the operation.
			 */
			error = ENOTDIR;
			goto out;
		}

		/*
		 * If VROOT is still set after grabbing vp->v_lock,
		 * no one has finished nm_unmount so far and coveredvp
		 * is valid.
		 * If we manage to grab vn_vfswlock(coveredvp) before releasing
		 * vp->v_lock, any race window is eliminated.
		 */

		mutex_enter(&vp->v_lock);
		if ((vp->v_flag & VROOT) == 0) {
			/* Someone beat us to the unmount */
			mutex_exit(&vp->v_lock);
			error = EBUSY;
			goto out;
		}
		vfsp = vp->v_vfsp;
		coveredvp = vfsp->vfs_vnodecovered;
		ASSERT(coveredvp);
		/*
		 * Note: Implementation of vn_vfswlock shows that ordering of
		 * v_lock / vn_vfswlock is not an issue here.
		 */
		error = vn_vfswlock(coveredvp);
		mutex_exit(&vp->v_lock);

		if (error)
			goto out;

		VN_HOLD(coveredvp);
		VN_RELE(vp);
		error = dounmount(vfsp, 0, CRED());

		/*
		 * Unmounted the namefs file system; now get
		 * the object it was mounted over.
		 */
		vp = coveredvp;
		/*
		 * If namefs was mounted over a directory, then
		 * we want to use rmdir() instead of unlink().
		 */
		if (vp->v_type == VDIR)
			dirflag = RMDIRECTORY;

		if (error)
			goto out;
	}

	/*
	 * Make sure filesystem is writeable.
	 * We check the parent directory's vfs in case this is an lofs vnode.
	 */
	if (dvfsp && dvfsp->vfs_flag & VFS_RDONLY) {
		error = EROFS;
		goto out;
	}

	vtype = vp->v_type;

	/*
	 * If there is the possibility of an nbmand share reservation, make
	 * sure it's okay to remove the file.  Keep a reference to the
	 * vnode, so that we can exit the nbl critical region after
	 * calling VOP_REMOVE.
	 * If there is no possibility of an nbmand share reservation,
	 * release the vnode reference now.  Filesystems like NFS may
	 * behave differently if there is an extra reference, so get rid of
	 * this one.  Fortunately, we can't have nbmand mounts on NFS
	 * filesystems.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
			error = EACCES;
			goto out;
		}
	} else {
		VN_RELE(vp);
		vp = NULL;
	}

	if (dirflag == RMDIRECTORY) {
		/*
		 * Caller is using rmdir(2), which can only be applied to
		 * directories.
		 */
		if (vtype != VDIR) {
			error = ENOTDIR;
		} else {
			vnode_t *cwd;
			proc_t *pp = curproc;

			mutex_enter(&pp->p_lock);
			cwd = PTOU(pp)->u_cdir;
			VN_HOLD(cwd);
			mutex_exit(&pp->p_lock);
			error = VOP_RMDIR(dvp, pn.pn_path, cwd, CRED(),
			    NULL, 0);
			VN_RELE(cwd);
		}
	} else {
		/*
		 * Unlink(2) can be applied to anything.
		 */
		error = VOP_REMOVE(dvp, pn.pn_path, CRED(), NULL, 0);
	}

out:
	pn_free(&pn);
	if (in_crit) {
		nbl_end_crit(vp);
		in_crit = 0;
	}
	if (vp != NULL)
		VN_RELE(vp);
	if (dvp != NULL)
		VN_RELE(dvp);
	if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
		goto top;
	return (error);
}

/*
 * Utility function to compare equality of vnodes.
 * Compare the underlying real vnodes, if there are underlying vnodes.
 * This is a more thorough comparison than the VN_CMP() macro provides.
 */
int
vn_compare(vnode_t *vp1, vnode_t *vp2)
{
	vnode_t *realvp;

	if (vp1 != NULL && VOP_REALVP(vp1, &realvp, NULL) == 0)
		vp1 = realvp;
	if (vp2 != NULL && VOP_REALVP(vp2, &realvp, NULL) == 0)
		vp2 = realvp;
	return (VN_CMP(vp1, vp2));
}

/*
 * The number of locks to hash into.  This value must be a power
 * of 2 minus 1 and should probably also be prime.
 */
#define	NUM_BUCKETS	1023

struct vn_vfslocks_bucket {
	kmutex_t vb_lock;
	vn_vfslocks_entry_t *vb_list;
	char pad[64 - sizeof (kmutex_t) - sizeof (void *)];
};

/*
 * Total number of buckets will be NUM_BUCKETS + 1.
 */

#pragma	align	64(vn_vfslocks_buckets)
static	struct	vn_vfslocks_bucket	vn_vfslocks_buckets[NUM_BUCKETS + 1];

#define	VN_VFSLOCKS_SHIFT	9

#define	VN_VFSLOCKS_HASH(vfsvpptr)	\
	((((intptr_t)(vfsvpptr)) >> VN_VFSLOCKS_SHIFT) & NUM_BUCKETS)
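
/*
 * A worked example (illustrative address): with VN_VFSLOCKS_SHIFT of 9
 * and NUM_BUCKETS of 1023 (0x3ff), a vnode at 0xffffff01a2b4c800 hashes
 * to ((0xffffff01a2b4c800 >> 9) & 0x3ff) == 0x264.  The shift discards
 * the low-order address bits, which carry little entropy because
 * kmem-allocated objects share their alignment, before the mask picks
 * the bucket.
 */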
1989 *
1990 * vn_vfslocks_rele() releases a reference in the
1991 * hash table which allows the entry allocated by
1992 * vn_vfslocks_getlock() to be freed at a later
1993 * stage when the refcount drops to zero.
1994 */
1995 
1996 vn_vfslocks_entry_t *
1997 vn_vfslocks_getlock(void *vfsvpptr)
1998 {
1999 struct vn_vfslocks_bucket *bp;
2000 vn_vfslocks_entry_t *vep;
2001 vn_vfslocks_entry_t *tvep;
2002 
2003 ASSERT(vfsvpptr != NULL);
2004 bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vfsvpptr)];
2005 
2006 mutex_enter(&bp->vb_lock);
2007 for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
2008 if (vep->ve_vpvfs == vfsvpptr) {
2009 vep->ve_refcnt++;
2010 mutex_exit(&bp->vb_lock);
2011 return (vep);
2012 }
2013 }
2014 mutex_exit(&bp->vb_lock);
2015 vep = kmem_alloc(sizeof (*vep), KM_SLEEP);
2016 rwst_init(&vep->ve_lock, NULL, RW_DEFAULT, NULL);
2017 vep->ve_vpvfs = (char *)vfsvpptr;
2018 vep->ve_refcnt = 1;
2019 mutex_enter(&bp->vb_lock);
2020 for (tvep = bp->vb_list; tvep != NULL; tvep = tvep->ve_next) {
2021 if (tvep->ve_vpvfs == vfsvpptr) {
2022 tvep->ve_refcnt++;
2023 mutex_exit(&bp->vb_lock);
2024 
2025 /*
2026 * There is already an entry in the hash;
2027 * destroy what we just allocated.
2028 */
2029 rwst_destroy(&vep->ve_lock);
2030 kmem_free(vep, sizeof (*vep));
2031 return (tvep);
2032 }
2033 }
2034 vep->ve_next = bp->vb_list;
2035 bp->vb_list = vep;
2036 mutex_exit(&bp->vb_lock);
2037 return (vep);
2038 }
2039 
2040 void
2041 vn_vfslocks_rele(vn_vfslocks_entry_t *vepent)
2042 {
2043 struct vn_vfslocks_bucket *bp;
2044 vn_vfslocks_entry_t *vep;
2045 vn_vfslocks_entry_t *pvep;
2046 
2047 ASSERT(vepent != NULL);
2048 ASSERT(vepent->ve_vpvfs != NULL);
2049 
2050 bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vepent->ve_vpvfs)];
2051 
2052 mutex_enter(&bp->vb_lock);
2053 vepent->ve_refcnt--;
2054 
2055 if ((int32_t)vepent->ve_refcnt < 0)
2056 cmn_err(CE_PANIC, "vn_vfslocks_rele: refcount negative");
2057 
2058 if (vepent->ve_refcnt == 0) {
2059 for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
2060 if (vep->ve_vpvfs == vepent->ve_vpvfs) {
2061 if (bp->vb_list == vep)
2062 bp->vb_list = vep->ve_next;
2063 else {
2064 /* LINTED */
2065 pvep->ve_next = vep->ve_next;
2066 }
2067 mutex_exit(&bp->vb_lock);
2068 rwst_destroy(&vep->ve_lock);
2069 kmem_free(vep, sizeof (*vep));
2070 return;
2071 }
2072 pvep = vep;
2073 }
2074 cmn_err(CE_PANIC, "vn_vfslocks_rele: vp/vfs not found");
2075 }
2076 mutex_exit(&bp->vb_lock);
2077 }
2078 
2079 /*
2080 * vn_vfswlock_wait is used to implement a lock which is logically a writers
2081 * lock protecting the v_vfsmountedhere field.
2082 * vn_vfswlock_wait has been modified to be similar to vn_vfswlock,
2083 * except that it blocks until it acquires the lock VVFSLOCK.
2084 *
2085 * traverse() and routines re-implementing part of traverse (e.g. autofs)
2086 * need to hold this lock. mount(), vn_rename(), vn_remove() and so on
2087 * need the non-blocking version of the writers lock i.e.
vn_vfswlock
2088 */
2089 int
2090 vn_vfswlock_wait(vnode_t *vp)
2091 {
2092 int retval;
2093 vn_vfslocks_entry_t *vpvfsentry;
2094 ASSERT(vp != NULL);
2095 
2096 vpvfsentry = vn_vfslocks_getlock(vp);
2097 retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_WRITER);
2098 
2099 if (retval == EINTR) {
2100 vn_vfslocks_rele(vpvfsentry);
2101 return (EINTR);
2102 }
2103 return (retval);
2104 }
2105 
2106 int
2107 vn_vfsrlock_wait(vnode_t *vp)
2108 {
2109 int retval;
2110 vn_vfslocks_entry_t *vpvfsentry;
2111 ASSERT(vp != NULL);
2112 
2113 vpvfsentry = vn_vfslocks_getlock(vp);
2114 retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_READER);
2115 
2116 if (retval == EINTR) {
2117 vn_vfslocks_rele(vpvfsentry);
2118 return (EINTR);
2119 }
2120 
2121 return (retval);
2122 }
2123 
2124 
2125 /*
2126 * vn_vfswlock is used to implement a lock which is logically a writers lock
2127 * protecting the v_vfsmountedhere field.
2128 */
2129 int
2130 vn_vfswlock(vnode_t *vp)
2131 {
2132 vn_vfslocks_entry_t *vpvfsentry;
2133 
2134 /*
2135 * If vp is NULL then somebody is trying to lock the covered vnode
2136 * of /. (vfs_vnodecovered is NULL for /). This situation will
2137 * only happen when unmounting /. Since that operation will fail
2138 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2139 */
2140 if (vp == NULL)
2141 return (EBUSY);
2142 
2143 vpvfsentry = vn_vfslocks_getlock(vp);
2144 
2145 if (rwst_tryenter(&vpvfsentry->ve_lock, RW_WRITER))
2146 return (0);
2147 
2148 vn_vfslocks_rele(vpvfsentry);
2149 return (EBUSY);
2150 }
2151 
2152 int
2153 vn_vfsrlock(vnode_t *vp)
2154 {
2155 vn_vfslocks_entry_t *vpvfsentry;
2156 
2157 /*
2158 * If vp is NULL then somebody is trying to lock the covered vnode
2159 * of /. (vfs_vnodecovered is NULL for /). This situation will
2160 * only happen when unmounting /. Since that operation will fail
2161 * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2162 */
2163 if (vp == NULL)
2164 return (EBUSY);
2165 
2166 vpvfsentry = vn_vfslocks_getlock(vp);
2167 
2168 if (rwst_tryenter(&vpvfsentry->ve_lock, RW_READER))
2169 return (0);
2170 
2171 vn_vfslocks_rele(vpvfsentry);
2172 return (EBUSY);
2173 }
2174 
2175 void
2176 vn_vfsunlock(vnode_t *vp)
2177 {
2178 vn_vfslocks_entry_t *vpvfsentry;
2179 
2180 /*
2181 * ve_refcnt needs to be decremented twice.
2182 * 1. To release the reference after a call to vn_vfslocks_getlock()
2183 * 2. To release the reference from the locking routines like
2184 * vn_vfsrlock/vn_vfswlock, etc.
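 *
 * The resulting usage pattern, sketched (the locking routine takes a
 * hash-entry reference that only this routine gives back):
 *
 *	if (vn_vfswlock(vp) == 0) {
 *		... examine or update v_vfsmountedhere ...
 *		vn_vfsunlock(vp);	/* drops both references */
 *	}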
2185 */ 2186 vpvfsentry = vn_vfslocks_getlock(vp); 2187 vn_vfslocks_rele(vpvfsentry); 2188 2189 rwst_exit(&vpvfsentry->ve_lock); 2190 vn_vfslocks_rele(vpvfsentry); 2191 } 2192 2193 int 2194 vn_vfswlock_held(vnode_t *vp) 2195 { 2196 int held; 2197 vn_vfslocks_entry_t *vpvfsentry; 2198 2199 ASSERT(vp != NULL); 2200 2201 vpvfsentry = vn_vfslocks_getlock(vp); 2202 held = rwst_lock_held(&vpvfsentry->ve_lock, RW_WRITER); 2203 2204 vn_vfslocks_rele(vpvfsentry); 2205 return (held); 2206 } 2207 2208 2209 int 2210 vn_make_ops( 2211 const char *name, /* Name of file system */ 2212 const fs_operation_def_t *templ, /* Operation specification */ 2213 vnodeops_t **actual) /* Return the vnodeops */ 2214 { 2215 int unused_ops; 2216 int error; 2217 2218 *actual = (vnodeops_t *)kmem_alloc(sizeof (vnodeops_t), KM_SLEEP); 2219 2220 (*actual)->vnop_name = name; 2221 2222 error = fs_build_vector(*actual, &unused_ops, vn_ops_table, templ); 2223 if (error) { 2224 kmem_free(*actual, sizeof (vnodeops_t)); 2225 } 2226 2227 #if DEBUG 2228 if (unused_ops != 0) 2229 cmn_err(CE_WARN, "vn_make_ops: %s: %d operations supplied " 2230 "but not used", name, unused_ops); 2231 #endif 2232 2233 return (error); 2234 } 2235 2236 /* 2237 * Free the vnodeops created as a result of vn_make_ops() 2238 */ 2239 void 2240 vn_freevnodeops(vnodeops_t *vnops) 2241 { 2242 kmem_free(vnops, sizeof (vnodeops_t)); 2243 } 2244 2245 /* 2246 * Vnode cache. 2247 */ 2248 2249 /* ARGSUSED */ 2250 static int 2251 vn_cache_constructor(void *buf, void *cdrarg, int kmflags) 2252 { 2253 struct vnode *vp; 2254 2255 vp = buf; 2256 2257 mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL); 2258 mutex_init(&vp->v_vsd_lock, NULL, MUTEX_DEFAULT, NULL); 2259 cv_init(&vp->v_cv, NULL, CV_DEFAULT, NULL); 2260 rw_init(&vp->v_nbllock, NULL, RW_DEFAULT, NULL); 2261 vp->v_femhead = NULL; /* Must be done before vn_reinit() */ 2262 vp->v_path = NULL; 2263 vp->v_mpssdata = NULL; 2264 vp->v_vsd = NULL; 2265 vp->v_fopdata = NULL; 2266 2267 return (0); 2268 } 2269 2270 /* ARGSUSED */ 2271 static void 2272 vn_cache_destructor(void *buf, void *cdrarg) 2273 { 2274 struct vnode *vp; 2275 2276 vp = buf; 2277 2278 rw_destroy(&vp->v_nbllock); 2279 cv_destroy(&vp->v_cv); 2280 mutex_destroy(&vp->v_vsd_lock); 2281 mutex_destroy(&vp->v_lock); 2282 } 2283 2284 void 2285 vn_create_cache(void) 2286 { 2287 vn_cache = kmem_cache_create("vn_cache", sizeof (struct vnode), 64, 2288 vn_cache_constructor, vn_cache_destructor, NULL, NULL, 2289 NULL, 0); 2290 } 2291 2292 void 2293 vn_destroy_cache(void) 2294 { 2295 kmem_cache_destroy(vn_cache); 2296 } 2297 2298 /* 2299 * Used by file systems when fs-specific nodes (e.g., ufs inodes) are 2300 * cached by the file system and vnodes remain associated. 2301 */ 2302 void 2303 vn_recycle(vnode_t *vp) 2304 { 2305 ASSERT(vp->v_pages == NULL); 2306 2307 /* 2308 * XXX - This really belongs in vn_reinit(), but we have some issues 2309 * with the counts. Best to have it here for clean initialization. 2310 */ 2311 vp->v_rdcnt = 0; 2312 vp->v_wrcnt = 0; 2313 vp->v_mmap_read = 0; 2314 vp->v_mmap_write = 0; 2315 2316 /* 2317 * If FEM was in use, make sure everything gets cleaned up 2318 * NOTE: vp->v_femhead is initialized to NULL in the vnode 2319 * constructor. 
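 *
 * (For context, a hedged sketch of the expected caller: a file system
 * that caches its nodes, here a hypothetical mynode_t with a MYNTOV()
 * node-to-vnode macro, recycles the attached vnode before reuse.)
 *
 *	mynode_t *np = mynode_cache_get(...);	/* hypothetical cache */
 *	vn_recycle(MYNTOV(np));	/* resets counts, FEM, v_path, VSD */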
2320 */ 2321 if (vp->v_femhead) { 2322 /* XXX - There should be a free_femhead() that does all this */ 2323 ASSERT(vp->v_femhead->femh_list == NULL); 2324 mutex_destroy(&vp->v_femhead->femh_lock); 2325 kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead))); 2326 vp->v_femhead = NULL; 2327 } 2328 if (vp->v_path) { 2329 kmem_free(vp->v_path, strlen(vp->v_path) + 1); 2330 vp->v_path = NULL; 2331 } 2332 2333 if (vp->v_fopdata != NULL) { 2334 free_fopdata(vp); 2335 } 2336 vp->v_mpssdata = NULL; 2337 vsd_free(vp); 2338 } 2339 2340 /* 2341 * Used to reset the vnode fields including those that are directly accessible 2342 * as well as those which require an accessor function. 2343 * 2344 * Does not initialize: 2345 * synchronization objects: v_lock, v_vsd_lock, v_nbllock, v_cv 2346 * v_data (since FS-nodes and vnodes point to each other and should 2347 * be updated simultaneously) 2348 * v_op (in case someone needs to make a VOP call on this object) 2349 */ 2350 void 2351 vn_reinit(vnode_t *vp) 2352 { 2353 vp->v_count = 1; 2354 vp->v_count_dnlc = 0; 2355 vp->v_vfsp = NULL; 2356 vp->v_stream = NULL; 2357 vp->v_vfsmountedhere = NULL; 2358 vp->v_flag = 0; 2359 vp->v_type = VNON; 2360 vp->v_rdev = NODEV; 2361 2362 vp->v_filocks = NULL; 2363 vp->v_shrlocks = NULL; 2364 vp->v_pages = NULL; 2365 2366 vp->v_locality = NULL; 2367 vp->v_xattrdir = NULL; 2368 2369 /* Handles v_femhead, v_path, and the r/w/map counts */ 2370 vn_recycle(vp); 2371 } 2372 2373 vnode_t * 2374 vn_alloc(int kmflag) 2375 { 2376 vnode_t *vp; 2377 2378 vp = kmem_cache_alloc(vn_cache, kmflag); 2379 2380 if (vp != NULL) { 2381 vp->v_femhead = NULL; /* Must be done before vn_reinit() */ 2382 vp->v_fopdata = NULL; 2383 vn_reinit(vp); 2384 } 2385 2386 return (vp); 2387 } 2388 2389 void 2390 vn_free(vnode_t *vp) 2391 { 2392 ASSERT(vp->v_shrlocks == NULL); 2393 ASSERT(vp->v_filocks == NULL); 2394 2395 /* 2396 * Some file systems call vn_free() with v_count of zero, 2397 * some with v_count of 1. In any case, the value should 2398 * never be anything else. 2399 */ 2400 ASSERT((vp->v_count == 0) || (vp->v_count == 1)); 2401 ASSERT(vp->v_count_dnlc == 0); 2402 if (vp->v_path != NULL) { 2403 kmem_free(vp->v_path, strlen(vp->v_path) + 1); 2404 vp->v_path = NULL; 2405 } 2406 2407 /* If FEM was in use, make sure everything gets cleaned up */ 2408 if (vp->v_femhead) { 2409 /* XXX - There should be a free_femhead() that does all this */ 2410 ASSERT(vp->v_femhead->femh_list == NULL); 2411 mutex_destroy(&vp->v_femhead->femh_lock); 2412 kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead))); 2413 vp->v_femhead = NULL; 2414 } 2415 2416 if (vp->v_fopdata != NULL) { 2417 free_fopdata(vp); 2418 } 2419 vp->v_mpssdata = NULL; 2420 vsd_free(vp); 2421 kmem_cache_free(vn_cache, vp); 2422 } 2423 2424 /* 2425 * vnode status changes, should define better states than 1, 0. 
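 *
 * Each routine below forwards one VNTRANS_* transition to the file
 * system via VFS_VNSTATE(), and only when the vfs has registered
 * interest (vfs_femhead != NULL). For example, a file system that has
 * just set up a fresh vnode announces it with:
 *
 *	vn_exists(vp);		/* delivers VNTRANS_EXISTS */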
2426 */ 2427 void 2428 vn_reclaim(vnode_t *vp) 2429 { 2430 vfs_t *vfsp = vp->v_vfsp; 2431 2432 if (vfsp == NULL || 2433 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) { 2434 return; 2435 } 2436 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_RECLAIMED); 2437 } 2438 2439 void 2440 vn_idle(vnode_t *vp) 2441 { 2442 vfs_t *vfsp = vp->v_vfsp; 2443 2444 if (vfsp == NULL || 2445 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) { 2446 return; 2447 } 2448 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_IDLED); 2449 } 2450 void 2451 vn_exists(vnode_t *vp) 2452 { 2453 vfs_t *vfsp = vp->v_vfsp; 2454 2455 if (vfsp == NULL || 2456 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) { 2457 return; 2458 } 2459 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_EXISTS); 2460 } 2461 2462 void 2463 vn_invalid(vnode_t *vp) 2464 { 2465 vfs_t *vfsp = vp->v_vfsp; 2466 2467 if (vfsp == NULL || 2468 vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) { 2469 return; 2470 } 2471 (void) VFS_VNSTATE(vfsp, vp, VNTRANS_DESTROYED); 2472 } 2473 2474 /* Vnode event notification */ 2475 2476 int 2477 vnevent_support(vnode_t *vp, caller_context_t *ct) 2478 { 2479 if (vp == NULL) 2480 return (EINVAL); 2481 2482 return (VOP_VNEVENT(vp, VE_SUPPORT, NULL, NULL, ct)); 2483 } 2484 2485 void 2486 vnevent_rename_src(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct) 2487 { 2488 if (vp == NULL || vp->v_femhead == NULL) { 2489 return; 2490 } 2491 (void) VOP_VNEVENT(vp, VE_RENAME_SRC, dvp, name, ct); 2492 } 2493 2494 void 2495 vnevent_rename_dest(vnode_t *vp, vnode_t *dvp, char *name, 2496 caller_context_t *ct) 2497 { 2498 if (vp == NULL || vp->v_femhead == NULL) { 2499 return; 2500 } 2501 (void) VOP_VNEVENT(vp, VE_RENAME_DEST, dvp, name, ct); 2502 } 2503 2504 void 2505 vnevent_rename_dest_dir(vnode_t *vp, caller_context_t *ct) 2506 { 2507 if (vp == NULL || vp->v_femhead == NULL) { 2508 return; 2509 } 2510 (void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, NULL, NULL, ct); 2511 } 2512 2513 void 2514 vnevent_remove(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct) 2515 { 2516 if (vp == NULL || vp->v_femhead == NULL) { 2517 return; 2518 } 2519 (void) VOP_VNEVENT(vp, VE_REMOVE, dvp, name, ct); 2520 } 2521 2522 void 2523 vnevent_rmdir(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct) 2524 { 2525 if (vp == NULL || vp->v_femhead == NULL) { 2526 return; 2527 } 2528 (void) VOP_VNEVENT(vp, VE_RMDIR, dvp, name, ct); 2529 } 2530 2531 void 2532 vnevent_create(vnode_t *vp, caller_context_t *ct) 2533 { 2534 if (vp == NULL || vp->v_femhead == NULL) { 2535 return; 2536 } 2537 (void) VOP_VNEVENT(vp, VE_CREATE, NULL, NULL, ct); 2538 } 2539 2540 void 2541 vnevent_link(vnode_t *vp, caller_context_t *ct) 2542 { 2543 if (vp == NULL || vp->v_femhead == NULL) { 2544 return; 2545 } 2546 (void) VOP_VNEVENT(vp, VE_LINK, NULL, NULL, ct); 2547 } 2548 2549 void 2550 vnevent_mountedover(vnode_t *vp, caller_context_t *ct) 2551 { 2552 if (vp == NULL || vp->v_femhead == NULL) { 2553 return; 2554 } 2555 (void) VOP_VNEVENT(vp, VE_MOUNTEDOVER, NULL, NULL, ct); 2556 } 2557 2558 /* 2559 * Vnode accessors. 
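 *
 * These are thin predicates over vnode/vfs fields. A typical (hedged)
 * guard on a write path looks like:
 *
 *	if (vn_is_readonly(vp))
 *		return (EROFS);
 *	if (vn_has_mandatory_locks(vp, mode))
 *		... defer to the lock manager before writing ...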
2560 */ 2561 2562 int 2563 vn_is_readonly(vnode_t *vp) 2564 { 2565 return (vp->v_vfsp->vfs_flag & VFS_RDONLY); 2566 } 2567 2568 int 2569 vn_has_flocks(vnode_t *vp) 2570 { 2571 return (vp->v_filocks != NULL); 2572 } 2573 2574 int 2575 vn_has_mandatory_locks(vnode_t *vp, int mode) 2576 { 2577 return ((vp->v_filocks != NULL) && (MANDLOCK(vp, mode))); 2578 } 2579 2580 int 2581 vn_has_cached_data(vnode_t *vp) 2582 { 2583 return (vp->v_pages != NULL); 2584 } 2585 2586 /* 2587 * Return 0 if the vnode in question shouldn't be permitted into a zone via 2588 * zone_enter(2). 2589 */ 2590 int 2591 vn_can_change_zones(vnode_t *vp) 2592 { 2593 struct vfssw *vswp; 2594 int allow = 1; 2595 vnode_t *rvp; 2596 2597 if (nfs_global_client_only != 0) 2598 return (1); 2599 2600 /* 2601 * We always want to look at the underlying vnode if there is one. 2602 */ 2603 if (VOP_REALVP(vp, &rvp, NULL) != 0) 2604 rvp = vp; 2605 /* 2606 * Some pseudo filesystems (including doorfs) don't actually register 2607 * their vfsops_t, so the following may return NULL; we happily let 2608 * such vnodes switch zones. 2609 */ 2610 vswp = vfs_getvfsswbyvfsops(vfs_getops(rvp->v_vfsp)); 2611 if (vswp != NULL) { 2612 if (vswp->vsw_flag & VSW_NOTZONESAFE) 2613 allow = 0; 2614 vfs_unrefvfssw(vswp); 2615 } 2616 return (allow); 2617 } 2618 2619 /* 2620 * Return nonzero if the vnode is a mount point, zero if not. 2621 */ 2622 int 2623 vn_ismntpt(vnode_t *vp) 2624 { 2625 return (vp->v_vfsmountedhere != NULL); 2626 } 2627 2628 /* Retrieve the vfs (if any) mounted on this vnode */ 2629 vfs_t * 2630 vn_mountedvfs(vnode_t *vp) 2631 { 2632 return (vp->v_vfsmountedhere); 2633 } 2634 2635 /* 2636 * Return nonzero if the vnode is referenced by the dnlc, zero if not. 2637 */ 2638 int 2639 vn_in_dnlc(vnode_t *vp) 2640 { 2641 return (vp->v_count_dnlc > 0); 2642 } 2643 2644 /* 2645 * vn_has_other_opens() checks whether a particular file is opened by more than 2646 * just the caller and whether the open is for read and/or write. 2647 * This routine is for calling after the caller has already called VOP_OPEN() 2648 * and the caller wishes to know if they are the only one with it open for 2649 * the mode(s) specified. 2650 * 2651 * Vnode counts are only kept on regular files (v_type=VREG). 2652 */ 2653 int 2654 vn_has_other_opens( 2655 vnode_t *vp, 2656 v_mode_t mode) 2657 { 2658 2659 ASSERT(vp != NULL); 2660 2661 switch (mode) { 2662 case V_WRITE: 2663 if (vp->v_wrcnt > 1) 2664 return (V_TRUE); 2665 break; 2666 case V_RDORWR: 2667 if ((vp->v_rdcnt > 1) || (vp->v_wrcnt > 1)) 2668 return (V_TRUE); 2669 break; 2670 case V_RDANDWR: 2671 if ((vp->v_rdcnt > 1) && (vp->v_wrcnt > 1)) 2672 return (V_TRUE); 2673 break; 2674 case V_READ: 2675 if (vp->v_rdcnt > 1) 2676 return (V_TRUE); 2677 break; 2678 } 2679 2680 return (V_FALSE); 2681 } 2682 2683 /* 2684 * vn_is_opened() checks whether a particular file is opened and 2685 * whether the open is for read and/or write. 2686 * 2687 * Vnode counts are only kept on regular files (v_type=VREG). 
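 *
 * For example (sketch only), a server deciding whether a file is in
 * use before handing out a delegation might check:
 *
 *	if (vn_is_opened(vp, V_RDORWR) == V_TRUE)
 *		... somebody has the file open for read or write ...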
2688 */
2689 int
2690 vn_is_opened(
2691 vnode_t *vp,
2692 v_mode_t mode)
2693 {
2694 
2695 ASSERT(vp != NULL);
2696 
2697 switch (mode) {
2698 case V_WRITE:
2699 if (vp->v_wrcnt)
2700 return (V_TRUE);
2701 break;
2702 case V_RDANDWR:
2703 if (vp->v_rdcnt && vp->v_wrcnt)
2704 return (V_TRUE);
2705 break;
2706 case V_RDORWR:
2707 if (vp->v_rdcnt || vp->v_wrcnt)
2708 return (V_TRUE);
2709 break;
2710 case V_READ:
2711 if (vp->v_rdcnt)
2712 return (V_TRUE);
2713 break;
2714 }
2715 
2716 return (V_FALSE);
2717 }
2718 
2719 /*
2720 * vn_is_mapped() checks whether a particular file is mapped and whether
2721 * the file is mapped read and/or write.
2722 */
2723 int
2724 vn_is_mapped(
2725 vnode_t *vp,
2726 v_mode_t mode)
2727 {
2728 
2729 ASSERT(vp != NULL);
2730 
2731 #if !defined(_LP64)
2732 switch (mode) {
2733 /*
2734 * The atomic_add_64_nv functions force atomicity in the
2735 * case of 32 bit architectures. Otherwise the 64 bit values
2736 * require two fetches. The value of the fields may be
2737 * (potentially) changed between the first fetch and the
2738 * second.
2739 */
2740 case V_WRITE:
2741 if (atomic_add_64_nv((&(vp->v_mmap_write)), 0))
2742 return (V_TRUE);
2743 break;
2744 case V_RDANDWR:
2745 if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) &&
2746 (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
2747 return (V_TRUE);
2748 break;
2749 case V_RDORWR:
2750 if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) ||
2751 (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
2752 return (V_TRUE);
2753 break;
2754 case V_READ:
2755 if (atomic_add_64_nv((&(vp->v_mmap_read)), 0))
2756 return (V_TRUE);
2757 break;
2758 }
2759 #else
2760 switch (mode) {
2761 case V_WRITE:
2762 if (vp->v_mmap_write)
2763 return (V_TRUE);
2764 break;
2765 case V_RDANDWR:
2766 if (vp->v_mmap_read && vp->v_mmap_write)
2767 return (V_TRUE);
2768 break;
2769 case V_RDORWR:
2770 if (vp->v_mmap_read || vp->v_mmap_write)
2771 return (V_TRUE);
2772 break;
2773 case V_READ:
2774 if (vp->v_mmap_read)
2775 return (V_TRUE);
2776 break;
2777 }
2778 #endif
2779 
2780 return (V_FALSE);
2781 }
2782 
2783 /*
2784 * Set the operations vector for a vnode.
2785 *
2786 * FEM ensures that the v_femhead pointer is filled in before the
2787 * v_op pointer is changed. This means that if the v_femhead pointer
2788 * is NULL and the v_op field hasn't changed since we checked the
2789 * v_femhead pointer, then our update is ok - we are not racing with
2790 * FEM.
2791 */
2792 void
2793 vn_setops(vnode_t *vp, vnodeops_t *vnodeops)
2794 {
2795 vnodeops_t *op;
2796 
2797 ASSERT(vp != NULL);
2798 ASSERT(vnodeops != NULL);
2799 
2800 op = vp->v_op;
2801 membar_consumer();
2802 /*
2803 * If vp->v_femhead == NULL, then we'll call casptr() to do the
2804 * compare-and-swap on vp->v_op. If either fails, then FEM is
2805 * in effect on the vnode and we need to have FEM deal with it.
2806 */
2807 if (vp->v_femhead != NULL || casptr(&vp->v_op, op, vnodeops) != op) {
2808 fem_setvnops(vp, vnodeops);
2809 }
2810 }
2811 
2812 /*
2813 * Retrieve the operations vector for a vnode.
2814 * As with vn_setops() above, make sure we aren't racing with FEM.
2815 * FEM sets the v_op to a special, internal vnodeops that wouldn't
2816 * make sense to the callers of this routine.
2817 */
2818 vnodeops_t *
2819 vn_getops(vnode_t *vp)
2820 {
2821 vnodeops_t *op;
2822 
2823 ASSERT(vp != NULL);
2824 
2825 op = vp->v_op;
2826 membar_consumer();
2827 if (vp->v_femhead == NULL && op == vp->v_op) {
2828 return (op);
2829 } else {
2830 return (fem_getvnops(vp));
2831 }
2832 }
2833 
2834 /*
2835 * Returns non-zero (1) if the vnodeops matches that of the vnode.
2836 * Returns zero (0) if not.
2837 */
2838 int
2839 vn_matchops(vnode_t *vp, vnodeops_t *vnodeops)
2840 {
2841 return (vn_getops(vp) == vnodeops);
2842 }
2843 
2844 /*
2845 * Returns non-zero (1) if the specified operation matches the
2846 * corresponding operation for the vnode.
2847 * Returns zero (0) if not.
2848 */
2849 
2850 #define MATCHNAME(n1, n2) (((n1)[0] == (n2)[0]) && (strcmp((n1), (n2)) == 0))
2851 
2852 int
2853 vn_matchopval(vnode_t *vp, char *vopname, fs_generic_func_p funcp)
2854 {
2855 const fs_operation_trans_def_t *otdp;
2856 fs_generic_func_p *loc = NULL;
2857 vnodeops_t *vop = vn_getops(vp);
2858 
2859 ASSERT(vopname != NULL);
2860 
2861 for (otdp = vn_ops_table; otdp->name != NULL; otdp++) {
2862 if (MATCHNAME(otdp->name, vopname)) {
2863 loc = (fs_generic_func_p *)
2864 ((char *)(vop) + otdp->offset);
2865 break;
2866 }
2867 }
2868 
2869 return ((loc != NULL) && (*loc == funcp));
2870 }
2871 
2872 /*
2873 * fs_new_caller_id() needs to return a unique ID on a given local system.
2874 * The IDs do not need to survive across reboots. These are primarily
2875 * used so that (FEM) monitors can detect particular callers (such as
2876 * the NFS server) to a given vnode/vfs operation.
2877 */
2878 u_longlong_t
2879 fs_new_caller_id()
2880 {
2881 static uint64_t next_caller_id = 0LL; /* First call returns 1 */
2882 
2883 return ((u_longlong_t)atomic_add_64_nv(&next_caller_id, 1));
2884 }
2885 
2886 /*
2887 * Given a starting vnode and a path, updates the path in the target vnode in
2888 * a safe manner. If the vnode already has path information embedded, then the
2889 * cached path is left untouched.
2890 */
2891 
2892 size_t max_vnode_path = 4 * MAXPATHLEN;
2893 
2894 void
2895 vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp,
2896 const char *path, size_t plen)
2897 {
2898 char *rpath;
2899 vnode_t *base;
2900 size_t rpathlen, rpathalloc;
2901 int doslash = 1;
2902 
2903 if (*path == '/') {
2904 base = rootvp;
2905 path++;
2906 plen--;
2907 } else {
2908 base = startvp;
2909 }
2910 
2911 /*
2912 * We cannot grab base->v_lock while we hold vp->v_lock because of
2913 * the potential for deadlock.
2914 */
2915 mutex_enter(&base->v_lock);
2916 if (base->v_path == NULL) {
2917 mutex_exit(&base->v_lock);
2918 return;
2919 }
2920 
2921 rpathlen = strlen(base->v_path);
2922 rpathalloc = rpathlen + plen + 1;
2923 /* Avoid adding a slash if there's already one there */
2924 if (base->v_path[rpathlen-1] == '/')
2925 doslash = 0;
2926 else
2927 rpathalloc++;
2928 
2929 /*
2930 * We don't want to call kmem_alloc(KM_SLEEP) with kernel locks held,
2931 * so we must do this dance. If, by chance, something changes the path,
2932 * just give up since there is no real harm.
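 *
 * The recheck below uses the saved strlen() of base->v_path as a
 * cheap change detector: after retaking v_lock, a different length
 * means the path moved underneath us and the fresh buffer is simply
 * freed. vn_copypath() further down plays the same trick.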
2933 */
2934 mutex_exit(&base->v_lock);
2935 
2936 /* Paths should stay within reason */
2937 if (rpathalloc > max_vnode_path)
2938 return;
2939 
2940 rpath = kmem_alloc(rpathalloc, KM_SLEEP);
2941 
2942 mutex_enter(&base->v_lock);
2943 if (base->v_path == NULL || strlen(base->v_path) != rpathlen) {
2944 mutex_exit(&base->v_lock);
2945 kmem_free(rpath, rpathalloc);
2946 return;
2947 }
2948 bcopy(base->v_path, rpath, rpathlen);
2949 mutex_exit(&base->v_lock);
2950 
2951 if (doslash)
2952 rpath[rpathlen++] = '/';
2953 bcopy(path, rpath + rpathlen, plen);
2954 rpath[rpathlen + plen] = '\0';
2955 
2956 mutex_enter(&vp->v_lock);
2957 if (vp->v_path != NULL) {
2958 mutex_exit(&vp->v_lock);
2959 kmem_free(rpath, rpathalloc);
2960 } else {
2961 vp->v_path = rpath;
2962 mutex_exit(&vp->v_lock);
2963 }
2964 }
2965 
2966 /*
2967 * Sets the path to the vnode to be the given string, regardless of current
2968 * context. The string must be a complete path from rootdir. This is only used
2969 * by fsop_root() for setting the path based on the mountpoint.
2970 */
2971 void
2972 vn_setpath_str(struct vnode *vp, const char *str, size_t len)
2973 {
2974 char *buf = kmem_alloc(len + 1, KM_SLEEP);
2975 
2976 mutex_enter(&vp->v_lock);
2977 if (vp->v_path != NULL) {
2978 mutex_exit(&vp->v_lock);
2979 kmem_free(buf, len + 1);
2980 return;
2981 }
2982 
2983 vp->v_path = buf;
2984 bcopy(str, vp->v_path, len);
2985 vp->v_path[len] = '\0';
2986 
2987 mutex_exit(&vp->v_lock);
2988 }
2989 
2990 /*
2991 * Called from within filesystem's vop_rename() to handle renames once the
2992 * target vnode is available.
2993 */
2994 void
2995 vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len)
2996 {
2997 char *tmp;
2998 
2999 mutex_enter(&vp->v_lock);
3000 tmp = vp->v_path;
3001 vp->v_path = NULL;
3002 mutex_exit(&vp->v_lock);
3003 vn_setpath(rootdir, dvp, vp, nm, len);
3004 if (tmp != NULL)
3005 kmem_free(tmp, strlen(tmp) + 1);
3006 }
3007 
3008 /*
3009 * Similar to vn_setpath_str(), this function sets the path of the destination
3010 * vnode to be the same as the source vnode.
3011 */
3012 void
3013 vn_copypath(struct vnode *src, struct vnode *dst)
3014 {
3015 char *buf;
3016 int alloc;
3017 
3018 mutex_enter(&src->v_lock);
3019 if (src->v_path == NULL) {
3020 mutex_exit(&src->v_lock);
3021 return;
3022 }
3023 alloc = strlen(src->v_path) + 1;
3024 
3025 /* avoid kmem_alloc() with lock held */
3026 mutex_exit(&src->v_lock);
3027 buf = kmem_alloc(alloc, KM_SLEEP);
3028 mutex_enter(&src->v_lock);
3029 if (src->v_path == NULL || strlen(src->v_path) + 1 != alloc) {
3030 mutex_exit(&src->v_lock);
3031 kmem_free(buf, alloc);
3032 return;
3033 }
3034 bcopy(src->v_path, buf, alloc);
3035 mutex_exit(&src->v_lock);
3036 
3037 mutex_enter(&dst->v_lock);
3038 if (dst->v_path != NULL) {
3039 mutex_exit(&dst->v_lock);
3040 kmem_free(buf, alloc);
3041 return;
3042 }
3043 dst->v_path = buf;
3044 mutex_exit(&dst->v_lock);
3045 }
3046 
3047 /*
3048 * XXX Private interface for segvn routines that handle vnode
3049 * large page segments.
3050 *
3051 * return 1 if vp's file system's VOP_PAGEIO() implementation
3052 * can safely be used instead of VOP_GETPAGE() for handling
3053 * page faults against regular non-swap files. The VOP_PAGEIO()
3054 * interface is considered safe here if its implementation
3055 * is very close to the VOP_GETPAGE() implementation.
3056 * e.g. it zeros out the part of the page beyond EOF, doesn't
3057 * panic if there are file holes but instead returns an error, and
3058 * doesn't assume the file won't be changed by user writes, etc.
3059 * 3060 * return 0 otherwise. 3061 * 3062 * For now allow segvn to only use VOP_PAGEIO() with ufs and nfs. 3063 */ 3064 int 3065 vn_vmpss_usepageio(vnode_t *vp) 3066 { 3067 vfs_t *vfsp = vp->v_vfsp; 3068 char *fsname = vfssw[vfsp->vfs_fstype].vsw_name; 3069 char *pageio_ok_fss[] = {"ufs", "nfs", NULL}; 3070 char **fsok = pageio_ok_fss; 3071 3072 if (fsname == NULL) { 3073 return (0); 3074 } 3075 3076 for (; *fsok; fsok++) { 3077 if (strcmp(*fsok, fsname) == 0) { 3078 return (1); 3079 } 3080 } 3081 return (0); 3082 } 3083 3084 /* VOP_XXX() macros call the corresponding fop_xxx() function */ 3085 3086 int 3087 fop_open( 3088 vnode_t **vpp, 3089 int mode, 3090 cred_t *cr, 3091 caller_context_t *ct) 3092 { 3093 int ret; 3094 vnode_t *vp = *vpp; 3095 3096 VN_HOLD(vp); 3097 /* 3098 * Adding to the vnode counts before calling open 3099 * avoids the need for a mutex. It circumvents a race 3100 * condition where a query made on the vnode counts results in a 3101 * false negative. The inquirer goes away believing the file is 3102 * not open when there is an open on the file already under way. 3103 * 3104 * The counts are meant to prevent NFS from granting a delegation 3105 * when it would be dangerous to do so. 3106 * 3107 * The vnode counts are only kept on regular files 3108 */ 3109 if ((*vpp)->v_type == VREG) { 3110 if (mode & FREAD) 3111 atomic_add_32(&((*vpp)->v_rdcnt), 1); 3112 if (mode & FWRITE) 3113 atomic_add_32(&((*vpp)->v_wrcnt), 1); 3114 } 3115 3116 VOPXID_MAP_CR(vp, cr); 3117 3118 ret = (*(*(vpp))->v_op->vop_open)(vpp, mode, cr, ct); 3119 3120 if (ret) { 3121 /* 3122 * Use the saved vp just in case the vnode ptr got trashed 3123 * by the error. 3124 */ 3125 VOPSTATS_UPDATE(vp, open); 3126 if ((vp->v_type == VREG) && (mode & FREAD)) 3127 atomic_add_32(&(vp->v_rdcnt), -1); 3128 if ((vp->v_type == VREG) && (mode & FWRITE)) 3129 atomic_add_32(&(vp->v_wrcnt), -1); 3130 } else { 3131 /* 3132 * Some filesystems will return a different vnode, 3133 * but the same path was still used to open it. 3134 * So if we do change the vnode and need to 3135 * copy over the path, do so here, rather than special 3136 * casing each filesystem. Adjust the vnode counts to 3137 * reflect the vnode switch. 3138 */ 3139 VOPSTATS_UPDATE(*vpp, open); 3140 if (*vpp != vp && *vpp != NULL) { 3141 vn_copypath(vp, *vpp); 3142 if (((*vpp)->v_type == VREG) && (mode & FREAD)) 3143 atomic_add_32(&((*vpp)->v_rdcnt), 1); 3144 if ((vp->v_type == VREG) && (mode & FREAD)) 3145 atomic_add_32(&(vp->v_rdcnt), -1); 3146 if (((*vpp)->v_type == VREG) && (mode & FWRITE)) 3147 atomic_add_32(&((*vpp)->v_wrcnt), 1); 3148 if ((vp->v_type == VREG) && (mode & FWRITE)) 3149 atomic_add_32(&(vp->v_wrcnt), -1); 3150 } 3151 } 3152 VN_RELE(vp); 3153 return (ret); 3154 } 3155 3156 int 3157 fop_close( 3158 vnode_t *vp, 3159 int flag, 3160 int count, 3161 offset_t offset, 3162 cred_t *cr, 3163 caller_context_t *ct) 3164 { 3165 int err; 3166 3167 VOPXID_MAP_CR(vp, cr); 3168 3169 err = (*(vp)->v_op->vop_close)(vp, flag, count, offset, cr, ct); 3170 VOPSTATS_UPDATE(vp, close); 3171 /* 3172 * Check passed in count to handle possible dups. 
Vnode counts are only 3173 * kept on regular files 3174 */ 3175 if ((vp->v_type == VREG) && (count == 1)) { 3176 if (flag & FREAD) { 3177 ASSERT(vp->v_rdcnt > 0); 3178 atomic_add_32(&(vp->v_rdcnt), -1); 3179 } 3180 if (flag & FWRITE) { 3181 ASSERT(vp->v_wrcnt > 0); 3182 atomic_add_32(&(vp->v_wrcnt), -1); 3183 } 3184 } 3185 return (err); 3186 } 3187 3188 int 3189 fop_read( 3190 vnode_t *vp, 3191 uio_t *uiop, 3192 int ioflag, 3193 cred_t *cr, 3194 caller_context_t *ct) 3195 { 3196 int err; 3197 ssize_t resid_start = uiop->uio_resid; 3198 3199 VOPXID_MAP_CR(vp, cr); 3200 3201 err = (*(vp)->v_op->vop_read)(vp, uiop, ioflag, cr, ct); 3202 VOPSTATS_UPDATE_IO(vp, read, 3203 read_bytes, (resid_start - uiop->uio_resid)); 3204 return (err); 3205 } 3206 3207 int 3208 fop_write( 3209 vnode_t *vp, 3210 uio_t *uiop, 3211 int ioflag, 3212 cred_t *cr, 3213 caller_context_t *ct) 3214 { 3215 int err; 3216 ssize_t resid_start = uiop->uio_resid; 3217 3218 VOPXID_MAP_CR(vp, cr); 3219 3220 err = (*(vp)->v_op->vop_write)(vp, uiop, ioflag, cr, ct); 3221 VOPSTATS_UPDATE_IO(vp, write, 3222 write_bytes, (resid_start - uiop->uio_resid)); 3223 return (err); 3224 } 3225 3226 int 3227 fop_ioctl( 3228 vnode_t *vp, 3229 int cmd, 3230 intptr_t arg, 3231 int flag, 3232 cred_t *cr, 3233 int *rvalp, 3234 caller_context_t *ct) 3235 { 3236 int err; 3237 3238 VOPXID_MAP_CR(vp, cr); 3239 3240 err = (*(vp)->v_op->vop_ioctl)(vp, cmd, arg, flag, cr, rvalp, ct); 3241 VOPSTATS_UPDATE(vp, ioctl); 3242 return (err); 3243 } 3244 3245 int 3246 fop_setfl( 3247 vnode_t *vp, 3248 int oflags, 3249 int nflags, 3250 cred_t *cr, 3251 caller_context_t *ct) 3252 { 3253 int err; 3254 3255 VOPXID_MAP_CR(vp, cr); 3256 3257 err = (*(vp)->v_op->vop_setfl)(vp, oflags, nflags, cr, ct); 3258 VOPSTATS_UPDATE(vp, setfl); 3259 return (err); 3260 } 3261 3262 int 3263 fop_getattr( 3264 vnode_t *vp, 3265 vattr_t *vap, 3266 int flags, 3267 cred_t *cr, 3268 caller_context_t *ct) 3269 { 3270 int err; 3271 3272 VOPXID_MAP_CR(vp, cr); 3273 3274 /* 3275 * If this file system doesn't understand the xvattr extensions 3276 * then turn off the xvattr bit. 3277 */ 3278 if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) { 3279 vap->va_mask &= ~AT_XVATTR; 3280 } 3281 3282 /* 3283 * We're only allowed to skip the ACL check iff we used a 32 bit 3284 * ACE mask with VOP_ACCESS() to determine permissions. 3285 */ 3286 if ((flags & ATTR_NOACLCHECK) && 3287 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) { 3288 return (EINVAL); 3289 } 3290 err = (*(vp)->v_op->vop_getattr)(vp, vap, flags, cr, ct); 3291 VOPSTATS_UPDATE(vp, getattr); 3292 return (err); 3293 } 3294 3295 int 3296 fop_setattr( 3297 vnode_t *vp, 3298 vattr_t *vap, 3299 int flags, 3300 cred_t *cr, 3301 caller_context_t *ct) 3302 { 3303 int err; 3304 3305 VOPXID_MAP_CR(vp, cr); 3306 3307 /* 3308 * If this file system doesn't understand the xvattr extensions 3309 * then turn off the xvattr bit. 3310 */ 3311 if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) { 3312 vap->va_mask &= ~AT_XVATTR; 3313 } 3314 3315 /* 3316 * We're only allowed to skip the ACL check iff we used a 32 bit 3317 * ACE mask with VOP_ACCESS() to determine permissions. 
3318 */ 3319 if ((flags & ATTR_NOACLCHECK) && 3320 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) { 3321 return (EINVAL); 3322 } 3323 err = (*(vp)->v_op->vop_setattr)(vp, vap, flags, cr, ct); 3324 VOPSTATS_UPDATE(vp, setattr); 3325 return (err); 3326 } 3327 3328 int 3329 fop_access( 3330 vnode_t *vp, 3331 int mode, 3332 int flags, 3333 cred_t *cr, 3334 caller_context_t *ct) 3335 { 3336 int err; 3337 3338 if ((flags & V_ACE_MASK) && 3339 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) { 3340 return (EINVAL); 3341 } 3342 3343 VOPXID_MAP_CR(vp, cr); 3344 3345 err = (*(vp)->v_op->vop_access)(vp, mode, flags, cr, ct); 3346 VOPSTATS_UPDATE(vp, access); 3347 return (err); 3348 } 3349 3350 int 3351 fop_lookup( 3352 vnode_t *dvp, 3353 char *nm, 3354 vnode_t **vpp, 3355 pathname_t *pnp, 3356 int flags, 3357 vnode_t *rdir, 3358 cred_t *cr, 3359 caller_context_t *ct, 3360 int *deflags, /* Returned per-dirent flags */ 3361 pathname_t *ppnp) /* Returned case-preserved name in directory */ 3362 { 3363 int ret; 3364 3365 /* 3366 * If this file system doesn't support case-insensitive access 3367 * and said access is requested, fail quickly. It is required 3368 * that if the vfs supports case-insensitive lookup, it also 3369 * supports extended dirent flags. 3370 */ 3371 if (flags & FIGNORECASE && 3372 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 && 3373 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)) 3374 return (EINVAL); 3375 3376 VOPXID_MAP_CR(dvp, cr); 3377 3378 if ((flags & LOOKUP_XATTR) && (flags & LOOKUP_HAVE_SYSATTR_DIR) == 0) { 3379 ret = xattr_dir_lookup(dvp, vpp, flags, cr); 3380 } else { 3381 ret = (*(dvp)->v_op->vop_lookup) 3382 (dvp, nm, vpp, pnp, flags, rdir, cr, ct, deflags, ppnp); 3383 } 3384 if (ret == 0 && *vpp) { 3385 VOPSTATS_UPDATE(*vpp, lookup); 3386 if ((*vpp)->v_path == NULL) { 3387 vn_setpath(rootdir, dvp, *vpp, nm, strlen(nm)); 3388 } 3389 } 3390 3391 return (ret); 3392 } 3393 3394 int 3395 fop_create( 3396 vnode_t *dvp, 3397 char *name, 3398 vattr_t *vap, 3399 vcexcl_t excl, 3400 int mode, 3401 vnode_t **vpp, 3402 cred_t *cr, 3403 int flags, 3404 caller_context_t *ct, 3405 vsecattr_t *vsecp) /* ACL to set during create */ 3406 { 3407 int ret; 3408 3409 if (vsecp != NULL && 3410 vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) { 3411 return (EINVAL); 3412 } 3413 /* 3414 * If this file system doesn't support case-insensitive access 3415 * and said access is requested, fail quickly. 3416 */ 3417 if (flags & FIGNORECASE && 3418 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 && 3419 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)) 3420 return (EINVAL); 3421 3422 VOPXID_MAP_CR(dvp, cr); 3423 3424 ret = (*(dvp)->v_op->vop_create) 3425 (dvp, name, vap, excl, mode, vpp, cr, flags, ct, vsecp); 3426 if (ret == 0 && *vpp) { 3427 VOPSTATS_UPDATE(*vpp, create); 3428 if ((*vpp)->v_path == NULL) { 3429 vn_setpath(rootdir, dvp, *vpp, name, strlen(name)); 3430 } 3431 } 3432 3433 return (ret); 3434 } 3435 3436 int 3437 fop_remove( 3438 vnode_t *dvp, 3439 char *nm, 3440 cred_t *cr, 3441 caller_context_t *ct, 3442 int flags) 3443 { 3444 int err; 3445 3446 /* 3447 * If this file system doesn't support case-insensitive access 3448 * and said access is requested, fail quickly. 
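 *
 * Caller-side sketch: a consumer that wants case-insensitive
 * semantics (an SMB-style server, say) passes the flag and must be
 * prepared for EINVAL from file systems without the feature:
 *
 *	error = VOP_REMOVE(dvp, name, cr, NULL, FIGNORECASE);
 *	if (error == EINVAL)
 *		... retry with a case-sensitive name, or give up ...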
3449 */ 3450 if (flags & FIGNORECASE && 3451 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 && 3452 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)) 3453 return (EINVAL); 3454 3455 VOPXID_MAP_CR(dvp, cr); 3456 3457 err = (*(dvp)->v_op->vop_remove)(dvp, nm, cr, ct, flags); 3458 VOPSTATS_UPDATE(dvp, remove); 3459 return (err); 3460 } 3461 3462 int 3463 fop_link( 3464 vnode_t *tdvp, 3465 vnode_t *svp, 3466 char *tnm, 3467 cred_t *cr, 3468 caller_context_t *ct, 3469 int flags) 3470 { 3471 int err; 3472 3473 /* 3474 * If the target file system doesn't support case-insensitive access 3475 * and said access is requested, fail quickly. 3476 */ 3477 if (flags & FIGNORECASE && 3478 (vfs_has_feature(tdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 && 3479 vfs_has_feature(tdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)) 3480 return (EINVAL); 3481 3482 VOPXID_MAP_CR(tdvp, cr); 3483 3484 err = (*(tdvp)->v_op->vop_link)(tdvp, svp, tnm, cr, ct, flags); 3485 VOPSTATS_UPDATE(tdvp, link); 3486 return (err); 3487 } 3488 3489 int 3490 fop_rename( 3491 vnode_t *sdvp, 3492 char *snm, 3493 vnode_t *tdvp, 3494 char *tnm, 3495 cred_t *cr, 3496 caller_context_t *ct, 3497 int flags) 3498 { 3499 int err; 3500 3501 /* 3502 * If the file system involved does not support 3503 * case-insensitive access and said access is requested, fail 3504 * quickly. 3505 */ 3506 if (flags & FIGNORECASE && 3507 ((vfs_has_feature(sdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 && 3508 vfs_has_feature(sdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))) 3509 return (EINVAL); 3510 3511 VOPXID_MAP_CR(tdvp, cr); 3512 3513 err = (*(sdvp)->v_op->vop_rename)(sdvp, snm, tdvp, tnm, cr, ct, flags); 3514 VOPSTATS_UPDATE(sdvp, rename); 3515 return (err); 3516 } 3517 3518 int 3519 fop_mkdir( 3520 vnode_t *dvp, 3521 char *dirname, 3522 vattr_t *vap, 3523 vnode_t **vpp, 3524 cred_t *cr, 3525 caller_context_t *ct, 3526 int flags, 3527 vsecattr_t *vsecp) /* ACL to set during create */ 3528 { 3529 int ret; 3530 3531 if (vsecp != NULL && 3532 vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) { 3533 return (EINVAL); 3534 } 3535 /* 3536 * If this file system doesn't support case-insensitive access 3537 * and said access is requested, fail quickly. 3538 */ 3539 if (flags & FIGNORECASE && 3540 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 && 3541 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)) 3542 return (EINVAL); 3543 3544 VOPXID_MAP_CR(dvp, cr); 3545 3546 ret = (*(dvp)->v_op->vop_mkdir) 3547 (dvp, dirname, vap, vpp, cr, ct, flags, vsecp); 3548 if (ret == 0 && *vpp) { 3549 VOPSTATS_UPDATE(*vpp, mkdir); 3550 if ((*vpp)->v_path == NULL) { 3551 vn_setpath(rootdir, dvp, *vpp, dirname, 3552 strlen(dirname)); 3553 } 3554 } 3555 3556 return (ret); 3557 } 3558 3559 int 3560 fop_rmdir( 3561 vnode_t *dvp, 3562 char *nm, 3563 vnode_t *cdir, 3564 cred_t *cr, 3565 caller_context_t *ct, 3566 int flags) 3567 { 3568 int err; 3569 3570 /* 3571 * If this file system doesn't support case-insensitive access 3572 * and said access is requested, fail quickly. 
3573 */ 3574 if (flags & FIGNORECASE && 3575 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 && 3576 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)) 3577 return (EINVAL); 3578 3579 VOPXID_MAP_CR(dvp, cr); 3580 3581 err = (*(dvp)->v_op->vop_rmdir)(dvp, nm, cdir, cr, ct, flags); 3582 VOPSTATS_UPDATE(dvp, rmdir); 3583 return (err); 3584 } 3585 3586 int 3587 fop_readdir( 3588 vnode_t *vp, 3589 uio_t *uiop, 3590 cred_t *cr, 3591 int *eofp, 3592 caller_context_t *ct, 3593 int flags) 3594 { 3595 int err; 3596 ssize_t resid_start = uiop->uio_resid; 3597 3598 /* 3599 * If this file system doesn't support retrieving directory 3600 * entry flags and said access is requested, fail quickly. 3601 */ 3602 if (flags & V_RDDIR_ENTFLAGS && 3603 vfs_has_feature(vp->v_vfsp, VFSFT_DIRENTFLAGS) == 0) 3604 return (EINVAL); 3605 3606 VOPXID_MAP_CR(vp, cr); 3607 3608 err = (*(vp)->v_op->vop_readdir)(vp, uiop, cr, eofp, ct, flags); 3609 VOPSTATS_UPDATE_IO(vp, readdir, 3610 readdir_bytes, (resid_start - uiop->uio_resid)); 3611 return (err); 3612 } 3613 3614 int 3615 fop_symlink( 3616 vnode_t *dvp, 3617 char *linkname, 3618 vattr_t *vap, 3619 char *target, 3620 cred_t *cr, 3621 caller_context_t *ct, 3622 int flags) 3623 { 3624 int err; 3625 xvattr_t xvattr; 3626 3627 /* 3628 * If this file system doesn't support case-insensitive access 3629 * and said access is requested, fail quickly. 3630 */ 3631 if (flags & FIGNORECASE && 3632 (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 && 3633 vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)) 3634 return (EINVAL); 3635 3636 VOPXID_MAP_CR(dvp, cr); 3637 3638 /* check for reparse point */ 3639 if ((vfs_has_feature(dvp->v_vfsp, VFSFT_REPARSE)) && 3640 (strncmp(target, FS_REPARSE_TAG_STR, 3641 strlen(FS_REPARSE_TAG_STR)) == 0)) { 3642 if (!fs_reparse_mark(target, vap, &xvattr)) 3643 vap = (vattr_t *)&xvattr; 3644 } 3645 3646 err = (*(dvp)->v_op->vop_symlink) 3647 (dvp, linkname, vap, target, cr, ct, flags); 3648 VOPSTATS_UPDATE(dvp, symlink); 3649 return (err); 3650 } 3651 3652 int 3653 fop_readlink( 3654 vnode_t *vp, 3655 uio_t *uiop, 3656 cred_t *cr, 3657 caller_context_t *ct) 3658 { 3659 int err; 3660 3661 VOPXID_MAP_CR(vp, cr); 3662 3663 err = (*(vp)->v_op->vop_readlink)(vp, uiop, cr, ct); 3664 VOPSTATS_UPDATE(vp, readlink); 3665 return (err); 3666 } 3667 3668 int 3669 fop_fsync( 3670 vnode_t *vp, 3671 int syncflag, 3672 cred_t *cr, 3673 caller_context_t *ct) 3674 { 3675 int err; 3676 3677 VOPXID_MAP_CR(vp, cr); 3678 3679 err = (*(vp)->v_op->vop_fsync)(vp, syncflag, cr, ct); 3680 VOPSTATS_UPDATE(vp, fsync); 3681 return (err); 3682 } 3683 3684 void 3685 fop_inactive( 3686 vnode_t *vp, 3687 cred_t *cr, 3688 caller_context_t *ct) 3689 { 3690 /* Need to update stats before vop call since we may lose the vnode */ 3691 VOPSTATS_UPDATE(vp, inactive); 3692 3693 VOPXID_MAP_CR(vp, cr); 3694 3695 (*(vp)->v_op->vop_inactive)(vp, cr, ct); 3696 } 3697 3698 int 3699 fop_fid( 3700 vnode_t *vp, 3701 fid_t *fidp, 3702 caller_context_t *ct) 3703 { 3704 int err; 3705 3706 err = (*(vp)->v_op->vop_fid)(vp, fidp, ct); 3707 VOPSTATS_UPDATE(vp, fid); 3708 return (err); 3709 } 3710 3711 int 3712 fop_rwlock( 3713 vnode_t *vp, 3714 int write_lock, 3715 caller_context_t *ct) 3716 { 3717 int ret; 3718 3719 ret = ((*(vp)->v_op->vop_rwlock)(vp, write_lock, ct)); 3720 VOPSTATS_UPDATE(vp, rwlock); 3721 return (ret); 3722 } 3723 3724 void 3725 fop_rwunlock( 3726 vnode_t *vp, 3727 int write_lock, 3728 caller_context_t *ct) 3729 { 3730 (*(vp)->v_op->vop_rwunlock)(vp, 
write_lock, ct); 3731 VOPSTATS_UPDATE(vp, rwunlock); 3732 } 3733 3734 int 3735 fop_seek( 3736 vnode_t *vp, 3737 offset_t ooff, 3738 offset_t *noffp, 3739 caller_context_t *ct) 3740 { 3741 int err; 3742 3743 err = (*(vp)->v_op->vop_seek)(vp, ooff, noffp, ct); 3744 VOPSTATS_UPDATE(vp, seek); 3745 return (err); 3746 } 3747 3748 int 3749 fop_cmp( 3750 vnode_t *vp1, 3751 vnode_t *vp2, 3752 caller_context_t *ct) 3753 { 3754 int err; 3755 3756 err = (*(vp1)->v_op->vop_cmp)(vp1, vp2, ct); 3757 VOPSTATS_UPDATE(vp1, cmp); 3758 return (err); 3759 } 3760 3761 int 3762 fop_frlock( 3763 vnode_t *vp, 3764 int cmd, 3765 flock64_t *bfp, 3766 int flag, 3767 offset_t offset, 3768 struct flk_callback *flk_cbp, 3769 cred_t *cr, 3770 caller_context_t *ct) 3771 { 3772 int err; 3773 3774 VOPXID_MAP_CR(vp, cr); 3775 3776 err = (*(vp)->v_op->vop_frlock) 3777 (vp, cmd, bfp, flag, offset, flk_cbp, cr, ct); 3778 VOPSTATS_UPDATE(vp, frlock); 3779 return (err); 3780 } 3781 3782 int 3783 fop_space( 3784 vnode_t *vp, 3785 int cmd, 3786 flock64_t *bfp, 3787 int flag, 3788 offset_t offset, 3789 cred_t *cr, 3790 caller_context_t *ct) 3791 { 3792 int err; 3793 3794 VOPXID_MAP_CR(vp, cr); 3795 3796 err = (*(vp)->v_op->vop_space)(vp, cmd, bfp, flag, offset, cr, ct); 3797 VOPSTATS_UPDATE(vp, space); 3798 return (err); 3799 } 3800 3801 int 3802 fop_realvp( 3803 vnode_t *vp, 3804 vnode_t **vpp, 3805 caller_context_t *ct) 3806 { 3807 int err; 3808 3809 err = (*(vp)->v_op->vop_realvp)(vp, vpp, ct); 3810 VOPSTATS_UPDATE(vp, realvp); 3811 return (err); 3812 } 3813 3814 int 3815 fop_getpage( 3816 vnode_t *vp, 3817 offset_t off, 3818 size_t len, 3819 uint_t *protp, 3820 page_t **plarr, 3821 size_t plsz, 3822 struct seg *seg, 3823 caddr_t addr, 3824 enum seg_rw rw, 3825 cred_t *cr, 3826 caller_context_t *ct) 3827 { 3828 int err; 3829 3830 VOPXID_MAP_CR(vp, cr); 3831 3832 err = (*(vp)->v_op->vop_getpage) 3833 (vp, off, len, protp, plarr, plsz, seg, addr, rw, cr, ct); 3834 VOPSTATS_UPDATE(vp, getpage); 3835 return (err); 3836 } 3837 3838 int 3839 fop_putpage( 3840 vnode_t *vp, 3841 offset_t off, 3842 size_t len, 3843 int flags, 3844 cred_t *cr, 3845 caller_context_t *ct) 3846 { 3847 int err; 3848 3849 VOPXID_MAP_CR(vp, cr); 3850 3851 err = (*(vp)->v_op->vop_putpage)(vp, off, len, flags, cr, ct); 3852 VOPSTATS_UPDATE(vp, putpage); 3853 return (err); 3854 } 3855 3856 int 3857 fop_map( 3858 vnode_t *vp, 3859 offset_t off, 3860 struct as *as, 3861 caddr_t *addrp, 3862 size_t len, 3863 uchar_t prot, 3864 uchar_t maxprot, 3865 uint_t flags, 3866 cred_t *cr, 3867 caller_context_t *ct) 3868 { 3869 int err; 3870 3871 VOPXID_MAP_CR(vp, cr); 3872 3873 err = (*(vp)->v_op->vop_map) 3874 (vp, off, as, addrp, len, prot, maxprot, flags, cr, ct); 3875 VOPSTATS_UPDATE(vp, map); 3876 return (err); 3877 } 3878 3879 int 3880 fop_addmap( 3881 vnode_t *vp, 3882 offset_t off, 3883 struct as *as, 3884 caddr_t addr, 3885 size_t len, 3886 uchar_t prot, 3887 uchar_t maxprot, 3888 uint_t flags, 3889 cred_t *cr, 3890 caller_context_t *ct) 3891 { 3892 int error; 3893 u_longlong_t delta; 3894 3895 VOPXID_MAP_CR(vp, cr); 3896 3897 error = (*(vp)->v_op->vop_addmap) 3898 (vp, off, as, addr, len, prot, maxprot, flags, cr, ct); 3899 3900 if ((!error) && (vp->v_type == VREG)) { 3901 delta = (u_longlong_t)btopr(len); 3902 /* 3903 * If file is declared MAP_PRIVATE, it can't be written back 3904 * even if open for write. Handle as read. 
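 *
 * Concretely (a worked example, assuming 8K pages): for len == 16K,
 * btopr(len) gives delta == 2, so a MAP_PRIVATE mapping adds 2 to
 * v_mmap_read regardless of maxprot, while a shared mapping whose
 * maxprot allows PROT_READ | PROT_WRITE adds 2 to both v_mmap_read
 * and v_mmap_write.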
3905 */ 3906 if (flags & MAP_PRIVATE) { 3907 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)), 3908 (int64_t)delta); 3909 } else { 3910 /* 3911 * atomic_add_64 forces the fetch of a 64 bit value to 3912 * be atomic on 32 bit machines 3913 */ 3914 if (maxprot & PROT_WRITE) 3915 atomic_add_64((uint64_t *)(&(vp->v_mmap_write)), 3916 (int64_t)delta); 3917 if (maxprot & PROT_READ) 3918 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)), 3919 (int64_t)delta); 3920 if (maxprot & PROT_EXEC) 3921 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)), 3922 (int64_t)delta); 3923 } 3924 } 3925 VOPSTATS_UPDATE(vp, addmap); 3926 return (error); 3927 } 3928 3929 int 3930 fop_delmap( 3931 vnode_t *vp, 3932 offset_t off, 3933 struct as *as, 3934 caddr_t addr, 3935 size_t len, 3936 uint_t prot, 3937 uint_t maxprot, 3938 uint_t flags, 3939 cred_t *cr, 3940 caller_context_t *ct) 3941 { 3942 int error; 3943 u_longlong_t delta; 3944 3945 VOPXID_MAP_CR(vp, cr); 3946 3947 error = (*(vp)->v_op->vop_delmap) 3948 (vp, off, as, addr, len, prot, maxprot, flags, cr, ct); 3949 3950 /* 3951 * NFS calls into delmap twice, the first time 3952 * it simply establishes a callback mechanism and returns EAGAIN 3953 * while the real work is being done upon the second invocation. 3954 * We have to detect this here and only decrement the counts upon 3955 * the second delmap request. 3956 */ 3957 if ((error != EAGAIN) && (vp->v_type == VREG)) { 3958 3959 delta = (u_longlong_t)btopr(len); 3960 3961 if (flags & MAP_PRIVATE) { 3962 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)), 3963 (int64_t)(-delta)); 3964 } else { 3965 /* 3966 * atomic_add_64 forces the fetch of a 64 bit value 3967 * to be atomic on 32 bit machines 3968 */ 3969 if (maxprot & PROT_WRITE) 3970 atomic_add_64((uint64_t *)(&(vp->v_mmap_write)), 3971 (int64_t)(-delta)); 3972 if (maxprot & PROT_READ) 3973 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)), 3974 (int64_t)(-delta)); 3975 if (maxprot & PROT_EXEC) 3976 atomic_add_64((uint64_t *)(&(vp->v_mmap_read)), 3977 (int64_t)(-delta)); 3978 } 3979 } 3980 VOPSTATS_UPDATE(vp, delmap); 3981 return (error); 3982 } 3983 3984 3985 int 3986 fop_poll( 3987 vnode_t *vp, 3988 short events, 3989 int anyyet, 3990 short *reventsp, 3991 struct pollhead **phpp, 3992 caller_context_t *ct) 3993 { 3994 int err; 3995 3996 err = (*(vp)->v_op->vop_poll)(vp, events, anyyet, reventsp, phpp, ct); 3997 VOPSTATS_UPDATE(vp, poll); 3998 return (err); 3999 } 4000 4001 int 4002 fop_dump( 4003 vnode_t *vp, 4004 caddr_t addr, 4005 offset_t lbdn, 4006 offset_t dblks, 4007 caller_context_t *ct) 4008 { 4009 int err; 4010 4011 /* ensure lbdn and dblks can be passed safely to bdev_dump */ 4012 if ((lbdn != (daddr_t)lbdn) || (dblks != (int)dblks)) 4013 return (EIO); 4014 4015 err = (*(vp)->v_op->vop_dump)(vp, addr, lbdn, dblks, ct); 4016 VOPSTATS_UPDATE(vp, dump); 4017 return (err); 4018 } 4019 4020 int 4021 fop_pathconf( 4022 vnode_t *vp, 4023 int cmd, 4024 ulong_t *valp, 4025 cred_t *cr, 4026 caller_context_t *ct) 4027 { 4028 int err; 4029 4030 VOPXID_MAP_CR(vp, cr); 4031 4032 err = (*(vp)->v_op->vop_pathconf)(vp, cmd, valp, cr, ct); 4033 VOPSTATS_UPDATE(vp, pathconf); 4034 return (err); 4035 } 4036 4037 int 4038 fop_pageio( 4039 vnode_t *vp, 4040 struct page *pp, 4041 u_offset_t io_off, 4042 size_t io_len, 4043 int flags, 4044 cred_t *cr, 4045 caller_context_t *ct) 4046 { 4047 int err; 4048 4049 VOPXID_MAP_CR(vp, cr); 4050 4051 err = (*(vp)->v_op->vop_pageio)(vp, pp, io_off, io_len, flags, cr, ct); 4052 VOPSTATS_UPDATE(vp, pageio); 4053 return (err); 4054 } 4055 4056 int 
4057 fop_dumpctl( 4058 vnode_t *vp, 4059 int action, 4060 offset_t *blkp, 4061 caller_context_t *ct) 4062 { 4063 int err; 4064 err = (*(vp)->v_op->vop_dumpctl)(vp, action, blkp, ct); 4065 VOPSTATS_UPDATE(vp, dumpctl); 4066 return (err); 4067 } 4068 4069 void 4070 fop_dispose( 4071 vnode_t *vp, 4072 page_t *pp, 4073 int flag, 4074 int dn, 4075 cred_t *cr, 4076 caller_context_t *ct) 4077 { 4078 /* Must do stats first since it's possible to lose the vnode */ 4079 VOPSTATS_UPDATE(vp, dispose); 4080 4081 VOPXID_MAP_CR(vp, cr); 4082 4083 (*(vp)->v_op->vop_dispose)(vp, pp, flag, dn, cr, ct); 4084 } 4085 4086 int 4087 fop_setsecattr( 4088 vnode_t *vp, 4089 vsecattr_t *vsap, 4090 int flag, 4091 cred_t *cr, 4092 caller_context_t *ct) 4093 { 4094 int err; 4095 4096 VOPXID_MAP_CR(vp, cr); 4097 4098 /* 4099 * We're only allowed to skip the ACL check iff we used a 32 bit 4100 * ACE mask with VOP_ACCESS() to determine permissions. 4101 */ 4102 if ((flag & ATTR_NOACLCHECK) && 4103 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) { 4104 return (EINVAL); 4105 } 4106 err = (*(vp)->v_op->vop_setsecattr) (vp, vsap, flag, cr, ct); 4107 VOPSTATS_UPDATE(vp, setsecattr); 4108 return (err); 4109 } 4110 4111 int 4112 fop_getsecattr( 4113 vnode_t *vp, 4114 vsecattr_t *vsap, 4115 int flag, 4116 cred_t *cr, 4117 caller_context_t *ct) 4118 { 4119 int err; 4120 4121 /* 4122 * We're only allowed to skip the ACL check iff we used a 32 bit 4123 * ACE mask with VOP_ACCESS() to determine permissions. 4124 */ 4125 if ((flag & ATTR_NOACLCHECK) && 4126 vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) { 4127 return (EINVAL); 4128 } 4129 4130 VOPXID_MAP_CR(vp, cr); 4131 4132 err = (*(vp)->v_op->vop_getsecattr) (vp, vsap, flag, cr, ct); 4133 VOPSTATS_UPDATE(vp, getsecattr); 4134 return (err); 4135 } 4136 4137 int 4138 fop_shrlock( 4139 vnode_t *vp, 4140 int cmd, 4141 struct shrlock *shr, 4142 int flag, 4143 cred_t *cr, 4144 caller_context_t *ct) 4145 { 4146 int err; 4147 4148 VOPXID_MAP_CR(vp, cr); 4149 4150 err = (*(vp)->v_op->vop_shrlock)(vp, cmd, shr, flag, cr, ct); 4151 VOPSTATS_UPDATE(vp, shrlock); 4152 return (err); 4153 } 4154 4155 int 4156 fop_vnevent(vnode_t *vp, vnevent_t vnevent, vnode_t *dvp, char *fnm, 4157 caller_context_t *ct) 4158 { 4159 int err; 4160 4161 err = (*(vp)->v_op->vop_vnevent)(vp, vnevent, dvp, fnm, ct); 4162 VOPSTATS_UPDATE(vp, vnevent); 4163 return (err); 4164 } 4165 4166 int 4167 fop_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *uiop, cred_t *cr, 4168 caller_context_t *ct) 4169 { 4170 int err; 4171 4172 if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0) 4173 return (ENOTSUP); 4174 err = (*(vp)->v_op->vop_reqzcbuf)(vp, ioflag, uiop, cr, ct); 4175 VOPSTATS_UPDATE(vp, reqzcbuf); 4176 return (err); 4177 } 4178 4179 int 4180 fop_retzcbuf(vnode_t *vp, xuio_t *uiop, cred_t *cr, caller_context_t *ct) 4181 { 4182 int err; 4183 4184 if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0) 4185 return (ENOTSUP); 4186 err = (*(vp)->v_op->vop_retzcbuf)(vp, uiop, cr, ct); 4187 VOPSTATS_UPDATE(vp, retzcbuf); 4188 return (err); 4189 } 4190 4191 /* 4192 * Default destructor 4193 * Needed because NULL destructor means that the key is unused 4194 */ 4195 /* ARGSUSED */ 4196 void 4197 vsd_defaultdestructor(void *value) 4198 {} 4199 4200 /* 4201 * Create a key (index into per vnode array) 4202 * Locks out vsd_create, vsd_destroy, and vsd_free 4203 * May allocate memory with lock held 4204 */ 4205 void 4206 vsd_create(uint_t *keyp, void (*destructor)(void *)) 4207 { 4208 int i; 4209 
uint_t nkeys; 4210 4211 /* 4212 * if key is allocated, do nothing 4213 */ 4214 mutex_enter(&vsd_lock); 4215 if (*keyp) { 4216 mutex_exit(&vsd_lock); 4217 return; 4218 } 4219 /* 4220 * find an unused key 4221 */ 4222 if (destructor == NULL) 4223 destructor = vsd_defaultdestructor; 4224 4225 for (i = 0; i < vsd_nkeys; ++i) 4226 if (vsd_destructor[i] == NULL) 4227 break; 4228 4229 /* 4230 * if no unused keys, increase the size of the destructor array 4231 */ 4232 if (i == vsd_nkeys) { 4233 if ((nkeys = (vsd_nkeys << 1)) == 0) 4234 nkeys = 1; 4235 vsd_destructor = 4236 (void (**)(void *))vsd_realloc((void *)vsd_destructor, 4237 (size_t)(vsd_nkeys * sizeof (void (*)(void *))), 4238 (size_t)(nkeys * sizeof (void (*)(void *)))); 4239 vsd_nkeys = nkeys; 4240 } 4241 4242 /* 4243 * allocate the next available unused key 4244 */ 4245 vsd_destructor[i] = destructor; 4246 *keyp = i + 1; 4247 4248 /* create vsd_list, if it doesn't exist */ 4249 if (vsd_list == NULL) { 4250 vsd_list = kmem_alloc(sizeof (list_t), KM_SLEEP); 4251 list_create(vsd_list, sizeof (struct vsd_node), 4252 offsetof(struct vsd_node, vs_nodes)); 4253 } 4254 4255 mutex_exit(&vsd_lock); 4256 } 4257 4258 /* 4259 * Destroy a key 4260 * 4261 * Assumes that the caller is preventing vsd_set and vsd_get 4262 * Locks out vsd_create, vsd_destroy, and vsd_free 4263 * May free memory with lock held 4264 */ 4265 void 4266 vsd_destroy(uint_t *keyp) 4267 { 4268 uint_t key; 4269 struct vsd_node *vsd; 4270 4271 /* 4272 * protect the key namespace and our destructor lists 4273 */ 4274 mutex_enter(&vsd_lock); 4275 key = *keyp; 4276 *keyp = 0; 4277 4278 ASSERT(key <= vsd_nkeys); 4279 4280 /* 4281 * if the key is valid 4282 */ 4283 if (key != 0) { 4284 uint_t k = key - 1; 4285 /* 4286 * for every vnode with VSD, call key's destructor 4287 */ 4288 for (vsd = list_head(vsd_list); vsd != NULL; 4289 vsd = list_next(vsd_list, vsd)) { 4290 /* 4291 * no VSD for key in this vnode 4292 */ 4293 if (key > vsd->vs_nkeys) 4294 continue; 4295 /* 4296 * call destructor for key 4297 */ 4298 if (vsd->vs_value[k] && vsd_destructor[k]) 4299 (*vsd_destructor[k])(vsd->vs_value[k]); 4300 /* 4301 * reset value for key 4302 */ 4303 vsd->vs_value[k] = NULL; 4304 } 4305 /* 4306 * actually free the key (NULL destructor == unused) 4307 */ 4308 vsd_destructor[k] = NULL; 4309 } 4310 4311 mutex_exit(&vsd_lock); 4312 } 4313 4314 /* 4315 * Quickly return the per vnode value that was stored with the specified key 4316 * Assumes the caller is protecting key from vsd_create and vsd_destroy 4317 * Assumes the caller is holding v_vsd_lock to protect the vsd. 4318 */ 4319 void * 4320 vsd_get(vnode_t *vp, uint_t key) 4321 { 4322 struct vsd_node *vsd; 4323 4324 ASSERT(vp != NULL); 4325 ASSERT(mutex_owned(&vp->v_vsd_lock)); 4326 4327 vsd = vp->v_vsd; 4328 4329 if (key && vsd != NULL && key <= vsd->vs_nkeys) 4330 return (vsd->vs_value[key - 1]); 4331 return (NULL); 4332 } 4333 4334 /* 4335 * Set a per vnode value indexed with the specified key 4336 * Assumes the caller is holding v_vsd_lock to protect the vsd. 
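 *
 * End-to-end sketch of the VSD protocol (key and data names are
 * illustrative only):
 *
 *	static uint_t my_key;
 *
 *	vsd_create(&my_key, my_destructor);	/* once, at module init */
 *
 *	mutex_enter(&vp->v_vsd_lock);
 *	(void) vsd_set(vp, my_key, mydata);
 *	...
 *	mydata = vsd_get(vp, my_key);
 *	mutex_exit(&vp->v_vsd_lock);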
/*
 * Set a per vnode value indexed with the specified key
 * Assumes the caller is holding v_vsd_lock to protect the vsd.
 */
int
vsd_set(vnode_t *vp, uint_t key, void *value)
{
	struct vsd_node *vsd;

	ASSERT(vp != NULL);
	ASSERT(mutex_owned(&vp->v_vsd_lock));

	if (key == 0)
		return (EINVAL);

	vsd = vp->v_vsd;
	if (vsd == NULL)
		vsd = vp->v_vsd = kmem_zalloc(sizeof (*vsd), KM_SLEEP);

	/*
	 * If the vsd was just allocated, vs_nkeys will be 0, so the following
	 * code won't happen and we will continue down and allocate space for
	 * the vs_value array.
	 * If the caller is replacing one value with another, then it is up
	 * to the caller to free/rele/destroy the previous value (if needed).
	 */
	if (key <= vsd->vs_nkeys) {
		vsd->vs_value[key - 1] = value;
		return (0);
	}

	ASSERT(key <= vsd_nkeys);

	if (vsd->vs_nkeys == 0) {
		mutex_enter(&vsd_lock);	/* lock out vsd_destroy() */
		/*
		 * Link onto list of all VSD nodes.
		 */
		list_insert_head(vsd_list, vsd);
		mutex_exit(&vsd_lock);
	}

	/*
	 * Allocate vnode local storage and set the value for key
	 */
	vsd->vs_value = vsd_realloc(vsd->vs_value,
	    vsd->vs_nkeys * sizeof (void *),
	    key * sizeof (void *));
	vsd->vs_nkeys = key;
	vsd->vs_value[key - 1] = value;

	return (0);
}

/*
 * Called from vn_free() to run the destructor function for each vsd
 * Locks out vsd_create and vsd_destroy
 * Assumes that the destructor *DOES NOT* use vsd
 */
void
vsd_free(vnode_t *vp)
{
	int i;
	struct vsd_node *vsd = vp->v_vsd;

	if (vsd == NULL)
		return;

	if (vsd->vs_nkeys == 0) {
		kmem_free(vsd, sizeof (*vsd));
		vp->v_vsd = NULL;
		return;
	}

	/*
	 * lock out vsd_create and vsd_destroy, call
	 * the destructor, and mark the value as destroyed.
	 */
	mutex_enter(&vsd_lock);

	for (i = 0; i < vsd->vs_nkeys; i++) {
		if (vsd->vs_value[i] && vsd_destructor[i])
			(*vsd_destructor[i])(vsd->vs_value[i]);
		vsd->vs_value[i] = NULL;
	}

	/*
	 * remove from linked list of VSD nodes
	 */
	list_remove(vsd_list, vsd);

	mutex_exit(&vsd_lock);

	/*
	 * free up the VSD
	 */
	kmem_free(vsd->vs_value, vsd->vs_nkeys * sizeof (void *));
	kmem_free(vsd, sizeof (struct vsd_node));
	vp->v_vsd = NULL;
}

/*
 * realloc: copy 'osize' bytes of 'old' into a new zeroed buffer of
 * 'nsize' bytes and free the old buffer.
 */
static void *
vsd_realloc(void *old, size_t osize, size_t nsize)
{
	void *new;

	new = kmem_zalloc(nsize, KM_SLEEP);
	if (old) {
		bcopy(old, new, osize);
		kmem_free(old, osize);
	}
	return (new);
}
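/*
 * Editor's illustrative sketch (not part of the original file): the
 * typical VSD key lifecycle for a hypothetical kernel consumer.  The
 * key variable, destructor, payload type, and function names are all
 * invented for the example; what is real is the contract shown above:
 * vsd_create() is a no-op for an already-allocated key, vsd_set() runs
 * under v_vsd_lock, and vsd_destroy() invokes the destructor on every
 * vnode still holding a value for the key.  Guarded by #if 0 so it is
 * never compiled.
 */
#if 0
static uint_t my_vsd_key;	/* 0 until vsd_create() assigns a key */

static void
my_vsd_dtor(void *value)
{
	kmem_free(value, MY_PAYLOAD_SIZE);	/* hypothetical size */
}

static int
my_attach_payload(vnode_t *vp, void *payload)
{
	int err;

	vsd_create(&my_vsd_key, my_vsd_dtor);	/* idempotent */

	mutex_enter(&vp->v_vsd_lock);
	err = vsd_set(vp, my_vsd_key, payload);
	mutex_exit(&vp->v_vsd_lock);

	return (err);
}

static void
my_module_fini(void)
{
	vsd_destroy(&my_vsd_key);	/* resets my_vsd_key to 0 */
}
#endif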
/*
 * Set up the extensible system attribute for creating a reparse point.
 * The symlink data 'target' is validated for the proper format of a
 * reparse string, and a check is also made to ensure that the symlink
 * data does not point to an existing file.
 *
 * Returns 0 on success, -1 on error.
 */
static int
fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr)
{
	xoptattr_t *xoap;

	if ((!target) || (!vap) || (!xvattr))
		return (-1);

	/* validate reparse string */
	if (reparse_validate((const char *)target))
		return (-1);

	xva_init(xvattr);
	xvattr->xva_vattr = *vap;
	xvattr->xva_vattr.va_mask |= AT_XVATTR;
	xoap = xva_getxoptattr(xvattr);
	ASSERT(xoap);
	XVA_SET_REQ(xvattr, XAT_REPARSE);
	xoap->xoa_reparse = 1;

	return (0);
}

/*
 * Function to check whether a symlink is a reparse point.
 * Returns B_TRUE if it is a reparse point, else returns B_FALSE.
 */
boolean_t
vn_is_reparse(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	xvattr_t xvattr;
	xoptattr_t *xoap;

	if ((vp->v_type != VLNK) ||
	    !(vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR)))
		return (B_FALSE);

	xva_init(&xvattr);
	xoap = xva_getxoptattr(&xvattr);
	ASSERT(xoap);
	XVA_SET_REQ(&xvattr, XAT_REPARSE);

	if (VOP_GETATTR(vp, &xvattr.xva_vattr, 0, cr, ct))
		return (B_FALSE);

	if ((!(xvattr.xva_vattr.va_mask & AT_XVATTR)) ||
	    (!(XVA_ISSET_RTN(&xvattr, XAT_REPARSE))))
		return (B_FALSE);

	return (xoap->xoa_reparse ? B_TRUE : B_FALSE);
}
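/*
 * Editor's illustrative sketch (not part of the original file): a
 * hypothetical caller deciding whether a symlink it has looked up is
 * really a reparse point.  Only vn_is_reparse() above is real; the
 * surrounding function and its EREMOTE policy are invented for the
 * example.  Guarded by #if 0 so it is never compiled.
 */
#if 0
static int
follow_or_reparse_example(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	if (vp->v_type == VLNK && vn_is_reparse(vp, cr, ct)) {
		/*
		 * Hand the vnode to a reparse-aware service instead
		 * of following it as an ordinary symlink.
		 */
		return (EREMOTE);	/* hypothetical policy */
	}

	/* ordinary symlink (or non-symlink): take the normal path */
	return (0);
}
#endif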